| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 625, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0016, |
| "grad_norm": 36.86253356933594, |
| "learning_rate": 0.0, |
| "loss": 6.970664024353027, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 46.62815475463867, |
| "learning_rate": 1e-05, |
| "loss": 7.119298934936523, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 38.75471878051758, |
| "learning_rate": 2e-05, |
| "loss": 6.804569721221924, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 23.79888343811035, |
| "learning_rate": 3e-05, |
| "loss": 6.857824325561523, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 21.74065589904785, |
| "learning_rate": 4e-05, |
| "loss": 6.550006866455078, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 18.586503982543945, |
| "learning_rate": 5e-05, |
| "loss": 6.686573028564453, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0112, |
| "grad_norm": 15.070769309997559, |
| "learning_rate": 6e-05, |
| "loss": 6.578455924987793, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 7.648688316345215, |
| "learning_rate": 7.000000000000001e-05, |
| "loss": 6.312182426452637, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0144, |
| "grad_norm": 11.109657287597656, |
| "learning_rate": 8e-05, |
| "loss": 6.330634593963623, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 11.878482818603516, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 6.246118545532227, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0176, |
| "grad_norm": 15.020891189575195, |
| "learning_rate": 0.0001, |
| "loss": 6.080811500549316, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 10.975037574768066, |
| "learning_rate": 0.00011, |
| "loss": 6.293004989624023, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0208, |
| "grad_norm": 5.8413214683532715, |
| "learning_rate": 0.00012, |
| "loss": 6.052936553955078, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 11.264659881591797, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 6.178928852081299, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 5.662702560424805, |
| "learning_rate": 0.00014000000000000001, |
| "loss": 6.111515998840332, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 7.999163627624512, |
| "learning_rate": 0.00015, |
| "loss": 6.4416985511779785, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0272, |
| "grad_norm": 4.368480205535889, |
| "learning_rate": 0.00016, |
| "loss": 6.070431709289551, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 6.195078372955322, |
| "learning_rate": 0.00017, |
| "loss": 6.400940895080566, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0304, |
| "grad_norm": 4.218802452087402, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 5.938872337341309, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 8.09906005859375, |
| "learning_rate": 0.00019, |
| "loss": 6.1384053230285645, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0336, |
| "grad_norm": 5.7899065017700195, |
| "learning_rate": 0.0002, |
| "loss": 6.211584568023682, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 3.5106873512268066, |
| "learning_rate": 0.00021, |
| "loss": 6.081808567047119, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0368, |
| "grad_norm": 5.990793228149414, |
| "learning_rate": 0.00022, |
| "loss": 6.311020374298096, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 4.144802570343018, |
| "learning_rate": 0.00023, |
| "loss": 6.124863147735596, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 4.716309547424316, |
| "learning_rate": 0.00024, |
| "loss": 6.189701557159424, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 3.5594468116760254, |
| "learning_rate": 0.00025, |
| "loss": 5.711904048919678, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0432, |
| "grad_norm": 7.885351657867432, |
| "learning_rate": 0.00026000000000000003, |
| "loss": 6.188915729522705, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 4.330770492553711, |
| "learning_rate": 0.00027, |
| "loss": 6.156501293182373, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0464, |
| "grad_norm": 6.669336318969727, |
| "learning_rate": 0.00028000000000000003, |
| "loss": 6.337223052978516, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 4.431726932525635, |
| "learning_rate": 0.00029, |
| "loss": 5.854226112365723, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0496, |
| "grad_norm": 5.652588367462158, |
| "learning_rate": 0.0003, |
| "loss": 6.1465911865234375, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 4.4275360107421875, |
| "learning_rate": 0.00031, |
| "loss": 6.568665504455566, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0528, |
| "grad_norm": 5.78800106048584, |
| "learning_rate": 0.00032, |
| "loss": 5.84707498550415, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 5.778809547424316, |
| "learning_rate": 0.00033, |
| "loss": 6.26806640625, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 3.150599718093872, |
| "learning_rate": 0.00034, |
| "loss": 5.942642688751221, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 5.755363464355469, |
| "learning_rate": 0.00035, |
| "loss": 6.048552989959717, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0592, |
| "grad_norm": 4.171663284301758, |
| "learning_rate": 0.00035999999999999997, |
| "loss": 6.371613025665283, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 4.288946628570557, |
| "learning_rate": 0.00037, |
| "loss": 6.1250200271606445, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0624, |
| "grad_norm": 4.6287713050842285, |
| "learning_rate": 0.00038, |
| "loss": 6.222686767578125, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 5.058150291442871, |
| "learning_rate": 0.00039000000000000005, |
| "loss": 6.543748378753662, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0656, |
| "grad_norm": 4.104369640350342, |
| "learning_rate": 0.0004, |
| "loss": 6.065921783447266, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 6.011862754821777, |
| "learning_rate": 0.00041, |
| "loss": 5.975309371948242, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0688, |
| "grad_norm": 3.899702787399292, |
| "learning_rate": 0.00042, |
| "loss": 6.357814788818359, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 4.300708770751953, |
| "learning_rate": 0.00043, |
| "loss": 5.761978626251221, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 5.165529727935791, |
| "learning_rate": 0.00044, |
| "loss": 6.23648738861084, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 3.270381212234497, |
| "learning_rate": 0.00045000000000000004, |
| "loss": 6.216146469116211, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0752, |
| "grad_norm": 3.381625175476074, |
| "learning_rate": 0.00046, |
| "loss": 5.920130729675293, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 2.7397425174713135, |
| "learning_rate": 0.00047, |
| "loss": 5.948547840118408, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0784, |
| "grad_norm": 4.689820289611816, |
| "learning_rate": 0.00048, |
| "loss": 6.4204936027526855, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 4.353704929351807, |
| "learning_rate": 0.00049, |
| "loss": 5.919530391693115, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0816, |
| "grad_norm": 3.5159783363342285, |
| "learning_rate": 0.0005, |
| "loss": 6.303610324859619, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 3.121208429336548, |
| "learning_rate": 0.000499996268589849, |
| "loss": 5.74945068359375, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0848, |
| "grad_norm": 3.5212831497192383, |
| "learning_rate": 0.0004999850744707835, |
| "loss": 6.07124662399292, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 2.848412275314331, |
| "learning_rate": 0.0004999664179769621, |
| "loss": 6.209238052368164, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 2.6709463596343994, |
| "learning_rate": 0.0004999402996653051, |
| "loss": 5.881043910980225, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 2.7929718494415283, |
| "learning_rate": 0.0004999067203154777, |
| "loss": 6.170549392700195, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0912, |
| "grad_norm": 2.7909319400787354, |
| "learning_rate": 0.0004998656809298664, |
| "loss": 5.91437292098999, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 3.028071880340576, |
| "learning_rate": 0.0004998171827335494, |
| "loss": 5.768723964691162, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0944, |
| "grad_norm": 3.5717194080352783, |
| "learning_rate": 0.0004997612271742601, |
| "loss": 6.126382827758789, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 2.5707123279571533, |
| "learning_rate": 0.0004996978159223436, |
| "loss": 6.031285285949707, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0976, |
| "grad_norm": 2.886106252670288, |
| "learning_rate": 0.000499626950870707, |
| "loss": 5.81216287612915, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 3.2320756912231445, |
| "learning_rate": 0.000499548634134763, |
| "loss": 6.256302833557129, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1008, |
| "grad_norm": 2.3101658821105957, |
| "learning_rate": 0.0004994628680523662, |
| "loss": 6.089540481567383, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 2.2067813873291016, |
| "learning_rate": 0.0004993696551837443, |
| "loss": 6.167810440063477, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 2.936598300933838, |
| "learning_rate": 0.0004992689983114208, |
| "loss": 6.019635200500488, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 3.3017938137054443, |
| "learning_rate": 0.0004991609004401324, |
| "loss": 5.883628845214844, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1072, |
| "grad_norm": 3.359445333480835, |
| "learning_rate": 0.0004990453647967389, |
| "loss": 5.827721118927002, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 3.057800769805908, |
| "learning_rate": 0.0004989223948301272, |
| "loss": 5.853091239929199, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1104, |
| "grad_norm": 3.4538474082946777, |
| "learning_rate": 0.0004987919942111087, |
| "loss": 6.159923553466797, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 2.778003692626953, |
| "learning_rate": 0.0004986541668323086, |
| "loss": 5.855865478515625, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1136, |
| "grad_norm": 2.497781753540039, |
| "learning_rate": 0.0004985089168080509, |
| "loss": 6.018093109130859, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 2.0816121101379395, |
| "learning_rate": 0.0004983562484742349, |
| "loss": 6.006240367889404, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1168, |
| "grad_norm": 2.8136582374572754, |
| "learning_rate": 0.000498196166388206, |
| "loss": 5.550631999969482, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 2.223203420639038, |
| "learning_rate": 0.0004980286753286195, |
| "loss": 5.823319911956787, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.3398818969726562, |
| "learning_rate": 0.0004978537802952981, |
| "loss": 5.757394790649414, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 3.7000091075897217, |
| "learning_rate": 0.0004976714865090827, |
| "loss": 6.139785289764404, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1232, |
| "grad_norm": 2.992990255355835, |
| "learning_rate": 0.0004974817994116764, |
| "loss": 5.841603755950928, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 4.935225963592529, |
| "learning_rate": 0.0004972847246654819, |
| "loss": 5.688216209411621, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1264, |
| "grad_norm": 2.531768798828125, |
| "learning_rate": 0.0004970802681534331, |
| "loss": 6.026415824890137, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 3.366121530532837, |
| "learning_rate": 0.0004968684359788187, |
| "loss": 6.1217217445373535, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1296, |
| "grad_norm": 2.439563035964966, |
| "learning_rate": 0.0004966492344651005, |
| "loss": 5.786462783813477, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 2.759390115737915, |
| "learning_rate": 0.0004964226701557246, |
| "loss": 6.397160053253174, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1328, |
| "grad_norm": 2.6187775135040283, |
| "learning_rate": 0.000496188749813926, |
| "loss": 5.781584739685059, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 2.3311808109283447, |
| "learning_rate": 0.0004959474804225263, |
| "loss": 5.623251914978027, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 1.8278515338897705, |
| "learning_rate": 0.0004956988691837262, |
| "loss": 5.646507263183594, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 1.940083622932434, |
| "learning_rate": 0.0004954429235188896, |
| "loss": 5.845520496368408, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1392, |
| "grad_norm": 1.715268611907959, |
| "learning_rate": 0.0004951796510683226, |
| "loss": 5.86661434173584, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 2.3065476417541504, |
| "learning_rate": 0.0004949090596910452, |
| "loss": 6.391292572021484, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1424, |
| "grad_norm": 2.54691481590271, |
| "learning_rate": 0.0004946311574645565, |
| "loss": 5.941152572631836, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 2.3436925411224365, |
| "learning_rate": 0.0004943459526845942, |
| "loss": 5.867047309875488, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1456, |
| "grad_norm": 2.8488574028015137, |
| "learning_rate": 0.0004940534538648862, |
| "loss": 6.295483112335205, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 2.0991811752319336, |
| "learning_rate": 0.0004937536697368971, |
| "loss": 6.155615329742432, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1488, |
| "grad_norm": 2.874187707901001, |
| "learning_rate": 0.0004934466092495673, |
| "loss": 6.002193450927734, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 2.6309406757354736, |
| "learning_rate": 0.0004931322815690456, |
| "loss": 6.190125942230225, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 2.5140063762664795, |
| "learning_rate": 0.0004928106960784163, |
| "loss": 5.832353591918945, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 2.2540531158447266, |
| "learning_rate": 0.0004924818623774179, |
| "loss": 5.870430946350098, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1552, |
| "grad_norm": 2.5736892223358154, |
| "learning_rate": 0.0004921457902821578, |
| "loss": 5.9354658126831055, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 2.8597569465637207, |
| "learning_rate": 0.0004918024898248188, |
| "loss": 5.980432987213135, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1584, |
| "grad_norm": 2.679422616958618, |
| "learning_rate": 0.0004914519712533592, |
| "loss": 5.808017253875732, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.6200029850006104, |
| "learning_rate": 0.0004910942450312075, |
| "loss": 6.042236804962158, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1616, |
| "grad_norm": 2.3748672008514404, |
| "learning_rate": 0.0004907293218369499, |
| "loss": 5.913302421569824, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 2.0950937271118164, |
| "learning_rate": 0.000490357212564011, |
| "loss": 5.478336334228516, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1648, |
| "grad_norm": 2.222339391708374, |
| "learning_rate": 0.0004899779283203296, |
| "loss": 5.753122329711914, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 1.8135013580322266, |
| "learning_rate": 0.0004895914804280262, |
| "loss": 5.8378705978393555, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 1.834136962890625, |
| "learning_rate": 0.0004891978804230655, |
| "loss": 5.386728286743164, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 2.7069461345672607, |
| "learning_rate": 0.000488797140054912, |
| "loss": 5.91385555267334, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1712, |
| "grad_norm": 2.961819648742676, |
| "learning_rate": 0.0004883892712861791, |
| "loss": 5.622028350830078, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 2.6172969341278076, |
| "learning_rate": 0.0004879742862922721, |
| "loss": 5.701954364776611, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1744, |
| "grad_norm": 2.4764273166656494, |
| "learning_rate": 0.0004875521974610247, |
| "loss": 5.922611236572266, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 2.321749448776245, |
| "learning_rate": 0.00048712301739232933, |
| "loss": 5.958606719970703, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1776, |
| "grad_norm": 2.569371461868286, |
| "learning_rate": 0.00048668675889776094, |
| "loss": 5.966418266296387, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 2.0367257595062256, |
| "learning_rate": 0.00048624343500019453, |
| "loss": 5.828032970428467, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1808, |
| "grad_norm": 2.0033013820648193, |
| "learning_rate": 0.0004857930589334164, |
| "loss": 5.9207658767700195, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 2.4433813095092773, |
| "learning_rate": 0.00048533564414172915, |
| "loss": 5.987303256988525, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.6759791374206543, |
| "learning_rate": 0.00048487120427955047, |
| "loss": 5.758200168609619, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 1.9562362432479858, |
| "learning_rate": 0.0004843997532110051, |
| "loss": 6.076003074645996, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1872, |
| "grad_norm": 1.9110207557678223, |
| "learning_rate": 0.0004839213050095116, |
| "loss": 5.927783966064453, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 1.9068591594696045, |
| "learning_rate": 0.00048343587395736177, |
| "loss": 5.609103202819824, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1904, |
| "grad_norm": 1.801079273223877, |
| "learning_rate": 0.0004829434745452944, |
| "loss": 6.146678924560547, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 2.160980224609375, |
| "learning_rate": 0.00048244412147206283, |
| "loss": 5.927748203277588, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1936, |
| "grad_norm": 2.2416179180145264, |
| "learning_rate": 0.0004819378296439961, |
| "loss": 5.8890509605407715, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 2.4966790676116943, |
| "learning_rate": 0.000481424614174554, |
| "loss": 5.660029411315918, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1968, |
| "grad_norm": 2.2897145748138428, |
| "learning_rate": 0.00048090449038387564, |
| "loss": 5.889649391174316, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 2.1526010036468506, |
| "learning_rate": 0.00048037747379832266, |
| "loss": 5.937025547027588, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.3477089405059814, |
| "learning_rate": 0.0004798435801500154, |
| "loss": 5.83440637588501, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 1.9011043310165405, |
| "learning_rate": 0.00047930282537636326, |
| "loss": 6.049851417541504, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2032, |
| "grad_norm": 2.7886276245117188, |
| "learning_rate": 0.00047875522561958907, |
| "loss": 6.053065299987793, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 1.8351131677627563, |
| "learning_rate": 0.0004782007972262471, |
| "loss": 5.606479644775391, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2064, |
| "grad_norm": 1.759033441543579, |
| "learning_rate": 0.0004776395567467353, |
| "loss": 5.892756462097168, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.9948967695236206, |
| "learning_rate": 0.00047707152093480097, |
| "loss": 5.802677631378174, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2096, |
| "grad_norm": 1.7873433828353882, |
| "learning_rate": 0.0004764967067470409, |
| "loss": 5.694087505340576, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 2.129274606704712, |
| "learning_rate": 0.00047591513134239506, |
| "loss": 6.053646087646484, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2128, |
| "grad_norm": 1.815743327140808, |
| "learning_rate": 0.0004753268120816344, |
| "loss": 5.840423107147217, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 1.6211766004562378, |
| "learning_rate": 0.0004747317665268427, |
| "loss": 5.866158962249756, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 1.5764577388763428, |
| "learning_rate": 0.000474130012440892, |
| "loss": 5.642172813415527, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 1.6282553672790527, |
| "learning_rate": 0.0004735215677869128, |
| "loss": 5.813696384429932, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2192, |
| "grad_norm": 1.587697148323059, |
| "learning_rate": 0.0004729064507277576, |
| "loss": 5.456190586090088, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 2.2339489459991455, |
| "learning_rate": 0.0004722846796254586, |
| "loss": 5.826436996459961, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2224, |
| "grad_norm": 1.6775805950164795, |
| "learning_rate": 0.00047165627304068, |
| "loss": 5.307504653930664, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 1.7358742952346802, |
| "learning_rate": 0.0004710212497321633, |
| "loss": 5.858373641967773, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2256, |
| "grad_norm": 1.7377792596817017, |
| "learning_rate": 0.0004703796286561679, |
| "loss": 5.746421813964844, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 1.7279226779937744, |
| "learning_rate": 0.00046973142896590504, |
| "loss": 5.818030834197998, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2288, |
| "grad_norm": 1.896462321281433, |
| "learning_rate": 0.0004690766700109659, |
| "loss": 5.706021308898926, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 1.599483609199524, |
| "learning_rate": 0.00046841537133674414, |
| "loss": 5.414737701416016, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 2.0782713890075684, |
| "learning_rate": 0.00046774755268385253, |
| "loss": 6.040131092071533, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 1.5299904346466064, |
| "learning_rate": 0.00046707323398753343, |
| "loss": 5.940986633300781, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2352, |
| "grad_norm": 1.7263022661209106, |
| "learning_rate": 0.00046639243537706387, |
| "loss": 5.658965587615967, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 1.9568145275115967, |
| "learning_rate": 0.0004657051771751546, |
| "loss": 5.630545139312744, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2384, |
| "grad_norm": 1.7731075286865234, |
| "learning_rate": 0.0004650114798973434, |
| "loss": 5.288701057434082, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.5925266742706299, |
| "learning_rate": 0.000464311364251383, |
| "loss": 5.936962127685547, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2416, |
| "grad_norm": 1.6020593643188477, |
| "learning_rate": 0.0004636048511366222, |
| "loss": 5.519335746765137, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 1.5809364318847656, |
| "learning_rate": 0.0004628919616433827, |
| "loss": 5.557144641876221, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2448, |
| "grad_norm": 1.8422110080718994, |
| "learning_rate": 0.0004621727170523293, |
| "loss": 5.852574348449707, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 1.6175079345703125, |
| "learning_rate": 0.0004614471388338346, |
| "loss": 5.70945405960083, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 1.7624582052230835, |
| "learning_rate": 0.00046071524864733796, |
| "loss": 5.58186149597168, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 1.5593520402908325, |
| "learning_rate": 0.0004599770683406991, |
| "loss": 5.716488361358643, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2512, |
| "grad_norm": 1.9119805097579956, |
| "learning_rate": 0.0004592326199495461, |
| "loss": 5.6072845458984375, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 1.7177708148956299, |
| "learning_rate": 0.0004584819256966171, |
| "loss": 5.845829010009766, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2544, |
| "grad_norm": 2.197434663772583, |
| "learning_rate": 0.0004577250079910973, |
| "loss": 5.7057013511657715, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 2.089193344116211, |
| "learning_rate": 0.00045696188942795005, |
| "loss": 5.745038986206055, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2576, |
| "grad_norm": 2.2623579502105713, |
| "learning_rate": 0.0004561925927872421, |
| "loss": 5.437371253967285, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 1.5014855861663818, |
| "learning_rate": 0.000455417141033464, |
| "loss": 5.617335796356201, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.2608, |
| "grad_norm": 1.6091152429580688, |
| "learning_rate": 0.00045463555731484396, |
| "loss": 5.750364303588867, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 1.7927204370498657, |
| "learning_rate": 0.0004538478649626574, |
| "loss": 6.134846210479736, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 1.5488578081130981, |
| "learning_rate": 0.00045305408749053016, |
| "loss": 5.881228923797607, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 1.6964894533157349, |
| "learning_rate": 0.0004522542485937369, |
| "loss": 5.726894855499268, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2672, |
| "grad_norm": 1.640055775642395, |
| "learning_rate": 0.0004514483721484933, |
| "loss": 5.594513893127441, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 1.622751235961914, |
| "learning_rate": 0.0004506364822112439, |
| "loss": 5.518566131591797, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2704, |
| "grad_norm": 1.5396101474761963, |
| "learning_rate": 0.00044981860301794335, |
| "loss": 5.589843273162842, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 1.4792349338531494, |
| "learning_rate": 0.0004489947589833336, |
| "loss": 5.4407501220703125, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2736, |
| "grad_norm": 1.678307056427002, |
| "learning_rate": 0.00044816497470021456, |
| "loss": 5.557910919189453, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 1.7133512496948242, |
| "learning_rate": 0.0004473292749387102, |
| "loss": 5.618350982666016, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2768, |
| "grad_norm": 1.4833654165267944, |
| "learning_rate": 0.00044648768464552904, |
| "loss": 5.650544166564941, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 1.787833571434021, |
| "learning_rate": 0.00044564022894321966, |
| "loss": 5.516573429107666, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.016937255859375, |
| "learning_rate": 0.00044478693312942054, |
| "loss": 5.867213249206543, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 1.6533347368240356, |
| "learning_rate": 0.00044392782267610497, |
| "loss": 5.728193283081055, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2832, |
| "grad_norm": 1.545316457748413, |
| "learning_rate": 0.00044306292322882063, |
| "loss": 5.591842174530029, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 1.8199504613876343, |
| "learning_rate": 0.00044219226060592415, |
| "loss": 5.673701763153076, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2864, |
| "grad_norm": 1.597760558128357, |
| "learning_rate": 0.0004413158607978104, |
| "loss": 5.541760444641113, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 1.8495144844055176, |
| "learning_rate": 0.0004404337499661364, |
| "loss": 5.602829456329346, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2896, |
| "grad_norm": 1.8567280769348145, |
| "learning_rate": 0.00043954595444304067, |
| "loss": 5.71918249130249, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 1.8808255195617676, |
| "learning_rate": 0.0004386525007303571, |
| "loss": 5.545975208282471, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2928, |
| "grad_norm": 1.7914137840270996, |
| "learning_rate": 0.00043775341549882364, |
| "loss": 5.760030269622803, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 1.5386247634887695, |
| "learning_rate": 0.00043684872558728637, |
| "loss": 5.41167688369751, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 1.7406638860702515, |
| "learning_rate": 0.00043593845800189826, |
| "loss": 5.6405463218688965, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 1.7136033773422241, |
| "learning_rate": 0.000435022639915313, |
| "loss": 5.921665191650391, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2992, |
| "grad_norm": 1.6137181520462036, |
| "learning_rate": 0.00043410129866587377, |
| "loss": 5.523682117462158, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 1.4593943357467651, |
| "learning_rate": 0.00043317446175679733, |
| "loss": 5.579282283782959, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3024, |
| "grad_norm": 1.498769760131836, |
| "learning_rate": 0.00043224215685535287, |
| "loss": 5.65568733215332, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 1.4099656343460083, |
| "learning_rate": 0.00043130441179203626, |
| "loss": 5.450364589691162, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3056, |
| "grad_norm": 1.762242317199707, |
| "learning_rate": 0.00043036125455973894, |
| "loss": 5.701364517211914, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 1.9644355773925781, |
| "learning_rate": 0.00042941271331291275, |
| "loss": 5.515183448791504, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3088, |
| "grad_norm": 1.9126542806625366, |
| "learning_rate": 0.0004284588163667292, |
| "loss": 5.794773578643799, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 1.8638148307800293, |
| "learning_rate": 0.0004274995921962343, |
| "loss": 5.806097030639648, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 1.701051115989685, |
| "learning_rate": 0.00042653506943549844, |
| "loss": 5.101565361022949, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 2.270686626434326, |
| "learning_rate": 0.00042556527687676184, |
| "loss": 5.6310319900512695, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3152, |
| "grad_norm": 1.8609226942062378, |
| "learning_rate": 0.00042459024346957477, |
| "loss": 5.535915851593018, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 2.0503954887390137, |
| "learning_rate": 0.0004236099983199338, |
| "loss": 5.734372138977051, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3184, |
| "grad_norm": 1.6068768501281738, |
| "learning_rate": 0.00042262457068941247, |
| "loss": 5.578657150268555, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.602341651916504, |
| "learning_rate": 0.000421633989994288, |
| "loss": 5.451129913330078, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3216, |
| "grad_norm": 1.4740185737609863, |
| "learning_rate": 0.00042063828580466355, |
| "loss": 5.597467422485352, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 1.6884571313858032, |
| "learning_rate": 0.0004196374878435846, |
| "loss": 5.773179054260254, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3248, |
| "grad_norm": 3.2064454555511475, |
| "learning_rate": 0.00041863162598615265, |
| "loss": 5.903354167938232, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 2.3717195987701416, |
| "learning_rate": 0.0004176207302586329, |
| "loss": 5.43741512298584, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 1.7029227018356323, |
| "learning_rate": 0.0004166048308375578, |
| "loss": 5.542079925537109, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 1.4132956266403198, |
| "learning_rate": 0.0004155839580488269, |
| "loss": 5.548293590545654, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3312, |
| "grad_norm": 1.7507219314575195, |
| "learning_rate": 0.0004145581423668008, |
| "loss": 5.625497817993164, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 1.7790549993515015, |
| "learning_rate": 0.00041352741441339175, |
| "loss": 5.523196220397949, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3344, |
| "grad_norm": 1.6135910749435425, |
| "learning_rate": 0.0004124918049571499, |
| "loss": 5.497952461242676, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 1.700406789779663, |
| "learning_rate": 0.00041145134491234425, |
| "loss": 5.513679027557373, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3376, |
| "grad_norm": 1.5768215656280518, |
| "learning_rate": 0.00041040606533804025, |
| "loss": 5.65580940246582, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 1.5992205142974854, |
| "learning_rate": 0.00040935599743717243, |
| "loss": 5.415986061096191, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3408, |
| "grad_norm": 2.1629347801208496, |
| "learning_rate": 0.00040830117255561294, |
| "loss": 5.394900321960449, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 1.5803372859954834, |
| "learning_rate": 0.000407241622181236, |
| "loss": 5.085600852966309, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 1.4815354347229004, |
| "learning_rate": 0.0004061773779429776, |
| "loss": 5.647576332092285, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 1.5663725137710571, |
| "learning_rate": 0.00040510847160989203, |
| "loss": 5.418036460876465, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3472, |
| "grad_norm": 1.7371917963027954, |
| "learning_rate": 0.00040403493509020275, |
| "loss": 5.280213356018066, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 1.4984663724899292, |
| "learning_rate": 0.0004029568004303501, |
| "loss": 5.509110927581787, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3504, |
| "grad_norm": 1.5602787733078003, |
| "learning_rate": 0.0004018740998140352, |
| "loss": 5.608109951019287, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 1.6253869533538818, |
| "learning_rate": 0.0004007868655612586, |
| "loss": 5.198980331420898, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3536, |
| "grad_norm": 2.013225555419922, |
| "learning_rate": 0.00039969513012735566, |
| "loss": 5.129229545593262, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 1.4294469356536865, |
| "learning_rate": 0.00039859892610202786, |
| "loss": 5.616961479187012, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3568, |
| "grad_norm": 1.7147184610366821, |
| "learning_rate": 0.0003974982862083697, |
| "loss": 5.369600772857666, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 1.6554255485534668, |
| "learning_rate": 0.00039639324330189234, |
| "loss": 5.445437431335449, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.799031972885132, |
| "learning_rate": 0.00039528383036954224, |
| "loss": 5.5256500244140625, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 1.364023208618164, |
| "learning_rate": 0.00039417008052871684, |
| "loss": 5.256645202636719, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3632, |
| "grad_norm": 1.6340276002883911, |
| "learning_rate": 0.0003930520270262757, |
| "loss": 5.542902946472168, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 1.289225459098816, |
| "learning_rate": 0.0003919297032375485, |
| "loss": 5.363834381103516, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3664, |
| "grad_norm": 1.7022228240966797, |
| "learning_rate": 0.00039080314266533826, |
| "loss": 5.533950328826904, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 1.5650995969772339, |
| "learning_rate": 0.00038967237893892134, |
| "loss": 5.173304557800293, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3696, |
| "grad_norm": 1.7082035541534424, |
| "learning_rate": 0.00038853744581304376, |
| "loss": 5.347742080688477, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 1.5300484895706177, |
| "learning_rate": 0.00038739837716691327, |
| "loss": 5.307585716247559, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3728, |
| "grad_norm": 1.4221162796020508, |
| "learning_rate": 0.0003862552070031886, |
| "loss": 5.390194892883301, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 1.5934863090515137, |
| "learning_rate": 0.00038510796944696355, |
| "loss": 5.698745250701904, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.574376106262207, |
| "learning_rate": 0.00038395669874474915, |
| "loss": 5.695178508758545, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 1.4545917510986328, |
| "learning_rate": 0.00038280142926345084, |
| "loss": 5.21755313873291, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3792, |
| "grad_norm": 1.6824661493301392, |
| "learning_rate": 0.0003816421954893428, |
| "loss": 5.816608428955078, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 1.943800449371338, |
| "learning_rate": 0.0003804790320270384, |
| "loss": 5.530592441558838, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3824, |
| "grad_norm": 1.4291504621505737, |
| "learning_rate": 0.00037931197359845713, |
| "loss": 5.4604811668396, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 1.450872778892517, |
| "learning_rate": 0.00037814105504178853, |
| "loss": 5.420169353485107, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3856, |
| "grad_norm": 1.431982159614563, |
| "learning_rate": 0.00037696631131045155, |
| "loss": 5.437797546386719, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 1.5654010772705078, |
| "learning_rate": 0.00037578777747205173, |
| "loss": 5.542431354522705, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3888, |
| "grad_norm": 1.4680758714675903, |
| "learning_rate": 0.000374605488707334, |
| "loss": 5.8609299659729, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 1.484171748161316, |
| "learning_rate": 0.0003734194803091329, |
| "loss": 5.2261762619018555, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 1.378163456916809, |
| "learning_rate": 0.00037222978768131857, |
| "loss": 5.523834228515625, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 1.8471333980560303, |
| "learning_rate": 0.00037103644633774014, |
| "loss": 5.406384468078613, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.3952, |
| "grad_norm": 1.4139055013656616, |
| "learning_rate": 0.00036983949190116575, |
| "loss": 5.400781631469727, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 1.2311971187591553, |
| "learning_rate": 0.0003686389601022188, |
| "loss": 5.407512664794922, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3984, |
| "grad_norm": 1.7283658981323242, |
| "learning_rate": 0.0003674348867783115, |
| "loss": 5.575046062469482, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.3995170593261719, |
| "learning_rate": 0.0003662273078725754, |
| "loss": 5.523738384246826, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4016, |
| "grad_norm": 1.3066350221633911, |
| "learning_rate": 0.00036501625943278804, |
| "loss": 5.64078426361084, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 1.3789863586425781, |
| "learning_rate": 0.0003638017776102968, |
| "loss": 5.428204536437988, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.4048, |
| "grad_norm": 1.721011757850647, |
| "learning_rate": 0.00036258389865894027, |
| "loss": 5.646852016448975, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 1.7198848724365234, |
| "learning_rate": 0.0003613626589339652, |
| "loss": 5.864961624145508, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 1.8125197887420654, |
| "learning_rate": 0.00036013809489094246, |
| "loss": 5.502827167510986, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 1.5398613214492798, |
| "learning_rate": 0.00035891024308467727, |
| "loss": 5.422593116760254, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4112, |
| "grad_norm": 1.2854444980621338, |
| "learning_rate": 0.0003576791401681194, |
| "loss": 5.769440650939941, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 1.302415370941162, |
| "learning_rate": 0.0003564448228912682, |
| "loss": 5.568209171295166, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4144, |
| "grad_norm": 1.4718657732009888, |
| "learning_rate": 0.00035520732810007566, |
| "loss": 5.543675422668457, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 1.6336448192596436, |
| "learning_rate": 0.0003539666927353469, |
| "loss": 5.599291801452637, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4176, |
| "grad_norm": 1.7621365785598755, |
| "learning_rate": 0.00035272295383163713, |
| "loss": 5.4962263107299805, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 1.6452198028564453, |
| "learning_rate": 0.00035147614851614587, |
| "loss": 5.347473621368408, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4208, |
| "grad_norm": 1.3223097324371338, |
| "learning_rate": 0.00035022631400760944, |
| "loss": 5.4395928382873535, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 1.178402304649353, |
| "learning_rate": 0.0003489734876151891, |
| "loss": 5.452559471130371, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 1.493491530418396, |
| "learning_rate": 0.0003477177067373579, |
| "loss": 5.549748420715332, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 1.2983075380325317, |
| "learning_rate": 0.0003464590088607839, |
| "loss": 5.593997478485107, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4272, |
| "grad_norm": 1.4325454235076904, |
| "learning_rate": 0.00034519743155921127, |
| "loss": 5.567399978637695, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 1.3392157554626465, |
| "learning_rate": 0.00034393301249233897, |
| "loss": 5.392118453979492, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4304, |
| "grad_norm": 1.543241262435913, |
| "learning_rate": 0.000342665789404696, |
| "loss": 5.2302565574646, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 1.5115416049957275, |
| "learning_rate": 0.00034139580012451523, |
| "loss": 5.704424858093262, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4336, |
| "grad_norm": 1.3637906312942505, |
| "learning_rate": 0.0003401230825626037, |
| "loss": 5.522019863128662, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 1.5312447547912598, |
| "learning_rate": 0.00033884767471121125, |
| "loss": 5.600247859954834, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4368, |
| "grad_norm": 1.467431664466858, |
| "learning_rate": 0.00033756961464289633, |
| "loss": 5.204289436340332, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 1.352095603942871, |
| "learning_rate": 0.0003362889405093894, |
| "loss": 5.327722549438477, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.3652808666229248, |
| "learning_rate": 0.0003350056905404543, |
| "loss": 5.118766784667969, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 1.6171950101852417, |
| "learning_rate": 0.00033371990304274655, |
| "loss": 5.259974479675293, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4432, |
| "grad_norm": 1.6351940631866455, |
| "learning_rate": 0.0003324316163986704, |
| "loss": 5.432730197906494, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 1.6966768503189087, |
| "learning_rate": 0.00033114086906523265, |
| "loss": 5.381967544555664, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4464, |
| "grad_norm": 1.3781499862670898, |
| "learning_rate": 0.00032984769957289503, |
| "loss": 5.303073883056641, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.5721884965896606, |
| "learning_rate": 0.0003285521465244237, |
| "loss": 5.291014671325684, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4496, |
| "grad_norm": 1.1372907161712646, |
| "learning_rate": 0.00032725424859373687, |
| "loss": 5.211060523986816, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 1.293617844581604, |
| "learning_rate": 0.00032595404452475085, |
| "loss": 5.443847179412842, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4528, |
| "grad_norm": 2.1258699893951416, |
| "learning_rate": 0.0003246515731302228, |
| "loss": 5.064897537231445, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 1.418958067893982, |
| "learning_rate": 0.00032334687329059264, |
| "loss": 5.420772552490234, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 1.2100834846496582, |
| "learning_rate": 0.0003220399839528222, |
| "loss": 5.425792217254639, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 1.2931607961654663, |
| "learning_rate": 0.0003207309441292325, |
| "loss": 5.330716609954834, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4592, |
| "grad_norm": 1.4552083015441895, |
| "learning_rate": 0.0003194197928963396, |
| "loss": 5.734864234924316, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 1.377821683883667, |
| "learning_rate": 0.00031810656939368744, |
| "loss": 5.4975361824035645, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4624, |
| "grad_norm": 1.3547130823135376, |
| "learning_rate": 0.0003167913128226803, |
| "loss": 5.421193599700928, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 1.4445191621780396, |
| "learning_rate": 0.0003154740624454118, |
| "loss": 5.138959884643555, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4656, |
| "grad_norm": 1.3757892847061157, |
| "learning_rate": 0.00031415485758349345, |
| "loss": 5.1781840324401855, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 1.2458899021148682, |
| "learning_rate": 0.0003128337376168805, |
| "loss": 4.89755916595459, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4688, |
| "grad_norm": 1.581918478012085, |
| "learning_rate": 0.00031151074198269656, |
| "loss": 5.327348709106445, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 1.5751845836639404, |
| "learning_rate": 0.00031018591017405644, |
| "loss": 5.386034965515137, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.6921762228012085, |
| "learning_rate": 0.0003088592817388869, |
| "loss": 5.158099174499512, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 1.62604820728302, |
| "learning_rate": 0.0003075308962787466, |
| "loss": 5.450359344482422, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.4752, |
| "grad_norm": 1.2735328674316406, |
| "learning_rate": 0.00030620079344764327, |
| "loss": 5.264720439910889, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 1.6045722961425781, |
| "learning_rate": 0.00030486901295085066, |
| "loss": 5.421563625335693, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.4784, |
| "grad_norm": 1.4631224870681763, |
| "learning_rate": 0.0003035355945437228, |
| "loss": 5.549293041229248, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.34758460521698, |
| "learning_rate": 0.00030220057803050765, |
| "loss": 5.213095664978027, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4816, |
| "grad_norm": 1.659041404724121, |
| "learning_rate": 0.0003008640032631585, |
| "loss": 5.40679931640625, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 1.3234513998031616, |
| "learning_rate": 0.00029952591014014454, |
| "loss": 5.249087333679199, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.4848, |
| "grad_norm": 1.2783095836639404, |
| "learning_rate": 0.0002981863386052599, |
| "loss": 5.571717262268066, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 1.2698612213134766, |
| "learning_rate": 0.0002968453286464312, |
| "loss": 5.460443019866943, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.411340594291687, |
| "learning_rate": 0.00029550292029452375, |
| "loss": 5.521218776702881, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 1.2482413053512573, |
| "learning_rate": 0.0002941591536221469, |
| "loss": 5.2962646484375, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4912, |
| "grad_norm": 1.3746726512908936, |
| "learning_rate": 0.0002928140687424573, |
| "loss": 5.614439964294434, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 1.5684117078781128, |
| "learning_rate": 0.00029146770580796205, |
| "loss": 5.34489107131958, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4944, |
| "grad_norm": 1.8253686428070068, |
| "learning_rate": 0.00029012010500931965, |
| "loss": 5.56744384765625, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 1.4048644304275513, |
| "learning_rate": 0.00028877130657414054, |
| "loss": 5.361034393310547, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4976, |
| "grad_norm": 1.3948677778244019, |
| "learning_rate": 0.0002874213507657861, |
| "loss": 5.47017240524292, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 1.4963343143463135, |
| "learning_rate": 0.00028607027788216674, |
| "loss": 5.397054672241211, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5008, |
| "grad_norm": 1.3787459135055542, |
| "learning_rate": 0.00028471812825453914, |
| "loss": 5.223832607269287, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 1.6353243589401245, |
| "learning_rate": 0.0002833649422463019, |
| "loss": 5.2796525955200195, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 1.3684626817703247, |
| "learning_rate": 0.0002820107602517913, |
| "loss": 5.421512126922607, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 1.2275117635726929, |
| "learning_rate": 0.0002806556226950746, |
| "loss": 5.282046318054199, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5072, |
| "grad_norm": 1.5556248426437378, |
| "learning_rate": 0.00027929957002874436, |
| "loss": 5.28046178817749, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 1.5862129926681519, |
| "learning_rate": 0.00027794264273270987, |
| "loss": 5.368446350097656, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5104, |
| "grad_norm": 1.4724379777908325, |
| "learning_rate": 0.00027658488131298946, |
| "loss": 5.535717010498047, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 1.257763147354126, |
| "learning_rate": 0.00027522632630050116, |
| "loss": 5.145805835723877, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5136, |
| "grad_norm": 1.2067614793777466, |
| "learning_rate": 0.00027386701824985254, |
| "loss": 5.230715274810791, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 1.3885655403137207, |
| "learning_rate": 0.00027250699773813066, |
| "loss": 5.397106170654297, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5168, |
| "grad_norm": 1.3261369466781616, |
| "learning_rate": 0.00027114630536369, |
| "loss": 5.118717193603516, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 1.3677432537078857, |
| "learning_rate": 0.0002697849817449415, |
| "loss": 5.1717400550842285, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.476125955581665, |
| "learning_rate": 0.00026842306751913926, |
| "loss": 5.247461318969727, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 1.4229127168655396, |
| "learning_rate": 0.0002670606033411678, |
| "loss": 5.157002925872803, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5232, |
| "grad_norm": 1.4474886655807495, |
| "learning_rate": 0.0002656976298823284, |
| "loss": 5.441634178161621, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 1.4530051946640015, |
| "learning_rate": 0.00026433418782912505, |
| "loss": 5.526297569274902, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5264, |
| "grad_norm": 1.1628731489181519, |
| "learning_rate": 0.00026297031788205, |
| "loss": 5.242552280426025, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 1.245635747909546, |
| "learning_rate": 0.00026160606075436844, |
| "loss": 5.074901103973389, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5296, |
| "grad_norm": 1.2995966672897339, |
| "learning_rate": 0.0002602414571709036, |
| "loss": 5.35468864440918, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 1.1434332132339478, |
| "learning_rate": 0.00025887654786682076, |
| "loss": 5.233968257904053, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5328, |
| "grad_norm": 1.8108292818069458, |
| "learning_rate": 0.0002575113735864114, |
| "loss": 5.389377593994141, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 1.7074164152145386, |
| "learning_rate": 0.0002561459750818769, |
| "loss": 5.581827163696289, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 1.5946106910705566, |
| "learning_rate": 0.0002547803931121119, |
| "loss": 5.279594898223877, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 1.6184440851211548, |
| "learning_rate": 0.00025341466844148775, |
| "loss": 5.198509693145752, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5392, |
| "grad_norm": 1.2537761926651, |
| "learning_rate": 0.0002520488418386358, |
| "loss": 5.231502056121826, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 1.4233760833740234, |
| "learning_rate": 0.00025068295407523, |
| "loss": 5.152407646179199, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5424, |
| "grad_norm": 1.2714813947677612, |
| "learning_rate": 0.00024931704592477, |
| "loss": 5.5605878829956055, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.2656306028366089, |
| "learning_rate": 0.0002479511581613642, |
| "loss": 5.457594394683838, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5456, |
| "grad_norm": 1.1355462074279785, |
| "learning_rate": 0.00024658533155851227, |
| "loss": 5.645468711853027, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 1.564833641052246, |
| "learning_rate": 0.0002452196068878881, |
| "loss": 5.560579299926758, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5488, |
| "grad_norm": 1.2560124397277832, |
| "learning_rate": 0.00024385402491812317, |
| "loss": 5.102597236633301, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 1.47645103931427, |
| "learning_rate": 0.00024248862641358866, |
| "loss": 5.347832679748535, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 1.634925127029419, |
| "learning_rate": 0.00024112345213317933, |
| "loss": 5.229283332824707, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 1.3205620050430298, |
| "learning_rate": 0.00023975854282909641, |
| "loss": 5.406874179840088, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5552, |
| "grad_norm": 1.5099257230758667, |
| "learning_rate": 0.00023839393924563162, |
| "loss": 5.050958156585693, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 1.2842683792114258, |
| "learning_rate": 0.0002370296821179501, |
| "loss": 5.189534664154053, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5584, |
| "grad_norm": 1.1710087060928345, |
| "learning_rate": 0.00023566581217087493, |
| "loss": 5.227584362030029, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.3577237129211426, |
| "learning_rate": 0.00023430237011767165, |
| "loss": 5.079989433288574, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5616, |
| "grad_norm": 1.2834707498550415, |
| "learning_rate": 0.00023293939665883229, |
| "loss": 5.309730052947998, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 1.4233572483062744, |
| "learning_rate": 0.0002315769324808608, |
| "loss": 5.27959680557251, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5648, |
| "grad_norm": 1.7741755247116089, |
| "learning_rate": 0.00023021501825505847, |
| "loss": 5.245169162750244, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 1.743356466293335, |
| "learning_rate": 0.00022885369463631, |
| "loss": 5.384469985961914, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 1.3255281448364258, |
| "learning_rate": 0.00022749300226186948, |
| "loss": 5.170154094696045, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 1.3135267496109009, |
| "learning_rate": 0.0002261329817501475, |
| "loss": 5.177214622497559, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5712, |
| "grad_norm": 1.334771990776062, |
| "learning_rate": 0.00022477367369949885, |
| "loss": 5.129632472991943, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 1.5144758224487305, |
| "learning_rate": 0.00022341511868701055, |
| "loss": 4.999809741973877, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5744, |
| "grad_norm": 1.2315837144851685, |
| "learning_rate": 0.0002220573572672902, |
| "loss": 5.348094940185547, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.4335271120071411, |
| "learning_rate": 0.00022070042997125567, |
| "loss": 5.095552444458008, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5776, |
| "grad_norm": 1.6949377059936523, |
| "learning_rate": 0.00021934437730492543, |
| "loss": 5.0214924812316895, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 1.3676100969314575, |
| "learning_rate": 0.00021798923974820884, |
| "loss": 5.584174633026123, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5808, |
| "grad_norm": 1.3218090534210205, |
| "learning_rate": 0.0002166350577536981, |
| "loss": 5.239519119262695, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 1.3069649934768677, |
| "learning_rate": 0.00021528187174546092, |
| "loss": 5.372768402099609, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 1.3426185846328735, |
| "learning_rate": 0.00021392972211783332, |
| "loss": 5.219846248626709, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 1.3411294221878052, |
| "learning_rate": 0.00021257864923421402, |
| "loss": 4.874852180480957, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5872, |
| "grad_norm": 1.4060436487197876, |
| "learning_rate": 0.00021122869342585948, |
| "loss": 5.2531046867370605, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 1.192141056060791, |
| "learning_rate": 0.00020987989499068042, |
| "loss": 5.342706203460693, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5904, |
| "grad_norm": 1.3001792430877686, |
| "learning_rate": 0.00020853229419203807, |
| "loss": 5.323460578918457, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 1.4926820993423462, |
| "learning_rate": 0.0002071859312575427, |
| "loss": 5.296498775482178, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5936, |
| "grad_norm": 1.434384822845459, |
| "learning_rate": 0.00020584084637785316, |
| "loss": 5.084543228149414, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 2.288747787475586, |
| "learning_rate": 0.00020449707970547629, |
| "loss": 5.0905585289001465, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5968, |
| "grad_norm": 1.4251408576965332, |
| "learning_rate": 0.0002031546713535688, |
| "loss": 5.365981101989746, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 1.317584753036499, |
| "learning_rate": 0.00020181366139474012, |
| "loss": 5.608163356781006, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.187654733657837, |
| "learning_rate": 0.00020047408985985552, |
| "loss": 4.876247406005859, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 2.2563083171844482, |
| "learning_rate": 0.0001991359967368416, |
| "loss": 5.187510013580322, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6032, |
| "grad_norm": 1.282902479171753, |
| "learning_rate": 0.00019779942196949238, |
| "loss": 5.240813255310059, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 1.345765471458435, |
| "learning_rate": 0.00019646440545627723, |
| "loss": 5.2197957038879395, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6064, |
| "grad_norm": 1.2917436361312866, |
| "learning_rate": 0.0001951309870491494, |
| "loss": 5.324549674987793, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 1.2900662422180176, |
| "learning_rate": 0.0001937992065523567, |
| "loss": 5.294788360595703, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6096, |
| "grad_norm": 1.5021880865097046, |
| "learning_rate": 0.00019246910372125342, |
| "loss": 5.409048080444336, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 1.6828486919403076, |
| "learning_rate": 0.0001911407182611131, |
| "loss": 5.392390251159668, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6128, |
| "grad_norm": 1.5416451692581177, |
| "learning_rate": 0.00018981408982594365, |
| "loss": 5.151852130889893, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 1.2479101419448853, |
| "learning_rate": 0.00018848925801730342, |
| "loss": 5.193958759307861, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 1.471063256263733, |
| "learning_rate": 0.00018716626238311958, |
| "loss": 4.912611961364746, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 1.285828948020935, |
| "learning_rate": 0.00018584514241650667, |
| "loss": 4.898399829864502, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6192, |
| "grad_norm": 1.8733534812927246, |
| "learning_rate": 0.0001845259375545882, |
| "loss": 4.954188346862793, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 1.55088472366333, |
| "learning_rate": 0.00018320868717731977, |
| "loss": 5.39755916595459, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6224, |
| "grad_norm": 1.876209020614624, |
| "learning_rate": 0.00018189343060631257, |
| "loss": 5.461378574371338, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 1.192241907119751, |
| "learning_rate": 0.0001805802071036605, |
| "loss": 4.95612096786499, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6256, |
| "grad_norm": 1.3008099794387817, |
| "learning_rate": 0.00017926905587076748, |
| "loss": 5.485091209411621, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 1.7544057369232178, |
| "learning_rate": 0.00017796001604717787, |
| "loss": 4.80226993560791, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6288, |
| "grad_norm": 1.2537293434143066, |
| "learning_rate": 0.00017665312670940743, |
| "loss": 5.096302509307861, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 1.1589773893356323, |
| "learning_rate": 0.0001753484268697772, |
| "loss": 5.296406269073486, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 1.2438563108444214, |
| "learning_rate": 0.0001740459554752492, |
| "loss": 5.258586406707764, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 1.2174347639083862, |
| "learning_rate": 0.00017274575140626317, |
| "loss": 5.269428253173828, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6352, |
| "grad_norm": 1.4118070602416992, |
| "learning_rate": 0.00017144785347557643, |
| "loss": 4.895862579345703, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 1.2514833211898804, |
| "learning_rate": 0.000170152300427105, |
| "loss": 5.026675701141357, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6384, |
| "grad_norm": 1.2788375616073608, |
| "learning_rate": 0.0001688591309347674, |
| "loss": 5.225519180297852, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.105532646179199, |
| "learning_rate": 0.00016756838360132968, |
| "loss": 4.846694469451904, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6416, |
| "grad_norm": 1.5078997611999512, |
| "learning_rate": 0.00016628009695725346, |
| "loss": 5.365673065185547, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 1.2744578123092651, |
| "learning_rate": 0.00016499430945954576, |
| "loss": 5.406460285186768, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6448, |
| "grad_norm": 1.494751214981079, |
| "learning_rate": 0.0001637110594906106, |
| "loss": 5.130960464477539, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 1.4280880689620972, |
| "learning_rate": 0.00016243038535710365, |
| "loss": 5.194888114929199, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 1.2241549491882324, |
| "learning_rate": 0.00016115232528878876, |
| "loss": 4.969592571258545, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 1.2147563695907593, |
| "learning_rate": 0.00015987691743739636, |
| "loss": 5.176176071166992, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6512, |
| "grad_norm": 1.1825661659240723, |
| "learning_rate": 0.00015860419987548486, |
| "loss": 5.012125015258789, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 1.3765822649002075, |
| "learning_rate": 0.00015733421059530397, |
| "loss": 5.192166328430176, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6544, |
| "grad_norm": 1.455336332321167, |
| "learning_rate": 0.00015606698750766107, |
| "loss": 5.153839111328125, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 1.2426291704177856, |
| "learning_rate": 0.00015480256844078877, |
| "loss": 5.300335884094238, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6576, |
| "grad_norm": 1.2273467779159546, |
| "learning_rate": 0.00015354099113921613, |
| "loss": 5.370866775512695, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 1.3272308111190796, |
| "learning_rate": 0.0001522822932626421, |
| "loss": 5.237664699554443, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6608, |
| "grad_norm": 1.486881136894226, |
| "learning_rate": 0.00015102651238481092, |
| "loss": 5.199460029602051, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 1.225791096687317, |
| "learning_rate": 0.0001497736859923906, |
| "loss": 5.001354217529297, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 1.1577017307281494, |
| "learning_rate": 0.00014852385148385412, |
| "loss": 4.978085517883301, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 1.1296128034591675, |
| "learning_rate": 0.00014727704616836296, |
| "loss": 5.08205509185791, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6672, |
| "grad_norm": 1.450363278388977, |
| "learning_rate": 0.00014603330726465315, |
| "loss": 5.209231853485107, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 1.1756222248077393, |
| "learning_rate": 0.00014479267189992435, |
| "loss": 5.059493064880371, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6704, |
| "grad_norm": 1.3998825550079346, |
| "learning_rate": 0.00014355517710873183, |
| "loss": 4.99937629699707, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 1.3438893556594849, |
| "learning_rate": 0.00014232085983188064, |
| "loss": 5.317448616027832, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6736, |
| "grad_norm": 1.080320119857788, |
| "learning_rate": 0.00014108975691532271, |
| "loss": 5.1715264320373535, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 1.2611881494522095, |
| "learning_rate": 0.00013986190510905758, |
| "loss": 4.58638858795166, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6768, |
| "grad_norm": 1.2457435131072998, |
| "learning_rate": 0.0001386373410660347, |
| "loss": 4.950125217437744, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 1.7552827596664429, |
| "learning_rate": 0.00013741610134105983, |
| "loss": 5.444072723388672, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.21152925491333, |
| "learning_rate": 0.0001361982223897032, |
| "loss": 5.073456287384033, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 1.5059016942977905, |
| "learning_rate": 0.00013498374056721197, |
| "loss": 5.584665298461914, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6832, |
| "grad_norm": 1.4177290201187134, |
| "learning_rate": 0.00013377269212742457, |
| "loss": 5.289451599121094, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 1.4181674718856812, |
| "learning_rate": 0.0001325651132216886, |
| "loss": 4.7561540603637695, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6864, |
| "grad_norm": 1.1193443536758423, |
| "learning_rate": 0.00013136103989778137, |
| "loss": 5.055768013000488, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 1.1662368774414062, |
| "learning_rate": 0.00013016050809883434, |
| "loss": 4.925864219665527, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6896, |
| "grad_norm": 1.188244104385376, |
| "learning_rate": 0.00012896355366225998, |
| "loss": 4.825364589691162, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 1.4330700635910034, |
| "learning_rate": 0.00012777021231868144, |
| "loss": 5.1424055099487305, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6928, |
| "grad_norm": 1.5289138555526733, |
| "learning_rate": 0.00012658051969086713, |
| "loss": 5.1443772315979, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 1.455989122390747, |
| "learning_rate": 0.00012539451129266603, |
| "loss": 4.967620849609375, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 1.36936354637146, |
| "learning_rate": 0.00012421222252794833, |
| "loss": 5.1624908447265625, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 1.3274517059326172, |
| "learning_rate": 0.0001230336886895485, |
| "loss": 5.160506725311279, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6992, |
| "grad_norm": 1.3301618099212646, |
| "learning_rate": 0.0001218589449582116, |
| "loss": 4.8344645500183105, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 1.4845178127288818, |
| "learning_rate": 0.00012068802640154292, |
| "loss": 4.987344264984131, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7024, |
| "grad_norm": 1.2381513118743896, |
| "learning_rate": 0.00011952096797296167, |
| "loss": 4.904998779296875, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 1.395328402519226, |
| "learning_rate": 0.00011835780451065722, |
| "loss": 4.8166656494140625, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7056, |
| "grad_norm": 1.9488160610198975, |
| "learning_rate": 0.00011719857073654922, |
| "loss": 5.329633712768555, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 1.4535843133926392, |
| "learning_rate": 0.00011604330125525078, |
| "loss": 4.918258190155029, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7088, |
| "grad_norm": 1.4393301010131836, |
| "learning_rate": 0.00011489203055303646, |
| "loss": 5.293149471282959, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 1.5147560834884644, |
| "learning_rate": 0.00011374479299681142, |
| "loss": 5.193087100982666, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 1.9008417129516602, |
| "learning_rate": 0.00011260162283308678, |
| "loss": 5.060847282409668, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 1.42693030834198, |
| "learning_rate": 0.00011146255418695633, |
| "loss": 5.017470836639404, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7152, |
| "grad_norm": 1.3155730962753296, |
| "learning_rate": 0.00011032762106107872, |
| "loss": 5.276302337646484, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 1.414832592010498, |
| "learning_rate": 0.00010919685733466175, |
| "loss": 5.105321884155273, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7184, |
| "grad_norm": 1.4331352710723877, |
| "learning_rate": 0.00010807029676245145, |
| "loss": 5.178823471069336, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.958193302154541, |
| "learning_rate": 0.00010694797297372433, |
| "loss": 5.053134918212891, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7216, |
| "grad_norm": 1.4789056777954102, |
| "learning_rate": 0.00010582991947128323, |
| "loss": 5.253017425537109, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 1.4438488483428955, |
| "learning_rate": 0.00010471616963045788, |
| "loss": 4.795893669128418, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7248, |
| "grad_norm": 1.0840559005737305, |
| "learning_rate": 0.00010360675669810765, |
| "loss": 4.984047889709473, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 1.0405324697494507, |
| "learning_rate": 0.00010250171379163034, |
| "loss": 5.2449116706848145, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 1.5884569883346558, |
| "learning_rate": 0.00010140107389797223, |
| "loss": 4.744875907897949, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 1.3832892179489136, |
| "learning_rate": 0.00010030486987264437, |
| "loss": 5.204304218292236, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7312, |
| "grad_norm": 1.5350919961929321, |
| "learning_rate": 9.921313443874142e-05, |
| "loss": 4.8627400398254395, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 1.3951729536056519, |
| "learning_rate": 9.812590018596485e-05, |
| "loss": 4.816617488861084, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7344, |
| "grad_norm": 1.4187312126159668, |
| "learning_rate": 9.704319956964996e-05, |
| "loss": 5.244232654571533, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 1.9965143203735352, |
| "learning_rate": 9.596506490979737e-05, |
| "loss": 5.668506145477295, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7376, |
| "grad_norm": 1.6400834321975708, |
| "learning_rate": 9.489152839010798e-05, |
| "loss": 5.365629196166992, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 1.442253828048706, |
| "learning_rate": 9.382262205702247e-05, |
| "loss": 5.322830677032471, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.7408, |
| "grad_norm": 1.1397078037261963, |
| "learning_rate": 9.275837781876404e-05, |
| "loss": 5.002555847167969, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 1.4520896673202515, |
| "learning_rate": 9.16988274443871e-05, |
| "loss": 5.138970375061035, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 1.3373026847839355, |
| "learning_rate": 9.064400256282756e-05, |
| "loss": 5.060115814208984, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 1.3698216676712036, |
| "learning_rate": 8.959393466195972e-05, |
| "loss": 5.160407066345215, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7472, |
| "grad_norm": 1.45284104347229, |
| "learning_rate": 8.854865508765577e-05, |
| "loss": 4.794371604919434, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 1.2445486783981323, |
| "learning_rate": 8.750819504285015e-05, |
| "loss": 4.926098823547363, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7504, |
| "grad_norm": 1.5558010339736938, |
| "learning_rate": 8.647258558660828e-05, |
| "loss": 5.0971245765686035, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.5887895822525024, |
| "learning_rate": 8.544185763319925e-05, |
| "loss": 5.4126152992248535, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7536, |
| "grad_norm": 1.1927727460861206, |
| "learning_rate": 8.441604195117314e-05, |
| "loss": 4.76765251159668, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 1.1783281564712524, |
| "learning_rate": 8.339516916244216e-05, |
| "loss": 5.2575907707214355, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7568, |
| "grad_norm": 1.4256731271743774, |
| "learning_rate": 8.237926974136715e-05, |
| "loss": 4.811319351196289, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 1.1950210332870483, |
| "learning_rate": 8.136837401384733e-05, |
| "loss": 5.229648590087891, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.409590721130371, |
| "learning_rate": 8.036251215641546e-05, |
| "loss": 5.007275104522705, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 1.3664684295654297, |
| "learning_rate": 7.936171419533653e-05, |
| "loss": 5.1865339279174805, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7632, |
| "grad_norm": 1.272782564163208, |
| "learning_rate": 7.836601000571197e-05, |
| "loss": 5.0746636390686035, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 1.430291771888733, |
| "learning_rate": 7.737542931058755e-05, |
| "loss": 5.309817790985107, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7664, |
| "grad_norm": 1.391274094581604, |
| "learning_rate": 7.63900016800663e-05, |
| "loss": 4.913700103759766, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.8367639780044556, |
| "learning_rate": 7.54097565304252e-05, |
| "loss": 4.870950222015381, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7696, |
| "grad_norm": 1.5375534296035767, |
| "learning_rate": 7.443472312323824e-05, |
| "loss": 5.078888893127441, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 1.3212310075759888, |
| "learning_rate": 7.346493056450157e-05, |
| "loss": 4.916213512420654, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7728, |
| "grad_norm": 1.4506617784500122, |
| "learning_rate": 7.250040780376577e-05, |
| "loss": 4.79956579208374, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 1.269956350326538, |
| "learning_rate": 7.154118363327075e-05, |
| "loss": 5.207999229431152, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 1.386398196220398, |
| "learning_rate": 7.058728668708727e-05, |
| "loss": 4.866647720336914, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 1.2891589403152466, |
| "learning_rate": 6.963874544026109e-05, |
| "loss": 5.038686752319336, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7792, |
| "grad_norm": 1.2647722959518433, |
| "learning_rate": 6.869558820796376e-05, |
| "loss": 5.102810859680176, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 1.2693649530410767, |
| "learning_rate": 6.775784314464717e-05, |
| "loss": 4.887539863586426, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7824, |
| "grad_norm": 1.6362860202789307, |
| "learning_rate": 6.68255382432027e-05, |
| "loss": 4.774933338165283, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 1.5527857542037964, |
| "learning_rate": 6.589870133412626e-05, |
| "loss": 5.0828680992126465, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7856, |
| "grad_norm": 1.6107929944992065, |
| "learning_rate": 6.497736008468701e-05, |
| "loss": 4.6461639404296875, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 1.12363862991333, |
| "learning_rate": 6.406154199810179e-05, |
| "loss": 5.033900260925293, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7888, |
| "grad_norm": 1.1499987840652466, |
| "learning_rate": 6.315127441271368e-05, |
| "loss": 4.9476094245910645, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 1.5613439083099365, |
| "learning_rate": 6.224658450117637e-05, |
| "loss": 5.146108150482178, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 1.2324504852294922, |
| "learning_rate": 6.134749926964289e-05, |
| "loss": 4.819706916809082, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 1.1125681400299072, |
| "learning_rate": 6.0454045556959356e-05, |
| "loss": 4.930054664611816, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7952, |
| "grad_norm": 1.6992604732513428, |
| "learning_rate": 5.9566250033863567e-05, |
| "loss": 5.198884963989258, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 1.920567512512207, |
| "learning_rate": 5.8684139202189654e-05, |
| "loss": 5.21380615234375, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7984, |
| "grad_norm": 1.3954874277114868, |
| "learning_rate": 5.780773939407585e-05, |
| "loss": 4.928266525268555, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4884490966796875, |
| "learning_rate": 5.693707677117943e-05, |
| "loss": 5.14831018447876, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8016, |
| "grad_norm": 1.7664364576339722, |
| "learning_rate": 5.607217732389502e-05, |
| "loss": 5.231863975524902, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 1.1272830963134766, |
| "learning_rate": 5.5213066870579476e-05, |
| "loss": 5.004734039306641, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8048, |
| "grad_norm": 1.2964353561401367, |
| "learning_rate": 5.4359771056780333e-05, |
| "loss": 4.362703323364258, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 1.3352986574172974, |
| "learning_rate": 5.3512315354470956e-05, |
| "loss": 4.99576473236084, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 1.4980597496032715, |
| "learning_rate": 5.267072506128981e-05, |
| "loss": 5.139542579650879, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 1.1959021091461182, |
| "learning_rate": 5.183502529978548e-05, |
| "loss": 5.123270034790039, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8112, |
| "grad_norm": 1.3239198923110962, |
| "learning_rate": 5.10052410166664e-05, |
| "loss": 5.379024028778076, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 1.204946756362915, |
| "learning_rate": 5.018139698205665e-05, |
| "loss": 5.012156963348389, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8144, |
| "grad_norm": 1.5109254121780396, |
| "learning_rate": 4.9363517788756195e-05, |
| "loss": 4.902032852172852, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 1.1028631925582886, |
| "learning_rate": 4.855162785150674e-05, |
| "loss": 5.165895938873291, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8176, |
| "grad_norm": 1.042698860168457, |
| "learning_rate": 4.7745751406263163e-05, |
| "loss": 4.897646427154541, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 1.2713276147842407, |
| "learning_rate": 4.694591250946983e-05, |
| "loss": 4.820833206176758, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8208, |
| "grad_norm": 1.1189286708831787, |
| "learning_rate": 4.615213503734267e-05, |
| "loss": 4.981866836547852, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 1.3545044660568237, |
| "learning_rate": 4.536444268515608e-05, |
| "loss": 4.901456832885742, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 1.3025493621826172, |
| "learning_rate": 4.458285896653602e-05, |
| "loss": 5.010705947875977, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 1.5655075311660767, |
| "learning_rate": 4.380740721275786e-05, |
| "loss": 5.438045501708984, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8272, |
| "grad_norm": 1.4804078340530396, |
| "learning_rate": 4.303811057205007e-05, |
| "loss": 4.864298343658447, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 1.3067195415496826, |
| "learning_rate": 4.227499200890275e-05, |
| "loss": 5.399082183837891, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8304, |
| "grad_norm": 1.3728652000427246, |
| "learning_rate": 4.1518074303383004e-05, |
| "loss": 4.861635684967041, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.0616425275802612, |
| "learning_rate": 4.076738005045394e-05, |
| "loss": 5.093954563140869, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8336, |
| "grad_norm": 1.2632859945297241, |
| "learning_rate": 4.002293165930088e-05, |
| "loss": 5.069172382354736, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 1.54668390750885, |
| "learning_rate": 3.9284751352662045e-05, |
| "loss": 5.132449150085449, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8368, |
| "grad_norm": 1.4716906547546387, |
| "learning_rate": 3.855286116616541e-05, |
| "loss": 4.952608585357666, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 1.315252423286438, |
| "learning_rate": 3.782728294767068e-05, |
| "loss": 4.983213424682617, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.4445892572402954, |
| "learning_rate": 3.7108038356617305e-05, |
| "loss": 5.154409885406494, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 1.3014910221099854, |
| "learning_rate": 3.6395148863377855e-05, |
| "loss": 4.867927551269531, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8432, |
| "grad_norm": 1.1832693815231323, |
| "learning_rate": 3.568863574861708e-05, |
| "loss": 4.7219462394714355, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 1.38213312625885, |
| "learning_rate": 3.49885201026566e-05, |
| "loss": 4.771894931793213, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8464, |
| "grad_norm": 1.2693217992782593, |
| "learning_rate": 3.4294822824845444e-05, |
| "loss": 4.964877128601074, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 1.170465350151062, |
| "learning_rate": 3.3607564622936207e-05, |
| "loss": 4.916166305541992, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8496, |
| "grad_norm": 1.267838716506958, |
| "learning_rate": 3.292676601246661e-05, |
| "loss": 5.243579387664795, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 1.3622010946273804, |
| "learning_rate": 3.2252447316147456e-05, |
| "loss": 4.598936080932617, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8528, |
| "grad_norm": 1.5820192098617554, |
| "learning_rate": 3.1584628663255847e-05, |
| "loss": 5.2594170570373535, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 1.5312021970748901, |
| "learning_rate": 3.092332998903416e-05, |
| "loss": 5.157290935516357, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 1.4027749300003052, |
| "learning_rate": 3.0268571034094944e-05, |
| "loss": 5.125532150268555, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 1.1611146926879883, |
| "learning_rate": 2.962037134383211e-05, |
| "loss": 5.000718593597412, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8592, |
| "grad_norm": 1.3523814678192139, |
| "learning_rate": 2.8978750267836752e-05, |
| "loss": 4.671696662902832, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 1.2509510517120361, |
| "learning_rate": 2.8343726959320082e-05, |
| "loss": 5.075153350830078, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8624, |
| "grad_norm": 1.3108588457107544, |
| "learning_rate": 2.7715320374541357e-05, |
| "loss": 4.994152545928955, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 1.1837953329086304, |
| "learning_rate": 2.7093549272242445e-05, |
| "loss": 5.121654510498047, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8656, |
| "grad_norm": 1.5410609245300293, |
| "learning_rate": 2.6478432213087213e-05, |
| "loss": 4.955600738525391, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 1.0305265188217163, |
| "learning_rate": 2.5869987559107992e-05, |
| "loss": 5.132237911224365, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8688, |
| "grad_norm": 1.219406247138977, |
| "learning_rate": 2.5268233473157294e-05, |
| "loss": 4.905612468719482, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 1.5246868133544922, |
| "learning_rate": 2.467318791836559e-05, |
| "loss": 5.272589206695557, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 1.2425482273101807, |
| "learning_rate": 2.408486865760495e-05, |
| "loss": 5.108579158782959, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 1.1925750970840454, |
| "learning_rate": 2.3503293252959136e-05, |
| "loss": 5.024507522583008, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8752, |
| "grad_norm": 1.2723841667175293, |
| "learning_rate": 2.2928479065199072e-05, |
| "loss": 5.255931377410889, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 1.620451807975769, |
| "learning_rate": 2.2360443253264777e-05, |
| "loss": 5.196926593780518, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8784, |
| "grad_norm": 1.1335077285766602, |
| "learning_rate": 2.179920277375294e-05, |
| "loss": 4.717995643615723, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.1418888568878174, |
| "learning_rate": 2.1244774380410976e-05, |
| "loss": 5.335053443908691, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8816, |
| "grad_norm": 1.3852171897888184, |
| "learning_rate": 2.0697174623636794e-05, |
| "loss": 5.047591209411621, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 1.2350728511810303, |
| "learning_rate": 2.015641984998459e-05, |
| "loss": 4.715671062469482, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8848, |
| "grad_norm": 1.115648865699768, |
| "learning_rate": 1.9622526201677344e-05, |
| "loss": 5.0985612869262695, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 1.7186869382858276, |
| "learning_rate": 1.9095509616124385e-05, |
| "loss": 4.931835651397705, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 1.2360730171203613, |
| "learning_rate": 1.85753858254461e-05, |
| "loss": 4.8929924964904785, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 1.146570086479187, |
| "learning_rate": 1.8062170356003854e-05, |
| "loss": 5.117987632751465, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8912, |
| "grad_norm": 1.1873035430908203, |
| "learning_rate": 1.7555878527937163e-05, |
| "loss": 4.8101091384887695, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 1.1898494958877563, |
| "learning_rate": 1.7056525454705623e-05, |
| "loss": 5.127380847930908, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8944, |
| "grad_norm": 1.431149959564209, |
| "learning_rate": 1.656412604263824e-05, |
| "loss": 5.338906764984131, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.1228066682815552, |
| "learning_rate": 1.607869499048839e-05, |
| "loss": 4.9782185554504395, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8976, |
| "grad_norm": 1.3961535692214966, |
| "learning_rate": 1.5600246788994937e-05, |
| "loss": 4.974421501159668, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 1.281671166419983, |
| "learning_rate": 1.5128795720449617e-05, |
| "loss": 4.919782638549805, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9008, |
| "grad_norm": 1.220367670059204, |
| "learning_rate": 1.4664355858270862e-05, |
| "loss": 4.936645030975342, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 1.0977709293365479, |
| "learning_rate": 1.4206941066583629e-05, |
| "loss": 4.759374618530273, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 1.2086211442947388, |
| "learning_rate": 1.3756564999805515e-05, |
| "loss": 5.17381477355957, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 1.172023892402649, |
| "learning_rate": 1.3313241102239054e-05, |
| "loss": 4.950685977935791, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9072, |
| "grad_norm": 1.210207462310791, |
| "learning_rate": 1.2876982607670674e-05, |
| "loss": 5.04666805267334, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 1.0206573009490967, |
| "learning_rate": 1.2447802538975345e-05, |
| "loss": 5.030869483947754, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9104, |
| "grad_norm": 1.2772059440612793, |
| "learning_rate": 1.2025713707727953e-05, |
| "loss": 5.230049133300781, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 1.1435636281967163, |
| "learning_rate": 1.1610728713820906e-05, |
| "loss": 5.214902400970459, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9136, |
| "grad_norm": 1.430433988571167, |
| "learning_rate": 1.120285994508799e-05, |
| "loss": 4.8903584480285645, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 1.2580111026763916, |
| "learning_rate": 1.08021195769345e-05, |
| "loss": 5.1730055809021, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9168, |
| "grad_norm": 1.3038173913955688, |
| "learning_rate": 1.0408519571973806e-05, |
| "loss": 5.069331169128418, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 1.4082874059677124, |
| "learning_rate": 1.0022071679670425e-05, |
| "loss": 5.165510177612305, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.3335379362106323, |
| "learning_rate": 9.642787435989008e-06, |
| "loss": 4.859002113342285, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 1.1995774507522583, |
| "learning_rate": 9.270678163050216e-06, |
| "loss": 5.164345741271973, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.9232, |
| "grad_norm": 1.0635286569595337, |
| "learning_rate": 8.90575496879248e-06, |
| "loss": 4.728398323059082, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 1.1882269382476807, |
| "learning_rate": 8.548028746640846e-06, |
| "loss": 4.7602972984313965, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.9264, |
| "grad_norm": 1.389762282371521, |
| "learning_rate": 8.197510175181277e-06, |
| "loss": 5.069275856018066, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 1.1334697008132935, |
| "learning_rate": 7.854209717842232e-06, |
| "loss": 5.110383033752441, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9296, |
| "grad_norm": 1.244832992553711, |
| "learning_rate": 7.518137622582188e-06, |
| "loss": 5.184660911560059, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 1.1092815399169922, |
| "learning_rate": 7.1893039215838175e-06, |
| "loss": 4.963058948516846, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9328, |
| "grad_norm": 1.6420494318008423, |
| "learning_rate": 6.867718430954351e-06, |
| "loss": 4.9267964363098145, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 1.49501371383667, |
| "learning_rate": 6.553390750432708e-06, |
| "loss": 4.730033874511719, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.2878178358078003, |
| "learning_rate": 6.246330263102895e-06, |
| "loss": 5.060173034667969, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 1.2040040493011475, |
| "learning_rate": 5.9465461351138615e-06, |
| "loss": 5.053962707519531, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9392, |
| "grad_norm": 1.1503539085388184, |
| "learning_rate": 5.654047315405892e-06, |
| "loss": 4.980835437774658, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.9667116403579712, |
| "learning_rate": 5.368842535443508e-06, |
| "loss": 5.023655414581299, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9424, |
| "grad_norm": 1.2056710720062256, |
| "learning_rate": 5.09094030895485e-06, |
| "loss": 4.959043979644775, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 1.0608792304992676, |
| "learning_rate": 4.8203489316773485e-06, |
| "loss": 5.312167644500732, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9456, |
| "grad_norm": 1.4500396251678467, |
| "learning_rate": 4.557076481110367e-06, |
| "loss": 4.965682029724121, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 1.118233561515808, |
| "learning_rate": 4.301130816273813e-06, |
| "loss": 4.988546848297119, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.9488, |
| "grad_norm": 1.2060961723327637, |
| "learning_rate": 4.05251957747374e-06, |
| "loss": 5.0205888748168945, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 1.270868182182312, |
| "learning_rate": 3.811250186074089e-06, |
| "loss": 5.278676509857178, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 1.4645127058029175, |
| "learning_rate": 3.5773298442753898e-06, |
| "loss": 4.93894100189209, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 1.21164870262146, |
| "learning_rate": 3.3507655348995192e-06, |
| "loss": 5.321264266967773, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.9552, |
| "grad_norm": 1.2144756317138672, |
| "learning_rate": 3.131564021181338e-06, |
| "loss": 4.879669666290283, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 1.7862255573272705, |
| "learning_rate": 2.9197318465669364e-06, |
| "loss": 5.113965034484863, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9584, |
| "grad_norm": 1.427722692489624, |
| "learning_rate": 2.7152753345181247e-06, |
| "loss": 4.928999423980713, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.3544409275054932, |
| "learning_rate": 2.518200588323666e-06, |
| "loss": 5.407461166381836, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9616, |
| "grad_norm": 1.8953897953033447, |
| "learning_rate": 2.328513490917311e-06, |
| "loss": 4.892749309539795, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 1.3621735572814941, |
| "learning_rate": 2.1462197047019127e-06, |
| "loss": 5.107844352722168, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.9648, |
| "grad_norm": 1.19562566280365, |
| "learning_rate": 1.9713246713805587e-06, |
| "loss": 5.338631629943848, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 1.0211833715438843, |
| "learning_rate": 1.803833611794037e-06, |
| "loss": 4.848773002624512, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 1.4424593448638916, |
| "learning_rate": 1.643751525765097e-06, |
| "loss": 5.272921562194824, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 1.2189918756484985, |
| "learning_rate": 1.4910831919490997e-06, |
| "loss": 4.7630157470703125, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.9712, |
| "grad_norm": 1.1489924192428589, |
| "learning_rate": 1.345833167691407e-06, |
| "loss": 5.053176403045654, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 1.102137804031372, |
| "learning_rate": 1.2080057888913253e-06, |
| "loss": 5.1648359298706055, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.9744, |
| "grad_norm": 1.193506121635437, |
| "learning_rate": 1.0776051698727362e-06, |
| "loss": 4.978764533996582, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 1.3150538206100464, |
| "learning_rate": 9.546352032611395e-07, |
| "loss": 5.2356038093566895, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9776, |
| "grad_norm": 1.2881925106048584, |
| "learning_rate": 8.390995598676066e-07, |
| "loss": 5.024952411651611, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 1.2736302614212036, |
| "learning_rate": 7.310016885791471e-07, |
| "loss": 5.065498352050781, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9808, |
| "grad_norm": 1.3327683210372925, |
| "learning_rate": 6.303448162556791e-07, |
| "loss": 5.073752403259277, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 1.3384580612182617, |
| "learning_rate": 5.371319476338288e-07, |
| "loss": 5.055788993835449, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.3576717376708984, |
| "learning_rate": 4.513658652371133e-07, |
| "loss": 5.128819465637207, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 1.2477798461914062, |
| "learning_rate": 3.7304912929300716e-07, |
| "loss": 4.873608112335205, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9872, |
| "grad_norm": 1.2135578393936157, |
| "learning_rate": 3.0218407765642e-07, |
| "loss": 5.116058349609375, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 1.3859200477600098, |
| "learning_rate": 2.387728257399191e-07, |
| "loss": 4.957461357116699, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9904, |
| "grad_norm": 1.2815113067626953, |
| "learning_rate": 1.8281726645061335e-07, |
| "loss": 4.715893745422363, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 1.275434136390686, |
| "learning_rate": 1.343190701336705e-07, |
| "loss": 4.888550281524658, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.9936, |
| "grad_norm": 1.0546596050262451, |
| "learning_rate": 9.327968452232938e-08, |
| "loss": 4.995277404785156, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 1.248382568359375, |
| "learning_rate": 5.970033469490655e-08, |
| "loss": 5.184177398681641, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9968, |
| "grad_norm": 1.3588132858276367, |
| "learning_rate": 3.3582023037964645e-08, |
| "loss": 4.7490763664245605, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 1.121005654335022, |
| "learning_rate": 1.492552921655843e-08, |
| "loss": 4.857783317565918, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4019795656204224, |
| "learning_rate": 3.731410150975556e-09, |
| "loss": 5.065018177032471, |
| "step": 625 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 625, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.933484093429535e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|