{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 894,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011185682326621924,
      "grad_norm": 2.231250286102295,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 1.0507,
      "step": 1
    },
    {
      "epoch": 0.0022371364653243847,
      "grad_norm": 2.1123249530792236,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 1.04,
      "step": 2
    },
    {
      "epoch": 0.003355704697986577,
      "grad_norm": 2.0946707725524902,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 1.0307,
      "step": 3
    },
    {
      "epoch": 0.0044742729306487695,
      "grad_norm": 2.0837416648864746,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 1.0484,
      "step": 4
    },
    {
      "epoch": 0.005592841163310962,
      "grad_norm": 1.9843275547027588,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 1.0012,
      "step": 5
    },
    {
      "epoch": 0.006711409395973154,
      "grad_norm": 2.121988296508789,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 1.0603,
      "step": 6
    },
    {
      "epoch": 0.007829977628635347,
      "grad_norm": 2.029029369354248,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 1.0323,
      "step": 7
    },
    {
      "epoch": 0.008948545861297539,
      "grad_norm": 1.9815905094146729,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 1.0273,
      "step": 8
    },
    {
      "epoch": 0.010067114093959731,
      "grad_norm": 2.3339314460754395,
      "learning_rate": 4.5000000000000003e-07,
      "loss": 1.0805,
      "step": 9
    },
    {
      "epoch": 0.011185682326621925,
      "grad_norm": 2.1078243255615234,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.0382,
      "step": 10
    },
    {
      "epoch": 0.012304250559284116,
      "grad_norm": 1.8874777555465698,
      "learning_rate": 5.5e-07,
      "loss": 1.008,
      "step": 11
    },
    {
      "epoch": 0.013422818791946308,
      "grad_norm": 1.9720211029052734,
      "learning_rate": 6.000000000000001e-07,
      "loss": 1.0065,
      "step": 12
    },
    {
      "epoch": 0.0145413870246085,
      "grad_norm": 2.0002245903015137,
      "learning_rate": 6.5e-07,
      "loss": 1.0379,
      "step": 13
    },
    {
      "epoch": 0.015659955257270694,
      "grad_norm": 1.983207106590271,
      "learning_rate": 7.000000000000001e-07,
      "loss": 1.0271,
      "step": 14
    },
    {
      "epoch": 0.016778523489932886,
      "grad_norm": 1.886121153831482,
      "learning_rate": 7.5e-07,
      "loss": 1.0019,
      "step": 15
    },
    {
      "epoch": 0.017897091722595078,
      "grad_norm": 1.9403958320617676,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.9885,
      "step": 16
    },
    {
      "epoch": 0.01901565995525727,
      "grad_norm": 1.9739996194839478,
      "learning_rate": 8.500000000000001e-07,
      "loss": 0.9904,
      "step": 17
    },
    {
      "epoch": 0.020134228187919462,
      "grad_norm": 1.7419469356536865,
      "learning_rate": 9.000000000000001e-07,
      "loss": 0.9709,
      "step": 18
    },
    {
      "epoch": 0.021252796420581657,
      "grad_norm": 1.7856152057647705,
      "learning_rate": 9.500000000000001e-07,
      "loss": 0.9859,
      "step": 19
    },
    {
      "epoch": 0.02237136465324385,
      "grad_norm": 1.6159933805465698,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.9895,
      "step": 20
    },
    {
      "epoch": 0.02348993288590604,
      "grad_norm": 1.7010679244995117,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 1.0115,
      "step": 21
    },
    {
      "epoch": 0.024608501118568233,
      "grad_norm": 1.7860039472579956,
      "learning_rate": 1.1e-06,
      "loss": 0.9917,
      "step": 22
    },
    {
      "epoch": 0.025727069351230425,
      "grad_norm": 1.3735058307647705,
      "learning_rate": 1.1500000000000002e-06,
      "loss": 0.9441,
      "step": 23
    },
    {
      "epoch": 0.026845637583892617,
      "grad_norm": 1.439109206199646,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.9304,
      "step": 24
    },
    {
      "epoch": 0.02796420581655481,
      "grad_norm": 1.380369782447815,
      "learning_rate": 1.25e-06,
      "loss": 0.9469,
      "step": 25
    },
    {
      "epoch": 0.029082774049217,
      "grad_norm": 1.2287472486495972,
      "learning_rate": 1.3e-06,
      "loss": 0.8808,
      "step": 26
    },
    {
      "epoch": 0.030201342281879196,
      "grad_norm": 1.0899194478988647,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 0.8912,
      "step": 27
    },
    {
      "epoch": 0.03131991051454139,
      "grad_norm": 1.0445002317428589,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.876,
      "step": 28
    },
    {
      "epoch": 0.03243847874720358,
      "grad_norm": 1.0201383829116821,
      "learning_rate": 1.45e-06,
      "loss": 0.9003,
      "step": 29
    },
    {
      "epoch": 0.03355704697986577,
      "grad_norm": 0.9528365731239319,
      "learning_rate": 1.5e-06,
      "loss": 0.8537,
      "step": 30
    },
    {
      "epoch": 0.03467561521252797,
      "grad_norm": 0.9615768194198608,
      "learning_rate": 1.5500000000000002e-06,
      "loss": 0.8819,
      "step": 31
    },
    {
      "epoch": 0.035794183445190156,
      "grad_norm": 0.9578896760940552,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.8859,
      "step": 32
    },
    {
      "epoch": 0.03691275167785235,
      "grad_norm": 0.977853536605835,
      "learning_rate": 1.6500000000000003e-06,
      "loss": 0.8835,
      "step": 33
    },
    {
      "epoch": 0.03803131991051454,
      "grad_norm": 0.8976068496704102,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.8599,
      "step": 34
    },
    {
      "epoch": 0.039149888143176735,
      "grad_norm": 0.8779590725898743,
      "learning_rate": 1.75e-06,
      "loss": 0.8708,
      "step": 35
    },
    {
      "epoch": 0.040268456375838924,
      "grad_norm": 0.853705644607544,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.8465,
      "step": 36
    },
    {
      "epoch": 0.04138702460850112,
      "grad_norm": 0.8480839729309082,
      "learning_rate": 1.85e-06,
      "loss": 0.8292,
      "step": 37
    },
    {
      "epoch": 0.042505592841163314,
      "grad_norm": 0.8372538089752197,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 0.8026,
      "step": 38
    },
    {
      "epoch": 0.0436241610738255,
      "grad_norm": 0.8592961430549622,
      "learning_rate": 1.9500000000000004e-06,
      "loss": 0.8153,
      "step": 39
    },
    {
      "epoch": 0.0447427293064877,
      "grad_norm": 0.8222276568412781,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.814,
      "step": 40
    },
    {
      "epoch": 0.04586129753914989,
      "grad_norm": 0.825672447681427,
      "learning_rate": 2.05e-06,
      "loss": 0.7793,
      "step": 41
    },
    {
      "epoch": 0.04697986577181208,
      "grad_norm": 0.8016732335090637,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.771,
      "step": 42
    },
    {
      "epoch": 0.04809843400447427,
      "grad_norm": 0.7026550769805908,
      "learning_rate": 2.15e-06,
      "loss": 0.7664,
      "step": 43
    },
    {
      "epoch": 0.049217002237136466,
      "grad_norm": 0.6678670644760132,
      "learning_rate": 2.2e-06,
      "loss": 0.7774,
      "step": 44
    },
    {
      "epoch": 0.050335570469798654,
      "grad_norm": 0.6766750812530518,
      "learning_rate": 2.25e-06,
      "loss": 0.7832,
      "step": 45
    },
    {
      "epoch": 0.05145413870246085,
      "grad_norm": 0.7094117999076843,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.7861,
      "step": 46
    },
    {
      "epoch": 0.052572706935123045,
      "grad_norm": 0.6871191263198853,
      "learning_rate": 2.35e-06,
      "loss": 0.7848,
      "step": 47
    },
    {
      "epoch": 0.053691275167785234,
      "grad_norm": 0.6089867353439331,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.7658,
      "step": 48
    },
    {
      "epoch": 0.05480984340044743,
      "grad_norm": 0.5112010836601257,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.7921,
      "step": 49
    },
    {
      "epoch": 0.05592841163310962,
      "grad_norm": 0.5008496046066284,
      "learning_rate": 2.5e-06,
      "loss": 0.7105,
      "step": 50
    },
    {
      "epoch": 0.05704697986577181,
      "grad_norm": 0.5599631071090698,
      "learning_rate": 2.55e-06,
      "loss": 0.7526,
      "step": 51
    },
    {
      "epoch": 0.058165548098434,
      "grad_norm": 0.6905913352966309,
      "learning_rate": 2.6e-06,
      "loss": 0.7496,
      "step": 52
    },
    {
      "epoch": 0.0592841163310962,
      "grad_norm": 0.6198621392250061,
      "learning_rate": 2.6500000000000005e-06,
      "loss": 0.7297,
      "step": 53
    },
    {
      "epoch": 0.06040268456375839,
      "grad_norm": 0.6158658862113953,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.7309,
      "step": 54
    },
    {
      "epoch": 0.06152125279642058,
      "grad_norm": 0.5798735618591309,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.7102,
      "step": 55
    },
    {
      "epoch": 0.06263982102908278,
      "grad_norm": 0.5550254583358765,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.7488,
      "step": 56
    },
    {
      "epoch": 0.06375838926174497,
      "grad_norm": 0.4888734221458435,
      "learning_rate": 2.85e-06,
      "loss": 0.75,
      "step": 57
    },
    {
      "epoch": 0.06487695749440715,
      "grad_norm": 0.4579496383666992,
      "learning_rate": 2.9e-06,
      "loss": 0.7108,
      "step": 58
    },
    {
      "epoch": 0.06599552572706935,
      "grad_norm": 0.5775673389434814,
      "learning_rate": 2.95e-06,
      "loss": 0.7337,
      "step": 59
    },
    {
      "epoch": 0.06711409395973154,
      "grad_norm": 0.5035051703453064,
      "learning_rate": 3e-06,
      "loss": 0.7677,
      "step": 60
    },
    {
      "epoch": 0.06823266219239374,
      "grad_norm": 0.4771614074707031,
      "learning_rate": 3.05e-06,
      "loss": 0.724,
      "step": 61
    },
    {
      "epoch": 0.06935123042505593,
      "grad_norm": 0.45495525002479553,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.7393,
      "step": 62
    },
    {
      "epoch": 0.07046979865771812,
      "grad_norm": 0.36385607719421387,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.7029,
      "step": 63
    },
    {
      "epoch": 0.07158836689038031,
      "grad_norm": 0.3554967939853668,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.6991,
      "step": 64
    },
    {
      "epoch": 0.07270693512304251,
      "grad_norm": 0.36548176407814026,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.7292,
      "step": 65
    },
    {
      "epoch": 0.0738255033557047,
      "grad_norm": 0.35280168056488037,
      "learning_rate": 3.3000000000000006e-06,
      "loss": 0.7295,
      "step": 66
    },
    {
      "epoch": 0.07494407158836688,
      "grad_norm": 0.3599022924900055,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.6956,
      "step": 67
    },
    {
      "epoch": 0.07606263982102908,
      "grad_norm": 0.3802206516265869,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.6796,
      "step": 68
    },
    {
      "epoch": 0.07718120805369127,
      "grad_norm": 0.3787902891635895,
      "learning_rate": 3.45e-06,
      "loss": 0.7141,
      "step": 69
    },
    {
      "epoch": 0.07829977628635347,
      "grad_norm": 0.374461829662323,
      "learning_rate": 3.5e-06,
      "loss": 0.7043,
      "step": 70
    },
    {
      "epoch": 0.07941834451901567,
      "grad_norm": 0.34469330310821533,
      "learning_rate": 3.5500000000000003e-06,
      "loss": 0.7037,
      "step": 71
    },
    {
      "epoch": 0.08053691275167785,
      "grad_norm": 0.346836119890213,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.7246,
      "step": 72
    },
    {
      "epoch": 0.08165548098434004,
      "grad_norm": 0.34163376688957214,
      "learning_rate": 3.65e-06,
      "loss": 0.6977,
      "step": 73
    },
    {
      "epoch": 0.08277404921700224,
      "grad_norm": 0.3481418788433075,
      "learning_rate": 3.7e-06,
      "loss": 0.7356,
      "step": 74
    },
    {
      "epoch": 0.08389261744966443,
      "grad_norm": 0.3230934739112854,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.6869,
      "step": 75
    },
    {
      "epoch": 0.08501118568232663,
      "grad_norm": 0.319917231798172,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.6722,
      "step": 76
    },
    {
      "epoch": 0.08612975391498881,
      "grad_norm": 0.3535120487213135,
      "learning_rate": 3.85e-06,
      "loss": 0.6951,
      "step": 77
    },
    {
      "epoch": 0.087248322147651,
      "grad_norm": 0.3229662775993347,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.69,
      "step": 78
    },
    {
      "epoch": 0.0883668903803132,
      "grad_norm": 0.33365264534950256,
      "learning_rate": 3.95e-06,
      "loss": 0.701,
      "step": 79
    },
    {
      "epoch": 0.0894854586129754,
      "grad_norm": 0.3302946984767914,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6733,
      "step": 80
    },
    {
      "epoch": 0.09060402684563758,
      "grad_norm": 0.3478582799434662,
      "learning_rate": 4.05e-06,
      "loss": 0.7022,
      "step": 81
    },
    {
      "epoch": 0.09172259507829977,
      "grad_norm": 0.33355170488357544,
      "learning_rate": 4.1e-06,
      "loss": 0.7141,
      "step": 82
    },
    {
      "epoch": 0.09284116331096197,
      "grad_norm": 0.3217330574989319,
      "learning_rate": 4.15e-06,
      "loss": 0.6799,
      "step": 83
    },
    {
      "epoch": 0.09395973154362416,
      "grad_norm": 0.328838050365448,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.6943,
      "step": 84
    },
    {
      "epoch": 0.09507829977628636,
      "grad_norm": 0.3279136121273041,
      "learning_rate": 4.25e-06,
      "loss": 0.6699,
      "step": 85
    },
    {
      "epoch": 0.09619686800894854,
      "grad_norm": 0.333351194858551,
      "learning_rate": 4.3e-06,
      "loss": 0.712,
      "step": 86
    },
    {
      "epoch": 0.09731543624161074,
      "grad_norm": 0.33052128553390503,
      "learning_rate": 4.350000000000001e-06,
      "loss": 0.7169,
      "step": 87
    },
    {
      "epoch": 0.09843400447427293,
      "grad_norm": 0.31631597876548767,
      "learning_rate": 4.4e-06,
      "loss": 0.6772,
      "step": 88
    },
    {
      "epoch": 0.09955257270693513,
      "grad_norm": 0.327311635017395,
      "learning_rate": 4.450000000000001e-06,
      "loss": 0.6873,
      "step": 89
    },
    {
      "epoch": 0.10067114093959731,
      "grad_norm": 0.32048892974853516,
      "learning_rate": 4.5e-06,
      "loss": 0.6614,
      "step": 90
    },
    {
      "epoch": 0.1017897091722595,
      "grad_norm": 0.32614201307296753,
      "learning_rate": 4.5500000000000005e-06,
      "loss": 0.7197,
      "step": 91
    },
    {
      "epoch": 0.1029082774049217,
      "grad_norm": 0.31145355105400085,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.6567,
      "step": 92
    },
    {
      "epoch": 0.1040268456375839,
      "grad_norm": 0.31379351019859314,
      "learning_rate": 4.65e-06,
      "loss": 0.7013,
      "step": 93
    },
    {
      "epoch": 0.10514541387024609,
      "grad_norm": 0.32741424441337585,
      "learning_rate": 4.7e-06,
      "loss": 0.6737,
      "step": 94
    },
    {
      "epoch": 0.10626398210290827,
      "grad_norm": 0.325630247592926,
      "learning_rate": 4.75e-06,
      "loss": 0.6673,
      "step": 95
    },
    {
      "epoch": 0.10738255033557047,
      "grad_norm": 0.3153480291366577,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.6943,
      "step": 96
    },
    {
      "epoch": 0.10850111856823266,
      "grad_norm": 0.3244793117046356,
      "learning_rate": 4.85e-06,
      "loss": 0.6896,
      "step": 97
    },
    {
      "epoch": 0.10961968680089486,
      "grad_norm": 0.3078743517398834,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.6837,
      "step": 98
    },
    {
      "epoch": 0.11073825503355705,
      "grad_norm": 0.3314874470233917,
      "learning_rate": 4.95e-06,
      "loss": 0.7416,
      "step": 99
    },
    {
      "epoch": 0.11185682326621924,
      "grad_norm": 0.31931284070014954,
      "learning_rate": 5e-06,
      "loss": 0.6903,
      "step": 100
    },
    {
      "epoch": 0.11297539149888143,
      "grad_norm": 0.3176276981830597,
      "learning_rate": 4.999999554776598e-06,
      "loss": 0.6761,
      "step": 101
    },
    {
      "epoch": 0.11409395973154363,
      "grad_norm": 0.3285365700721741,
      "learning_rate": 4.999998219106549e-06,
      "loss": 0.6892,
      "step": 102
    },
    {
      "epoch": 0.11521252796420582,
      "grad_norm": 0.31144094467163086,
      "learning_rate": 4.99999599299033e-06,
      "loss": 0.6586,
      "step": 103
    },
    {
      "epoch": 0.116331096196868,
      "grad_norm": 0.313289076089859,
      "learning_rate": 4.999992876428732e-06,
      "loss": 0.708,
      "step": 104
    },
    {
      "epoch": 0.1174496644295302,
      "grad_norm": 0.3252837061882019,
      "learning_rate": 4.999988869422867e-06,
      "loss": 0.7083,
      "step": 105
    },
    {
      "epoch": 0.1185682326621924,
      "grad_norm": 0.3168275058269501,
      "learning_rate": 4.9999839719741615e-06,
      "loss": 0.6806,
      "step": 106
    },
    {
      "epoch": 0.11968680089485459,
      "grad_norm": 0.31589415669441223,
      "learning_rate": 4.9999781840843594e-06,
      "loss": 0.6702,
      "step": 107
    },
    {
      "epoch": 0.12080536912751678,
      "grad_norm": 0.318037748336792,
      "learning_rate": 4.999971505755523e-06,
      "loss": 0.6601,
      "step": 108
    },
    {
      "epoch": 0.12192393736017897,
      "grad_norm": 0.33259475231170654,
      "learning_rate": 4.999963936990031e-06,
      "loss": 0.7001,
      "step": 109
    },
    {
      "epoch": 0.12304250559284116,
      "grad_norm": 0.33322346210479736,
      "learning_rate": 4.999955477790579e-06,
      "loss": 0.6731,
      "step": 110
    },
    {
      "epoch": 0.12416107382550336,
      "grad_norm": 0.31344881653785706,
      "learning_rate": 4.999946128160179e-06,
      "loss": 0.6667,
      "step": 111
    },
    {
      "epoch": 0.12527964205816555,
      "grad_norm": 0.32769575715065,
      "learning_rate": 4.999935888102162e-06,
      "loss": 0.7123,
      "step": 112
    },
    {
      "epoch": 0.12639821029082773,
      "grad_norm": 0.314619243144989,
      "learning_rate": 4.9999247576201765e-06,
      "loss": 0.683,
      "step": 113
    },
    {
      "epoch": 0.12751677852348994,
      "grad_norm": 0.3301268219947815,
      "learning_rate": 4.999912736718185e-06,
      "loss": 0.6761,
      "step": 114
    },
    {
      "epoch": 0.12863534675615212,
      "grad_norm": 0.31477460265159607,
      "learning_rate": 4.99989982540047e-06,
      "loss": 0.6722,
      "step": 115
    },
    {
      "epoch": 0.1297539149888143,
      "grad_norm": 0.31430870294570923,
      "learning_rate": 4.999886023671629e-06,
      "loss": 0.6693,
      "step": 116
    },
    {
      "epoch": 0.13087248322147652,
      "grad_norm": 0.31705909967422485,
      "learning_rate": 4.999871331536581e-06,
      "loss": 0.6567,
      "step": 117
    },
    {
      "epoch": 0.1319910514541387,
      "grad_norm": 0.3331652879714966,
      "learning_rate": 4.999855749000555e-06,
      "loss": 0.6895,
      "step": 118
    },
    {
      "epoch": 0.1331096196868009,
      "grad_norm": 0.32147714495658875,
      "learning_rate": 4.999839276069105e-06,
      "loss": 0.6693,
      "step": 119
    },
    {
      "epoch": 0.1342281879194631,
      "grad_norm": 0.3312559127807617,
      "learning_rate": 4.999821912748095e-06,
      "loss": 0.6843,
      "step": 120
    },
    {
      "epoch": 0.13534675615212527,
      "grad_norm": 0.34178397059440613,
      "learning_rate": 4.999803659043712e-06,
      "loss": 0.6774,
      "step": 121
    },
    {
      "epoch": 0.13646532438478748,
      "grad_norm": 0.3154846727848053,
      "learning_rate": 4.999784514962456e-06,
      "loss": 0.6638,
      "step": 122
    },
    {
      "epoch": 0.13758389261744966,
      "grad_norm": 0.31137940287590027,
      "learning_rate": 4.999764480511145e-06,
      "loss": 0.6467,
      "step": 123
    },
    {
      "epoch": 0.13870246085011187,
      "grad_norm": 0.3188192546367645,
      "learning_rate": 4.999743555696918e-06,
      "loss": 0.6511,
      "step": 124
    },
    {
      "epoch": 0.13982102908277405,
      "grad_norm": 0.30495911836624146,
      "learning_rate": 4.999721740527225e-06,
      "loss": 0.6637,
      "step": 125
    },
    {
      "epoch": 0.14093959731543623,
      "grad_norm": 0.3152139186859131,
      "learning_rate": 4.999699035009837e-06,
      "loss": 0.6631,
      "step": 126
    },
    {
      "epoch": 0.14205816554809844,
      "grad_norm": 0.32285481691360474,
      "learning_rate": 4.999675439152842e-06,
      "loss": 0.6621,
      "step": 127
    },
    {
      "epoch": 0.14317673378076062,
      "grad_norm": 0.3176666796207428,
      "learning_rate": 4.999650952964643e-06,
      "loss": 0.6654,
      "step": 128
    },
    {
      "epoch": 0.14429530201342283,
      "grad_norm": 0.314035028219223,
      "learning_rate": 4.999625576453962e-06,
      "loss": 0.6927,
      "step": 129
    },
    {
      "epoch": 0.14541387024608501,
      "grad_norm": 0.3227815628051758,
      "learning_rate": 4.999599309629839e-06,
      "loss": 0.6865,
      "step": 130
    },
    {
      "epoch": 0.1465324384787472,
      "grad_norm": 0.3137218952178955,
      "learning_rate": 4.9995721525016275e-06,
      "loss": 0.6499,
      "step": 131
    },
    {
      "epoch": 0.1476510067114094,
      "grad_norm": 0.32401353120803833,
      "learning_rate": 4.999544105079001e-06,
      "loss": 0.64,
      "step": 132
    },
    {
      "epoch": 0.1487695749440716,
      "grad_norm": 0.3110584020614624,
      "learning_rate": 4.99951516737195e-06,
      "loss": 0.6747,
      "step": 133
    },
    {
      "epoch": 0.14988814317673377,
      "grad_norm": 0.3246876895427704,
      "learning_rate": 4.999485339390781e-06,
      "loss": 0.6943,
      "step": 134
    },
    {
      "epoch": 0.15100671140939598,
      "grad_norm": 0.3346574008464813,
      "learning_rate": 4.999454621146117e-06,
      "loss": 0.6675,
      "step": 135
    },
    {
      "epoch": 0.15212527964205816,
      "grad_norm": 0.3305971920490265,
      "learning_rate": 4.999423012648902e-06,
      "loss": 0.7065,
      "step": 136
    },
    {
      "epoch": 0.15324384787472037,
      "grad_norm": 0.31732234358787537,
      "learning_rate": 4.9993905139103924e-06,
      "loss": 0.7038,
      "step": 137
    },
    {
      "epoch": 0.15436241610738255,
      "grad_norm": 0.3233291208744049,
      "learning_rate": 4.999357124942163e-06,
      "loss": 0.6856,
      "step": 138
    },
    {
      "epoch": 0.15548098434004473,
      "grad_norm": 0.31733304262161255,
      "learning_rate": 4.999322845756107e-06,
      "loss": 0.702,
      "step": 139
    },
    {
      "epoch": 0.15659955257270694,
      "grad_norm": 0.33124351501464844,
      "learning_rate": 4.9992876763644346e-06,
      "loss": 0.6616,
      "step": 140
    },
    {
      "epoch": 0.15771812080536912,
      "grad_norm": 0.3264501094818115,
      "learning_rate": 4.999251616779671e-06,
      "loss": 0.6773,
      "step": 141
    },
    {
      "epoch": 0.15883668903803133,
      "grad_norm": 0.34606418013572693,
      "learning_rate": 4.999214667014662e-06,
      "loss": 0.6765,
      "step": 142
    },
    {
      "epoch": 0.1599552572706935,
      "grad_norm": 0.3292436897754669,
      "learning_rate": 4.999176827082566e-06,
      "loss": 0.6692,
      "step": 143
    },
    {
      "epoch": 0.1610738255033557,
      "grad_norm": 0.31322377920150757,
      "learning_rate": 4.9991380969968615e-06,
      "loss": 0.6811,
      "step": 144
    },
    {
      "epoch": 0.1621923937360179,
      "grad_norm": 0.32053160667419434,
      "learning_rate": 4.999098476771344e-06,
      "loss": 0.6544,
      "step": 145
    },
    {
      "epoch": 0.16331096196868009,
      "grad_norm": 0.34363314509391785,
      "learning_rate": 4.9990579664201244e-06,
      "loss": 0.6839,
      "step": 146
    },
    {
      "epoch": 0.1644295302013423,
      "grad_norm": 0.3260481357574463,
      "learning_rate": 4.999016565957633e-06,
      "loss": 0.7048,
      "step": 147
    },
    {
      "epoch": 0.16554809843400448,
      "grad_norm": 0.3410928547382355,
      "learning_rate": 4.998974275398614e-06,
      "loss": 0.6846,
      "step": 148
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.33661726117134094,
      "learning_rate": 4.998931094758132e-06,
      "loss": 0.6468,
      "step": 149
    },
    {
      "epoch": 0.16778523489932887,
      "grad_norm": 0.32493966817855835,
      "learning_rate": 4.998887024051565e-06,
      "loss": 0.6741,
      "step": 150
    },
    {
      "epoch": 0.16890380313199105,
      "grad_norm": 0.3396671414375305,
      "learning_rate": 4.998842063294613e-06,
      "loss": 0.6703,
      "step": 151
    },
    {
      "epoch": 0.17002237136465326,
      "grad_norm": 0.32851850986480713,
      "learning_rate": 4.998796212503287e-06,
      "loss": 0.6589,
      "step": 152
    },
    {
      "epoch": 0.17114093959731544,
      "grad_norm": 0.33433711528778076,
      "learning_rate": 4.99874947169392e-06,
      "loss": 0.6594,
      "step": 153
    },
    {
      "epoch": 0.17225950782997762,
      "grad_norm": 0.3388006091117859,
      "learning_rate": 4.99870184088316e-06,
      "loss": 0.6604,
      "step": 154
    },
    {
      "epoch": 0.17337807606263983,
      "grad_norm": 0.31802693009376526,
      "learning_rate": 4.998653320087971e-06,
      "loss": 0.6467,
      "step": 155
    },
    {
      "epoch": 0.174496644295302,
      "grad_norm": 0.33016613125801086,
      "learning_rate": 4.998603909325636e-06,
      "loss": 0.6599,
      "step": 156
    },
    {
      "epoch": 0.1756152125279642,
      "grad_norm": 0.32546237111091614,
      "learning_rate": 4.998553608613755e-06,
      "loss": 0.6519,
      "step": 157
    },
    {
      "epoch": 0.1767337807606264,
      "grad_norm": 0.3362942337989807,
      "learning_rate": 4.998502417970242e-06,
      "loss": 0.671,
      "step": 158
    },
    {
      "epoch": 0.17785234899328858,
      "grad_norm": 0.33070167899131775,
      "learning_rate": 4.998450337413331e-06,
      "loss": 0.6624,
      "step": 159
    },
    {
      "epoch": 0.1789709172259508,
      "grad_norm": 0.32430973649024963,
      "learning_rate": 4.998397366961571e-06,
      "loss": 0.6263,
      "step": 160
    },
    {
      "epoch": 0.18008948545861297,
      "grad_norm": 0.32481464743614197,
      "learning_rate": 4.998343506633831e-06,
      "loss": 0.6683,
      "step": 161
    },
    {
      "epoch": 0.18120805369127516,
      "grad_norm": 0.33035483956336975,
      "learning_rate": 4.998288756449292e-06,
      "loss": 0.6816,
      "step": 162
    },
    {
      "epoch": 0.18232662192393737,
      "grad_norm": 0.33188679814338684,
      "learning_rate": 4.998233116427458e-06,
      "loss": 0.6693,
      "step": 163
    },
    {
      "epoch": 0.18344519015659955,
      "grad_norm": 0.33667680621147156,
      "learning_rate": 4.998176586588145e-06,
      "loss": 0.6619,
      "step": 164
    },
    {
      "epoch": 0.18456375838926176,
      "grad_norm": 0.33836281299591064,
      "learning_rate": 4.998119166951488e-06,
      "loss": 0.6697,
      "step": 165
    },
    {
      "epoch": 0.18568232662192394,
      "grad_norm": 0.31710004806518555,
      "learning_rate": 4.998060857537938e-06,
      "loss": 0.6386,
      "step": 166
    },
    {
      "epoch": 0.18680089485458612,
      "grad_norm": 0.3220674395561218,
      "learning_rate": 4.9980016583682655e-06,
      "loss": 0.6477,
      "step": 167
    },
    {
      "epoch": 0.18791946308724833,
      "grad_norm": 0.31624945998191833,
      "learning_rate": 4.997941569463554e-06,
      "loss": 0.6771,
      "step": 168
    },
    {
      "epoch": 0.1890380313199105,
      "grad_norm": 0.33520039916038513,
      "learning_rate": 4.997880590845208e-06,
      "loss": 0.6777,
      "step": 169
    },
    {
      "epoch": 0.19015659955257272,
      "grad_norm": 0.33738774061203003,
      "learning_rate": 4.997818722534944e-06,
      "loss": 0.6603,
      "step": 170
    },
    {
      "epoch": 0.1912751677852349,
      "grad_norm": 0.33408045768737793,
      "learning_rate": 4.9977559645548e-06,
      "loss": 0.6581,
      "step": 171
    },
    {
      "epoch": 0.19239373601789708,
      "grad_norm": 0.3269501328468323,
      "learning_rate": 4.997692316927129e-06,
      "loss": 0.6623,
      "step": 172
    },
    {
      "epoch": 0.1935123042505593,
      "grad_norm": 0.33073386549949646,
      "learning_rate": 4.997627779674601e-06,
      "loss": 0.6465,
      "step": 173
    },
    {
      "epoch": 0.19463087248322147,
      "grad_norm": 0.33027294278144836,
      "learning_rate": 4.997562352820201e-06,
      "loss": 0.6795,
      "step": 174
    },
    {
      "epoch": 0.19574944071588368,
      "grad_norm": 0.3329165577888489,
      "learning_rate": 4.997496036387235e-06,
      "loss": 0.6717,
      "step": 175
    },
    {
      "epoch": 0.19686800894854586,
      "grad_norm": 0.3321872353553772,
      "learning_rate": 4.997428830399322e-06,
      "loss": 0.6415,
      "step": 176
    },
    {
      "epoch": 0.19798657718120805,
      "grad_norm": 0.3222750723361969,
      "learning_rate": 4.997360734880401e-06,
      "loss": 0.657,
      "step": 177
    },
    {
      "epoch": 0.19910514541387025,
      "grad_norm": 0.33294835686683655,
      "learning_rate": 4.997291749854725e-06,
      "loss": 0.6931,
      "step": 178
    },
    {
      "epoch": 0.20022371364653244,
      "grad_norm": 0.3413322865962982,
      "learning_rate": 4.997221875346863e-06,
      "loss": 0.6761,
      "step": 179
    },
    {
      "epoch": 0.20134228187919462,
      "grad_norm": 0.3300095796585083,
      "learning_rate": 4.997151111381707e-06,
      "loss": 0.6626,
      "step": 180
    },
    {
      "epoch": 0.20246085011185683,
      "grad_norm": 0.337289035320282,
      "learning_rate": 4.997079457984459e-06,
      "loss": 0.6861,
      "step": 181
    },
    {
      "epoch": 0.203579418344519,
      "grad_norm": 0.3266119658946991,
      "learning_rate": 4.997006915180642e-06,
      "loss": 0.6687,
      "step": 182
    },
    {
      "epoch": 0.20469798657718122,
      "grad_norm": 0.33044853806495667,
      "learning_rate": 4.996933482996092e-06,
      "loss": 0.6637,
      "step": 183
    },
    {
      "epoch": 0.2058165548098434,
      "grad_norm": 0.33716171979904175,
      "learning_rate": 4.996859161456965e-06,
      "loss": 0.6644,
      "step": 184
    },
    {
      "epoch": 0.20693512304250558,
      "grad_norm": 0.32554203271865845,
      "learning_rate": 4.996783950589733e-06,
      "loss": 0.6524,
      "step": 185
    },
    {
      "epoch": 0.2080536912751678,
      "grad_norm": 0.3271404504776001,
      "learning_rate": 4.996707850421184e-06,
      "loss": 0.6581,
      "step": 186
    },
    {
      "epoch": 0.20917225950782997,
      "grad_norm": 0.34464138746261597,
      "learning_rate": 4.996630860978424e-06,
      "loss": 0.6768,
      "step": 187
    },
    {
      "epoch": 0.21029082774049218,
      "grad_norm": 0.3408767282962799,
      "learning_rate": 4.996552982288875e-06,
      "loss": 0.6556,
      "step": 188
    },
    {
      "epoch": 0.21140939597315436,
      "grad_norm": 0.3375307023525238,
      "learning_rate": 4.996474214380276e-06,
      "loss": 0.6819,
      "step": 189
    },
    {
      "epoch": 0.21252796420581654,
      "grad_norm": 0.3313542902469635,
      "learning_rate": 4.99639455728068e-06,
      "loss": 0.6483,
      "step": 190
    },
    {
      "epoch": 0.21364653243847875,
      "grad_norm": 0.3327822685241699,
      "learning_rate": 4.996314011018462e-06,
      "loss": 0.6669,
      "step": 191
    },
    {
      "epoch": 0.21476510067114093,
      "grad_norm": 0.33021339774131775,
      "learning_rate": 4.99623257562231e-06,
      "loss": 0.6734,
      "step": 192
    },
    {
      "epoch": 0.21588366890380314,
      "grad_norm": 0.32687169313430786,
      "learning_rate": 4.996150251121229e-06,
      "loss": 0.6387,
      "step": 193
    },
    {
      "epoch": 0.21700223713646533,
      "grad_norm": 0.3394392728805542,
      "learning_rate": 4.996067037544542e-06,
      "loss": 0.6623,
      "step": 194
    },
    {
      "epoch": 0.2181208053691275,
      "grad_norm": 0.33284223079681396,
      "learning_rate": 4.995982934921887e-06,
      "loss": 0.6405,
      "step": 195
    },
    {
      "epoch": 0.21923937360178972,
      "grad_norm": 0.344235360622406,
      "learning_rate": 4.995897943283221e-06,
      "loss": 0.6741,
      "step": 196
    },
    {
      "epoch": 0.2203579418344519,
      "grad_norm": 0.33437255024909973,
      "learning_rate": 4.995812062658815e-06,
      "loss": 0.6718,
      "step": 197
    },
    {
      "epoch": 0.2214765100671141,
      "grad_norm": 0.3216111361980438,
      "learning_rate": 4.995725293079257e-06,
      "loss": 0.6709,
      "step": 198
    },
    {
      "epoch": 0.2225950782997763,
      "grad_norm": 0.3448997139930725,
      "learning_rate": 4.9956376345754556e-06,
      "loss": 0.6458,
      "step": 199
    },
    {
      "epoch": 0.22371364653243847,
      "grad_norm": 0.34354478120803833,
      "learning_rate": 4.99554908717863e-06,
      "loss": 0.6615,
      "step": 200
    },
    {
      "epoch": 0.22483221476510068,
      "grad_norm": 0.3417740762233734,
      "learning_rate": 4.99545965092032e-06,
      "loss": 0.6847,
      "step": 201
    },
    {
      "epoch": 0.22595078299776286,
      "grad_norm": 0.3366676867008209,
      "learning_rate": 4.99536932583238e-06,
      "loss": 0.649,
      "step": 202
    },
    {
      "epoch": 0.22706935123042504,
      "grad_norm": 0.3610089421272278,
      "learning_rate": 4.995278111946983e-06,
      "loss": 0.6616,
      "step": 203
    },
    {
      "epoch": 0.22818791946308725,
      "grad_norm": 0.3359774053096771,
      "learning_rate": 4.995186009296618e-06,
      "loss": 0.6519,
      "step": 204
    },
    {
      "epoch": 0.22930648769574943,
      "grad_norm": 0.34075963497161865,
      "learning_rate": 4.9950930179140885e-06,
      "loss": 0.6762,
      "step": 205
    },
    {
      "epoch": 0.23042505592841164,
      "grad_norm": 0.32416507601737976,
      "learning_rate": 4.994999137832517e-06,
      "loss": 0.6499,
      "step": 206
    },
    {
      "epoch": 0.23154362416107382,
      "grad_norm": 0.32749176025390625,
      "learning_rate": 4.99490436908534e-06,
      "loss": 0.645,
      "step": 207
    },
    {
      "epoch": 0.232662192393736,
      "grad_norm": 0.3349708318710327,
      "learning_rate": 4.994808711706314e-06,
      "loss": 0.6676,
      "step": 208
    },
    {
      "epoch": 0.23378076062639822,
      "grad_norm": 0.3491227328777313,
      "learning_rate": 4.9947121657295094e-06,
      "loss": 0.6287,
      "step": 209
    },
    {
      "epoch": 0.2348993288590604,
      "grad_norm": 0.3400874733924866,
      "learning_rate": 4.994614731189314e-06,
      "loss": 0.6473,
      "step": 210
    },
    {
      "epoch": 0.2360178970917226,
      "grad_norm": 0.3388952612876892,
      "learning_rate": 4.994516408120432e-06,
      "loss": 0.6821,
      "step": 211
    },
    {
      "epoch": 0.2371364653243848,
      "grad_norm": 0.33224812150001526,
      "learning_rate": 4.994417196557884e-06,
      "loss": 0.649,
      "step": 212
    },
    {
      "epoch": 0.23825503355704697,
      "grad_norm": 0.3307199478149414,
      "learning_rate": 4.994317096537006e-06,
      "loss": 0.6581,
      "step": 213
    },
    {
      "epoch": 0.23937360178970918,
      "grad_norm": 0.3505164682865143,
      "learning_rate": 4.994216108093452e-06,
      "loss": 0.6498,
      "step": 214
    },
    {
      "epoch": 0.24049217002237136,
      "grad_norm": 0.3284938335418701,
      "learning_rate": 4.994114231263193e-06,
      "loss": 0.6503,
      "step": 215
    },
    {
      "epoch": 0.24161073825503357,
      "grad_norm": 0.3475011885166168,
      "learning_rate": 4.994011466082514e-06,
      "loss": 0.6724,
      "step": 216
    },
    {
      "epoch": 0.24272930648769575,
      "grad_norm": 0.34667858481407166,
      "learning_rate": 4.993907812588019e-06,
      "loss": 0.6373,
      "step": 217
    },
    {
      "epoch": 0.24384787472035793,
      "grad_norm": 0.34276899695396423,
      "learning_rate": 4.993803270816627e-06,
      "loss": 0.6513,
      "step": 218
    },
    {
      "epoch": 0.24496644295302014,
      "grad_norm": 0.34308409690856934,
      "learning_rate": 4.993697840805572e-06,
      "loss": 0.6596,
      "step": 219
    },
    {
      "epoch": 0.24608501118568232,
      "grad_norm": 0.33108261227607727,
      "learning_rate": 4.9935915225924075e-06,
      "loss": 0.6623,
      "step": 220
    },
    {
      "epoch": 0.24720357941834453,
      "grad_norm": 0.3529888093471527,
      "learning_rate": 4.9934843162150015e-06,
      "loss": 0.658,
      "step": 221
    },
    {
      "epoch": 0.2483221476510067,
      "grad_norm": 0.3457166850566864,
      "learning_rate": 4.993376221711538e-06,
      "loss": 0.6342,
      "step": 222
    },
    {
      "epoch": 0.2494407158836689,
      "grad_norm": 0.35108813643455505,
      "learning_rate": 4.993267239120519e-06,
      "loss": 0.6325,
      "step": 223
    },
    {
      "epoch": 0.2505592841163311,
      "grad_norm": 0.3448682129383087,
      "learning_rate": 4.993157368480761e-06,
      "loss": 0.6746,
      "step": 224
    },
    {
      "epoch": 0.2516778523489933,
      "grad_norm": 0.34094589948654175,
      "learning_rate": 4.993046609831397e-06,
      "loss": 0.6313,
      "step": 225
    },
    {
      "epoch": 0.25279642058165547,
      "grad_norm": 0.33934327960014343,
      "learning_rate": 4.9929349632118785e-06,
      "loss": 0.6371,
      "step": 226
    },
    {
      "epoch": 0.2539149888143177,
      "grad_norm": 0.3517382740974426,
      "learning_rate": 4.99282242866197e-06,
      "loss": 0.6411,
      "step": 227
    },
    {
      "epoch": 0.2550335570469799,
      "grad_norm": 0.34098172187805176,
      "learning_rate": 4.992709006221755e-06,
      "loss": 0.6648,
      "step": 228
    },
    {
      "epoch": 0.25615212527964204,
      "grad_norm": 0.3397183120250702,
      "learning_rate": 4.992594695931632e-06,
      "loss": 0.6038,
      "step": 229
    },
    {
      "epoch": 0.25727069351230425,
      "grad_norm": 0.35994404554367065,
      "learning_rate": 4.992479497832316e-06,
      "loss": 0.6832,
      "step": 230
    },
    {
      "epoch": 0.25838926174496646,
      "grad_norm": 0.3505656123161316,
      "learning_rate": 4.992363411964838e-06,
      "loss": 0.682,
      "step": 231
    },
    {
      "epoch": 0.2595078299776286,
      "grad_norm": 0.343159556388855,
      "learning_rate": 4.992246438370545e-06,
      "loss": 0.6597,
      "step": 232
    },
    {
      "epoch": 0.2606263982102908,
      "grad_norm": 0.36491280794143677,
      "learning_rate": 4.9921285770911e-06,
      "loss": 0.6422,
      "step": 233
    },
    {
      "epoch": 0.26174496644295303,
      "grad_norm": 0.3656606078147888,
      "learning_rate": 4.992009828168484e-06,
      "loss": 0.6988,
      "step": 234
    },
    {
      "epoch": 0.26286353467561524,
      "grad_norm": 0.3348519206047058,
      "learning_rate": 4.991890191644993e-06,
      "loss": 0.6281,
      "step": 235
    },
    {
      "epoch": 0.2639821029082774,
      "grad_norm": 0.3407367467880249,
      "learning_rate": 4.991769667563237e-06,
      "loss": 0.6487,
      "step": 236
    },
    {
      "epoch": 0.2651006711409396,
      "grad_norm": 0.3644556999206543,
      "learning_rate": 4.991648255966145e-06,
      "loss": 0.6443,
      "step": 237
    },
    {
      "epoch": 0.2662192393736018,
      "grad_norm": 0.3346659243106842,
      "learning_rate": 4.991525956896962e-06,
      "loss": 0.632,
      "step": 238
    },
    {
      "epoch": 0.26733780760626397,
      "grad_norm": 0.36535120010375977,
      "learning_rate": 4.991402770399249e-06,
      "loss": 0.6347,
      "step": 239
    },
    {
      "epoch": 0.2684563758389262,
      "grad_norm": 0.34253549575805664,
      "learning_rate": 4.991278696516879e-06,
      "loss": 0.6946,
      "step": 240
    },
    {
      "epoch": 0.2695749440715884,
      "grad_norm": 0.35125476121902466,
      "learning_rate": 4.9911537352940485e-06,
      "loss": 0.6669,
      "step": 241
    },
    {
      "epoch": 0.27069351230425054,
      "grad_norm": 0.35836276412010193,
      "learning_rate": 4.991027886775264e-06,
      "loss": 0.6534,
      "step": 242
    },
    {
      "epoch": 0.27181208053691275,
      "grad_norm": 0.34344252943992615,
      "learning_rate": 4.990901151005349e-06,
      "loss": 0.6595,
      "step": 243
    },
    {
      "epoch": 0.27293064876957496,
      "grad_norm": 0.35524797439575195,
      "learning_rate": 4.9907735280294465e-06,
      "loss": 0.6612,
      "step": 244
    },
    {
      "epoch": 0.2740492170022371,
      "grad_norm": 0.3483973741531372,
      "learning_rate": 4.990645017893013e-06,
      "loss": 0.6694,
      "step": 245
    },
    {
      "epoch": 0.2751677852348993,
      "grad_norm": 0.34605199098587036,
      "learning_rate": 4.990515620641819e-06,
      "loss": 0.6453,
      "step": 246
    },
    {
      "epoch": 0.27628635346756153,
      "grad_norm": 0.3441944122314453,
      "learning_rate": 4.990385336321954e-06,
      "loss": 0.6356,
      "step": 247
    },
    {
      "epoch": 0.27740492170022374,
      "grad_norm": 0.36233291029930115,
      "learning_rate": 4.990254164979823e-06,
      "loss": 0.673,
      "step": 248
    },
    {
      "epoch": 0.2785234899328859,
      "grad_norm": 0.3624320328235626,
      "learning_rate": 4.990122106662145e-06,
      "loss": 0.6459,
      "step": 249
    },
    {
      "epoch": 0.2796420581655481,
      "grad_norm": 0.3635922372341156,
      "learning_rate": 4.989989161415959e-06,
      "loss": 0.6552,
      "step": 250
    },
    {
      "epoch": 0.2807606263982103,
      "grad_norm": 0.3370678424835205,
      "learning_rate": 4.989855329288615e-06,
      "loss": 0.6098,
      "step": 251
    },
    {
      "epoch": 0.28187919463087246,
      "grad_norm": 0.35488393902778625,
      "learning_rate": 4.989720610327782e-06,
      "loss": 0.6554,
      "step": 252
    },
    {
      "epoch": 0.2829977628635347,
      "grad_norm": 0.3495752513408661,
      "learning_rate": 4.989585004581444e-06,
      "loss": 0.6339,
      "step": 253
    },
    {
      "epoch": 0.2841163310961969,
      "grad_norm": 0.3451905846595764,
      "learning_rate": 4.989448512097901e-06,
      "loss": 0.6954,
      "step": 254
    },
    {
      "epoch": 0.28523489932885904,
      "grad_norm": 0.3532280921936035,
      "learning_rate": 4.989311132925768e-06,
      "loss": 0.6198,
      "step": 255
    },
    {
      "epoch": 0.28635346756152125,
      "grad_norm": 0.3570882976055145,
      "learning_rate": 4.989172867113976e-06,
      "loss": 0.6492,
      "step": 256
    },
    {
      "epoch": 0.28747203579418346,
      "grad_norm": 0.33201339840888977,
      "learning_rate": 4.9890337147117755e-06,
      "loss": 0.6324,
      "step": 257
    },
    {
      "epoch": 0.28859060402684567,
      "grad_norm": 0.3354071080684662,
      "learning_rate": 4.988893675768726e-06,
      "loss": 0.628,
      "step": 258
    },
    {
      "epoch": 0.2897091722595078,
      "grad_norm": 0.341133713722229,
      "learning_rate": 4.988752750334708e-06,
      "loss": 0.6316,
      "step": 259
    },
    {
      "epoch": 0.29082774049217003,
      "grad_norm": 0.33613571524620056,
      "learning_rate": 4.9886109384599165e-06,
      "loss": 0.6401,
      "step": 260
    },
    {
      "epoch": 0.29194630872483224,
      "grad_norm": 0.3657302260398865,
      "learning_rate": 4.988468240194861e-06,
      "loss": 0.6743,
      "step": 261
    },
    {
      "epoch": 0.2930648769574944,
      "grad_norm": 0.3349529504776001,
      "learning_rate": 4.988324655590369e-06,
      "loss": 0.6121,
      "step": 262
    },
    {
      "epoch": 0.2941834451901566,
      "grad_norm": 0.34364601969718933,
      "learning_rate": 4.98818018469758e-06,
      "loss": 0.6427,
      "step": 263
    },
    {
      "epoch": 0.2953020134228188,
      "grad_norm": 0.35895341634750366,
      "learning_rate": 4.988034827567953e-06,
      "loss": 0.6913,
      "step": 264
    },
    {
      "epoch": 0.29642058165548096,
      "grad_norm": 0.3571792244911194,
      "learning_rate": 4.987888584253262e-06,
      "loss": 0.6286,
      "step": 265
    },
    {
      "epoch": 0.2975391498881432,
      "grad_norm": 0.3684253394603729,
      "learning_rate": 4.987741454805594e-06,
      "loss": 0.6365,
      "step": 266
    },
    {
      "epoch": 0.2986577181208054,
      "grad_norm": 0.3521265983581543,
      "learning_rate": 4.987593439277353e-06,
      "loss": 0.6172,
      "step": 267
    },
    {
      "epoch": 0.29977628635346754,
      "grad_norm": 0.34943872690200806,
      "learning_rate": 4.98744453772126e-06,
      "loss": 0.6382,
      "step": 268
    },
    {
      "epoch": 0.30089485458612975,
      "grad_norm": 0.35075268149375916,
      "learning_rate": 4.9872947501903515e-06,
      "loss": 0.6497,
      "step": 269
    },
    {
      "epoch": 0.30201342281879195,
      "grad_norm": 0.3558429479598999,
      "learning_rate": 4.987144076737978e-06,
      "loss": 0.6561,
      "step": 270
    },
    {
      "epoch": 0.30313199105145416,
      "grad_norm": 0.35898861289024353,
      "learning_rate": 4.986992517417805e-06,
      "loss": 0.6613,
      "step": 271
    },
    {
      "epoch": 0.3042505592841163,
      "grad_norm": 0.35135966539382935,
      "learning_rate": 4.986840072283815e-06,
      "loss": 0.6507,
      "step": 272
    },
    {
      "epoch": 0.3053691275167785,
      "grad_norm": 0.3530326783657074,
      "learning_rate": 4.986686741390308e-06,
      "loss": 0.6459,
      "step": 273
    },
    {
      "epoch": 0.30648769574944074,
      "grad_norm": 0.34631597995758057,
      "learning_rate": 4.986532524791894e-06,
      "loss": 0.6074,
      "step": 274
    },
    {
      "epoch": 0.3076062639821029,
      "grad_norm": 0.37036123871803284,
      "learning_rate": 4.986377422543503e-06,
      "loss": 0.6416,
      "step": 275
    },
    {
      "epoch": 0.3087248322147651,
      "grad_norm": 0.3576701283454895,
      "learning_rate": 4.98622143470038e-06,
      "loss": 0.5939,
      "step": 276
    },
    {
      "epoch": 0.3098434004474273,
      "grad_norm": 0.35285863280296326,
      "learning_rate": 4.986064561318083e-06,
      "loss": 0.6405,
      "step": 277
    },
    {
      "epoch": 0.31096196868008946,
      "grad_norm": 0.35576099157333374,
      "learning_rate": 4.985906802452488e-06,
      "loss": 0.6348,
      "step": 278
    },
    {
      "epoch": 0.31208053691275167,
      "grad_norm": 0.35739952325820923,
      "learning_rate": 4.985748158159785e-06,
      "loss": 0.65,
      "step": 279
    },
    {
      "epoch": 0.3131991051454139,
      "grad_norm": 0.35885411500930786,
      "learning_rate": 4.985588628496481e-06,
      "loss": 0.6575,
      "step": 280
    },
    {
      "epoch": 0.3143176733780761,
      "grad_norm": 0.36760038137435913,
      "learning_rate": 4.985428213519396e-06,
      "loss": 0.6606,
      "step": 281
    },
    {
      "epoch": 0.31543624161073824,
      "grad_norm": 0.35371875762939453,
      "learning_rate": 4.9852669132856645e-06,
      "loss": 0.6495,
      "step": 282
    },
    {
      "epoch": 0.31655480984340045,
      "grad_norm": 0.3489198684692383,
      "learning_rate": 4.985104727852741e-06,
      "loss": 0.6402,
      "step": 283
    },
    {
      "epoch": 0.31767337807606266,
      "grad_norm": 0.3595716953277588,
      "learning_rate": 4.984941657278392e-06,
      "loss": 0.6495,
      "step": 284
    },
    {
      "epoch": 0.3187919463087248,
      "grad_norm": 0.3551527261734009,
      "learning_rate": 4.984777701620698e-06,
      "loss": 0.6555,
      "step": 285
    },
    {
      "epoch": 0.319910514541387,
      "grad_norm": 0.35428524017333984,
      "learning_rate": 4.984612860938059e-06,
      "loss": 0.6435,
      "step": 286
    },
    {
      "epoch": 0.32102908277404923,
      "grad_norm": 0.36495980620384216,
      "learning_rate": 4.984447135289185e-06,
      "loss": 0.6375,
      "step": 287
    },
    {
      "epoch": 0.3221476510067114,
      "grad_norm": 0.36956459283828735,
      "learning_rate": 4.984280524733107e-06,
      "loss": 0.654,
      "step": 288
    },
    {
      "epoch": 0.3232662192393736,
      "grad_norm": 0.34770330786705017,
      "learning_rate": 4.984113029329166e-06,
      "loss": 0.6313,
      "step": 289
    },
    {
      "epoch": 0.3243847874720358,
      "grad_norm": 0.3676060140132904,
      "learning_rate": 4.9839446491370215e-06,
      "loss": 0.6697,
      "step": 290
    },
    {
      "epoch": 0.32550335570469796,
      "grad_norm": 0.3490992486476898,
      "learning_rate": 4.983775384216646e-06,
      "loss": 0.6343,
      "step": 291
    },
    {
      "epoch": 0.32662192393736017,
      "grad_norm": 0.36720773577690125,
      "learning_rate": 4.983605234628328e-06,
      "loss": 0.6609,
      "step": 292
    },
    {
      "epoch": 0.3277404921700224,
      "grad_norm": 0.3619595766067505,
      "learning_rate": 4.983434200432672e-06,
      "loss": 0.6635,
      "step": 293
    },
    {
      "epoch": 0.3288590604026846,
      "grad_norm": 0.35261741280555725,
      "learning_rate": 4.983262281690596e-06,
      "loss": 0.6273,
      "step": 294
    },
    {
      "epoch": 0.32997762863534674,
      "grad_norm": 0.34182801842689514,
      "learning_rate": 4.983089478463335e-06,
      "loss": 0.6271,
      "step": 295
    },
    {
      "epoch": 0.33109619686800895,
      "grad_norm": 0.3623373806476593,
      "learning_rate": 4.982915790812436e-06,
      "loss": 0.6491,
      "step": 296
    },
    {
      "epoch": 0.33221476510067116,
      "grad_norm": 0.3656613826751709,
      "learning_rate": 4.982741218799763e-06,
      "loss": 0.6672,
      "step": 297
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.3533206880092621,
      "learning_rate": 4.982565762487498e-06,
      "loss": 0.6257,
      "step": 298
    },
    {
      "epoch": 0.3344519015659955,
      "grad_norm": 0.35345301032066345,
      "learning_rate": 4.982389421938131e-06,
      "loss": 0.6486,
      "step": 299
    },
    {
      "epoch": 0.33557046979865773,
      "grad_norm": 0.36566492915153503,
      "learning_rate": 4.982212197214472e-06,
      "loss": 0.6411,
      "step": 300
    },
    {
      "epoch": 0.3366890380313199,
      "grad_norm": 0.3681536018848419,
      "learning_rate": 4.982034088379646e-06,
      "loss": 0.6208,
      "step": 301
    },
    {
      "epoch": 0.3378076062639821,
      "grad_norm": 0.3649962246417999,
      "learning_rate": 4.98185509549709e-06,
      "loss": 0.6557,
      "step": 302
    },
    {
      "epoch": 0.3389261744966443,
      "grad_norm": 0.350563108921051,
      "learning_rate": 4.981675218630557e-06,
      "loss": 0.6361,
      "step": 303
    },
    {
      "epoch": 0.3400447427293065,
      "grad_norm": 0.35432034730911255,
      "learning_rate": 4.981494457844117e-06,
      "loss": 0.6294,
      "step": 304
    },
    {
      "epoch": 0.34116331096196867,
      "grad_norm": 0.3590239882469177,
      "learning_rate": 4.981312813202153e-06,
      "loss": 0.6318,
      "step": 305
    },
    {
      "epoch": 0.3422818791946309,
      "grad_norm": 0.3664059042930603,
      "learning_rate": 4.981130284769361e-06,
      "loss": 0.648,
      "step": 306
    },
    {
      "epoch": 0.3434004474272931,
      "grad_norm": 0.35878944396972656,
      "learning_rate": 4.9809468726107555e-06,
      "loss": 0.619,
      "step": 307
    },
    {
      "epoch": 0.34451901565995524,
      "grad_norm": 0.35763007402420044,
      "learning_rate": 4.980762576791664e-06,
      "loss": 0.6567,
      "step": 308
    },
    {
      "epoch": 0.34563758389261745,
      "grad_norm": 0.35878288745880127,
      "learning_rate": 4.980577397377728e-06,
      "loss": 0.6421,
      "step": 309
    },
    {
      "epoch": 0.34675615212527966,
      "grad_norm": 0.3597519099712372,
      "learning_rate": 4.980391334434906e-06,
      "loss": 0.6352,
      "step": 310
    },
    {
      "epoch": 0.3478747203579418,
      "grad_norm": 0.3498169481754303,
      "learning_rate": 4.980204388029466e-06,
      "loss": 0.626,
      "step": 311
    },
    {
      "epoch": 0.348993288590604,
      "grad_norm": 0.36353105306625366,
      "learning_rate": 4.980016558227998e-06,
      "loss": 0.6513,
      "step": 312
    },
    {
      "epoch": 0.35011185682326623,
      "grad_norm": 0.355794221162796,
      "learning_rate": 4.979827845097402e-06,
      "loss": 0.6385,
      "step": 313
    },
    {
      "epoch": 0.3512304250559284,
      "grad_norm": 0.3594406545162201,
      "learning_rate": 4.979638248704894e-06,
      "loss": 0.6134,
      "step": 314
    },
    {
      "epoch": 0.3523489932885906,
      "grad_norm": 0.3639025092124939,
      "learning_rate": 4.979447769118002e-06,
      "loss": 0.6386,
      "step": 315
    },
    {
      "epoch": 0.3534675615212528,
      "grad_norm": 0.3681359887123108,
      "learning_rate": 4.979256406404574e-06,
      "loss": 0.6213,
      "step": 316
    },
    {
      "epoch": 0.354586129753915,
      "grad_norm": 0.35021546483039856,
      "learning_rate": 4.979064160632766e-06,
      "loss": 0.5933,
      "step": 317
    },
    {
      "epoch": 0.35570469798657717,
      "grad_norm": 0.35783469676971436,
      "learning_rate": 4.978871031871054e-06,
      "loss": 0.6054,
      "step": 318
    },
    {
      "epoch": 0.3568232662192394,
      "grad_norm": 0.3768575191497803,
      "learning_rate": 4.978677020188226e-06,
      "loss": 0.651,
      "step": 319
    },
    {
      "epoch": 0.3579418344519016,
      "grad_norm": 0.3841581642627716,
      "learning_rate": 4.978482125653385e-06,
      "loss": 0.6447,
      "step": 320
    },
    {
      "epoch": 0.35906040268456374,
      "grad_norm": 0.3749678134918213,
      "learning_rate": 4.978286348335949e-06,
      "loss": 0.6403,
      "step": 321
    },
    {
      "epoch": 0.36017897091722595,
      "grad_norm": 0.35757121443748474,
      "learning_rate": 4.978089688305647e-06,
      "loss": 0.6297,
      "step": 322
    },
    {
      "epoch": 0.36129753914988816,
      "grad_norm": 0.36303797364234924,
      "learning_rate": 4.977892145632528e-06,
      "loss": 0.6438,
      "step": 323
    },
    {
      "epoch": 0.3624161073825503,
      "grad_norm": 0.3670295476913452,
      "learning_rate": 4.977693720386951e-06,
      "loss": 0.6055,
      "step": 324
    },
    {
      "epoch": 0.3635346756152125,
      "grad_norm": 0.3521486818790436,
      "learning_rate": 4.977494412639591e-06,
      "loss": 0.6072,
      "step": 325
    },
    {
      "epoch": 0.36465324384787473,
      "grad_norm": 0.35688498616218567,
      "learning_rate": 4.9772942224614375e-06,
      "loss": 0.6252,
      "step": 326
    },
    {
      "epoch": 0.36577181208053694,
      "grad_norm": 0.3542022109031677,
      "learning_rate": 4.9770931499237925e-06,
      "loss": 0.6407,
      "step": 327
    },
    {
      "epoch": 0.3668903803131991,
      "grad_norm": 0.3773319125175476,
      "learning_rate": 4.976891195098277e-06,
      "loss": 0.6524,
      "step": 328
    },
    {
      "epoch": 0.3680089485458613,
      "grad_norm": 0.3697628080844879,
      "learning_rate": 4.97668835805682e-06,
      "loss": 0.6503,
      "step": 329
    },
    {
      "epoch": 0.3691275167785235,
      "grad_norm": 0.36458712816238403,
      "learning_rate": 4.976484638871669e-06,
      "loss": 0.6722,
      "step": 330
    },
    {
      "epoch": 0.37024608501118567,
      "grad_norm": 0.3523670434951782,
      "learning_rate": 4.976280037615385e-06,
      "loss": 0.6273,
      "step": 331
    },
    {
      "epoch": 0.3713646532438479,
      "grad_norm": 0.35473543405532837,
      "learning_rate": 4.9760745543608414e-06,
      "loss": 0.6243,
      "step": 332
    },
    {
      "epoch": 0.3724832214765101,
      "grad_norm": 0.36719077825546265,
      "learning_rate": 4.9758681891812276e-06,
      "loss": 0.6476,
      "step": 333
    },
    {
      "epoch": 0.37360178970917224,
      "grad_norm": 0.3712293207645416,
      "learning_rate": 4.9756609421500464e-06,
      "loss": 0.6475,
      "step": 334
    },
    {
      "epoch": 0.37472035794183445,
      "grad_norm": 0.36749109625816345,
      "learning_rate": 4.9754528133411144e-06,
      "loss": 0.6428,
      "step": 335
    },
    {
      "epoch": 0.37583892617449666,
      "grad_norm": 0.35749316215515137,
      "learning_rate": 4.975243802828563e-06,
      "loss": 0.6123,
      "step": 336
    },
    {
      "epoch": 0.3769574944071588,
      "grad_norm": 0.37466734647750854,
      "learning_rate": 4.975033910686837e-06,
      "loss": 0.6393,
      "step": 337
    },
    {
      "epoch": 0.378076062639821,
      "grad_norm": 0.36750441789627075,
      "learning_rate": 4.974823136990697e-06,
      "loss": 0.6405,
      "step": 338
    },
    {
      "epoch": 0.37919463087248323,
      "grad_norm": 0.3831922113895416,
      "learning_rate": 4.9746114818152135e-06,
      "loss": 0.6633,
      "step": 339
    },
    {
      "epoch": 0.38031319910514544,
      "grad_norm": 0.35679274797439575,
      "learning_rate": 4.974398945235776e-06,
      "loss": 0.6431,
      "step": 340
    },
    {
      "epoch": 0.3814317673378076,
      "grad_norm": 0.36524152755737305,
      "learning_rate": 4.974185527328084e-06,
      "loss": 0.6419,
      "step": 341
    },
    {
      "epoch": 0.3825503355704698,
      "grad_norm": 0.3668903410434723,
      "learning_rate": 4.9739712281681525e-06,
      "loss": 0.6418,
      "step": 342
    },
    {
      "epoch": 0.383668903803132,
      "grad_norm": 0.37841862440109253,
      "learning_rate": 4.973756047832312e-06,
      "loss": 0.6585,
      "step": 343
    },
    {
      "epoch": 0.38478747203579416,
      "grad_norm": 0.37758868932724,
      "learning_rate": 4.9735399863972024e-06,
      "loss": 0.6493,
      "step": 344
    },
    {
      "epoch": 0.3859060402684564,
      "grad_norm": 0.3663494288921356,
      "learning_rate": 4.973323043939783e-06,
      "loss": 0.6728,
      "step": 345
    },
    {
      "epoch": 0.3870246085011186,
      "grad_norm": 0.3930216431617737,
      "learning_rate": 4.973105220537322e-06,
      "loss": 0.6608,
      "step": 346
    },
    {
      "epoch": 0.38814317673378074,
      "grad_norm": 0.390828400850296,
      "learning_rate": 4.972886516267404e-06,
      "loss": 0.6497,
      "step": 347
    },
    {
      "epoch": 0.38926174496644295,
      "grad_norm": 0.37600764632225037,
      "learning_rate": 4.972666931207927e-06,
      "loss": 0.6426,
      "step": 348
    },
    {
      "epoch": 0.39038031319910516,
      "grad_norm": 0.36520275473594666,
      "learning_rate": 4.972446465437103e-06,
      "loss": 0.645,
      "step": 349
    },
    {
      "epoch": 0.39149888143176736,
      "grad_norm": 0.3984422981739044,
      "learning_rate": 4.972225119033457e-06,
      "loss": 0.6368,
      "step": 350
    },
| { |
| "epoch": 0.3926174496644295, |
| "grad_norm": 0.3725559711456299, |
| "learning_rate": 4.972002892075827e-06, |
| "loss": 0.625, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.39373601789709173, |
| "grad_norm": 0.3889387547969818, |
| "learning_rate": 4.9717797846433655e-06, |
| "loss": 0.6258, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.39485458612975394, |
| "grad_norm": 0.37537676095962524, |
| "learning_rate": 4.97155579681554e-06, |
| "loss": 0.64, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3959731543624161, |
| "grad_norm": 0.3795606791973114, |
| "learning_rate": 4.97133092867213e-06, |
| "loss": 0.6849, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3970917225950783, |
| "grad_norm": 0.38519197702407837, |
| "learning_rate": 4.971105180293228e-06, |
| "loss": 0.6493, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.3982102908277405, |
| "grad_norm": 0.3789883553981781, |
| "learning_rate": 4.97087855175924e-06, |
| "loss": 0.6281, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.39932885906040266, |
| "grad_norm": 0.3687867820262909, |
| "learning_rate": 4.970651043150887e-06, |
| "loss": 0.6278, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.4004474272930649, |
| "grad_norm": 0.365581214427948, |
| "learning_rate": 4.970422654549204e-06, |
| "loss": 0.647, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.4015659955257271, |
| "grad_norm": 0.37871256470680237, |
| "learning_rate": 4.970193386035537e-06, |
| "loss": 0.6349, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.40268456375838924, |
| "grad_norm": 0.37639445066452026, |
| "learning_rate": 4.969963237691547e-06, |
| "loss": 0.6544, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.40380313199105144, |
| "grad_norm": 0.38033226132392883, |
| "learning_rate": 4.9697322095992075e-06, |
| "loss": 0.6216, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.40492170022371365, |
| "grad_norm": 0.3785533010959625, |
| "learning_rate": 4.969500301840805e-06, |
| "loss": 0.6379, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.40604026845637586, |
| "grad_norm": 0.36831173300743103, |
| "learning_rate": 4.969267514498942e-06, |
| "loss": 0.6305, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.407158836689038, |
| "grad_norm": 0.3860856592655182, |
| "learning_rate": 4.969033847656531e-06, |
| "loss": 0.6428, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.4082774049217002, |
| "grad_norm": 0.37992534041404724, |
| "learning_rate": 4.9687993013968e-06, |
| "loss": 0.629, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.40939597315436244, |
| "grad_norm": 0.37179872393608093, |
| "learning_rate": 4.9685638758032885e-06, |
| "loss": 0.6146, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.4105145413870246, |
| "grad_norm": 0.3762771487236023, |
| "learning_rate": 4.96832757095985e-06, |
| "loss": 0.6294, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.4116331096196868, |
| "grad_norm": 0.37078356742858887, |
| "learning_rate": 4.968090386950653e-06, |
| "loss": 0.6438, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.412751677852349, |
| "grad_norm": 0.3619535565376282, |
| "learning_rate": 4.967852323860176e-06, |
| "loss": 0.6229, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.41387024608501116, |
| "grad_norm": 0.3610592782497406, |
| "learning_rate": 4.967613381773211e-06, |
| "loss": 0.6332, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.41498881431767337, |
| "grad_norm": 0.36372244358062744, |
| "learning_rate": 4.9673735607748665e-06, |
| "loss": 0.6379, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.4161073825503356, |
| "grad_norm": 0.3713506758213043, |
| "learning_rate": 4.96713286095056e-06, |
| "loss": 0.6051, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.4172259507829978, |
| "grad_norm": 0.37290191650390625, |
| "learning_rate": 4.9668912823860244e-06, |
| "loss": 0.6431, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.41834451901565994, |
| "grad_norm": 0.3736407458782196, |
| "learning_rate": 4.966648825167305e-06, |
| "loss": 0.6296, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.41946308724832215, |
| "grad_norm": 0.38261404633522034, |
| "learning_rate": 4.9664054893807586e-06, |
| "loss": 0.6559, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.42058165548098436, |
| "grad_norm": 0.36865612864494324, |
| "learning_rate": 4.966161275113057e-06, |
| "loss": 0.6372, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4217002237136465, |
| "grad_norm": 0.3745094835758209, |
| "learning_rate": 4.965916182451185e-06, |
| "loss": 0.6526, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.4228187919463087, |
| "grad_norm": 0.3758225440979004, |
| "learning_rate": 4.965670211482437e-06, |
| "loss": 0.6423, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.42393736017897093, |
| "grad_norm": 0.37716934084892273, |
| "learning_rate": 4.965423362294426e-06, |
| "loss": 0.6431, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.4250559284116331, |
| "grad_norm": 0.3757461905479431, |
| "learning_rate": 4.965175634975072e-06, |
| "loss": 0.6335, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4261744966442953, |
| "grad_norm": 0.3701077401638031, |
| "learning_rate": 4.964927029612611e-06, |
| "loss": 0.6182, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.4272930648769575, |
| "grad_norm": 0.38263121247291565, |
| "learning_rate": 4.96467754629559e-06, |
| "loss": 0.6371, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.42841163310961966, |
| "grad_norm": 0.3740926682949066, |
| "learning_rate": 4.9644271851128715e-06, |
| "loss": 0.6272, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.42953020134228187, |
| "grad_norm": 0.39056089520454407, |
| "learning_rate": 4.964175946153627e-06, |
| "loss": 0.624, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4306487695749441, |
| "grad_norm": 0.3867873549461365, |
| "learning_rate": 4.963923829507343e-06, |
| "loss": 0.6714, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4317673378076063, |
| "grad_norm": 0.3808860182762146, |
| "learning_rate": 4.963670835263819e-06, |
| "loss": 0.6412, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.43288590604026844, |
| "grad_norm": 0.3839844763278961, |
| "learning_rate": 4.963416963513166e-06, |
| "loss": 0.6288, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.43400447427293065, |
| "grad_norm": 0.37187114357948303, |
| "learning_rate": 4.963162214345806e-06, |
| "loss": 0.6307, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.43512304250559286, |
| "grad_norm": 0.36723873019218445, |
| "learning_rate": 4.962906587852477e-06, |
| "loss": 0.6285, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.436241610738255, |
| "grad_norm": 0.3754160404205322, |
| "learning_rate": 4.962650084124226e-06, |
| "loss": 0.6227, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4373601789709172, |
| "grad_norm": 0.374239981174469, |
| "learning_rate": 4.962392703252417e-06, |
| "loss": 0.6612, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.43847874720357943, |
| "grad_norm": 0.3758632242679596, |
| "learning_rate": 4.9621344453287214e-06, |
| "loss": 0.6408, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4395973154362416, |
| "grad_norm": 0.3839190602302551, |
| "learning_rate": 4.9618753104451254e-06, |
| "loss": 0.6524, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4407158836689038, |
| "grad_norm": 0.36766374111175537, |
| "learning_rate": 4.961615298693928e-06, |
| "loss": 0.6232, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.441834451901566, |
| "grad_norm": 0.3692423701286316, |
| "learning_rate": 4.961354410167739e-06, |
| "loss": 0.6436, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.4429530201342282, |
| "grad_norm": 0.3720521926879883, |
| "learning_rate": 4.961092644959482e-06, |
| "loss": 0.6346, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.44407158836689037, |
| "grad_norm": 0.373910129070282, |
| "learning_rate": 4.960830003162392e-06, |
| "loss": 0.6211, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.4451901565995526, |
| "grad_norm": 0.37455853819847107, |
| "learning_rate": 4.960566484870017e-06, |
| "loss": 0.6366, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4463087248322148, |
| "grad_norm": 0.387390673160553, |
| "learning_rate": 4.960302090176215e-06, |
| "loss": 0.6543, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.44742729306487694, |
| "grad_norm": 0.3862502872943878, |
| "learning_rate": 4.960036819175159e-06, |
| "loss": 0.6351, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.44854586129753915, |
| "grad_norm": 0.38686901330947876, |
| "learning_rate": 4.959770671961334e-06, |
| "loss": 0.6247, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.44966442953020136, |
| "grad_norm": 0.38111770153045654, |
| "learning_rate": 4.959503648629534e-06, |
| "loss": 0.6624, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4507829977628635, |
| "grad_norm": 0.3962753713130951, |
| "learning_rate": 4.959235749274866e-06, |
| "loss": 0.6224, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.4519015659955257, |
| "grad_norm": 0.36403393745422363, |
| "learning_rate": 4.958966973992754e-06, |
| "loss": 0.6215, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.45302013422818793, |
| "grad_norm": 0.3858584463596344, |
| "learning_rate": 4.958697322878926e-06, |
| "loss": 0.6473, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.4541387024608501, |
| "grad_norm": 0.39325979351997375, |
| "learning_rate": 4.958426796029429e-06, |
| "loss": 0.6664, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.4552572706935123, |
| "grad_norm": 0.37423112988471985, |
| "learning_rate": 4.958155393540618e-06, |
| "loss": 0.6416, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.4563758389261745, |
| "grad_norm": 0.3979191482067108, |
| "learning_rate": 4.9578831155091585e-06, |
| "loss": 0.6493, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4574944071588367, |
| "grad_norm": 0.375473290681839, |
| "learning_rate": 4.957609962032034e-06, |
| "loss": 0.6246, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.45861297539149887, |
| "grad_norm": 0.37951260805130005, |
| "learning_rate": 4.957335933206533e-06, |
| "loss": 0.6374, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4597315436241611, |
| "grad_norm": 0.384162575006485, |
| "learning_rate": 4.9570610291302605e-06, |
| "loss": 0.6411, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4608501118568233, |
| "grad_norm": 0.37713801860809326, |
| "learning_rate": 4.95678524990113e-06, |
| "loss": 0.6384, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.46196868008948544, |
| "grad_norm": 0.3779420554637909, |
| "learning_rate": 4.95650859561737e-06, |
| "loss": 0.6238, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.46308724832214765, |
| "grad_norm": 0.3826324939727783, |
| "learning_rate": 4.956231066377517e-06, |
| "loss": 0.6373, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.46420581655480986, |
| "grad_norm": 0.3693124055862427, |
| "learning_rate": 4.955952662280422e-06, |
| "loss": 0.6264, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.465324384787472, |
| "grad_norm": 0.3891177177429199, |
| "learning_rate": 4.9556733834252465e-06, |
| "loss": 0.6755, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.4664429530201342, |
| "grad_norm": 0.3732079863548279, |
| "learning_rate": 4.955393229911465e-06, |
| "loss": 0.6163, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.46756152125279643, |
| "grad_norm": 0.39267081022262573, |
| "learning_rate": 4.955112201838859e-06, |
| "loss": 0.653, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.46868008948545864, |
| "grad_norm": 0.37127041816711426, |
| "learning_rate": 4.9548302993075275e-06, |
| "loss": 0.6024, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4697986577181208, |
| "grad_norm": 0.38274380564689636, |
| "learning_rate": 4.954547522417878e-06, |
| "loss": 0.6103, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.470917225950783, |
| "grad_norm": 0.39440205693244934, |
| "learning_rate": 4.954263871270627e-06, |
| "loss": 0.6388, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.4720357941834452, |
| "grad_norm": 0.38207298517227173, |
| "learning_rate": 4.953979345966808e-06, |
| "loss": 0.6157, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.47315436241610737, |
| "grad_norm": 0.37390536069869995, |
| "learning_rate": 4.953693946607762e-06, |
| "loss": 0.612, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.4742729306487696, |
| "grad_norm": 0.3679952621459961, |
| "learning_rate": 4.953407673295141e-06, |
| "loss": 0.5962, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4753914988814318, |
| "grad_norm": 0.36741313338279724, |
| "learning_rate": 4.953120526130911e-06, |
| "loss": 0.5802, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.47651006711409394, |
| "grad_norm": 0.40101951360702515, |
| "learning_rate": 4.952832505217347e-06, |
| "loss": 0.631, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.47762863534675615, |
| "grad_norm": 0.37646785378456116, |
| "learning_rate": 4.952543610657036e-06, |
| "loss": 0.6192, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.47874720357941836, |
| "grad_norm": 0.3909439444541931, |
| "learning_rate": 4.952253842552876e-06, |
| "loss": 0.6288, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4798657718120805, |
| "grad_norm": 0.379685640335083, |
| "learning_rate": 4.9519632010080765e-06, |
| "loss": 0.6296, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4809843400447427, |
| "grad_norm": 0.3872782588005066, |
| "learning_rate": 4.9516716861261575e-06, |
| "loss": 0.6307, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.48210290827740493, |
| "grad_norm": 0.4066009223461151, |
| "learning_rate": 4.951379298010951e-06, |
| "loss": 0.6454, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.48322147651006714, |
| "grad_norm": 0.38412487506866455, |
| "learning_rate": 4.951086036766599e-06, |
| "loss": 0.6254, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.4843400447427293, |
| "grad_norm": 0.37819865345954895, |
| "learning_rate": 4.9507919024975545e-06, |
| "loss": 0.629, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.4854586129753915, |
| "grad_norm": 0.38674691319465637, |
| "learning_rate": 4.950496895308582e-06, |
| "loss": 0.6357, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4865771812080537, |
| "grad_norm": 0.39304593205451965, |
| "learning_rate": 4.950201015304758e-06, |
| "loss": 0.6475, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.48769574944071586, |
| "grad_norm": 0.381124347448349, |
| "learning_rate": 4.949904262591467e-06, |
| "loss": 0.6523, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4888143176733781, |
| "grad_norm": 0.4084749221801758, |
| "learning_rate": 4.949606637274408e-06, |
| "loss": 0.6773, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4899328859060403, |
| "grad_norm": 0.3967250883579254, |
| "learning_rate": 4.949308139459586e-06, |
| "loss": 0.6263, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.49105145413870244, |
| "grad_norm": 0.39761948585510254, |
| "learning_rate": 4.949008769253322e-06, |
| "loss": 0.6273, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.49217002237136465, |
| "grad_norm": 0.3865715265274048, |
| "learning_rate": 4.948708526762244e-06, |
| "loss": 0.6464, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.49328859060402686, |
| "grad_norm": 0.3970697820186615, |
| "learning_rate": 4.948407412093292e-06, |
| "loss": 0.6229, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.49440715883668906, |
| "grad_norm": 0.3817065954208374, |
| "learning_rate": 4.948105425353718e-06, |
| "loss": 0.6375, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4955257270693512, |
| "grad_norm": 0.3877985179424286, |
| "learning_rate": 4.947802566651082e-06, |
| "loss": 0.6389, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.4966442953020134, |
| "grad_norm": 0.40800127387046814, |
| "learning_rate": 4.947498836093257e-06, |
| "loss": 0.6627, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.49776286353467564, |
| "grad_norm": 0.40732380747795105, |
| "learning_rate": 4.947194233788423e-06, |
| "loss": 0.6156, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4988814317673378, |
| "grad_norm": 0.3948177695274353, |
| "learning_rate": 4.946888759845074e-06, |
| "loss": 0.6481, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.38517189025878906, |
| "learning_rate": 4.9465824143720145e-06, |
| "loss": 0.6224, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.5011185682326622, |
| "grad_norm": 0.3713424503803253, |
| "learning_rate": 4.946275197478358e-06, |
| "loss": 0.626, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5022371364653244, |
| "grad_norm": 0.4172223210334778, |
| "learning_rate": 4.945967109273527e-06, |
| "loss": 0.6405, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.5033557046979866, |
| "grad_norm": 0.4550599157810211, |
| "learning_rate": 4.945658149867257e-06, |
| "loss": 0.6103, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5044742729306487, |
| "grad_norm": 0.3938581347465515, |
| "learning_rate": 4.945348319369593e-06, |
| "loss": 0.6304, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.5055928411633109, |
| "grad_norm": 0.3923262059688568, |
| "learning_rate": 4.94503761789089e-06, |
| "loss": 0.6603, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5067114093959731, |
| "grad_norm": 0.3978983163833618, |
| "learning_rate": 4.944726045541814e-06, |
| "loss": 0.6445, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.5078299776286354, |
| "grad_norm": 0.4101882576942444, |
| "learning_rate": 4.9444136024333374e-06, |
| "loss": 0.6223, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5089485458612976, |
| "grad_norm": 0.4056575298309326, |
| "learning_rate": 4.944100288676749e-06, |
| "loss": 0.6343, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5100671140939598, |
| "grad_norm": 0.39720436930656433, |
| "learning_rate": 4.943786104383644e-06, |
| "loss": 0.6246, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5111856823266219, |
| "grad_norm": 0.3909725248813629, |
| "learning_rate": 4.943471049665925e-06, |
| "loss": 0.6339, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5123042505592841, |
| "grad_norm": 0.3773731291294098, |
| "learning_rate": 4.943155124635812e-06, |
| "loss": 0.6215, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5134228187919463, |
| "grad_norm": 0.4020001292228699, |
| "learning_rate": 4.9428383294058295e-06, |
| "loss": 0.6269, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5145413870246085, |
| "grad_norm": 0.3916706144809723, |
| "learning_rate": 4.942520664088812e-06, |
| "loss": 0.6233, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5156599552572707, |
| "grad_norm": 0.38717713952064514, |
| "learning_rate": 4.9422021287979076e-06, |
| "loss": 0.6216, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5167785234899329, |
| "grad_norm": 0.38645485043525696, |
| "learning_rate": 4.941882723646568e-06, |
| "loss": 0.6092, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5178970917225951, |
| "grad_norm": 0.38496407866477966, |
| "learning_rate": 4.9415624487485615e-06, |
| "loss": 0.6368, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.5190156599552572, |
| "grad_norm": 0.3946744501590729, |
| "learning_rate": 4.941241304217962e-06, |
| "loss": 0.6525, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5201342281879194, |
| "grad_norm": 0.3994438648223877, |
| "learning_rate": 4.940919290169155e-06, |
| "loss": 0.6314, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5212527964205816, |
| "grad_norm": 0.3929794728755951, |
| "learning_rate": 4.940596406716834e-06, |
| "loss": 0.6148, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5223713646532439, |
| "grad_norm": 0.42620542645454407, |
| "learning_rate": 4.940272653976005e-06, |
| "loss": 0.6468, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5234899328859061, |
| "grad_norm": 0.4014374613761902, |
| "learning_rate": 4.9399480320619805e-06, |
| "loss": 0.6451, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5246085011185683, |
| "grad_norm": 0.39342424273490906, |
| "learning_rate": 4.939622541090384e-06, |
| "loss": 0.6696, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5257270693512305, |
| "grad_norm": 0.3870956301689148, |
| "learning_rate": 4.939296181177149e-06, |
| "loss": 0.6451, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5268456375838926, |
| "grad_norm": 0.39973214268684387, |
| "learning_rate": 4.938968952438518e-06, |
| "loss": 0.6254, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5279642058165548, |
| "grad_norm": 0.3956799805164337, |
| "learning_rate": 4.938640854991041e-06, |
| "loss": 0.6169, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.529082774049217, |
| "grad_norm": 0.38881829380989075, |
| "learning_rate": 4.938311888951583e-06, |
| "loss": 0.5989, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5302013422818792, |
| "grad_norm": 0.392107218503952, |
| "learning_rate": 4.93798205443731e-06, |
| "loss": 0.6284, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5313199105145414, |
| "grad_norm": 0.4042797088623047, |
| "learning_rate": 4.937651351565707e-06, |
| "loss": 0.6235, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5324384787472036, |
| "grad_norm": 0.380206435918808, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 0.5894, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5335570469798657, |
| "grad_norm": 0.3989536166191101, |
| "learning_rate": 4.936987341221968e-06, |
| "loss": 0.6522, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5346756152125279, |
| "grad_norm": 0.38699498772621155, |
| "learning_rate": 4.9366540339863395e-06, |
| "loss": 0.6202, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5357941834451901, |
| "grad_norm": 0.4171985387802124, |
| "learning_rate": 4.936319858866391e-06, |
| "loss": 0.624, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5369127516778524, |
| "grad_norm": 0.3932148218154907, |
| "learning_rate": 4.93598481598115e-06, |
| "loss": 0.6215, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5380313199105146, |
| "grad_norm": 0.3934101462364197, |
| "learning_rate": 4.935648905449949e-06, |
| "loss": 0.6402, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5391498881431768, |
| "grad_norm": 0.3917444348335266, |
| "learning_rate": 4.935312127392434e-06, |
| "loss": 0.641, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.540268456375839, |
| "grad_norm": 0.4036387503147125, |
| "learning_rate": 4.9349744819285584e-06, |
| "loss": 0.6405, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5413870246085011, |
| "grad_norm": 0.38611260056495667, |
| "learning_rate": 4.934635969178584e-06, |
| "loss": 0.6231, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5425055928411633, |
| "grad_norm": 0.39185649156570435, |
| "learning_rate": 4.9342965892630805e-06, |
| "loss": 0.6214, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5436241610738255, |
| "grad_norm": 0.3736090362071991, |
| "learning_rate": 4.933956342302929e-06, |
| "loss": 0.6053, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5447427293064877, |
| "grad_norm": 0.39649662375450134, |
| "learning_rate": 4.93361522841932e-06, |
| "loss": 0.6408, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5458612975391499, |
| "grad_norm": 0.3990592658519745, |
| "learning_rate": 4.933273247733746e-06, |
| "loss": 0.6081, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5469798657718121, |
| "grad_norm": 0.39177680015563965, |
| "learning_rate": 4.932930400368019e-06, |
| "loss": 0.6114, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5480984340044742, |
| "grad_norm": 0.3953116536140442, |
| "learning_rate": 4.9325866864442495e-06, |
| "loss": 0.6339, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5492170022371364, |
| "grad_norm": 0.38563409447669983, |
| "learning_rate": 4.932242106084864e-06, |
| "loss": 0.6331, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5503355704697986, |
| "grad_norm": 0.40618443489074707, |
| "learning_rate": 4.931896659412593e-06, |
| "loss": 0.6441, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5514541387024608, |
| "grad_norm": 0.4008066654205322, |
| "learning_rate": 4.931550346550479e-06, |
| "loss": 0.6243, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5525727069351231, |
| "grad_norm": 0.39776620268821716, |
| "learning_rate": 4.931203167621868e-06, |
| "loss": 0.6152, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5536912751677853, |
| "grad_norm": 0.38687410950660706, |
| "learning_rate": 4.930855122750421e-06, |
| "loss": 0.5969, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5548098434004475, |
| "grad_norm": 0.3877246081829071, |
| "learning_rate": 4.9305062120601035e-06, |
| "loss": 0.6016, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5559284116331096, |
| "grad_norm": 0.40948086977005005, |
| "learning_rate": 4.930156435675189e-06, |
| "loss": 0.6168, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5570469798657718, |
| "grad_norm": 0.404021292924881, |
| "learning_rate": 4.929805793720262e-06, |
| "loss": 0.6092, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.558165548098434, |
| "grad_norm": 0.39509114623069763, |
| "learning_rate": 4.929454286320211e-06, |
| "loss": 0.6346, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5592841163310962, |
| "grad_norm": 0.39687812328338623, |
| "learning_rate": 4.9291019136002385e-06, |
| "loss": 0.639, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5604026845637584, |
| "grad_norm": 0.39210405945777893, |
| "learning_rate": 4.92874867568585e-06, |
| "loss": 0.6083, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5615212527964206, |
| "grad_norm": 0.4022452235221863, |
| "learning_rate": 4.928394572702862e-06, |
| "loss": 0.6252, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5626398210290827, |
| "grad_norm": 0.40317103266716003, |
| "learning_rate": 4.928039604777399e-06, |
| "loss": 0.614, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5637583892617449, |
| "grad_norm": 0.4097250998020172, |
| "learning_rate": 4.9276837720358924e-06, |
| "loss": 0.6218, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5648769574944071, |
| "grad_norm": 0.3927348554134369, |
| "learning_rate": 4.927327074605083e-06, |
| "loss": 0.6079, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5659955257270693, |
| "grad_norm": 0.3961605131626129, |
| "learning_rate": 4.9269695126120185e-06, |
| "loss": 0.612, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5671140939597316, |
| "grad_norm": 0.3945924639701843, |
| "learning_rate": 4.926611086184054e-06, |
| "loss": 0.6268, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5682326621923938, |
| "grad_norm": 0.39072591066360474, |
| "learning_rate": 4.926251795448854e-06, |
| "loss": 0.6176, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.569351230425056, |
| "grad_norm": 0.39760643243789673, |
| "learning_rate": 4.9258916405343904e-06, |
| "loss": 0.6437, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5704697986577181, |
| "grad_norm": 0.39866000413894653, |
| "learning_rate": 4.925530621568942e-06, |
| "loss": 0.6383, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5715883668903803, |
| "grad_norm": 0.3932257294654846, |
| "learning_rate": 4.925168738681097e-06, |
| "loss": 0.6156, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5727069351230425, |
| "grad_norm": 0.39929500222206116, |
| "learning_rate": 4.924805991999751e-06, |
| "loss": 0.6069, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5738255033557047, |
| "grad_norm": 0.41192054748535156, |
| "learning_rate": 4.924442381654105e-06, |
| "loss": 0.6451, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5749440715883669, |
| "grad_norm": 0.41147273778915405, |
| "learning_rate": 4.92407790777367e-06, |
| "loss": 0.647, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5760626398210291, |
| "grad_norm": 0.4128178358078003, |
| "learning_rate": 4.923712570488264e-06, |
| "loss": 0.5909, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5771812080536913, |
| "grad_norm": 0.4081036150455475, |
| "learning_rate": 4.923346369928012e-06, |
| "loss": 0.6248, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5782997762863534, |
| "grad_norm": 0.3965778350830078, |
| "learning_rate": 4.922979306223347e-06, |
| "loss": 0.6019, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5794183445190156, |
| "grad_norm": 0.3979526162147522, |
| "learning_rate": 4.922611379505009e-06, |
| "loss": 0.6368, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5805369127516778, |
| "grad_norm": 0.38306665420532227, |
| "learning_rate": 4.922242589904046e-06, |
| "loss": 0.62, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5816554809843401, |
| "grad_norm": 0.3833399713039398, |
| "learning_rate": 4.921872937551814e-06, |
| "loss": 0.6064, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5827740492170023, |
| "grad_norm": 0.39361608028411865, |
| "learning_rate": 4.921502422579973e-06, |
| "loss": 0.6236, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5838926174496645, |
| "grad_norm": 0.39250272512435913, |
| "learning_rate": 4.921131045120494e-06, |
| "loss": 0.624, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5850111856823266, |
| "grad_norm": 0.40747684240341187, |
| "learning_rate": 4.920758805305654e-06, |
| "loss": 0.6096, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5861297539149888, |
| "grad_norm": 0.39987003803253174, |
| "learning_rate": 4.920385703268037e-06, |
| "loss": 0.6282, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.587248322147651, |
| "grad_norm": 0.39122274518013, |
| "learning_rate": 4.920011739140532e-06, |
| "loss": 0.6479, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5883668903803132, |
| "grad_norm": 0.39809542894363403, |
| "learning_rate": 4.919636913056339e-06, |
| "loss": 0.6213, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5894854586129754, |
| "grad_norm": 0.39921343326568604, |
| "learning_rate": 4.919261225148963e-06, |
| "loss": 0.6118, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5906040268456376, |
| "grad_norm": 0.4086368680000305, |
| "learning_rate": 4.9188846755522155e-06, |
| "loss": 0.6214, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5917225950782998, |
| "grad_norm": 0.4066048264503479, |
| "learning_rate": 4.918507264400216e-06, |
| "loss": 0.6316, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5928411633109619, |
| "grad_norm": 0.41961807012557983, |
| "learning_rate": 4.91812899182739e-06, |
| "loss": 0.5948, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5939597315436241, |
| "grad_norm": 0.39992618560791016, |
| "learning_rate": 4.917749857968469e-06, |
| "loss": 0.6113, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5950782997762863, |
| "grad_norm": 0.41020235419273376, |
| "learning_rate": 4.917369862958494e-06, |
| "loss": 0.622, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5961968680089486, |
| "grad_norm": 0.40504705905914307, |
| "learning_rate": 4.916989006932811e-06, |
| "loss": 0.621, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5973154362416108, |
| "grad_norm": 0.3829837441444397, |
| "learning_rate": 4.9166072900270725e-06, |
| "loss": 0.5942, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.598434004474273, |
| "grad_norm": 0.4082834720611572, |
| "learning_rate": 4.9162247123772375e-06, |
| "loss": 0.5923, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5995525727069351, |
| "grad_norm": 0.40038296580314636, |
| "learning_rate": 4.915841274119572e-06, |
| "loss": 0.6057, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.6006711409395973, |
| "grad_norm": 0.40687569975852966, |
| "learning_rate": 4.91545697539065e-06, |
| "loss": 0.6343, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.6017897091722595, |
| "grad_norm": 0.386262983083725, |
| "learning_rate": 4.9150718163273494e-06, |
| "loss": 0.6372, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.6029082774049217, |
| "grad_norm": 0.39570850133895874, |
| "learning_rate": 4.914685797066855e-06, |
| "loss": 0.6157, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6040268456375839, |
| "grad_norm": 0.40055716037750244, |
| "learning_rate": 4.9142989177466594e-06, |
| "loss": 0.6141, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6051454138702461, |
| "grad_norm": 0.4038466811180115, |
| "learning_rate": 4.913911178504562e-06, |
| "loss": 0.6286, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.6062639821029083, |
| "grad_norm": 0.38774847984313965, |
| "learning_rate": 4.913522579478664e-06, |
| "loss": 0.6343, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6073825503355704, |
| "grad_norm": 0.39426755905151367, |
| "learning_rate": 4.913133120807379e-06, |
| "loss": 0.6121, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.6085011185682326, |
| "grad_norm": 0.4076898396015167, |
| "learning_rate": 4.912742802629423e-06, |
| "loss": 0.6273, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6096196868008948, |
| "grad_norm": 0.3859540820121765, |
| "learning_rate": 4.91235162508382e-06, |
| "loss": 0.6314, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.610738255033557, |
| "grad_norm": 0.3914327621459961, |
| "learning_rate": 4.911959588309897e-06, |
| "loss": 0.6027, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6118568232662193, |
| "grad_norm": 0.3892766833305359, |
| "learning_rate": 4.9115666924472906e-06, |
| "loss": 0.5922, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6129753914988815, |
| "grad_norm": 0.3921322226524353, |
| "learning_rate": 4.911172937635942e-06, |
| "loss": 0.6066, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6140939597315436, |
| "grad_norm": 0.3972843885421753, |
| "learning_rate": 4.910778324016098e-06, |
| "loss": 0.614, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.6152125279642058, |
| "grad_norm": 0.4027954638004303, |
| "learning_rate": 4.9103828517283105e-06, |
| "loss": 0.6174, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.616331096196868, |
| "grad_norm": 0.40479257702827454, |
| "learning_rate": 4.909986520913441e-06, |
| "loss": 0.6114, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.6174496644295302, |
| "grad_norm": 0.4246085584163666, |
| "learning_rate": 4.909589331712651e-06, |
| "loss": 0.6145, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6185682326621924, |
| "grad_norm": 0.4173775017261505, |
| "learning_rate": 4.909191284267413e-06, |
| "loss": 0.6375, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.6196868008948546, |
| "grad_norm": 0.4135677218437195, |
| "learning_rate": 4.908792378719502e-06, |
| "loss": 0.6444, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.6208053691275168, |
| "grad_norm": 0.40163061022758484, |
| "learning_rate": 4.9083926152110004e-06, |
| "loss": 0.6128, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.6219239373601789, |
| "grad_norm": 0.41246625781059265, |
| "learning_rate": 4.907991993884295e-06, |
| "loss": 0.6229, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6230425055928411, |
| "grad_norm": 0.4114304780960083, |
| "learning_rate": 4.907590514882079e-06, |
| "loss": 0.6028, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6241610738255033, |
| "grad_norm": 0.40224742889404297, |
| "learning_rate": 4.90718817834735e-06, |
| "loss": 0.5896, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6252796420581656, |
| "grad_norm": 0.397650808095932, |
| "learning_rate": 4.906784984423411e-06, |
| "loss": 0.6309, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6263982102908278, |
| "grad_norm": 0.4087318480014801, |
| "learning_rate": 4.906380933253874e-06, |
| "loss": 0.6002, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.62751677852349, |
| "grad_norm": 0.3988543450832367, |
| "learning_rate": 4.90597602498265e-06, |
| "loss": 0.6415, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6286353467561522, |
| "grad_norm": 0.38457274436950684, |
| "learning_rate": 4.905570259753961e-06, |
| "loss": 0.6105, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6297539149888143, |
| "grad_norm": 0.38756313920021057, |
| "learning_rate": 4.905163637712331e-06, |
| "loss": 0.5953, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6308724832214765, |
| "grad_norm": 0.4071662127971649, |
| "learning_rate": 4.90475615900259e-06, |
| "loss": 0.635, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6319910514541387, |
| "grad_norm": 0.4213521182537079, |
| "learning_rate": 4.904347823769875e-06, |
| "loss": 0.6141, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6331096196868009, |
| "grad_norm": 0.4104982018470764, |
| "learning_rate": 4.9039386321596235e-06, |
| "loss": 0.6235, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6342281879194631, |
| "grad_norm": 0.41318175196647644, |
| "learning_rate": 4.903528584317583e-06, |
| "loss": 0.6315, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6353467561521253, |
| "grad_norm": 0.39332863688468933, |
| "learning_rate": 4.903117680389802e-06, |
| "loss": 0.5807, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6364653243847874, |
| "grad_norm": 0.4188497066497803, |
| "learning_rate": 4.902705920522638e-06, |
| "loss": 0.6176, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6375838926174496, |
| "grad_norm": 0.41399574279785156, |
| "learning_rate": 4.9022933048627496e-06, |
| "loss": 0.6067, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6387024608501118, |
| "grad_norm": 0.4197136461734772, |
| "learning_rate": 4.901879833557102e-06, |
| "loss": 0.6182, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.639821029082774, |
| "grad_norm": 0.41715213656425476, |
| "learning_rate": 4.9014655067529645e-06, |
| "loss": 0.6088, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6409395973154363, |
| "grad_norm": 0.40957003831863403, |
| "learning_rate": 4.901050324597912e-06, |
| "loss": 0.5942, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6420581655480985, |
| "grad_norm": 0.4082324206829071, |
| "learning_rate": 4.9006342872398235e-06, |
| "loss": 0.6389, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6431767337807607, |
| "grad_norm": 0.41336655616760254, |
| "learning_rate": 4.900217394826882e-06, |
| "loss": 0.6122, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6442953020134228, |
| "grad_norm": 0.40878477692604065, |
| "learning_rate": 4.899799647507577e-06, |
| "loss": 0.6372, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.645413870246085, |
| "grad_norm": 0.4028140604496002, |
| "learning_rate": 4.899381045430701e-06, |
| "loss": 0.5949, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.6465324384787472, |
| "grad_norm": 0.42413756251335144, |
| "learning_rate": 4.89896158874535e-06, |
| "loss": 0.6217, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6476510067114094, |
| "grad_norm": 0.4108542501926422, |
| "learning_rate": 4.898541277600927e-06, |
| "loss": 0.6283, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6487695749440716, |
| "grad_norm": 0.4101778566837311, |
| "learning_rate": 4.898120112147135e-06, |
| "loss": 0.6028, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6498881431767338, |
| "grad_norm": 0.4104720652103424, |
| "learning_rate": 4.897698092533988e-06, |
| "loss": 0.6481, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6510067114093959, |
| "grad_norm": 0.4004007577896118, |
| "learning_rate": 4.897275218911799e-06, |
| "loss": 0.6042, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6521252796420581, |
| "grad_norm": 0.4042847752571106, |
| "learning_rate": 4.896851491431185e-06, |
| "loss": 0.6076, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6532438478747203, |
| "grad_norm": 0.40685543417930603, |
| "learning_rate": 4.89642691024307e-06, |
| "loss": 0.6066, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6543624161073825, |
| "grad_norm": 0.41699886322021484, |
| "learning_rate": 4.896001475498682e-06, |
| "loss": 0.6091, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6554809843400448, |
| "grad_norm": 0.3905144929885864, |
| "learning_rate": 4.89557518734955e-06, |
| "loss": 0.6272, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.656599552572707, |
| "grad_norm": 0.40033814311027527, |
| "learning_rate": 4.895148045947509e-06, |
| "loss": 0.6183, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6577181208053692, |
| "grad_norm": 0.39996397495269775, |
| "learning_rate": 4.894720051444698e-06, |
| "loss": 0.5996, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6588366890380313, |
| "grad_norm": 0.42592981457710266, |
| "learning_rate": 4.894291203993561e-06, |
| "loss": 0.6506, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6599552572706935, |
| "grad_norm": 0.40710797905921936, |
| "learning_rate": 4.8938615037468405e-06, |
| "loss": 0.6044, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6610738255033557, |
| "grad_norm": 0.427405446767807, |
| "learning_rate": 4.893430950857591e-06, |
| "loss": 0.6236, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6621923937360179, |
| "grad_norm": 0.40190666913986206, |
| "learning_rate": 4.892999545479163e-06, |
| "loss": 0.6031, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6633109619686801, |
| "grad_norm": 0.4019568860530853, |
| "learning_rate": 4.8925672877652155e-06, |
| "loss": 0.6232, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6644295302013423, |
| "grad_norm": 0.40001606941223145, |
| "learning_rate": 4.892134177869709e-06, |
| "loss": 0.6141, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6655480984340044, |
| "grad_norm": 0.40650853514671326, |
| "learning_rate": 4.891700215946909e-06, |
| "loss": 0.6011, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.41019386053085327, |
| "learning_rate": 4.8912654021513815e-06, |
| "loss": 0.6262, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6677852348993288, |
| "grad_norm": 0.4064581096172333, |
| "learning_rate": 4.890829736638e-06, |
| "loss": 0.6329, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.668903803131991, |
| "grad_norm": 0.4091740846633911, |
| "learning_rate": 4.890393219561938e-06, |
| "loss": 0.6193, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6700223713646533, |
| "grad_norm": 0.407552570104599, |
| "learning_rate": 4.889955851078674e-06, |
| "loss": 0.6535, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6711409395973155, |
| "grad_norm": 0.3950585126876831, |
| "learning_rate": 4.889517631343988e-06, |
| "loss": 0.6033, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6722595078299777, |
| "grad_norm": 0.39515334367752075, |
| "learning_rate": 4.889078560513968e-06, |
| "loss": 0.6006, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6733780760626398, |
| "grad_norm": 0.4188336431980133, |
| "learning_rate": 4.888638638744999e-06, |
| "loss": 0.6333, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.674496644295302, |
| "grad_norm": 0.4050522446632385, |
| "learning_rate": 4.888197866193772e-06, |
| "loss": 0.6329, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6756152125279642, |
| "grad_norm": 0.39428868889808655, |
| "learning_rate": 4.887756243017282e-06, |
| "loss": 0.6007, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6767337807606264, |
| "grad_norm": 0.402410626411438, |
| "learning_rate": 4.887313769372823e-06, |
| "loss": 0.5885, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6778523489932886, |
| "grad_norm": 0.4062318205833435, |
| "learning_rate": 4.886870445417998e-06, |
| "loss": 0.6312, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6789709172259508, |
| "grad_norm": 0.4123631715774536, |
| "learning_rate": 4.886426271310708e-06, |
| "loss": 0.619, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.680089485458613, |
| "grad_norm": 0.40353336930274963, |
| "learning_rate": 4.885981247209159e-06, |
| "loss": 0.6324, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6812080536912751, |
| "grad_norm": 0.40726402401924133, |
| "learning_rate": 4.885535373271858e-06, |
| "loss": 0.5819, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6823266219239373, |
| "grad_norm": 0.4142349660396576, |
| "learning_rate": 4.885088649657618e-06, |
| "loss": 0.6175, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6834451901565995, |
| "grad_norm": 0.4165160059928894, |
| "learning_rate": 4.884641076525549e-06, |
| "loss": 0.597, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6845637583892618, |
| "grad_norm": 0.4037843942642212, |
| "learning_rate": 4.884192654035069e-06, |
| "loss": 0.6183, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.685682326621924, |
| "grad_norm": 0.41777777671813965, |
| "learning_rate": 4.883743382345898e-06, |
| "loss": 0.6063, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6868008948545862, |
| "grad_norm": 0.41021421551704407, |
| "learning_rate": 4.883293261618054e-06, |
| "loss": 0.6134, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6879194630872483, |
| "grad_norm": 0.4215847849845886, |
| "learning_rate": 4.882842292011863e-06, |
| "loss": 0.6458, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6890380313199105, |
| "grad_norm": 0.42801633477211, |
| "learning_rate": 4.882390473687949e-06, |
| "loss": 0.6259, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6901565995525727, |
| "grad_norm": 0.40879589319229126, |
| "learning_rate": 4.881937806807241e-06, |
| "loss": 0.6208, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6912751677852349, |
| "grad_norm": 0.39453622698783875, |
| "learning_rate": 4.881484291530969e-06, |
| "loss": 0.5966, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6923937360178971, |
| "grad_norm": 0.3992539048194885, |
| "learning_rate": 4.881029928020666e-06, |
| "loss": 0.5976, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6935123042505593, |
| "grad_norm": 0.4175397753715515, |
| "learning_rate": 4.880574716438166e-06, |
| "loss": 0.6261, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6946308724832215, |
| "grad_norm": 0.40533408522605896, |
| "learning_rate": 4.880118656945606e-06, |
| "loss": 0.5945, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6957494407158836, |
| "grad_norm": 0.4089728593826294, |
| "learning_rate": 4.879661749705424e-06, |
| "loss": 0.6226, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6968680089485458, |
| "grad_norm": 0.4341566562652588, |
| "learning_rate": 4.879203994880362e-06, |
| "loss": 0.6463, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.697986577181208, |
| "grad_norm": 0.44256189465522766, |
| "learning_rate": 4.878745392633462e-06, |
| "loss": 0.653, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6991051454138703, |
| "grad_norm": 0.4098159372806549, |
| "learning_rate": 4.878285943128067e-06, |
| "loss": 0.5808, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7002237136465325, |
| "grad_norm": 0.43130752444267273, |
| "learning_rate": 4.8778256465278245e-06, |
| "loss": 0.6261, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7013422818791947, |
| "grad_norm": 0.4110218286514282, |
| "learning_rate": 4.877364502996682e-06, |
| "loss": 0.5954, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.7024608501118568, |
| "grad_norm": 0.42106136679649353, |
| "learning_rate": 4.87690251269889e-06, |
| "loss": 0.6026, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.703579418344519, |
| "grad_norm": 0.4233524203300476, |
| "learning_rate": 4.876439675798997e-06, |
| "loss": 0.6432, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.7046979865771812, |
| "grad_norm": 0.42107197642326355, |
| "learning_rate": 4.87597599246186e-06, |
| "loss": 0.6198, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7058165548098434, |
| "grad_norm": 0.43851831555366516, |
| "learning_rate": 4.875511462852628e-06, |
| "loss": 0.6293, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.7069351230425056, |
| "grad_norm": 0.41345685720443726, |
| "learning_rate": 4.87504608713676e-06, |
| "loss": 0.6178, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.7080536912751678, |
| "grad_norm": 0.41011178493499756, |
| "learning_rate": 4.874579865480013e-06, |
| "loss": 0.6441, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.70917225950783, |
| "grad_norm": 0.41372135281562805, |
| "learning_rate": 4.874112798048442e-06, |
| "loss": 0.6142, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7102908277404921, |
| "grad_norm": 0.41231900453567505, |
| "learning_rate": 4.8736448850084105e-06, |
| "loss": 0.6277, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.7114093959731543, |
| "grad_norm": 0.4147928059101105, |
| "learning_rate": 4.873176126526578e-06, |
| "loss": 0.6197, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7125279642058165, |
| "grad_norm": 0.4046717882156372, |
| "learning_rate": 4.8727065227699035e-06, |
| "loss": 0.6138, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.7136465324384788, |
| "grad_norm": 0.4150887727737427, |
| "learning_rate": 4.872236073905654e-06, |
| "loss": 0.616, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.714765100671141, |
| "grad_norm": 0.41429632902145386, |
| "learning_rate": 4.87176478010139e-06, |
| "loss": 0.6153, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.7158836689038032, |
| "grad_norm": 0.41153407096862793, |
| "learning_rate": 4.8712926415249785e-06, |
| "loss": 0.6171, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7170022371364653, |
| "grad_norm": 0.4178698658943176, |
| "learning_rate": 4.870819658344584e-06, |
| "loss": 0.6417, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.7181208053691275, |
| "grad_norm": 0.40587952733039856, |
| "learning_rate": 4.870345830728675e-06, |
| "loss": 0.6206, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7192393736017897, |
| "grad_norm": 0.42633864283561707, |
| "learning_rate": 4.869871158846016e-06, |
| "loss": 0.6246, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.7203579418344519, |
| "grad_norm": 0.41023534536361694, |
| "learning_rate": 4.8693956428656766e-06, |
| "loss": 0.601, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7214765100671141, |
| "grad_norm": 0.40645042061805725, |
| "learning_rate": 4.868919282957024e-06, |
| "loss": 0.6193, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.7225950782997763, |
| "grad_norm": 0.40088531374931335, |
| "learning_rate": 4.86844207928973e-06, |
| "loss": 0.5869, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.7237136465324385, |
| "grad_norm": 0.4136696755886078, |
| "learning_rate": 4.8679640320337625e-06, |
| "loss": 0.6413, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.7248322147651006, |
| "grad_norm": 0.40026187896728516, |
| "learning_rate": 4.867485141359394e-06, |
| "loss": 0.6075, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7259507829977628, |
| "grad_norm": 0.40911242365837097, |
| "learning_rate": 4.867005407437192e-06, |
| "loss": 0.6411, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.727069351230425, |
| "grad_norm": 0.42306697368621826, |
| "learning_rate": 4.866524830438029e-06, |
| "loss": 0.6376, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7281879194630873, |
| "grad_norm": 0.40857061743736267, |
| "learning_rate": 4.866043410533077e-06, |
| "loss": 0.6071, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7293064876957495, |
| "grad_norm": 0.41601142287254333, |
| "learning_rate": 4.8655611478938055e-06, |
| "loss": 0.6079, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7304250559284117, |
| "grad_norm": 0.40857282280921936, |
| "learning_rate": 4.8650780426919895e-06, |
| "loss": 0.6246, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7315436241610739, |
| "grad_norm": 0.4063502252101898, |
| "learning_rate": 4.864594095099697e-06, |
| "loss": 0.6105, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.732662192393736, |
| "grad_norm": 0.40278729796409607, |
| "learning_rate": 4.864109305289303e-06, |
| "loss": 0.5936, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7337807606263982, |
| "grad_norm": 0.4201098382472992, |
| "learning_rate": 4.863623673433478e-06, |
| "loss": 0.6081, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7348993288590604, |
| "grad_norm": 0.40003877878189087, |
| "learning_rate": 4.863137199705192e-06, |
| "loss": 0.6085, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7360178970917226, |
| "grad_norm": 0.41234898567199707, |
| "learning_rate": 4.86264988427772e-06, |
| "loss": 0.6252, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7371364653243848, |
| "grad_norm": 0.4233507513999939, |
| "learning_rate": 4.862161727324632e-06, |
| "loss": 0.5987, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.738255033557047, |
| "grad_norm": 0.4099391996860504, |
| "learning_rate": 4.861672729019798e-06, |
| "loss": 0.6293, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7393736017897091, |
| "grad_norm": 0.4255772829055786, |
| "learning_rate": 4.861182889537389e-06, |
| "loss": 0.6268, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7404921700223713, |
| "grad_norm": 0.4317517578601837, |
| "learning_rate": 4.860692209051877e-06, |
| "loss": 0.6444, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7416107382550335, |
| "grad_norm": 0.4352816939353943, |
| "learning_rate": 4.86020068773803e-06, |
| "loss": 0.6304, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7427293064876958, |
| "grad_norm": 0.3987254500389099, |
| "learning_rate": 4.859708325770919e-06, |
| "loss": 0.611, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.743847874720358, |
| "grad_norm": 0.4213384985923767, |
| "learning_rate": 4.859215123325912e-06, |
| "loss": 0.6292, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7449664429530202, |
| "grad_norm": 0.4062172472476959, |
| "learning_rate": 4.8587210805786785e-06, |
| "loss": 0.6197, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7460850111856824, |
| "grad_norm": 0.41443362832069397, |
| "learning_rate": 4.858226197705183e-06, |
| "loss": 0.6414, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7472035794183445, |
| "grad_norm": 0.4183506667613983, |
| "learning_rate": 4.857730474881696e-06, |
| "loss": 0.6294, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7483221476510067, |
| "grad_norm": 0.42685073614120483, |
| "learning_rate": 4.857233912284781e-06, |
| "loss": 0.6264, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7494407158836689, |
| "grad_norm": 0.4143792390823364, |
| "learning_rate": 4.856736510091304e-06, |
| "loss": 0.6575, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7505592841163311, |
| "grad_norm": 0.4124217629432678, |
| "learning_rate": 4.8562382684784284e-06, |
| "loss": 0.6295, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7516778523489933, |
| "grad_norm": 0.4060792624950409, |
| "learning_rate": 4.855739187623619e-06, |
| "loss": 0.5983, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7527964205816555, |
| "grad_norm": 0.4100533723831177, |
| "learning_rate": 4.855239267704635e-06, |
| "loss": 0.6271, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7539149888143176, |
| "grad_norm": 0.4047471582889557, |
| "learning_rate": 4.854738508899538e-06, |
| "loss": 0.5843, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7550335570469798, |
| "grad_norm": 0.41550201177597046, |
| "learning_rate": 4.854236911386689e-06, |
| "loss": 0.6015, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.756152125279642, |
| "grad_norm": 0.4035356044769287, |
| "learning_rate": 4.853734475344745e-06, |
| "loss": 0.6085, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7572706935123042, |
| "grad_norm": 0.4054676294326782, |
| "learning_rate": 4.853231200952665e-06, |
| "loss": 0.5879, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7583892617449665, |
| "grad_norm": 0.4165349304676056, |
| "learning_rate": 4.852727088389702e-06, |
| "loss": 0.6065, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7595078299776287, |
| "grad_norm": 0.41854768991470337, |
| "learning_rate": 4.8522221378354125e-06, |
| "loss": 0.6115, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7606263982102909, |
| "grad_norm": 0.4189227223396301, |
| "learning_rate": 4.851716349469647e-06, |
| "loss": 0.6174, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.761744966442953, |
| "grad_norm": 0.44432833790779114, |
| "learning_rate": 4.851209723472559e-06, |
| "loss": 0.6382, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7628635346756152, |
| "grad_norm": 0.4199828803539276, |
| "learning_rate": 4.8507022600245954e-06, |
| "loss": 0.6125, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7639821029082774, |
| "grad_norm": 0.44079893827438354, |
| "learning_rate": 4.850193959306506e-06, |
| "loss": 0.6263, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7651006711409396, |
| "grad_norm": 0.41406047344207764, |
| "learning_rate": 4.8496848214993355e-06, |
| "loss": 0.5979, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7662192393736018, |
| "grad_norm": 0.43209850788116455, |
| "learning_rate": 4.849174846784428e-06, |
| "loss": 0.6451, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.767337807606264, |
| "grad_norm": 0.4180072844028473, |
| "learning_rate": 4.848664035343425e-06, |
| "loss": 0.6009, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7684563758389261, |
| "grad_norm": 0.4092356860637665, |
| "learning_rate": 4.8481523873582685e-06, |
| "loss": 0.6431, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7695749440715883, |
| "grad_norm": 0.41440829634666443, |
| "learning_rate": 4.847639903011196e-06, |
| "loss": 0.6001, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7706935123042505, |
| "grad_norm": 0.4246008098125458, |
| "learning_rate": 4.8471265824847415e-06, |
| "loss": 0.6137, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7718120805369127, |
| "grad_norm": 0.4177666902542114, |
| "learning_rate": 4.846612425961742e-06, |
| "loss": 0.6026, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.772930648769575, |
| "grad_norm": 0.4130840003490448, |
| "learning_rate": 4.846097433625327e-06, |
| "loss": 0.6183, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7740492170022372, |
| "grad_norm": 0.406780868768692, |
| "learning_rate": 4.845581605658926e-06, |
| "loss": 0.5992, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7751677852348994, |
| "grad_norm": 0.42086103558540344, |
| "learning_rate": 4.845064942246267e-06, |
| "loss": 0.6057, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7762863534675615, |
| "grad_norm": 0.4122505486011505, |
| "learning_rate": 4.844547443571374e-06, |
| "loss": 0.6134, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7774049217002237, |
| "grad_norm": 0.43634387850761414, |
| "learning_rate": 4.8440291098185686e-06, |
| "loss": 0.6044, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7785234899328859, |
| "grad_norm": 0.4160690903663635, |
| "learning_rate": 4.843509941172471e-06, |
| "loss": 0.6046, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7796420581655481, |
| "grad_norm": 0.41897231340408325, |
| "learning_rate": 4.842989937817997e-06, |
| "loss": 0.6186, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7807606263982103, |
| "grad_norm": 0.4187341034412384, |
| "learning_rate": 4.842469099940361e-06, |
| "loss": 0.6266, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7818791946308725, |
| "grad_norm": 0.4075968563556671, |
| "learning_rate": 4.841947427725076e-06, |
| "loss": 0.5772, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7829977628635347, |
| "grad_norm": 0.4157114028930664, |
| "learning_rate": 4.841424921357948e-06, |
| "loss": 0.5999, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7841163310961968, |
| "grad_norm": 0.4198933243751526, |
| "learning_rate": 4.840901581025083e-06, |
| "loss": 0.6273, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.785234899328859, |
| "grad_norm": 0.42646607756614685, |
| "learning_rate": 4.840377406912887e-06, |
| "loss": 0.6074, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7863534675615212, |
| "grad_norm": 0.42644554376602173, |
| "learning_rate": 4.839852399208056e-06, |
| "loss": 0.5872, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7874720357941835, |
| "grad_norm": 0.43172845244407654, |
| "learning_rate": 4.839326558097587e-06, |
| "loss": 0.633, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7885906040268457, |
| "grad_norm": 0.4165332317352295, |
| "learning_rate": 4.838799883768775e-06, |
| "loss": 0.6206, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7897091722595079, |
| "grad_norm": 0.4209877550601959, |
| "learning_rate": 4.83827237640921e-06, |
| "loss": 0.6015, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.79082774049217, |
| "grad_norm": 0.4267021715641022, |
| "learning_rate": 4.837744036206777e-06, |
| "loss": 0.5975, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7919463087248322, |
| "grad_norm": 0.4415457546710968, |
| "learning_rate": 4.837214863349662e-06, |
| "loss": 0.6251, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7930648769574944, |
| "grad_norm": 0.43104031682014465, |
| "learning_rate": 4.836684858026343e-06, |
| "loss": 0.6048, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7941834451901566, |
| "grad_norm": 0.41736820340156555, |
| "learning_rate": 4.8361540204255985e-06, |
| "loss": 0.5948, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7953020134228188, |
| "grad_norm": 0.4202009439468384, |
| "learning_rate": 4.835622350736499e-06, |
| "loss": 0.6099, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.796420581655481, |
| "grad_norm": 0.42279568314552307, |
| "learning_rate": 4.8350898491484175e-06, |
| "loss": 0.6247, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.7975391498881432, |
| "grad_norm": 0.4266239404678345, |
| "learning_rate": 4.8345565158510176e-06, |
| "loss": 0.6136, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7986577181208053, |
| "grad_norm": 0.42605841159820557, |
| "learning_rate": 4.83402235103426e-06, |
| "loss": 0.6001, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.7997762863534675, |
| "grad_norm": 0.42846307158470154, |
| "learning_rate": 4.8334873548884055e-06, |
| "loss": 0.5941, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.8008948545861297, |
| "grad_norm": 0.44009047746658325, |
| "learning_rate": 4.832951527604007e-06, |
| "loss": 0.622, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.802013422818792, |
| "grad_norm": 0.44512951374053955, |
| "learning_rate": 4.8324148693719145e-06, |
| "loss": 0.6507, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.8031319910514542, |
| "grad_norm": 0.455010324716568, |
| "learning_rate": 4.831877380383276e-06, |
| "loss": 0.6201, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8042505592841164, |
| "grad_norm": 0.43456459045410156, |
| "learning_rate": 4.83133906082953e-06, |
| "loss": 0.623, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.8053691275167785, |
| "grad_norm": 0.42063653469085693, |
| "learning_rate": 4.830799910902418e-06, |
| "loss": 0.5841, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8064876957494407, |
| "grad_norm": 0.41323843598365784, |
| "learning_rate": 4.8302599307939725e-06, |
| "loss": 0.6127, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.8076062639821029, |
| "grad_norm": 0.41982001066207886, |
| "learning_rate": 4.829719120696523e-06, |
| "loss": 0.6274, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8087248322147651, |
| "grad_norm": 0.43330860137939453, |
| "learning_rate": 4.829177480802694e-06, |
| "loss": 0.6416, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.8098434004474273, |
| "grad_norm": 0.4351330101490021, |
| "learning_rate": 4.828635011305407e-06, |
| "loss": 0.6399, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8109619686800895, |
| "grad_norm": 0.4017598032951355, |
| "learning_rate": 4.828091712397878e-06, |
| "loss": 0.5817, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8120805369127517, |
| "grad_norm": 0.42594751715660095, |
| "learning_rate": 4.827547584273618e-06, |
| "loss": 0.6438, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8131991051454138, |
| "grad_norm": 0.409135639667511, |
| "learning_rate": 4.827002627126433e-06, |
| "loss": 0.5797, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.814317673378076, |
| "grad_norm": 0.4304857850074768, |
| "learning_rate": 4.826456841150428e-06, |
| "loss": 0.6173, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8154362416107382, |
| "grad_norm": 0.446872740983963, |
| "learning_rate": 4.825910226539997e-06, |
| "loss": 0.6059, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.8165548098434005, |
| "grad_norm": 0.42625290155410767, |
| "learning_rate": 4.8253627834898355e-06, |
| "loss": 0.5994, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8176733780760627, |
| "grad_norm": 0.42183029651641846, |
| "learning_rate": 4.824814512194929e-06, |
| "loss": 0.6202, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.8187919463087249, |
| "grad_norm": 0.4235664904117584, |
| "learning_rate": 4.824265412850559e-06, |
| "loss": 0.6263, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.819910514541387, |
| "grad_norm": 0.4118615686893463, |
| "learning_rate": 4.823715485652307e-06, |
| "loss": 0.6058, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.8210290827740492, |
| "grad_norm": 0.43514224886894226, |
| "learning_rate": 4.823164730796042e-06, |
| "loss": 0.6092, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.8221476510067114, |
| "grad_norm": 0.41756734251976013, |
| "learning_rate": 4.8226131484779325e-06, |
| "loss": 0.6281, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.8232662192393736, |
| "grad_norm": 0.438475638628006, |
| "learning_rate": 4.822060738894439e-06, |
| "loss": 0.6122, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.8243847874720358, |
| "grad_norm": 0.426792174577713, |
| "learning_rate": 4.821507502242321e-06, |
| "loss": 0.6407, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.825503355704698, |
| "grad_norm": 0.42697012424468994, |
| "learning_rate": 4.820953438718626e-06, |
| "loss": 0.5996, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.8266219239373602, |
| "grad_norm": 0.42373016476631165, |
| "learning_rate": 4.820398548520702e-06, |
| "loss": 0.6075, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.8277404921700223, |
| "grad_norm": 0.42235615849494934, |
| "learning_rate": 4.81984283184619e-06, |
| "loss": 0.608, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8288590604026845, |
| "grad_norm": 0.41180866956710815, |
| "learning_rate": 4.819286288893022e-06, |
| "loss": 0.6127, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.8299776286353467, |
| "grad_norm": 0.4207548499107361, |
| "learning_rate": 4.818728919859426e-06, |
| "loss": 0.6131, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.831096196868009, |
| "grad_norm": 0.4295390546321869, |
| "learning_rate": 4.818170724943928e-06, |
| "loss": 0.629, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.8322147651006712, |
| "grad_norm": 0.4099291265010834, |
| "learning_rate": 4.817611704345344e-06, |
| "loss": 0.6055, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.4142029583454132, |
| "learning_rate": 4.817051858262785e-06, |
| "loss": 0.6127, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8344519015659956, |
| "grad_norm": 0.41662877798080444, |
| "learning_rate": 4.816491186895656e-06, |
| "loss": 0.6171, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8355704697986577, |
| "grad_norm": 0.4345078766345978, |
| "learning_rate": 4.815929690443657e-06, |
| "loss": 0.6091, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.8366890380313199, |
| "grad_norm": 0.43010810017585754, |
| "learning_rate": 4.8153673691067806e-06, |
| "loss": 0.626, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8378076062639821, |
| "grad_norm": 0.4256346821784973, |
| "learning_rate": 4.814804223085313e-06, |
| "loss": 0.6216, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8389261744966443, |
| "grad_norm": 0.42812031507492065, |
| "learning_rate": 4.814240252579836e-06, |
| "loss": 0.6138, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8400447427293065, |
| "grad_norm": 0.41682377457618713, |
| "learning_rate": 4.813675457791224e-06, |
| "loss": 0.5783, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8411633109619687, |
| "grad_norm": 0.4257197380065918, |
| "learning_rate": 4.8131098389206435e-06, |
| "loss": 0.6006, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8422818791946308, |
| "grad_norm": 0.42872053384780884, |
| "learning_rate": 4.812543396169557e-06, |
| "loss": 0.6272, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.843400447427293, |
| "grad_norm": 0.4263060986995697, |
| "learning_rate": 4.81197612973972e-06, |
| "loss": 0.6093, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8445190156599552, |
| "grad_norm": 0.4151028096675873, |
| "learning_rate": 4.811408039833178e-06, |
| "loss": 0.5773, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8456375838926175, |
| "grad_norm": 0.4382091164588928, |
| "learning_rate": 4.810839126652275e-06, |
| "loss": 0.596, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.8467561521252797, |
| "grad_norm": 0.4290934205055237, |
| "learning_rate": 4.810269390399646e-06, |
| "loss": 0.5904, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8478747203579419, |
| "grad_norm": 0.4299798011779785, |
| "learning_rate": 4.809698831278217e-06, |
| "loss": 0.6449, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8489932885906041, |
| "grad_norm": 0.4188365042209625, |
| "learning_rate": 4.809127449491211e-06, |
| "loss": 0.6007, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8501118568232662, |
| "grad_norm": 0.41594186425209045, |
| "learning_rate": 4.808555245242141e-06, |
| "loss": 0.5888, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8512304250559284, |
| "grad_norm": 0.4184630513191223, |
| "learning_rate": 4.807982218734814e-06, |
| "loss": 0.6495, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8523489932885906, |
| "grad_norm": 0.418849378824234, |
| "learning_rate": 4.80740837017333e-06, |
| "loss": 0.6128, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8534675615212528, |
| "grad_norm": 0.42030590772628784, |
| "learning_rate": 4.8068336997620804e-06, |
| "loss": 0.6294, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.854586129753915, |
| "grad_norm": 0.4204208552837372, |
| "learning_rate": 4.806258207705753e-06, |
| "loss": 0.6279, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8557046979865772, |
| "grad_norm": 0.41544729471206665, |
| "learning_rate": 4.805681894209324e-06, |
| "loss": 0.6235, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8568232662192393, |
| "grad_norm": 0.430301308631897, |
| "learning_rate": 4.805104759478065e-06, |
| "loss": 0.5876, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8579418344519015, |
| "grad_norm": 0.4640876054763794, |
| "learning_rate": 4.804526803717539e-06, |
| "loss": 0.6264, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8590604026845637, |
| "grad_norm": 0.43245360255241394, |
| "learning_rate": 4.8039480271336005e-06, |
| "loss": 0.5871, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.860178970917226, |
| "grad_norm": 0.41617804765701294, |
| "learning_rate": 4.803368429932399e-06, |
| "loss": 0.6218, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8612975391498882, |
| "grad_norm": 0.4325237274169922, |
| "learning_rate": 4.8027880123203726e-06, |
| "loss": 0.5874, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8624161073825504, |
| "grad_norm": 0.4480580687522888, |
| "learning_rate": 4.802206774504255e-06, |
| "loss": 0.6093, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8635346756152126, |
| "grad_norm": 0.44137299060821533, |
| "learning_rate": 4.801624716691072e-06, |
| "loss": 0.6031, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8646532438478747, |
| "grad_norm": 0.4269614815711975, |
| "learning_rate": 4.801041839088139e-06, |
| "loss": 0.5963, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8657718120805369, |
| "grad_norm": 0.4120911657810211, |
| "learning_rate": 4.800458141903064e-06, |
| "loss": 0.5959, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8668903803131991, |
| "grad_norm": 0.4332381784915924, |
| "learning_rate": 4.799873625343747e-06, |
| "loss": 0.6007, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8680089485458613, |
| "grad_norm": 0.4361085295677185, |
| "learning_rate": 4.7992882896183825e-06, |
| "loss": 0.6012, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8691275167785235, |
| "grad_norm": 0.43251463770866394, |
| "learning_rate": 4.798702134935454e-06, |
| "loss": 0.5799, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8702460850111857, |
| "grad_norm": 0.4305305778980255, |
| "learning_rate": 4.798115161503735e-06, |
| "loss": 0.6068, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8713646532438478, |
| "grad_norm": 0.4410499930381775, |
| "learning_rate": 4.797527369532296e-06, |
| "loss": 0.6486, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.87248322147651, |
| "grad_norm": 0.43271416425704956, |
| "learning_rate": 4.796938759230494e-06, |
| "loss": 0.6367, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8736017897091722, |
| "grad_norm": 0.430905282497406, |
| "learning_rate": 4.7963493308079815e-06, |
| "loss": 0.5753, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8747203579418344, |
| "grad_norm": 0.4054125249385834, |
| "learning_rate": 4.7957590844746986e-06, |
| "loss": 0.5806, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8758389261744967, |
| "grad_norm": 0.4322145879268646, |
| "learning_rate": 4.795168020440878e-06, |
| "loss": 0.5989, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8769574944071589, |
| "grad_norm": 0.433716744184494, |
| "learning_rate": 4.7945761389170464e-06, |
| "loss": 0.6284, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8780760626398211, |
| "grad_norm": 0.4301673471927643, |
| "learning_rate": 4.793983440114018e-06, |
| "loss": 0.6469, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8791946308724832, |
| "grad_norm": 0.4279182553291321, |
| "learning_rate": 4.7933899242428986e-06, |
| "loss": 0.6032, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8803131991051454, |
| "grad_norm": 0.4133903682231903, |
| "learning_rate": 4.792795591515087e-06, |
| "loss": 0.5745, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8814317673378076, |
| "grad_norm": 0.45094674825668335, |
| "learning_rate": 4.792200442142273e-06, |
| "loss": 0.6212, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8825503355704698, |
| "grad_norm": 0.4304371774196625, |
| "learning_rate": 4.7916044763364344e-06, |
| "loss": 0.61, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.883668903803132, |
| "grad_norm": 0.4232103228569031, |
| "learning_rate": 4.791007694309842e-06, |
| "loss": 0.5942, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8847874720357942, |
| "grad_norm": 0.41822549700737, |
| "learning_rate": 4.790410096275057e-06, |
| "loss": 0.5829, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8859060402684564, |
| "grad_norm": 0.44473201036453247, |
| "learning_rate": 4.789811682444931e-06, |
| "loss": 0.6359, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8870246085011185, |
| "grad_norm": 0.42226001620292664, |
| "learning_rate": 4.7892124530326065e-06, |
| "loss": 0.5966, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8881431767337807, |
| "grad_norm": 0.4254050552845001, |
| "learning_rate": 4.788612408251517e-06, |
| "loss": 0.6211, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.889261744966443, |
| "grad_norm": 0.42909836769104004, |
| "learning_rate": 4.788011548315383e-06, |
| "loss": 0.6039, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8903803131991052, |
| "grad_norm": 0.4296148419380188, |
| "learning_rate": 4.78740987343822e-06, |
| "loss": 0.6055, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8914988814317674, |
| "grad_norm": 0.4225505292415619, |
| "learning_rate": 4.786807383834332e-06, |
| "loss": 0.5947, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8926174496644296, |
| "grad_norm": 0.4271566867828369, |
| "learning_rate": 4.786204079718314e-06, |
| "loss": 0.6002, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.8937360178970917, |
| "grad_norm": 0.42522063851356506, |
| "learning_rate": 4.785599961305048e-06, |
| "loss": 0.6231, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8948545861297539, |
| "grad_norm": 0.4384607970714569, |
| "learning_rate": 4.784995028809707e-06, |
| "loss": 0.6072, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8959731543624161, |
| "grad_norm": 0.4194418787956238, |
| "learning_rate": 4.784389282447759e-06, |
| "loss": 0.5979, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8970917225950783, |
| "grad_norm": 0.43825826048851013, |
| "learning_rate": 4.7837827224349544e-06, |
| "loss": 0.6256, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8982102908277405, |
| "grad_norm": 0.43381622433662415, |
| "learning_rate": 4.783175348987339e-06, |
| "loss": 0.5932, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.8993288590604027, |
| "grad_norm": 0.4468502700328827, |
| "learning_rate": 4.7825671623212456e-06, |
| "loss": 0.618, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.9004474272930649, |
| "grad_norm": 0.4352877140045166, |
| "learning_rate": 4.781958162653298e-06, |
| "loss": 0.5898, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.901565995525727, |
| "grad_norm": 0.4242689907550812, |
| "learning_rate": 4.781348350200408e-06, |
| "loss": 0.5856, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.9026845637583892, |
| "grad_norm": 0.4262087941169739, |
| "learning_rate": 4.780737725179778e-06, |
| "loss": 0.5994, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.9038031319910514, |
| "grad_norm": 0.42303264141082764, |
| "learning_rate": 4.780126287808899e-06, |
| "loss": 0.6106, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.9049217002237137, |
| "grad_norm": 0.43589121103286743, |
| "learning_rate": 4.779514038305555e-06, |
| "loss": 0.6251, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.9060402684563759, |
| "grad_norm": 0.43768516182899475, |
| "learning_rate": 4.778900976887813e-06, |
| "loss": 0.6124, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9071588366890381, |
| "grad_norm": 0.4439849257469177, |
| "learning_rate": 4.778287103774033e-06, |
| "loss": 0.6397, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.9082774049217002, |
| "grad_norm": 0.44813254475593567, |
| "learning_rate": 4.777672419182863e-06, |
| "loss": 0.6213, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.9093959731543624, |
| "grad_norm": 0.4133831858634949, |
| "learning_rate": 4.777056923333244e-06, |
| "loss": 0.6138, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.9105145413870246, |
| "grad_norm": 0.4255264699459076, |
| "learning_rate": 4.7764406164444e-06, |
| "loss": 0.6143, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.9116331096196868, |
| "grad_norm": 0.42810630798339844, |
| "learning_rate": 4.775823498735845e-06, |
| "loss": 0.6253, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.912751677852349, |
| "grad_norm": 0.42162856459617615, |
| "learning_rate": 4.775205570427386e-06, |
| "loss": 0.602, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.9138702460850112, |
| "grad_norm": 0.4342280328273773, |
| "learning_rate": 4.7745868317391135e-06, |
| "loss": 0.6088, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.9149888143176734, |
| "grad_norm": 0.42438629269599915, |
| "learning_rate": 4.773967282891411e-06, |
| "loss": 0.5788, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.9161073825503355, |
| "grad_norm": 0.437950074672699, |
| "learning_rate": 4.7733469241049475e-06, |
| "loss": 0.6277, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.9172259507829977, |
| "grad_norm": 0.4286377429962158, |
| "learning_rate": 4.772725755600682e-06, |
| "loss": 0.6024, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9183445190156599, |
| "grad_norm": 0.4317566156387329, |
| "learning_rate": 4.772103777599861e-06, |
| "loss": 0.6048, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.9194630872483222, |
| "grad_norm": 0.4509202837944031, |
| "learning_rate": 4.771480990324021e-06, |
| "loss": 0.6219, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.9205816554809844, |
| "grad_norm": 0.4387308955192566, |
| "learning_rate": 4.7708573939949845e-06, |
| "loss": 0.6082, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.9217002237136466, |
| "grad_norm": 0.457883358001709, |
| "learning_rate": 4.770232988834864e-06, |
| "loss": 0.6112, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.9228187919463087, |
| "grad_norm": 0.44200408458709717, |
| "learning_rate": 4.769607775066058e-06, |
| "loss": 0.6146, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.9239373601789709, |
| "grad_norm": 0.44704675674438477, |
| "learning_rate": 4.768981752911256e-06, |
| "loss": 0.5921, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.9250559284116331, |
| "grad_norm": 0.4367467164993286, |
| "learning_rate": 4.768354922593433e-06, |
| "loss": 0.6075, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.9261744966442953, |
| "grad_norm": 0.4321734309196472, |
| "learning_rate": 4.767727284335852e-06, |
| "loss": 0.6041, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.9272930648769575, |
| "grad_norm": 0.42991605401039124, |
| "learning_rate": 4.767098838362065e-06, |
| "loss": 0.5804, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.9284116331096197, |
| "grad_norm": 0.43791651725769043, |
| "learning_rate": 4.766469584895912e-06, |
| "loss": 0.6005, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9295302013422819, |
| "grad_norm": 0.41972237825393677, |
| "learning_rate": 4.765839524161518e-06, |
| "loss": 0.582, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.930648769574944, |
| "grad_norm": 0.4424271881580353, |
| "learning_rate": 4.765208656383299e-06, |
| "loss": 0.5978, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.9317673378076062, |
| "grad_norm": 0.45667219161987305, |
| "learning_rate": 4.7645769817859554e-06, |
| "loss": 0.6208, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.9328859060402684, |
| "grad_norm": 0.4423377811908722, |
| "learning_rate": 4.763944500594476e-06, |
| "loss": 0.6061, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.9340044742729307, |
| "grad_norm": 0.4316536784172058, |
| "learning_rate": 4.7633112130341385e-06, |
| "loss": 0.6116, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.9351230425055929, |
| "grad_norm": 0.4591672718524933, |
| "learning_rate": 4.762677119330505e-06, |
| "loss": 0.5729, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.9362416107382551, |
| "grad_norm": 0.4469880759716034, |
| "learning_rate": 4.762042219709427e-06, |
| "loss": 0.6025, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9373601789709173, |
| "grad_norm": 0.4560692012310028, |
| "learning_rate": 4.761406514397042e-06, |
| "loss": 0.6103, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9384787472035794, |
| "grad_norm": 0.4428820013999939, |
| "learning_rate": 4.760770003619775e-06, |
| "loss": 0.6258, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9395973154362416, |
| "grad_norm": 0.44238874316215515, |
| "learning_rate": 4.760132687604338e-06, |
| "loss": 0.6032, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9407158836689038, |
| "grad_norm": 0.46432724595069885, |
| "learning_rate": 4.759494566577727e-06, |
| "loss": 0.6266, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.941834451901566, |
| "grad_norm": 0.42941901087760925, |
| "learning_rate": 4.75885564076723e-06, |
| "loss": 0.5927, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9429530201342282, |
| "grad_norm": 0.43781232833862305, |
| "learning_rate": 4.758215910400418e-06, |
| "loss": 0.5967, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9440715883668904, |
| "grad_norm": 0.45641666650772095, |
| "learning_rate": 4.757575375705149e-06, |
| "loss": 0.6423, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9451901565995525, |
| "grad_norm": 0.43784114718437195, |
| "learning_rate": 4.756934036909567e-06, |
| "loss": 0.606, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9463087248322147, |
| "grad_norm": 0.4379528760910034, |
| "learning_rate": 4.756291894242106e-06, |
| "loss": 0.6201, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9474272930648769, |
| "grad_norm": 0.42831459641456604, |
| "learning_rate": 4.755648947931479e-06, |
| "loss": 0.6121, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9485458612975392, |
| "grad_norm": 0.43790462613105774, |
| "learning_rate": 4.7550051982066945e-06, |
| "loss": 0.5785, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9496644295302014, |
| "grad_norm": 0.4407269358634949, |
| "learning_rate": 4.75436064529704e-06, |
| "loss": 0.6127, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9507829977628636, |
| "grad_norm": 0.4252265393733978, |
| "learning_rate": 4.753715289432092e-06, |
| "loss": 0.6129, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9519015659955258, |
| "grad_norm": 0.4376990795135498, |
| "learning_rate": 4.753069130841712e-06, |
| "loss": 0.614, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9530201342281879, |
| "grad_norm": 0.43123552203178406, |
| "learning_rate": 4.752422169756048e-06, |
| "loss": 0.6169, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9541387024608501, |
| "grad_norm": 0.4513196349143982, |
| "learning_rate": 4.7517744064055345e-06, |
| "loss": 0.6381, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9552572706935123, |
| "grad_norm": 0.44663751125335693, |
| "learning_rate": 4.751125841020891e-06, |
| "loss": 0.605, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9563758389261745, |
| "grad_norm": 0.44196903705596924, |
| "learning_rate": 4.750476473833123e-06, |
| "loss": 0.6163, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9574944071588367, |
| "grad_norm": 0.40786847472190857, |
| "learning_rate": 4.74982630507352e-06, |
| "loss": 0.5624, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9586129753914989, |
| "grad_norm": 0.43438002467155457, |
| "learning_rate": 4.749175334973659e-06, |
| "loss": 0.6183, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.959731543624161, |
| "grad_norm": 0.43120619654655457, |
| "learning_rate": 4.748523563765401e-06, |
| "loss": 0.6097, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9608501118568232, |
| "grad_norm": 0.4761989414691925, |
| "learning_rate": 4.747870991680895e-06, |
| "loss": 0.6029, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.9619686800894854, |
| "grad_norm": 0.44484785199165344, |
| "learning_rate": 4.747217618952571e-06, |
| "loss": 0.5955, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9630872483221476, |
| "grad_norm": 0.4473284184932709, |
| "learning_rate": 4.746563445813148e-06, |
| "loss": 0.6367, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9642058165548099, |
| "grad_norm": 0.4486042857170105, |
| "learning_rate": 4.745908472495628e-06, |
| "loss": 0.5917, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9653243847874721, |
| "grad_norm": 0.45661088824272156, |
| "learning_rate": 4.745252699233298e-06, |
| "loss": 0.61, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9664429530201343, |
| "grad_norm": 0.4235589802265167, |
| "learning_rate": 4.744596126259731e-06, |
| "loss": 0.5887, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9675615212527964, |
| "grad_norm": 0.45463138818740845, |
| "learning_rate": 4.743938753808785e-06, |
| "loss": 0.6295, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9686800894854586, |
| "grad_norm": 0.4576405882835388, |
| "learning_rate": 4.743280582114601e-06, |
| "loss": 0.6301, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9697986577181208, |
| "grad_norm": 0.4380008280277252, |
| "learning_rate": 4.742621611411606e-06, |
| "loss": 0.619, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.970917225950783, |
| "grad_norm": 0.4485333263874054, |
| "learning_rate": 4.7419618419345124e-06, |
| "loss": 0.6311, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9720357941834452, |
| "grad_norm": 0.44375064969062805, |
| "learning_rate": 4.741301273918314e-06, |
| "loss": 0.6095, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9731543624161074, |
| "grad_norm": 0.4440104365348816, |
| "learning_rate": 4.740639907598293e-06, |
| "loss": 0.6173, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9742729306487695, |
| "grad_norm": 0.4545641243457794, |
| "learning_rate": 4.739977743210014e-06, |
| "loss": 0.6046, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9753914988814317, |
| "grad_norm": 0.41214534640312195, |
| "learning_rate": 4.739314780989324e-06, |
| "loss": 0.6072, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9765100671140939, |
| "grad_norm": 0.4223095178604126, |
| "learning_rate": 4.738651021172357e-06, |
| "loss": 0.5878, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.9776286353467561, |
| "grad_norm": 0.42885127663612366, |
| "learning_rate": 4.7379864639955304e-06, |
| "loss": 0.577, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9787472035794184, |
| "grad_norm": 0.42921069264411926, |
| "learning_rate": 4.737321109695546e-06, |
| "loss": 0.5844, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9798657718120806, |
| "grad_norm": 0.43462637066841125, |
| "learning_rate": 4.736654958509387e-06, |
| "loss": 0.6135, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9809843400447428, |
| "grad_norm": 0.43558555841445923, |
| "learning_rate": 4.735988010674324e-06, |
| "loss": 0.6255, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9821029082774049, |
| "grad_norm": 0.44332823157310486, |
| "learning_rate": 4.735320266427909e-06, |
| "loss": 0.6266, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9832214765100671, |
| "grad_norm": 0.4158824682235718, |
| "learning_rate": 4.734651726007978e-06, |
| "loss": 0.585, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9843400447427293, |
| "grad_norm": 0.4264974296092987, |
| "learning_rate": 4.733982389652652e-06, |
| "loss": 0.5871, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9854586129753915, |
| "grad_norm": 0.44846397638320923, |
| "learning_rate": 4.733312257600332e-06, |
| "loss": 0.6441, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9865771812080537, |
| "grad_norm": 0.46592265367507935, |
| "learning_rate": 4.732641330089707e-06, |
| "loss": 0.6326, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9876957494407159, |
| "grad_norm": 0.423447847366333, |
| "learning_rate": 4.731969607359747e-06, |
| "loss": 0.5922, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9888143176733781, |
| "grad_norm": 0.43406492471694946, |
| "learning_rate": 4.731297089649704e-06, |
| "loss": 0.6234, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.9899328859060402, |
| "grad_norm": 0.443352073431015, |
| "learning_rate": 4.730623777199115e-06, |
| "loss": 0.6397, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9910514541387024, |
| "grad_norm": 0.4311400353908539, |
| "learning_rate": 4.7299496702478e-06, |
| "loss": 0.6073, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.9921700223713646, |
| "grad_norm": 0.4217356741428375, |
| "learning_rate": 4.729274769035861e-06, |
| "loss": 0.6177, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9932885906040269, |
| "grad_norm": 0.45120054483413696, |
| "learning_rate": 4.728599073803685e-06, |
| "loss": 0.6181, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.9944071588366891, |
| "grad_norm": 0.4567187428474426, |
| "learning_rate": 4.7279225847919375e-06, |
| "loss": 0.5839, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.9955257270693513, |
| "grad_norm": 0.4429774880409241, |
| "learning_rate": 4.727245302241572e-06, |
| "loss": 0.6033, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9966442953020134, |
| "grad_norm": 0.44755569100379944, |
| "learning_rate": 4.726567226393821e-06, |
| "loss": 0.5877, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.9977628635346756, |
| "grad_norm": 0.45908474922180176, |
| "learning_rate": 4.725888357490201e-06, |
| "loss": 0.6017, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9988814317673378, |
| "grad_norm": 0.4454707205295563, |
| "learning_rate": 4.725208695772511e-06, |
| "loss": 0.6007, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.43688732385635376, |
| "learning_rate": 4.7245282414828305e-06, |
| "loss": 0.6202, |
| "step": 894 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 5364, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 894, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.0026878455835525e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |