| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999100800287744, |
| "eval_steps": 500, |
| "global_step": 2780, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003596798849024368, |
| "grad_norm": 24.657894057885567, |
| "learning_rate": 2.3809523809523807e-08, |
| "loss": 1.9628, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007193597698048736, |
| "grad_norm": 30.667354093274785, |
| "learning_rate": 4.7619047619047613e-08, |
| "loss": 2.072, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0010790396547073105, |
| "grad_norm": 24.314787012232703, |
| "learning_rate": 7.142857142857142e-08, |
| "loss": 1.8467, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0014387195396097473, |
| "grad_norm": 24.64290123054978, |
| "learning_rate": 9.523809523809523e-08, |
| "loss": 1.6867, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.001798399424512184, |
| "grad_norm": 35.88136055532647, |
| "learning_rate": 1.1904761904761903e-07, |
| "loss": 1.806, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002158079309414621, |
| "grad_norm": 27.677265995459226, |
| "learning_rate": 1.4285714285714285e-07, |
| "loss": 1.8982, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0025177591943170577, |
| "grad_norm": 23.480436398186388, |
| "learning_rate": 1.6666666666666665e-07, |
| "loss": 1.9013, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0028774390792194945, |
| "grad_norm": 24.984895340397184, |
| "learning_rate": 1.9047619047619045e-07, |
| "loss": 1.841, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0032371189641219314, |
| "grad_norm": 27.260529495430646, |
| "learning_rate": 2.1428571428571426e-07, |
| "loss": 1.6571, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.003596798849024368, |
| "grad_norm": 27.54796671224951, |
| "learning_rate": 2.3809523809523806e-07, |
| "loss": 2.0824, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003956478733926805, |
| "grad_norm": 33.69190420427175, |
| "learning_rate": 2.619047619047619e-07, |
| "loss": 1.8739, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004316158618829242, |
| "grad_norm": 30.568165478816997, |
| "learning_rate": 2.857142857142857e-07, |
| "loss": 1.7997, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004675838503731679, |
| "grad_norm": 27.70479526484769, |
| "learning_rate": 3.095238095238095e-07, |
| "loss": 1.9171, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0050355183886341155, |
| "grad_norm": 24.018535717962276, |
| "learning_rate": 3.333333333333333e-07, |
| "loss": 1.8336, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.005395198273536552, |
| "grad_norm": 26.97998601552695, |
| "learning_rate": 3.5714285714285716e-07, |
| "loss": 1.7935, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005754878158438989, |
| "grad_norm": 24.331417458830614, |
| "learning_rate": 3.809523809523809e-07, |
| "loss": 1.9425, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006114558043341426, |
| "grad_norm": 29.93694414395371, |
| "learning_rate": 4.0476190476190476e-07, |
| "loss": 2.0054, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006474237928243863, |
| "grad_norm": 24.869232469336673, |
| "learning_rate": 4.285714285714285e-07, |
| "loss": 1.8427, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0068339178131462995, |
| "grad_norm": 33.75206502968894, |
| "learning_rate": 4.5238095238095237e-07, |
| "loss": 1.8214, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007193597698048736, |
| "grad_norm": 55.90355991587864, |
| "learning_rate": 4.761904761904761e-07, |
| "loss": 1.917, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007553277582951173, |
| "grad_norm": 41.74883319338933, |
| "learning_rate": 5e-07, |
| "loss": 1.6926, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00791295746785361, |
| "grad_norm": 25.198019181519257, |
| "learning_rate": 5.238095238095238e-07, |
| "loss": 1.8096, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.008272637352756048, |
| "grad_norm": 30.935660001988825, |
| "learning_rate": 5.476190476190477e-07, |
| "loss": 1.8552, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.008632317237658484, |
| "grad_norm": 27.76638468602567, |
| "learning_rate": 5.714285714285714e-07, |
| "loss": 1.8051, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.008991997122560921, |
| "grad_norm": 31.429674327113258, |
| "learning_rate": 5.952380952380952e-07, |
| "loss": 1.8426, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.009351677007463357, |
| "grad_norm": 25.38911642996415, |
| "learning_rate": 6.19047619047619e-07, |
| "loss": 1.8152, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.009711356892365795, |
| "grad_norm": 26.199219756218067, |
| "learning_rate": 6.428571428571429e-07, |
| "loss": 1.915, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.010071036777268231, |
| "grad_norm": 49.64436899684689, |
| "learning_rate": 6.666666666666666e-07, |
| "loss": 1.61, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.010430716662170669, |
| "grad_norm": 25.55310912363886, |
| "learning_rate": 6.904761904761904e-07, |
| "loss": 1.9478, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.010790396547073105, |
| "grad_norm": 29.322538544916103, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 1.9344, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011150076431975542, |
| "grad_norm": 40.34553988667848, |
| "learning_rate": 7.380952380952381e-07, |
| "loss": 1.7039, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.011509756316877978, |
| "grad_norm": 31.75709179665269, |
| "learning_rate": 7.619047619047618e-07, |
| "loss": 1.8525, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.011869436201780416, |
| "grad_norm": 28.65644980651086, |
| "learning_rate": 7.857142857142856e-07, |
| "loss": 1.6667, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.012229116086682852, |
| "grad_norm": 25.909345062352944, |
| "learning_rate": 8.095238095238095e-07, |
| "loss": 1.74, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01258879597158529, |
| "grad_norm": 26.409036934947846, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 1.9297, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.012948475856487725, |
| "grad_norm": 31.591347593753483, |
| "learning_rate": 8.57142857142857e-07, |
| "loss": 1.9569, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.013308155741390163, |
| "grad_norm": 27.929599025076882, |
| "learning_rate": 8.809523809523809e-07, |
| "loss": 1.7864, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.013667835626292599, |
| "grad_norm": 25.184971911331967, |
| "learning_rate": 9.047619047619047e-07, |
| "loss": 1.7775, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.014027515511195037, |
| "grad_norm": 37.127863696545276, |
| "learning_rate": 9.285714285714285e-07, |
| "loss": 1.6813, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.014387195396097473, |
| "grad_norm": 35.24940880196834, |
| "learning_rate": 9.523809523809522e-07, |
| "loss": 1.6952, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01474687528099991, |
| "grad_norm": 37.49249332791816, |
| "learning_rate": 9.761904761904762e-07, |
| "loss": 1.7087, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.015106555165902346, |
| "grad_norm": 26.412721137721803, |
| "learning_rate": 1e-06, |
| "loss": 1.6767, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.015466235050804784, |
| "grad_norm": 25.768557436561615, |
| "learning_rate": 1.0238095238095238e-06, |
| "loss": 2.1104, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01582591493570722, |
| "grad_norm": 23.10779963872319, |
| "learning_rate": 1.0476190476190476e-06, |
| "loss": 1.7659, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.016185594820609658, |
| "grad_norm": 26.556637934978006, |
| "learning_rate": 1.0714285714285714e-06, |
| "loss": 1.7681, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.016545274705512095, |
| "grad_norm": 28.212255142491433, |
| "learning_rate": 1.0952380952380954e-06, |
| "loss": 1.6662, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.01690495459041453, |
| "grad_norm": 25.607079101859597, |
| "learning_rate": 1.119047619047619e-06, |
| "loss": 1.8303, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.017264634475316967, |
| "grad_norm": 21.710874177286694, |
| "learning_rate": 1.1428571428571428e-06, |
| "loss": 1.6683, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.017624314360219405, |
| "grad_norm": 39.50586583131758, |
| "learning_rate": 1.1666666666666668e-06, |
| "loss": 1.8498, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.017983994245121843, |
| "grad_norm": 24.230732356415412, |
| "learning_rate": 1.1904761904761904e-06, |
| "loss": 1.6856, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.018343674130024277, |
| "grad_norm": 25.21450860021096, |
| "learning_rate": 1.2142857142857142e-06, |
| "loss": 1.7257, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.018703354014926715, |
| "grad_norm": 21.76255172359649, |
| "learning_rate": 1.238095238095238e-06, |
| "loss": 1.7963, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.019063033899829152, |
| "grad_norm": 24.898114689531777, |
| "learning_rate": 1.2619047619047618e-06, |
| "loss": 1.7276, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01942271378473159, |
| "grad_norm": 23.3232999678589, |
| "learning_rate": 1.2857142857142858e-06, |
| "loss": 1.7695, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.019782393669634024, |
| "grad_norm": 20.509465613870823, |
| "learning_rate": 1.3095238095238094e-06, |
| "loss": 1.6424, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.020142073554536462, |
| "grad_norm": 20.398195215022653, |
| "learning_rate": 1.3333333333333332e-06, |
| "loss": 1.717, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0205017534394389, |
| "grad_norm": 31.327297532044103, |
| "learning_rate": 1.3571428571428572e-06, |
| "loss": 1.7493, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.020861433324341337, |
| "grad_norm": 24.707276082229445, |
| "learning_rate": 1.3809523809523808e-06, |
| "loss": 1.5969, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02122111320924377, |
| "grad_norm": 28.030731682265625, |
| "learning_rate": 1.4047619047619046e-06, |
| "loss": 1.5655, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02158079309414621, |
| "grad_norm": 20.344002649738734, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 1.6057, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.021940472979048647, |
| "grad_norm": 22.72060239339675, |
| "learning_rate": 1.4523809523809522e-06, |
| "loss": 1.5706, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.022300152863951084, |
| "grad_norm": 17.85007758440868, |
| "learning_rate": 1.4761904761904762e-06, |
| "loss": 1.4835, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02265983274885352, |
| "grad_norm": 20.159874324160384, |
| "learning_rate": 1.5e-06, |
| "loss": 1.5554, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.023019512633755956, |
| "grad_norm": 19.439259199449676, |
| "learning_rate": 1.5238095238095236e-06, |
| "loss": 1.6593, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.023379192518658394, |
| "grad_norm": 34.04377995684072, |
| "learning_rate": 1.5476190476190476e-06, |
| "loss": 1.7608, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02373887240356083, |
| "grad_norm": 77.75542556402162, |
| "learning_rate": 1.5714285714285712e-06, |
| "loss": 1.4184, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.02409855228846327, |
| "grad_norm": 17.931373921754478, |
| "learning_rate": 1.5952380952380953e-06, |
| "loss": 1.4383, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.024458232173365704, |
| "grad_norm": 47.325411387811286, |
| "learning_rate": 1.619047619047619e-06, |
| "loss": 1.607, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.02481791205826814, |
| "grad_norm": 19.14892383129688, |
| "learning_rate": 1.6428571428571426e-06, |
| "loss": 1.672, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02517759194317058, |
| "grad_norm": 20.795136990227526, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.5908, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.025537271828073017, |
| "grad_norm": 245.7142651154435, |
| "learning_rate": 1.6904761904761905e-06, |
| "loss": 1.5133, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02589695171297545, |
| "grad_norm": 18.120188391248455, |
| "learning_rate": 1.714285714285714e-06, |
| "loss": 1.4675, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.02625663159787789, |
| "grad_norm": 37.54469099426714, |
| "learning_rate": 1.738095238095238e-06, |
| "loss": 1.476, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.026616311482780326, |
| "grad_norm": 20.079174089707283, |
| "learning_rate": 1.7619047619047619e-06, |
| "loss": 1.6322, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.026975991367682764, |
| "grad_norm": 26.452336796901825, |
| "learning_rate": 1.7857142857142857e-06, |
| "loss": 1.494, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.027335671252585198, |
| "grad_norm": 15.633645614281596, |
| "learning_rate": 1.8095238095238095e-06, |
| "loss": 1.4588, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.027695351137487636, |
| "grad_norm": 16.04992319096084, |
| "learning_rate": 1.833333333333333e-06, |
| "loss": 1.3675, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.028055031022390074, |
| "grad_norm": 14.054679148021458, |
| "learning_rate": 1.857142857142857e-06, |
| "loss": 1.4436, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.02841471090729251, |
| "grad_norm": 34.68375069175576, |
| "learning_rate": 1.8809523809523809e-06, |
| "loss": 1.4779, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.028774390792194945, |
| "grad_norm": 17.221014096550647, |
| "learning_rate": 1.9047619047619045e-06, |
| "loss": 1.4736, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.029134070677097383, |
| "grad_norm": 25.417838482669037, |
| "learning_rate": 1.9285714285714285e-06, |
| "loss": 1.4455, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.02949375056199982, |
| "grad_norm": 22.554088669119682, |
| "learning_rate": 1.9523809523809523e-06, |
| "loss": 1.3496, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02985343044690226, |
| "grad_norm": 13.2745362959651, |
| "learning_rate": 1.976190476190476e-06, |
| "loss": 1.4045, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.030213110331804693, |
| "grad_norm": 18.116068926216787, |
| "learning_rate": 2e-06, |
| "loss": 1.4533, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03057279021670713, |
| "grad_norm": 11.651678918563489, |
| "learning_rate": 1.9999993210623e-06, |
| "loss": 1.3389, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.030932470101609568, |
| "grad_norm": 18.39855056566554, |
| "learning_rate": 1.9999972842501214e-06, |
| "loss": 1.486, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.031292149986512, |
| "grad_norm": 43.28759772676706, |
| "learning_rate": 1.9999938895662306e-06, |
| "loss": 1.4033, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.03165182987141444, |
| "grad_norm": 20.562126556479306, |
| "learning_rate": 1.999989137015237e-06, |
| "loss": 1.3676, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.03201150975631688, |
| "grad_norm": 17.960377843247592, |
| "learning_rate": 1.999983026603594e-06, |
| "loss": 1.4498, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.032371189641219315, |
| "grad_norm": 62.91438352499693, |
| "learning_rate": 1.9999755583395984e-06, |
| "loss": 1.5197, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03273086952612175, |
| "grad_norm": 15.735368933888223, |
| "learning_rate": 1.9999667322333914e-06, |
| "loss": 1.3134, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03309054941102419, |
| "grad_norm": 25.610197392574353, |
| "learning_rate": 1.9999565482969577e-06, |
| "loss": 1.3616, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03345022929592663, |
| "grad_norm": 16.073082498652944, |
| "learning_rate": 1.999945006544126e-06, |
| "loss": 1.3045, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.03380990918082906, |
| "grad_norm": 41.00521294340134, |
| "learning_rate": 1.9999321069905685e-06, |
| "loss": 1.4665, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0341695890657315, |
| "grad_norm": 22.559526486442397, |
| "learning_rate": 1.999917849653801e-06, |
| "loss": 1.4456, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.034529268950633935, |
| "grad_norm": 19.801315354513516, |
| "learning_rate": 1.999902234553183e-06, |
| "loss": 1.354, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03488894883553637, |
| "grad_norm": 14.632262747530882, |
| "learning_rate": 1.9998852617099185e-06, |
| "loss": 1.2639, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03524862872043881, |
| "grad_norm": 15.246448969933628, |
| "learning_rate": 1.9998669311470544e-06, |
| "loss": 1.2934, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03560830860534125, |
| "grad_norm": 21.648660776413315, |
| "learning_rate": 1.9998472428894807e-06, |
| "loss": 1.2827, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.035967988490243685, |
| "grad_norm": 44.669300603767674, |
| "learning_rate": 1.9998261969639324e-06, |
| "loss": 1.3849, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03632766837514612, |
| "grad_norm": 13.223212712604015, |
| "learning_rate": 1.9998037933989864e-06, |
| "loss": 1.3349, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.036687348260048554, |
| "grad_norm": 26.408214197921858, |
| "learning_rate": 1.9997800322250646e-06, |
| "loss": 1.3088, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03704702814495099, |
| "grad_norm": 39.847526841472714, |
| "learning_rate": 1.9997549134744313e-06, |
| "loss": 1.2439, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03740670802985343, |
| "grad_norm": 19.032937778796384, |
| "learning_rate": 1.9997284371811954e-06, |
| "loss": 1.3567, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.03776638791475587, |
| "grad_norm": 16.573596403359943, |
| "learning_rate": 1.9997006033813074e-06, |
| "loss": 1.3914, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.038126067799658304, |
| "grad_norm": 15.605607152976063, |
| "learning_rate": 1.9996714121125624e-06, |
| "loss": 1.3379, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03848574768456074, |
| "grad_norm": 25.202668710987126, |
| "learning_rate": 1.9996408634145993e-06, |
| "loss": 1.2859, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03884542756946318, |
| "grad_norm": 17.576552133251525, |
| "learning_rate": 1.9996089573288983e-06, |
| "loss": 1.3148, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.03920510745436562, |
| "grad_norm": 64.99202894639889, |
| "learning_rate": 1.9995756938987842e-06, |
| "loss": 1.2813, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03956478733926805, |
| "grad_norm": 15.179557087516176, |
| "learning_rate": 1.9995410731694255e-06, |
| "loss": 1.3602, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.039924467224170486, |
| "grad_norm": 42.7405319283968, |
| "learning_rate": 1.9995050951878317e-06, |
| "loss": 1.2673, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.040284147109072924, |
| "grad_norm": 12.367805261335855, |
| "learning_rate": 1.9994677600028566e-06, |
| "loss": 1.2554, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.04064382699397536, |
| "grad_norm": 91.0547066038717, |
| "learning_rate": 1.9994290676651974e-06, |
| "loss": 1.3692, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0410035068788778, |
| "grad_norm": 16.55349276096106, |
| "learning_rate": 1.9993890182273932e-06, |
| "loss": 1.2412, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.04136318676378024, |
| "grad_norm": 17.297600766623404, |
| "learning_rate": 1.9993476117438255e-06, |
| "loss": 1.2596, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.041722866648682674, |
| "grad_norm": 16.00366028375049, |
| "learning_rate": 1.9993048482707196e-06, |
| "loss": 1.2698, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.04208254653358511, |
| "grad_norm": 15.196236021120892, |
| "learning_rate": 1.9992607278661437e-06, |
| "loss": 1.1785, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.04244222641848754, |
| "grad_norm": 22.149541341272936, |
| "learning_rate": 1.999215250590006e-06, |
| "loss": 1.3311, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.04280190630338998, |
| "grad_norm": 21.977962462219512, |
| "learning_rate": 1.9991684165040613e-06, |
| "loss": 1.2641, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04316158618829242, |
| "grad_norm": 14.111609395101187, |
| "learning_rate": 1.999120225671903e-06, |
| "loss": 1.1405, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.043521266073194856, |
| "grad_norm": 20.62292555056753, |
| "learning_rate": 1.999070678158968e-06, |
| "loss": 1.3071, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.043880945958097294, |
| "grad_norm": 23.38451805462871, |
| "learning_rate": 1.9990197740325364e-06, |
| "loss": 1.108, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04424062584299973, |
| "grad_norm": 13.558092001639237, |
| "learning_rate": 1.9989675133617293e-06, |
| "loss": 1.2362, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04460030572790217, |
| "grad_norm": 14.905071611078164, |
| "learning_rate": 1.9989138962175105e-06, |
| "loss": 1.1732, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04495998561280461, |
| "grad_norm": 39.48608516481057, |
| "learning_rate": 1.9988589226726847e-06, |
| "loss": 1.2309, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04531966549770704, |
| "grad_norm": 12.779123007395382, |
| "learning_rate": 1.9988025928019e-06, |
| "loss": 1.1954, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.045679345382609475, |
| "grad_norm": 17.35216140700479, |
| "learning_rate": 1.9987449066816448e-06, |
| "loss": 1.1587, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.04603902526751191, |
| "grad_norm": 18.05552910414134, |
| "learning_rate": 1.99868586439025e-06, |
| "loss": 1.1195, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.04639870515241435, |
| "grad_norm": 25.563244537484564, |
| "learning_rate": 1.9986254660078872e-06, |
| "loss": 1.0623, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04675838503731679, |
| "grad_norm": 11.114112784403885, |
| "learning_rate": 1.9985637116165705e-06, |
| "loss": 1.1891, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.047118064922219226, |
| "grad_norm": 11.694158316033255, |
| "learning_rate": 1.998500601300154e-06, |
| "loss": 1.2493, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04747774480712166, |
| "grad_norm": 18.678734986125306, |
| "learning_rate": 1.998436135144334e-06, |
| "loss": 1.1552, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0478374246920241, |
| "grad_norm": 11.975265356576603, |
| "learning_rate": 1.998370313236648e-06, |
| "loss": 1.1419, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.04819710457692654, |
| "grad_norm": 11.566190390478454, |
| "learning_rate": 1.998303135666473e-06, |
| "loss": 1.1454, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04855678446182897, |
| "grad_norm": 9.162200598633703, |
| "learning_rate": 1.9982346025250284e-06, |
| "loss": 1.1242, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04891646434673141, |
| "grad_norm": 11.9348502171034, |
| "learning_rate": 1.9981647139053736e-06, |
| "loss": 1.12, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.049276144231633845, |
| "grad_norm": 22.085669626967896, |
| "learning_rate": 1.998093469902408e-06, |
| "loss": 1.0606, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.04963582411653628, |
| "grad_norm": 16.735251001069905, |
| "learning_rate": 1.998020870612873e-06, |
| "loss": 1.0911, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.04999550400143872, |
| "grad_norm": 9.426173644069113, |
| "learning_rate": 1.997946916135349e-06, |
| "loss": 1.112, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.05035518388634116, |
| "grad_norm": 20.191952895136268, |
| "learning_rate": 1.9978716065702566e-06, |
| "loss": 1.1656, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.050714863771243596, |
| "grad_norm": 13.677037264490226, |
| "learning_rate": 1.9977949420198572e-06, |
| "loss": 1.0637, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.05107454365614603, |
| "grad_norm": 12.033881085585996, |
| "learning_rate": 1.997716922588252e-06, |
| "loss": 1.1782, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.051434223541048464, |
| "grad_norm": 14.640616921424659, |
| "learning_rate": 1.9976375483813812e-06, |
| "loss": 1.0661, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0517939034259509, |
| "grad_norm": 17.648574437085983, |
| "learning_rate": 1.9975568195070253e-06, |
| "loss": 1.101, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05215358331085334, |
| "grad_norm": 12.44101732999933, |
| "learning_rate": 1.997474736074804e-06, |
| "loss": 1.0811, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05251326319575578, |
| "grad_norm": 17.633040231320773, |
| "learning_rate": 1.997391298196176e-06, |
| "loss": 1.024, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.052872943080658215, |
| "grad_norm": 10.134440849280935, |
| "learning_rate": 1.99730650598444e-06, |
| "loss": 1.0928, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05323262296556065, |
| "grad_norm": 77.64793512021097, |
| "learning_rate": 1.9972203595547333e-06, |
| "loss": 1.0911, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05359230285046309, |
| "grad_norm": 19.966471706357808, |
| "learning_rate": 1.997132859024032e-06, |
| "loss": 1.0408, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.05395198273536553, |
| "grad_norm": 11.447878411704954, |
| "learning_rate": 1.99704400451115e-06, |
| "loss": 1.0656, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05431166262026796, |
| "grad_norm": 41.450180842662306, |
| "learning_rate": 1.9969537961367422e-06, |
| "loss": 1.1232, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.054671342505170396, |
| "grad_norm": 26.578399242870077, |
| "learning_rate": 1.9968622340232992e-06, |
| "loss": 1.0539, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.055031022390072834, |
| "grad_norm": 9.67652417545913, |
| "learning_rate": 1.9967693182951516e-06, |
| "loss": 1.0482, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05539070227497527, |
| "grad_norm": 10.301154397842641, |
| "learning_rate": 1.996675049078467e-06, |
| "loss": 1.0324, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.05575038215987771, |
| "grad_norm": 10.804979711551514, |
| "learning_rate": 1.9965794265012514e-06, |
| "loss": 1.0874, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.05611006204478015, |
| "grad_norm": 29.866331694883616, |
| "learning_rate": 1.9964824506933476e-06, |
| "loss": 0.9488, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.056469741929682585, |
| "grad_norm": 16.536655370052248, |
| "learning_rate": 1.9963841217864383e-06, |
| "loss": 1.1381, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.05682942181458502, |
| "grad_norm": 8.454711341009409, |
| "learning_rate": 1.9962844399140403e-06, |
| "loss": 1.0745, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.05718910169948745, |
| "grad_norm": 10.592894543320343, |
| "learning_rate": 1.99618340521151e-06, |
| "loss": 1.1185, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.05754878158438989, |
| "grad_norm": 15.744873745417562, |
| "learning_rate": 1.99608101781604e-06, |
| "loss": 1.0278, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05790846146929233, |
| "grad_norm": 35.28796719476405, |
| "learning_rate": 1.995977277866659e-06, |
| "loss": 1.0456, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.058268141354194766, |
| "grad_norm": 8.937312674896397, |
| "learning_rate": 1.9958721855042337e-06, |
| "loss": 1.0146, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.058627821239097204, |
| "grad_norm": 15.188344915353202, |
| "learning_rate": 1.9957657408714654e-06, |
| "loss": 1.0028, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.05898750112399964, |
| "grad_norm": 21.617655880303055, |
| "learning_rate": 1.995657944112894e-06, |
| "loss": 1.0596, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.05934718100890208, |
| "grad_norm": 24.033180560114804, |
| "learning_rate": 1.995548795374893e-06, |
| "loss": 0.9765, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.05970686089380452, |
| "grad_norm": 18.980234992969166, |
| "learning_rate": 1.9954382948056734e-06, |
| "loss": 1.0193, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06006654077870695, |
| "grad_norm": 10.685604514771951, |
| "learning_rate": 1.9953264425552803e-06, |
| "loss": 1.0075, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.060426220663609385, |
| "grad_norm": 20.39061216772055, |
| "learning_rate": 1.9952132387755962e-06, |
| "loss": 1.1359, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.06078590054851182, |
| "grad_norm": 94.23235824636771, |
| "learning_rate": 1.995098683620337e-06, |
| "loss": 0.9968, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06114558043341426, |
| "grad_norm": 20.38255400065285, |
| "learning_rate": 1.994982777245055e-06, |
| "loss": 1.0343, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0615052603183167, |
| "grad_norm": 29.676079933187594, |
| "learning_rate": 1.994865519807136e-06, |
| "loss": 1.0832, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.061864940203219136, |
| "grad_norm": 12.901534441904019, |
| "learning_rate": 1.9947469114658014e-06, |
| "loss": 1.0846, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.062224620088121574, |
| "grad_norm": 10.486038081142027, |
| "learning_rate": 1.9946269523821066e-06, |
| "loss": 1.0267, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.062584299973024, |
| "grad_norm": 22.73799500944774, |
| "learning_rate": 1.9945056427189404e-06, |
| "loss": 0.9535, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06294397985792645, |
| "grad_norm": 16.85081562222787, |
| "learning_rate": 1.994382982641027e-06, |
| "loss": 0.9932, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06330365974282888, |
| "grad_norm": 16.251880100755784, |
| "learning_rate": 1.994258972314923e-06, |
| "loss": 1.0392, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.06366333962773132, |
| "grad_norm": 32.41304280526982, |
| "learning_rate": 1.994133611909019e-06, |
| "loss": 1.0575, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06402301951263376, |
| "grad_norm": 14.947138189495567, |
| "learning_rate": 1.994006901593539e-06, |
| "loss": 0.9463, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06438269939753619, |
| "grad_norm": 10.534537683564043, |
| "learning_rate": 1.99387884154054e-06, |
| "loss": 1.0129, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06474237928243863, |
| "grad_norm": 18.770367176923315, |
| "learning_rate": 1.993749431923911e-06, |
| "loss": 1.0142, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06510205916734106, |
| "grad_norm": 11.869627533386478, |
| "learning_rate": 1.993618672919375e-06, |
| "loss": 1.0921, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.0654617390522435, |
| "grad_norm": 12.213531444916233, |
| "learning_rate": 1.993486564704486e-06, |
| "loss": 0.9263, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06582141893714594, |
| "grad_norm": 8.304800269697004, |
| "learning_rate": 1.9933531074586295e-06, |
| "loss": 1.0208, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.06618109882204838, |
| "grad_norm": 11.210120271679068, |
| "learning_rate": 1.9932183013630252e-06, |
| "loss": 0.949, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.06654077870695081, |
| "grad_norm": 10.674834342779953, |
| "learning_rate": 1.993082146600723e-06, |
| "loss": 0.9653, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06690045859185326, |
| "grad_norm": 9.157668727064559, |
| "learning_rate": 1.9929446433566033e-06, |
| "loss": 0.9695, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06726013847675569, |
| "grad_norm": 35.81790736149004, |
| "learning_rate": 1.9928057918173785e-06, |
| "loss": 1.0061, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06761981836165812, |
| "grad_norm": 14.232279118378148, |
| "learning_rate": 1.992665592171592e-06, |
| "loss": 1.0155, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06797949824656056, |
| "grad_norm": 15.793796762381849, |
| "learning_rate": 1.9925240446096176e-06, |
| "loss": 1.0623, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.068339178131463, |
| "grad_norm": 39.43576277183396, |
| "learning_rate": 1.992381149323659e-06, |
| "loss": 1.0001, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06869885801636544, |
| "grad_norm": 31.001449639725706, |
| "learning_rate": 1.9922369065077497e-06, |
| "loss": 1.0187, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.06905853790126787, |
| "grad_norm": 8.228870313870372, |
| "learning_rate": 1.992091316357754e-06, |
| "loss": 0.9295, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.06941821778617031, |
| "grad_norm": 29.05814131108659, |
| "learning_rate": 1.9919443790713656e-06, |
| "loss": 0.9984, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.06977789767107274, |
| "grad_norm": 110.22787600422478, |
| "learning_rate": 1.991796094848106e-06, |
| "loss": 0.9936, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07013757755597518, |
| "grad_norm": 12.618535381583916, |
| "learning_rate": 1.9916464638893276e-06, |
| "loss": 0.9948, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07049725744087762, |
| "grad_norm": 16.072952603434494, |
| "learning_rate": 1.9914954863982104e-06, |
| "loss": 0.9892, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07085693732578005, |
| "grad_norm": 96.02536139809511, |
| "learning_rate": 1.991343162579763e-06, |
| "loss": 0.9598, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.0712166172106825, |
| "grad_norm": 38.460090688210364, |
| "learning_rate": 1.9911894926408216e-06, |
| "loss": 1.0457, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.07157629709558493, |
| "grad_norm": 8.085643587932019, |
| "learning_rate": 1.9910344767900516e-06, |
| "loss": 0.9666, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07193597698048737, |
| "grad_norm": 9.409402571435368, |
| "learning_rate": 1.990878115237945e-06, |
| "loss": 1.0249, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0722956568653898, |
| "grad_norm": 7.734707494269557, |
| "learning_rate": 1.9907204081968206e-06, |
| "loss": 0.9392, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07265533675029225, |
| "grad_norm": 26.66088795484755, |
| "learning_rate": 1.990561355880826e-06, |
| "loss": 1.0388, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.07301501663519468, |
| "grad_norm": 10.493587163710068, |
| "learning_rate": 1.990400958505934e-06, |
| "loss": 1.0221, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07337469652009711, |
| "grad_norm": 12.851538063052784, |
| "learning_rate": 1.9902392162899436e-06, |
| "loss": 1.0027, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.07373437640499955, |
| "grad_norm": 16.34581335012308, |
| "learning_rate": 1.9900761294524815e-06, |
| "loss": 0.8526, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07409405628990198, |
| "grad_norm": 15.465585604721932, |
| "learning_rate": 1.989911698214999e-06, |
| "loss": 1.0492, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.07445373617480443, |
| "grad_norm": 13.85049814171573, |
| "learning_rate": 1.9897459228007732e-06, |
| "loss": 0.9283, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.07481341605970686, |
| "grad_norm": 12.445607790320484, |
| "learning_rate": 1.989578803434907e-06, |
| "loss": 0.9975, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.0751730959446093, |
| "grad_norm": 8.566268373195546, |
| "learning_rate": 1.9894103403443263e-06, |
| "loss": 0.9703, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07553277582951173, |
| "grad_norm": 10.895634757757644, |
| "learning_rate": 1.9892405337577844e-06, |
| "loss": 1.0748, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07589245571441418, |
| "grad_norm": 10.356787874855785, |
| "learning_rate": 1.989069383905856e-06, |
| "loss": 0.9088, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.07625213559931661, |
| "grad_norm": 11.44872572247474, |
| "learning_rate": 1.9888968910209433e-06, |
| "loss": 0.9444, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.07661181548421904, |
| "grad_norm": 29.747404916680768, |
| "learning_rate": 1.988723055337268e-06, |
| "loss": 0.9275, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07697149536912148, |
| "grad_norm": 8.787407613024634, |
| "learning_rate": 1.988547877090879e-06, |
| "loss": 0.9263, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07733117525402392, |
| "grad_norm": 13.958900773060638, |
| "learning_rate": 1.988371356519646e-06, |
| "loss": 0.9726, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07769085513892636, |
| "grad_norm": 24.499239616110906, |
| "learning_rate": 1.988193493863261e-06, |
| "loss": 0.9279, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07805053502382879, |
| "grad_norm": 15.629383308803032, |
| "learning_rate": 1.988014289363241e-06, |
| "loss": 0.99, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.07841021490873124, |
| "grad_norm": 14.741040148864387, |
| "learning_rate": 1.987833743262922e-06, |
| "loss": 0.9291, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.07876989479363367, |
| "grad_norm": 10.400173804859792, |
| "learning_rate": 1.9876518558074634e-06, |
| "loss": 0.9717, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0791295746785361, |
| "grad_norm": 219.94552736960952, |
| "learning_rate": 1.9874686272438462e-06, |
| "loss": 0.9617, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07948925456343854, |
| "grad_norm": 16.654361262872754, |
| "learning_rate": 1.987284057820872e-06, |
| "loss": 0.954, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.07984893444834097, |
| "grad_norm": 19.913358977779705, |
| "learning_rate": 1.9870981477891625e-06, |
| "loss": 0.9919, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08020861433324342, |
| "grad_norm": 26.51927334904652, |
| "learning_rate": 1.9869108974011603e-06, |
| "loss": 0.9587, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.08056829421814585, |
| "grad_norm": 7.743524165285064, |
| "learning_rate": 1.9867223069111286e-06, |
| "loss": 0.9628, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.08092797410304829, |
| "grad_norm": 44.7631620880274, |
| "learning_rate": 1.98653237657515e-06, |
| "loss": 0.9877, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08128765398795072, |
| "grad_norm": 15.42485811260832, |
| "learning_rate": 1.9863411066511254e-06, |
| "loss": 1.0684, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.08164733387285317, |
| "grad_norm": 8.709227213717968, |
| "learning_rate": 1.986148497398776e-06, |
| "loss": 0.9141, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0820070137577556, |
| "grad_norm": 32.36945680273078, |
| "learning_rate": 1.985954549079641e-06, |
| "loss": 0.9186, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.08236669364265803, |
| "grad_norm": 11.980831305408493, |
| "learning_rate": 1.9857592619570782e-06, |
| "loss": 0.9199, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.08272637352756047, |
| "grad_norm": 11.952525858137294, |
| "learning_rate": 1.9855626362962634e-06, |
| "loss": 0.9792, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0830860534124629, |
| "grad_norm": 11.27300140826299, |
| "learning_rate": 1.9853646723641893e-06, |
| "loss": 0.9684, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.08344573329736535, |
| "grad_norm": 13.15367279550054, |
| "learning_rate": 1.9851653704296664e-06, |
| "loss": 0.9036, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.08380541318226778, |
| "grad_norm": 11.20570851960056, |
| "learning_rate": 1.9849647307633218e-06, |
| "loss": 0.8904, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.08416509306717022, |
| "grad_norm": 36.5714456594361, |
| "learning_rate": 1.9847627536375997e-06, |
| "loss": 0.9049, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.08452477295207265, |
| "grad_norm": 14.09924306368202, |
| "learning_rate": 1.9845594393267594e-06, |
| "loss": 0.9414, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08488445283697509, |
| "grad_norm": 9.18220985691734, |
| "learning_rate": 1.984354788106876e-06, |
| "loss": 0.9278, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.08524413272187753, |
| "grad_norm": 10.030956245273718, |
| "learning_rate": 1.9841488002558414e-06, |
| "loss": 0.9914, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.08560381260677996, |
| "grad_norm": 25.38489005292741, |
| "learning_rate": 1.9839414760533604e-06, |
| "loss": 0.9513, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.0859634924916824, |
| "grad_norm": 28.306112709700646, |
| "learning_rate": 1.9837328157809545e-06, |
| "loss": 0.9452, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08632317237658484, |
| "grad_norm": 8.256455535562468, |
| "learning_rate": 1.983522819721957e-06, |
| "loss": 0.8817, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08668285226148728, |
| "grad_norm": 11.283564132645212, |
| "learning_rate": 1.9833114881615176e-06, |
| "loss": 0.8688, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.08704253214638971, |
| "grad_norm": 8.739851010743765, |
| "learning_rate": 1.9830988213865977e-06, |
| "loss": 0.901, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.08740221203129216, |
| "grad_norm": 13.429560807663824, |
| "learning_rate": 1.9828848196859724e-06, |
| "loss": 0.9433, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.08776189191619459, |
| "grad_norm": 9.762304293644194, |
| "learning_rate": 1.9826694833502295e-06, |
| "loss": 0.8557, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.08812157180109702, |
| "grad_norm": 18.78325006841531, |
| "learning_rate": 1.9824528126717683e-06, |
| "loss": 0.8549, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08848125168599946, |
| "grad_norm": 9.560722810791209, |
| "learning_rate": 1.9822348079448013e-06, |
| "loss": 0.9296, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0888409315709019, |
| "grad_norm": 27.147006922557473, |
| "learning_rate": 1.982015469465351e-06, |
| "loss": 0.8618, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08920061145580434, |
| "grad_norm": 20.124472837337276, |
| "learning_rate": 1.9817947975312526e-06, |
| "loss": 0.911, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.08956029134070677, |
| "grad_norm": 12.301748162630073, |
| "learning_rate": 1.9815727924421506e-06, |
| "loss": 0.8915, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.08991997122560921, |
| "grad_norm": 9.574588238888932, |
| "learning_rate": 1.9813494544995e-06, |
| "loss": 0.8568, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09027965111051164, |
| "grad_norm": 8.558822505649715, |
| "learning_rate": 1.9811247840065667e-06, |
| "loss": 0.9092, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.09063933099541407, |
| "grad_norm": 10.687295740498575, |
| "learning_rate": 1.9808987812684244e-06, |
| "loss": 0.8546, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.09099901088031652, |
| "grad_norm": 8.278440678155265, |
| "learning_rate": 1.980671446591957e-06, |
| "loss": 0.9665, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.09135869076521895, |
| "grad_norm": 10.179982442382315, |
| "learning_rate": 1.9804427802858566e-06, |
| "loss": 1.0027, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.0917183706501214, |
| "grad_norm": 33.91581096469286, |
| "learning_rate": 1.980212782660624e-06, |
| "loss": 0.9011, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09207805053502383, |
| "grad_norm": 11.022829190878873, |
| "learning_rate": 1.9799814540285666e-06, |
| "loss": 0.9284, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.09243773041992627, |
| "grad_norm": 11.446092031229947, |
| "learning_rate": 1.9797487947038e-06, |
| "loss": 0.8747, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.0927974103048287, |
| "grad_norm": 12.379375654292929, |
| "learning_rate": 1.9795148050022473e-06, |
| "loss": 0.9398, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.09315709018973115, |
| "grad_norm": 10.665168488416498, |
| "learning_rate": 1.9792794852416368e-06, |
| "loss": 0.898, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.09351677007463358, |
| "grad_norm": 16.559548126823888, |
| "learning_rate": 1.979042835741503e-06, |
| "loss": 0.923, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.093876449959536, |
| "grad_norm": 14.215417562017038, |
| "learning_rate": 1.978804856823187e-06, |
| "loss": 0.9251, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.09423612984443845, |
| "grad_norm": 9.201940638447635, |
| "learning_rate": 1.9785655488098346e-06, |
| "loss": 0.8465, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.09459580972934088, |
| "grad_norm": 11.452188467726437, |
| "learning_rate": 1.978324912026396e-06, |
| "loss": 0.8753, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.09495548961424333, |
| "grad_norm": 17.15325293077976, |
| "learning_rate": 1.9780829467996257e-06, |
| "loss": 0.9477, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.09531516949914576, |
| "grad_norm": 10.004227224146033, |
| "learning_rate": 1.977839653458083e-06, |
| "loss": 0.9036, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.0956748493840482, |
| "grad_norm": 8.616156980769292, |
| "learning_rate": 1.9775950323321298e-06, |
| "loss": 0.939, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.09603452926895063, |
| "grad_norm": 14.128821933261587, |
| "learning_rate": 1.9773490837539304e-06, |
| "loss": 0.9686, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.09639420915385308, |
| "grad_norm": 14.668127731373431, |
| "learning_rate": 1.9771018080574533e-06, |
| "loss": 0.8869, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09675388903875551, |
| "grad_norm": 12.018016633334687, |
| "learning_rate": 1.9768532055784677e-06, |
| "loss": 0.9346, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09711356892365794, |
| "grad_norm": 16.455704186742906, |
| "learning_rate": 1.976603276654544e-06, |
| "loss": 0.88, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09747324880856038, |
| "grad_norm": 34.040812158589354, |
| "learning_rate": 1.976352021625056e-06, |
| "loss": 0.8906, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.09783292869346281, |
| "grad_norm": 9.26169790660666, |
| "learning_rate": 1.9760994408311753e-06, |
| "loss": 0.9632, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.09819260857836526, |
| "grad_norm": 10.463662543924356, |
| "learning_rate": 1.9758455346158764e-06, |
| "loss": 0.9285, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.09855228846326769, |
| "grad_norm": 11.246341713615287, |
| "learning_rate": 1.9755903033239315e-06, |
| "loss": 0.9197, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.09891196834817013, |
| "grad_norm": 9.598455481645157, |
| "learning_rate": 1.975333747301913e-06, |
| "loss": 0.8562, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.09927164823307257, |
| "grad_norm": 12.452620100983056, |
| "learning_rate": 1.9750758668981923e-06, |
| "loss": 0.9208, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.099631328117975, |
| "grad_norm": 16.907941826083178, |
| "learning_rate": 1.974816662462939e-06, |
| "loss": 0.8863, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.09999100800287744, |
| "grad_norm": 16.54484269720697, |
| "learning_rate": 1.9745561343481196e-06, |
| "loss": 0.9162, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10035068788777987, |
| "grad_norm": 16.024180075889007, |
| "learning_rate": 1.974294282907499e-06, |
| "loss": 0.9985, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.10071036777268232, |
| "grad_norm": 14.096905052340583, |
| "learning_rate": 1.97403110849664e-06, |
| "loss": 0.9025, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10107004765758475, |
| "grad_norm": 25.494600045804013, |
| "learning_rate": 1.973766611472899e-06, |
| "loss": 0.892, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.10142972754248719, |
| "grad_norm": 11.168220744518289, |
| "learning_rate": 1.9735007921954314e-06, |
| "loss": 0.8846, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.10178940742738962, |
| "grad_norm": 12.99935475702517, |
| "learning_rate": 1.9732336510251863e-06, |
| "loss": 0.8938, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.10214908731229207, |
| "grad_norm": 13.287549329776299, |
| "learning_rate": 1.9729651883249074e-06, |
| "loss": 0.8901, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1025087671971945, |
| "grad_norm": 9.310937580536969, |
| "learning_rate": 1.972695404459134e-06, |
| "loss": 0.9032, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.10286844708209693, |
| "grad_norm": 22.642549782369525, |
| "learning_rate": 1.9724242997941994e-06, |
| "loss": 0.8748, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.10322812696699937, |
| "grad_norm": 11.391713797213066, |
| "learning_rate": 1.9721518746982296e-06, |
| "loss": 0.9434, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1035878068519018, |
| "grad_norm": 10.277355772563425, |
| "learning_rate": 1.971878129541144e-06, |
| "loss": 0.8842, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.10394748673680425, |
| "grad_norm": 51.70100815959815, |
| "learning_rate": 1.971603064694654e-06, |
| "loss": 0.9127, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.10430716662170668, |
| "grad_norm": 13.914980770347539, |
| "learning_rate": 1.971326680532264e-06, |
| "loss": 0.9081, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10466684650660912, |
| "grad_norm": 10.948719016310948, |
| "learning_rate": 1.971048977429269e-06, |
| "loss": 0.8867, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.10502652639151155, |
| "grad_norm": 11.543650466595189, |
| "learning_rate": 1.970769955762755e-06, |
| "loss": 0.9236, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.10538620627641399, |
| "grad_norm": 17.480050188284167, |
| "learning_rate": 1.9704896159115995e-06, |
| "loss": 0.8941, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.10574588616131643, |
| "grad_norm": 11.361668925439416, |
| "learning_rate": 1.970207958256468e-06, |
| "loss": 0.8741, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.10610556604621886, |
| "grad_norm": 11.004344996715577, |
| "learning_rate": 1.969924983179817e-06, |
| "loss": 0.8695, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1064652459311213, |
| "grad_norm": 12.097699216737382, |
| "learning_rate": 1.9696406910658916e-06, |
| "loss": 0.861, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.10682492581602374, |
| "grad_norm": 8.257198644737876, |
| "learning_rate": 1.9693550823007247e-06, |
| "loss": 0.8375, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.10718460570092618, |
| "grad_norm": 22.0001141358469, |
| "learning_rate": 1.9690681572721376e-06, |
| "loss": 0.8442, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.10754428558582861, |
| "grad_norm": 14.233797906171292, |
| "learning_rate": 1.9687799163697384e-06, |
| "loss": 0.9239, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.10790396547073106, |
| "grad_norm": 11.770942295021396, |
| "learning_rate": 1.968490359984923e-06, |
| "loss": 0.9098, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10826364535563349, |
| "grad_norm": 23.60257924918355, |
| "learning_rate": 1.9681994885108726e-06, |
| "loss": 0.8693, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.10862332524053592, |
| "grad_norm": 14.960614091519401, |
| "learning_rate": 1.967907302342554e-06, |
| "loss": 0.8315, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.10898300512543836, |
| "grad_norm": 20.45544688064615, |
| "learning_rate": 1.96761380187672e-06, |
| "loss": 0.9464, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.10934268501034079, |
| "grad_norm": 14.957920336704825, |
| "learning_rate": 1.967318987511908e-06, |
| "loss": 0.9121, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.10970236489524324, |
| "grad_norm": 26.138065669695667, |
| "learning_rate": 1.967022859648438e-06, |
| "loss": 0.8295, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11006204478014567, |
| "grad_norm": 18.768323476716652, |
| "learning_rate": 1.966725418688416e-06, |
| "loss": 0.8819, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.11042172466504811, |
| "grad_norm": 17.74264546609931, |
| "learning_rate": 1.9664266650357295e-06, |
| "loss": 0.9849, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.11078140454995054, |
| "grad_norm": 14.258252350797768, |
| "learning_rate": 1.966126599096048e-06, |
| "loss": 0.9227, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.11114108443485297, |
| "grad_norm": 10.994564258836762, |
| "learning_rate": 1.965825221276825e-06, |
| "loss": 0.9719, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.11150076431975542, |
| "grad_norm": 15.122948478081264, |
| "learning_rate": 1.9655225319872925e-06, |
| "loss": 0.9255, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11186044420465785, |
| "grad_norm": 12.314006203316827, |
| "learning_rate": 1.9652185316384657e-06, |
| "loss": 0.9275, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1122201240895603, |
| "grad_norm": 23.25875296453776, |
| "learning_rate": 1.964913220643139e-06, |
| "loss": 0.9677, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.11257980397446272, |
| "grad_norm": 17.358740759679804, |
| "learning_rate": 1.964606599415887e-06, |
| "loss": 0.9023, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.11293948385936517, |
| "grad_norm": 12.346597085386588, |
| "learning_rate": 1.9642986683730623e-06, |
| "loss": 0.906, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1132991637442676, |
| "grad_norm": 16.91901558140758, |
| "learning_rate": 1.963989427932798e-06, |
| "loss": 0.9201, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.11365884362917004, |
| "grad_norm": 9.741199157688925, |
| "learning_rate": 1.9636788785150034e-06, |
| "loss": 0.9499, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.11401852351407248, |
| "grad_norm": 13.262595663362411, |
| "learning_rate": 1.963367020541366e-06, |
| "loss": 0.914, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.1143782033989749, |
| "grad_norm": 19.22847042204688, |
| "learning_rate": 1.9630538544353504e-06, |
| "loss": 0.9313, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.11473788328387735, |
| "grad_norm": 13.02293930823946, |
| "learning_rate": 1.9627393806221965e-06, |
| "loss": 0.8973, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.11509756316877978, |
| "grad_norm": 76.118414409978, |
| "learning_rate": 1.962423599528921e-06, |
| "loss": 0.8645, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11545724305368223, |
| "grad_norm": 19.283564453806168, |
| "learning_rate": 1.9621065115843153e-06, |
| "loss": 0.9104, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.11581692293858466, |
| "grad_norm": 21.313272273661486, |
| "learning_rate": 1.961788117218945e-06, |
| "loss": 0.8843, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1161766028234871, |
| "grad_norm": 51.153040936522814, |
| "learning_rate": 1.96146841686515e-06, |
| "loss": 0.9051, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.11653628270838953, |
| "grad_norm": 11.133909813691858, |
| "learning_rate": 1.9611474109570443e-06, |
| "loss": 0.862, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.11689596259329196, |
| "grad_norm": 10.558210832457515, |
| "learning_rate": 1.9608250999305127e-06, |
| "loss": 0.8813, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.11725564247819441, |
| "grad_norm": 16.281419689047077, |
| "learning_rate": 1.9605014842232148e-06, |
| "loss": 0.8851, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.11761532236309684, |
| "grad_norm": 16.565785391837323, |
| "learning_rate": 1.9601765642745793e-06, |
| "loss": 1.0049, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.11797500224799928, |
| "grad_norm": 14.5572818578826, |
| "learning_rate": 1.9598503405258075e-06, |
| "loss": 0.9034, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.11833468213290171, |
| "grad_norm": 11.868932975556522, |
| "learning_rate": 1.9595228134198705e-06, |
| "loss": 0.8458, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.11869436201780416, |
| "grad_norm": 9.98695028024214, |
| "learning_rate": 1.9591939834015094e-06, |
| "loss": 0.8226, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11905404190270659, |
| "grad_norm": 9.223681177074399, |
| "learning_rate": 1.958863850917234e-06, |
| "loss": 0.8638, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.11941372178760903, |
| "grad_norm": 12.89149079121029, |
| "learning_rate": 1.9585324164153234e-06, |
| "loss": 0.8801, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.11977340167251146, |
| "grad_norm": 18.140293724032134, |
| "learning_rate": 1.9581996803458243e-06, |
| "loss": 0.9153, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.1201330815574139, |
| "grad_norm": 33.52740780248001, |
| "learning_rate": 1.957865643160551e-06, |
| "loss": 0.8737, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.12049276144231634, |
| "grad_norm": 8.280847090521378, |
| "learning_rate": 1.9575303053130846e-06, |
| "loss": 0.8294, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.12085244132721877, |
| "grad_norm": 7.678778659702913, |
| "learning_rate": 1.9571936672587716e-06, |
| "loss": 0.8831, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.12121212121212122, |
| "grad_norm": 15.784348499821462, |
| "learning_rate": 1.9568557294547242e-06, |
| "loss": 0.9263, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.12157180109702365, |
| "grad_norm": 12.10096678184232, |
| "learning_rate": 1.9565164923598204e-06, |
| "loss": 0.8828, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.12193148098192609, |
| "grad_norm": 8.816663905269253, |
| "learning_rate": 1.956175956434702e-06, |
| "loss": 0.8907, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.12229116086682852, |
| "grad_norm": 29.90225834951316, |
| "learning_rate": 1.955834122141774e-06, |
| "loss": 0.7933, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.12265084075173097, |
| "grad_norm": 10.292172711015832, |
| "learning_rate": 1.9554909899452053e-06, |
| "loss": 0.8857, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1230105206366334, |
| "grad_norm": 39.15541881489192, |
| "learning_rate": 1.955146560310926e-06, |
| "loss": 0.8972, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.12337020052153583, |
| "grad_norm": 11.893045761955474, |
| "learning_rate": 1.954800833706629e-06, |
| "loss": 0.8419, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.12372988040643827, |
| "grad_norm": 68.55531647862284, |
| "learning_rate": 1.954453810601768e-06, |
| "loss": 0.8472, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1240895602913407, |
| "grad_norm": 10.504320449547713, |
| "learning_rate": 1.954105491467557e-06, |
| "loss": 0.9026, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.12444924017624315, |
| "grad_norm": 12.598885612377023, |
| "learning_rate": 1.9537558767769696e-06, |
| "loss": 0.8975, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.12480892006114558, |
| "grad_norm": 23.969375774147824, |
| "learning_rate": 1.95340496700474e-06, |
| "loss": 0.916, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.125168599946048, |
| "grad_norm": 13.999278996159775, |
| "learning_rate": 1.953052762627359e-06, |
| "loss": 0.7556, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.12552827983095047, |
| "grad_norm": 16.442131135298013, |
| "learning_rate": 1.9526992641230767e-06, |
| "loss": 0.8536, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1258879597158529, |
| "grad_norm": 13.146056020396799, |
| "learning_rate": 1.9523444719719e-06, |
| "loss": 0.9139, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12624763960075533, |
| "grad_norm": 12.700247793000134, |
| "learning_rate": 1.951988386655592e-06, |
| "loss": 0.8299, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.12660731948565776, |
| "grad_norm": 11.963896379243318, |
| "learning_rate": 1.951631008657673e-06, |
| "loss": 0.8569, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1269669993705602, |
| "grad_norm": 52.23592527717699, |
| "learning_rate": 1.9512723384634173e-06, |
| "loss": 0.8283, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.12732667925546265, |
| "grad_norm": 10.03356695138902, |
| "learning_rate": 1.9509123765598543e-06, |
| "loss": 0.8658, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.12768635914036508, |
| "grad_norm": 9.293741966429987, |
| "learning_rate": 1.9505511234357674e-06, |
| "loss": 0.7894, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1280460390252675, |
| "grad_norm": 10.747400133317525, |
| "learning_rate": 1.9501885795816933e-06, |
| "loss": 0.7884, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.12840571891016994, |
| "grad_norm": 83.51327413134986, |
| "learning_rate": 1.949824745489922e-06, |
| "loss": 0.8946, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.12876539879507237, |
| "grad_norm": 30.717710225026778, |
| "learning_rate": 1.949459621654494e-06, |
| "loss": 0.8959, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.12912507867997483, |
| "grad_norm": 30.82688771552335, |
| "learning_rate": 1.949093208571202e-06, |
| "loss": 0.8639, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.12948475856487726, |
| "grad_norm": 13.061305788972607, |
| "learning_rate": 1.9487255067375904e-06, |
| "loss": 0.9012, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1298444384497797, |
| "grad_norm": 13.398942621099714, |
| "learning_rate": 1.9483565166529515e-06, |
| "loss": 0.9357, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.13020411833468212, |
| "grad_norm": 12.669122185267923, |
| "learning_rate": 1.947986238818328e-06, |
| "loss": 0.8448, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.13056379821958458, |
| "grad_norm": 13.424355636023794, |
| "learning_rate": 1.947614673736511e-06, |
| "loss": 0.8521, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.130923478104487, |
| "grad_norm": 9.175568758023797, |
| "learning_rate": 1.94724182191204e-06, |
| "loss": 0.8636, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.13128315798938944, |
| "grad_norm": 12.328782554527889, |
| "learning_rate": 1.946867683851201e-06, |
| "loss": 0.9347, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.13164283787429187, |
| "grad_norm": 12.013763613914739, |
| "learning_rate": 1.9464922600620267e-06, |
| "loss": 0.8363, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1320025177591943, |
| "grad_norm": 9.269438078892499, |
| "learning_rate": 1.946115551054296e-06, |
| "loss": 0.817, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.13236219764409676, |
| "grad_norm": 10.963559708168836, |
| "learning_rate": 1.945737557339533e-06, |
| "loss": 0.8938, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1327218775289992, |
| "grad_norm": 15.66670813226693, |
| "learning_rate": 1.9453582794310064e-06, |
| "loss": 0.9166, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.13308155741390162, |
| "grad_norm": 12.100424658153491, |
| "learning_rate": 1.9449777178437274e-06, |
| "loss": 0.7974, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.13344123729880406, |
| "grad_norm": 15.678206138304043, |
| "learning_rate": 1.9445958730944515e-06, |
| "loss": 0.8629, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1338009171837065, |
| "grad_norm": 11.550421434718123, |
| "learning_rate": 1.9442127457016765e-06, |
| "loss": 0.8583, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.13416059706860894, |
| "grad_norm": 11.909500141730392, |
| "learning_rate": 1.943828336185642e-06, |
| "loss": 0.914, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.13452027695351138, |
| "grad_norm": 9.940660597221434, |
| "learning_rate": 1.9434426450683275e-06, |
| "loss": 0.7436, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.1348799568384138, |
| "grad_norm": 10.34248930812127, |
| "learning_rate": 1.943055672873454e-06, |
| "loss": 0.8108, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.13523963672331624, |
| "grad_norm": 12.457340019224596, |
| "learning_rate": 1.942667420126481e-06, |
| "loss": 0.8272, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1355993166082187, |
| "grad_norm": 9.465393908237939, |
| "learning_rate": 1.942277887354608e-06, |
| "loss": 0.8666, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.13595899649312113, |
| "grad_norm": 14.936676198112885, |
| "learning_rate": 1.941887075086772e-06, |
| "loss": 0.7892, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.13631867637802356, |
| "grad_norm": 16.657805002799737, |
| "learning_rate": 1.9414949838536467e-06, |
| "loss": 0.8826, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.136678356262926, |
| "grad_norm": 14.465245609908502, |
| "learning_rate": 1.9411016141876437e-06, |
| "loss": 0.8851, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13703803614782845, |
| "grad_norm": 12.225630986738413, |
| "learning_rate": 1.9407069666229093e-06, |
| "loss": 0.8197, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.13739771603273088, |
| "grad_norm": 9.515207261688014, |
| "learning_rate": 1.9403110416953264e-06, |
| "loss": 0.8497, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1377573959176333, |
| "grad_norm": 15.813970332166535, |
| "learning_rate": 1.939913839942512e-06, |
| "loss": 0.8754, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.13811707580253574, |
| "grad_norm": 12.87993595588049, |
| "learning_rate": 1.9395153619038154e-06, |
| "loss": 0.9, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.13847675568743817, |
| "grad_norm": 21.12374700170286, |
| "learning_rate": 1.939115608120321e-06, |
| "loss": 0.8271, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.13883643557234063, |
| "grad_norm": 8.357132133993698, |
| "learning_rate": 1.938714579134845e-06, |
| "loss": 0.8227, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.13919611545724306, |
| "grad_norm": 9.359497020025131, |
| "learning_rate": 1.9383122754919338e-06, |
| "loss": 0.8339, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1395557953421455, |
| "grad_norm": 270.40586750608423, |
| "learning_rate": 1.9379086977378663e-06, |
| "loss": 0.9283, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.13991547522704792, |
| "grad_norm": 9.080060254672393, |
| "learning_rate": 1.9375038464206504e-06, |
| "loss": 0.8596, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.14027515511195035, |
| "grad_norm": 9.878471431037067, |
| "learning_rate": 1.9370977220900236e-06, |
| "loss": 0.9245, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1406348349968528, |
| "grad_norm": 10.555714443464495, |
| "learning_rate": 1.936690325297453e-06, |
| "loss": 0.9154, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.14099451488175524, |
| "grad_norm": 10.700724258109885, |
| "learning_rate": 1.936281656596132e-06, |
| "loss": 0.9078, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.14135419476665767, |
| "grad_norm": 8.07202896315807, |
| "learning_rate": 1.9358717165409816e-06, |
| "loss": 0.7941, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.1417138746515601, |
| "grad_norm": 13.946094269658285, |
| "learning_rate": 1.93546050568865e-06, |
| "loss": 0.8197, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.14207355453646256, |
| "grad_norm": 9.373248085121805, |
| "learning_rate": 1.93504802459751e-06, |
| "loss": 0.8974, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.142433234421365, |
| "grad_norm": 20.94564386485015, |
| "learning_rate": 1.934634273827659e-06, |
| "loss": 0.8379, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.14279291430626742, |
| "grad_norm": 10.643078401471191, |
| "learning_rate": 1.93421925394092e-06, |
| "loss": 0.8434, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.14315259419116985, |
| "grad_norm": 12.292583457238141, |
| "learning_rate": 1.9338029655008375e-06, |
| "loss": 0.8723, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.14351227407607228, |
| "grad_norm": 11.303797133940236, |
| "learning_rate": 1.9333854090726796e-06, |
| "loss": 0.8724, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.14387195396097474, |
| "grad_norm": 7.762057454413017, |
| "learning_rate": 1.9329665852234356e-06, |
| "loss": 0.86, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14423163384587717, |
| "grad_norm": 14.321946837228882, |
| "learning_rate": 1.9325464945218168e-06, |
| "loss": 0.9621, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1445913137307796, |
| "grad_norm": 6.825717053445401, |
| "learning_rate": 1.9321251375382536e-06, |
| "loss": 0.8112, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.14495099361568203, |
| "grad_norm": 14.49694123220213, |
| "learning_rate": 1.931702514844896e-06, |
| "loss": 0.856, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1453106735005845, |
| "grad_norm": 11.663421014632888, |
| "learning_rate": 1.9312786270156135e-06, |
| "loss": 0.8233, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.14567035338548692, |
| "grad_norm": 14.164098459094538, |
| "learning_rate": 1.9308534746259925e-06, |
| "loss": 0.8564, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.14603003327038935, |
| "grad_norm": 12.359332513895684, |
| "learning_rate": 1.9304270582533377e-06, |
| "loss": 0.865, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.14638971315529178, |
| "grad_norm": 12.139694859275288, |
| "learning_rate": 1.929999378476668e-06, |
| "loss": 0.8756, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.14674939304019421, |
| "grad_norm": 21.360173742690147, |
| "learning_rate": 1.9295704358767207e-06, |
| "loss": 0.8693, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.14710907292509667, |
| "grad_norm": 11.100332470691239, |
| "learning_rate": 1.9291402310359458e-06, |
| "loss": 0.9017, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1474687528099991, |
| "grad_norm": 43.95482902174156, |
| "learning_rate": 1.9287087645385083e-06, |
| "loss": 0.856, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.14782843269490153, |
| "grad_norm": 9.177097100103019, |
| "learning_rate": 1.928276036970285e-06, |
| "loss": 0.876, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.14818811257980397, |
| "grad_norm": 10.285115271330506, |
| "learning_rate": 1.927842048918867e-06, |
| "loss": 0.8525, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.14854779246470642, |
| "grad_norm": 8.03920252705612, |
| "learning_rate": 1.9274068009735547e-06, |
| "loss": 0.8298, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.14890747234960885, |
| "grad_norm": 13.19357438089089, |
| "learning_rate": 1.926970293725362e-06, |
| "loss": 0.8565, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.14926715223451129, |
| "grad_norm": 11.014939550913528, |
| "learning_rate": 1.926532527767011e-06, |
| "loss": 0.9041, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.14962683211941372, |
| "grad_norm": 34.139306188602696, |
| "learning_rate": 1.926093503692933e-06, |
| "loss": 0.8974, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.14998651200431615, |
| "grad_norm": 126.99384682987264, |
| "learning_rate": 1.925653222099268e-06, |
| "loss": 0.8406, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.1503461918892186, |
| "grad_norm": 7.92044174289043, |
| "learning_rate": 1.9252116835838636e-06, |
| "loss": 0.8821, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.15070587177412104, |
| "grad_norm": 7.348465342243873, |
| "learning_rate": 1.9247688887462746e-06, |
| "loss": 0.8577, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.15106555165902347, |
| "grad_norm": 7.970909157239155, |
| "learning_rate": 1.9243248381877606e-06, |
| "loss": 0.937, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1514252315439259, |
| "grad_norm": 27.181556708867408, |
| "learning_rate": 1.9238795325112867e-06, |
| "loss": 0.904, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.15178491142882836, |
| "grad_norm": 38.73261243697153, |
| "learning_rate": 1.9234329723215234e-06, |
| "loss": 0.8058, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1521445913137308, |
| "grad_norm": 8.612041746525732, |
| "learning_rate": 1.922985158224843e-06, |
| "loss": 0.8325, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.15250427119863322, |
| "grad_norm": 9.35579094951297, |
| "learning_rate": 1.9225360908293216e-06, |
| "loss": 0.9131, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.15286395108353565, |
| "grad_norm": 10.069976190703061, |
| "learning_rate": 1.922085770744737e-06, |
| "loss": 0.8255, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.15322363096843808, |
| "grad_norm": 8.493391879280153, |
| "learning_rate": 1.921634198582567e-06, |
| "loss": 0.7657, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.15358331085334054, |
| "grad_norm": 6.5652776306338305, |
| "learning_rate": 1.9211813749559914e-06, |
| "loss": 0.8071, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.15394299073824297, |
| "grad_norm": 9.892167099058437, |
| "learning_rate": 1.9207273004798874e-06, |
| "loss": 0.8277, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1543026706231454, |
| "grad_norm": 12.10168551634377, |
| "learning_rate": 1.9202719757708315e-06, |
| "loss": 0.8913, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.15466235050804783, |
| "grad_norm": 8.400143155457233, |
| "learning_rate": 1.919815401447099e-06, |
| "loss": 0.8193, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.15502203039295026, |
| "grad_norm": 8.67439915761609, |
| "learning_rate": 1.91935757812866e-06, |
| "loss": 0.9119, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.15538171027785272, |
| "grad_norm": 6.6432568009165305, |
| "learning_rate": 1.9188985064371815e-06, |
| "loss": 0.8462, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.15574139016275515, |
| "grad_norm": 10.370296729266624, |
| "learning_rate": 1.9184381869960256e-06, |
| "loss": 0.8642, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.15610107004765758, |
| "grad_norm": 7.986242744346441, |
| "learning_rate": 1.9179766204302495e-06, |
| "loss": 0.7959, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.15646074993256, |
| "grad_norm": 17.1456213560963, |
| "learning_rate": 1.9175138073666027e-06, |
| "loss": 0.8015, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.15682042981746247, |
| "grad_norm": 8.916530755054769, |
| "learning_rate": 1.9170497484335276e-06, |
| "loss": 0.7821, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.1571801097023649, |
| "grad_norm": 35.16726375571258, |
| "learning_rate": 1.9165844442611584e-06, |
| "loss": 0.8719, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.15753978958726733, |
| "grad_norm": 9.395656545549057, |
| "learning_rate": 1.91611789548132e-06, |
| "loss": 0.7904, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.15789946947216976, |
| "grad_norm": 13.635627198065489, |
| "learning_rate": 1.9156501027275274e-06, |
| "loss": 0.8651, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.1582591493570722, |
| "grad_norm": 9.22448593403895, |
| "learning_rate": 1.915181066634986e-06, |
| "loss": 0.8507, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.15861882924197465, |
| "grad_norm": 14.688094562563721, |
| "learning_rate": 1.914710787840587e-06, |
| "loss": 0.7956, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.15897850912687708, |
| "grad_norm": 8.911901702895749, |
| "learning_rate": 1.914239266982911e-06, |
| "loss": 0.8144, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.1593381890117795, |
| "grad_norm": 12.767614911255095, |
| "learning_rate": 1.913766504702225e-06, |
| "loss": 0.8392, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.15969786889668194, |
| "grad_norm": 11.049958977930125, |
| "learning_rate": 1.9132925016404803e-06, |
| "loss": 0.9043, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.1600575487815844, |
| "grad_norm": 24.35116451385261, |
| "learning_rate": 1.9128172584413146e-06, |
| "loss": 0.8345, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.16041722866648683, |
| "grad_norm": 9.82011280699428, |
| "learning_rate": 1.912340775750049e-06, |
| "loss": 0.7961, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.16077690855138926, |
| "grad_norm": 7.902611715439257, |
| "learning_rate": 1.9118630542136872e-06, |
| "loss": 0.911, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1611365884362917, |
| "grad_norm": 70.31395398173764, |
| "learning_rate": 1.911384094480916e-06, |
| "loss": 0.8239, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.16149626832119413, |
| "grad_norm": 33.338731991358, |
| "learning_rate": 1.9109038972021027e-06, |
| "loss": 0.8022, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.16185594820609658, |
| "grad_norm": 11.33718969042203, |
| "learning_rate": 1.9104224630292957e-06, |
| "loss": 0.8216, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.16221562809099901, |
| "grad_norm": 8.452034969158248, |
| "learning_rate": 1.9099397926162225e-06, |
| "loss": 0.8445, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.16257530797590145, |
| "grad_norm": 11.4488154951291, |
| "learning_rate": 1.909455886618289e-06, |
| "loss": 0.8916, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.16293498786080388, |
| "grad_norm": 12.31967554618124, |
| "learning_rate": 1.90897074569258e-06, |
| "loss": 0.8674, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.16329466774570633, |
| "grad_norm": 10.697499242046135, |
| "learning_rate": 1.9084843704978553e-06, |
| "loss": 0.902, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.16365434763060877, |
| "grad_norm": 10.085248528236896, |
| "learning_rate": 1.9079967616945532e-06, |
| "loss": 0.8762, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.1640140275155112, |
| "grad_norm": 20.071003715679723, |
| "learning_rate": 1.9075079199447843e-06, |
| "loss": 0.8851, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.16437370740041363, |
| "grad_norm": 7.493447739075657, |
| "learning_rate": 1.9070178459123362e-06, |
| "loss": 0.8059, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.16473338728531606, |
| "grad_norm": 16.7299671266415, |
| "learning_rate": 1.9065265402626673e-06, |
| "loss": 0.8399, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.16509306717021852, |
| "grad_norm": 11.39504360829656, |
| "learning_rate": 1.9060340036629098e-06, |
| "loss": 0.9617, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.16545274705512095, |
| "grad_norm": 20.206501898154706, |
| "learning_rate": 1.9055402367818672e-06, |
| "loss": 0.8597, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.16581242694002338, |
| "grad_norm": 13.324287414295295, |
| "learning_rate": 1.9050452402900132e-06, |
| "loss": 0.9062, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.1661721068249258, |
| "grad_norm": 12.33113766605342, |
| "learning_rate": 1.9045490148594916e-06, |
| "loss": 0.8202, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.16653178670982827, |
| "grad_norm": 16.117376983943213, |
| "learning_rate": 1.9040515611641142e-06, |
| "loss": 0.863, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1668914665947307, |
| "grad_norm": 14.213075056424216, |
| "learning_rate": 1.9035528798793616e-06, |
| "loss": 0.7904, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.16725114647963313, |
| "grad_norm": 12.164792118405405, |
| "learning_rate": 1.9030529716823803e-06, |
| "loss": 0.8742, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.16761082636453556, |
| "grad_norm": 11.496257274302343, |
| "learning_rate": 1.9025518372519844e-06, |
| "loss": 0.7988, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.167970506249438, |
| "grad_norm": 10.35968011430579, |
| "learning_rate": 1.902049477268651e-06, |
| "loss": 0.9183, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.16833018613434045, |
| "grad_norm": 8.60003816280571, |
| "learning_rate": 1.9015458924145226e-06, |
| "loss": 0.8665, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.16868986601924288, |
| "grad_norm": 10.042650282563095, |
| "learning_rate": 1.901041083373405e-06, |
| "loss": 0.871, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.1690495459041453, |
| "grad_norm": 8.07460363018041, |
| "learning_rate": 1.9005350508307658e-06, |
| "loss": 0.7647, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16940922578904774, |
| "grad_norm": 14.764742702869542, |
| "learning_rate": 1.900027795473734e-06, |
| "loss": 0.8248, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.16976890567395017, |
| "grad_norm": 12.707930104442196, |
| "learning_rate": 1.8995193179910996e-06, |
| "loss": 0.8234, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.17012858555885263, |
| "grad_norm": 9.221906057488562, |
| "learning_rate": 1.8990096190733111e-06, |
| "loss": 0.901, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.17048826544375506, |
| "grad_norm": 8.934916040042465, |
| "learning_rate": 1.8984986994124764e-06, |
| "loss": 0.8368, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.1708479453286575, |
| "grad_norm": 10.708952670356158, |
| "learning_rate": 1.8979865597023607e-06, |
| "loss": 0.8035, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.17120762521355992, |
| "grad_norm": 9.897343600602069, |
| "learning_rate": 1.897473200638386e-06, |
| "loss": 0.8099, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.17156730509846238, |
| "grad_norm": 9.0242865083605, |
| "learning_rate": 1.89695862291763e-06, |
| "loss": 0.8575, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.1719269849833648, |
| "grad_norm": 11.294148942631974, |
| "learning_rate": 1.896442827238825e-06, |
| "loss": 0.8016, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.17228666486826724, |
| "grad_norm": 8.181165543542155, |
| "learning_rate": 1.895925814302357e-06, |
| "loss": 0.8631, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.17264634475316967, |
| "grad_norm": 10.916696791012598, |
| "learning_rate": 1.8954075848102654e-06, |
| "loss": 0.7915, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1730060246380721, |
| "grad_norm": 9.32733756404691, |
| "learning_rate": 1.8948881394662416e-06, |
| "loss": 0.7793, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.17336570452297456, |
| "grad_norm": 12.052464990784205, |
| "learning_rate": 1.8943674789756274e-06, |
| "loss": 0.8623, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.173725384407877, |
| "grad_norm": 19.7058645389506, |
| "learning_rate": 1.8938456040454148e-06, |
| "loss": 0.836, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.17408506429277942, |
| "grad_norm": 26.01931607553377, |
| "learning_rate": 1.8933225153842444e-06, |
| "loss": 0.8655, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.17444474417768185, |
| "grad_norm": 18.735199040250077, |
| "learning_rate": 1.8927982137024066e-06, |
| "loss": 0.839, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.1748044240625843, |
| "grad_norm": 15.61104075774782, |
| "learning_rate": 1.8922726997118368e-06, |
| "loss": 0.9053, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.17516410394748674, |
| "grad_norm": 13.842802256021539, |
| "learning_rate": 1.891745974126118e-06, |
| "loss": 0.8572, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.17552378383238917, |
| "grad_norm": 18.7217105013184, |
| "learning_rate": 1.8912180376604776e-06, |
| "loss": 0.879, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.1758834637172916, |
| "grad_norm": 13.134751258066478, |
| "learning_rate": 1.890688891031788e-06, |
| "loss": 0.7968, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.17624314360219404, |
| "grad_norm": 13.500016989697537, |
| "learning_rate": 1.890158534958564e-06, |
| "loss": 0.7834, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1766028234870965, |
| "grad_norm": 27.62671510273035, |
| "learning_rate": 1.8896269701609632e-06, |
| "loss": 0.8369, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.17696250337199892, |
| "grad_norm": 22.6387971806034, |
| "learning_rate": 1.8890941973607842e-06, |
| "loss": 0.7804, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.17732218325690136, |
| "grad_norm": 80.5144766426964, |
| "learning_rate": 1.8885602172814663e-06, |
| "loss": 0.9074, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.1776818631418038, |
| "grad_norm": 8.581003452540251, |
| "learning_rate": 1.888025030648088e-06, |
| "loss": 0.8666, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.17804154302670624, |
| "grad_norm": 12.721986027130344, |
| "learning_rate": 1.8874886381873654e-06, |
| "loss": 0.8943, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.17840122291160868, |
| "grad_norm": 10.857560555875054, |
| "learning_rate": 1.8869510406276535e-06, |
| "loss": 0.8547, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.1787609027965111, |
| "grad_norm": 9.487050734369223, |
| "learning_rate": 1.8864122386989423e-06, |
| "loss": 0.8365, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.17912058268141354, |
| "grad_norm": 10.327204356972482, |
| "learning_rate": 1.8858722331328577e-06, |
| "loss": 0.8412, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.17948026256631597, |
| "grad_norm": 9.95664866490052, |
| "learning_rate": 1.8853310246626604e-06, |
| "loss": 0.8252, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.17983994245121843, |
| "grad_norm": 12.777054919130249, |
| "learning_rate": 1.8847886140232436e-06, |
| "loss": 0.8191, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18019962233612086, |
| "grad_norm": 11.663994161516209, |
| "learning_rate": 1.8842450019511333e-06, |
| "loss": 0.8332, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.1805593022210233, |
| "grad_norm": 13.433640005929696, |
| "learning_rate": 1.8837001891844872e-06, |
| "loss": 0.8071, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.18091898210592572, |
| "grad_norm": 10.554856785551745, |
| "learning_rate": 1.8831541764630935e-06, |
| "loss": 0.8987, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.18127866199082815, |
| "grad_norm": 9.383086892636745, |
| "learning_rate": 1.8826069645283686e-06, |
| "loss": 0.8411, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.1816383418757306, |
| "grad_norm": 10.170715585820707, |
| "learning_rate": 1.8820585541233589e-06, |
| "loss": 0.8284, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.18199802176063304, |
| "grad_norm": 15.744352966889148, |
| "learning_rate": 1.8815089459927369e-06, |
| "loss": 0.8144, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.18235770164553547, |
| "grad_norm": 9.770670238104348, |
| "learning_rate": 1.8809581408828024e-06, |
| "loss": 0.8206, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1827173815304379, |
| "grad_norm": 11.333576038323153, |
| "learning_rate": 1.8804061395414793e-06, |
| "loss": 0.8452, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.18307706141534036, |
| "grad_norm": 12.453020924416958, |
| "learning_rate": 1.8798529427183176e-06, |
| "loss": 0.8419, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.1834367413002428, |
| "grad_norm": 10.426367414034607, |
| "learning_rate": 1.8792985511644894e-06, |
| "loss": 0.8769, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.18379642118514522, |
| "grad_norm": 16.90275020796146, |
| "learning_rate": 1.878742965632789e-06, |
| "loss": 0.9086, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.18415610107004765, |
| "grad_norm": 8.978038437485434, |
| "learning_rate": 1.8781861868776326e-06, |
| "loss": 0.8797, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.18451578095495008, |
| "grad_norm": 126.89374658111018, |
| "learning_rate": 1.877628215655056e-06, |
| "loss": 0.8737, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.18487546083985254, |
| "grad_norm": 6.941430734744093, |
| "learning_rate": 1.8770690527227154e-06, |
| "loss": 0.8342, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.18523514072475497, |
| "grad_norm": 10.359513217199083, |
| "learning_rate": 1.8765086988398835e-06, |
| "loss": 0.8977, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1855948206096574, |
| "grad_norm": 25.151243920382832, |
| "learning_rate": 1.8759471547674517e-06, |
| "loss": 0.8332, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.18595450049455983, |
| "grad_norm": 18.948640562206315, |
| "learning_rate": 1.8753844212679267e-06, |
| "loss": 0.867, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.1863141803794623, |
| "grad_norm": 9.948782803153867, |
| "learning_rate": 1.8748204991054302e-06, |
| "loss": 0.8165, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.18667386026436472, |
| "grad_norm": 13.71406044526494, |
| "learning_rate": 1.8742553890456985e-06, |
| "loss": 0.864, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.18703354014926715, |
| "grad_norm": 66.03135486053179, |
| "learning_rate": 1.8736890918560806e-06, |
| "loss": 0.8248, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18739322003416958, |
| "grad_norm": 7.404838929316901, |
| "learning_rate": 1.8731216083055372e-06, |
| "loss": 0.7933, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.187752899919072, |
| "grad_norm": 8.790376905298293, |
| "learning_rate": 1.8725529391646411e-06, |
| "loss": 0.8204, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.18811257980397447, |
| "grad_norm": 11.767370844635321, |
| "learning_rate": 1.8719830852055734e-06, |
| "loss": 0.7924, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.1884722596888769, |
| "grad_norm": 16.86102096800946, |
| "learning_rate": 1.8714120472021249e-06, |
| "loss": 0.8037, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.18883193957377933, |
| "grad_norm": 11.200751122260533, |
| "learning_rate": 1.870839825929694e-06, |
| "loss": 0.867, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.18919161945868176, |
| "grad_norm": 12.847431421238989, |
| "learning_rate": 1.8702664221652863e-06, |
| "loss": 0.7669, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.18955129934358422, |
| "grad_norm": 8.507177391831126, |
| "learning_rate": 1.869691836687512e-06, |
| "loss": 0.827, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.18991097922848665, |
| "grad_norm": 13.681998332161132, |
| "learning_rate": 1.8691160702765875e-06, |
| "loss": 0.8366, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.19027065911338908, |
| "grad_norm": 8.016047400192488, |
| "learning_rate": 1.868539123714331e-06, |
| "loss": 0.8178, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.19063033899829152, |
| "grad_norm": 11.598905703734358, |
| "learning_rate": 1.8679609977841643e-06, |
| "loss": 0.8801, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.19099001888319395, |
| "grad_norm": 8.344476621453797, |
| "learning_rate": 1.8673816932711106e-06, |
| "loss": 0.8611, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.1913496987680964, |
| "grad_norm": 220.24124707635522, |
| "learning_rate": 1.866801210961793e-06, |
| "loss": 0.7756, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.19170937865299884, |
| "grad_norm": 7.7309606873755525, |
| "learning_rate": 1.8662195516444344e-06, |
| "loss": 0.8871, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.19206905853790127, |
| "grad_norm": 12.87691886716015, |
| "learning_rate": 1.8656367161088556e-06, |
| "loss": 0.7906, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.1924287384228037, |
| "grad_norm": 15.505546334154891, |
| "learning_rate": 1.8650527051464742e-06, |
| "loss": 0.8578, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.19278841830770616, |
| "grad_norm": 13.605855564448348, |
| "learning_rate": 1.8644675195503047e-06, |
| "loss": 0.8955, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.19314809819260859, |
| "grad_norm": 9.750630774470594, |
| "learning_rate": 1.8638811601149565e-06, |
| "loss": 0.9195, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.19350777807751102, |
| "grad_norm": 8.441215495649715, |
| "learning_rate": 1.863293627636632e-06, |
| "loss": 0.8546, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.19386745796241345, |
| "grad_norm": 19.253028018882947, |
| "learning_rate": 1.8627049229131276e-06, |
| "loss": 0.8292, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.19422713784731588, |
| "grad_norm": 23.313553166247857, |
| "learning_rate": 1.8621150467438307e-06, |
| "loss": 0.803, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.19458681773221834, |
| "grad_norm": 24.909947074852294, |
| "learning_rate": 1.8615239999297199e-06, |
| "loss": 0.8334, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.19494649761712077, |
| "grad_norm": 14.208748631565902, |
| "learning_rate": 1.8609317832733628e-06, |
| "loss": 0.849, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.1953061775020232, |
| "grad_norm": 72.2808028401263, |
| "learning_rate": 1.8603383975789164e-06, |
| "loss": 0.9316, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.19566585738692563, |
| "grad_norm": 8.863515350100679, |
| "learning_rate": 1.8597438436521238e-06, |
| "loss": 0.8319, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.19602553727182806, |
| "grad_norm": 11.343673945919084, |
| "learning_rate": 1.8591481223003155e-06, |
| "loss": 0.8225, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.19638521715673052, |
| "grad_norm": 12.638901386572595, |
| "learning_rate": 1.858551234332407e-06, |
| "loss": 0.9001, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.19674489704163295, |
| "grad_norm": 8.83864644270351, |
| "learning_rate": 1.8579531805588978e-06, |
| "loss": 0.8782, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.19710457692653538, |
| "grad_norm": 17.835291838539035, |
| "learning_rate": 1.8573539617918699e-06, |
| "loss": 0.7971, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.1974642568114378, |
| "grad_norm": 13.19953651388432, |
| "learning_rate": 1.8567535788449881e-06, |
| "loss": 0.829, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.19782393669634027, |
| "grad_norm": 8.124384602473048, |
| "learning_rate": 1.8561520325334977e-06, |
| "loss": 0.8405, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1981836165812427, |
| "grad_norm": 8.187675496849428, |
| "learning_rate": 1.8555493236742238e-06, |
| "loss": 0.8524, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.19854329646614513, |
| "grad_norm": 56.26064098328229, |
| "learning_rate": 1.8549454530855694e-06, |
| "loss": 0.8115, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.19890297635104756, |
| "grad_norm": 9.973278810508788, |
| "learning_rate": 1.854340421587516e-06, |
| "loss": 0.8622, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.19926265623595, |
| "grad_norm": 15.73714198878386, |
| "learning_rate": 1.8537342300016206e-06, |
| "loss": 0.8425, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.19962233612085245, |
| "grad_norm": 20.407221284007207, |
| "learning_rate": 1.8531268791510163e-06, |
| "loss": 0.9294, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.19998201600575488, |
| "grad_norm": 15.744069716766877, |
| "learning_rate": 1.8525183698604094e-06, |
| "loss": 0.8993, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2003416958906573, |
| "grad_norm": 42.07300620484659, |
| "learning_rate": 1.8519087029560798e-06, |
| "loss": 0.8133, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.20070137577555974, |
| "grad_norm": 13.508080414174918, |
| "learning_rate": 1.8512978792658798e-06, |
| "loss": 0.7981, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2010610556604622, |
| "grad_norm": 15.37269219328954, |
| "learning_rate": 1.850685899619231e-06, |
| "loss": 0.8778, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.20142073554536463, |
| "grad_norm": 17.113977534513946, |
| "learning_rate": 1.8500727648471257e-06, |
| "loss": 0.8015, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.20178041543026706, |
| "grad_norm": 17.954420979530887, |
| "learning_rate": 1.849458475782125e-06, |
| "loss": 0.8618, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2021400953151695, |
| "grad_norm": 12.661761182791771, |
| "learning_rate": 1.8488430332583564e-06, |
| "loss": 0.8386, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.20249977520007192, |
| "grad_norm": 16.593297444328744, |
| "learning_rate": 1.8482264381115146e-06, |
| "loss": 0.8486, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.20285945508497438, |
| "grad_norm": 166.1578676776432, |
| "learning_rate": 1.8476086911788584e-06, |
| "loss": 0.8574, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2032191349698768, |
| "grad_norm": 9.232219788176112, |
| "learning_rate": 1.8469897932992118e-06, |
| "loss": 0.8386, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.20357881485477924, |
| "grad_norm": 803.6760447649217, |
| "learning_rate": 1.8463697453129607e-06, |
| "loss": 0.8087, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.20393849473968167, |
| "grad_norm": 8.897839753882822, |
| "learning_rate": 1.8457485480620529e-06, |
| "loss": 0.8682, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.20429817462458413, |
| "grad_norm": 9.607245818294034, |
| "learning_rate": 1.8451262023899971e-06, |
| "loss": 0.7801, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.20465785450948656, |
| "grad_norm": 16.666904794818137, |
| "learning_rate": 1.844502709141861e-06, |
| "loss": 0.8333, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.205017534394389, |
| "grad_norm": 49.6053151118419, |
| "learning_rate": 1.843878069164271e-06, |
| "loss": 0.827, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.20537721427929143, |
| "grad_norm": 21.17528972453623, |
| "learning_rate": 1.84325228330541e-06, |
| "loss": 0.8328, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.20573689416419386, |
| "grad_norm": 113.77732313185646, |
| "learning_rate": 1.8426253524150176e-06, |
| "loss": 0.8754, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.20609657404909631, |
| "grad_norm": 14.576975005767219, |
| "learning_rate": 1.8419972773443876e-06, |
| "loss": 0.8254, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.20645625393399875, |
| "grad_norm": 11.251350372431052, |
| "learning_rate": 1.8413680589463673e-06, |
| "loss": 0.8429, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.20681593381890118, |
| "grad_norm": 10.082724721956959, |
| "learning_rate": 1.8407376980753577e-06, |
| "loss": 0.8393, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2071756137038036, |
| "grad_norm": 26.295582863141938, |
| "learning_rate": 1.8401061955873099e-06, |
| "loss": 0.8415, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.20753529358870604, |
| "grad_norm": 13.31851233479887, |
| "learning_rate": 1.8394735523397256e-06, |
| "loss": 0.8582, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2078949734736085, |
| "grad_norm": 19.282797406828767, |
| "learning_rate": 1.8388397691916552e-06, |
| "loss": 0.8357, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.20825465335851093, |
| "grad_norm": 11.977505691018543, |
| "learning_rate": 1.8382048470036979e-06, |
| "loss": 0.8085, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.20861433324341336, |
| "grad_norm": 13.059344673081393, |
| "learning_rate": 1.8375687866379988e-06, |
| "loss": 0.8757, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2089740131283158, |
| "grad_norm": 18.053152775353162, |
| "learning_rate": 1.8369315889582481e-06, |
| "loss": 0.772, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.20933369301321825, |
| "grad_norm": 9.52888367295666, |
| "learning_rate": 1.8362932548296813e-06, |
| "loss": 0.8515, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.20969337289812068, |
| "grad_norm": 23.13394593879355, |
| "learning_rate": 1.8356537851190761e-06, |
| "loss": 0.8452, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2100530527830231, |
| "grad_norm": 10.933333046608094, |
| "learning_rate": 1.8350131806947533e-06, |
| "loss": 0.8864, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.21041273266792554, |
| "grad_norm": 7.363294631767518, |
| "learning_rate": 1.834371442426574e-06, |
| "loss": 0.7625, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.21077241255282797, |
| "grad_norm": 25.090864142853984, |
| "learning_rate": 1.833728571185938e-06, |
| "loss": 0.8253, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.21113209243773043, |
| "grad_norm": 12.565351258808075, |
| "learning_rate": 1.833084567845785e-06, |
| "loss": 0.7504, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.21149177232263286, |
| "grad_norm": 16.15110531469703, |
| "learning_rate": 1.8324394332805911e-06, |
| "loss": 0.8094, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2118514522075353, |
| "grad_norm": 12.752498938310502, |
| "learning_rate": 1.8317931683663686e-06, |
| "loss": 0.7901, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.21221113209243772, |
| "grad_norm": 8.229226597208003, |
| "learning_rate": 1.8311457739806645e-06, |
| "loss": 0.7494, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.21257081197734018, |
| "grad_norm": 9.919964958921899, |
| "learning_rate": 1.8304972510025605e-06, |
| "loss": 0.8116, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2129304918622426, |
| "grad_norm": 9.539232699936269, |
| "learning_rate": 1.8298476003126692e-06, |
| "loss": 0.835, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.21329017174714504, |
| "grad_norm": 10.808501663044773, |
| "learning_rate": 1.8291968227931357e-06, |
| "loss": 0.8389, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.21364985163204747, |
| "grad_norm": 12.414953510669138, |
| "learning_rate": 1.8285449193276348e-06, |
| "loss": 0.8508, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2140095315169499, |
| "grad_norm": 25.29400974823337, |
| "learning_rate": 1.8278918908013695e-06, |
| "loss": 0.8451, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.21436921140185236, |
| "grad_norm": 22.802866918366973, |
| "learning_rate": 1.8272377381010724e-06, |
| "loss": 0.8105, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2147288912867548, |
| "grad_norm": 13.833252675358032, |
| "learning_rate": 1.8265824621150003e-06, |
| "loss": 0.8345, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.21508857117165722, |
| "grad_norm": 11.973137632215282, |
| "learning_rate": 1.8259260637329367e-06, |
| "loss": 0.8041, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.21544825105655965, |
| "grad_norm": 9.071656710108867, |
| "learning_rate": 1.825268543846189e-06, |
| "loss": 0.8871, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2158079309414621, |
| "grad_norm": 38.103980714930515, |
| "learning_rate": 1.824609903347587e-06, |
| "loss": 0.8215, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.21616761082636454, |
| "grad_norm": 24.149403872754025, |
| "learning_rate": 1.8239501431314825e-06, |
| "loss": 0.8871, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.21652729071126697, |
| "grad_norm": 34.62534097027228, |
| "learning_rate": 1.8232892640937481e-06, |
| "loss": 0.8904, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2168869705961694, |
| "grad_norm": 8.736596796911465, |
| "learning_rate": 1.8226272671317744e-06, |
| "loss": 0.788, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.21724665048107183, |
| "grad_norm": 15.23005069606993, |
| "learning_rate": 1.8219641531444712e-06, |
| "loss": 0.7981, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2176063303659743, |
| "grad_norm": 10.319790765410714, |
| "learning_rate": 1.8212999230322648e-06, |
| "loss": 0.8544, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.21796601025087672, |
| "grad_norm": 15.292884480203284, |
| "learning_rate": 1.8206345776970968e-06, |
| "loss": 0.8734, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.21832569013577915, |
| "grad_norm": 41.4760877649855, |
| "learning_rate": 1.8199681180424231e-06, |
| "loss": 0.8189, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.21868537002068159, |
| "grad_norm": 57.998102387960245, |
| "learning_rate": 1.8193005449732133e-06, |
| "loss": 0.7815, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.21904504990558404, |
| "grad_norm": 14.687290384292297, |
| "learning_rate": 1.818631859395948e-06, |
| "loss": 0.8302, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.21940472979048647, |
| "grad_norm": 11.204183156820001, |
| "learning_rate": 1.817962062218619e-06, |
| "loss": 0.8424, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2197644096753889, |
| "grad_norm": 19.133163995291312, |
| "learning_rate": 1.8172911543507274e-06, |
| "loss": 0.7586, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.22012408956029134, |
| "grad_norm": 10.37788675490883, |
| "learning_rate": 1.8166191367032826e-06, |
| "loss": 0.8972, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.22048376944519377, |
| "grad_norm": 16.856991399854, |
| "learning_rate": 1.8159460101888012e-06, |
| "loss": 0.9049, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.22084344933009623, |
| "grad_norm": 9.580492105108986, |
| "learning_rate": 1.815271775721304e-06, |
| "loss": 0.8372, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.22120312921499866, |
| "grad_norm": 10.686207643013471, |
| "learning_rate": 1.8145964342163186e-06, |
| "loss": 0.8238, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2215628090999011, |
| "grad_norm": 77.3835210722144, |
| "learning_rate": 1.813919986590874e-06, |
| "loss": 0.7959, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.22192248898480352, |
| "grad_norm": 15.988387697616838, |
| "learning_rate": 1.813242433763502e-06, |
| "loss": 0.8358, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.22228216886970595, |
| "grad_norm": 10.826804108361445, |
| "learning_rate": 1.812563776654235e-06, |
| "loss": 0.8695, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2226418487546084, |
| "grad_norm": 27.447521435342345, |
| "learning_rate": 1.8118840161846047e-06, |
| "loss": 0.8576, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.22300152863951084, |
| "grad_norm": 11.387854391226709, |
| "learning_rate": 1.811203153277641e-06, |
| "loss": 0.8884, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.22336120852441327, |
| "grad_norm": 22.688520547371507, |
| "learning_rate": 1.8105211888578706e-06, |
| "loss": 0.8684, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2237208884093157, |
| "grad_norm": 19.7224020082186, |
| "learning_rate": 1.809838123851317e-06, |
| "loss": 0.8326, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.22408056829421816, |
| "grad_norm": 10.103024522841077, |
| "learning_rate": 1.8091539591854968e-06, |
| "loss": 0.814, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.2244402481791206, |
| "grad_norm": 12.91559714015716, |
| "learning_rate": 1.8084686957894205e-06, |
| "loss": 0.853, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.22479992806402302, |
| "grad_norm": 8.383315946503124, |
| "learning_rate": 1.8077823345935903e-06, |
| "loss": 0.8612, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.22515960794892545, |
| "grad_norm": 15.542093256359411, |
| "learning_rate": 1.8070948765299994e-06, |
| "loss": 0.8629, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.22551928783382788, |
| "grad_norm": 19.90685107530346, |
| "learning_rate": 1.8064063225321303e-06, |
| "loss": 0.766, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.22587896771873034, |
| "grad_norm": 9.54510925955737, |
| "learning_rate": 1.805716673534953e-06, |
| "loss": 0.8849, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.22623864760363277, |
| "grad_norm": 14.31665729605147, |
| "learning_rate": 1.8050259304749251e-06, |
| "loss": 0.8654, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.2265983274885352, |
| "grad_norm": 16.528827262123798, |
| "learning_rate": 1.8043340942899902e-06, |
| "loss": 0.7675, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.22695800737343763, |
| "grad_norm": 10.048294889909558, |
| "learning_rate": 1.8036411659195749e-06, |
| "loss": 0.8595, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2273176872583401, |
| "grad_norm": 28.43706659648747, |
| "learning_rate": 1.80294714630459e-06, |
| "loss": 0.8223, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.22767736714324252, |
| "grad_norm": 71.59101307653549, |
| "learning_rate": 1.8022520363874273e-06, |
| "loss": 0.8443, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.22803704702814495, |
| "grad_norm": 10.0449247754985, |
| "learning_rate": 1.8015558371119602e-06, |
| "loss": 0.7915, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.22839672691304738, |
| "grad_norm": 13.741192708054545, |
| "learning_rate": 1.8008585494235396e-06, |
| "loss": 0.834, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2287564067979498, |
| "grad_norm": 9.262015952475151, |
| "learning_rate": 1.8001601742689957e-06, |
| "loss": 0.8289, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.22911608668285227, |
| "grad_norm": 10.601838370846096, |
| "learning_rate": 1.7994607125966353e-06, |
| "loss": 0.7977, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2294757665677547, |
| "grad_norm": 15.376559229012283, |
| "learning_rate": 1.7987601653562399e-06, |
| "loss": 0.797, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.22983544645265713, |
| "grad_norm": 30.38370495537623, |
| "learning_rate": 1.798058533499065e-06, |
| "loss": 0.9089, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.23019512633755956, |
| "grad_norm": 9.21023757987484, |
| "learning_rate": 1.79735581797784e-06, |
| "loss": 0.8337, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.23055480622246202, |
| "grad_norm": 13.636729599543848, |
| "learning_rate": 1.7966520197467646e-06, |
| "loss": 0.8558, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.23091448610736445, |
| "grad_norm": 9.349057503980244, |
| "learning_rate": 1.795947139761509e-06, |
| "loss": 0.7809, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.23127416599226688, |
| "grad_norm": 11.595908969999318, |
| "learning_rate": 1.7952411789792123e-06, |
| "loss": 0.8439, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.23163384587716931, |
| "grad_norm": 10.612729019075717, |
| "learning_rate": 1.7945341383584816e-06, |
| "loss": 0.8131, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.23199352576207175, |
| "grad_norm": 24.365221574146226, |
| "learning_rate": 1.7938260188593901e-06, |
| "loss": 0.8217, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2323532056469742, |
| "grad_norm": 9.952610882034666, |
| "learning_rate": 1.7931168214434753e-06, |
| "loss": 0.9276, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.23271288553187663, |
| "grad_norm": 11.51841503044773, |
| "learning_rate": 1.7924065470737396e-06, |
| "loss": 0.8442, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.23307256541677907, |
| "grad_norm": 18.199580289712692, |
| "learning_rate": 1.7916951967146464e-06, |
| "loss": 0.817, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.2334322453016815, |
| "grad_norm": 21.40225469340493, |
| "learning_rate": 1.7909827713321214e-06, |
| "loss": 0.8596, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.23379192518658393, |
| "grad_norm": 9.322355475761332, |
| "learning_rate": 1.7902692718935493e-06, |
| "loss": 0.7432, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.23415160507148639, |
| "grad_norm": 42.78840005902247, |
| "learning_rate": 1.7895546993677733e-06, |
| "loss": 0.8617, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.23451128495638882, |
| "grad_norm": 12.912124674274, |
| "learning_rate": 1.788839054725094e-06, |
| "loss": 0.8521, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.23487096484129125, |
| "grad_norm": 26.057742118486228, |
| "learning_rate": 1.7881223389372676e-06, |
| "loss": 0.7674, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.23523064472619368, |
| "grad_norm": 9.457050216997986, |
| "learning_rate": 1.787404552977505e-06, |
| "loss": 0.868, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.23559032461109614, |
| "grad_norm": 48.60675393536446, |
| "learning_rate": 1.7866856978204697e-06, |
| "loss": 0.8065, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.23595000449599857, |
| "grad_norm": 8.940770729593302, |
| "learning_rate": 1.785965774442278e-06, |
| "loss": 0.8632, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.236309684380901, |
| "grad_norm": 30.24633174828573, |
| "learning_rate": 1.7852447838204956e-06, |
| "loss": 0.7572, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.23666936426580343, |
| "grad_norm": 28.318681389494447, |
| "learning_rate": 1.7845227269341383e-06, |
| "loss": 0.8503, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.23702904415070586, |
| "grad_norm": 27.320400067616706, |
| "learning_rate": 1.7837996047636695e-06, |
| "loss": 0.8268, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.23738872403560832, |
| "grad_norm": 24.962246438076164, |
| "learning_rate": 1.7830754182909985e-06, |
| "loss": 0.9086, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.23774840392051075, |
| "grad_norm": 9.45182507885013, |
| "learning_rate": 1.7823501684994804e-06, |
| "loss": 0.8342, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.23810808380541318, |
| "grad_norm": 9.158151559844153, |
| "learning_rate": 1.7816238563739144e-06, |
| "loss": 0.8722, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.2384677636903156, |
| "grad_norm": 8.873008087691057, |
| "learning_rate": 1.7808964829005414e-06, |
| "loss": 0.897, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.23882744357521807, |
| "grad_norm": 16.74472931542851, |
| "learning_rate": 1.7801680490670447e-06, |
| "loss": 0.905, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.2391871234601205, |
| "grad_norm": 15.274376064023013, |
| "learning_rate": 1.779438555862546e-06, |
| "loss": 0.9388, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.23954680334502293, |
| "grad_norm": 8.126377106569372, |
| "learning_rate": 1.7787080042776062e-06, |
| "loss": 0.803, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.23990648322992536, |
| "grad_norm": 19.720922442725232, |
| "learning_rate": 1.7779763953042237e-06, |
| "loss": 0.8114, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.2402661631148278, |
| "grad_norm": 6.70288102967279, |
| "learning_rate": 1.777243729935832e-06, |
| "loss": 0.8596, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.24062584299973025, |
| "grad_norm": 12.445261188563348, |
| "learning_rate": 1.7765100091672999e-06, |
| "loss": 0.8305, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.24098552288463268, |
| "grad_norm": 14.470509216945313, |
| "learning_rate": 1.7757752339949281e-06, |
| "loss": 0.8205, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2413452027695351, |
| "grad_norm": 75.73337553871086, |
| "learning_rate": 1.77503940541645e-06, |
| "loss": 0.8631, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.24170488265443754, |
| "grad_norm": 9.689768060150353, |
| "learning_rate": 1.7743025244310292e-06, |
| "loss": 0.7986, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.24206456253934, |
| "grad_norm": 31.777854668635033, |
| "learning_rate": 1.7735645920392584e-06, |
| "loss": 0.8363, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 17.7545677230147, |
| "learning_rate": 1.7728256092431574e-06, |
| "loss": 0.8747, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.24278392230914486, |
| "grad_norm": 13.33007316226056, |
| "learning_rate": 1.772085577046173e-06, |
| "loss": 0.9042, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.2431436021940473, |
| "grad_norm": 10.22093374125928, |
| "learning_rate": 1.771344496453177e-06, |
| "loss": 0.8308, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.24350328207894972, |
| "grad_norm": 41.301615171234594, |
| "learning_rate": 1.770602368470464e-06, |
| "loss": 0.8183, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.24386296196385218, |
| "grad_norm": 10.371878412263237, |
| "learning_rate": 1.7698591941057516e-06, |
| "loss": 0.8899, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2442226418487546, |
| "grad_norm": 39.69708829055009, |
| "learning_rate": 1.7691149743681782e-06, |
| "loss": 0.8367, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.24458232173365704, |
| "grad_norm": 25.888723986232677, |
| "learning_rate": 1.768369710268301e-06, |
| "loss": 0.8365, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.24494200161855947, |
| "grad_norm": 18.206774975675085, |
| "learning_rate": 1.767623402818096e-06, |
| "loss": 0.8885, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.24530168150346193, |
| "grad_norm": 8.935330841122331, |
| "learning_rate": 1.766876053030956e-06, |
| "loss": 0.903, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.24566136138836436, |
| "grad_norm": 33.95519093624401, |
| "learning_rate": 1.7661276619216885e-06, |
| "loss": 0.7565, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.2460210412732668, |
| "grad_norm": 11.177340777055564, |
| "learning_rate": 1.7653782305065156e-06, |
| "loss": 0.85, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.24638072115816922, |
| "grad_norm": 12.859137623323068, |
| "learning_rate": 1.7646277598030715e-06, |
| "loss": 0.7715, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.24674040104307166, |
| "grad_norm": 7.650967682616375, |
| "learning_rate": 1.7638762508304023e-06, |
| "loss": 0.7208, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.24710008092797411, |
| "grad_norm": 10.767439185965463, |
| "learning_rate": 1.7631237046089632e-06, |
| "loss": 0.8317, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.24745976081287654, |
| "grad_norm": 11.672712327778948, |
| "learning_rate": 1.7623701221606187e-06, |
| "loss": 0.7783, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.24781944069777898, |
| "grad_norm": 16.700574370186114, |
| "learning_rate": 1.7616155045086392e-06, |
| "loss": 0.8823, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.2481791205826814, |
| "grad_norm": 10.467456780761605, |
| "learning_rate": 1.7608598526777017e-06, |
| "loss": 0.8705, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.24853880046758384, |
| "grad_norm": 39.549181218877564, |
| "learning_rate": 1.7601031676938875e-06, |
| "loss": 0.8143, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2488984803524863, |
| "grad_norm": 12.8898510426591, |
| "learning_rate": 1.7593454505846803e-06, |
| "loss": 0.7451, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.24925816023738873, |
| "grad_norm": 21.893261282880133, |
| "learning_rate": 1.7585867023789655e-06, |
| "loss": 0.8776, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.24961784012229116, |
| "grad_norm": 16.328323744615236, |
| "learning_rate": 1.7578269241070287e-06, |
| "loss": 0.8591, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2499775200071936, |
| "grad_norm": 21.12995839406831, |
| "learning_rate": 1.7570661168005541e-06, |
| "loss": 0.8083, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.250337199892096, |
| "grad_norm": 15.584874342335457, |
| "learning_rate": 1.7563042814926233e-06, |
| "loss": 0.8413, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.25069687977699845, |
| "grad_norm": 298.07947482680925, |
| "learning_rate": 1.7555414192177137e-06, |
| "loss": 0.8321, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.25105655966190094, |
| "grad_norm": 16.30630259438712, |
| "learning_rate": 1.754777531011697e-06, |
| "loss": 0.8303, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.25141623954680337, |
| "grad_norm": 15.99104046759504, |
| "learning_rate": 1.7540126179118384e-06, |
| "loss": 0.8423, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.2517759194317058, |
| "grad_norm": 7.132549295527994, |
| "learning_rate": 1.7532466809567948e-06, |
| "loss": 0.8063, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2521355993166082, |
| "grad_norm": 12.305338193314348, |
| "learning_rate": 1.7524797211866126e-06, |
| "loss": 0.8488, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.25249527920151066, |
| "grad_norm": 8.474168280891881, |
| "learning_rate": 1.751711739642728e-06, |
| "loss": 0.7771, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.2528549590864131, |
| "grad_norm": 8.74400685739328, |
| "learning_rate": 1.7509427373679642e-06, |
| "loss": 0.8312, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.2532146389713155, |
| "grad_norm": 18.048435642397564, |
| "learning_rate": 1.7501727154065303e-06, |
| "loss": 0.799, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.25357431885621795, |
| "grad_norm": 7.860827866052275, |
| "learning_rate": 1.7494016748040203e-06, |
| "loss": 0.8036, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2539339987411204, |
| "grad_norm": 9.074872447593906, |
| "learning_rate": 1.7486296166074115e-06, |
| "loss": 0.8129, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.2542936786260228, |
| "grad_norm": 8.108719393583089, |
| "learning_rate": 1.747856541865062e-06, |
| "loss": 0.9203, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.2546533585109253, |
| "grad_norm": 25.11094384341223, |
| "learning_rate": 1.7470824516267122e-06, |
| "loss": 0.8529, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.25501303839582773, |
| "grad_norm": 15.553324645409731, |
| "learning_rate": 1.746307346943479e-06, |
| "loss": 0.8235, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.25537271828073016, |
| "grad_norm": 16.569294273110824, |
| "learning_rate": 1.7455312288678586e-06, |
| "loss": 0.8668, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2557323981656326, |
| "grad_norm": 21.109256363359002, |
| "learning_rate": 1.7447540984537222e-06, |
| "loss": 0.8766, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.256092078050535, |
| "grad_norm": 10.89201806284621, |
| "learning_rate": 1.7439759567563167e-06, |
| "loss": 0.8322, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.25645175793543745, |
| "grad_norm": 10.637309913168227, |
| "learning_rate": 1.7431968048322615e-06, |
| "loss": 0.8087, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2568114378203399, |
| "grad_norm": 51.93925558999249, |
| "learning_rate": 1.742416643739547e-06, |
| "loss": 0.8422, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.2571711177052423, |
| "grad_norm": 15.458855078161623, |
| "learning_rate": 1.7416354745375355e-06, |
| "loss": 0.8765, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.25753079759014474, |
| "grad_norm": 9.67024721977002, |
| "learning_rate": 1.7408532982869573e-06, |
| "loss": 0.7901, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.25789047747504723, |
| "grad_norm": 19.090486922876156, |
| "learning_rate": 1.7400701160499102e-06, |
| "loss": 0.6879, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.25825015735994966, |
| "grad_norm": 18.300311821518722, |
| "learning_rate": 1.7392859288898585e-06, |
| "loss": 0.8627, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2586098372448521, |
| "grad_norm": 36.93267430999449, |
| "learning_rate": 1.73850073787163e-06, |
| "loss": 0.8248, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.2589695171297545, |
| "grad_norm": 15.184990783948566, |
| "learning_rate": 1.7377145440614162e-06, |
| "loss": 0.9503, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.25932919701465695, |
| "grad_norm": 9.000596816529308, |
| "learning_rate": 1.7369273485267712e-06, |
| "loss": 0.7975, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2596888768995594, |
| "grad_norm": 9.340713980030525, |
| "learning_rate": 1.7361391523366079e-06, |
| "loss": 0.8181, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2600485567844618, |
| "grad_norm": 10.548205661898189, |
| "learning_rate": 1.7353499565611984e-06, |
| "loss": 0.818, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.26040823666936425, |
| "grad_norm": 13.929969767317461, |
| "learning_rate": 1.7345597622721727e-06, |
| "loss": 0.8653, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.2607679165542667, |
| "grad_norm": 9.336168977450368, |
| "learning_rate": 1.7337685705425156e-06, |
| "loss": 0.7825, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.26112759643916916, |
| "grad_norm": 10.31428096377927, |
| "learning_rate": 1.7329763824465673e-06, |
| "loss": 0.8273, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.2614872763240716, |
| "grad_norm": 34.68010377643249, |
| "learning_rate": 1.7321831990600204e-06, |
| "loss": 0.8133, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.261846956208974, |
| "grad_norm": 18.793582469750675, |
| "learning_rate": 1.7313890214599191e-06, |
| "loss": 0.8366, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.26220663609387646, |
| "grad_norm": 13.557344626433261, |
| "learning_rate": 1.7305938507246576e-06, |
| "loss": 0.8497, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.2625663159787789, |
| "grad_norm": 12.896197123309461, |
| "learning_rate": 1.7297976879339787e-06, |
| "loss": 0.8175, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2629259958636813, |
| "grad_norm": 8.93948575659342, |
| "learning_rate": 1.7290005341689722e-06, |
| "loss": 0.725, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.26328567574858375, |
| "grad_norm": 10.865900403804043, |
| "learning_rate": 1.728202390512074e-06, |
| "loss": 0.8266, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.2636453556334862, |
| "grad_norm": 12.367235565753614, |
| "learning_rate": 1.727403258047063e-06, |
| "loss": 0.7802, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.2640050355183886, |
| "grad_norm": 13.355752507775879, |
| "learning_rate": 1.7266031378590623e-06, |
| "loss": 0.8084, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.2643647154032911, |
| "grad_norm": 15.835476175252543, |
| "learning_rate": 1.7258020310345347e-06, |
| "loss": 0.8823, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.2647243952881935, |
| "grad_norm": 15.452252506761473, |
| "learning_rate": 1.7249999386612841e-06, |
| "loss": 0.8159, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.26508407517309596, |
| "grad_norm": 16.121510751539834, |
| "learning_rate": 1.7241968618284517e-06, |
| "loss": 0.9353, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.2654437550579984, |
| "grad_norm": 9.91924725908932, |
| "learning_rate": 1.7233928016265157e-06, |
| "loss": 0.8034, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2658034349429008, |
| "grad_norm": 30.814627689517852, |
| "learning_rate": 1.7225877591472897e-06, |
| "loss": 0.899, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.26616311482780325, |
| "grad_norm": 11.583573012041843, |
| "learning_rate": 1.721781735483921e-06, |
| "loss": 0.8618, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2665227947127057, |
| "grad_norm": 22.63758120139141, |
| "learning_rate": 1.7209747317308895e-06, |
| "loss": 0.87, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.2668824745976081, |
| "grad_norm": 10.887909712467465, |
| "learning_rate": 1.7201667489840057e-06, |
| "loss": 0.794, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.26724215448251054, |
| "grad_norm": 13.833672774372257, |
| "learning_rate": 1.7193577883404096e-06, |
| "loss": 0.8163, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.267601834367413, |
| "grad_norm": 10.67679191166491, |
| "learning_rate": 1.7185478508985686e-06, |
| "loss": 0.8903, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.26796151425231546, |
| "grad_norm": 19.764225276756495, |
| "learning_rate": 1.7177369377582774e-06, |
| "loss": 0.8141, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.2683211941372179, |
| "grad_norm": 7.453247870573237, |
| "learning_rate": 1.7169250500206543e-06, |
| "loss": 0.7496, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.2686808740221203, |
| "grad_norm": 11.858782522727958, |
| "learning_rate": 1.7161121887881423e-06, |
| "loss": 0.8056, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.26904055390702275, |
| "grad_norm": 562.7981120857124, |
| "learning_rate": 1.715298355164505e-06, |
| "loss": 0.7878, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2694002337919252, |
| "grad_norm": 29.610471689590998, |
| "learning_rate": 1.7144835502548278e-06, |
| "loss": 0.817, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.2697599136768276, |
| "grad_norm": 9.888278314422525, |
| "learning_rate": 1.713667775165514e-06, |
| "loss": 0.8582, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.27011959356173004, |
| "grad_norm": 18.158214855130225, |
| "learning_rate": 1.7128510310042842e-06, |
| "loss": 0.7803, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.2704792734466325, |
| "grad_norm": 13.538079639632755, |
| "learning_rate": 1.7120333188801755e-06, |
| "loss": 0.8486, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.27083895333153496, |
| "grad_norm": 11.011178771660498, |
| "learning_rate": 1.711214639903539e-06, |
| "loss": 0.8576, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.2711986332164374, |
| "grad_norm": 19.191293483671636, |
| "learning_rate": 1.7103949951860388e-06, |
| "loss": 0.8468, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.2715583131013398, |
| "grad_norm": 31.547073310234587, |
| "learning_rate": 1.7095743858406504e-06, |
| "loss": 0.8026, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.27191799298624225, |
| "grad_norm": 12.222187320095932, |
| "learning_rate": 1.7087528129816589e-06, |
| "loss": 0.7992, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.2722776728711447, |
| "grad_norm": 10.245665096008041, |
| "learning_rate": 1.7079302777246577e-06, |
| "loss": 0.7533, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2726373527560471, |
| "grad_norm": 12.281298617797551, |
| "learning_rate": 1.7071067811865474e-06, |
| "loss": 0.8466, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.27299703264094954, |
| "grad_norm": 31.92308596034205, |
| "learning_rate": 1.7062823244855338e-06, |
| "loss": 0.8583, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.273356712525852, |
| "grad_norm": 19.03299009694274, |
| "learning_rate": 1.705456908741126e-06, |
| "loss": 0.8752, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2737163924107544, |
| "grad_norm": 11.873500571277848, |
| "learning_rate": 1.7046305350741364e-06, |
| "loss": 0.7947, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2740760722956569, |
| "grad_norm": 8.234454258923366, |
| "learning_rate": 1.7038032046066766e-06, |
| "loss": 0.8159, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2744357521805593, |
| "grad_norm": 14.261554962936753, |
| "learning_rate": 1.7029749184621589e-06, |
| "loss": 0.8358, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.27479543206546175, |
| "grad_norm": 8.924596703742441, |
| "learning_rate": 1.7021456777652925e-06, |
| "loss": 0.8722, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2751551119503642, |
| "grad_norm": 16.004147890619333, |
| "learning_rate": 1.7013154836420828e-06, |
| "loss": 0.8113, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.2755147918352666, |
| "grad_norm": 25.725102108427837, |
| "learning_rate": 1.7004843372198306e-06, |
| "loss": 0.8038, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.27587447172016905, |
| "grad_norm": 20.592545691085085, |
| "learning_rate": 1.6996522396271282e-06, |
| "loss": 0.8962, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.2762341516050715, |
| "grad_norm": 14.580869058386387, |
| "learning_rate": 1.6988191919938614e-06, |
| "loss": 0.8223, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2765938314899739, |
| "grad_norm": 13.042675904203861, |
| "learning_rate": 1.6979851954512046e-06, |
| "loss": 0.7806, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.27695351137487634, |
| "grad_norm": 20.874681915407717, |
| "learning_rate": 1.697150251131621e-06, |
| "loss": 0.8159, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2773131912597788, |
| "grad_norm": 18.01054025980178, |
| "learning_rate": 1.6963143601688613e-06, |
| "loss": 0.7994, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.27767287114468125, |
| "grad_norm": 9.86441283566478, |
| "learning_rate": 1.6954775236979613e-06, |
| "loss": 0.7713, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.2780325510295837, |
| "grad_norm": 9.603505030837669, |
| "learning_rate": 1.6946397428552403e-06, |
| "loss": 0.8715, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.2783922309144861, |
| "grad_norm": 20.907997863761388, |
| "learning_rate": 1.6938010187783008e-06, |
| "loss": 0.8625, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.27875191079938855, |
| "grad_norm": 19.27890273228633, |
| "learning_rate": 1.692961352606025e-06, |
| "loss": 0.8799, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.279111590684291, |
| "grad_norm": 10.928377651485217, |
| "learning_rate": 1.6921207454785754e-06, |
| "loss": 0.7825, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.2794712705691934, |
| "grad_norm": 10.287586539812986, |
| "learning_rate": 1.6912791985373915e-06, |
| "loss": 0.7821, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.27983095045409584, |
| "grad_norm": 26.2017756984771, |
| "learning_rate": 1.6904367129251894e-06, |
| "loss": 0.8283, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.28019063033899827, |
| "grad_norm": 29.916481658741684, |
| "learning_rate": 1.6895932897859595e-06, |
| "loss": 0.8102, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.2805503102239007, |
| "grad_norm": 29.738609268126158, |
| "learning_rate": 1.6887489302649653e-06, |
| "loss": 0.8027, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2809099901088032, |
| "grad_norm": 14.32653397494833, |
| "learning_rate": 1.6879036355087419e-06, |
| "loss": 0.8453, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.2812696699937056, |
| "grad_norm": 13.982071385865439, |
| "learning_rate": 1.6870574066650943e-06, |
| "loss": 0.8141, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.28162934987860805, |
| "grad_norm": 13.895810588892795, |
| "learning_rate": 1.6862102448830953e-06, |
| "loss": 0.8058, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.2819890297635105, |
| "grad_norm": 23.242023805795334, |
| "learning_rate": 1.6853621513130856e-06, |
| "loss": 0.8829, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.2823487096484129, |
| "grad_norm": 22.034185469781356, |
| "learning_rate": 1.6845131271066705e-06, |
| "loss": 0.8221, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.28270838953331534, |
| "grad_norm": 13.888880004280962, |
| "learning_rate": 1.683663173416719e-06, |
| "loss": 0.8347, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.28306806941821777, |
| "grad_norm": 15.136351064460394, |
| "learning_rate": 1.6828122913973624e-06, |
| "loss": 0.7982, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.2834277493031202, |
| "grad_norm": 17.739420272026308, |
| "learning_rate": 1.6819604822039924e-06, |
| "loss": 0.7921, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.28378742918802263, |
| "grad_norm": 12.20138141629904, |
| "learning_rate": 1.6811077469932599e-06, |
| "loss": 0.8411, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.2841471090729251, |
| "grad_norm": 67.77069162527476, |
| "learning_rate": 1.6802540869230727e-06, |
| "loss": 0.9363, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.28450678895782755, |
| "grad_norm": 11.058784409530341, |
| "learning_rate": 1.679399503152595e-06, |
| "loss": 0.7848, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.28486646884273, |
| "grad_norm": 37.726997131440754, |
| "learning_rate": 1.6785439968422456e-06, |
| "loss": 0.8963, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.2852261487276324, |
| "grad_norm": 13.837868128904297, |
| "learning_rate": 1.6776875691536945e-06, |
| "loss": 0.8425, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.28558582861253484, |
| "grad_norm": 10.65287112444893, |
| "learning_rate": 1.6768302212498644e-06, |
| "loss": 0.846, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.2859455084974373, |
| "grad_norm": 9.740983854721788, |
| "learning_rate": 1.6759719542949267e-06, |
| "loss": 0.8351, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.2863051883823397, |
| "grad_norm": 15.975826234129812, |
| "learning_rate": 1.675112769454301e-06, |
| "loss": 0.8553, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.28666486826724213, |
| "grad_norm": 24.87491128009603, |
| "learning_rate": 1.6742526678946537e-06, |
| "loss": 0.8334, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.28702454815214457, |
| "grad_norm": 11.849439163597227, |
| "learning_rate": 1.673391650783895e-06, |
| "loss": 0.8172, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.28738422803704705, |
| "grad_norm": 34.28785479628006, |
| "learning_rate": 1.6725297192911792e-06, |
| "loss": 0.8297, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.2877439079219495, |
| "grad_norm": 8.532384451679459, |
| "learning_rate": 1.6716668745869016e-06, |
| "loss": 0.7997, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2881035878068519, |
| "grad_norm": 9.696577956984706, |
| "learning_rate": 1.670803117842698e-06, |
| "loss": 0.7694, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.28846326769175434, |
| "grad_norm": 10.684746551489631, |
| "learning_rate": 1.669938450231442e-06, |
| "loss": 0.8267, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.2888229475766568, |
| "grad_norm": 36.494588193321704, |
| "learning_rate": 1.6690728729272454e-06, |
| "loss": 0.8525, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.2891826274615592, |
| "grad_norm": 58.7376340621734, |
| "learning_rate": 1.6682063871054532e-06, |
| "loss": 0.8488, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.28954230734646164, |
| "grad_norm": 12.134256747560729, |
| "learning_rate": 1.667338993942646e-06, |
| "loss": 0.7648, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.28990198723136407, |
| "grad_norm": 8.948659088851144, |
| "learning_rate": 1.6664706946166356e-06, |
| "loss": 0.7751, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.2902616671162665, |
| "grad_norm": 7.572355457329241, |
| "learning_rate": 1.6656014903064638e-06, |
| "loss": 0.7932, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.290621347001169, |
| "grad_norm": 16.210024820799884, |
| "learning_rate": 1.664731382192402e-06, |
| "loss": 0.7886, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.2909810268860714, |
| "grad_norm": 40.53526437751432, |
| "learning_rate": 1.6638603714559487e-06, |
| "loss": 0.8116, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.29134070677097385, |
| "grad_norm": 28.42826510371944, |
| "learning_rate": 1.662988459279828e-06, |
| "loss": 0.8047, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2917003866558763, |
| "grad_norm": 33.46427640754087, |
| "learning_rate": 1.6621156468479875e-06, |
| "loss": 0.8323, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.2920600665407787, |
| "grad_norm": 33.90446508938218, |
| "learning_rate": 1.6612419353455986e-06, |
| "loss": 0.7955, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.29241974642568114, |
| "grad_norm": 11.872234229630894, |
| "learning_rate": 1.660367325959052e-06, |
| "loss": 0.834, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.29277942631058357, |
| "grad_norm": 9.539864324874296, |
| "learning_rate": 1.6594918198759585e-06, |
| "loss": 0.7772, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.293139106195486, |
| "grad_norm": 26.351591824713594, |
| "learning_rate": 1.658615418285146e-06, |
| "loss": 0.8662, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.29349878608038843, |
| "grad_norm": 15.695857483765826, |
| "learning_rate": 1.6577381223766589e-06, |
| "loss": 0.8019, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.2938584659652909, |
| "grad_norm": 12.45686309056222, |
| "learning_rate": 1.6568599333417558e-06, |
| "loss": 0.8718, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.29421814585019335, |
| "grad_norm": 16.1517356511705, |
| "learning_rate": 1.6559808523729078e-06, |
| "loss": 0.8688, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.2945778257350958, |
| "grad_norm": 13.117121759539312, |
| "learning_rate": 1.6551008806637973e-06, |
| "loss": 0.8312, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.2949375056199982, |
| "grad_norm": 15.3307311798555, |
| "learning_rate": 1.6542200194093167e-06, |
| "loss": 0.8336, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.29529718550490064, |
| "grad_norm": 20.590254012592517, |
| "learning_rate": 1.653338269805565e-06, |
| "loss": 0.8925, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.29565686538980307, |
| "grad_norm": 10.511054798493147, |
| "learning_rate": 1.6524556330498491e-06, |
| "loss": 0.7651, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.2960165452747055, |
| "grad_norm": 9.442663727447568, |
| "learning_rate": 1.6515721103406794e-06, |
| "loss": 0.7767, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.29637622515960793, |
| "grad_norm": 13.254710699947385, |
| "learning_rate": 1.6506877028777697e-06, |
| "loss": 0.8256, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.29673590504451036, |
| "grad_norm": 14.322695950891546, |
| "learning_rate": 1.6498024118620348e-06, |
| "loss": 0.8412, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.29709558492941285, |
| "grad_norm": 12.753944063299416, |
| "learning_rate": 1.6489162384955903e-06, |
| "loss": 0.8549, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.2974552648143153, |
| "grad_norm": 15.385293947218466, |
| "learning_rate": 1.6480291839817487e-06, |
| "loss": 0.8415, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.2978149446992177, |
| "grad_norm": 15.553950849317074, |
| "learning_rate": 1.6471412495250195e-06, |
| "loss": 0.7959, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.29817462458412014, |
| "grad_norm": 59.15493918077238, |
| "learning_rate": 1.646252436331107e-06, |
| "loss": 0.8605, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.29853430446902257, |
| "grad_norm": 15.926913399975538, |
| "learning_rate": 1.6453627456069093e-06, |
| "loss": 0.8099, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.298893984353925, |
| "grad_norm": 10.391601367933777, |
| "learning_rate": 1.6444721785605147e-06, |
| "loss": 0.7969, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.29925366423882743, |
| "grad_norm": 19.686532965429244, |
| "learning_rate": 1.6435807364012033e-06, |
| "loss": 0.8759, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.29961334412372986, |
| "grad_norm": 59.95400710719536, |
| "learning_rate": 1.6426884203394416e-06, |
| "loss": 0.818, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.2999730240086323, |
| "grad_norm": 8.213078614768975, |
| "learning_rate": 1.6417952315868842e-06, |
| "loss": 0.8344, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.3003327038935348, |
| "grad_norm": 9.450209629692555, |
| "learning_rate": 1.6409011713563696e-06, |
| "loss": 0.7793, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.3006923837784372, |
| "grad_norm": 25.223960723123003, |
| "learning_rate": 1.6400062408619206e-06, |
| "loss": 0.805, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.30105206366333964, |
| "grad_norm": 11.274061295553878, |
| "learning_rate": 1.6391104413187414e-06, |
| "loss": 0.8034, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3014117435482421, |
| "grad_norm": 20.489780181155755, |
| "learning_rate": 1.638213773943216e-06, |
| "loss": 0.8294, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3017714234331445, |
| "grad_norm": 13.11232749838915, |
| "learning_rate": 1.6373162399529065e-06, |
| "loss": 0.8592, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.30213110331804693, |
| "grad_norm": 34.94053578829066, |
| "learning_rate": 1.6364178405665533e-06, |
| "loss": 0.8019, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.30249078320294936, |
| "grad_norm": 8.027001898160389, |
| "learning_rate": 1.6355185770040696e-06, |
| "loss": 0.7753, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3028504630878518, |
| "grad_norm": 11.5424881957026, |
| "learning_rate": 1.6346184504865442e-06, |
| "loss": 0.8159, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3032101429727542, |
| "grad_norm": 38.00803930831098, |
| "learning_rate": 1.6337174622362364e-06, |
| "loss": 0.7729, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.3035698228576567, |
| "grad_norm": 11.388205546620558, |
| "learning_rate": 1.632815613476576e-06, |
| "loss": 0.842, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.30392950274255914, |
| "grad_norm": 13.345383026363749, |
| "learning_rate": 1.6319129054321614e-06, |
| "loss": 0.82, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3042891826274616, |
| "grad_norm": 15.481172112555491, |
| "learning_rate": 1.6310093393287572e-06, |
| "loss": 0.7908, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.304648862512364, |
| "grad_norm": 17.793364757904424, |
| "learning_rate": 1.6301049163932938e-06, |
| "loss": 0.7903, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.30500854239726644, |
| "grad_norm": 9.398055804932289, |
| "learning_rate": 1.629199637853865e-06, |
| "loss": 0.8063, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.30536822228216887, |
| "grad_norm": 14.577557639396854, |
| "learning_rate": 1.6282935049397266e-06, |
| "loss": 0.8735, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3057279021670713, |
| "grad_norm": 16.84760719078329, |
| "learning_rate": 1.6273865188812934e-06, |
| "loss": 0.8729, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.30608758205197373, |
| "grad_norm": 87.16748970514807, |
| "learning_rate": 1.6264786809101398e-06, |
| "loss": 0.8031, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.30644726193687616, |
| "grad_norm": 7.324167866399497, |
| "learning_rate": 1.6255699922589968e-06, |
| "loss": 0.8374, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3068069418217786, |
| "grad_norm": 17.704387559171618, |
| "learning_rate": 1.6246604541617503e-06, |
| "loss": 0.8714, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3071666217066811, |
| "grad_norm": 10.960718810059277, |
| "learning_rate": 1.6237500678534395e-06, |
| "loss": 0.877, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3075263015915835, |
| "grad_norm": 13.267520621820575, |
| "learning_rate": 1.622838834570256e-06, |
| "loss": 0.7507, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.30788598147648594, |
| "grad_norm": 10.101131660097286, |
| "learning_rate": 1.6219267555495404e-06, |
| "loss": 0.8485, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.30824566136138837, |
| "grad_norm": 28.91401873395612, |
| "learning_rate": 1.6210138320297832e-06, |
| "loss": 0.8341, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3086053412462908, |
| "grad_norm": 9.10947963373033, |
| "learning_rate": 1.62010006525062e-06, |
| "loss": 0.7946, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.30896502113119323, |
| "grad_norm": 29.423941116569043, |
| "learning_rate": 1.619185456452833e-06, |
| "loss": 0.8638, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.30932470101609566, |
| "grad_norm": 70.7487803817577, |
| "learning_rate": 1.6182700068783461e-06, |
| "loss": 0.78, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3096843809009981, |
| "grad_norm": 11.862012856538149, |
| "learning_rate": 1.6173537177702264e-06, |
| "loss": 0.8702, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3100440607859005, |
| "grad_norm": 42.081334101558895, |
| "learning_rate": 1.6164365903726802e-06, |
| "loss": 0.8349, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.310403740670803, |
| "grad_norm": 20.841721919283344, |
| "learning_rate": 1.615518625931052e-06, |
| "loss": 0.8459, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.31076342055570544, |
| "grad_norm": 18.631068109566076, |
| "learning_rate": 1.6145998256918235e-06, |
| "loss": 0.8353, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.31112310044060787, |
| "grad_norm": 13.858882449564776, |
| "learning_rate": 1.613680190902611e-06, |
| "loss": 0.7288, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3114827803255103, |
| "grad_norm": 10.066126035348045, |
| "learning_rate": 1.6127597228121634e-06, |
| "loss": 0.8649, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.31184246021041273, |
| "grad_norm": 28.67784112319774, |
| "learning_rate": 1.611838422670362e-06, |
| "loss": 0.8838, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.31220214009531516, |
| "grad_norm": 8.108908627388885, |
| "learning_rate": 1.610916291728218e-06, |
| "loss": 0.8829, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3125618199802176, |
| "grad_norm": 87.9045905781619, |
| "learning_rate": 1.6099933312378692e-06, |
| "loss": 0.7908, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.31292149986512, |
| "grad_norm": 8.440832703255046, |
| "learning_rate": 1.6090695424525824e-06, |
| "loss": 0.8192, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.31328117975002245, |
| "grad_norm": 14.108324533497566, |
| "learning_rate": 1.6081449266267466e-06, |
| "loss": 0.8439, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.31364085963492494, |
| "grad_norm": 21.058597406933576, |
| "learning_rate": 1.6072194850158754e-06, |
| "loss": 0.7903, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.31400053951982737, |
| "grad_norm": 17.1153669670534, |
| "learning_rate": 1.606293218876603e-06, |
| "loss": 0.8127, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3143602194047298, |
| "grad_norm": 20.063520749174305, |
| "learning_rate": 1.6053661294666831e-06, |
| "loss": 0.7895, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.31471989928963223, |
| "grad_norm": 12.09554048451555, |
| "learning_rate": 1.6044382180449882e-06, |
| "loss": 0.8388, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.31507957917453466, |
| "grad_norm": 9.740248851394714, |
| "learning_rate": 1.6035094858715062e-06, |
| "loss": 0.8546, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3154392590594371, |
| "grad_norm": 11.899318966703719, |
| "learning_rate": 1.6025799342073394e-06, |
| "loss": 0.8245, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3157989389443395, |
| "grad_norm": 31.6291426460199, |
| "learning_rate": 1.6016495643147035e-06, |
| "loss": 0.856, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.31615861882924196, |
| "grad_norm": 8.958829848892421, |
| "learning_rate": 1.6007183774569243e-06, |
| "loss": 0.8267, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.3165182987141444, |
| "grad_norm": 8.648010929262487, |
| "learning_rate": 1.5997863748984384e-06, |
| "loss": 0.8109, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3168779785990469, |
| "grad_norm": 11.004894249365195, |
| "learning_rate": 1.5988535579047886e-06, |
| "loss": 0.8167, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.3172376584839493, |
| "grad_norm": 50.09024201272788, |
| "learning_rate": 1.597919927742624e-06, |
| "loss": 0.8158, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.31759733836885173, |
| "grad_norm": 8.893349558410735, |
| "learning_rate": 1.5969854856796987e-06, |
| "loss": 0.8407, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.31795701825375416, |
| "grad_norm": 10.592646511680684, |
| "learning_rate": 1.596050232984868e-06, |
| "loss": 0.8009, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.3183166981386566, |
| "grad_norm": 10.323815809361715, |
| "learning_rate": 1.5951141709280884e-06, |
| "loss": 0.8136, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.318676378023559, |
| "grad_norm": 12.20503725223687, |
| "learning_rate": 1.5941773007804163e-06, |
| "loss": 0.7926, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.31903605790846146, |
| "grad_norm": 18.903991133341332, |
| "learning_rate": 1.5932396238140039e-06, |
| "loss": 0.804, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3193957377933639, |
| "grad_norm": 16.745142488988655, |
| "learning_rate": 1.5923011413020996e-06, |
| "loss": 0.8534, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.3197554176782663, |
| "grad_norm": 11.82810409668907, |
| "learning_rate": 1.5913618545190466e-06, |
| "loss": 0.8231, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3201150975631688, |
| "grad_norm": 12.8017171989301, |
| "learning_rate": 1.5904217647402784e-06, |
| "loss": 0.8213, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.32047477744807124, |
| "grad_norm": 45.437697875363554, |
| "learning_rate": 1.5894808732423206e-06, |
| "loss": 0.9166, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.32083445733297367, |
| "grad_norm": 18.312331492175126, |
| "learning_rate": 1.5885391813027857e-06, |
| "loss": 0.8344, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3211941372178761, |
| "grad_norm": 14.817855719365022, |
| "learning_rate": 1.587596690200375e-06, |
| "loss": 0.8171, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.3215538171027785, |
| "grad_norm": 16.579974562069108, |
| "learning_rate": 1.5866534012148728e-06, |
| "loss": 0.8675, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.32191349698768096, |
| "grad_norm": 19.738290812053823, |
| "learning_rate": 1.5857093156271493e-06, |
| "loss": 0.8253, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3222731768725834, |
| "grad_norm": 45.64460615929968, |
| "learning_rate": 1.5847644347191543e-06, |
| "loss": 0.7757, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3226328567574858, |
| "grad_norm": 24.035059526207903, |
| "learning_rate": 1.5838187597739185e-06, |
| "loss": 0.7758, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.32299253664238825, |
| "grad_norm": 8.548187185744933, |
| "learning_rate": 1.5828722920755509e-06, |
| "loss": 0.752, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.32335221652729074, |
| "grad_norm": 10.573658213625091, |
| "learning_rate": 1.581925032909236e-06, |
| "loss": 0.8239, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.32371189641219317, |
| "grad_norm": 12.419125746778684, |
| "learning_rate": 1.5809769835612345e-06, |
| "loss": 0.821, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3240715762970956, |
| "grad_norm": 18.363616736374816, |
| "learning_rate": 1.5800281453188791e-06, |
| "loss": 0.8306, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.32443125618199803, |
| "grad_norm": 8.317619792668513, |
| "learning_rate": 1.5790785194705736e-06, |
| "loss": 0.7871, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.32479093606690046, |
| "grad_norm": 9.104270713842354, |
| "learning_rate": 1.5781281073057918e-06, |
| "loss": 0.7563, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.3251506159518029, |
| "grad_norm": 8.197051622130894, |
| "learning_rate": 1.577176910115075e-06, |
| "loss": 0.8388, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.3255102958367053, |
| "grad_norm": 17.408789825661064, |
| "learning_rate": 1.5762249291900303e-06, |
| "loss": 0.856, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.32586997572160775, |
| "grad_norm": 8.64218462106854, |
| "learning_rate": 1.5752721658233293e-06, |
| "loss": 0.7881, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3262296556065102, |
| "grad_norm": 21.889925421685135, |
| "learning_rate": 1.574318621308706e-06, |
| "loss": 0.8433, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.32658933549141267, |
| "grad_norm": 9.032394276112345, |
| "learning_rate": 1.573364296940955e-06, |
| "loss": 0.8011, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.3269490153763151, |
| "grad_norm": 7.425594569946747, |
| "learning_rate": 1.5724091940159302e-06, |
| "loss": 0.8167, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.32730869526121753, |
| "grad_norm": 9.657849211152683, |
| "learning_rate": 1.5714533138305417e-06, |
| "loss": 0.7789, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.32766837514611996, |
| "grad_norm": 12.962398629844566, |
| "learning_rate": 1.570496657682756e-06, |
| "loss": 0.7405, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.3280280550310224, |
| "grad_norm": 9.728680873151585, |
| "learning_rate": 1.5695392268715933e-06, |
| "loss": 0.8395, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3283877349159248, |
| "grad_norm": 17.36816506706582, |
| "learning_rate": 1.5685810226971245e-06, |
| "loss": 0.8606, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.32874741480082725, |
| "grad_norm": 15.394141419267186, |
| "learning_rate": 1.5676220464604723e-06, |
| "loss": 0.8734, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.3291070946857297, |
| "grad_norm": 20.301988586288964, |
| "learning_rate": 1.5666622994638068e-06, |
| "loss": 0.7856, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.3294667745706321, |
| "grad_norm": 7.365140544006204, |
| "learning_rate": 1.5657017830103445e-06, |
| "loss": 0.7697, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.3298264544555346, |
| "grad_norm": 14.840746023750308, |
| "learning_rate": 1.564740498404347e-06, |
| "loss": 0.852, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.33018613434043703, |
| "grad_norm": 11.209068774761208, |
| "learning_rate": 1.5637784469511197e-06, |
| "loss": 0.8597, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.33054581422533946, |
| "grad_norm": 12.600439575847025, |
| "learning_rate": 1.5628156299570078e-06, |
| "loss": 0.884, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.3309054941102419, |
| "grad_norm": 11.004504418307047, |
| "learning_rate": 1.5618520487293978e-06, |
| "loss": 0.767, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3312651739951443, |
| "grad_norm": 16.47348784457448, |
| "learning_rate": 1.5608877045767117e-06, |
| "loss": 0.7997, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.33162485388004675, |
| "grad_norm": 14.172981813767054, |
| "learning_rate": 1.5599225988084096e-06, |
| "loss": 0.8803, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3319845337649492, |
| "grad_norm": 9.312027744107203, |
| "learning_rate": 1.5589567327349844e-06, |
| "loss": 0.8002, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.3323442136498516, |
| "grad_norm": 9.462458498320945, |
| "learning_rate": 1.5579901076679623e-06, |
| "loss": 0.8819, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.33270389353475405, |
| "grad_norm": 30.483989133537733, |
| "learning_rate": 1.5570227249198993e-06, |
| "loss": 0.8576, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.33306357341965653, |
| "grad_norm": 10.674129254191316, |
| "learning_rate": 1.556054585804381e-06, |
| "loss": 0.8554, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.33342325330455896, |
| "grad_norm": 18.07952400429078, |
| "learning_rate": 1.5550856916360193e-06, |
| "loss": 0.7764, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3337829331894614, |
| "grad_norm": 35.58945059855189, |
| "learning_rate": 1.5541160437304521e-06, |
| "loss": 0.7932, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3341426130743638, |
| "grad_norm": 11.530320098353496, |
| "learning_rate": 1.5531456434043402e-06, |
| "loss": 0.8376, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.33450229295926626, |
| "grad_norm": 14.940415551040594, |
| "learning_rate": 1.5521744919753665e-06, |
| "loss": 0.8139, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3348619728441687, |
| "grad_norm": 19.133265887417053, |
| "learning_rate": 1.5512025907622337e-06, |
| "loss": 0.7948, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3352216527290711, |
| "grad_norm": 20.304909409452367, |
| "learning_rate": 1.5502299410846625e-06, |
| "loss": 0.7893, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.33558133261397355, |
| "grad_norm": 18.20029987081357, |
| "learning_rate": 1.5492565442633894e-06, |
| "loss": 0.7907, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.335941012498876, |
| "grad_norm": 11.477770769822252, |
| "learning_rate": 1.5482824016201667e-06, |
| "loss": 0.7741, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.3363006923837784, |
| "grad_norm": 19.009389322033904, |
| "learning_rate": 1.5473075144777585e-06, |
| "loss": 0.8435, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3366603722686809, |
| "grad_norm": 9.206957963342228, |
| "learning_rate": 1.5463318841599405e-06, |
| "loss": 0.7693, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3370200521535833, |
| "grad_norm": 24.27902007180823, |
| "learning_rate": 1.5453555119914963e-06, |
| "loss": 0.8786, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.33737973203848576, |
| "grad_norm": 23.1158786632023, |
| "learning_rate": 1.544378399298218e-06, |
| "loss": 0.8064, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3377394119233882, |
| "grad_norm": 9.442153134819922, |
| "learning_rate": 1.5434005474069029e-06, |
| "loss": 0.8602, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3380990918082906, |
| "grad_norm": 24.385261977107035, |
| "learning_rate": 1.5424219576453523e-06, |
| "loss": 0.7806, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.33845877169319305, |
| "grad_norm": 9.629777488734236, |
| "learning_rate": 1.541442631342369e-06, |
| "loss": 0.8477, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3388184515780955, |
| "grad_norm": 13.53376392358462, |
| "learning_rate": 1.5404625698277557e-06, |
| "loss": 0.8692, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.3391781314629979, |
| "grad_norm": 42.45227574325178, |
| "learning_rate": 1.5394817744323146e-06, |
| "loss": 0.7987, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.33953781134790034, |
| "grad_norm": 9.390799236590395, |
| "learning_rate": 1.5385002464878427e-06, |
| "loss": 0.7894, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.33989749123280283, |
| "grad_norm": 11.917413039094523, |
| "learning_rate": 1.5375179873271333e-06, |
| "loss": 0.7952, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.34025717111770526, |
| "grad_norm": 15.306425899284218, |
| "learning_rate": 1.536534998283972e-06, |
| "loss": 0.8162, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.3406168510026077, |
| "grad_norm": 12.319391145056539, |
| "learning_rate": 1.5355512806931347e-06, |
| "loss": 0.7321, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3409765308875101, |
| "grad_norm": 15.21947795438788, |
| "learning_rate": 1.5345668358903883e-06, |
| "loss": 0.8332, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.34133621077241255, |
| "grad_norm": 12.79023939223576, |
| "learning_rate": 1.5335816652124857e-06, |
| "loss": 0.7526, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.341695890657315, |
| "grad_norm": 12.241239776171069, |
| "learning_rate": 1.5325957699971657e-06, |
| "loss": 0.8083, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3420555705422174, |
| "grad_norm": 15.981128158826372, |
| "learning_rate": 1.5316091515831518e-06, |
| "loss": 0.8536, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.34241525042711984, |
| "grad_norm": 48.50595550604798, |
| "learning_rate": 1.530621811310148e-06, |
| "loss": 0.8867, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.3427749303120223, |
| "grad_norm": 11.161134334105054, |
| "learning_rate": 1.52963375051884e-06, |
| "loss": 0.7749, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.34313461019692476, |
| "grad_norm": 9.40407424062396, |
| "learning_rate": 1.5286449705508913e-06, |
| "loss": 0.8432, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.3434942900818272, |
| "grad_norm": 11.85964243313317, |
| "learning_rate": 1.5276554727489415e-06, |
| "loss": 0.8039, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.3438539699667296, |
| "grad_norm": 14.442397679824648, |
| "learning_rate": 1.5266652584566055e-06, |
| "loss": 0.7995, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.34421364985163205, |
| "grad_norm": 7.721625200366785, |
| "learning_rate": 1.525674329018471e-06, |
| "loss": 0.8102, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.3445733297365345, |
| "grad_norm": 11.322671671562892, |
| "learning_rate": 1.5246826857800968e-06, |
| "loss": 0.7819, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3449330096214369, |
| "grad_norm": 12.528623186014242, |
| "learning_rate": 1.5236903300880105e-06, |
| "loss": 0.8461, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.34529268950633935, |
| "grad_norm": 23.142820551204743, |
| "learning_rate": 1.5226972632897077e-06, |
| "loss": 0.774, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3456523693912418, |
| "grad_norm": 11.677380786620736, |
| "learning_rate": 1.5217034867336497e-06, |
| "loss": 0.7439, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3460120492761442, |
| "grad_norm": 13.080329634154356, |
| "learning_rate": 1.5207090017692603e-06, |
| "loss": 0.9005, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.3463717291610467, |
| "grad_norm": 8.76279501287922, |
| "learning_rate": 1.5197138097469273e-06, |
| "loss": 0.7786, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3467314090459491, |
| "grad_norm": 9.145020451349145, |
| "learning_rate": 1.5187179120179966e-06, |
| "loss": 0.826, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.34709108893085155, |
| "grad_norm": 11.96564424412383, |
| "learning_rate": 1.517721309934774e-06, |
| "loss": 0.8053, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.347450768815754, |
| "grad_norm": 22.996590327643915, |
| "learning_rate": 1.5167240048505198e-06, |
| "loss": 0.7496, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.3478104487006564, |
| "grad_norm": 46.47732574352865, |
| "learning_rate": 1.5157259981194511e-06, |
| "loss": 0.8158, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.34817012858555885, |
| "grad_norm": 9.077840131285068, |
| "learning_rate": 1.5147272910967365e-06, |
| "loss": 0.7397, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.3485298084704613, |
| "grad_norm": 38.49321976187874, |
| "learning_rate": 1.5137278851384957e-06, |
| "loss": 0.8061, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3488894883553637, |
| "grad_norm": 14.699600184538111, |
| "learning_rate": 1.512727781601797e-06, |
| "loss": 0.8551, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.34924916824026614, |
| "grad_norm": 50.54241823893945, |
| "learning_rate": 1.5117269818446568e-06, |
| "loss": 0.785, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3496088481251686, |
| "grad_norm": 12.219731366639781, |
| "learning_rate": 1.5107254872260365e-06, |
| "loss": 0.7976, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.34996852801007106, |
| "grad_norm": 66.37116117852479, |
| "learning_rate": 1.5097232991058406e-06, |
| "loss": 0.8306, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3503282078949735, |
| "grad_norm": 14.744398293903853, |
| "learning_rate": 1.5087204188449162e-06, |
| "loss": 0.8264, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3506878877798759, |
| "grad_norm": 11.939324628211166, |
| "learning_rate": 1.5077168478050493e-06, |
| "loss": 0.861, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.35104756766477835, |
| "grad_norm": 15.47147625235276, |
| "learning_rate": 1.5067125873489648e-06, |
| "loss": 0.8891, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.3514072475496808, |
| "grad_norm": 21.26032650919122, |
| "learning_rate": 1.5057076388403228e-06, |
| "loss": 0.8627, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3517669274345832, |
| "grad_norm": 10.057782853454887, |
| "learning_rate": 1.5047020036437185e-06, |
| "loss": 0.8373, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.35212660731948564, |
| "grad_norm": 49.39308264228791, |
| "learning_rate": 1.503695683124679e-06, |
| "loss": 0.8177, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.35248628720438807, |
| "grad_norm": 506.43358996335076, |
| "learning_rate": 1.5026886786496622e-06, |
| "loss": 0.8482, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.35284596708929056, |
| "grad_norm": 9.421474554469173, |
| "learning_rate": 1.5016809915860546e-06, |
| "loss": 0.8716, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.353205646974193, |
| "grad_norm": 10.756179124311437, |
| "learning_rate": 1.50067262330217e-06, |
| "loss": 0.8771, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3535653268590954, |
| "grad_norm": 11.635019042438932, |
| "learning_rate": 1.4996635751672466e-06, |
| "loss": 0.8755, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.35392500674399785, |
| "grad_norm": 12.993547435586098, |
| "learning_rate": 1.4986538485514464e-06, |
| "loss": 0.815, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.3542846866289003, |
| "grad_norm": 12.48020482228494, |
| "learning_rate": 1.4976434448258517e-06, |
| "loss": 0.7832, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3546443665138027, |
| "grad_norm": 22.268540662140516, |
| "learning_rate": 1.4966323653624655e-06, |
| "loss": 0.8134, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.35500404639870514, |
| "grad_norm": 16.195090971616366, |
| "learning_rate": 1.4956206115342074e-06, |
| "loss": 0.8019, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.3553637262836076, |
| "grad_norm": 14.341331507258404, |
| "learning_rate": 1.4946081847149133e-06, |
| "loss": 0.7698, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.35572340616851, |
| "grad_norm": 25.95684895405825, |
| "learning_rate": 1.4935950862793321e-06, |
| "loss": 0.8292, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3560830860534125, |
| "grad_norm": 11.501720854734899, |
| "learning_rate": 1.4925813176031258e-06, |
| "loss": 0.8614, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3564427659383149, |
| "grad_norm": 23.09362849823031, |
| "learning_rate": 1.4915668800628657e-06, |
| "loss": 0.8302, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.35680244582321735, |
| "grad_norm": 16.445160172029425, |
| "learning_rate": 1.490551775036032e-06, |
| "loss": 0.8054, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3571621257081198, |
| "grad_norm": 71.19646553017826, |
| "learning_rate": 1.4895360039010098e-06, |
| "loss": 0.8277, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3575218055930222, |
| "grad_norm": 12.55336952743194, |
| "learning_rate": 1.4885195680370912e-06, |
| "loss": 0.839, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.35788148547792464, |
| "grad_norm": 8.47142278878508, |
| "learning_rate": 1.4875024688244682e-06, |
| "loss": 0.7229, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.3582411653628271, |
| "grad_norm": 11.800045379862597, |
| "learning_rate": 1.4864847076442355e-06, |
| "loss": 0.7626, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.3586008452477295, |
| "grad_norm": 12.20058748151486, |
| "learning_rate": 1.4854662858783854e-06, |
| "loss": 0.8258, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.35896052513263194, |
| "grad_norm": 8.480065883757563, |
| "learning_rate": 1.4844472049098085e-06, |
| "loss": 0.8154, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.3593202050175344, |
| "grad_norm": 10.14693035519556, |
| "learning_rate": 1.4834274661222895e-06, |
| "loss": 0.7837, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.35967988490243685, |
| "grad_norm": 17.540092433906466, |
| "learning_rate": 1.4824070709005061e-06, |
| "loss": 0.757, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3600395647873393, |
| "grad_norm": 12.006178562565093, |
| "learning_rate": 1.4813860206300284e-06, |
| "loss": 0.8361, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3603992446722417, |
| "grad_norm": 11.474505736413265, |
| "learning_rate": 1.4803643166973152e-06, |
| "loss": 0.751, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.36075892455714414, |
| "grad_norm": 12.295639473927698, |
| "learning_rate": 1.4793419604897137e-06, |
| "loss": 0.8236, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3611186044420466, |
| "grad_norm": 10.585220227913453, |
| "learning_rate": 1.4783189533954553e-06, |
| "loss": 0.8428, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.361478284326949, |
| "grad_norm": 30.23962883155143, |
| "learning_rate": 1.477295296803657e-06, |
| "loss": 0.849, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.36183796421185144, |
| "grad_norm": 10.507032050043298, |
| "learning_rate": 1.4762709921043163e-06, |
| "loss": 0.7515, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.36219764409675387, |
| "grad_norm": 13.380543084981593, |
| "learning_rate": 1.4752460406883121e-06, |
| "loss": 0.8287, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.3625573239816563, |
| "grad_norm": 20.284545047328862, |
| "learning_rate": 1.4742204439473997e-06, |
| "loss": 0.872, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3629170038665588, |
| "grad_norm": 11.297613284740551, |
| "learning_rate": 1.4731942032742125e-06, |
| "loss": 0.8191, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.3632766837514612, |
| "grad_norm": 14.76800834057125, |
| "learning_rate": 1.472167320062257e-06, |
| "loss": 0.8411, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 9.204006423305321, |
| "learning_rate": 1.471139795705913e-06, |
| "loss": 0.8407, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3639960435212661, |
| "grad_norm": 8.349398346809124, |
| "learning_rate": 1.4701116316004306e-06, |
| "loss": 0.8025, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.3643557234061685, |
| "grad_norm": 43.767938880585604, |
| "learning_rate": 1.4690828291419281e-06, |
| "loss": 0.8662, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.36471540329107094, |
| "grad_norm": 19.22175395386423, |
| "learning_rate": 1.4680533897273912e-06, |
| "loss": 0.7194, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.36507508317597337, |
| "grad_norm": 19.349534981345588, |
| "learning_rate": 1.4670233147546707e-06, |
| "loss": 0.7998, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.3654347630608758, |
| "grad_norm": 10.127794747695477, |
| "learning_rate": 1.4659926056224796e-06, |
| "loss": 0.7606, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.36579444294577823, |
| "grad_norm": 8.774573308044332, |
| "learning_rate": 1.4649612637303928e-06, |
| "loss": 0.823, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.3661541228306807, |
| "grad_norm": 28.81593303028586, |
| "learning_rate": 1.4639292904788438e-06, |
| "loss": 0.7986, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.36651380271558315, |
| "grad_norm": 8.516115832154505, |
| "learning_rate": 1.462896687269124e-06, |
| "loss": 0.7682, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.3668734826004856, |
| "grad_norm": 11.481660856244018, |
| "learning_rate": 1.4618634555033799e-06, |
| "loss": 0.8325, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.367233162485388, |
| "grad_norm": 13.55050121060625, |
| "learning_rate": 1.460829596584611e-06, |
| "loss": 0.8463, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.36759284237029044, |
| "grad_norm": 31.753290279863542, |
| "learning_rate": 1.4597951119166694e-06, |
| "loss": 0.7871, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.36795252225519287, |
| "grad_norm": 8.851091556426617, |
| "learning_rate": 1.4587600029042562e-06, |
| "loss": 0.9002, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.3683122021400953, |
| "grad_norm": 17.178291585278597, |
| "learning_rate": 1.4577242709529207e-06, |
| "loss": 0.8519, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.36867188202499773, |
| "grad_norm": 8.280756385424112, |
| "learning_rate": 1.4566879174690575e-06, |
| "loss": 0.7981, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.36903156190990016, |
| "grad_norm": 9.944689050913146, |
| "learning_rate": 1.4556509438599056e-06, |
| "loss": 0.8026, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.36939124179480265, |
| "grad_norm": 8.561981316354894, |
| "learning_rate": 1.454613351533546e-06, |
| "loss": 0.8253, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.3697509216797051, |
| "grad_norm": 238.36052734951087, |
| "learning_rate": 1.4535751418988998e-06, |
| "loss": 0.8453, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.3701106015646075, |
| "grad_norm": 10.415708338046755, |
| "learning_rate": 1.4525363163657263e-06, |
| "loss": 0.8229, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.37047028144950994, |
| "grad_norm": 21.256646098290446, |
| "learning_rate": 1.4514968763446212e-06, |
| "loss": 0.8246, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3708299613344124, |
| "grad_norm": 13.820460952610205, |
| "learning_rate": 1.4504568232470142e-06, |
| "loss": 0.8992, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.3711896412193148, |
| "grad_norm": 7.847945102008554, |
| "learning_rate": 1.4494161584851686e-06, |
| "loss": 0.7884, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.37154932110421723, |
| "grad_norm": 36.901480910997556, |
| "learning_rate": 1.4483748834721764e-06, |
| "loss": 0.8843, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.37190900098911966, |
| "grad_norm": 31.933756144343516, |
| "learning_rate": 1.4473329996219603e-06, |
| "loss": 0.8774, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.3722686808740221, |
| "grad_norm": 12.570281355916684, |
| "learning_rate": 1.4462905083492682e-06, |
| "loss": 0.7677, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.3726283607589246, |
| "grad_norm": 24.448011209673478, |
| "learning_rate": 1.4452474110696738e-06, |
| "loss": 0.9001, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.372988040643827, |
| "grad_norm": 16.64188594311135, |
| "learning_rate": 1.4442037091995725e-06, |
| "loss": 0.7804, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.37334772052872944, |
| "grad_norm": 10.787966777653638, |
| "learning_rate": 1.443159404156182e-06, |
| "loss": 0.7997, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.3737074004136319, |
| "grad_norm": 24.73374128276489, |
| "learning_rate": 1.4421144973575382e-06, |
| "loss": 0.8383, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.3740670802985343, |
| "grad_norm": 12.779768819403833, |
| "learning_rate": 1.4410689902224946e-06, |
| "loss": 0.8078, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.37442676018343674, |
| "grad_norm": 12.334032840610424, |
| "learning_rate": 1.4400228841707193e-06, |
| "loss": 0.8493, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.37478644006833917, |
| "grad_norm": 10.383999430125927, |
| "learning_rate": 1.438976180622694e-06, |
| "loss": 0.7998, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.3751461199532416, |
| "grad_norm": 8.54968246530765, |
| "learning_rate": 1.4379288809997119e-06, |
| "loss": 0.7873, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.375505799838144, |
| "grad_norm": 9.278192896177686, |
| "learning_rate": 1.4368809867238752e-06, |
| "loss": 0.7953, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.3758654797230465, |
| "grad_norm": 12.610003559394263, |
| "learning_rate": 1.435832499218094e-06, |
| "loss": 0.8028, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.37622515960794894, |
| "grad_norm": 10.644761875930058, |
| "learning_rate": 1.4347834199060833e-06, |
| "loss": 0.7547, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.3765848394928514, |
| "grad_norm": 9.931803934694264, |
| "learning_rate": 1.4337337502123626e-06, |
| "loss": 0.7901, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.3769445193777538, |
| "grad_norm": 12.302593898497262, |
| "learning_rate": 1.432683491562252e-06, |
| "loss": 0.8149, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.37730419926265624, |
| "grad_norm": 22.05992852868052, |
| "learning_rate": 1.4316326453818727e-06, |
| "loss": 0.8674, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.37766387914755867, |
| "grad_norm": 17.45343281461148, |
| "learning_rate": 1.4305812130981415e-06, |
| "loss": 0.7951, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3780235590324611, |
| "grad_norm": 11.71295204378767, |
| "learning_rate": 1.4295291961387741e-06, |
| "loss": 0.7485, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.37838323891736353, |
| "grad_norm": 13.500946291081991, |
| "learning_rate": 1.4284765959322772e-06, |
| "loss": 0.8303, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.37874291880226596, |
| "grad_norm": 10.445069567715905, |
| "learning_rate": 1.4274234139079511e-06, |
| "loss": 0.7582, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.37910259868716845, |
| "grad_norm": 34.764581398060585, |
| "learning_rate": 1.4263696514958858e-06, |
| "loss": 0.7737, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.3794622785720709, |
| "grad_norm": 18.256324339254622, |
| "learning_rate": 1.4253153101269596e-06, |
| "loss": 0.7754, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.3798219584569733, |
| "grad_norm": 9.211567863325408, |
| "learning_rate": 1.4242603912328365e-06, |
| "loss": 0.8182, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.38018163834187574, |
| "grad_norm": 7.852936897662152, |
| "learning_rate": 1.4232048962459648e-06, |
| "loss": 0.8347, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.38054131822677817, |
| "grad_norm": 16.74312021145382, |
| "learning_rate": 1.4221488265995754e-06, |
| "loss": 0.8697, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.3809009981116806, |
| "grad_norm": 11.73914518751653, |
| "learning_rate": 1.421092183727679e-06, |
| "loss": 0.8545, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.38126067799658303, |
| "grad_norm": 8.879225199770659, |
| "learning_rate": 1.4200349690650653e-06, |
| "loss": 0.8468, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.38162035788148546, |
| "grad_norm": 96.90806110127664, |
| "learning_rate": 1.4189771840472995e-06, |
| "loss": 0.785, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.3819800377663879, |
| "grad_norm": 8.306537706936869, |
| "learning_rate": 1.4179188301107228e-06, |
| "loss": 0.8579, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.3823397176512904, |
| "grad_norm": 10.093154336668416, |
| "learning_rate": 1.416859908692447e-06, |
| "loss": 0.857, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.3826993975361928, |
| "grad_norm": 19.53334063430709, |
| "learning_rate": 1.4158004212303563e-06, |
| "loss": 0.8353, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.38305907742109524, |
| "grad_norm": 30.012211191584232, |
| "learning_rate": 1.414740369163102e-06, |
| "loss": 0.7892, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.38341875730599767, |
| "grad_norm": 7.6470646148037815, |
| "learning_rate": 1.413679753930103e-06, |
| "loss": 0.8003, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.3837784371909001, |
| "grad_norm": 15.82080272840552, |
| "learning_rate": 1.4126185769715426e-06, |
| "loss": 0.82, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.38413811707580253, |
| "grad_norm": 7.730309662956251, |
| "learning_rate": 1.4115568397283668e-06, |
| "loss": 0.8057, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.38449779696070496, |
| "grad_norm": 10.327301299623088, |
| "learning_rate": 1.410494543642283e-06, |
| "loss": 0.8456, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.3848574768456074, |
| "grad_norm": 8.781388123477912, |
| "learning_rate": 1.4094316901557562e-06, |
| "loss": 0.809, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.3852171567305098, |
| "grad_norm": 98.0099367935755, |
| "learning_rate": 1.408368280712009e-06, |
| "loss": 0.774, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.3855768366154123, |
| "grad_norm": 21.51930297396018, |
| "learning_rate": 1.4073043167550196e-06, |
| "loss": 0.7839, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.38593651650031474, |
| "grad_norm": 10.168407590746957, |
| "learning_rate": 1.406239799729518e-06, |
| "loss": 0.8474, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.38629619638521717, |
| "grad_norm": 11.143546461481547, |
| "learning_rate": 1.4051747310809861e-06, |
| "loss": 0.8234, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.3866558762701196, |
| "grad_norm": 10.014573885018102, |
| "learning_rate": 1.4041091122556537e-06, |
| "loss": 0.8624, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.38701555615502203, |
| "grad_norm": 9.203697477888994, |
| "learning_rate": 1.403042944700499e-06, |
| "loss": 0.7798, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.38737523603992446, |
| "grad_norm": 17.70155693236071, |
| "learning_rate": 1.4019762298632442e-06, |
| "loss": 0.8215, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.3877349159248269, |
| "grad_norm": 21.144004989509693, |
| "learning_rate": 1.400908969192356e-06, |
| "loss": 0.8341, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.3880945958097293, |
| "grad_norm": 42.44024629613774, |
| "learning_rate": 1.3998411641370401e-06, |
| "loss": 0.8266, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.38845427569463176, |
| "grad_norm": 30.42272061255153, |
| "learning_rate": 1.398772816147244e-06, |
| "loss": 0.8196, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3888139555795342, |
| "grad_norm": 10.105715515251974, |
| "learning_rate": 1.3977039266736506e-06, |
| "loss": 0.7754, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.3891736354644367, |
| "grad_norm": 11.746605806057, |
| "learning_rate": 1.3966344971676786e-06, |
| "loss": 0.9183, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.3895333153493391, |
| "grad_norm": 26.766407550188198, |
| "learning_rate": 1.39556452908148e-06, |
| "loss": 0.8562, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.38989299523424154, |
| "grad_norm": 18.759620530870333, |
| "learning_rate": 1.3944940238679381e-06, |
| "loss": 0.7709, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.39025267511914397, |
| "grad_norm": 9.30940685054903, |
| "learning_rate": 1.3934229829806657e-06, |
| "loss": 0.8328, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.3906123550040464, |
| "grad_norm": 14.569851121933846, |
| "learning_rate": 1.3923514078740031e-06, |
| "loss": 0.8178, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.3909720348889488, |
| "grad_norm": 9.050830910649225, |
| "learning_rate": 1.3912793000030152e-06, |
| "loss": 0.7815, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.39133171477385126, |
| "grad_norm": 20.182619801572873, |
| "learning_rate": 1.3902066608234916e-06, |
| "loss": 0.8243, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.3916913946587537, |
| "grad_norm": 17.198218422594294, |
| "learning_rate": 1.389133491791942e-06, |
| "loss": 0.859, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.3920510745436561, |
| "grad_norm": 11.92497597568398, |
| "learning_rate": 1.388059794365597e-06, |
| "loss": 0.8262, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.3924107544285586, |
| "grad_norm": 14.691891278167247, |
| "learning_rate": 1.3869855700024028e-06, |
| "loss": 0.872, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.39277043431346104, |
| "grad_norm": 58.65616037540032, |
| "learning_rate": 1.3859108201610235e-06, |
| "loss": 0.8695, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.39313011419836347, |
| "grad_norm": 17.730256377111736, |
| "learning_rate": 1.3848355463008344e-06, |
| "loss": 0.7734, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.3934897940832659, |
| "grad_norm": 8.898215862601006, |
| "learning_rate": 1.383759749881924e-06, |
| "loss": 0.8358, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.39384947396816833, |
| "grad_norm": 9.273330467024028, |
| "learning_rate": 1.3826834323650898e-06, |
| "loss": 0.8524, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.39420915385307076, |
| "grad_norm": 8.076849244230383, |
| "learning_rate": 1.3816065952118365e-06, |
| "loss": 0.7429, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.3945688337379732, |
| "grad_norm": 9.3288544839028, |
| "learning_rate": 1.3805292398843753e-06, |
| "loss": 0.8314, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.3949285136228756, |
| "grad_norm": 110.01055869569176, |
| "learning_rate": 1.37945136784562e-06, |
| "loss": 0.7984, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.39528819350777805, |
| "grad_norm": 12.333689127918134, |
| "learning_rate": 1.3783729805591873e-06, |
| "loss": 0.8953, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.39564787339268054, |
| "grad_norm": 19.067730795891276, |
| "learning_rate": 1.3772940794893914e-06, |
| "loss": 0.785, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.39600755327758297, |
| "grad_norm": 24.615349848830494, |
| "learning_rate": 1.376214666101247e-06, |
| "loss": 0.7983, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.3963672331624854, |
| "grad_norm": 13.380900687883715, |
| "learning_rate": 1.3751347418604621e-06, |
| "loss": 0.884, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.39672691304738783, |
| "grad_norm": 8.981897254003437, |
| "learning_rate": 1.3740543082334397e-06, |
| "loss": 0.7893, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.39708659293229026, |
| "grad_norm": 15.905987338375768, |
| "learning_rate": 1.3729733666872734e-06, |
| "loss": 0.8062, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.3974462728171927, |
| "grad_norm": 12.35174296620484, |
| "learning_rate": 1.3718919186897479e-06, |
| "loss": 0.8566, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.3978059527020951, |
| "grad_norm": 13.415687900596817, |
| "learning_rate": 1.3708099657093345e-06, |
| "loss": 0.8429, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.39816563258699755, |
| "grad_norm": 7.651709359237927, |
| "learning_rate": 1.3697275092151906e-06, |
| "loss": 0.8384, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.3985253124719, |
| "grad_norm": 11.375541052279686, |
| "learning_rate": 1.3686445506771568e-06, |
| "loss": 0.8101, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.39888499235680247, |
| "grad_norm": 29.087280176558043, |
| "learning_rate": 1.3675610915657566e-06, |
| "loss": 0.7717, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.3992446722417049, |
| "grad_norm": 8.957288712416425, |
| "learning_rate": 1.366477133352192e-06, |
| "loss": 0.8606, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.39960435212660733, |
| "grad_norm": 26.20743774800868, |
| "learning_rate": 1.3653926775083435e-06, |
| "loss": 0.8882, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.39996403201150976, |
| "grad_norm": 18.21422500140279, |
| "learning_rate": 1.3643077255067664e-06, |
| "loss": 0.8443, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.4003237118964122, |
| "grad_norm": 13.865360018920079, |
| "learning_rate": 1.3632222788206913e-06, |
| "loss": 0.7962, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.4006833917813146, |
| "grad_norm": 20.948730649362815, |
| "learning_rate": 1.3621363389240187e-06, |
| "loss": 0.9649, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.40104307166621705, |
| "grad_norm": 8.86259566115294, |
| "learning_rate": 1.36104990729132e-06, |
| "loss": 0.84, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.4014027515511195, |
| "grad_norm": 16.948794281131754, |
| "learning_rate": 1.359962985397834e-06, |
| "loss": 0.7539, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.4017624314360219, |
| "grad_norm": 12.958441105299196, |
| "learning_rate": 1.3588755747194653e-06, |
| "loss": 0.7699, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.4021221113209244, |
| "grad_norm": 8.103833208902573, |
| "learning_rate": 1.3577876767327819e-06, |
| "loss": 0.8126, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.40248179120582683, |
| "grad_norm": 10.616654719018031, |
| "learning_rate": 1.3566992929150135e-06, |
| "loss": 0.8136, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.40284147109072926, |
| "grad_norm": 11.982269199251334, |
| "learning_rate": 1.3556104247440504e-06, |
| "loss": 0.7557, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4032011509756317, |
| "grad_norm": 12.61067259437186, |
| "learning_rate": 1.3545210736984392e-06, |
| "loss": 0.816, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.4035608308605341, |
| "grad_norm": 30.616245149026604, |
| "learning_rate": 1.3534312412573834e-06, |
| "loss": 0.8658, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.40392051074543656, |
| "grad_norm": 33.15211026998674, |
| "learning_rate": 1.3523409289007397e-06, |
| "loss": 0.8177, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.404280190630339, |
| "grad_norm": 18.671486206489256, |
| "learning_rate": 1.3512501381090155e-06, |
| "loss": 0.773, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.4046398705152414, |
| "grad_norm": 15.838979634447169, |
| "learning_rate": 1.3501588703633702e-06, |
| "loss": 0.7824, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.40499955040014385, |
| "grad_norm": 15.728426784086825, |
| "learning_rate": 1.3490671271456081e-06, |
| "loss": 0.8208, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.40535923028504633, |
| "grad_norm": 13.296285622727124, |
| "learning_rate": 1.3479749099381817e-06, |
| "loss": 0.874, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.40571891016994877, |
| "grad_norm": 16.681213486243337, |
| "learning_rate": 1.3468822202241847e-06, |
| "loss": 0.862, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.4060785900548512, |
| "grad_norm": 16.518023213124945, |
| "learning_rate": 1.3457890594873545e-06, |
| "loss": 0.7991, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.4064382699397536, |
| "grad_norm": 7.661635968834577, |
| "learning_rate": 1.3446954292120664e-06, |
| "loss": 0.8425, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.40679794982465606, |
| "grad_norm": 24.024008499482644, |
| "learning_rate": 1.3436013308833348e-06, |
| "loss": 0.8415, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.4071576297095585, |
| "grad_norm": 20.253189055266766, |
| "learning_rate": 1.3425067659868084e-06, |
| "loss": 0.847, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.4075173095944609, |
| "grad_norm": 30.131942993580697, |
| "learning_rate": 1.3414117360087697e-06, |
| "loss": 0.877, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.40787698947936335, |
| "grad_norm": 7.759127811054072, |
| "learning_rate": 1.340316242436134e-06, |
| "loss": 0.7446, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.4082366693642658, |
| "grad_norm": 10.095478497154414, |
| "learning_rate": 1.339220286756444e-06, |
| "loss": 0.8167, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.40859634924916827, |
| "grad_norm": 7.703546068052134, |
| "learning_rate": 1.3381238704578716e-06, |
| "loss": 0.7909, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.4089560291340707, |
| "grad_norm": 9.605978827864863, |
| "learning_rate": 1.3370269950292132e-06, |
| "loss": 0.8524, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.40931570901897313, |
| "grad_norm": 14.056068815384645, |
| "learning_rate": 1.3359296619598892e-06, |
| "loss": 0.8288, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.40967538890387556, |
| "grad_norm": 10.631086756096403, |
| "learning_rate": 1.334831872739941e-06, |
| "loss": 0.7538, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.410035068788778, |
| "grad_norm": 8.684603876787211, |
| "learning_rate": 1.3337336288600297e-06, |
| "loss": 0.8113, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4103947486736804, |
| "grad_norm": 37.72680423919033, |
| "learning_rate": 1.3326349318114334e-06, |
| "loss": 0.829, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.41075442855858285, |
| "grad_norm": 15.359788775736618, |
| "learning_rate": 1.3315357830860458e-06, |
| "loss": 0.7683, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.4111141084434853, |
| "grad_norm": 9.887705149920857, |
| "learning_rate": 1.3304361841763745e-06, |
| "loss": 0.7601, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.4114737883283877, |
| "grad_norm": 11.806306747173277, |
| "learning_rate": 1.3293361365755372e-06, |
| "loss": 0.8496, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.4118334682132902, |
| "grad_norm": 8.680706338600322, |
| "learning_rate": 1.3282356417772616e-06, |
| "loss": 0.7706, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.41219314809819263, |
| "grad_norm": 16.094061475463267, |
| "learning_rate": 1.3271347012758828e-06, |
| "loss": 0.7889, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.41255282798309506, |
| "grad_norm": 8.12872960363513, |
| "learning_rate": 1.3260333165663405e-06, |
| "loss": 0.8137, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.4129125078679975, |
| "grad_norm": 22.447001029360134, |
| "learning_rate": 1.324931489144178e-06, |
| "loss": 0.8666, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.4132721877528999, |
| "grad_norm": 10.386632819287223, |
| "learning_rate": 1.3238292205055394e-06, |
| "loss": 0.7832, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.41363186763780235, |
| "grad_norm": 12.107129360878472, |
| "learning_rate": 1.3227265121471689e-06, |
| "loss": 0.8319, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4139915475227048, |
| "grad_norm": 11.661602549688038, |
| "learning_rate": 1.3216233655664064e-06, |
| "loss": 0.8509, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.4143512274076072, |
| "grad_norm": 7.598324574193326, |
| "learning_rate": 1.3205197822611876e-06, |
| "loss": 0.7538, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.41471090729250965, |
| "grad_norm": 10.398107676973735, |
| "learning_rate": 1.3194157637300413e-06, |
| "loss": 0.7999, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.4150705871774121, |
| "grad_norm": 14.88430692067026, |
| "learning_rate": 1.318311311472087e-06, |
| "loss": 0.7972, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.41543026706231456, |
| "grad_norm": 9.035578815704389, |
| "learning_rate": 1.3172064269870334e-06, |
| "loss": 0.7613, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.415789946947217, |
| "grad_norm": 9.698803746884536, |
| "learning_rate": 1.3161011117751754e-06, |
| "loss": 0.9181, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4161496268321194, |
| "grad_norm": 8.563695782621274, |
| "learning_rate": 1.3149953673373943e-06, |
| "loss": 0.8396, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.41650930671702185, |
| "grad_norm": 12.592925677092781, |
| "learning_rate": 1.3138891951751526e-06, |
| "loss": 0.7965, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.4168689866019243, |
| "grad_norm": 29.84248534355901, |
| "learning_rate": 1.3127825967904943e-06, |
| "loss": 0.7692, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.4172286664868267, |
| "grad_norm": 16.973817623923992, |
| "learning_rate": 1.3116755736860421e-06, |
| "loss": 0.8561, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.41758834637172915, |
| "grad_norm": 30.96181954207072, |
| "learning_rate": 1.3105681273649957e-06, |
| "loss": 0.8723, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.4179480262566316, |
| "grad_norm": 8.705331458524364, |
| "learning_rate": 1.3094602593311292e-06, |
| "loss": 0.8276, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.418307706141534, |
| "grad_norm": 11.948946543596383, |
| "learning_rate": 1.3083519710887894e-06, |
| "loss": 0.7731, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.4186673860264365, |
| "grad_norm": 14.658515304037246, |
| "learning_rate": 1.307243264142893e-06, |
| "loss": 0.7962, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4190270659113389, |
| "grad_norm": 9.97210830939083, |
| "learning_rate": 1.3061341399989266e-06, |
| "loss": 0.8541, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.41938674579624136, |
| "grad_norm": 11.08794321794277, |
| "learning_rate": 1.3050246001629423e-06, |
| "loss": 0.8469, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4197464256811438, |
| "grad_norm": 31.824399831222916, |
| "learning_rate": 1.3039146461415573e-06, |
| "loss": 0.8517, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.4201061055660462, |
| "grad_norm": 15.681118811218097, |
| "learning_rate": 1.30280427944195e-06, |
| "loss": 0.8312, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.42046578545094865, |
| "grad_norm": 10.889552931272489, |
| "learning_rate": 1.3016935015718612e-06, |
| "loss": 0.8185, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.4208254653358511, |
| "grad_norm": 23.96497199753361, |
| "learning_rate": 1.3005823140395877e-06, |
| "loss": 0.8774, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4211851452207535, |
| "grad_norm": 8.076426939251284, |
| "learning_rate": 1.2994707183539847e-06, |
| "loss": 0.8003, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.42154482510565594, |
| "grad_norm": 13.089536429771677, |
| "learning_rate": 1.29835871602446e-06, |
| "loss": 0.794, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.4219045049905584, |
| "grad_norm": 11.486572894743636, |
| "learning_rate": 1.2972463085609741e-06, |
| "loss": 0.7904, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.42226418487546086, |
| "grad_norm": 27.854878543369725, |
| "learning_rate": 1.2961334974740386e-06, |
| "loss": 0.875, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.4226238647603633, |
| "grad_norm": 14.369805685256482, |
| "learning_rate": 1.2950202842747114e-06, |
| "loss": 0.8853, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.4229835446452657, |
| "grad_norm": 82.9946121363335, |
| "learning_rate": 1.2939066704745977e-06, |
| "loss": 0.7587, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.42334322453016815, |
| "grad_norm": 25.80508621463069, |
| "learning_rate": 1.2927926575858462e-06, |
| "loss": 0.7773, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.4237029044150706, |
| "grad_norm": 15.569812419461757, |
| "learning_rate": 1.2916782471211476e-06, |
| "loss": 0.7987, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.424062584299973, |
| "grad_norm": 10.82278896220999, |
| "learning_rate": 1.2905634405937325e-06, |
| "loss": 0.8372, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.42442226418487544, |
| "grad_norm": 9.49168857907145, |
| "learning_rate": 1.2894482395173693e-06, |
| "loss": 0.7634, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.4247819440697779, |
| "grad_norm": 13.358456853547283, |
| "learning_rate": 1.2883326454063621e-06, |
| "loss": 0.8061, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.42514162395468036, |
| "grad_norm": 10.116587204702688, |
| "learning_rate": 1.2872166597755488e-06, |
| "loss": 0.7762, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.4255013038395828, |
| "grad_norm": 15.816590836728992, |
| "learning_rate": 1.2861002841402981e-06, |
| "loss": 0.8105, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.4258609837244852, |
| "grad_norm": 15.057231555218781, |
| "learning_rate": 1.2849835200165103e-06, |
| "loss": 0.8121, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.42622066360938765, |
| "grad_norm": 10.617264268821197, |
| "learning_rate": 1.2838663689206105e-06, |
| "loss": 0.7983, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4265803434942901, |
| "grad_norm": 25.285208379786997, |
| "learning_rate": 1.2827488323695521e-06, |
| "loss": 0.8107, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.4269400233791925, |
| "grad_norm": 11.791095382152786, |
| "learning_rate": 1.2816309118808094e-06, |
| "loss": 0.7905, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.42729970326409494, |
| "grad_norm": 20.31416399255396, |
| "learning_rate": 1.2805126089723797e-06, |
| "loss": 0.8098, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4276593831489974, |
| "grad_norm": 13.500661045780163, |
| "learning_rate": 1.2793939251627786e-06, |
| "loss": 0.8793, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.4280190630338998, |
| "grad_norm": 12.351809166340395, |
| "learning_rate": 1.27827486197104e-06, |
| "loss": 0.8212, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4283787429188023, |
| "grad_norm": 9.00943104462971, |
| "learning_rate": 1.2771554209167115e-06, |
| "loss": 0.7854, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.4287384228037047, |
| "grad_norm": 12.66360659572935, |
| "learning_rate": 1.2760356035198553e-06, |
| "loss": 0.8775, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.42909810268860715, |
| "grad_norm": 7.212969656334713, |
| "learning_rate": 1.2749154113010429e-06, |
| "loss": 0.8348, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.4294577825735096, |
| "grad_norm": 17.116970732423972, |
| "learning_rate": 1.273794845781357e-06, |
| "loss": 0.8068, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.429817462458412, |
| "grad_norm": 19.640202487139703, |
| "learning_rate": 1.272673908482385e-06, |
| "loss": 0.8116, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.43017714234331444, |
| "grad_norm": 17.183855064124938, |
| "learning_rate": 1.2715526009262208e-06, |
| "loss": 0.8086, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.4305368222282169, |
| "grad_norm": 15.824711313247183, |
| "learning_rate": 1.2704309246354597e-06, |
| "loss": 0.7469, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.4308965021131193, |
| "grad_norm": 101.31063472576798, |
| "learning_rate": 1.2693088811331985e-06, |
| "loss": 0.7757, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.43125618199802174, |
| "grad_norm": 13.437342130247243, |
| "learning_rate": 1.2681864719430326e-06, |
| "loss": 0.8051, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4316158618829242, |
| "grad_norm": 11.560463329221609, |
| "learning_rate": 1.267063698589054e-06, |
| "loss": 0.8188, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.43197554176782665, |
| "grad_norm": 15.9775143215431, |
| "learning_rate": 1.2659405625958485e-06, |
| "loss": 0.7999, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.4323352216527291, |
| "grad_norm": 11.061264786476832, |
| "learning_rate": 1.2648170654884952e-06, |
| "loss": 0.7652, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4326949015376315, |
| "grad_norm": 9.301545465701375, |
| "learning_rate": 1.2636932087925636e-06, |
| "loss": 0.8292, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.43305458142253395, |
| "grad_norm": 29.793784920402466, |
| "learning_rate": 1.26256899403411e-06, |
| "loss": 0.8507, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4334142613074364, |
| "grad_norm": 14.99437408721803, |
| "learning_rate": 1.261444422739679e-06, |
| "loss": 0.8628, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4337739411923388, |
| "grad_norm": 8.171720154254778, |
| "learning_rate": 1.2603194964362978e-06, |
| "loss": 0.8719, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.43413362107724124, |
| "grad_norm": 15.461314713557933, |
| "learning_rate": 1.2591942166514763e-06, |
| "loss": 0.7838, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.43449330096214367, |
| "grad_norm": 10.039494905553514, |
| "learning_rate": 1.2580685849132038e-06, |
| "loss": 0.8216, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.43485298084704616, |
| "grad_norm": 27.279568189454807, |
| "learning_rate": 1.2569426027499483e-06, |
| "loss": 0.8054, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.4352126607319486, |
| "grad_norm": 17.918777685269298, |
| "learning_rate": 1.2558162716906535e-06, |
| "loss": 0.7491, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.435572340616851, |
| "grad_norm": 15.32934599202498, |
| "learning_rate": 1.2546895932647364e-06, |
| "loss": 0.8193, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.43593202050175345, |
| "grad_norm": 9.127247215540397, |
| "learning_rate": 1.2535625690020858e-06, |
| "loss": 0.8449, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.4362917003866559, |
| "grad_norm": 14.029464558108746, |
| "learning_rate": 1.2524352004330605e-06, |
| "loss": 0.8291, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4366513802715583, |
| "grad_norm": 10.089731825442499, |
| "learning_rate": 1.2513074890884863e-06, |
| "loss": 0.8157, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.43701106015646074, |
| "grad_norm": 17.512841039384675, |
| "learning_rate": 1.2501794364996553e-06, |
| "loss": 0.7185, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.43737074004136317, |
| "grad_norm": 10.710023680718834, |
| "learning_rate": 1.249051044198321e-06, |
| "loss": 0.8398, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.4377304199262656, |
| "grad_norm": 15.138685298177357, |
| "learning_rate": 1.247922313716701e-06, |
| "loss": 0.7848, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4380900998111681, |
| "grad_norm": 11.154967860168135, |
| "learning_rate": 1.2467932465874698e-06, |
| "loss": 0.7925, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4384497796960705, |
| "grad_norm": 28.175644182641264, |
| "learning_rate": 1.2456638443437604e-06, |
| "loss": 0.8244, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.43880945958097295, |
| "grad_norm": 13.515139584672358, |
| "learning_rate": 1.2445341085191598e-06, |
| "loss": 0.8696, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4391691394658754, |
| "grad_norm": 7.60111989861678, |
| "learning_rate": 1.243404040647709e-06, |
| "loss": 0.8147, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4395288193507778, |
| "grad_norm": 11.874802985111746, |
| "learning_rate": 1.2422736422638989e-06, |
| "loss": 0.821, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.43988849923568024, |
| "grad_norm": 10.265771757817534, |
| "learning_rate": 1.24114291490267e-06, |
| "loss": 0.7894, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.44024817912058267, |
| "grad_norm": 9.305337328289518, |
| "learning_rate": 1.2400118600994089e-06, |
| "loss": 0.8681, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4406078590054851, |
| "grad_norm": 15.078958442910196, |
| "learning_rate": 1.2388804793899472e-06, |
| "loss": 0.8619, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.44096753889038753, |
| "grad_norm": 10.629870712533126, |
| "learning_rate": 1.2377487743105593e-06, |
| "loss": 0.8269, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.44132721877528996, |
| "grad_norm": 13.607965360091349, |
| "learning_rate": 1.2366167463979589e-06, |
| "loss": 0.8167, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.44168689866019245, |
| "grad_norm": 62.836568572867826, |
| "learning_rate": 1.2354843971892997e-06, |
| "loss": 0.8038, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.4420465785450949, |
| "grad_norm": 24.402575205654706, |
| "learning_rate": 1.2343517282221702e-06, |
| "loss": 0.7943, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.4424062584299973, |
| "grad_norm": 39.445338745066515, |
| "learning_rate": 1.233218741034594e-06, |
| "loss": 0.7956, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.44276593831489974, |
| "grad_norm": 12.306449046969655, |
| "learning_rate": 1.2320854371650266e-06, |
| "loss": 0.7592, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.4431256181998022, |
| "grad_norm": 9.349786884584788, |
| "learning_rate": 1.2309518181523536e-06, |
| "loss": 0.836, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4434852980847046, |
| "grad_norm": 14.046011861194604, |
| "learning_rate": 1.2298178855358873e-06, |
| "loss": 0.8548, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.44384497796960704, |
| "grad_norm": 11.757374318577009, |
| "learning_rate": 1.2286836408553685e-06, |
| "loss": 0.8351, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.44420465785450947, |
| "grad_norm": 8.04805363719931, |
| "learning_rate": 1.227549085650959e-06, |
| "loss": 0.8048, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4445643377394119, |
| "grad_norm": 12.275919702845814, |
| "learning_rate": 1.226414221463244e-06, |
| "loss": 0.7916, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4449240176243144, |
| "grad_norm": 12.570661828913073, |
| "learning_rate": 1.2252790498332272e-06, |
| "loss": 0.849, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4452836975092168, |
| "grad_norm": 10.488860725402276, |
| "learning_rate": 1.2241435723023308e-06, |
| "loss": 0.7694, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.44564337739411924, |
| "grad_norm": 7.622957083352856, |
| "learning_rate": 1.2230077904123912e-06, |
| "loss": 0.8339, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.4460030572790217, |
| "grad_norm": 39.197658683374236, |
| "learning_rate": 1.2218717057056592e-06, |
| "loss": 0.8131, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4463627371639241, |
| "grad_norm": 10.548348927119244, |
| "learning_rate": 1.2207353197247956e-06, |
| "loss": 0.7641, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.44672241704882654, |
| "grad_norm": 7.259534641391028, |
| "learning_rate": 1.2195986340128718e-06, |
| "loss": 0.786, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.44708209693372897, |
| "grad_norm": 24.7303333263242, |
| "learning_rate": 1.2184616501133646e-06, |
| "loss": 0.8738, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.4474417768186314, |
| "grad_norm": 13.900637558705428, |
| "learning_rate": 1.2173243695701573e-06, |
| "loss": 0.8664, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.44780145670353383, |
| "grad_norm": 11.298140909025424, |
| "learning_rate": 1.2161867939275343e-06, |
| "loss": 0.8792, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4481611365884363, |
| "grad_norm": 14.50133406883414, |
| "learning_rate": 1.2150489247301825e-06, |
| "loss": 0.8256, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.44852081647333875, |
| "grad_norm": 31.132010892010488, |
| "learning_rate": 1.2139107635231855e-06, |
| "loss": 0.8271, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.4488804963582412, |
| "grad_norm": 12.569487198202452, |
| "learning_rate": 1.2127723118520252e-06, |
| "loss": 0.8615, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4492401762431436, |
| "grad_norm": 10.949709264797026, |
| "learning_rate": 1.2116335712625765e-06, |
| "loss": 0.8454, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.44959985612804604, |
| "grad_norm": 10.332432325251993, |
| "learning_rate": 1.2104945433011078e-06, |
| "loss": 0.797, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.44995953601294847, |
| "grad_norm": 10.14572559356846, |
| "learning_rate": 1.2093552295142768e-06, |
| "loss": 0.907, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4503192158978509, |
| "grad_norm": 18.970962703868434, |
| "learning_rate": 1.2082156314491297e-06, |
| "loss": 0.7562, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.45067889578275333, |
| "grad_norm": 11.616910049291329, |
| "learning_rate": 1.2070757506530988e-06, |
| "loss": 0.8113, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.45103857566765576, |
| "grad_norm": 10.081351483948243, |
| "learning_rate": 1.205935588674e-06, |
| "loss": 0.7622, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.45139825555255825, |
| "grad_norm": 20.255342800605845, |
| "learning_rate": 1.2047951470600317e-06, |
| "loss": 0.8975, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4517579354374607, |
| "grad_norm": 13.878665803083992, |
| "learning_rate": 1.2036544273597707e-06, |
| "loss": 0.8028, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4521176153223631, |
| "grad_norm": 16.323478583232987, |
| "learning_rate": 1.202513431122173e-06, |
| "loss": 0.9193, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.45247729520726554, |
| "grad_norm": 12.595777252246911, |
| "learning_rate": 1.2013721598965687e-06, |
| "loss": 0.9226, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.45283697509216797, |
| "grad_norm": 29.133036085665644, |
| "learning_rate": 1.2002306152326625e-06, |
| "loss": 0.7382, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.4531966549770704, |
| "grad_norm": 13.19818067106045, |
| "learning_rate": 1.1990887986805295e-06, |
| "loss": 0.7563, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.45355633486197283, |
| "grad_norm": 59.41462666030352, |
| "learning_rate": 1.1979467117906141e-06, |
| "loss": 0.8029, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.45391601474687526, |
| "grad_norm": 9.281685333565427, |
| "learning_rate": 1.1968043561137284e-06, |
| "loss": 0.8307, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.4542756946317777, |
| "grad_norm": 10.829792460540387, |
| "learning_rate": 1.1956617332010486e-06, |
| "loss": 0.7897, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4546353745166802, |
| "grad_norm": 9.87199135934417, |
| "learning_rate": 1.194518844604115e-06, |
| "loss": 0.8053, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4549950544015826, |
| "grad_norm": 18.600256673133423, |
| "learning_rate": 1.193375691874827e-06, |
| "loss": 0.8669, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.45535473428648504, |
| "grad_norm": 15.82984920467322, |
| "learning_rate": 1.1922322765654444e-06, |
| "loss": 0.7869, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.45571441417138747, |
| "grad_norm": 29.131611437423444, |
| "learning_rate": 1.191088600228582e-06, |
| "loss": 0.7925, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.4560740940562899, |
| "grad_norm": 42.60802444905089, |
| "learning_rate": 1.1899446644172104e-06, |
| "loss": 0.7573, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.45643377394119233, |
| "grad_norm": 10.628041432812774, |
| "learning_rate": 1.1888004706846518e-06, |
| "loss": 0.799, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.45679345382609476, |
| "grad_norm": 18.94420908670456, |
| "learning_rate": 1.187656020584578e-06, |
| "loss": 0.7464, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4571531337109972, |
| "grad_norm": 9.262181828568094, |
| "learning_rate": 1.1865113156710105e-06, |
| "loss": 0.7261, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.4575128135958996, |
| "grad_norm": 10.497722925087944, |
| "learning_rate": 1.1853663574983154e-06, |
| "loss": 0.8084, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.4578724934808021, |
| "grad_norm": 24.43866104784505, |
| "learning_rate": 1.1842211476212036e-06, |
| "loss": 0.8101, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.45823217336570454, |
| "grad_norm": 7.289370864106929, |
| "learning_rate": 1.183075687594727e-06, |
| "loss": 0.7586, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.458591853250607, |
| "grad_norm": 11.556599457183667, |
| "learning_rate": 1.181929978974278e-06, |
| "loss": 0.7747, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.4589515331355094, |
| "grad_norm": 12.053272756336359, |
| "learning_rate": 1.1807840233155862e-06, |
| "loss": 0.8078, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.45931121302041183, |
| "grad_norm": 10.666529440001169, |
| "learning_rate": 1.179637822174716e-06, |
| "loss": 0.901, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.45967089290531427, |
| "grad_norm": 13.548560706866235, |
| "learning_rate": 1.1784913771080663e-06, |
| "loss": 0.8418, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4600305727902167, |
| "grad_norm": 48.48568465151353, |
| "learning_rate": 1.1773446896723666e-06, |
| "loss": 0.8083, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.4603902526751191, |
| "grad_norm": 11.366387829012211, |
| "learning_rate": 1.1761977614246757e-06, |
| "loss": 0.855, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.46074993256002156, |
| "grad_norm": 12.01127680637438, |
| "learning_rate": 1.1750505939223784e-06, |
| "loss": 0.8486, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.46110961244492404, |
| "grad_norm": 7.978281297381684, |
| "learning_rate": 1.1739031887231864e-06, |
| "loss": 0.8512, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.4614692923298265, |
| "grad_norm": 11.795596679560251, |
| "learning_rate": 1.1727555473851321e-06, |
| "loss": 0.8486, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.4618289722147289, |
| "grad_norm": 15.49662766160169, |
| "learning_rate": 1.17160767146657e-06, |
| "loss": 0.841, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.46218865209963134, |
| "grad_norm": 16.986608292313473, |
| "learning_rate": 1.170459562526172e-06, |
| "loss": 0.8212, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.46254833198453377, |
| "grad_norm": 9.93564733691553, |
| "learning_rate": 1.1693112221229276e-06, |
| "loss": 0.7663, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.4629080118694362, |
| "grad_norm": 54.42918082000089, |
| "learning_rate": 1.1681626518161396e-06, |
| "loss": 0.8096, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.46326769175433863, |
| "grad_norm": 7.922207343459904, |
| "learning_rate": 1.1670138531654236e-06, |
| "loss": 0.8284, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.46362737163924106, |
| "grad_norm": 13.072860874905023, |
| "learning_rate": 1.1658648277307047e-06, |
| "loss": 0.7808, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.4639870515241435, |
| "grad_norm": 16.501809646437952, |
| "learning_rate": 1.1647155770722169e-06, |
| "loss": 0.8181, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.464346731409046, |
| "grad_norm": 14.341375858719926, |
| "learning_rate": 1.1635661027504983e-06, |
| "loss": 0.8495, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.4647064112939484, |
| "grad_norm": 9.923435204329014, |
| "learning_rate": 1.162416406326393e-06, |
| "loss": 0.7881, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.46506609117885084, |
| "grad_norm": 11.687141456686968, |
| "learning_rate": 1.161266489361045e-06, |
| "loss": 0.8102, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.46542577106375327, |
| "grad_norm": 7.318134783993273, |
| "learning_rate": 1.1601163534158979e-06, |
| "loss": 0.7568, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.4657854509486557, |
| "grad_norm": 7.717278544222602, |
| "learning_rate": 1.1589660000526934e-06, |
| "loss": 0.8592, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.46614513083355813, |
| "grad_norm": 9.639364847193715, |
| "learning_rate": 1.1578154308334682e-06, |
| "loss": 0.8454, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.46650481071846056, |
| "grad_norm": 7.546318024475833, |
| "learning_rate": 1.1566646473205516e-06, |
| "loss": 0.8949, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.466864490603363, |
| "grad_norm": 20.974931918438077, |
| "learning_rate": 1.1555136510765644e-06, |
| "loss": 0.8186, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.4672241704882654, |
| "grad_norm": 16.749368758432176, |
| "learning_rate": 1.154362443664416e-06, |
| "loss": 0.8559, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.46758385037316785, |
| "grad_norm": 14.427679951488976, |
| "learning_rate": 1.1532110266473026e-06, |
| "loss": 0.7989, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.46794353025807034, |
| "grad_norm": 8.46357969121419, |
| "learning_rate": 1.1520594015887048e-06, |
| "loss": 0.7918, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.46830321014297277, |
| "grad_norm": 21.35180893075654, |
| "learning_rate": 1.1509075700523867e-06, |
| "loss": 0.837, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.4686628900278752, |
| "grad_norm": 13.21873278578323, |
| "learning_rate": 1.1497555336023913e-06, |
| "loss": 0.838, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.46902256991277763, |
| "grad_norm": 8.170795276298515, |
| "learning_rate": 1.1486032938030407e-06, |
| "loss": 0.8402, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.46938224979768006, |
| "grad_norm": 32.83502426851321, |
| "learning_rate": 1.1474508522189333e-06, |
| "loss": 0.761, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.4697419296825825, |
| "grad_norm": 15.1106562782342, |
| "learning_rate": 1.1462982104149407e-06, |
| "loss": 0.9193, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.4701016095674849, |
| "grad_norm": 38.89986928464333, |
| "learning_rate": 1.1451453699562074e-06, |
| "loss": 0.7628, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.47046128945238735, |
| "grad_norm": 11.047045316832111, |
| "learning_rate": 1.1439923324081463e-06, |
| "loss": 0.846, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.4708209693372898, |
| "grad_norm": 17.16310657966234, |
| "learning_rate": 1.14283909933644e-06, |
| "loss": 0.8376, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.47118064922219227, |
| "grad_norm": 11.43598914484691, |
| "learning_rate": 1.141685672307034e-06, |
| "loss": 0.814, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.4715403291070947, |
| "grad_norm": 28.209360740494237, |
| "learning_rate": 1.1405320528861391e-06, |
| "loss": 0.7522, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.47190000899199713, |
| "grad_norm": 8.049434855095333, |
| "learning_rate": 1.1393782426402266e-06, |
| "loss": 0.7578, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.47225968887689956, |
| "grad_norm": 9.649229554253916, |
| "learning_rate": 1.1382242431360273e-06, |
| "loss": 0.8434, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.472619368761802, |
| "grad_norm": 13.371114231283347, |
| "learning_rate": 1.1370700559405282e-06, |
| "loss": 0.8427, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.4729790486467044, |
| "grad_norm": 17.018058939594745, |
| "learning_rate": 1.1359156826209723e-06, |
| "loss": 0.8202, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.47333872853160686, |
| "grad_norm": 16.720685610019604, |
| "learning_rate": 1.1347611247448542e-06, |
| "loss": 0.8163, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.4736984084165093, |
| "grad_norm": 11.349509462719638, |
| "learning_rate": 1.1336063838799203e-06, |
| "loss": 0.8061, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.4740580883014117, |
| "grad_norm": 52.62375682555366, |
| "learning_rate": 1.1324514615941643e-06, |
| "loss": 0.8094, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.4744177681863142, |
| "grad_norm": 8.373983084050154, |
| "learning_rate": 1.1312963594558269e-06, |
| "loss": 0.8, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.47477744807121663, |
| "grad_norm": 21.3569404939675, |
| "learning_rate": 1.1301410790333928e-06, |
| "loss": 0.8031, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.47513712795611907, |
| "grad_norm": 7.938213921263037, |
| "learning_rate": 1.128985621895589e-06, |
| "loss": 0.8187, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.4754968078410215, |
| "grad_norm": 10.108542668174916, |
| "learning_rate": 1.127829989611382e-06, |
| "loss": 0.7671, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.4758564877259239, |
| "grad_norm": 26.649203021223208, |
| "learning_rate": 1.1266741837499772e-06, |
| "loss": 0.877, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.47621616761082636, |
| "grad_norm": 10.835403456104503, |
| "learning_rate": 1.1255182058808142e-06, |
| "loss": 0.8013, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.4765758474957288, |
| "grad_norm": 16.69247054672645, |
| "learning_rate": 1.124362057573567e-06, |
| "loss": 0.7688, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.4769355273806312, |
| "grad_norm": 14.321681582885438, |
| "learning_rate": 1.1232057403981414e-06, |
| "loss": 0.7841, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.47729520726553365, |
| "grad_norm": 9.321991160951313, |
| "learning_rate": 1.1220492559246716e-06, |
| "loss": 0.8265, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.47765488715043614, |
| "grad_norm": 21.250382371137924, |
| "learning_rate": 1.1208926057235196e-06, |
| "loss": 0.8528, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.47801456703533857, |
| "grad_norm": 10.41683671396487, |
| "learning_rate": 1.1197357913652723e-06, |
| "loss": 0.8448, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.478374246920241, |
| "grad_norm": 9.696248340882471, |
| "learning_rate": 1.1185788144207393e-06, |
| "loss": 0.8706, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.47873392680514343, |
| "grad_norm": 8.773555354216427, |
| "learning_rate": 1.1174216764609513e-06, |
| "loss": 0.8033, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.47909360669004586, |
| "grad_norm": 15.713430787734534, |
| "learning_rate": 1.1162643790571572e-06, |
| "loss": 0.8287, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.4794532865749483, |
| "grad_norm": 21.999837055541647, |
| "learning_rate": 1.115106923780823e-06, |
| "loss": 0.7078, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.4798129664598507, |
| "grad_norm": 29.56044026061468, |
| "learning_rate": 1.1139493122036288e-06, |
| "loss": 0.8464, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.48017264634475315, |
| "grad_norm": 15.644608725288286, |
| "learning_rate": 1.1127915458974664e-06, |
| "loss": 0.8285, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.4805323262296556, |
| "grad_norm": 17.631169257133948, |
| "learning_rate": 1.1116336264344388e-06, |
| "loss": 0.8656, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.48089200611455807, |
| "grad_norm": 16.153840889427563, |
| "learning_rate": 1.1104755553868556e-06, |
| "loss": 0.7716, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.4812516859994605, |
| "grad_norm": 12.574642214183019, |
| "learning_rate": 1.109317334327234e-06, |
| "loss": 0.7618, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.48161136588436293, |
| "grad_norm": 9.725647080860101, |
| "learning_rate": 1.1081589648282928e-06, |
| "loss": 0.8191, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.48197104576926536, |
| "grad_norm": 16.049163537310257, |
| "learning_rate": 1.1070004484629542e-06, |
| "loss": 0.7816, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4823307256541678, |
| "grad_norm": 12.70889920815928, |
| "learning_rate": 1.1058417868043385e-06, |
| "loss": 0.7665, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.4826904055390702, |
| "grad_norm": 12.747243830870412, |
| "learning_rate": 1.1046829814257647e-06, |
| "loss": 0.7383, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.48305008542397265, |
| "grad_norm": 18.477751772361778, |
| "learning_rate": 1.1035240339007451e-06, |
| "loss": 0.8569, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.4834097653088751, |
| "grad_norm": 10.187595645045208, |
| "learning_rate": 1.102364945802987e-06, |
| "loss": 0.8328, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.4837694451937775, |
| "grad_norm": 17.366969367388446, |
| "learning_rate": 1.101205718706387e-06, |
| "loss": 0.7632, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.48412912507868, |
| "grad_norm": 20.525432589105105, |
| "learning_rate": 1.1000463541850312e-06, |
| "loss": 0.8232, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.48448880496358243, |
| "grad_norm": 9.98374707321502, |
| "learning_rate": 1.0988868538131921e-06, |
| "loss": 0.8608, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 16.896881644874085, |
| "learning_rate": 1.0977272191653271e-06, |
| "loss": 0.8108, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.4852081647333873, |
| "grad_norm": 23.16193027472863, |
| "learning_rate": 1.0965674518160747e-06, |
| "loss": 0.7395, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.4855678446182897, |
| "grad_norm": 13.68948597753657, |
| "learning_rate": 1.0954075533402556e-06, |
| "loss": 0.7869, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.48592752450319215, |
| "grad_norm": 26.143039351841896, |
| "learning_rate": 1.0942475253128664e-06, |
| "loss": 0.7691, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.4862872043880946, |
| "grad_norm": 19.19169239426908, |
| "learning_rate": 1.0930873693090815e-06, |
| "loss": 0.7822, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.486646884272997, |
| "grad_norm": 40.92276343320756, |
| "learning_rate": 1.0919270869042474e-06, |
| "loss": 0.7735, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.48700656415789945, |
| "grad_norm": 11.678223770091648, |
| "learning_rate": 1.0907666796738837e-06, |
| "loss": 0.789, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.48736624404280193, |
| "grad_norm": 10.509971370373918, |
| "learning_rate": 1.0896061491936782e-06, |
| "loss": 0.8999, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.48772592392770436, |
| "grad_norm": 10.172828871993636, |
| "learning_rate": 1.088445497039487e-06, |
| "loss": 0.7966, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.4880856038126068, |
| "grad_norm": 11.52124986705594, |
| "learning_rate": 1.0872847247873313e-06, |
| "loss": 0.7998, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.4884452836975092, |
| "grad_norm": 32.04553875479848, |
| "learning_rate": 1.086123834013395e-06, |
| "loss": 0.8151, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.48880496358241166, |
| "grad_norm": 10.22603073573459, |
| "learning_rate": 1.084962826294023e-06, |
| "loss": 0.72, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.4891646434673141, |
| "grad_norm": 9.240702419200277, |
| "learning_rate": 1.0838017032057191e-06, |
| "loss": 0.8467, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4895243233522165, |
| "grad_norm": 8.945883592984766, |
| "learning_rate": 1.0826404663251445e-06, |
| "loss": 0.7825, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.48988400323711895, |
| "grad_norm": 10.060625848398129, |
| "learning_rate": 1.0814791172291132e-06, |
| "loss": 0.8255, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.4902436831220214, |
| "grad_norm": 19.05127417333616, |
| "learning_rate": 1.0803176574945932e-06, |
| "loss": 0.7784, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.49060336300692386, |
| "grad_norm": 11.93794686525435, |
| "learning_rate": 1.0791560886987015e-06, |
| "loss": 0.8153, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.4909630428918263, |
| "grad_norm": 13.854128210349783, |
| "learning_rate": 1.0779944124187046e-06, |
| "loss": 0.8223, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.4913227227767287, |
| "grad_norm": 20.4817390782388, |
| "learning_rate": 1.0768326302320133e-06, |
| "loss": 0.7273, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.49168240266163116, |
| "grad_norm": 13.400956287747531, |
| "learning_rate": 1.0756707437161841e-06, |
| "loss": 0.773, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.4920420825465336, |
| "grad_norm": 10.538271198806664, |
| "learning_rate": 1.074508754448913e-06, |
| "loss": 0.835, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.492401762431436, |
| "grad_norm": 12.436747695465828, |
| "learning_rate": 1.0733466640080373e-06, |
| "loss": 0.8027, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.49276144231633845, |
| "grad_norm": 11.051007256517842, |
| "learning_rate": 1.0721844739715309e-06, |
| "loss": 0.7913, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4931211222012409, |
| "grad_norm": 8.332551161729398, |
| "learning_rate": 1.071022185917503e-06, |
| "loss": 0.7725, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.4934808020861433, |
| "grad_norm": 9.823582866338528, |
| "learning_rate": 1.0698598014241959e-06, |
| "loss": 0.7831, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.49384048197104574, |
| "grad_norm": 18.00426132442602, |
| "learning_rate": 1.0686973220699834e-06, |
| "loss": 0.8159, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.49420016185594823, |
| "grad_norm": 7.140595216851977, |
| "learning_rate": 1.0675347494333667e-06, |
| "loss": 0.7904, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.49455984174085066, |
| "grad_norm": 21.6065240185961, |
| "learning_rate": 1.0663720850929751e-06, |
| "loss": 0.8269, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.4949195216257531, |
| "grad_norm": 44.46686647900651, |
| "learning_rate": 1.065209330627562e-06, |
| "loss": 0.8579, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.4952792015106555, |
| "grad_norm": 8.205664225193278, |
| "learning_rate": 1.064046487616003e-06, |
| "loss": 0.7465, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.49563888139555795, |
| "grad_norm": 7.058285244781892, |
| "learning_rate": 1.062883557637294e-06, |
| "loss": 0.8295, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.4959985612804604, |
| "grad_norm": 11.023738749472114, |
| "learning_rate": 1.0617205422705492e-06, |
| "loss": 0.8333, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.4963582411653628, |
| "grad_norm": 19.382091374578597, |
| "learning_rate": 1.060557443094998e-06, |
| "loss": 0.7928, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.49671792105026524, |
| "grad_norm": 21.399481545241475, |
| "learning_rate": 1.059394261689985e-06, |
| "loss": 0.7789, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.4970776009351677, |
| "grad_norm": 34.560384015302354, |
| "learning_rate": 1.0582309996349647e-06, |
| "loss": 0.8029, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.49743728082007016, |
| "grad_norm": 13.318672546302027, |
| "learning_rate": 1.0570676585095026e-06, |
| "loss": 0.8695, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.4977969607049726, |
| "grad_norm": 10.822353934734869, |
| "learning_rate": 1.0559042398932711e-06, |
| "loss": 0.719, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.498156640589875, |
| "grad_norm": 9.328541550684049, |
| "learning_rate": 1.0547407453660471e-06, |
| "loss": 0.8812, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.49851632047477745, |
| "grad_norm": 10.228891897402235, |
| "learning_rate": 1.053577176507712e-06, |
| "loss": 0.774, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.4988760003596799, |
| "grad_norm": 16.094006547659376, |
| "learning_rate": 1.0524135348982465e-06, |
| "loss": 0.7738, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.4992356802445823, |
| "grad_norm": 11.13873927702271, |
| "learning_rate": 1.0512498221177317e-06, |
| "loss": 0.7719, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.49959536012948474, |
| "grad_norm": 13.20489245236757, |
| "learning_rate": 1.0500860397463438e-06, |
| "loss": 0.7941, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.4999550400143872, |
| "grad_norm": 46.87310262944863, |
| "learning_rate": 1.0489221893643552e-06, |
| "loss": 0.8437, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5003147198992897, |
| "grad_norm": 12.47578093303396, |
| "learning_rate": 1.0477582725521285e-06, |
| "loss": 0.8346, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.500674399784192, |
| "grad_norm": 11.642973996885482, |
| "learning_rate": 1.0465942908901189e-06, |
| "loss": 0.8496, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.5010340796690945, |
| "grad_norm": 14.487060735985708, |
| "learning_rate": 1.0454302459588674e-06, |
| "loss": 0.7931, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5013937595539969, |
| "grad_norm": 21.194495689068134, |
| "learning_rate": 1.044266139339003e-06, |
| "loss": 0.7935, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.5017534394388994, |
| "grad_norm": 19.22494025894822, |
| "learning_rate": 1.0431019726112365e-06, |
| "loss": 0.795, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5021131193238019, |
| "grad_norm": 12.664414830532246, |
| "learning_rate": 1.041937747356362e-06, |
| "loss": 0.8353, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5024727992087042, |
| "grad_norm": 78.46427391405251, |
| "learning_rate": 1.040773465155252e-06, |
| "loss": 0.7851, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5028324790936067, |
| "grad_norm": 8.50956585993304, |
| "learning_rate": 1.0396091275888566e-06, |
| "loss": 0.8766, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.5031921589785091, |
| "grad_norm": 22.599333988844542, |
| "learning_rate": 1.038444736238201e-06, |
| "loss": 0.8871, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.5035518388634116, |
| "grad_norm": 20.845736915187427, |
| "learning_rate": 1.0372802926843843e-06, |
| "loss": 0.8241, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.503911518748314, |
| "grad_norm": 9.166515327483395, |
| "learning_rate": 1.036115798508575e-06, |
| "loss": 0.813, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5042711986332165, |
| "grad_norm": 11.81132719359345, |
| "learning_rate": 1.0349512552920112e-06, |
| "loss": 0.8076, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.5046308785181188, |
| "grad_norm": 8.350875162883664, |
| "learning_rate": 1.0337866646159978e-06, |
| "loss": 0.8202, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.5049905584030213, |
| "grad_norm": 17.291666992097348, |
| "learning_rate": 1.0326220280619036e-06, |
| "loss": 0.8115, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.5053502382879238, |
| "grad_norm": 12.20286423286577, |
| "learning_rate": 1.03145734721116e-06, |
| "loss": 0.8031, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5057099181728262, |
| "grad_norm": 10.0654211351684, |
| "learning_rate": 1.0302926236452586e-06, |
| "loss": 0.7799, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5060695980577287, |
| "grad_norm": 11.282504147389032, |
| "learning_rate": 1.0291278589457486e-06, |
| "loss": 0.731, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.506429277942631, |
| "grad_norm": 12.519439675020324, |
| "learning_rate": 1.0279630546942354e-06, |
| "loss": 0.8127, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5067889578275335, |
| "grad_norm": 10.836142356182036, |
| "learning_rate": 1.0267982124723781e-06, |
| "loss": 0.8929, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.5071486377124359, |
| "grad_norm": 13.070906754805897, |
| "learning_rate": 1.0256333338618874e-06, |
| "loss": 0.8247, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5075083175973384, |
| "grad_norm": 19.769708659412878, |
| "learning_rate": 1.0244684204445236e-06, |
| "loss": 0.8108, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.5078679974822408, |
| "grad_norm": 9.992181293854209, |
| "learning_rate": 1.0233034738020932e-06, |
| "loss": 0.8022, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5082276773671432, |
| "grad_norm": 19.53932795176293, |
| "learning_rate": 1.022138495516449e-06, |
| "loss": 0.8203, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5085873572520456, |
| "grad_norm": 20.04206472752638, |
| "learning_rate": 1.0209734871694863e-06, |
| "loss": 0.8613, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.5089470371369481, |
| "grad_norm": 12.528200450149379, |
| "learning_rate": 1.0198084503431414e-06, |
| "loss": 0.8216, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5093067170218506, |
| "grad_norm": 8.533127280227811, |
| "learning_rate": 1.0186433866193892e-06, |
| "loss": 0.9119, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.509666396906753, |
| "grad_norm": 11.118502235354354, |
| "learning_rate": 1.0174782975802408e-06, |
| "loss": 0.8355, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5100260767916555, |
| "grad_norm": 59.59495590275687, |
| "learning_rate": 1.016313184807742e-06, |
| "loss": 0.8441, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.5103857566765578, |
| "grad_norm": 10.357110554360238, |
| "learning_rate": 1.0151480498839712e-06, |
| "loss": 0.8593, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5107454365614603, |
| "grad_norm": 18.11079726858921, |
| "learning_rate": 1.0139828943910357e-06, |
| "loss": 0.8574, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5111051164463627, |
| "grad_norm": 8.670492965431258, |
| "learning_rate": 1.012817719911072e-06, |
| "loss": 0.7924, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.5114647963312652, |
| "grad_norm": 15.467203175658279, |
| "learning_rate": 1.0116525280262419e-06, |
| "loss": 0.7738, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5118244762161676, |
| "grad_norm": 7.324319828884616, |
| "learning_rate": 1.0104873203187305e-06, |
| "loss": 0.8766, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.51218415610107, |
| "grad_norm": 9.15166688944549, |
| "learning_rate": 1.0093220983707448e-06, |
| "loss": 0.7628, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.5125438359859725, |
| "grad_norm": 10.055659156945493, |
| "learning_rate": 1.008156863764511e-06, |
| "loss": 0.819, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5129035158708749, |
| "grad_norm": 9.109683293235477, |
| "learning_rate": 1.0069916180822727e-06, |
| "loss": 0.7693, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5132631957557774, |
| "grad_norm": 433.16603237625316, |
| "learning_rate": 1.0058263629062883e-06, |
| "loss": 0.8065, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5136228756406798, |
| "grad_norm": 79.67214769742216, |
| "learning_rate": 1.0046610998188288e-06, |
| "loss": 0.7972, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5139825555255823, |
| "grad_norm": 11.928414979810299, |
| "learning_rate": 1.0034958304021767e-06, |
| "loss": 0.8611, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5143422354104846, |
| "grad_norm": 13.517459801013043, |
| "learning_rate": 1.0023305562386221e-06, |
| "loss": 0.78, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5147019152953871, |
| "grad_norm": 11.23971683250357, |
| "learning_rate": 1.0011652789104629e-06, |
| "loss": 0.7937, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.5150615951802895, |
| "grad_norm": 106.71880860032363, |
| "learning_rate": 1e-06, |
| "loss": 0.8431, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.515421275065192, |
| "grad_norm": 9.547930827763363, |
| "learning_rate": 9.98834721089537e-07, |
| "loss": 0.8941, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.5157809549500945, |
| "grad_norm": 7.9713663876106935, |
| "learning_rate": 9.976694437613776e-07, |
| "loss": 0.7785, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5161406348349968, |
| "grad_norm": 17.969196382796778, |
| "learning_rate": 9.965041695978237e-07, |
| "loss": 0.7539, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5165003147198993, |
| "grad_norm": 14.849388441928985, |
| "learning_rate": 9.953389001811713e-07, |
| "loss": 0.822, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5168599946048017, |
| "grad_norm": 11.618250156707834, |
| "learning_rate": 9.941736370937118e-07, |
| "loss": 0.8114, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.5172196744897042, |
| "grad_norm": 30.362467421486922, |
| "learning_rate": 9.930083819177272e-07, |
| "loss": 0.8269, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.5175793543746066, |
| "grad_norm": 10.711150851738822, |
| "learning_rate": 9.91843136235489e-07, |
| "loss": 0.8406, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.517939034259509, |
| "grad_norm": 21.966707029951465, |
| "learning_rate": 9.906779016292554e-07, |
| "loss": 0.8269, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5182987141444114, |
| "grad_norm": 6.889870704423818, |
| "learning_rate": 9.895126796812696e-07, |
| "loss": 0.8528, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5186583940293139, |
| "grad_norm": 10.036096239883863, |
| "learning_rate": 9.88347471973758e-07, |
| "loss": 0.8566, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.5190180739142164, |
| "grad_norm": 12.626659221449701, |
| "learning_rate": 9.871822800889282e-07, |
| "loss": 0.8295, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.5193777537991188, |
| "grad_norm": 13.160401226184998, |
| "learning_rate": 9.860171056089645e-07, |
| "loss": 0.8984, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.5197374336840213, |
| "grad_norm": 81.19250120234352, |
| "learning_rate": 9.84851950116029e-07, |
| "loss": 0.8519, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5200971135689236, |
| "grad_norm": 17.169130559487115, |
| "learning_rate": 9.836868151922578e-07, |
| "loss": 0.8276, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5204567934538261, |
| "grad_norm": 12.062677567952813, |
| "learning_rate": 9.825217024197593e-07, |
| "loss": 0.8482, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5208164733387285, |
| "grad_norm": 13.841853689575174, |
| "learning_rate": 9.81356613380611e-07, |
| "loss": 0.8572, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.521176153223631, |
| "grad_norm": 14.375266001914962, |
| "learning_rate": 9.801915496568585e-07, |
| "loss": 0.7885, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5215358331085334, |
| "grad_norm": 12.844866158220983, |
| "learning_rate": 9.790265128305136e-07, |
| "loss": 0.8304, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5218955129934358, |
| "grad_norm": 13.736766260260909, |
| "learning_rate": 9.77861504483551e-07, |
| "loss": 0.7662, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5222551928783383, |
| "grad_norm": 10.887638718572212, |
| "learning_rate": 9.76696526197907e-07, |
| "loss": 0.8089, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5226148727632407, |
| "grad_norm": 14.662475373623883, |
| "learning_rate": 9.755315795554765e-07, |
| "loss": 0.7953, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5229745526481432, |
| "grad_norm": 19.29409847751306, |
| "learning_rate": 9.743666661381123e-07, |
| "loss": 0.7501, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5233342325330456, |
| "grad_norm": 12.944227049303985, |
| "learning_rate": 9.73201787527622e-07, |
| "loss": 0.8094, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.523693912417948, |
| "grad_norm": 20.84274658229437, |
| "learning_rate": 9.720369453057648e-07, |
| "loss": 0.8358, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5240535923028504, |
| "grad_norm": 12.187489960033687, |
| "learning_rate": 9.708721410542516e-07, |
| "loss": 0.8417, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5244132721877529, |
| "grad_norm": 15.544458243079116, |
| "learning_rate": 9.697073763547415e-07, |
| "loss": 0.771, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5247729520726553, |
| "grad_norm": 10.343413968696762, |
| "learning_rate": 9.6854265278884e-07, |
| "loss": 0.8581, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5251326319575578, |
| "grad_norm": 11.403801343493468, |
| "learning_rate": 9.673779719380965e-07, |
| "loss": 0.8037, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5254923118424603, |
| "grad_norm": 10.809633080377072, |
| "learning_rate": 9.662133353840023e-07, |
| "loss": 0.791, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5258519917273626, |
| "grad_norm": 11.58855078217236, |
| "learning_rate": 9.65048744707989e-07, |
| "loss": 0.8823, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5262116716122651, |
| "grad_norm": 15.640967862665777, |
| "learning_rate": 9.638842014914252e-07, |
| "loss": 0.8181, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.5265713514971675, |
| "grad_norm": 10.193297185297975, |
| "learning_rate": 9.627197073156158e-07, |
| "loss": 0.7847, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.52693103138207, |
| "grad_norm": 10.429545287628065, |
| "learning_rate": 9.615552637617988e-07, |
| "loss": 0.7273, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5272907112669724, |
| "grad_norm": 20.54073989969678, |
| "learning_rate": 9.603908724111435e-07, |
| "loss": 0.8451, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5276503911518748, |
| "grad_norm": 9.495552383487167, |
| "learning_rate": 9.59226534844748e-07, |
| "loss": 0.8064, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5280100710367772, |
| "grad_norm": 11.400319429575987, |
| "learning_rate": 9.58062252643638e-07, |
| "loss": 0.8652, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5283697509216797, |
| "grad_norm": 32.94938027219996, |
| "learning_rate": 9.568980273887636e-07, |
| "loss": 0.7586, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5287294308065822, |
| "grad_norm": 28.295421524856383, |
| "learning_rate": 9.557338606609972e-07, |
| "loss": 0.769, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5290891106914846, |
| "grad_norm": 68.60370187564824, |
| "learning_rate": 9.545697540411325e-07, |
| "loss": 0.8393, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.529448790576387, |
| "grad_norm": 7.431586021514424, |
| "learning_rate": 9.534057091098813e-07, |
| "loss": 0.8348, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.5298084704612894, |
| "grad_norm": 29.972602708832238, |
| "learning_rate": 9.522417274478715e-07, |
| "loss": 0.7477, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5301681503461919, |
| "grad_norm": 18.61238653204131, |
| "learning_rate": 9.510778106356449e-07, |
| "loss": 0.793, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5305278302310943, |
| "grad_norm": 10.815377021249557, |
| "learning_rate": 9.499139602536559e-07, |
| "loss": 0.8629, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5308875101159968, |
| "grad_norm": 9.614312682859984, |
| "learning_rate": 9.487501778822683e-07, |
| "loss": 0.8357, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5312471900008991, |
| "grad_norm": 17.47808247149681, |
| "learning_rate": 9.475864651017534e-07, |
| "loss": 0.8385, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5316068698858016, |
| "grad_norm": 10.662664118752236, |
| "learning_rate": 9.464228234922881e-07, |
| "loss": 0.7612, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5319665497707041, |
| "grad_norm": 16.040971042085854, |
| "learning_rate": 9.452592546339526e-07, |
| "loss": 0.8207, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5323262296556065, |
| "grad_norm": 10.19304995373299, |
| "learning_rate": 9.440957601067292e-07, |
| "loss": 0.822, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.532685909540509, |
| "grad_norm": 77.96987404339865, |
| "learning_rate": 9.429323414904974e-07, |
| "loss": 0.8543, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5330455894254114, |
| "grad_norm": 12.961172981124303, |
| "learning_rate": 9.417690003650352e-07, |
| "loss": 0.8026, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5334052693103138, |
| "grad_norm": 10.970260110165201, |
| "learning_rate": 9.40605738310015e-07, |
| "loss": 0.8781, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5337649491952162, |
| "grad_norm": 11.726910688626658, |
| "learning_rate": 9.394425569050016e-07, |
| "loss": 0.8684, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5341246290801187, |
| "grad_norm": 18.136468552117464, |
| "learning_rate": 9.382794577294509e-07, |
| "loss": 0.8169, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5344843089650211, |
| "grad_norm": 37.14349559707124, |
| "learning_rate": 9.371164423627059e-07, |
| "loss": 0.7818, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5348439888499236, |
| "grad_norm": 11.69014220736439, |
| "learning_rate": 9.359535123839969e-07, |
| "loss": 0.8351, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.535203668734826, |
| "grad_norm": 11.223978301119892, |
| "learning_rate": 9.347906693724378e-07, |
| "loss": 0.8401, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5355633486197284, |
| "grad_norm": 7.540815914834626, |
| "learning_rate": 9.336279149070251e-07, |
| "loss": 0.7378, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5359230285046309, |
| "grad_norm": 60.946790599090555, |
| "learning_rate": 9.324652505666335e-07, |
| "loss": 0.8744, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5362827083895333, |
| "grad_norm": 9.41396235262222, |
| "learning_rate": 9.313026779300168e-07, |
| "loss": 0.7885, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5366423882744358, |
| "grad_norm": 8.27483048687943, |
| "learning_rate": 9.301401985758038e-07, |
| "loss": 0.851, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5370020681593382, |
| "grad_norm": 8.97709051839709, |
| "learning_rate": 9.289778140824972e-07, |
| "loss": 0.7918, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5373617480442406, |
| "grad_norm": 12.26427197135262, |
| "learning_rate": 9.278155260284691e-07, |
| "loss": 0.8106, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.537721427929143, |
| "grad_norm": 10.671233564758419, |
| "learning_rate": 9.266533359919626e-07, |
| "loss": 0.7956, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5380811078140455, |
| "grad_norm": 7.350928819737693, |
| "learning_rate": 9.254912455510868e-07, |
| "loss": 0.7588, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.538440787698948, |
| "grad_norm": 9.552539809403818, |
| "learning_rate": 9.243292562838162e-07, |
| "loss": 0.8061, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5388004675838504, |
| "grad_norm": 17.642432824835815, |
| "learning_rate": 9.231673697679866e-07, |
| "loss": 0.8017, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5391601474687528, |
| "grad_norm": 9.334809921162734, |
| "learning_rate": 9.220055875812954e-07, |
| "loss": 0.7367, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5395198273536552, |
| "grad_norm": 8.715324114387212, |
| "learning_rate": 9.208439113012983e-07, |
| "loss": 0.8062, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5398795072385577, |
| "grad_norm": 13.031352351676288, |
| "learning_rate": 9.196823425054073e-07, |
| "loss": 0.7777, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5402391871234601, |
| "grad_norm": 8.442707719950755, |
| "learning_rate": 9.185208827708869e-07, |
| "loss": 0.7946, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5405988670083626, |
| "grad_norm": 13.126914120443349, |
| "learning_rate": 9.173595336748557e-07, |
| "loss": 0.8134, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.540958546893265, |
| "grad_norm": 23.512187948367245, |
| "learning_rate": 9.161982967942806e-07, |
| "loss": 0.7985, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5413182267781674, |
| "grad_norm": 10.761145031843663, |
| "learning_rate": 9.150371737059772e-07, |
| "loss": 0.8286, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5416779066630699, |
| "grad_norm": 10.184059228494528, |
| "learning_rate": 9.138761659866052e-07, |
| "loss": 0.8173, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5420375865479723, |
| "grad_norm": 27.90335595700793, |
| "learning_rate": 9.127152752126688e-07, |
| "loss": 0.8092, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5423972664328748, |
| "grad_norm": 43.52143495960643, |
| "learning_rate": 9.115545029605128e-07, |
| "loss": 0.771, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5427569463177772, |
| "grad_norm": 10.801115688015788, |
| "learning_rate": 9.103938508063221e-07, |
| "loss": 0.7346, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5431166262026796, |
| "grad_norm": 10.60432526070125, |
| "learning_rate": 9.092333203261167e-07, |
| "loss": 0.8135, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.543476306087582, |
| "grad_norm": 11.596061165601153, |
| "learning_rate": 9.080729130957527e-07, |
| "loss": 0.8025, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5438359859724845, |
| "grad_norm": 8.561347390062522, |
| "learning_rate": 9.069126306909186e-07, |
| "loss": 0.7888, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5441956658573869, |
| "grad_norm": 29.250079789481337, |
| "learning_rate": 9.057524746871333e-07, |
| "loss": 0.79, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5445553457422894, |
| "grad_norm": 11.968239449874433, |
| "learning_rate": 9.045924466597447e-07, |
| "loss": 0.8773, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5449150256271919, |
| "grad_norm": 9.934235153048508, |
| "learning_rate": 9.034325481839252e-07, |
| "loss": 0.8296, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5452747055120942, |
| "grad_norm": 15.402183864430881, |
| "learning_rate": 9.022727808346731e-07, |
| "loss": 0.8458, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5456343853969967, |
| "grad_norm": 18.35032544724908, |
| "learning_rate": 9.011131461868077e-07, |
| "loss": 0.769, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5459940652818991, |
| "grad_norm": 12.860782881222937, |
| "learning_rate": 8.99953645814969e-07, |
| "loss": 0.8618, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5463537451668016, |
| "grad_norm": 9.549990369811512, |
| "learning_rate": 8.987942812936132e-07, |
| "loss": 0.7778, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.546713425051704, |
| "grad_norm": 15.989159785555255, |
| "learning_rate": 8.976350541970129e-07, |
| "loss": 0.8149, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5470731049366064, |
| "grad_norm": 10.664324645088254, |
| "learning_rate": 8.964759660992545e-07, |
| "loss": 0.7395, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5474327848215088, |
| "grad_norm": 9.098071536124595, |
| "learning_rate": 8.953170185742355e-07, |
| "loss": 0.7646, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5477924647064113, |
| "grad_norm": 425.6877435366872, |
| "learning_rate": 8.941582131956614e-07, |
| "loss": 0.8052, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5481521445913138, |
| "grad_norm": 17.728284471453698, |
| "learning_rate": 8.929995515370459e-07, |
| "loss": 0.8455, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.5485118244762162, |
| "grad_norm": 10.354466220926385, |
| "learning_rate": 8.918410351717073e-07, |
| "loss": 0.7448, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.5488715043611186, |
| "grad_norm": 7.833962450955127, |
| "learning_rate": 8.906826656727664e-07, |
| "loss": 0.837, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.549231184246021, |
| "grad_norm": 10.557262707031633, |
| "learning_rate": 8.895244446131443e-07, |
| "loss": 0.7661, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5495908641309235, |
| "grad_norm": 24.680259140974954, |
| "learning_rate": 8.883663735655612e-07, |
| "loss": 0.8053, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5499505440158259, |
| "grad_norm": 10.64492012360957, |
| "learning_rate": 8.872084541025336e-07, |
| "loss": 0.8016, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5503102239007284, |
| "grad_norm": 19.601472181141222, |
| "learning_rate": 8.860506877963714e-07, |
| "loss": 0.7641, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5506699037856307, |
| "grad_norm": 9.671158817227013, |
| "learning_rate": 8.848930762191768e-07, |
| "loss": 0.8009, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5510295836705332, |
| "grad_norm": 14.7316121621697, |
| "learning_rate": 8.837356209428426e-07, |
| "loss": 0.9208, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5513892635554357, |
| "grad_norm": 10.997978454337733, |
| "learning_rate": 8.825783235390488e-07, |
| "loss": 0.8159, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5517489434403381, |
| "grad_norm": 8.272077025049713, |
| "learning_rate": 8.814211855792609e-07, |
| "loss": 0.7496, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5521086233252406, |
| "grad_norm": 13.40204884222874, |
| "learning_rate": 8.802642086347277e-07, |
| "loss": 0.7705, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.552468303210143, |
| "grad_norm": 9.13911646763841, |
| "learning_rate": 8.791073942764805e-07, |
| "loss": 0.8221, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5528279830950454, |
| "grad_norm": 95.37842495705642, |
| "learning_rate": 8.779507440753284e-07, |
| "loss": 0.8346, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.5531876629799478, |
| "grad_norm": 12.253181880734747, |
| "learning_rate": 8.767942596018585e-07, |
| "loss": 0.8203, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5535473428648503, |
| "grad_norm": 8.814195351830474, |
| "learning_rate": 8.756379424264328e-07, |
| "loss": 0.7809, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5539070227497527, |
| "grad_norm": 11.992274017200714, |
| "learning_rate": 8.74481794119186e-07, |
| "loss": 0.7505, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5542667026346552, |
| "grad_norm": 11.097112573647356, |
| "learning_rate": 8.733258162500228e-07, |
| "loss": 0.8214, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5546263825195576, |
| "grad_norm": 15.971347238479705, |
| "learning_rate": 8.721700103886176e-07, |
| "loss": 0.827, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.55498606240446, |
| "grad_norm": 30.014919253725115, |
| "learning_rate": 8.710143781044113e-07, |
| "loss": 0.8519, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.5553457422893625, |
| "grad_norm": 26.81170112824856, |
| "learning_rate": 8.698589209666073e-07, |
| "loss": 0.8627, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5557054221742649, |
| "grad_norm": 18.33628542089601, |
| "learning_rate": 8.687036405441732e-07, |
| "loss": 0.7444, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5560651020591674, |
| "grad_norm": 10.040499270398149, |
| "learning_rate": 8.675485384058356e-07, |
| "loss": 0.8195, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5564247819440697, |
| "grad_norm": 17.268711898463536, |
| "learning_rate": 8.663936161200798e-07, |
| "loss": 0.7472, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5567844618289722, |
| "grad_norm": 7.149729583504873, |
| "learning_rate": 8.652388752551457e-07, |
| "loss": 0.7998, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5571441417138746, |
| "grad_norm": 27.709180822307594, |
| "learning_rate": 8.640843173790277e-07, |
| "loss": 0.7689, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5575038215987771, |
| "grad_norm": 18.26212825646285, |
| "learning_rate": 8.629299440594717e-07, |
| "loss": 0.7806, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5578635014836796, |
| "grad_norm": 8.704360397256853, |
| "learning_rate": 8.617757568639731e-07, |
| "loss": 0.8493, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.558223181368582, |
| "grad_norm": 15.388803067466043, |
| "learning_rate": 8.606217573597737e-07, |
| "loss": 0.7594, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5585828612534844, |
| "grad_norm": 13.75311500960052, |
| "learning_rate": 8.594679471138611e-07, |
| "loss": 0.799, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5589425411383868, |
| "grad_norm": 9.725642219860728, |
| "learning_rate": 8.58314327692966e-07, |
| "loss": 0.8048, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5593022210232893, |
| "grad_norm": 15.927126719512732, |
| "learning_rate": 8.571609006635604e-07, |
| "loss": 0.8141, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5596619009081917, |
| "grad_norm": 8.05123488042932, |
| "learning_rate": 8.560076675918535e-07, |
| "loss": 0.8883, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5600215807930942, |
| "grad_norm": 8.732920737327605, |
| "learning_rate": 8.548546300437927e-07, |
| "loss": 0.7587, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5603812606779965, |
| "grad_norm": 38.797439762016744, |
| "learning_rate": 8.537017895850591e-07, |
| "loss": 0.8558, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.560740940562899, |
| "grad_norm": 21.26469174794471, |
| "learning_rate": 8.525491477810669e-07, |
| "loss": 0.8658, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5611006204478014, |
| "grad_norm": 13.346270839636258, |
| "learning_rate": 8.513967061969593e-07, |
| "loss": 0.7985, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5614603003327039, |
| "grad_norm": 9.342793989203471, |
| "learning_rate": 8.502444663976087e-07, |
| "loss": 0.8314, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5618199802176064, |
| "grad_norm": 16.37141319073614, |
| "learning_rate": 8.490924299476133e-07, |
| "loss": 0.737, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5621796601025087, |
| "grad_norm": 15.262379384490513, |
| "learning_rate": 8.479405984112948e-07, |
| "loss": 0.7485, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5625393399874112, |
| "grad_norm": 14.432524222304044, |
| "learning_rate": 8.467889733526976e-07, |
| "loss": 0.8049, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.5628990198723136, |
| "grad_norm": 8.35035684372924, |
| "learning_rate": 8.456375563355842e-07, |
| "loss": 0.7843, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5632586997572161, |
| "grad_norm": 9.03293272220025, |
| "learning_rate": 8.444863489234356e-07, |
| "loss": 0.8045, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5636183796421185, |
| "grad_norm": 28.474405239707348, |
| "learning_rate": 8.433353526794482e-07, |
| "loss": 0.8365, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.563978059527021, |
| "grad_norm": 13.94415385830813, |
| "learning_rate": 8.42184569166532e-07, |
| "loss": 0.8721, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5643377394119233, |
| "grad_norm": 9.51307347896263, |
| "learning_rate": 8.410339999473065e-07, |
| "loss": 0.7889, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.5646974192968258, |
| "grad_norm": 8.301031918905753, |
| "learning_rate": 8.398836465841019e-07, |
| "loss": 0.7683, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5650570991817283, |
| "grad_norm": 8.592527188963984, |
| "learning_rate": 8.387335106389549e-07, |
| "loss": 0.784, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5654167790666307, |
| "grad_norm": 19.199075553424677, |
| "learning_rate": 8.375835936736071e-07, |
| "loss": 0.816, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.5657764589515332, |
| "grad_norm": 18.549140323686558, |
| "learning_rate": 8.364338972495016e-07, |
| "loss": 0.8246, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5661361388364355, |
| "grad_norm": 9.371172026712124, |
| "learning_rate": 8.352844229277832e-07, |
| "loss": 0.8167, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.566495818721338, |
| "grad_norm": 14.200981394455908, |
| "learning_rate": 8.341351722692951e-07, |
| "loss": 0.785, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.5668554986062404, |
| "grad_norm": 12.554963101861757, |
| "learning_rate": 8.329861468345767e-07, |
| "loss": 0.8083, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5672151784911429, |
| "grad_norm": 23.556173884147064, |
| "learning_rate": 8.318373481838604e-07, |
| "loss": 0.7853, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5675748583760453, |
| "grad_norm": 11.931308569332678, |
| "learning_rate": 8.306887778770723e-07, |
| "loss": 0.7943, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.5679345382609478, |
| "grad_norm": 15.530610503935467, |
| "learning_rate": 8.295404374738277e-07, |
| "loss": 0.8688, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.5682942181458502, |
| "grad_norm": 10.763767906174117, |
| "learning_rate": 8.283923285334303e-07, |
| "loss": 0.8201, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5686538980307526, |
| "grad_norm": 15.974553059167864, |
| "learning_rate": 8.27244452614868e-07, |
| "loss": 0.771, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.5690135779156551, |
| "grad_norm": 12.632225729750193, |
| "learning_rate": 8.260968112768136e-07, |
| "loss": 0.8878, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.5693732578005575, |
| "grad_norm": 29.514389708254264, |
| "learning_rate": 8.249494060776214e-07, |
| "loss": 0.8457, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.56973293768546, |
| "grad_norm": 75.55606752285239, |
| "learning_rate": 8.238022385753247e-07, |
| "loss": 0.8524, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.5700926175703623, |
| "grad_norm": 16.74577979702134, |
| "learning_rate": 8.226553103276334e-07, |
| "loss": 0.8064, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.5704522974552648, |
| "grad_norm": 10.292321500664308, |
| "learning_rate": 8.215086228919336e-07, |
| "loss": 0.7388, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.5708119773401672, |
| "grad_norm": 21.035453250353576, |
| "learning_rate": 8.203621778252838e-07, |
| "loss": 0.817, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.5711716572250697, |
| "grad_norm": 6.894357013489585, |
| "learning_rate": 8.19215976684414e-07, |
| "loss": 0.7624, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.5715313371099722, |
| "grad_norm": 13.671745224088038, |
| "learning_rate": 8.180700210257221e-07, |
| "loss": 0.7699, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.5718910169948745, |
| "grad_norm": 10.76508931416682, |
| "learning_rate": 8.16924312405273e-07, |
| "loss": 0.8594, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.572250696879777, |
| "grad_norm": 8.52246032699738, |
| "learning_rate": 8.157788523787966e-07, |
| "loss": 0.7449, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.5726103767646794, |
| "grad_norm": 10.420745032826296, |
| "learning_rate": 8.146336425016848e-07, |
| "loss": 0.8031, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.5729700566495819, |
| "grad_norm": 18.16963817231503, |
| "learning_rate": 8.134886843289899e-07, |
| "loss": 0.8334, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.5733297365344843, |
| "grad_norm": 14.348144219376929, |
| "learning_rate": 8.123439794154221e-07, |
| "loss": 0.8811, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.5736894164193868, |
| "grad_norm": 11.625933704619902, |
| "learning_rate": 8.111995293153484e-07, |
| "loss": 0.809, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.5740490963042891, |
| "grad_norm": 11.567273100902918, |
| "learning_rate": 8.100553355827896e-07, |
| "loss": 0.7572, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.5744087761891916, |
| "grad_norm": 26.793760953721204, |
| "learning_rate": 8.089113997714179e-07, |
| "loss": 0.8495, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.5747684560740941, |
| "grad_norm": 17.518932515308045, |
| "learning_rate": 8.077677234345557e-07, |
| "loss": 0.8459, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.5751281359589965, |
| "grad_norm": 11.232300984378966, |
| "learning_rate": 8.066243081251729e-07, |
| "loss": 0.8607, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.575487815843899, |
| "grad_norm": 11.014541018417304, |
| "learning_rate": 8.054811553958851e-07, |
| "loss": 0.8606, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5758474957288013, |
| "grad_norm": 10.043805051702309, |
| "learning_rate": 8.043382667989513e-07, |
| "loss": 0.7684, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.5762071756137038, |
| "grad_norm": 10.829708717271632, |
| "learning_rate": 8.031956438862717e-07, |
| "loss": 0.845, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.5765668554986062, |
| "grad_norm": 7.900714257136215, |
| "learning_rate": 8.02053288209386e-07, |
| "loss": 0.826, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.5769265353835087, |
| "grad_norm": 7.701628007687078, |
| "learning_rate": 8.009112013194706e-07, |
| "loss": 0.7989, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.5772862152684111, |
| "grad_norm": 13.014171738803286, |
| "learning_rate": 7.997693847673376e-07, |
| "loss": 0.8044, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.5776458951533135, |
| "grad_norm": 12.034836978503431, |
| "learning_rate": 7.986278401034314e-07, |
| "loss": 0.7526, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.578005575038216, |
| "grad_norm": 8.76378610955473, |
| "learning_rate": 7.97486568877827e-07, |
| "loss": 0.8735, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.5783652549231184, |
| "grad_norm": 8.083337469431553, |
| "learning_rate": 7.96345572640229e-07, |
| "loss": 0.8618, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.5787249348080209, |
| "grad_norm": 17.5439931020745, |
| "learning_rate": 7.952048529399684e-07, |
| "loss": 0.8366, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.5790846146929233, |
| "grad_norm": 24.84467945535168, |
| "learning_rate": 7.94064411326e-07, |
| "loss": 0.8344, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5794442945778258, |
| "grad_norm": 12.652147596563642, |
| "learning_rate": 7.929242493469011e-07, |
| "loss": 0.7845, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.5798039744627281, |
| "grad_norm": 13.230941718882002, |
| "learning_rate": 7.917843685508701e-07, |
| "loss": 0.7772, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.5801636543476306, |
| "grad_norm": 7.4947545373566955, |
| "learning_rate": 7.906447704857232e-07, |
| "loss": 0.7963, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.580523334232533, |
| "grad_norm": 13.049017429706673, |
| "learning_rate": 7.895054566988923e-07, |
| "loss": 0.7467, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.5808830141174355, |
| "grad_norm": 12.331575353775431, |
| "learning_rate": 7.883664287374234e-07, |
| "loss": 0.9336, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.581242694002338, |
| "grad_norm": 8.464406782261062, |
| "learning_rate": 7.872276881479748e-07, |
| "loss": 0.8211, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.5816023738872403, |
| "grad_norm": 11.254172352508087, |
| "learning_rate": 7.860892364768143e-07, |
| "loss": 0.9262, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.5819620537721428, |
| "grad_norm": 11.331168954777441, |
| "learning_rate": 7.849510752698179e-07, |
| "loss": 0.755, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.5823217336570452, |
| "grad_norm": 18.84593878696583, |
| "learning_rate": 7.838132060724656e-07, |
| "loss": 0.7542, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.5826814135419477, |
| "grad_norm": 9.630203661435184, |
| "learning_rate": 7.826756304298428e-07, |
| "loss": 0.8442, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5830410934268501, |
| "grad_norm": 12.235166401525152, |
| "learning_rate": 7.815383498866351e-07, |
| "loss": 0.8065, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.5834007733117526, |
| "grad_norm": 37.672315833888156, |
| "learning_rate": 7.804013659871284e-07, |
| "loss": 0.7702, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.5837604531966549, |
| "grad_norm": 13.29452157979337, |
| "learning_rate": 7.792646802752044e-07, |
| "loss": 0.7965, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.5841201330815574, |
| "grad_norm": 10.764627403580235, |
| "learning_rate": 7.78128294294341e-07, |
| "loss": 0.7735, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.5844798129664599, |
| "grad_norm": 8.737810342888794, |
| "learning_rate": 7.769922095876087e-07, |
| "loss": 0.7802, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.5848394928513623, |
| "grad_norm": 14.381254651476704, |
| "learning_rate": 7.758564276976695e-07, |
| "loss": 0.7713, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.5851991727362648, |
| "grad_norm": 10.22112868832261, |
| "learning_rate": 7.747209501667728e-07, |
| "loss": 0.7799, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.5855588526211671, |
| "grad_norm": 10.624653396163458, |
| "learning_rate": 7.73585778536756e-07, |
| "loss": 0.7532, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.5859185325060696, |
| "grad_norm": 12.149799716586122, |
| "learning_rate": 7.724509143490407e-07, |
| "loss": 0.8116, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.586278212390972, |
| "grad_norm": 10.637991127225355, |
| "learning_rate": 7.713163591446317e-07, |
| "loss": 0.7916, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5866378922758745, |
| "grad_norm": 7.961233529096206, |
| "learning_rate": 7.701821144641125e-07, |
| "loss": 0.834, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.5869975721607769, |
| "grad_norm": 15.480251438295925, |
| "learning_rate": 7.690481818476467e-07, |
| "loss": 0.8796, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.5873572520456793, |
| "grad_norm": 22.493955035772476, |
| "learning_rate": 7.679145628349733e-07, |
| "loss": 0.8464, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.5877169319305818, |
| "grad_norm": 8.238857603348297, |
| "learning_rate": 7.667812589654061e-07, |
| "loss": 0.7564, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.5880766118154842, |
| "grad_norm": 11.141054739419813, |
| "learning_rate": 7.656482717778298e-07, |
| "loss": 0.7308, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.5884362917003867, |
| "grad_norm": 13.536432967707173, |
| "learning_rate": 7.645156028107004e-07, |
| "loss": 0.7676, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.5887959715852891, |
| "grad_norm": 17.565506542196687, |
| "learning_rate": 7.633832536020409e-07, |
| "loss": 0.8364, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.5891556514701916, |
| "grad_norm": 12.592416432890023, |
| "learning_rate": 7.622512256894411e-07, |
| "loss": 0.8126, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.5895153313550939, |
| "grad_norm": 8.757685937856875, |
| "learning_rate": 7.611195206100528e-07, |
| "loss": 0.803, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.5898750112399964, |
| "grad_norm": 25.36742880137935, |
| "learning_rate": 7.599881399005911e-07, |
| "loss": 0.8508, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5902346911248988, |
| "grad_norm": 10.397222307031699, |
| "learning_rate": 7.5885708509733e-07, |
| "loss": 0.7614, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.5905943710098013, |
| "grad_norm": 9.026116126538364, |
| "learning_rate": 7.577263577361009e-07, |
| "loss": 0.8438, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.5909540508947038, |
| "grad_norm": 13.858732556753937, |
| "learning_rate": 7.565959593522912e-07, |
| "loss": 0.8125, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.5913137307796061, |
| "grad_norm": 12.255674448639857, |
| "learning_rate": 7.554658914808403e-07, |
| "loss": 0.7885, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.5916734106645086, |
| "grad_norm": 16.936034511555206, |
| "learning_rate": 7.543361556562396e-07, |
| "loss": 0.8317, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.592033090549411, |
| "grad_norm": 38.265449794638506, |
| "learning_rate": 7.532067534125299e-07, |
| "loss": 0.8391, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.5923927704343135, |
| "grad_norm": 11.01071430615483, |
| "learning_rate": 7.520776862832992e-07, |
| "loss": 0.8272, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.5927524503192159, |
| "grad_norm": 16.703534635338457, |
| "learning_rate": 7.509489558016789e-07, |
| "loss": 0.7588, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.5931121302041183, |
| "grad_norm": 13.918339297662166, |
| "learning_rate": 7.49820563500345e-07, |
| "loss": 0.7804, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.5934718100890207, |
| "grad_norm": 11.949416105380944, |
| "learning_rate": 7.486925109115134e-07, |
| "loss": 0.8599, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5938314899739232, |
| "grad_norm": 39.123143987842894, |
| "learning_rate": 7.475647995669396e-07, |
| "loss": 0.793, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.5941911698588257, |
| "grad_norm": 12.33935596524586, |
| "learning_rate": 7.464374309979142e-07, |
| "loss": 0.7959, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.5945508497437281, |
| "grad_norm": 34.48309402454981, |
| "learning_rate": 7.453104067352636e-07, |
| "loss": 0.7531, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.5949105296286306, |
| "grad_norm": 10.537791080208066, |
| "learning_rate": 7.441837283093463e-07, |
| "loss": 0.8255, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.5952702095135329, |
| "grad_norm": 9.4365930426022, |
| "learning_rate": 7.430573972500518e-07, |
| "loss": 0.8213, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.5956298893984354, |
| "grad_norm": 14.142693442308168, |
| "learning_rate": 7.419314150867964e-07, |
| "loss": 0.8534, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.5959895692833378, |
| "grad_norm": 12.728732783659234, |
| "learning_rate": 7.40805783348524e-07, |
| "loss": 0.8539, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.5963492491682403, |
| "grad_norm": 10.607627014324681, |
| "learning_rate": 7.396805035637021e-07, |
| "loss": 0.7968, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.5967089290531427, |
| "grad_norm": 15.922722762925876, |
| "learning_rate": 7.385555772603212e-07, |
| "loss": 0.7998, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.5970686089380451, |
| "grad_norm": 9.371180648602238, |
| "learning_rate": 7.374310059658899e-07, |
| "loss": 0.8754, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5974282888229476, |
| "grad_norm": 21.04428397163054, |
| "learning_rate": 7.363067912074366e-07, |
| "loss": 0.8101, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.59778796870785, |
| "grad_norm": 8.686895121328693, |
| "learning_rate": 7.351829345115046e-07, |
| "loss": 0.7323, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.5981476485927525, |
| "grad_norm": 11.025227706986179, |
| "learning_rate": 7.340594374041515e-07, |
| "loss": 0.8196, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.5985073284776549, |
| "grad_norm": 8.66746116218025, |
| "learning_rate": 7.329363014109462e-07, |
| "loss": 0.8179, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.5988670083625574, |
| "grad_norm": 13.456402927781026, |
| "learning_rate": 7.318135280569673e-07, |
| "loss": 0.7836, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.5992266882474597, |
| "grad_norm": 8.996043879179224, |
| "learning_rate": 7.306911188668016e-07, |
| "loss": 0.8255, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.5995863681323622, |
| "grad_norm": 14.971979355282974, |
| "learning_rate": 7.295690753645403e-07, |
| "loss": 0.7884, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.5999460480172646, |
| "grad_norm": 13.741062159581267, |
| "learning_rate": 7.284473990737794e-07, |
| "loss": 0.7873, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.6003057279021671, |
| "grad_norm": 13.18784983193868, |
| "learning_rate": 7.27326091517615e-07, |
| "loss": 0.8479, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.6006654077870696, |
| "grad_norm": 14.518311097413894, |
| "learning_rate": 7.262051542186429e-07, |
| "loss": 0.8235, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6010250876719719, |
| "grad_norm": 13.527841789567097, |
| "learning_rate": 7.250845886989567e-07, |
| "loss": 0.8892, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6013847675568744, |
| "grad_norm": 11.632315458958761, |
| "learning_rate": 7.239643964801449e-07, |
| "loss": 0.8006, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6017444474417768, |
| "grad_norm": 17.626581694580985, |
| "learning_rate": 7.228445790832885e-07, |
| "loss": 0.8434, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.6021041273266793, |
| "grad_norm": 28.75211959641394, |
| "learning_rate": 7.217251380289601e-07, |
| "loss": 0.8482, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.6024638072115817, |
| "grad_norm": 12.345644933002072, |
| "learning_rate": 7.206060748372212e-07, |
| "loss": 0.873, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6028234870964841, |
| "grad_norm": 8.424235857182305, |
| "learning_rate": 7.194873910276203e-07, |
| "loss": 0.792, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6031831669813865, |
| "grad_norm": 16.950896602803002, |
| "learning_rate": 7.183690881191907e-07, |
| "loss": 0.7963, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.603542846866289, |
| "grad_norm": 11.204109864484545, |
| "learning_rate": 7.17251167630448e-07, |
| "loss": 0.8174, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6039025267511915, |
| "grad_norm": 18.393413798144334, |
| "learning_rate": 7.161336310793893e-07, |
| "loss": 0.7951, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6042622066360939, |
| "grad_norm": 14.165209694169475, |
| "learning_rate": 7.150164799834902e-07, |
| "loss": 0.7865, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6046218865209964, |
| "grad_norm": 21.166700297544274, |
| "learning_rate": 7.138997158597019e-07, |
| "loss": 0.8377, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.6049815664058987, |
| "grad_norm": 15.084184088563942, |
| "learning_rate": 7.127833402244514e-07, |
| "loss": 0.8096, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6053412462908012, |
| "grad_norm": 14.157352525865917, |
| "learning_rate": 7.116673545936378e-07, |
| "loss": 0.8438, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6057009261757036, |
| "grad_norm": 11.058120466139862, |
| "learning_rate": 7.105517604826307e-07, |
| "loss": 0.8251, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 10.01805659251896, |
| "learning_rate": 7.094365594062675e-07, |
| "loss": 0.7872, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6064202859455085, |
| "grad_norm": 14.417351252084616, |
| "learning_rate": 7.083217528788524e-07, |
| "loss": 0.7586, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6067799658304109, |
| "grad_norm": 23.018726266905283, |
| "learning_rate": 7.072073424141537e-07, |
| "loss": 0.8036, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.6071396457153134, |
| "grad_norm": 23.108894385648515, |
| "learning_rate": 7.060933295254025e-07, |
| "loss": 0.859, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.6074993256002158, |
| "grad_norm": 14.950461839381, |
| "learning_rate": 7.049797157252888e-07, |
| "loss": 0.8105, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6078590054851183, |
| "grad_norm": 8.919361344653822, |
| "learning_rate": 7.038665025259615e-07, |
| "loss": 0.8807, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6082186853700207, |
| "grad_norm": 14.925105391763122, |
| "learning_rate": 7.027536914390257e-07, |
| "loss": 0.8696, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6085783652549231, |
| "grad_norm": 69.98923360282737, |
| "learning_rate": 7.016412839755399e-07, |
| "loss": 0.784, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6089380451398255, |
| "grad_norm": 35.44843074757706, |
| "learning_rate": 7.005292816460155e-07, |
| "loss": 0.8002, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.609297725024728, |
| "grad_norm": 9.939970667136594, |
| "learning_rate": 6.994176859604121e-07, |
| "loss": 0.8157, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.6096574049096304, |
| "grad_norm": 7.863375520704018, |
| "learning_rate": 6.983064984281389e-07, |
| "loss": 0.7949, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6100170847945329, |
| "grad_norm": 97.29261826984394, |
| "learning_rate": 6.971957205580497e-07, |
| "loss": 0.7726, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6103767646794354, |
| "grad_norm": 9.81021368261749, |
| "learning_rate": 6.96085353858443e-07, |
| "loss": 0.7813, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6107364445643377, |
| "grad_norm": 16.02010761133172, |
| "learning_rate": 6.949753998370578e-07, |
| "loss": 0.7399, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.6110961244492402, |
| "grad_norm": 18.792860754940573, |
| "learning_rate": 6.938658600010734e-07, |
| "loss": 0.825, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6114558043341426, |
| "grad_norm": 31.606223339116642, |
| "learning_rate": 6.92756735857107e-07, |
| "loss": 0.7607, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6118154842190451, |
| "grad_norm": 24.82977511564448, |
| "learning_rate": 6.91648028911211e-07, |
| "loss": 0.8375, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6121751641039475, |
| "grad_norm": 12.601085318467119, |
| "learning_rate": 6.905397406688708e-07, |
| "loss": 0.8364, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.6125348439888499, |
| "grad_norm": 12.705958637702684, |
| "learning_rate": 6.894318726350041e-07, |
| "loss": 0.8558, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.6128945238737523, |
| "grad_norm": 12.086710100708398, |
| "learning_rate": 6.883244263139577e-07, |
| "loss": 0.8178, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6132542037586548, |
| "grad_norm": 7.7903843239514785, |
| "learning_rate": 6.87217403209506e-07, |
| "loss": 0.8087, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6136138836435572, |
| "grad_norm": 12.198803272759948, |
| "learning_rate": 6.861108048248477e-07, |
| "loss": 0.7609, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.6139735635284597, |
| "grad_norm": 9.528152912375319, |
| "learning_rate": 6.850046326626058e-07, |
| "loss": 0.7557, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.6143332434133622, |
| "grad_norm": 10.049261688513136, |
| "learning_rate": 6.838988882248243e-07, |
| "loss": 0.8031, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.6146929232982645, |
| "grad_norm": 8.447514196519638, |
| "learning_rate": 6.827935730129669e-07, |
| "loss": 0.7961, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.615052603183167, |
| "grad_norm": 23.11292378241723, |
| "learning_rate": 6.816886885279131e-07, |
| "loss": 0.8443, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6154122830680694, |
| "grad_norm": 11.119142446850603, |
| "learning_rate": 6.805842362699588e-07, |
| "loss": 0.7363, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6157719629529719, |
| "grad_norm": 11.202186712862288, |
| "learning_rate": 6.794802177388122e-07, |
| "loss": 0.8158, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6161316428378742, |
| "grad_norm": 13.296179979250441, |
| "learning_rate": 6.783766344335939e-07, |
| "loss": 0.8386, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.6164913227227767, |
| "grad_norm": 21.518111938157613, |
| "learning_rate": 6.772734878528312e-07, |
| "loss": 0.8087, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.6168510026076791, |
| "grad_norm": 46.66561116718014, |
| "learning_rate": 6.761707794944604e-07, |
| "loss": 0.8192, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6172106824925816, |
| "grad_norm": 12.296866343279195, |
| "learning_rate": 6.750685108558221e-07, |
| "loss": 0.863, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.6175703623774841, |
| "grad_norm": 11.297803831567172, |
| "learning_rate": 6.739666834336598e-07, |
| "loss": 0.8261, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6179300422623865, |
| "grad_norm": 13.288465766136282, |
| "learning_rate": 6.728652987241174e-07, |
| "loss": 0.8536, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.618289722147289, |
| "grad_norm": 14.012393699858668, |
| "learning_rate": 6.717643582227384e-07, |
| "loss": 0.8657, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.6186494020321913, |
| "grad_norm": 17.641059860172508, |
| "learning_rate": 6.706638634244628e-07, |
| "loss": 0.7291, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6190090819170938, |
| "grad_norm": 21.798931499322553, |
| "learning_rate": 6.695638158236254e-07, |
| "loss": 0.8835, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6193687618019962, |
| "grad_norm": 9.725572593880349, |
| "learning_rate": 6.684642169139543e-07, |
| "loss": 0.8005, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6197284416868987, |
| "grad_norm": 14.195318412043791, |
| "learning_rate": 6.673650681885668e-07, |
| "loss": 0.7608, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.620088121571801, |
| "grad_norm": 12.923746404615972, |
| "learning_rate": 6.662663711399705e-07, |
| "loss": 0.7936, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.6204478014567035, |
| "grad_norm": 9.857065771465349, |
| "learning_rate": 6.651681272600591e-07, |
| "loss": 0.8411, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.620807481341606, |
| "grad_norm": 14.888609976537028, |
| "learning_rate": 6.64070338040111e-07, |
| "loss": 0.8305, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6211671612265084, |
| "grad_norm": 11.79121313028303, |
| "learning_rate": 6.629730049707868e-07, |
| "loss": 0.7806, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.6215268411114109, |
| "grad_norm": 9.388306130381386, |
| "learning_rate": 6.618761295421284e-07, |
| "loss": 0.8264, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.6218865209963133, |
| "grad_norm": 10.896893029733993, |
| "learning_rate": 6.607797132435559e-07, |
| "loss": 0.7853, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.6222462008812157, |
| "grad_norm": 24.33571401714642, |
| "learning_rate": 6.596837575638663e-07, |
| "loss": 0.8379, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6226058807661181, |
| "grad_norm": 14.737447005737831, |
| "learning_rate": 6.585882639912302e-07, |
| "loss": 0.7539, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.6229655606510206, |
| "grad_norm": 11.369836569855831, |
| "learning_rate": 6.574932340131917e-07, |
| "loss": 0.8058, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.623325240535923, |
| "grad_norm": 10.305482256804485, |
| "learning_rate": 6.563986691166655e-07, |
| "loss": 0.7784, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6236849204208255, |
| "grad_norm": 22.814135768221593, |
| "learning_rate": 6.553045707879336e-07, |
| "loss": 0.7798, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.624044600305728, |
| "grad_norm": 28.116517448330193, |
| "learning_rate": 6.542109405126457e-07, |
| "loss": 0.7473, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6244042801906303, |
| "grad_norm": 14.580162755655769, |
| "learning_rate": 6.531177797758154e-07, |
| "loss": 0.8007, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.6247639600755328, |
| "grad_norm": 16.04566880088962, |
| "learning_rate": 6.520250900618185e-07, |
| "loss": 0.9144, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6251236399604352, |
| "grad_norm": 15.673671179728919, |
| "learning_rate": 6.509328728543917e-07, |
| "loss": 0.8013, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6254833198453377, |
| "grad_norm": 35.50996042236339, |
| "learning_rate": 6.498411296366299e-07, |
| "loss": 0.8097, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.62584299973024, |
| "grad_norm": 16.880957362909395, |
| "learning_rate": 6.487498618909844e-07, |
| "loss": 0.756, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6262026796151425, |
| "grad_norm": 14.035214514055044, |
| "learning_rate": 6.476590710992604e-07, |
| "loss": 0.8141, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6265623595000449, |
| "grad_norm": 15.814422206787214, |
| "learning_rate": 6.465687587426165e-07, |
| "loss": 0.8343, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6269220393849474, |
| "grad_norm": 15.188642929960775, |
| "learning_rate": 6.454789263015609e-07, |
| "loss": 0.8491, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.6272817192698499, |
| "grad_norm": 14.583983954449385, |
| "learning_rate": 6.443895752559498e-07, |
| "loss": 0.7419, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6276413991547523, |
| "grad_norm": 133.0666371031153, |
| "learning_rate": 6.433007070849863e-07, |
| "loss": 0.9221, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6280010790396547, |
| "grad_norm": 8.003593212036993, |
| "learning_rate": 6.422123232672181e-07, |
| "loss": 0.7631, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6283607589245571, |
| "grad_norm": 7.922901645887125, |
| "learning_rate": 6.411244252805351e-07, |
| "loss": 0.8314, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6287204388094596, |
| "grad_norm": 14.548918361862677, |
| "learning_rate": 6.400370146021661e-07, |
| "loss": 0.7949, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.629080118694362, |
| "grad_norm": 10.892622626019605, |
| "learning_rate": 6.389500927086799e-07, |
| "loss": 0.7548, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6294397985792645, |
| "grad_norm": 8.588043511139587, |
| "learning_rate": 6.378636610759811e-07, |
| "loss": 0.8308, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6297994784641668, |
| "grad_norm": 12.340274835878352, |
| "learning_rate": 6.367777211793089e-07, |
| "loss": 0.8127, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6301591583490693, |
| "grad_norm": 15.78505028084241, |
| "learning_rate": 6.356922744932334e-07, |
| "loss": 0.815, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6305188382339718, |
| "grad_norm": 11.773994533558056, |
| "learning_rate": 6.346073224916565e-07, |
| "loss": 0.9125, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.6308785181188742, |
| "grad_norm": 9.971471855959267, |
| "learning_rate": 6.335228666478077e-07, |
| "loss": 0.8055, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6312381980037767, |
| "grad_norm": 23.62925258069257, |
| "learning_rate": 6.324389084342434e-07, |
| "loss": 0.8056, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.631597877888679, |
| "grad_norm": 25.21027748771099, |
| "learning_rate": 6.31355449322843e-07, |
| "loss": 0.8109, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6319575577735815, |
| "grad_norm": 27.015234963332013, |
| "learning_rate": 6.302724907848095e-07, |
| "loss": 0.876, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6323172376584839, |
| "grad_norm": 9.541397387947747, |
| "learning_rate": 6.291900342906653e-07, |
| "loss": 0.7522, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.6326769175433864, |
| "grad_norm": 15.225320262719087, |
| "learning_rate": 6.281080813102521e-07, |
| "loss": 0.8016, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6330365974282888, |
| "grad_norm": 22.474412580868492, |
| "learning_rate": 6.270266333127265e-07, |
| "loss": 0.8175, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6333962773131913, |
| "grad_norm": 29.12108471492303, |
| "learning_rate": 6.259456917665604e-07, |
| "loss": 0.8182, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6337559571980937, |
| "grad_norm": 12.204041386576975, |
| "learning_rate": 6.248652581395377e-07, |
| "loss": 0.7633, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.6341156370829961, |
| "grad_norm": 20.134854518828618, |
| "learning_rate": 6.237853338987531e-07, |
| "loss": 0.8276, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6344753169678986, |
| "grad_norm": 12.916402082316186, |
| "learning_rate": 6.227059205106085e-07, |
| "loss": 0.7856, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.634834996852801, |
| "grad_norm": 11.036413506251774, |
| "learning_rate": 6.216270194408129e-07, |
| "loss": 0.9049, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6351946767377035, |
| "grad_norm": 10.062422641931368, |
| "learning_rate": 6.205486321543797e-07, |
| "loss": 0.8437, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6355543566226058, |
| "grad_norm": 51.73009803827442, |
| "learning_rate": 6.194707601156248e-07, |
| "loss": 0.7957, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6359140365075083, |
| "grad_norm": 11.334317202740843, |
| "learning_rate": 6.183934047881635e-07, |
| "loss": 0.7942, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.6362737163924107, |
| "grad_norm": 99.22508216639132, |
| "learning_rate": 6.173165676349102e-07, |
| "loss": 0.933, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6366333962773132, |
| "grad_norm": 11.198191543243793, |
| "learning_rate": 6.162402501180759e-07, |
| "loss": 0.795, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6369930761622157, |
| "grad_norm": 17.03016020066533, |
| "learning_rate": 6.151644536991655e-07, |
| "loss": 0.798, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.637352756047118, |
| "grad_norm": 10.164294215006395, |
| "learning_rate": 6.140891798389769e-07, |
| "loss": 0.8287, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6377124359320205, |
| "grad_norm": 8.807026779685549, |
| "learning_rate": 6.130144299975972e-07, |
| "loss": 0.8174, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6380721158169229, |
| "grad_norm": 21.14645986610551, |
| "learning_rate": 6.119402056344032e-07, |
| "loss": 0.8481, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6384317957018254, |
| "grad_norm": 7.879847914566813, |
| "learning_rate": 6.108665082080578e-07, |
| "loss": 0.8275, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6387914755867278, |
| "grad_norm": 12.434974102674712, |
| "learning_rate": 6.097933391765087e-07, |
| "loss": 0.7707, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6391511554716303, |
| "grad_norm": 8.505561474001542, |
| "learning_rate": 6.087206999969847e-07, |
| "loss": 0.7402, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.6395108353565326, |
| "grad_norm": 56.64536593663647, |
| "learning_rate": 6.07648592125997e-07, |
| "loss": 0.7865, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6398705152414351, |
| "grad_norm": 12.507722360780223, |
| "learning_rate": 6.065770170193341e-07, |
| "loss": 0.8205, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6402301951263376, |
| "grad_norm": 12.722603618717091, |
| "learning_rate": 6.05505976132062e-07, |
| "loss": 0.7469, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.64058987501124, |
| "grad_norm": 10.269738579037984, |
| "learning_rate": 6.044354709185202e-07, |
| "loss": 0.7875, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.6409495548961425, |
| "grad_norm": 30.758909873658617, |
| "learning_rate": 6.033655028323215e-07, |
| "loss": 0.7216, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.6413092347810448, |
| "grad_norm": 14.316956448393078, |
| "learning_rate": 6.022960733263493e-07, |
| "loss": 0.8473, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6416689146659473, |
| "grad_norm": 12.431060394684565, |
| "learning_rate": 6.01227183852756e-07, |
| "loss": 0.7882, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6420285945508497, |
| "grad_norm": 9.500973513559808, |
| "learning_rate": 6.001588358629597e-07, |
| "loss": 0.8482, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6423882744357522, |
| "grad_norm": 20.92781239812433, |
| "learning_rate": 5.990910308076442e-07, |
| "loss": 0.7918, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.6427479543206546, |
| "grad_norm": 8.664992551100244, |
| "learning_rate": 5.980237701367556e-07, |
| "loss": 0.7638, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.643107634205557, |
| "grad_norm": 11.090661919182084, |
| "learning_rate": 5.969570552995014e-07, |
| "loss": 0.8059, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6434673140904595, |
| "grad_norm": 18.287091314618046, |
| "learning_rate": 5.958908877443465e-07, |
| "loss": 0.786, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6438269939753619, |
| "grad_norm": 48.024587013915905, |
| "learning_rate": 5.948252689190141e-07, |
| "loss": 0.7355, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6441866738602644, |
| "grad_norm": 12.859201948345953, |
| "learning_rate": 5.937602002704818e-07, |
| "loss": 0.8528, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6445463537451668, |
| "grad_norm": 10.955287621995861, |
| "learning_rate": 5.926956832449805e-07, |
| "loss": 0.8152, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.6449060336300693, |
| "grad_norm": 15.435275641349563, |
| "learning_rate": 5.916317192879909e-07, |
| "loss": 0.852, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6452657135149716, |
| "grad_norm": 76.3314449574763, |
| "learning_rate": 5.90568309844244e-07, |
| "loss": 0.7824, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6456253933998741, |
| "grad_norm": 14.837934574547674, |
| "learning_rate": 5.895054563577171e-07, |
| "loss": 0.7831, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6459850732847765, |
| "grad_norm": 24.168391521465132, |
| "learning_rate": 5.884431602716331e-07, |
| "loss": 0.782, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.646344753169679, |
| "grad_norm": 10.7570153448703, |
| "learning_rate": 5.873814230284575e-07, |
| "loss": 0.7747, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6467044330545815, |
| "grad_norm": 90.55006051473893, |
| "learning_rate": 5.86320246069897e-07, |
| "loss": 0.7818, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.6470641129394838, |
| "grad_norm": 19.438214260499464, |
| "learning_rate": 5.852596308368981e-07, |
| "loss": 0.866, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6474237928243863, |
| "grad_norm": 13.657916190310873, |
| "learning_rate": 5.841995787696438e-07, |
| "loss": 0.7081, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6477834727092887, |
| "grad_norm": 14.665885552199574, |
| "learning_rate": 5.831400913075529e-07, |
| "loss": 0.856, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.6481431525941912, |
| "grad_norm": 10.162631162678629, |
| "learning_rate": 5.820811698892774e-07, |
| "loss": 0.7576, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6485028324790936, |
| "grad_norm": 11.323067223912123, |
| "learning_rate": 5.810228159527002e-07, |
| "loss": 0.8584, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.6488625123639961, |
| "grad_norm": 11.27372613935998, |
| "learning_rate": 5.799650309349348e-07, |
| "loss": 0.8396, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.6492221922488984, |
| "grad_norm": 17.460419964750038, |
| "learning_rate": 5.789078162723212e-07, |
| "loss": 0.7775, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6495818721338009, |
| "grad_norm": 14.508483865670446, |
| "learning_rate": 5.778511734004248e-07, |
| "loss": 0.8165, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6499415520187034, |
| "grad_norm": 9.676751789233563, |
| "learning_rate": 5.767951037540349e-07, |
| "loss": 0.7913, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.6503012319036058, |
| "grad_norm": 11.971497636489813, |
| "learning_rate": 5.757396087671633e-07, |
| "loss": 0.8127, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.6506609117885083, |
| "grad_norm": 10.770337299108396, |
| "learning_rate": 5.746846898730402e-07, |
| "loss": 0.8241, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.6510205916734106, |
| "grad_norm": 12.045985601282291, |
| "learning_rate": 5.736303485041141e-07, |
| "loss": 0.8244, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6513802715583131, |
| "grad_norm": 16.15883927758613, |
| "learning_rate": 5.725765860920487e-07, |
| "loss": 0.7476, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.6517399514432155, |
| "grad_norm": 16.66474408815995, |
| "learning_rate": 5.715234040677229e-07, |
| "loss": 0.8377, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.652099631328118, |
| "grad_norm": 21.888182978506073, |
| "learning_rate": 5.70470803861226e-07, |
| "loss": 0.7885, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.6524593112130204, |
| "grad_norm": 27.342810272479124, |
| "learning_rate": 5.694187869018583e-07, |
| "loss": 0.767, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.6528189910979229, |
| "grad_norm": 8.161359323030767, |
| "learning_rate": 5.683673546181274e-07, |
| "loss": 0.8278, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.6531786709828253, |
| "grad_norm": 15.661535313928974, |
| "learning_rate": 5.673165084377478e-07, |
| "loss": 0.7923, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6535383508677277, |
| "grad_norm": 7.689055843441256, |
| "learning_rate": 5.662662497876374e-07, |
| "loss": 0.7484, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.6538980307526302, |
| "grad_norm": 24.838247028824966, |
| "learning_rate": 5.652165800939167e-07, |
| "loss": 0.7913, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6542577106375326, |
| "grad_norm": 27.729926198991425, |
| "learning_rate": 5.641675007819057e-07, |
| "loss": 0.7404, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6546173905224351, |
| "grad_norm": 63.23334053232722, |
| "learning_rate": 5.631190132761247e-07, |
| "loss": 0.7521, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6549770704073374, |
| "grad_norm": 15.542551429874342, |
| "learning_rate": 5.620711190002878e-07, |
| "loss": 0.8185, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6553367502922399, |
| "grad_norm": 9.809043926468073, |
| "learning_rate": 5.610238193773061e-07, |
| "loss": 0.7819, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.6556964301771423, |
| "grad_norm": 8.58369213799892, |
| "learning_rate": 5.599771158292805e-07, |
| "loss": 0.7355, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.6560561100620448, |
| "grad_norm": 13.820619552056009, |
| "learning_rate": 5.589310097775054e-07, |
| "loss": 0.8129, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6564157899469473, |
| "grad_norm": 13.067661393809157, |
| "learning_rate": 5.578855026424618e-07, |
| "loss": 0.8095, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.6567754698318496, |
| "grad_norm": 10.305852999489563, |
| "learning_rate": 5.568405958438181e-07, |
| "loss": 0.8123, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.6571351497167521, |
| "grad_norm": 8.673385969068832, |
| "learning_rate": 5.557962908004274e-07, |
| "loss": 0.7976, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6574948296016545, |
| "grad_norm": 15.45928638878636, |
| "learning_rate": 5.547525889303264e-07, |
| "loss": 0.7952, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.657854509486557, |
| "grad_norm": 10.10341774407125, |
| "learning_rate": 5.537094916507319e-07, |
| "loss": 0.7889, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6582141893714594, |
| "grad_norm": 8.556881114275807, |
| "learning_rate": 5.526670003780399e-07, |
| "loss": 0.7618, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6585738692563619, |
| "grad_norm": 19.342068228599718, |
| "learning_rate": 5.516251165278234e-07, |
| "loss": 0.7546, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.6589335491412642, |
| "grad_norm": 59.54816995344833, |
| "learning_rate": 5.505838415148316e-07, |
| "loss": 0.7827, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.6592932290261667, |
| "grad_norm": 10.14681148147005, |
| "learning_rate": 5.495431767529857e-07, |
| "loss": 0.7988, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.6596529089110692, |
| "grad_norm": 13.135031573911437, |
| "learning_rate": 5.485031236553791e-07, |
| "loss": 0.7537, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.6600125887959716, |
| "grad_norm": 6.967976646359824, |
| "learning_rate": 5.474636836342736e-07, |
| "loss": 0.7402, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.6603722686808741, |
| "grad_norm": 9.216758380737184, |
| "learning_rate": 5.464248581011002e-07, |
| "loss": 0.8323, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.6607319485657764, |
| "grad_norm": 11.600107990635108, |
| "learning_rate": 5.453866484664542e-07, |
| "loss": 0.7451, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.6610916284506789, |
| "grad_norm": 10.892374294450866, |
| "learning_rate": 5.443490561400948e-07, |
| "loss": 0.7621, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.6614513083355813, |
| "grad_norm": 11.45842615630558, |
| "learning_rate": 5.433120825309425e-07, |
| "loss": 0.7832, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.6618109882204838, |
| "grad_norm": 16.153448433055885, |
| "learning_rate": 5.422757290470794e-07, |
| "loss": 0.788, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6621706681053862, |
| "grad_norm": 12.577386597072442, |
| "learning_rate": 5.412399970957439e-07, |
| "loss": 0.8579, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.6625303479902886, |
| "grad_norm": 12.782695711759963, |
| "learning_rate": 5.402048880833308e-07, |
| "loss": 0.7313, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.6628900278751911, |
| "grad_norm": 12.47174858281745, |
| "learning_rate": 5.391704034153894e-07, |
| "loss": 0.8876, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.6632497077600935, |
| "grad_norm": 18.66655631801789, |
| "learning_rate": 5.381365444966204e-07, |
| "loss": 0.7878, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.663609387644996, |
| "grad_norm": 16.319352082499176, |
| "learning_rate": 5.371033127308762e-07, |
| "loss": 0.7858, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.6639690675298984, |
| "grad_norm": 20.719394774267887, |
| "learning_rate": 5.360707095211565e-07, |
| "loss": 0.8364, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.6643287474148009, |
| "grad_norm": 14.478872964211194, |
| "learning_rate": 5.350387362696076e-07, |
| "loss": 0.7589, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.6646884272997032, |
| "grad_norm": 8.595109081067351, |
| "learning_rate": 5.340073943775205e-07, |
| "loss": 0.8464, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.6650481071846057, |
| "grad_norm": 11.04673137932079, |
| "learning_rate": 5.329766852453296e-07, |
| "loss": 0.7933, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.6654077870695081, |
| "grad_norm": 10.53038761065107, |
| "learning_rate": 5.319466102726087e-07, |
| "loss": 0.7025, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6657674669544106, |
| "grad_norm": 15.143280900133385, |
| "learning_rate": 5.309171708580723e-07, |
| "loss": 0.8126, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.6661271468393131, |
| "grad_norm": 26.58920303139226, |
| "learning_rate": 5.298883683995696e-07, |
| "loss": 0.789, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.6664868267242154, |
| "grad_norm": 19.284983239901692, |
| "learning_rate": 5.288602042940871e-07, |
| "loss": 0.8039, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.6668465066091179, |
| "grad_norm": 12.145551006460884, |
| "learning_rate": 5.278326799377427e-07, |
| "loss": 0.7906, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.6672061864940203, |
| "grad_norm": 7.405085830831934, |
| "learning_rate": 5.26805796725788e-07, |
| "loss": 0.7586, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6675658663789228, |
| "grad_norm": 28.002207391036826, |
| "learning_rate": 5.257795560526004e-07, |
| "loss": 0.8145, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.6679255462638252, |
| "grad_norm": 16.62199670397666, |
| "learning_rate": 5.247539593116883e-07, |
| "loss": 0.7662, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.6682852261487277, |
| "grad_norm": 16.954919581500018, |
| "learning_rate": 5.237290078956835e-07, |
| "loss": 0.8472, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.66864490603363, |
| "grad_norm": 18.123531709541997, |
| "learning_rate": 5.227047031963434e-07, |
| "loss": 0.7853, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6690045859185325, |
| "grad_norm": 10.415660732269528, |
| "learning_rate": 5.216810466045448e-07, |
| "loss": 0.7737, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6693642658034349, |
| "grad_norm": 56.09425474270922, |
| "learning_rate": 5.206580395102866e-07, |
| "loss": 0.9381, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6697239456883374, |
| "grad_norm": 13.543162717953841, |
| "learning_rate": 5.196356833026845e-07, |
| "loss": 0.7916, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.6700836255732399, |
| "grad_norm": 64.23400514552281, |
| "learning_rate": 5.18613979369972e-07, |
| "loss": 0.7931, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6704433054581422, |
| "grad_norm": 11.403829262924216, |
| "learning_rate": 5.175929290994941e-07, |
| "loss": 0.8338, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.6708029853430447, |
| "grad_norm": 42.81872528268903, |
| "learning_rate": 5.16572533877711e-07, |
| "loss": 0.8484, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6711626652279471, |
| "grad_norm": 39.066022443972905, |
| "learning_rate": 5.155527950901914e-07, |
| "loss": 0.8624, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.6715223451128496, |
| "grad_norm": 41.603319141147985, |
| "learning_rate": 5.145337141216149e-07, |
| "loss": 0.7927, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.671882024997752, |
| "grad_norm": 10.974669475457675, |
| "learning_rate": 5.135152923557647e-07, |
| "loss": 0.8326, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6722417048826544, |
| "grad_norm": 20.415936770591237, |
| "learning_rate": 5.124975311755319e-07, |
| "loss": 0.809, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6726013847675568, |
| "grad_norm": 23.862286254886733, |
| "learning_rate": 5.114804319629087e-07, |
| "loss": 0.8581, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6729610646524593, |
| "grad_norm": 18.29927024843112, |
| "learning_rate": 5.104639960989903e-07, |
| "loss": 0.847, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.6733207445373618, |
| "grad_norm": 14.9425225110357, |
| "learning_rate": 5.094482249639682e-07, |
| "loss": 0.8231, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6736804244222642, |
| "grad_norm": 7.204493801894027, |
| "learning_rate": 5.084331199371342e-07, |
| "loss": 0.8022, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6740401043071667, |
| "grad_norm": 14.905346487920383, |
| "learning_rate": 5.074186823968739e-07, |
| "loss": 0.8486, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.674399784192069, |
| "grad_norm": 9.676339560933618, |
| "learning_rate": 5.064049137206677e-07, |
| "loss": 0.7858, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.6747594640769715, |
| "grad_norm": 9.49562244506686, |
| "learning_rate": 5.053918152850867e-07, |
| "loss": 0.7997, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6751191439618739, |
| "grad_norm": 9.780546884507606, |
| "learning_rate": 5.043793884657925e-07, |
| "loss": 0.7547, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.6754788238467764, |
| "grad_norm": 9.470322325518556, |
| "learning_rate": 5.033676346375342e-07, |
| "loss": 0.8269, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.6758385037316788, |
| "grad_norm": 11.78272180033259, |
| "learning_rate": 5.02356555174148e-07, |
| "loss": 0.7977, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.6761981836165812, |
| "grad_norm": 10.724813017397079, |
| "learning_rate": 5.013461514485535e-07, |
| "loss": 0.8066, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6765578635014837, |
| "grad_norm": 10.658619259472248, |
| "learning_rate": 5.003364248327533e-07, |
| "loss": 0.7963, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.6769175433863861, |
| "grad_norm": 17.35152082996863, |
| "learning_rate": 4.993273766978296e-07, |
| "loss": 0.8972, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6772772232712886, |
| "grad_norm": 11.019779966265489, |
| "learning_rate": 4.983190084139452e-07, |
| "loss": 0.7863, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.677636903156191, |
| "grad_norm": 8.55292885917213, |
| "learning_rate": 4.973113213503378e-07, |
| "loss": 0.8869, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6779965830410934, |
| "grad_norm": 26.74748899643954, |
| "learning_rate": 4.963043168753211e-07, |
| "loss": 0.7416, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6783562629259958, |
| "grad_norm": 12.517286521411632, |
| "learning_rate": 4.952979963562813e-07, |
| "loss": 0.7884, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.6787159428108983, |
| "grad_norm": 12.144101674093983, |
| "learning_rate": 4.942923611596771e-07, |
| "loss": 0.7733, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6790756226958007, |
| "grad_norm": 16.766125673571228, |
| "learning_rate": 4.932874126510352e-07, |
| "loss": 0.7858, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.6794353025807032, |
| "grad_norm": 7.743675635326617, |
| "learning_rate": 4.922831521949507e-07, |
| "loss": 0.8114, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.6797949824656057, |
| "grad_norm": 9.264045520369624, |
| "learning_rate": 4.912795811550836e-07, |
| "loss": 0.7946, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.680154662350508, |
| "grad_norm": 14.583110690880584, |
| "learning_rate": 4.902767008941593e-07, |
| "loss": 0.8318, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.6805143422354105, |
| "grad_norm": 9.560335815158966, |
| "learning_rate": 4.892745127739635e-07, |
| "loss": 0.8546, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.6808740221203129, |
| "grad_norm": 9.64660352252174, |
| "learning_rate": 4.882730181553433e-07, |
| "loss": 0.7828, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.6812337020052154, |
| "grad_norm": 17.208502485149108, |
| "learning_rate": 4.872722183982028e-07, |
| "loss": 0.7728, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.6815933818901178, |
| "grad_norm": 9.362234657825736, |
| "learning_rate": 4.862721148615043e-07, |
| "loss": 0.827, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.6819530617750202, |
| "grad_norm": 12.453547390878855, |
| "learning_rate": 4.852727089032634e-07, |
| "loss": 0.8151, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.6823127416599226, |
| "grad_norm": 15.795587125698905, |
| "learning_rate": 4.842740018805488e-07, |
| "loss": 0.7693, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.6826724215448251, |
| "grad_norm": 18.90914749581996, |
| "learning_rate": 4.832759951494798e-07, |
| "loss": 0.8, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.6830321014297276, |
| "grad_norm": 10.919574407657244, |
| "learning_rate": 4.822786900652261e-07, |
| "loss": 0.7599, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.68339178131463, |
| "grad_norm": 13.333825107133132, |
| "learning_rate": 4.812820879820033e-07, |
| "loss": 0.8187, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6837514611995325, |
| "grad_norm": 17.048361445562644, |
| "learning_rate": 4.80286190253073e-07, |
| "loss": 0.775, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.6841111410844348, |
| "grad_norm": 7.944052113757398, |
| "learning_rate": 4.792909982307394e-07, |
| "loss": 0.8115, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.6844708209693373, |
| "grad_norm": 14.828276861307213, |
| "learning_rate": 4.782965132663505e-07, |
| "loss": 0.7556, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.6848305008542397, |
| "grad_norm": 18.213914896580356, |
| "learning_rate": 4.773027367102923e-07, |
| "loss": 0.8387, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.6851901807391422, |
| "grad_norm": 20.98108938653177, |
| "learning_rate": 4.763096699119896e-07, |
| "loss": 0.8085, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.6855498606240445, |
| "grad_norm": 16.73875927605446, |
| "learning_rate": 4.753173142199035e-07, |
| "loss": 0.7929, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.685909540508947, |
| "grad_norm": 54.077453505313734, |
| "learning_rate": 4.7432567098152886e-07, |
| "loss": 0.8155, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.6862692203938495, |
| "grad_norm": 19.07631987950139, |
| "learning_rate": 4.7333474154339446e-07, |
| "loss": 0.7632, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.6866289002787519, |
| "grad_norm": 13.156347839106504, |
| "learning_rate": 4.723445272510587e-07, |
| "loss": 0.7859, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.6869885801636544, |
| "grad_norm": 10.73868733548354, |
| "learning_rate": 4.7135502944910897e-07, |
| "loss": 0.8162, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6873482600485568, |
| "grad_norm": 14.177996454482498, |
| "learning_rate": 4.7036624948115987e-07, |
| "loss": 0.815, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.6877079399334592, |
| "grad_norm": 7.439216214586587, |
| "learning_rate": 4.6937818868985204e-07, |
| "loss": 0.7929, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.6880676198183616, |
| "grad_norm": 10.92147774408899, |
| "learning_rate": 4.683908484168486e-07, |
| "loss": 0.8003, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.6884272997032641, |
| "grad_norm": 9.626214440116033, |
| "learning_rate": 4.6740423000283445e-07, |
| "loss": 0.7699, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.6887869795881665, |
| "grad_norm": 20.19775053928553, |
| "learning_rate": 4.6641833478751433e-07, |
| "loss": 0.7459, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.689146659473069, |
| "grad_norm": 27.79660959589243, |
| "learning_rate": 4.654331641096118e-07, |
| "loss": 0.8494, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.6895063393579715, |
| "grad_norm": 11.552197064577827, |
| "learning_rate": 4.6444871930686523e-07, |
| "loss": 0.8517, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.6898660192428738, |
| "grad_norm": 13.096752705587633, |
| "learning_rate": 4.6346500171602843e-07, |
| "loss": 0.8142, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.6902256991277763, |
| "grad_norm": 59.88184273645699, |
| "learning_rate": 4.6248201267286655e-07, |
| "loss": 0.775, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.6905853790126787, |
| "grad_norm": 20.949835354690556, |
| "learning_rate": 4.614997535121573e-07, |
| "loss": 0.789, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6909450588975812, |
| "grad_norm": 21.181926495259585, |
| "learning_rate": 4.6051822556768573e-07, |
| "loss": 0.7091, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.6913047387824836, |
| "grad_norm": 16.283509132556304, |
| "learning_rate": 4.5953743017224446e-07, |
| "loss": 0.7371, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.691664418667386, |
| "grad_norm": 9.597201305512248, |
| "learning_rate": 4.5855736865763096e-07, |
| "loss": 0.7847, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.6920240985522884, |
| "grad_norm": 10.068854727813337, |
| "learning_rate": 4.575780423546476e-07, |
| "loss": 0.786, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.6923837784371909, |
| "grad_norm": 24.80676142205765, |
| "learning_rate": 4.565994525930966e-07, |
| "loss": 0.829, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.6927434583220934, |
| "grad_norm": 15.267556902740353, |
| "learning_rate": 4.5562160070178213e-07, |
| "loss": 0.8247, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.6931031382069958, |
| "grad_norm": 14.712025615536657, |
| "learning_rate": 4.5464448800850366e-07, |
| "loss": 0.776, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.6934628180918982, |
| "grad_norm": 9.762214400361668, |
| "learning_rate": 4.536681158400597e-07, |
| "loss": 0.7797, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.6938224979768006, |
| "grad_norm": 43.29180425471539, |
| "learning_rate": 4.5269248552224105e-07, |
| "loss": 0.7862, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.6941821778617031, |
| "grad_norm": 9.770913691162873, |
| "learning_rate": 4.517175983798334e-07, |
| "loss": 0.8091, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6945418577466055, |
| "grad_norm": 7.634925217920101, |
| "learning_rate": 4.5074345573661057e-07, |
| "loss": 0.8002, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.694901537631508, |
| "grad_norm": 8.682744882544698, |
| "learning_rate": 4.497700589153378e-07, |
| "loss": 0.7941, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.6952612175164103, |
| "grad_norm": 15.118180922821043, |
| "learning_rate": 4.487974092377661e-07, |
| "loss": 0.8083, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.6956208974013128, |
| "grad_norm": 9.826564546122903, |
| "learning_rate": 4.478255080246337e-07, |
| "loss": 0.8315, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.6959805772862153, |
| "grad_norm": 17.69865416850283, |
| "learning_rate": 4.4685435659565975e-07, |
| "loss": 0.8013, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.6963402571711177, |
| "grad_norm": 9.554872577224419, |
| "learning_rate": 4.45883956269548e-07, |
| "loss": 0.8306, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.6966999370560202, |
| "grad_norm": 22.472663863651814, |
| "learning_rate": 4.449143083639805e-07, |
| "loss": 0.7858, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.6970596169409226, |
| "grad_norm": 16.04948422993258, |
| "learning_rate": 4.439454141956194e-07, |
| "loss": 0.8231, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.697419296825825, |
| "grad_norm": 12.693051165018526, |
| "learning_rate": 4.4297727508010065e-07, |
| "loss": 0.8049, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.6977789767107274, |
| "grad_norm": 13.998451344856502, |
| "learning_rate": 4.4200989233203777e-07, |
| "loss": 0.9358, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6981386565956299, |
| "grad_norm": 15.132969994592672, |
| "learning_rate": 4.410432672650153e-07, |
| "loss": 0.8, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.6984983364805323, |
| "grad_norm": 28.32415329498456, |
| "learning_rate": 4.4007740119159065e-07, |
| "loss": 0.7684, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.6988580163654348, |
| "grad_norm": 7.444797986384278, |
| "learning_rate": 4.391122954232882e-07, |
| "loss": 0.7511, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.6992176962503373, |
| "grad_norm": 10.877249177667075, |
| "learning_rate": 4.3814795127060243e-07, |
| "loss": 0.7996, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.6995773761352396, |
| "grad_norm": 8.69990505795878, |
| "learning_rate": 4.371843700429917e-07, |
| "loss": 0.8351, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.6999370560201421, |
| "grad_norm": 9.371009111740484, |
| "learning_rate": 4.362215530488804e-07, |
| "loss": 0.7712, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.7002967359050445, |
| "grad_norm": 9.552032151923505, |
| "learning_rate": 4.352595015956527e-07, |
| "loss": 0.7947, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.700656415789947, |
| "grad_norm": 28.270828787029203, |
| "learning_rate": 4.342982169896555e-07, |
| "loss": 0.8124, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7010160956748493, |
| "grad_norm": 9.330800789772027, |
| "learning_rate": 4.33337700536193e-07, |
| "loss": 0.8077, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.7013757755597518, |
| "grad_norm": 13.526816107359819, |
| "learning_rate": 4.323779535395278e-07, |
| "loss": 0.8185, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7017354554446542, |
| "grad_norm": 13.426541469641524, |
| "learning_rate": 4.3141897730287535e-07, |
| "loss": 0.7832, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.7020951353295567, |
| "grad_norm": 12.99847244436818, |
| "learning_rate": 4.304607731284069e-07, |
| "loss": 0.734, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.7024548152144592, |
| "grad_norm": 22.515219398290462, |
| "learning_rate": 4.295033423172437e-07, |
| "loss": 0.8445, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.7028144950993616, |
| "grad_norm": 26.97147625270981, |
| "learning_rate": 4.285466861694582e-07, |
| "loss": 0.8348, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.703174174984264, |
| "grad_norm": 16.924420570758134, |
| "learning_rate": 4.2759080598406984e-07, |
| "loss": 0.8136, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7035338548691664, |
| "grad_norm": 10.888168178794485, |
| "learning_rate": 4.2663570305904486e-07, |
| "loss": 0.809, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7038935347540689, |
| "grad_norm": 33.89340294150417, |
| "learning_rate": 4.256813786912936e-07, |
| "loss": 0.8494, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7042532146389713, |
| "grad_norm": 9.418302266803023, |
| "learning_rate": 4.247278341766705e-07, |
| "loss": 0.7757, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.7046128945238738, |
| "grad_norm": 16.829275622088073, |
| "learning_rate": 4.2377507080996965e-07, |
| "loss": 0.7698, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.7049725744087761, |
| "grad_norm": 16.20127709968279, |
| "learning_rate": 4.2282308988492524e-07, |
| "loss": 0.8087, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7053322542936786, |
| "grad_norm": 13.413985764534917, |
| "learning_rate": 4.2187189269420807e-07, |
| "loss": 0.7703, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7056919341785811, |
| "grad_norm": 9.345601704137358, |
| "learning_rate": 4.209214805294263e-07, |
| "loss": 0.8064, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.7060516140634835, |
| "grad_norm": 9.650003757320542, |
| "learning_rate": 4.19971854681121e-07, |
| "loss": 0.7875, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.706411293948386, |
| "grad_norm": 10.834910211171145, |
| "learning_rate": 4.190230164387655e-07, |
| "loss": 0.8344, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.7067709738332884, |
| "grad_norm": 27.01328289264853, |
| "learning_rate": 4.180749670907637e-07, |
| "loss": 0.794, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7071306537181908, |
| "grad_norm": 14.513481418662815, |
| "learning_rate": 4.171277079244492e-07, |
| "loss": 0.843, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.7074903336030932, |
| "grad_norm": 13.34443731739716, |
| "learning_rate": 4.1618124022608136e-07, |
| "loss": 0.7949, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7078500134879957, |
| "grad_norm": 14.408048881960235, |
| "learning_rate": 4.152355652808457e-07, |
| "loss": 0.8316, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7082096933728981, |
| "grad_norm": 10.413228637565929, |
| "learning_rate": 4.1429068437285044e-07, |
| "loss": 0.7707, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7085693732578006, |
| "grad_norm": 30.08608479735217, |
| "learning_rate": 4.133465987851268e-07, |
| "loss": 0.7963, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.708929053142703, |
| "grad_norm": 10.085831691124902, |
| "learning_rate": 4.124033097996251e-07, |
| "loss": 0.7691, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.7092887330276054, |
| "grad_norm": 15.398408177770955, |
| "learning_rate": 4.1146081869721427e-07, |
| "loss": 0.8193, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.7096484129125079, |
| "grad_norm": 11.82948307422115, |
| "learning_rate": 4.1051912675767966e-07, |
| "loss": 0.7734, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.7100080927974103, |
| "grad_norm": 15.811539804068559, |
| "learning_rate": 4.0957823525972137e-07, |
| "loss": 0.7838, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.7103677726823128, |
| "grad_norm": 22.68444207026732, |
| "learning_rate": 4.0863814548095344e-07, |
| "loss": 0.7359, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7107274525672151, |
| "grad_norm": 46.43690895790918, |
| "learning_rate": 4.076988586979003e-07, |
| "loss": 0.8163, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7110871324521176, |
| "grad_norm": 10.07946020957789, |
| "learning_rate": 4.067603761859965e-07, |
| "loss": 0.7408, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.71144681233702, |
| "grad_norm": 12.451771689573075, |
| "learning_rate": 4.058226992195838e-07, |
| "loss": 0.811, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.7118064922219225, |
| "grad_norm": 19.989042214097164, |
| "learning_rate": 4.048858290719115e-07, |
| "loss": 0.8138, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.712166172106825, |
| "grad_norm": 24.291023591576753, |
| "learning_rate": 4.0394976701513226e-07, |
| "loss": 0.8115, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7125258519917274, |
| "grad_norm": 8.736651935734901, |
| "learning_rate": 4.0301451432030156e-07, |
| "loss": 0.7856, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7128855318766298, |
| "grad_norm": 22.569035241318087, |
| "learning_rate": 4.0208007225737573e-07, |
| "loss": 0.8013, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.7132452117615322, |
| "grad_norm": 16.647520932154134, |
| "learning_rate": 4.011464420952114e-07, |
| "loss": 0.7989, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.7136048916464347, |
| "grad_norm": 73.6227989347106, |
| "learning_rate": 4.0021362510156166e-07, |
| "loss": 0.7922, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7139645715313371, |
| "grad_norm": 11.824501604075454, |
| "learning_rate": 3.992816225430757e-07, |
| "loss": 0.7911, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7143242514162396, |
| "grad_norm": 48.20878397409259, |
| "learning_rate": 3.9835043568529657e-07, |
| "loss": 0.7653, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.7146839313011419, |
| "grad_norm": 9.706882923552332, |
| "learning_rate": 3.974200657926606e-07, |
| "loss": 0.7461, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7150436111860444, |
| "grad_norm": 16.316800895159567, |
| "learning_rate": 3.96490514128494e-07, |
| "loss": 0.8397, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.7154032910709469, |
| "grad_norm": 16.864508545034322, |
| "learning_rate": 3.95561781955012e-07, |
| "loss": 0.8423, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.7157629709558493, |
| "grad_norm": 10.59809473648667, |
| "learning_rate": 3.9463387053331676e-07, |
| "loss": 0.7803, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7161226508407518, |
| "grad_norm": 10.822493210906448, |
| "learning_rate": 3.9370678112339716e-07, |
| "loss": 0.7719, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.7164823307256541, |
| "grad_norm": 8.335943511398538, |
| "learning_rate": 3.9278051498412466e-07, |
| "loss": 0.8375, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.7168420106105566, |
| "grad_norm": 14.68225712271285, |
| "learning_rate": 3.918550733732535e-07, |
| "loss": 0.85, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.717201690495459, |
| "grad_norm": 9.854404405078158, |
| "learning_rate": 3.9093045754741747e-07, |
| "loss": 0.8618, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.7175613703803615, |
| "grad_norm": 18.36600366376659, |
| "learning_rate": 3.900066687621305e-07, |
| "loss": 0.7804, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7179210502652639, |
| "grad_norm": 12.991887734967218, |
| "learning_rate": 3.8908370827178216e-07, |
| "loss": 0.7598, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7182807301501664, |
| "grad_norm": 22.804948845431067, |
| "learning_rate": 3.8816157732963807e-07, |
| "loss": 0.8132, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.7186404100350688, |
| "grad_norm": 214.02017904335946, |
| "learning_rate": 3.8724027718783646e-07, |
| "loss": 0.822, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.7190000899199712, |
| "grad_norm": 7.9949123256405645, |
| "learning_rate": 3.86319809097389e-07, |
| "loss": 0.7757, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7193597698048737, |
| "grad_norm": 11.197490836725933, |
| "learning_rate": 3.854001743081764e-07, |
| "loss": 0.8376, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7197194496897761, |
| "grad_norm": 12.522094083330089, |
| "learning_rate": 3.8448137406894797e-07, |
| "loss": 0.8051, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.7200791295746786, |
| "grad_norm": 10.149491054895638, |
| "learning_rate": 3.835634096273197e-07, |
| "loss": 0.7669, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7204388094595809, |
| "grad_norm": 15.598336809557521, |
| "learning_rate": 3.826462822297736e-07, |
| "loss": 0.848, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.7207984893444834, |
| "grad_norm": 17.354478744327977, |
| "learning_rate": 3.8172999312165367e-07, |
| "loss": 0.7534, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.7211581692293858, |
| "grad_norm": 16.184795703538978, |
| "learning_rate": 3.8081454354716734e-07, |
| "loss": 0.7438, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7215178491142883, |
| "grad_norm": 12.173927326851151, |
| "learning_rate": 3.798999347493799e-07, |
| "loss": 0.8225, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7218775289991907, |
| "grad_norm": 171.9320034029074, |
| "learning_rate": 3.789861679702169e-07, |
| "loss": 0.7795, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.7222372088840932, |
| "grad_norm": 7.948351057381663, |
| "learning_rate": 3.780732444504592e-07, |
| "loss": 0.8606, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.7225968887689956, |
| "grad_norm": 22.116562347064672, |
| "learning_rate": 3.771611654297443e-07, |
| "loss": 0.903, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.722956568653898, |
| "grad_norm": 10.512462282058872, |
| "learning_rate": 3.7624993214656043e-07, |
| "loss": 0.8358, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7233162485388005, |
| "grad_norm": 23.075159073089754, |
| "learning_rate": 3.7533954583824976e-07, |
| "loss": 0.8763, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7236759284237029, |
| "grad_norm": 8.953974994412894, |
| "learning_rate": 3.74430007741003e-07, |
| "loss": 0.7403, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.7240356083086054, |
| "grad_norm": 12.80624775766204, |
| "learning_rate": 3.735213190898604e-07, |
| "loss": 0.8046, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.7243952881935077, |
| "grad_norm": 8.476228291194738, |
| "learning_rate": 3.726134811187066e-07, |
| "loss": 0.788, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7247549680784102, |
| "grad_norm": 16.157490955268806, |
| "learning_rate": 3.717064950602736e-07, |
| "loss": 0.8756, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7251146479633126, |
| "grad_norm": 11.908040661691748, |
| "learning_rate": 3.708003621461346e-07, |
| "loss": 0.8009, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7254743278482151, |
| "grad_norm": 14.59810362091746, |
| "learning_rate": 3.698950836067064e-07, |
| "loss": 0.8144, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.7258340077331176, |
| "grad_norm": 17.554682865657345, |
| "learning_rate": 3.6899066067124284e-07, |
| "loss": 0.8152, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.72619368761802, |
| "grad_norm": 21.24521103903718, |
| "learning_rate": 3.680870945678388e-07, |
| "loss": 0.8427, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.7265533675029224, |
| "grad_norm": 12.355234140259519, |
| "learning_rate": 3.6718438652342376e-07, |
| "loss": 0.8953, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7269130473878248, |
| "grad_norm": 9.99757087967759, |
| "learning_rate": 3.6628253776376375e-07, |
| "loss": 0.8058, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 9.422636518937265, |
| "learning_rate": 3.6538154951345566e-07, |
| "loss": 0.754, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7276324071576297, |
| "grad_norm": 15.509519030816742, |
| "learning_rate": 3.644814229959302e-07, |
| "loss": 0.8216, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7279920870425322, |
| "grad_norm": 8.274006258533728, |
| "learning_rate": 3.635821594334466e-07, |
| "loss": 0.7768, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7283517669274345, |
| "grad_norm": 13.558731403121147, |
| "learning_rate": 3.6268376004709344e-07, |
| "loss": 0.7722, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.728711446812337, |
| "grad_norm": 7.813752032299304, |
| "learning_rate": 3.61786226056784e-07, |
| "loss": 0.8221, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7290711266972395, |
| "grad_norm": 15.095957653688293, |
| "learning_rate": 3.608895586812586e-07, |
| "loss": 0.8057, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.7294308065821419, |
| "grad_norm": 17.28853822691677, |
| "learning_rate": 3.5999375913807904e-07, |
| "loss": 0.8369, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7297904864670444, |
| "grad_norm": 15.788250879298607, |
| "learning_rate": 3.590988286436302e-07, |
| "loss": 0.8234, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.7301501663519467, |
| "grad_norm": 59.285793453033286, |
| "learning_rate": 3.5820476841311586e-07, |
| "loss": 0.8256, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7305098462368492, |
| "grad_norm": 9.799300739333693, |
| "learning_rate": 3.5731157966055835e-07, |
| "loss": 0.8144, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7308695261217516, |
| "grad_norm": 14.02110369680676, |
| "learning_rate": 3.564192635987966e-07, |
| "loss": 0.8119, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7312292060066541, |
| "grad_norm": 11.044167830136333, |
| "learning_rate": 3.55527821439485e-07, |
| "loss": 0.784, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.7315888858915565, |
| "grad_norm": 11.212578833923999, |
| "learning_rate": 3.546372543930908e-07, |
| "loss": 0.703, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.731948565776459, |
| "grad_norm": 10.179725377907866, |
| "learning_rate": 3.537475636688929e-07, |
| "loss": 0.8103, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7323082456613614, |
| "grad_norm": 9.140277108662909, |
| "learning_rate": 3.5285875047498073e-07, |
| "loss": 0.8338, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.7326679255462638, |
| "grad_norm": 15.783836317379054, |
| "learning_rate": 3.519708160182513e-07, |
| "loss": 0.7391, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.7330276054311663, |
| "grad_norm": 12.170788311536235, |
| "learning_rate": 3.510837615044097e-07, |
| "loss": 0.7683, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.7333872853160687, |
| "grad_norm": 10.937526810183094, |
| "learning_rate": 3.501975881379651e-07, |
| "loss": 0.7547, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.7337469652009712, |
| "grad_norm": 19.054117416753936, |
| "learning_rate": 3.493122971222304e-07, |
| "loss": 0.7536, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7341066450858735, |
| "grad_norm": 24.43819899317763, |
| "learning_rate": 3.4842788965932036e-07, |
| "loss": 0.8195, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.734466324970776, |
| "grad_norm": 13.830521845361822, |
| "learning_rate": 3.4754436695015075e-07, |
| "loss": 0.8489, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.7348260048556784, |
| "grad_norm": 7.155680001752106, |
| "learning_rate": 3.466617301944348e-07, |
| "loss": 0.8186, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.7351856847405809, |
| "grad_norm": 7.494449154337195, |
| "learning_rate": 3.4577998059068345e-07, |
| "loss": 0.8351, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.7355453646254834, |
| "grad_norm": 9.533522287478453, |
| "learning_rate": 3.448991193362024e-07, |
| "loss": 0.8353, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7359050445103857, |
| "grad_norm": 31.22646722400908, |
| "learning_rate": 3.4401914762709217e-07, |
| "loss": 0.6916, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.7362647243952882, |
| "grad_norm": 14.097405364781384, |
| "learning_rate": 3.4314006665824425e-07, |
| "loss": 0.811, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.7366244042801906, |
| "grad_norm": 40.90354140997975, |
| "learning_rate": 3.4226187762334126e-07, |
| "loss": 0.8321, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.7369840841650931, |
| "grad_norm": 20.388688863695087, |
| "learning_rate": 3.41384581714854e-07, |
| "loss": 0.8219, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.7373437640499955, |
| "grad_norm": 9.039715730630718, |
| "learning_rate": 3.405081801240416e-07, |
| "loss": 0.7441, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.737703443934898, |
| "grad_norm": 10.662421431616126, |
| "learning_rate": 3.396326740409481e-07, |
| "loss": 0.7736, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.7380631238198003, |
| "grad_norm": 13.947146247653457, |
| "learning_rate": 3.3875806465440147e-07, |
| "loss": 0.7789, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.7384228037047028, |
| "grad_norm": 11.653362956755682, |
| "learning_rate": 3.3788435315201215e-07, |
| "loss": 0.6684, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.7387824835896053, |
| "grad_norm": 10.52905110701649, |
| "learning_rate": 3.3701154072017235e-07, |
| "loss": 0.877, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.7391421634745077, |
| "grad_norm": 16.581261902849825, |
| "learning_rate": 3.361396285440513e-07, |
| "loss": 0.7918, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7395018433594102, |
| "grad_norm": 16.753947709914218, |
| "learning_rate": 3.352686178075981e-07, |
| "loss": 0.8152, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.7398615232443125, |
| "grad_norm": 18.821758509696345, |
| "learning_rate": 3.343985096935361e-07, |
| "loss": 0.8248, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.740221203129215, |
| "grad_norm": 12.276146780111988, |
| "learning_rate": 3.3352930538336443e-07, |
| "loss": 0.7722, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.7405808830141174, |
| "grad_norm": 19.90004147321965, |
| "learning_rate": 3.3266100605735394e-07, |
| "loss": 0.8282, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.7409405628990199, |
| "grad_norm": 9.834270848478702, |
| "learning_rate": 3.317936128945469e-07, |
| "loss": 0.8405, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7413002427839223, |
| "grad_norm": 17.338370497758397, |
| "learning_rate": 3.309271270727546e-07, |
| "loss": 0.7359, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.7416599226688247, |
| "grad_norm": 9.741758752743277, |
| "learning_rate": 3.300615497685578e-07, |
| "loss": 0.7996, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.7420196025537272, |
| "grad_norm": 31.504247815988247, |
| "learning_rate": 3.2919688215730225e-07, |
| "loss": 0.7738, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.7423792824386296, |
| "grad_norm": 9.4546055919221, |
| "learning_rate": 3.2833312541309864e-07, |
| "loss": 0.8385, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.7427389623235321, |
| "grad_norm": 9.80680314535367, |
| "learning_rate": 3.2747028070882074e-07, |
| "loss": 0.7487, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.7430986422084345, |
| "grad_norm": 9.542150367120579, |
| "learning_rate": 3.266083492161049e-07, |
| "loss": 0.7829, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.743458322093337, |
| "grad_norm": 20.31567157031523, |
| "learning_rate": 3.257473321053463e-07, |
| "loss": 0.7968, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.7438180019782393, |
| "grad_norm": 13.837825732503578, |
| "learning_rate": 3.24887230545699e-07, |
| "loss": 0.9057, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.7441776818631418, |
| "grad_norm": 25.850371390444145, |
| "learning_rate": 3.2402804570507316e-07, |
| "loss": 0.7896, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.7445373617480442, |
| "grad_norm": 12.56587736916352, |
| "learning_rate": 3.2316977875013565e-07, |
| "loss": 0.7503, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7448970416329467, |
| "grad_norm": 23.9209488111018, |
| "learning_rate": 3.2231243084630567e-07, |
| "loss": 0.8051, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.7452567215178492, |
| "grad_norm": 15.065538782610378, |
| "learning_rate": 3.214560031577548e-07, |
| "loss": 0.8623, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7456164014027515, |
| "grad_norm": 94.90024186443381, |
| "learning_rate": 3.2060049684740474e-07, |
| "loss": 0.7944, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.745976081287654, |
| "grad_norm": 8.949350221122438, |
| "learning_rate": 3.197459130769272e-07, |
| "loss": 0.9138, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.7463357611725564, |
| "grad_norm": 7.665554311410304, |
| "learning_rate": 3.1889225300674014e-07, |
| "loss": 0.8285, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.7466954410574589, |
| "grad_norm": 8.039592238769972, |
| "learning_rate": 3.180395177960077e-07, |
| "loss": 0.8628, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.7470551209423613, |
| "grad_norm": 46.5255094528435, |
| "learning_rate": 3.171877086026374e-07, |
| "loss": 0.8002, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.7474148008272637, |
| "grad_norm": 11.212560508497486, |
| "learning_rate": 3.163368265832809e-07, |
| "loss": 0.8214, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.7477744807121661, |
| "grad_norm": 25.645983032084697, |
| "learning_rate": 3.1548687289332955e-07, |
| "loss": 0.7532, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.7481341605970686, |
| "grad_norm": 19.823630319451564, |
| "learning_rate": 3.1463784868691454e-07, |
| "loss": 0.7899, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7484938404819711, |
| "grad_norm": 14.78470146418017, |
| "learning_rate": 3.1378975511690465e-07, |
| "loss": 0.7984, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.7488535203668735, |
| "grad_norm": 34.461545612303475, |
| "learning_rate": 3.129425933349059e-07, |
| "loss": 0.8304, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.749213200251776, |
| "grad_norm": 11.61532578962588, |
| "learning_rate": 3.1209636449125787e-07, |
| "loss": 0.8576, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.7495728801366783, |
| "grad_norm": 12.4044011444979, |
| "learning_rate": 3.112510697350348e-07, |
| "loss": 0.7523, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.7499325600215808, |
| "grad_norm": 9.357511728174131, |
| "learning_rate": 3.104067102140404e-07, |
| "loss": 0.8337, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.7502922399064832, |
| "grad_norm": 12.16077722373795, |
| "learning_rate": 3.095632870748105e-07, |
| "loss": 0.7571, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.7506519197913857, |
| "grad_norm": 13.638246920777654, |
| "learning_rate": 3.087208014626081e-07, |
| "loss": 0.7929, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.751011599676288, |
| "grad_norm": 17.680869612047026, |
| "learning_rate": 3.078792545214247e-07, |
| "loss": 0.7412, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.7513712795611905, |
| "grad_norm": 14.24665972454809, |
| "learning_rate": 3.0703864739397487e-07, |
| "loss": 0.8377, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.751730959446093, |
| "grad_norm": 45.522628663774555, |
| "learning_rate": 3.061989812216994e-07, |
| "loss": 0.8219, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7520906393309954, |
| "grad_norm": 13.222103675700293, |
| "learning_rate": 3.053602571447594e-07, |
| "loss": 0.8223, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.7524503192158979, |
| "grad_norm": 15.175365572068145, |
| "learning_rate": 3.04522476302039e-07, |
| "loss": 0.7151, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.7528099991008003, |
| "grad_norm": 17.72918727027556, |
| "learning_rate": 3.036856398311386e-07, |
| "loss": 0.7974, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7531696789857028, |
| "grad_norm": 15.427939599509612, |
| "learning_rate": 3.02849748868379e-07, |
| "loss": 0.7558, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.7535293588706051, |
| "grad_norm": 12.664430894009977, |
| "learning_rate": 3.0201480454879524e-07, |
| "loss": 0.8245, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.7538890387555076, |
| "grad_norm": 11.062996843287921, |
| "learning_rate": 3.011808080061387e-07, |
| "loss": 0.8098, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.75424871864041, |
| "grad_norm": 18.255148000865006, |
| "learning_rate": 3.0034776037287146e-07, |
| "loss": 0.7799, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.7546083985253125, |
| "grad_norm": 8.101439238334796, |
| "learning_rate": 2.995156627801694e-07, |
| "loss": 0.8066, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.754968078410215, |
| "grad_norm": 13.523471148927442, |
| "learning_rate": 2.9868451635791705e-07, |
| "loss": 0.768, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.7553277582951173, |
| "grad_norm": 13.530099368833888, |
| "learning_rate": 2.9785432223470753e-07, |
| "loss": 0.8117, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7556874381800198, |
| "grad_norm": 19.13140506976365, |
| "learning_rate": 2.970250815378409e-07, |
| "loss": 0.87, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.7560471180649222, |
| "grad_norm": 18.17155476521602, |
| "learning_rate": 2.9619679539332334e-07, |
| "loss": 0.7897, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.7564067979498247, |
| "grad_norm": 12.123078364836363, |
| "learning_rate": 2.953694649258638e-07, |
| "loss": 0.7877, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.7567664778347271, |
| "grad_norm": 7.608292682104856, |
| "learning_rate": 2.94543091258874e-07, |
| "loss": 0.7465, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.7571261577196295, |
| "grad_norm": 12.975406694520702, |
| "learning_rate": 2.9371767551446616e-07, |
| "loss": 0.7985, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.7574858376045319, |
| "grad_norm": 19.27946366676412, |
| "learning_rate": 2.9289321881345254e-07, |
| "loss": 0.811, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.7578455174894344, |
| "grad_norm": 46.025272332147345, |
| "learning_rate": 2.9206972227534234e-07, |
| "loss": 0.8429, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.7582051973743369, |
| "grad_norm": 7.682745343898326, |
| "learning_rate": 2.9124718701834105e-07, |
| "loss": 0.8021, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.7585648772592393, |
| "grad_norm": 10.627958729450935, |
| "learning_rate": 2.904256141593495e-07, |
| "loss": 0.8382, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.7589245571441418, |
| "grad_norm": 9.795786264443564, |
| "learning_rate": 2.896050048139611e-07, |
| "loss": 0.8071, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7592842370290441, |
| "grad_norm": 18.768381546725443, |
| "learning_rate": 2.8878536009646105e-07, |
| "loss": 0.8058, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.7596439169139466, |
| "grad_norm": 18.423993695523897, |
| "learning_rate": 2.879666811198244e-07, |
| "loss": 0.8213, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.760003596798849, |
| "grad_norm": 11.02320646000752, |
| "learning_rate": 2.871489689957157e-07, |
| "loss": 0.8092, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.7603632766837515, |
| "grad_norm": 10.25284067769936, |
| "learning_rate": 2.863322248344862e-07, |
| "loss": 0.8196, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.7607229565686539, |
| "grad_norm": 8.574507521038404, |
| "learning_rate": 2.8551644974517233e-07, |
| "loss": 0.7875, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.7610826364535563, |
| "grad_norm": 69.39568915406147, |
| "learning_rate": 2.8470164483549475e-07, |
| "loss": 0.8059, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.7614423163384588, |
| "grad_norm": 9.438779925806173, |
| "learning_rate": 2.838878112118581e-07, |
| "loss": 0.7672, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.7618019962233612, |
| "grad_norm": 9.601180518139573, |
| "learning_rate": 2.8307494997934575e-07, |
| "loss": 0.8064, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.7621616761082637, |
| "grad_norm": 15.191257494193449, |
| "learning_rate": 2.822630622417228e-07, |
| "loss": 0.7851, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.7625213559931661, |
| "grad_norm": 10.74841922116242, |
| "learning_rate": 2.814521491014312e-07, |
| "loss": 0.837, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7628810358780685, |
| "grad_norm": 11.650209232181394, |
| "learning_rate": 2.806422116595907e-07, |
| "loss": 0.8205, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.7632407157629709, |
| "grad_norm": 31.29876607629954, |
| "learning_rate": 2.798332510159942e-07, |
| "loss": 0.7754, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.7636003956478734, |
| "grad_norm": 12.216849267684562, |
| "learning_rate": 2.7902526826911054e-07, |
| "loss": 0.8268, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.7639600755327758, |
| "grad_norm": 8.36986123454882, |
| "learning_rate": 2.7821826451607887e-07, |
| "loss": 0.82, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.7643197554176783, |
| "grad_norm": 9.4368882957973, |
| "learning_rate": 2.7741224085271063e-07, |
| "loss": 0.7585, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.7646794353025808, |
| "grad_norm": 35.56391519578202, |
| "learning_rate": 2.7660719837348445e-07, |
| "loss": 0.7725, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.7650391151874831, |
| "grad_norm": 26.46363386044735, |
| "learning_rate": 2.7580313817154844e-07, |
| "loss": 0.8388, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.7653987950723856, |
| "grad_norm": 8.769898467051506, |
| "learning_rate": 2.750000613387157e-07, |
| "loss": 0.7511, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.765758474957288, |
| "grad_norm": 11.442060171791937, |
| "learning_rate": 2.741979689654653e-07, |
| "loss": 0.7838, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.7661181548421905, |
| "grad_norm": 45.76135894576074, |
| "learning_rate": 2.733968621409377e-07, |
| "loss": 0.8819, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7664778347270929, |
| "grad_norm": 19.029392095367616, |
| "learning_rate": 2.725967419529369e-07, |
| "loss": 0.8424, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.7668375146119953, |
| "grad_norm": 37.74042828997578, |
| "learning_rate": 2.7179760948792596e-07, |
| "loss": 0.8091, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.7671971944968977, |
| "grad_norm": 24.68003304528056, |
| "learning_rate": 2.709994658310276e-07, |
| "loss": 0.8521, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.7675568743818002, |
| "grad_norm": 60.917685417016344, |
| "learning_rate": 2.702023120660213e-07, |
| "loss": 0.8373, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.7679165542667027, |
| "grad_norm": 13.285572185374493, |
| "learning_rate": 2.6940614927534255e-07, |
| "loss": 0.8285, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.7682762341516051, |
| "grad_norm": 13.95750492177659, |
| "learning_rate": 2.686109785400809e-07, |
| "loss": 0.8398, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.7686359140365076, |
| "grad_norm": 28.79794073125316, |
| "learning_rate": 2.678168009399796e-07, |
| "loss": 0.7885, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.7689955939214099, |
| "grad_norm": 12.483715319878124, |
| "learning_rate": 2.670236175534327e-07, |
| "loss": 0.8527, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.7693552738063124, |
| "grad_norm": 27.28468354299244, |
| "learning_rate": 2.6623142945748447e-07, |
| "loss": 0.8472, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.7697149536912148, |
| "grad_norm": 8.837428554019429, |
| "learning_rate": 2.654402377278273e-07, |
| "loss": 0.7795, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7700746335761173, |
| "grad_norm": 13.568315862204315, |
| "learning_rate": 2.646500434388015e-07, |
| "loss": 0.7786, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.7704343134610196, |
| "grad_norm": 11.54364558142577, |
| "learning_rate": 2.638608476633921e-07, |
| "loss": 0.7903, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.7707939933459221, |
| "grad_norm": 15.35010777649344, |
| "learning_rate": 2.6307265147322886e-07, |
| "loss": 0.7506, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.7711536732308246, |
| "grad_norm": 12.44341166425653, |
| "learning_rate": 2.6228545593858353e-07, |
| "loss": 0.7809, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.771513353115727, |
| "grad_norm": 9.849527159029504, |
| "learning_rate": 2.6149926212837015e-07, |
| "loss": 0.8182, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.7718730330006295, |
| "grad_norm": 17.311250410971674, |
| "learning_rate": 2.6071407111014177e-07, |
| "loss": 0.7556, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.7722327128855319, |
| "grad_norm": 7.433177146386238, |
| "learning_rate": 2.599298839500899e-07, |
| "loss": 0.7906, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.7725923927704343, |
| "grad_norm": 9.368131404282552, |
| "learning_rate": 2.5914670171304254e-07, |
| "loss": 0.846, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.7729520726553367, |
| "grad_norm": 27.941836286050815, |
| "learning_rate": 2.5836452546246447e-07, |
| "loss": 0.7627, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.7733117525402392, |
| "grad_norm": 9.126302607974916, |
| "learning_rate": 2.57583356260453e-07, |
| "loss": 0.7827, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7736714324251416, |
| "grad_norm": 8.83382897893599, |
| "learning_rate": 2.5680319516773885e-07, |
| "loss": 0.8658, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.7740311123100441, |
| "grad_norm": 7.5971246576903395, |
| "learning_rate": 2.560240432436831e-07, |
| "loss": 0.8016, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.7743907921949464, |
| "grad_norm": 22.060343200505887, |
| "learning_rate": 2.5524590154627756e-07, |
| "loss": 0.8256, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.7747504720798489, |
| "grad_norm": 12.317280851375296, |
| "learning_rate": 2.544687711321415e-07, |
| "loss": 0.7787, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.7751101519647514, |
| "grad_norm": 97.31961999779651, |
| "learning_rate": 2.536926530565211e-07, |
| "loss": 0.8061, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.7754698318496538, |
| "grad_norm": 15.478890567396736, |
| "learning_rate": 2.529175483732878e-07, |
| "loss": 0.7684, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.7758295117345563, |
| "grad_norm": 30.21366282134613, |
| "learning_rate": 2.521434581349378e-07, |
| "loss": 0.8361, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.7761891916194587, |
| "grad_norm": 10.59332738328557, |
| "learning_rate": 2.513703833925883e-07, |
| "loss": 0.7736, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.7765488715043611, |
| "grad_norm": 17.7406623566783, |
| "learning_rate": 2.505983251959798e-07, |
| "loss": 0.8598, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.7769085513892635, |
| "grad_norm": 13.111518092783921, |
| "learning_rate": 2.4982728459346967e-07, |
| "loss": 0.7786, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.777268231274166, |
| "grad_norm": 15.285683355007304, |
| "learning_rate": 2.4905726263203587e-07, |
| "loss": 0.7385, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.7776279111590684, |
| "grad_norm": 11.992210244588739, |
| "learning_rate": 2.4828826035727214e-07, |
| "loss": 0.7321, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.7779875910439709, |
| "grad_norm": 16.229451043839767, |
| "learning_rate": 2.4752027881338757e-07, |
| "loss": 0.798, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.7783472709288733, |
| "grad_norm": 13.289808587733184, |
| "learning_rate": 2.467533190432053e-07, |
| "loss": 0.8036, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.7787069508137757, |
| "grad_norm": 8.035681458730258, |
| "learning_rate": 2.459873820881615e-07, |
| "loss": 0.8259, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.7790666306986782, |
| "grad_norm": 16.319641097250493, |
| "learning_rate": 2.4522246898830304e-07, |
| "loss": 0.8338, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.7794263105835806, |
| "grad_norm": 11.834868911534926, |
| "learning_rate": 2.4445858078228643e-07, |
| "loss": 0.7462, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.7797859904684831, |
| "grad_norm": 12.69739034024986, |
| "learning_rate": 2.4369571850737657e-07, |
| "loss": 0.8008, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.7801456703533854, |
| "grad_norm": 16.92832348270919, |
| "learning_rate": 2.4293388319944574e-07, |
| "loss": 0.8546, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.7805053502382879, |
| "grad_norm": 15.34914146252241, |
| "learning_rate": 2.421730758929713e-07, |
| "loss": 0.8025, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7808650301231903, |
| "grad_norm": 13.15048232145177, |
| "learning_rate": 2.414132976210346e-07, |
| "loss": 0.8324, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.7812247100080928, |
| "grad_norm": 15.702733431224983, |
| "learning_rate": 2.406545494153196e-07, |
| "loss": 0.8452, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.7815843898929953, |
| "grad_norm": 27.901819642019326, |
| "learning_rate": 2.398968323061125e-07, |
| "loss": 0.8131, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.7819440697778977, |
| "grad_norm": 12.340345284800485, |
| "learning_rate": 2.3914014732229827e-07, |
| "loss": 0.8028, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.7823037496628001, |
| "grad_norm": 11.59919363746485, |
| "learning_rate": 2.3838449549136098e-07, |
| "loss": 0.8401, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7826634295477025, |
| "grad_norm": 14.074748636264571, |
| "learning_rate": 2.3762987783938138e-07, |
| "loss": 0.8651, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.783023109432605, |
| "grad_norm": 23.2984563671798, |
| "learning_rate": 2.368762953910367e-07, |
| "loss": 0.8034, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.7833827893175074, |
| "grad_norm": 12.530144218369257, |
| "learning_rate": 2.3612374916959777e-07, |
| "loss": 0.8701, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.7837424692024099, |
| "grad_norm": 19.979093313956696, |
| "learning_rate": 2.353722401969286e-07, |
| "loss": 0.7643, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.7841021490873122, |
| "grad_norm": 11.678206397949163, |
| "learning_rate": 2.3462176949348465e-07, |
| "loss": 0.8756, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7844618289722147, |
| "grad_norm": 10.938030186767781, |
| "learning_rate": 2.3387233807831143e-07, |
| "loss": 0.7688, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.7848215088571172, |
| "grad_norm": 8.402466597228221, |
| "learning_rate": 2.3312394696904403e-07, |
| "loss": 0.825, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.7851811887420196, |
| "grad_norm": 8.984508957322987, |
| "learning_rate": 2.3237659718190394e-07, |
| "loss": 0.7859, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.7855408686269221, |
| "grad_norm": 15.217116220541765, |
| "learning_rate": 2.3163028973169917e-07, |
| "loss": 0.7741, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.7859005485118244, |
| "grad_norm": 11.432027827847797, |
| "learning_rate": 2.3088502563182178e-07, |
| "loss": 0.8224, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.7862602283967269, |
| "grad_norm": 34.900853193772434, |
| "learning_rate": 2.3014080589424834e-07, |
| "loss": 0.8385, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.7866199082816293, |
| "grad_norm": 8.633212140752239, |
| "learning_rate": 2.2939763152953573e-07, |
| "loss": 0.8747, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.7869795881665318, |
| "grad_norm": 33.25082323243209, |
| "learning_rate": 2.2865550354682327e-07, |
| "loss": 0.794, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.7873392680514342, |
| "grad_norm": 12.807958889113452, |
| "learning_rate": 2.279144229538269e-07, |
| "loss": 0.8289, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.7876989479363367, |
| "grad_norm": 17.16475182339757, |
| "learning_rate": 2.2717439075684263e-07, |
| "loss": 0.7846, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7880586278212391, |
| "grad_norm": 20.462281814135242, |
| "learning_rate": 2.2643540796074156e-07, |
| "loss": 0.8003, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.7884183077061415, |
| "grad_norm": 9.673004562271768, |
| "learning_rate": 2.25697475568971e-07, |
| "loss": 0.7275, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.788777987591044, |
| "grad_norm": 16.26554078587725, |
| "learning_rate": 2.2496059458355e-07, |
| "loss": 0.8404, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.7891376674759464, |
| "grad_norm": 21.94352506788799, |
| "learning_rate": 2.2422476600507202e-07, |
| "loss": 0.7741, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.7894973473608489, |
| "grad_norm": 11.762770147070169, |
| "learning_rate": 2.2348999083270003e-07, |
| "loss": 0.7671, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7898570272457512, |
| "grad_norm": 12.6430145839643, |
| "learning_rate": 2.2275627006416797e-07, |
| "loss": 0.8173, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.7902167071306537, |
| "grad_norm": 15.746501248599223, |
| "learning_rate": 2.2202360469577618e-07, |
| "loss": 0.7956, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.7905763870155561, |
| "grad_norm": 9.03212636630556, |
| "learning_rate": 2.2129199572239377e-07, |
| "loss": 0.7448, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.7909360669004586, |
| "grad_norm": 11.119608189348266, |
| "learning_rate": 2.2056144413745392e-07, |
| "loss": 0.8432, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.7912957467853611, |
| "grad_norm": 13.153580032958237, |
| "learning_rate": 2.1983195093295558e-07, |
| "loss": 0.8131, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7916554266702635, |
| "grad_norm": 43.553967757305315, |
| "learning_rate": 2.191035170994584e-07, |
| "loss": 0.8152, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.7920151065551659, |
| "grad_norm": 10.000064039868674, |
| "learning_rate": 2.1837614362608569e-07, |
| "loss": 0.8311, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.7923747864400683, |
| "grad_norm": 20.717835143857574, |
| "learning_rate": 2.1764983150051951e-07, |
| "loss": 0.7754, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.7927344663249708, |
| "grad_norm": 28.512642566065328, |
| "learning_rate": 2.1692458170900197e-07, |
| "loss": 0.751, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.7930941462098732, |
| "grad_norm": 10.923945652982237, |
| "learning_rate": 2.162003952363307e-07, |
| "loss": 0.8223, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.7934538260947757, |
| "grad_norm": 14.368074118286325, |
| "learning_rate": 2.154772730658617e-07, |
| "loss": 0.7747, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.793813505979678, |
| "grad_norm": 18.327110926138854, |
| "learning_rate": 2.147552161795042e-07, |
| "loss": 0.7646, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.7941731858645805, |
| "grad_norm": 15.2718923928323, |
| "learning_rate": 2.1403422555772222e-07, |
| "loss": 0.8747, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.794532865749483, |
| "grad_norm": 9.7450328168099, |
| "learning_rate": 2.1331430217953018e-07, |
| "loss": 0.84, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.7948925456343854, |
| "grad_norm": 12.472111364195849, |
| "learning_rate": 2.125954470224951e-07, |
| "loss": 0.7651, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7952522255192879, |
| "grad_norm": 11.409454024321807, |
| "learning_rate": 2.118776610627322e-07, |
| "loss": 0.7816, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.7956119054041902, |
| "grad_norm": 9.683463158955522, |
| "learning_rate": 2.111609452749059e-07, |
| "loss": 0.7534, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.7959715852890927, |
| "grad_norm": 16.518476649797382, |
| "learning_rate": 2.1044530063222677e-07, |
| "loss": 0.8282, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.7963312651739951, |
| "grad_norm": 11.076871284328455, |
| "learning_rate": 2.0973072810645077e-07, |
| "loss": 0.8042, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.7966909450588976, |
| "grad_norm": 43.22107060606698, |
| "learning_rate": 2.0901722866787842e-07, |
| "loss": 0.7533, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.7970506249438, |
| "grad_norm": 9.757499067901705, |
| "learning_rate": 2.083048032853534e-07, |
| "loss": 0.7762, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.7974103048287025, |
| "grad_norm": 16.026769525040617, |
| "learning_rate": 2.075934529262604e-07, |
| "loss": 0.8547, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.7977699847136049, |
| "grad_norm": 9.660745818513375, |
| "learning_rate": 2.068831785565246e-07, |
| "loss": 0.8358, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.7981296645985073, |
| "grad_norm": 9.096098076225008, |
| "learning_rate": 2.0617398114060979e-07, |
| "loss": 0.7666, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.7984893444834098, |
| "grad_norm": 11.264490778875238, |
| "learning_rate": 2.0546586164151824e-07, |
| "loss": 0.7986, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7988490243683122, |
| "grad_norm": 9.544501329809775, |
| "learning_rate": 2.0475882102078767e-07, |
| "loss": 0.8543, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.7992087042532147, |
| "grad_norm": 12.395444297553139, |
| "learning_rate": 2.040528602384912e-07, |
| "loss": 0.8004, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.799568384138117, |
| "grad_norm": 34.52252165265367, |
| "learning_rate": 2.033479802532354e-07, |
| "loss": 0.8051, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.7999280640230195, |
| "grad_norm": 13.270992324968976, |
| "learning_rate": 2.0264418202215994e-07, |
| "loss": 0.822, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.8002877439079219, |
| "grad_norm": 9.766171438459644, |
| "learning_rate": 2.019414665009349e-07, |
| "loss": 0.7732, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8006474237928244, |
| "grad_norm": 18.260697135030036, |
| "learning_rate": 2.0123983464376026e-07, |
| "loss": 0.8167, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.8010071036777269, |
| "grad_norm": 57.98811902368256, |
| "learning_rate": 2.0053928740336456e-07, |
| "loss": 0.7993, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.8013667835626292, |
| "grad_norm": 10.95989256001396, |
| "learning_rate": 1.9983982573100412e-07, |
| "loss": 0.7865, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.8017264634475317, |
| "grad_norm": 11.970402475239974, |
| "learning_rate": 1.991414505764605e-07, |
| "loss": 0.8536, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.8020861433324341, |
| "grad_norm": 32.45658865342094, |
| "learning_rate": 1.9844416288804e-07, |
| "loss": 0.7624, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8024458232173366, |
| "grad_norm": 8.270619979920946, |
| "learning_rate": 1.977479636125724e-07, |
| "loss": 0.7601, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.802805503102239, |
| "grad_norm": 13.656390055544307, |
| "learning_rate": 1.9705285369540993e-07, |
| "loss": 0.7493, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.8031651829871415, |
| "grad_norm": 13.140181265672696, |
| "learning_rate": 1.963588340804251e-07, |
| "loss": 0.8749, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.8035248628720438, |
| "grad_norm": 18.307093635487576, |
| "learning_rate": 1.9566590571000996e-07, |
| "loss": 0.8121, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.8038845427569463, |
| "grad_norm": 11.080598749167935, |
| "learning_rate": 1.9497406952507455e-07, |
| "loss": 0.8414, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.8042442226418488, |
| "grad_norm": 10.283915598070877, |
| "learning_rate": 1.9428332646504696e-07, |
| "loss": 0.8315, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.8046039025267512, |
| "grad_norm": 10.114121338459718, |
| "learning_rate": 1.9359367746786992e-07, |
| "loss": 0.7474, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.8049635824116537, |
| "grad_norm": 11.161215023066037, |
| "learning_rate": 1.9290512347000065e-07, |
| "loss": 0.787, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.805323262296556, |
| "grad_norm": 71.78359928084078, |
| "learning_rate": 1.922176654064096e-07, |
| "loss": 0.7856, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.8056829421814585, |
| "grad_norm": 10.096572266223637, |
| "learning_rate": 1.915313042105795e-07, |
| "loss": 0.7992, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8060426220663609, |
| "grad_norm": 12.440587005025414, |
| "learning_rate": 1.9084604081450328e-07, |
| "loss": 0.7781, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.8064023019512634, |
| "grad_norm": 7.801681726617352, |
| "learning_rate": 1.9016187614868306e-07, |
| "loss": 0.7551, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.8067619818361658, |
| "grad_norm": 245.08014326423097, |
| "learning_rate": 1.8947881114212938e-07, |
| "loss": 0.7905, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.8071216617210683, |
| "grad_norm": 7.459230321147278, |
| "learning_rate": 1.8879684672235906e-07, |
| "loss": 0.808, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.8074813416059707, |
| "grad_norm": 21.293012577378732, |
| "learning_rate": 1.881159838153954e-07, |
| "loss": 0.8194, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8078410214908731, |
| "grad_norm": 17.210640953005576, |
| "learning_rate": 1.874362233457649e-07, |
| "loss": 0.7831, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.8082007013757756, |
| "grad_norm": 13.274695128165218, |
| "learning_rate": 1.8675756623649784e-07, |
| "loss": 0.8061, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.808560381260678, |
| "grad_norm": 109.63537116604127, |
| "learning_rate": 1.860800134091257e-07, |
| "loss": 0.7985, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.8089200611455805, |
| "grad_norm": 12.499922719400626, |
| "learning_rate": 1.8540356578368134e-07, |
| "loss": 0.7765, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.8092797410304828, |
| "grad_norm": 9.143999427644584, |
| "learning_rate": 1.8472822427869595e-07, |
| "loss": 0.8028, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8096394209153853, |
| "grad_norm": 11.49632452562713, |
| "learning_rate": 1.8405398981119925e-07, |
| "loss": 0.8331, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.8099991008002877, |
| "grad_norm": 13.131820032853435, |
| "learning_rate": 1.833808632967173e-07, |
| "loss": 0.8637, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.8103587806851902, |
| "grad_norm": 28.848155079319856, |
| "learning_rate": 1.827088456492727e-07, |
| "loss": 0.7767, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.8107184605700927, |
| "grad_norm": 15.045863398114395, |
| "learning_rate": 1.820379377813812e-07, |
| "loss": 0.7856, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.811078140454995, |
| "grad_norm": 14.563166145473161, |
| "learning_rate": 1.8136814060405238e-07, |
| "loss": 0.8327, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8114378203398975, |
| "grad_norm": 13.067979080382221, |
| "learning_rate": 1.8069945502678684e-07, |
| "loss": 0.8341, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.8117975002247999, |
| "grad_norm": 7.3393645572314705, |
| "learning_rate": 1.800318819575769e-07, |
| "loss": 0.8122, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.8121571801097024, |
| "grad_norm": 8.341294685695992, |
| "learning_rate": 1.793654223029033e-07, |
| "loss": 0.7387, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.8125168599946048, |
| "grad_norm": 9.953164368399987, |
| "learning_rate": 1.7870007696773537e-07, |
| "loss": 0.7421, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.8128765398795073, |
| "grad_norm": 17.530753665127847, |
| "learning_rate": 1.7803584685552876e-07, |
| "loss": 0.7636, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8132362197644096, |
| "grad_norm": 12.159844887847953, |
| "learning_rate": 1.7737273286822562e-07, |
| "loss": 0.8127, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.8135958996493121, |
| "grad_norm": 10.656349633773116, |
| "learning_rate": 1.7671073590625184e-07, |
| "loss": 0.8071, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.8139555795342146, |
| "grad_norm": 13.921647802255146, |
| "learning_rate": 1.7604985686851749e-07, |
| "loss": 0.7979, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.814315259419117, |
| "grad_norm": 8.126821450958387, |
| "learning_rate": 1.753900966524129e-07, |
| "loss": 0.7862, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.8146749393040195, |
| "grad_norm": 18.1747354401724, |
| "learning_rate": 1.747314561538109e-07, |
| "loss": 0.8089, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.8150346191889218, |
| "grad_norm": 7.509916073561728, |
| "learning_rate": 1.74073936267063e-07, |
| "loss": 0.6772, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.8153942990738243, |
| "grad_norm": 22.82208884868346, |
| "learning_rate": 1.734175378849998e-07, |
| "loss": 0.8215, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.8157539789587267, |
| "grad_norm": 9.366109026277558, |
| "learning_rate": 1.7276226189892763e-07, |
| "loss": 0.8726, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.8161136588436292, |
| "grad_norm": 11.54468674035775, |
| "learning_rate": 1.7210810919863028e-07, |
| "loss": 0.7652, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.8164733387285316, |
| "grad_norm": 20.218143587638693, |
| "learning_rate": 1.7145508067236514e-07, |
| "loss": 0.7877, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.816833018613434, |
| "grad_norm": 12.569015763923973, |
| "learning_rate": 1.7080317720686432e-07, |
| "loss": 0.7718, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.8171926984983365, |
| "grad_norm": 13.661390149624827, |
| "learning_rate": 1.7015239968733065e-07, |
| "loss": 0.8318, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.8175523783832389, |
| "grad_norm": 23.004313653824514, |
| "learning_rate": 1.6950274899743944e-07, |
| "loss": 0.7868, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.8179120582681414, |
| "grad_norm": 9.115356687709024, |
| "learning_rate": 1.6885422601933507e-07, |
| "loss": 0.8094, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.8182717381530438, |
| "grad_norm": 11.49471905807452, |
| "learning_rate": 1.6820683163363159e-07, |
| "loss": 0.794, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.8186314180379463, |
| "grad_norm": 14.167061691113378, |
| "learning_rate": 1.67560566719409e-07, |
| "loss": 0.755, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.8189910979228486, |
| "grad_norm": 11.387631631432074, |
| "learning_rate": 1.669154321542151e-07, |
| "loss": 0.7853, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.8193507778077511, |
| "grad_norm": 37.87639215970217, |
| "learning_rate": 1.6627142881406186e-07, |
| "loss": 0.8561, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.8197104576926535, |
| "grad_norm": 27.221934678319457, |
| "learning_rate": 1.656285575734263e-07, |
| "loss": 0.8201, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.820070137577556, |
| "grad_norm": 16.209203561546843, |
| "learning_rate": 1.649868193052465e-07, |
| "loss": 0.8523, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8204298174624585, |
| "grad_norm": 581.721183790373, |
| "learning_rate": 1.643462148809238e-07, |
| "loss": 0.8274, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.8207894973473608, |
| "grad_norm": 11.914813702261155, |
| "learning_rate": 1.6370674517031868e-07, |
| "loss": 0.802, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.8211491772322633, |
| "grad_norm": 12.227694797112598, |
| "learning_rate": 1.6306841104175218e-07, |
| "loss": 0.8508, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.8215088571171657, |
| "grad_norm": 20.330985201805433, |
| "learning_rate": 1.6243121336200127e-07, |
| "loss": 0.7792, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.8218685370020682, |
| "grad_norm": 20.298174086052803, |
| "learning_rate": 1.6179515299630199e-07, |
| "loss": 0.876, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.8222282168869706, |
| "grad_norm": 15.942233863267372, |
| "learning_rate": 1.611602308083444e-07, |
| "loss": 0.7673, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.822587896771873, |
| "grad_norm": 10.927464046082013, |
| "learning_rate": 1.6052644766027467e-07, |
| "loss": 0.8461, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.8229475766567754, |
| "grad_norm": 8.077305958641276, |
| "learning_rate": 1.5989380441269006e-07, |
| "loss": 0.7749, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.8233072565416779, |
| "grad_norm": 11.827162514740742, |
| "learning_rate": 1.5926230192464228e-07, |
| "loss": 0.8414, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.8236669364265804, |
| "grad_norm": 10.2629240946057, |
| "learning_rate": 1.5863194105363242e-07, |
| "loss": 0.734, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8240266163114828, |
| "grad_norm": 13.148047468292425, |
| "learning_rate": 1.5800272265561254e-07, |
| "loss": 0.8053, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.8243862961963853, |
| "grad_norm": 15.035973238675208, |
| "learning_rate": 1.5737464758498243e-07, |
| "loss": 0.8434, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.8247459760812876, |
| "grad_norm": 10.933368632795961, |
| "learning_rate": 1.5674771669458996e-07, |
| "loss": 0.8182, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.8251056559661901, |
| "grad_norm": 12.115690506938334, |
| "learning_rate": 1.5612193083572877e-07, |
| "loss": 0.8067, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.8254653358510925, |
| "grad_norm": 11.569037443632995, |
| "learning_rate": 1.554972908581388e-07, |
| "loss": 0.8071, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.825825015735995, |
| "grad_norm": 49.8101801470549, |
| "learning_rate": 1.5487379761000273e-07, |
| "loss": 0.8369, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.8261846956208974, |
| "grad_norm": 10.746606837227537, |
| "learning_rate": 1.5425145193794697e-07, |
| "loss": 0.8167, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.8265443755057998, |
| "grad_norm": 18.159478189765707, |
| "learning_rate": 1.5363025468703917e-07, |
| "loss": 0.7757, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.8269040553907022, |
| "grad_norm": 10.381707934132592, |
| "learning_rate": 1.5301020670078802e-07, |
| "loss": 0.8095, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.8272637352756047, |
| "grad_norm": 17.8844332800837, |
| "learning_rate": 1.523913088211415e-07, |
| "loss": 0.784, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8276234151605072, |
| "grad_norm": 13.426469758773418, |
| "learning_rate": 1.5177356188848556e-07, |
| "loss": 0.687, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.8279830950454096, |
| "grad_norm": 39.28947042539643, |
| "learning_rate": 1.5115696674164346e-07, |
| "loss": 0.8498, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.828342774930312, |
| "grad_norm": 8.23500161318244, |
| "learning_rate": 1.5054152421787503e-07, |
| "loss": 0.7089, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.8287024548152144, |
| "grad_norm": 13.77245256670465, |
| "learning_rate": 1.499272351528742e-07, |
| "loss": 0.7693, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.8290621347001169, |
| "grad_norm": 102.03568022021852, |
| "learning_rate": 1.4931410038076918e-07, |
| "loss": 0.8191, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.8294218145850193, |
| "grad_norm": 25.26898915553537, |
| "learning_rate": 1.4870212073412024e-07, |
| "loss": 0.7804, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.8297814944699218, |
| "grad_norm": 10.865074298124393, |
| "learning_rate": 1.4809129704391997e-07, |
| "loss": 0.7229, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.8301411743548242, |
| "grad_norm": 10.384806850095348, |
| "learning_rate": 1.4748163013959058e-07, |
| "loss": 0.7937, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.8305008542397266, |
| "grad_norm": 15.475422809640303, |
| "learning_rate": 1.4687312084898386e-07, |
| "loss": 0.8122, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.8308605341246291, |
| "grad_norm": 60.90412866491603, |
| "learning_rate": 1.4626576999837937e-07, |
| "loss": 0.8524, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.8312202140095315, |
| "grad_norm": 11.62715155651753, |
| "learning_rate": 1.456595784124839e-07, |
| "loss": 0.7943, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.831579893894434, |
| "grad_norm": 14.894094173204277, |
| "learning_rate": 1.4505454691443042e-07, |
| "loss": 0.7828, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.8319395737793364, |
| "grad_norm": 15.846119117751503, |
| "learning_rate": 1.4445067632577622e-07, |
| "loss": 0.8351, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.8322992536642388, |
| "grad_norm": 11.851981736041145, |
| "learning_rate": 1.438479674665022e-07, |
| "loss": 0.8186, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.8326589335491412, |
| "grad_norm": 18.741996160363115, |
| "learning_rate": 1.432464211550116e-07, |
| "loss": 0.8106, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.8330186134340437, |
| "grad_norm": 10.821171540681902, |
| "learning_rate": 1.4264603820813005e-07, |
| "loss": 0.8973, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.8333782933189461, |
| "grad_norm": 9.795264408170965, |
| "learning_rate": 1.4204681944110242e-07, |
| "loss": 0.7635, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.8337379732038486, |
| "grad_norm": 15.708568592847959, |
| "learning_rate": 1.41448765667593e-07, |
| "loss": 0.8359, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.8340976530887511, |
| "grad_norm": 9.194444315770358, |
| "learning_rate": 1.4085187769968431e-07, |
| "loss": 0.8341, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.8344573329736534, |
| "grad_norm": 11.930235028959371, |
| "learning_rate": 1.4025615634787613e-07, |
| "loss": 0.7688, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8348170128585559, |
| "grad_norm": 14.356223028981091, |
| "learning_rate": 1.3966160242108372e-07, |
| "loss": 0.8027, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.8351766927434583, |
| "grad_norm": 9.562412213997689, |
| "learning_rate": 1.3906821672663704e-07, |
| "loss": 0.7349, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.8355363726283608, |
| "grad_norm": 10.43683433112123, |
| "learning_rate": 1.3847600007027994e-07, |
| "loss": 0.8652, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.8358960525132632, |
| "grad_norm": 7.776153454746316, |
| "learning_rate": 1.378849532561691e-07, |
| "loss": 0.7701, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.8362557323981656, |
| "grad_norm": 24.332429727924744, |
| "learning_rate": 1.372950770868724e-07, |
| "loss": 0.7697, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.836615412283068, |
| "grad_norm": 9.926742645416553, |
| "learning_rate": 1.3670637236336814e-07, |
| "loss": 0.7948, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.8369750921679705, |
| "grad_norm": 8.679025247195929, |
| "learning_rate": 1.3611883988504358e-07, |
| "loss": 0.8182, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.837334772052873, |
| "grad_norm": 14.838497631869352, |
| "learning_rate": 1.3553248044969524e-07, |
| "loss": 0.8189, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.8376944519377754, |
| "grad_norm": 10.051187806732115, |
| "learning_rate": 1.3494729485352586e-07, |
| "loss": 0.7508, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.8380541318226779, |
| "grad_norm": 11.298601823584674, |
| "learning_rate": 1.3436328389114472e-07, |
| "loss": 0.7925, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.8384138117075802, |
| "grad_norm": 10.243660862667488, |
| "learning_rate": 1.3378044835556557e-07, |
| "loss": 0.8215, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.8387734915924827, |
| "grad_norm": 11.258117329842404, |
| "learning_rate": 1.331987890382068e-07, |
| "loss": 0.806, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.8391331714773851, |
| "grad_norm": 12.887297854681147, |
| "learning_rate": 1.326183067288893e-07, |
| "loss": 0.7611, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.8394928513622876, |
| "grad_norm": 8.685018023842622, |
| "learning_rate": 1.3203900221583563e-07, |
| "loss": 0.7333, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.83985253124719, |
| "grad_norm": 12.059613850313461, |
| "learning_rate": 1.3146087628566894e-07, |
| "loss": 0.7111, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.8402122111320924, |
| "grad_norm": 15.434102439416437, |
| "learning_rate": 1.3088392972341257e-07, |
| "loss": 0.7703, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.8405718910169949, |
| "grad_norm": 7.988816915844384, |
| "learning_rate": 1.3030816331248783e-07, |
| "loss": 0.7412, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.8409315709018973, |
| "grad_norm": 17.8684095934584, |
| "learning_rate": 1.2973357783471385e-07, |
| "loss": 0.7891, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.8412912507867998, |
| "grad_norm": 10.605578795961502, |
| "learning_rate": 1.2916017407030587e-07, |
| "loss": 0.7966, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.8416509306717022, |
| "grad_norm": 7.979151580364497, |
| "learning_rate": 1.2858795279787515e-07, |
| "loss": 0.8085, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8420106105566046, |
| "grad_norm": 15.045629674435435, |
| "learning_rate": 1.2801691479442654e-07, |
| "loss": 0.7548, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.842370290441507, |
| "grad_norm": 22.460005334784544, |
| "learning_rate": 1.2744706083535906e-07, |
| "loss": 0.783, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.8427299703264095, |
| "grad_norm": 25.059742983395793, |
| "learning_rate": 1.2687839169446256e-07, |
| "loss": 0.8405, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.8430896502113119, |
| "grad_norm": 9.113176144604909, |
| "learning_rate": 1.2631090814391943e-07, |
| "loss": 0.7647, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.8434493300962144, |
| "grad_norm": 12.555101725316918, |
| "learning_rate": 1.2574461095430144e-07, |
| "loss": 0.7997, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.8438090099811169, |
| "grad_norm": 7.54974258001931, |
| "learning_rate": 1.2517950089456997e-07, |
| "loss": 0.7705, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.8441686898660192, |
| "grad_norm": 27.249421233014953, |
| "learning_rate": 1.2461557873207328e-07, |
| "loss": 0.7757, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.8445283697509217, |
| "grad_norm": 9.405443108338877, |
| "learning_rate": 1.240528452325482e-07, |
| "loss": 0.7469, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.8448880496358241, |
| "grad_norm": 12.970887430421074, |
| "learning_rate": 1.2349130116011618e-07, |
| "loss": 0.8021, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.8452477295207266, |
| "grad_norm": 12.890285997253583, |
| "learning_rate": 1.229309472772847e-07, |
| "loss": 0.8027, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.845607409405629, |
| "grad_norm": 14.74162801700718, |
| "learning_rate": 1.2237178434494378e-07, |
| "loss": 0.8477, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.8459670892905314, |
| "grad_norm": 167.2267924054436, |
| "learning_rate": 1.2181381312236748e-07, |
| "loss": 0.7981, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.8463267691754338, |
| "grad_norm": 12.960623291109226, |
| "learning_rate": 1.2125703436721091e-07, |
| "loss": 0.7431, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.8466864490603363, |
| "grad_norm": 8.664210286860856, |
| "learning_rate": 1.207014488355107e-07, |
| "loss": 0.7999, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.8470461289452388, |
| "grad_norm": 28.279223774903546, |
| "learning_rate": 1.2014705728168218e-07, |
| "loss": 0.775, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.8474058088301412, |
| "grad_norm": 38.82554154366558, |
| "learning_rate": 1.1959386045852048e-07, |
| "loss": 0.8309, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.8477654887150436, |
| "grad_norm": 13.895528440091361, |
| "learning_rate": 1.1904185911719767e-07, |
| "loss": 0.7915, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.848125168599946, |
| "grad_norm": 12.125042321937775, |
| "learning_rate": 1.1849105400726322e-07, |
| "loss": 0.823, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.8484848484848485, |
| "grad_norm": 9.97026854760588, |
| "learning_rate": 1.1794144587664113e-07, |
| "loss": 0.8106, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.8488445283697509, |
| "grad_norm": 26.051019497196574, |
| "learning_rate": 1.1739303547163138e-07, |
| "loss": 0.7895, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.8492042082546534, |
| "grad_norm": 11.852463625231042, |
| "learning_rate": 1.1684582353690642e-07, |
| "loss": 0.8865, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.8495638881395557, |
| "grad_norm": 20.853977821016382, |
| "learning_rate": 1.1629981081551276e-07, |
| "loss": 0.7361, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.8499235680244582, |
| "grad_norm": 8.79126880257627, |
| "learning_rate": 1.1575499804886657e-07, |
| "loss": 0.773, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.8502832479093607, |
| "grad_norm": 13.126326647726362, |
| "learning_rate": 1.152113859767565e-07, |
| "loss": 0.8399, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.8506429277942631, |
| "grad_norm": 9.79130876826058, |
| "learning_rate": 1.1466897533733943e-07, |
| "loss": 0.7747, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.8510026076791656, |
| "grad_norm": 9.629331238536345, |
| "learning_rate": 1.1412776686714199e-07, |
| "loss": 0.7794, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.851362287564068, |
| "grad_norm": 12.657470224794194, |
| "learning_rate": 1.1358776130105763e-07, |
| "loss": 0.796, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.8517219674489704, |
| "grad_norm": 14.083863594684702, |
| "learning_rate": 1.1304895937234649e-07, |
| "loss": 0.7918, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.8520816473338728, |
| "grad_norm": 9.738032938074392, |
| "learning_rate": 1.1251136181263432e-07, |
| "loss": 0.8091, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.8524413272187753, |
| "grad_norm": 9.538609823673212, |
| "learning_rate": 1.1197496935191208e-07, |
| "loss": 0.8568, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.8528010071036777, |
| "grad_norm": 9.082270731085444, |
| "learning_rate": 1.114397827185336e-07, |
| "loss": 0.8054, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.8531606869885802, |
| "grad_norm": 35.09383378696445, |
| "learning_rate": 1.1090580263921578e-07, |
| "loss": 0.7911, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.8535203668734826, |
| "grad_norm": 1250.82091100558, |
| "learning_rate": 1.1037302983903684e-07, |
| "loss": 0.8286, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.853880046758385, |
| "grad_norm": 8.04664795906332, |
| "learning_rate": 1.0984146504143588e-07, |
| "loss": 0.743, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.8542397266432875, |
| "grad_norm": 13.908120492868619, |
| "learning_rate": 1.0931110896821182e-07, |
| "loss": 0.8826, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.8545994065281899, |
| "grad_norm": 8.293378871159591, |
| "learning_rate": 1.087819623395222e-07, |
| "loss": 0.82, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.8549590864130924, |
| "grad_norm": 12.821152267637753, |
| "learning_rate": 1.08254025873882e-07, |
| "loss": 0.8965, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.8553187662979947, |
| "grad_norm": 25.32307007964656, |
| "learning_rate": 1.0772730028816302e-07, |
| "loss": 0.891, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.8556784461828972, |
| "grad_norm": 24.227306921120448, |
| "learning_rate": 1.0720178629759347e-07, |
| "loss": 0.8235, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.8560381260677996, |
| "grad_norm": 49.12110666155429, |
| "learning_rate": 1.0667748461575544e-07, |
| "loss": 0.784, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.8563978059527021, |
| "grad_norm": 12.865030237798612, |
| "learning_rate": 1.0615439595458553e-07, |
| "loss": 0.7789, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.8567574858376046, |
| "grad_norm": 27.76612472619855, |
| "learning_rate": 1.0563252102437259e-07, |
| "loss": 0.7404, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.857117165722507, |
| "grad_norm": 11.219391329197183, |
| "learning_rate": 1.0511186053375832e-07, |
| "loss": 0.7762, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.8574768456074094, |
| "grad_norm": 12.787439446751387, |
| "learning_rate": 1.0459241518973439e-07, |
| "loss": 0.7834, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.8578365254923118, |
| "grad_norm": 14.73054580968944, |
| "learning_rate": 1.0407418569764304e-07, |
| "loss": 0.8216, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.8581962053772143, |
| "grad_norm": 11.97325876203116, |
| "learning_rate": 1.0355717276117503e-07, |
| "loss": 0.7907, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.8585558852621167, |
| "grad_norm": 9.302593646290104, |
| "learning_rate": 1.0304137708236992e-07, |
| "loss": 0.7809, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.8589155651470192, |
| "grad_norm": 12.586732395421038, |
| "learning_rate": 1.025267993616139e-07, |
| "loss": 0.7515, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.8592752450319215, |
| "grad_norm": 12.060204409506623, |
| "learning_rate": 1.0201344029763925e-07, |
| "loss": 0.7799, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.859634924916824, |
| "grad_norm": 11.687658910496713, |
| "learning_rate": 1.0150130058752349e-07, |
| "loss": 0.7901, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.8599946048017265, |
| "grad_norm": 18.111372110913653, |
| "learning_rate": 1.0099038092668899e-07, |
| "loss": 0.8045, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.8603542846866289, |
| "grad_norm": 16.779825294723597, |
| "learning_rate": 1.0048068200890037e-07, |
| "loss": 0.7497, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.8607139645715314, |
| "grad_norm": 10.189539353170648, |
| "learning_rate": 9.997220452626587e-08, |
| "loss": 0.7694, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.8610736444564338, |
| "grad_norm": 10.508841460655566, |
| "learning_rate": 9.946494916923398e-08, |
| "loss": 0.775, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.8614333243413362, |
| "grad_norm": 28.910622544604255, |
| "learning_rate": 9.895891662659484e-08, |
| "loss": 0.743, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.8617930042262386, |
| "grad_norm": 8.110628199281892, |
| "learning_rate": 9.845410758547723e-08, |
| "loss": 0.8655, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.8621526841111411, |
| "grad_norm": 11.794263357334533, |
| "learning_rate": 9.795052273134907e-08, |
| "loss": 0.7456, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.8625123639960435, |
| "grad_norm": 29.382340644289357, |
| "learning_rate": 9.74481627480156e-08, |
| "loss": 0.8409, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.862872043880946, |
| "grad_norm": 13.60620815681596, |
| "learning_rate": 9.694702831761937e-08, |
| "loss": 0.7681, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.8632317237658484, |
| "grad_norm": 22.600202809252284, |
| "learning_rate": 9.64471201206385e-08, |
| "loss": 0.8101, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8635914036507508, |
| "grad_norm": 13.70468540326132, |
| "learning_rate": 9.594843883588588e-08, |
| "loss": 0.7787, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.8639510835356533, |
| "grad_norm": 17.287845281656523, |
| "learning_rate": 9.545098514050842e-08, |
| "loss": 0.84, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.8643107634205557, |
| "grad_norm": 28.82379473225906, |
| "learning_rate": 9.495475970998667e-08, |
| "loss": 0.8419, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.8646704433054582, |
| "grad_norm": 18.80326675692738, |
| "learning_rate": 9.445976321813276e-08, |
| "loss": 0.8139, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.8650301231903605, |
| "grad_norm": 8.873151323104636, |
| "learning_rate": 9.396599633709012e-08, |
| "loss": 0.8906, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.865389803075263, |
| "grad_norm": 29.108317139778812, |
| "learning_rate": 9.347345973733256e-08, |
| "loss": 0.7823, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.8657494829601654, |
| "grad_norm": 57.376392562292516, |
| "learning_rate": 9.298215408766375e-08, |
| "loss": 0.8272, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.8661091628450679, |
| "grad_norm": 12.507098112586599, |
| "learning_rate": 9.249208005521536e-08, |
| "loss": 0.8554, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.8664688427299704, |
| "grad_norm": 19.933083580678844, |
| "learning_rate": 9.200323830544699e-08, |
| "loss": 0.7296, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.8668285226148728, |
| "grad_norm": 11.540300634818104, |
| "learning_rate": 9.151562950214443e-08, |
| "loss": 0.8011, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.8671882024997752, |
| "grad_norm": 9.122997702763547, |
| "learning_rate": 9.102925430742015e-08, |
| "loss": 0.7509, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.8675478823846776, |
| "grad_norm": 9.276170308686988, |
| "learning_rate": 9.054411338171097e-08, |
| "loss": 0.7616, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.8679075622695801, |
| "grad_norm": 14.80827922183617, |
| "learning_rate": 9.006020738377762e-08, |
| "loss": 0.8061, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.8682672421544825, |
| "grad_norm": 15.06547846126099, |
| "learning_rate": 8.957753697070413e-08, |
| "loss": 0.8666, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.868626922039385, |
| "grad_norm": 8.648973227689028, |
| "learning_rate": 8.909610279789714e-08, |
| "loss": 0.7728, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.8689866019242873, |
| "grad_norm": 15.863682113126744, |
| "learning_rate": 8.861590551908404e-08, |
| "loss": 0.7556, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.8693462818091898, |
| "grad_norm": 28.09939290308373, |
| "learning_rate": 8.813694578631281e-08, |
| "loss": 0.7793, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.8697059616940923, |
| "grad_norm": 28.819946477351102, |
| "learning_rate": 8.76592242499511e-08, |
| "loss": 0.7464, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.8700656415789947, |
| "grad_norm": 106.5794495887269, |
| "learning_rate": 8.718274155868543e-08, |
| "loss": 0.7709, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.8704253214638972, |
| "grad_norm": 17.350613213171766, |
| "learning_rate": 8.670749835951963e-08, |
| "loss": 0.8158, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.8707850013487995, |
| "grad_norm": 8.004833697151915, |
| "learning_rate": 8.623349529777524e-08, |
| "loss": 0.7737, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.871144681233702, |
| "grad_norm": 10.6819927235279, |
| "learning_rate": 8.576073301708875e-08, |
| "loss": 0.8069, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.8715043611186044, |
| "grad_norm": 15.605449994443598, |
| "learning_rate": 8.528921215941298e-08, |
| "loss": 0.8134, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.8718640410035069, |
| "grad_norm": 16.08474338996225, |
| "learning_rate": 8.481893336501389e-08, |
| "loss": 0.9273, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.8722237208884093, |
| "grad_norm": 15.130247859775276, |
| "learning_rate": 8.434989727247232e-08, |
| "loss": 0.8169, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.8725834007733118, |
| "grad_norm": 18.686906062019553, |
| "learning_rate": 8.388210451868005e-08, |
| "loss": 0.8607, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.8729430806582142, |
| "grad_norm": 29.476551071939696, |
| "learning_rate": 8.341555573884173e-08, |
| "loss": 0.7818, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.8733027605431166, |
| "grad_norm": 13.2768876743159, |
| "learning_rate": 8.295025156647228e-08, |
| "loss": 0.8134, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.8736624404280191, |
| "grad_norm": 22.00954931797058, |
| "learning_rate": 8.248619263339728e-08, |
| "loss": 0.8381, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.8740221203129215, |
| "grad_norm": 13.944033996554646, |
| "learning_rate": 8.202337956975024e-08, |
| "loss": 0.846, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.874381800197824, |
| "grad_norm": 11.527076317610028, |
| "learning_rate": 8.156181300397413e-08, |
| "loss": 0.8129, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.8747414800827263, |
| "grad_norm": 12.669302449767654, |
| "learning_rate": 8.110149356281847e-08, |
| "loss": 0.8022, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.8751011599676288, |
| "grad_norm": 8.284644891082207, |
| "learning_rate": 8.06424218713403e-08, |
| "loss": 0.7859, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.8754608398525312, |
| "grad_norm": 8.656145459043579, |
| "learning_rate": 8.018459855290104e-08, |
| "loss": 0.8375, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.8758205197374337, |
| "grad_norm": 18.107629198048162, |
| "learning_rate": 7.972802422916825e-08, |
| "loss": 0.8271, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.8761801996223362, |
| "grad_norm": 13.099365057109353, |
| "learning_rate": 7.927269952011284e-08, |
| "loss": 0.8131, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.8765398795072386, |
| "grad_norm": 10.251909754431694, |
| "learning_rate": 7.881862504400883e-08, |
| "loss": 0.7417, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.876899559392141, |
| "grad_norm": 17.19512422107007, |
| "learning_rate": 7.836580141743288e-08, |
| "loss": 0.9009, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.8772592392770434, |
| "grad_norm": 89.25206201161427, |
| "learning_rate": 7.791422925526325e-08, |
| "loss": 0.8347, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.8776189191619459, |
| "grad_norm": 16.113169300626446, |
| "learning_rate": 7.746390917067846e-08, |
| "loss": 0.8061, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.8779785990468483, |
| "grad_norm": 23.32434417648339, |
| "learning_rate": 7.701484177515716e-08, |
| "loss": 0.7796, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.8783382789317508, |
| "grad_norm": 23.50007900752126, |
| "learning_rate": 7.656702767847678e-08, |
| "loss": 0.7934, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.8786979588166531, |
| "grad_norm": 10.184777637570136, |
| "learning_rate": 7.612046748871326e-08, |
| "loss": 0.7884, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.8790576387015556, |
| "grad_norm": 10.329105062305402, |
| "learning_rate": 7.567516181223965e-08, |
| "loss": 0.8039, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.8794173185864581, |
| "grad_norm": 168.65893464914373, |
| "learning_rate": 7.523111125372538e-08, |
| "loss": 0.7646, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.8797769984713605, |
| "grad_norm": 8.57131518316382, |
| "learning_rate": 7.478831641613615e-08, |
| "loss": 0.7408, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.880136678356263, |
| "grad_norm": 53.17870646724811, |
| "learning_rate": 7.434677790073196e-08, |
| "loss": 0.7859, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.8804963582411653, |
| "grad_norm": 9.39752643043822, |
| "learning_rate": 7.390649630706702e-08, |
| "loss": 0.7826, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.8808560381260678, |
| "grad_norm": 17.039867436299417, |
| "learning_rate": 7.346747223298888e-08, |
| "loss": 0.8614, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.8812157180109702, |
| "grad_norm": 24.478200274372707, |
| "learning_rate": 7.302970627463779e-08, |
| "loss": 0.8557, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8815753978958727, |
| "grad_norm": 10.200742151291859, |
| "learning_rate": 7.259319902644512e-08, |
| "loss": 0.794, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.8819350777807751, |
| "grad_norm": 12.04275285618893, |
| "learning_rate": 7.215795108113343e-08, |
| "loss": 0.7997, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.8822947576656776, |
| "grad_norm": 19.699950092626096, |
| "learning_rate": 7.172396302971507e-08, |
| "loss": 0.8055, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.8826544375505799, |
| "grad_norm": 7.572428124589908, |
| "learning_rate": 7.129123546149208e-08, |
| "loss": 0.7693, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.8830141174354824, |
| "grad_norm": 8.84951609822255, |
| "learning_rate": 7.0859768964054e-08, |
| "loss": 0.8012, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.8833737973203849, |
| "grad_norm": 26.091676112292046, |
| "learning_rate": 7.042956412327916e-08, |
| "loss": 0.7914, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.8837334772052873, |
| "grad_norm": 21.259823000357553, |
| "learning_rate": 7.000062152333164e-08, |
| "loss": 0.8482, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.8840931570901898, |
| "grad_norm": 17.576420557735393, |
| "learning_rate": 6.957294174666262e-08, |
| "loss": 0.8336, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.8844528369750921, |
| "grad_norm": 13.527958380566336, |
| "learning_rate": 6.914652537400733e-08, |
| "loss": 0.8384, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.8848125168599946, |
| "grad_norm": 22.67826064074168, |
| "learning_rate": 6.872137298438652e-08, |
| "loss": 0.8449, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.885172196744897, |
| "grad_norm": 14.872871176877165, |
| "learning_rate": 6.829748515510381e-08, |
| "loss": 0.7367, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.8855318766297995, |
| "grad_norm": 25.033817472057137, |
| "learning_rate": 6.787486246174656e-08, |
| "loss": 0.858, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.8858915565147019, |
| "grad_norm": 68.62693486748991, |
| "learning_rate": 6.745350547818307e-08, |
| "loss": 0.7359, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.8862512363996043, |
| "grad_norm": 14.400838286048678, |
| "learning_rate": 6.703341477656421e-08, |
| "loss": 0.7596, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.8866109162845068, |
| "grad_norm": 11.111972047827287, |
| "learning_rate": 6.661459092732035e-08, |
| "loss": 0.7342, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.8869705961694092, |
| "grad_norm": 17.456689961196204, |
| "learning_rate": 6.619703449916259e-08, |
| "loss": 0.7515, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.8873302760543117, |
| "grad_norm": 16.8347650742012, |
| "learning_rate": 6.578074605908002e-08, |
| "loss": 0.7949, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.8876899559392141, |
| "grad_norm": 23.662246677536164, |
| "learning_rate": 6.536572617234082e-08, |
| "loss": 0.7869, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.8880496358241166, |
| "grad_norm": 15.299606346328554, |
| "learning_rate": 6.495197540248998e-08, |
| "loss": 0.7176, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.8884093157090189, |
| "grad_norm": 8.941773527942491, |
| "learning_rate": 6.453949431134987e-08, |
| "loss": 0.7632, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.8887689955939214, |
| "grad_norm": 13.760823992932636, |
| "learning_rate": 6.412828345901811e-08, |
| "loss": 0.8206, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.8891286754788238, |
| "grad_norm": 29.754042300263702, |
| "learning_rate": 6.371834340386806e-08, |
| "loss": 0.8171, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.8894883553637263, |
| "grad_norm": 8.639639912360048, |
| "learning_rate": 6.330967470254689e-08, |
| "loss": 0.7482, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.8898480352486288, |
| "grad_norm": 23.380442945006624, |
| "learning_rate": 6.290227790997605e-08, |
| "loss": 0.7601, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.8902077151335311, |
| "grad_norm": 14.740993896157672, |
| "learning_rate": 6.249615357934967e-08, |
| "loss": 0.7157, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.8905673950184336, |
| "grad_norm": 14.583092846147524, |
| "learning_rate": 6.209130226213377e-08, |
| "loss": 0.8589, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.890927074903336, |
| "grad_norm": 8.402288272443613, |
| "learning_rate": 6.168772450806603e-08, |
| "loss": 0.8275, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.8912867547882385, |
| "grad_norm": 10.4688747751167, |
| "learning_rate": 6.128542086515498e-08, |
| "loss": 0.8381, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.8916464346731409, |
| "grad_norm": 15.43687344409759, |
| "learning_rate": 6.088439187967865e-08, |
| "loss": 0.7961, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.8920061145580434, |
| "grad_norm": 23.699575211475587, |
| "learning_rate": 6.048463809618443e-08, |
| "loss": 0.8376, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8923657944429457, |
| "grad_norm": 10.61473470795305, |
| "learning_rate": 6.0086160057488e-08, |
| "loss": 0.7562, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.8927254743278482, |
| "grad_norm": 9.472523365558738, |
| "learning_rate": 5.968895830467324e-08, |
| "loss": 0.8776, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.8930851542127507, |
| "grad_norm": 11.49518915052341, |
| "learning_rate": 5.929303337709046e-08, |
| "loss": 0.7815, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.8934448340976531, |
| "grad_norm": 8.884363641553396, |
| "learning_rate": 5.88983858123564e-08, |
| "loss": 0.8092, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.8938045139825556, |
| "grad_norm": 24.567263812985427, |
| "learning_rate": 5.850501614635317e-08, |
| "loss": 0.8345, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.8941641938674579, |
| "grad_norm": 20.815878420590963, |
| "learning_rate": 5.8112924913227945e-08, |
| "loss": 0.7904, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.8945238737523604, |
| "grad_norm": 15.641619948346964, |
| "learning_rate": 5.772211264539162e-08, |
| "loss": 0.8015, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.8948835536372628, |
| "grad_norm": 10.234778936016037, |
| "learning_rate": 5.733257987351869e-08, |
| "loss": 0.8038, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.8952432335221653, |
| "grad_norm": 36.35850049341599, |
| "learning_rate": 5.694432712654595e-08, |
| "loss": 0.7995, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.8956029134070677, |
| "grad_norm": 7.360898163803578, |
| "learning_rate": 5.6557354931672465e-08, |
| "loss": 0.8698, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8959625932919701, |
| "grad_norm": 10.859256879240384, |
| "learning_rate": 5.617166381435812e-08, |
| "loss": 0.728, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.8963222731768726, |
| "grad_norm": 12.035500160936694, |
| "learning_rate": 5.5787254298323426e-08, |
| "loss": 0.8241, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.896681953061775, |
| "grad_norm": 7.175535588198203, |
| "learning_rate": 5.540412690554841e-08, |
| "loss": 0.8192, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.8970416329466775, |
| "grad_norm": 9.941413830807381, |
| "learning_rate": 5.5022282156272806e-08, |
| "loss": 0.7613, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.8974013128315799, |
| "grad_norm": 17.16769052805302, |
| "learning_rate": 5.464172056899363e-08, |
| "loss": 0.7722, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.8977609927164824, |
| "grad_norm": 12.120269744481789, |
| "learning_rate": 5.4262442660466756e-08, |
| "loss": 0.7857, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.8981206726013847, |
| "grad_norm": 75.32373857015753, |
| "learning_rate": 5.388444894570377e-08, |
| "loss": 0.8388, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.8984803524862872, |
| "grad_norm": 14.802277158407454, |
| "learning_rate": 5.350773993797331e-08, |
| "loss": 0.841, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.8988400323711896, |
| "grad_norm": 13.58398817417819, |
| "learning_rate": 5.3132316148799095e-08, |
| "loss": 0.7859, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.8991997122560921, |
| "grad_norm": 9.139002292783852, |
| "learning_rate": 5.275817808796013e-08, |
| "loss": 0.7882, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8995593921409946, |
| "grad_norm": 11.111461052429972, |
| "learning_rate": 5.23853262634889e-08, |
| "loss": 0.8377, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.8999190720258969, |
| "grad_norm": 8.200434480095561, |
| "learning_rate": 5.2013761181672e-08, |
| "loss": 0.7919, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.9002787519107994, |
| "grad_norm": 13.89542235334582, |
| "learning_rate": 5.164348334704849e-08, |
| "loss": 0.8154, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.9006384317957018, |
| "grad_norm": 8.047659453419161, |
| "learning_rate": 5.127449326240951e-08, |
| "loss": 0.7924, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.9009981116806043, |
| "grad_norm": 20.904000094620358, |
| "learning_rate": 5.09067914287975e-08, |
| "loss": 0.7235, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.9013577915655067, |
| "grad_norm": 9.98471380828884, |
| "learning_rate": 5.054037834550595e-08, |
| "loss": 0.799, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.9017174714504091, |
| "grad_norm": 8.296611008202895, |
| "learning_rate": 5.017525451007809e-08, |
| "loss": 0.779, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.9020771513353115, |
| "grad_norm": 10.39266899999923, |
| "learning_rate": 4.981142041830644e-08, |
| "loss": 0.8545, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.902436831220214, |
| "grad_norm": 7.858549897953844, |
| "learning_rate": 4.9448876564232464e-08, |
| "loss": 0.8008, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.9027965111051165, |
| "grad_norm": 23.344213960087007, |
| "learning_rate": 4.908762344014572e-08, |
| "loss": 0.7701, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9031561909900189, |
| "grad_norm": 49.67751895688126, |
| "learning_rate": 4.87276615365827e-08, |
| "loss": 0.7661, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.9035158708749214, |
| "grad_norm": 9.000851802072338, |
| "learning_rate": 4.836899134232686e-08, |
| "loss": 0.8127, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.9038755507598237, |
| "grad_norm": 13.131390273242742, |
| "learning_rate": 4.801161334440762e-08, |
| "loss": 0.8028, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.9042352306447262, |
| "grad_norm": 11.852376589318085, |
| "learning_rate": 4.765552802809991e-08, |
| "loss": 0.791, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.9045949105296286, |
| "grad_norm": 10.068709792910127, |
| "learning_rate": 4.7300735876923184e-08, |
| "loss": 0.8158, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.9049545904145311, |
| "grad_norm": 15.319789847910473, |
| "learning_rate": 4.6947237372640945e-08, |
| "loss": 0.8258, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.9053142702994335, |
| "grad_norm": 17.361132961110712, |
| "learning_rate": 4.659503299526013e-08, |
| "loss": 0.8196, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.9056739501843359, |
| "grad_norm": 11.683250687452635, |
| "learning_rate": 4.624412322303017e-08, |
| "loss": 0.8444, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.9060336300692384, |
| "grad_norm": 14.754742107720682, |
| "learning_rate": 4.589450853244314e-08, |
| "loss": 0.7155, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.9063933099541408, |
| "grad_norm": 8.867267420272865, |
| "learning_rate": 4.554618939823207e-08, |
| "loss": 0.8608, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9067529898390433, |
| "grad_norm": 10.106898952777634, |
| "learning_rate": 4.519916629337106e-08, |
| "loss": 0.8043, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.9071126697239457, |
| "grad_norm": 10.523182003142079, |
| "learning_rate": 4.485343968907396e-08, |
| "loss": 0.7779, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.9074723496088482, |
| "grad_norm": 22.917096510725628, |
| "learning_rate": 4.450901005479468e-08, |
| "loss": 0.7474, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.9078320294937505, |
| "grad_norm": 13.072147811346369, |
| "learning_rate": 4.416587785822568e-08, |
| "loss": 0.778, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.908191709378653, |
| "grad_norm": 11.485613445553305, |
| "learning_rate": 4.382404356529801e-08, |
| "loss": 0.7712, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.9085513892635554, |
| "grad_norm": 10.877208036313718, |
| "learning_rate": 4.3483507640179494e-08, |
| "loss": 0.8222, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.9089110691484579, |
| "grad_norm": 32.74604399494894, |
| "learning_rate": 4.3144270545275805e-08, |
| "loss": 0.8116, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.9092707490333604, |
| "grad_norm": 34.911386803795274, |
| "learning_rate": 4.280633274122858e-08, |
| "loss": 0.7851, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.9096304289182627, |
| "grad_norm": 23.12238563090395, |
| "learning_rate": 4.246969468691553e-08, |
| "loss": 0.8234, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.9099901088031652, |
| "grad_norm": 29.191829829834525, |
| "learning_rate": 4.213435683944866e-08, |
| "loss": 0.7927, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9103497886880676, |
| "grad_norm": 24.646806193615582, |
| "learning_rate": 4.1800319654175406e-08, |
| "loss": 0.8315, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.9107094685729701, |
| "grad_norm": 22.859981327872294, |
| "learning_rate": 4.146758358467639e-08, |
| "loss": 0.8253, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.9110691484578725, |
| "grad_norm": 12.996291640216834, |
| "learning_rate": 4.113614908276608e-08, |
| "loss": 0.6909, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.9114288283427749, |
| "grad_norm": 79.01114401298018, |
| "learning_rate": 4.0806016598490703e-08, |
| "loss": 0.8135, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.9117885082276773, |
| "grad_norm": 19.90454201196664, |
| "learning_rate": 4.047718658012944e-08, |
| "loss": 0.7807, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.9121481881125798, |
| "grad_norm": 8.167197207827675, |
| "learning_rate": 4.0149659474192356e-08, |
| "loss": 0.7933, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.9125078679974823, |
| "grad_norm": 12.86960868138441, |
| "learning_rate": 3.982343572542068e-08, |
| "loss": 0.8294, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.9128675478823847, |
| "grad_norm": 21.000756246385816, |
| "learning_rate": 3.94985157767852e-08, |
| "loss": 0.8321, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.9132272277672872, |
| "grad_norm": 8.592256301355716, |
| "learning_rate": 3.917490006948698e-08, |
| "loss": 0.825, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.9135869076521895, |
| "grad_norm": 20.33574361050648, |
| "learning_rate": 3.885258904295574e-08, |
| "loss": 0.7844, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.913946587537092, |
| "grad_norm": 16.092620216955467, |
| "learning_rate": 3.853158313484994e-08, |
| "loss": 0.747, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.9143062674219944, |
| "grad_norm": 12.022595522312857, |
| "learning_rate": 3.8211882781055136e-08, |
| "loss": 0.8149, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.9146659473068969, |
| "grad_norm": 10.921442456182392, |
| "learning_rate": 3.789348841568496e-08, |
| "loss": 0.7515, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.9150256271917993, |
| "grad_norm": 9.541539259640993, |
| "learning_rate": 3.7576400471079015e-08, |
| "loss": 0.8507, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.9153853070767017, |
| "grad_norm": 11.742185837327472, |
| "learning_rate": 3.7260619377803673e-08, |
| "loss": 0.8257, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.9157449869616042, |
| "grad_norm": 10.38470288875268, |
| "learning_rate": 3.6946145564649813e-08, |
| "loss": 0.8143, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.9161046668465066, |
| "grad_norm": 52.44317807546952, |
| "learning_rate": 3.663297945863386e-08, |
| "loss": 0.8216, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.9164643467314091, |
| "grad_norm": 16.112270463265936, |
| "learning_rate": 3.632112148499644e-08, |
| "loss": 0.8474, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.9168240266163115, |
| "grad_norm": 17.832343118912398, |
| "learning_rate": 3.6010572067201814e-08, |
| "loss": 0.8092, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.917183706501214, |
| "grad_norm": 10.2969972546708, |
| "learning_rate": 3.570133162693734e-08, |
| "loss": 0.8921, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9175433863861163, |
| "grad_norm": 15.341033816609158, |
| "learning_rate": 3.5393400584113e-08, |
| "loss": 0.8019, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.9179030662710188, |
| "grad_norm": 28.508299636133415, |
| "learning_rate": 3.508677935686077e-08, |
| "loss": 0.8667, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.9182627461559212, |
| "grad_norm": 12.249861652869686, |
| "learning_rate": 3.478146836153417e-08, |
| "loss": 0.7562, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.9186224260408237, |
| "grad_norm": 9.324071352603683, |
| "learning_rate": 3.4477468012707455e-08, |
| "loss": 0.7514, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.9189821059257262, |
| "grad_norm": 9.837763647360559, |
| "learning_rate": 3.41747787231752e-08, |
| "loss": 0.8081, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.9193417858106285, |
| "grad_norm": 11.02043622469786, |
| "learning_rate": 3.387340090395163e-08, |
| "loss": 0.7744, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.919701465695531, |
| "grad_norm": 22.970139968632665, |
| "learning_rate": 3.357333496427039e-08, |
| "loss": 0.7736, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.9200611455804334, |
| "grad_norm": 13.114721774192239, |
| "learning_rate": 3.3274581311583783e-08, |
| "loss": 0.7807, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.9204208254653359, |
| "grad_norm": 10.921941898637904, |
| "learning_rate": 3.2977140351561736e-08, |
| "loss": 0.7888, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.9207805053502383, |
| "grad_norm": 9.11820697290462, |
| "learning_rate": 3.2681012488092185e-08, |
| "loss": 0.8308, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9211401852351407, |
| "grad_norm": 9.553285009017845, |
| "learning_rate": 3.2386198123279915e-08, |
| "loss": 0.7925, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.9214998651200431, |
| "grad_norm": 12.302523221993972, |
| "learning_rate": 3.2092697657446044e-08, |
| "loss": 0.821, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.9218595450049456, |
| "grad_norm": 14.912768413919956, |
| "learning_rate": 3.180051148912755e-08, |
| "loss": 0.7751, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.9222192248898481, |
| "grad_norm": 8.69058806479011, |
| "learning_rate": 3.150964001507694e-08, |
| "loss": 0.74, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.9225789047747505, |
| "grad_norm": 8.300323424451845, |
| "learning_rate": 3.122008363026141e-08, |
| "loss": 0.7701, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.922938584659653, |
| "grad_norm": 18.17822427408239, |
| "learning_rate": 3.0931842727862536e-08, |
| "loss": 0.7585, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.9232982645445553, |
| "grad_norm": 15.417981907475573, |
| "learning_rate": 3.064491769927535e-08, |
| "loss": 0.8259, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.9236579444294578, |
| "grad_norm": 7.498723352819035, |
| "learning_rate": 3.035930893410843e-08, |
| "loss": 0.7875, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.9240176243143602, |
| "grad_norm": 7.869261716229639, |
| "learning_rate": 3.007501682018288e-08, |
| "loss": 0.7703, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.9243773041992627, |
| "grad_norm": 39.539952172595655, |
| "learning_rate": 2.9792041743532002e-08, |
| "loss": 0.8358, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.924736984084165, |
| "grad_norm": 9.922858274516205, |
| "learning_rate": 2.9510384088400674e-08, |
| "loss": 0.8146, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.9250966639690675, |
| "grad_norm": 18.170136855272634, |
| "learning_rate": 2.9230044237244732e-08, |
| "loss": 0.8725, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.92545634385397, |
| "grad_norm": 10.762704827491863, |
| "learning_rate": 2.895102257073101e-08, |
| "loss": 0.7705, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.9258160237388724, |
| "grad_norm": 22.28881961766892, |
| "learning_rate": 2.86733194677361e-08, |
| "loss": 0.8354, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.9261757036237749, |
| "grad_norm": 12.67808461853552, |
| "learning_rate": 2.8396935305346036e-08, |
| "loss": 0.8081, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.9265353835086773, |
| "grad_norm": 10.51996998799239, |
| "learning_rate": 2.812187045885628e-08, |
| "loss": 0.8434, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.9268950633935797, |
| "grad_norm": 16.685006494100037, |
| "learning_rate": 2.7848125301770498e-08, |
| "loss": 0.8404, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.9272547432784821, |
| "grad_norm": 17.391189679151648, |
| "learning_rate": 2.757570020580069e-08, |
| "loss": 0.8016, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.9276144231633846, |
| "grad_norm": 12.200773941293791, |
| "learning_rate": 2.730459554086595e-08, |
| "loss": 0.8153, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.927974103048287, |
| "grad_norm": 9.017422390363933, |
| "learning_rate": 2.7034811675092806e-08, |
| "loss": 0.8482, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9283337829331895, |
| "grad_norm": 14.335525486714841, |
| "learning_rate": 2.676634897481389e-08, |
| "loss": 0.7806, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.928693462818092, |
| "grad_norm": 10.074582781397503, |
| "learning_rate": 2.649920780456849e-08, |
| "loss": 0.8156, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.9290531427029943, |
| "grad_norm": 48.25905128199228, |
| "learning_rate": 2.6233388527100773e-08, |
| "loss": 0.8383, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.9294128225878968, |
| "grad_norm": 13.313452109675913, |
| "learning_rate": 2.5968891503360235e-08, |
| "loss": 0.7692, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.9297725024727992, |
| "grad_norm": 21.51215656478663, |
| "learning_rate": 2.570571709250069e-08, |
| "loss": 0.8186, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.9301321823577017, |
| "grad_norm": 10.877961635645919, |
| "learning_rate": 2.5443865651880615e-08, |
| "loss": 0.76, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.930491862242604, |
| "grad_norm": 9.051919932920649, |
| "learning_rate": 2.5183337537061365e-08, |
| "loss": 0.7774, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.9308515421275065, |
| "grad_norm": 15.429443230815407, |
| "learning_rate": 2.492413310180763e-08, |
| "loss": 0.8207, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.9312112220124089, |
| "grad_norm": 11.971056851995911, |
| "learning_rate": 2.4666252698086864e-08, |
| "loss": 0.7847, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.9315709018973114, |
| "grad_norm": 8.886524433373074, |
| "learning_rate": 2.4409696676068513e-08, |
| "loss": 0.7715, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.9319305817822139, |
| "grad_norm": 8.932491424444477, |
| "learning_rate": 2.415446538412358e-08, |
| "loss": 0.7326, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.9322902616671163, |
| "grad_norm": 24.838760776155556, |
| "learning_rate": 2.3900559168824498e-08, |
| "loss": 0.8226, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.9326499415520187, |
| "grad_norm": 10.184889834390296, |
| "learning_rate": 2.3647978374944032e-08, |
| "loss": 0.8067, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.9330096214369211, |
| "grad_norm": 20.39934160539424, |
| "learning_rate": 2.3396723345455726e-08, |
| "loss": 0.7883, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.9333693013218236, |
| "grad_norm": 34.16356835417824, |
| "learning_rate": 2.3146794421532556e-08, |
| "loss": 0.7415, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.933728981206726, |
| "grad_norm": 8.381718901793608, |
| "learning_rate": 2.2898191942546608e-08, |
| "loss": 0.7556, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.9340886610916285, |
| "grad_norm": 8.969461130065204, |
| "learning_rate": 2.2650916246069297e-08, |
| "loss": 0.8207, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.9344483409765308, |
| "grad_norm": 16.188778153796065, |
| "learning_rate": 2.2404967667870143e-08, |
| "loss": 0.7943, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.9348080208614333, |
| "grad_norm": 17.95530480731631, |
| "learning_rate": 2.2160346541916674e-08, |
| "loss": 0.7264, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.9351677007463357, |
| "grad_norm": 24.190170970831844, |
| "learning_rate": 2.1917053200374068e-08, |
| "loss": 0.8199, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9355273806312382, |
| "grad_norm": 10.123026970672111, |
| "learning_rate": 2.1675087973603957e-08, |
| "loss": 0.8274, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.9358870605161407, |
| "grad_norm": 14.224219466589894, |
| "learning_rate": 2.1434451190165292e-08, |
| "loss": 0.8012, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.936246740401043, |
| "grad_norm": 10.600627557539825, |
| "learning_rate": 2.1195143176812812e-08, |
| "loss": 0.7483, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.9366064202859455, |
| "grad_norm": 13.072901545139233, |
| "learning_rate": 2.0957164258497028e-08, |
| "loss": 0.7783, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.9369661001708479, |
| "grad_norm": 17.476053436586472, |
| "learning_rate": 2.072051475836334e-08, |
| "loss": 0.7654, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.9373257800557504, |
| "grad_norm": 10.49174050370654, |
| "learning_rate": 2.0485194997752696e-08, |
| "loss": 0.8084, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.9376854599406528, |
| "grad_norm": 12.951335904589302, |
| "learning_rate": 2.0251205296199613e-08, |
| "loss": 0.7594, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.9380451398255553, |
| "grad_norm": 14.401952391066391, |
| "learning_rate": 2.0018545971433486e-08, |
| "loss": 0.8313, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.9384048197104576, |
| "grad_norm": 10.086198937724093, |
| "learning_rate": 1.978721733937605e-08, |
| "loss": 0.752, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.9387644995953601, |
| "grad_norm": 16.618758204083733, |
| "learning_rate": 1.955721971414326e-08, |
| "loss": 0.7926, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.9391241794802626, |
| "grad_norm": 11.248267278277176, |
| "learning_rate": 1.932855340804296e-08, |
| "loss": 0.7707, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.939483859365165, |
| "grad_norm": 27.38329889632666, |
| "learning_rate": 1.9101218731575776e-08, |
| "loss": 0.7865, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.9398435392500675, |
| "grad_norm": 19.212814276037737, |
| "learning_rate": 1.8875215993433448e-08, |
| "loss": 0.8443, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.9402032191349698, |
| "grad_norm": 14.275362212978525, |
| "learning_rate": 1.8650545500499936e-08, |
| "loss": 0.8491, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.9405628990198723, |
| "grad_norm": 14.319193665650705, |
| "learning_rate": 1.8427207557849434e-08, |
| "loss": 0.862, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.9409225789047747, |
| "grad_norm": 9.750968215322466, |
| "learning_rate": 1.820520246874746e-08, |
| "loss": 0.7874, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.9412822587896772, |
| "grad_norm": 10.999793069766183, |
| "learning_rate": 1.7984530534648878e-08, |
| "loss": 0.8975, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.9416419386745796, |
| "grad_norm": 9.934147693276143, |
| "learning_rate": 1.7765192055198886e-08, |
| "loss": 0.7585, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.9420016185594821, |
| "grad_norm": 7.035081749310813, |
| "learning_rate": 1.7547187328231572e-08, |
| "loss": 0.8036, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.9423612984443845, |
| "grad_norm": 14.465406847865054, |
| "learning_rate": 1.73305166497707e-08, |
| "loss": 0.7117, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.9427209783292869, |
| "grad_norm": 13.419580416553304, |
| "learning_rate": 1.711518031402748e-08, |
| "loss": 0.846, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.9430806582141894, |
| "grad_norm": 24.297195597575204, |
| "learning_rate": 1.6901178613402124e-08, |
| "loss": 0.7531, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.9434403380990918, |
| "grad_norm": 14.640786860392414, |
| "learning_rate": 1.668851183848219e-08, |
| "loss": 0.7826, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.9438000179839943, |
| "grad_norm": 17.94888635356105, |
| "learning_rate": 1.647718027804279e-08, |
| "loss": 0.7278, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.9441596978688966, |
| "grad_norm": 59.51196881712969, |
| "learning_rate": 1.6267184219045604e-08, |
| "loss": 0.8323, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.9445193777537991, |
| "grad_norm": 13.566168202037804, |
| "learning_rate": 1.6058523946639425e-08, |
| "loss": 0.845, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.9448790576387015, |
| "grad_norm": 31.437962719597333, |
| "learning_rate": 1.5851199744158607e-08, |
| "loss": 0.8412, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.945238737523604, |
| "grad_norm": 24.101411848318847, |
| "learning_rate": 1.5645211893123845e-08, |
| "loss": 0.8179, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.9455984174085065, |
| "grad_norm": 13.703323167891854, |
| "learning_rate": 1.5440560673240733e-08, |
| "loss": 0.7231, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.9459580972934089, |
| "grad_norm": 18.103807635943653, |
| "learning_rate": 1.5237246362400312e-08, |
| "loss": 0.7647, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.9463177771783113, |
| "grad_norm": 7.46822224629538, |
| "learning_rate": 1.503526923667797e-08, |
| "loss": 0.8045, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.9466774570632137, |
| "grad_norm": 23.408064292569758, |
| "learning_rate": 1.4834629570333546e-08, |
| "loss": 0.8662, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.9470371369481162, |
| "grad_norm": 34.23282671327533, |
| "learning_rate": 1.4635327635810768e-08, |
| "loss": 0.6998, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.9473968168330186, |
| "grad_norm": 45.904877302306865, |
| "learning_rate": 1.4437363703736716e-08, |
| "loss": 0.7153, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.9477564967179211, |
| "grad_norm": 13.096331272320922, |
| "learning_rate": 1.4240738042921586e-08, |
| "loss": 0.802, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.9481161766028234, |
| "grad_norm": 13.455777039167259, |
| "learning_rate": 1.4045450920358914e-08, |
| "loss": 0.8362, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.9484758564877259, |
| "grad_norm": 164.2037460425521, |
| "learning_rate": 1.385150260122403e-08, |
| "loss": 0.834, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.9488355363726284, |
| "grad_norm": 18.680018112786254, |
| "learning_rate": 1.3658893348874712e-08, |
| "loss": 0.8363, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.9491952162575308, |
| "grad_norm": 9.617601101777366, |
| "learning_rate": 1.3467623424850083e-08, |
| "loss": 0.7966, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.9495548961424333, |
| "grad_norm": 16.39249663420055, |
| "learning_rate": 1.3277693088871166e-08, |
| "loss": 0.7445, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.9499145760273356, |
| "grad_norm": 56.34814949802547, |
| "learning_rate": 1.3089102598839441e-08, |
| "loss": 0.7713, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.9502742559122381, |
| "grad_norm": 15.464516964279793, |
| "learning_rate": 1.2901852210837505e-08, |
| "loss": 0.7829, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.9506339357971405, |
| "grad_norm": 8.517435737858888, |
| "learning_rate": 1.271594217912797e-08, |
| "loss": 0.7735, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.950993615682043, |
| "grad_norm": 12.980233129690735, |
| "learning_rate": 1.2531372756153458e-08, |
| "loss": 0.7528, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.9513532955669454, |
| "grad_norm": 8.473487915051534, |
| "learning_rate": 1.2348144192536269e-08, |
| "loss": 0.9364, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.9517129754518479, |
| "grad_norm": 13.7834439599011, |
| "learning_rate": 1.216625673707794e-08, |
| "loss": 0.7419, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.9520726553367503, |
| "grad_norm": 12.823487237000318, |
| "learning_rate": 1.1985710636759128e-08, |
| "loss": 0.7881, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.9524323352216527, |
| "grad_norm": 223.59177883502042, |
| "learning_rate": 1.1806506136738614e-08, |
| "loss": 0.7553, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.9527920151065552, |
| "grad_norm": 7.401975669691004, |
| "learning_rate": 1.1628643480354083e-08, |
| "loss": 0.8268, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.9531516949914576, |
| "grad_norm": 16.516496252813177, |
| "learning_rate": 1.1452122909120788e-08, |
| "loss": 0.8058, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.9535113748763601, |
| "grad_norm": 12.520635742432743, |
| "learning_rate": 1.1276944662731658e-08, |
| "loss": 0.8203, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.9538710547612624, |
| "grad_norm": 7.273339324226177, |
| "learning_rate": 1.1103108979056862e-08, |
| "loss": 0.759, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.9542307346461649, |
| "grad_norm": 12.221006199133758, |
| "learning_rate": 1.0930616094143696e-08, |
| "loss": 0.843, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.9545904145310673, |
| "grad_norm": 10.982164990366092, |
| "learning_rate": 1.0759466242215909e-08, |
| "loss": 0.7948, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.9549500944159698, |
| "grad_norm": 34.0030866637079, |
| "learning_rate": 1.058965965567371e-08, |
| "loss": 0.7652, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.9553097743008723, |
| "grad_norm": 15.747030639242206, |
| "learning_rate": 1.0421196565093216e-08, |
| "loss": 0.8198, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.9556694541857746, |
| "grad_norm": 24.440296459219436, |
| "learning_rate": 1.0254077199226552e-08, |
| "loss": 0.8896, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.9560291340706771, |
| "grad_norm": 43.92761632601037, |
| "learning_rate": 1.0088301785000753e-08, |
| "loss": 0.7735, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.9563888139555795, |
| "grad_norm": 7.338427187913148, |
| "learning_rate": 9.923870547518309e-09, |
| "loss": 0.7591, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.956748493840482, |
| "grad_norm": 10.638046454075525, |
| "learning_rate": 9.760783710056175e-09, |
| "loss": 0.8222, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.9571081737253844, |
| "grad_norm": 8.979052132037536, |
| "learning_rate": 9.599041494066207e-09, |
| "loss": 0.8273, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.9574678536102869, |
| "grad_norm": 23.602876418783797, |
| "learning_rate": 9.438644119174055e-09, |
| "loss": 0.8427, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.9578275334951892, |
| "grad_norm": 19.131944211146887, |
| "learning_rate": 9.279591803179277e-09, |
| "loss": 0.8288, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.9581872133800917, |
| "grad_norm": 9.059579836541714, |
| "learning_rate": 9.12188476205522e-09, |
| "loss": 0.8367, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.9585468932649942, |
| "grad_norm": 11.933115901091556, |
| "learning_rate": 8.965523209948366e-09, |
| "loss": 0.7535, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.9589065731498966, |
| "grad_norm": 14.054611295402403, |
| "learning_rate": 8.810507359178321e-09, |
| "loss": 0.8375, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.9592662530347991, |
| "grad_norm": 8.834709100235033, |
| "learning_rate": 8.656837420237151e-09, |
| "loss": 0.8657, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.9596259329197014, |
| "grad_norm": 10.745709201482285, |
| "learning_rate": 8.504513601789386e-09, |
| "loss": 0.7891, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.9599856128046039, |
| "grad_norm": 7.514508704592103, |
| "learning_rate": 8.353536110672133e-09, |
| "loss": 0.7951, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.9603452926895063, |
| "grad_norm": 10.628735366005465, |
| "learning_rate": 8.203905151893731e-09, |
| "loss": 0.8347, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.9607049725744088, |
| "grad_norm": 9.665634328411453, |
| "learning_rate": 8.055620928634433e-09, |
| "loss": 0.836, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.9610646524593112, |
| "grad_norm": 11.385271385247645, |
| "learning_rate": 7.90868364224584e-09, |
| "loss": 0.784, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.9614243323442137, |
| "grad_norm": 11.77954267090191, |
| "learning_rate": 7.76309349225035e-09, |
| "loss": 0.7976, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.9617840122291161, |
| "grad_norm": 96.72406629090244, |
| "learning_rate": 7.618850676341381e-09, |
| "loss": 0.7697, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.9621436921140185, |
| "grad_norm": 19.978249531416974, |
| "learning_rate": 7.475955390382483e-09, |
| "loss": 0.7746, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.962503371998921, |
| "grad_norm": 17.644446773428314, |
| "learning_rate": 7.334407828407885e-09, |
| "loss": 0.824, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.9628630518838234, |
| "grad_norm": 8.40972118443302, |
| "learning_rate": 7.1942081826215085e-09, |
| "loss": 0.6929, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.9632227317687259, |
| "grad_norm": 17.744908237982273, |
| "learning_rate": 7.0553566433968485e-09, |
| "loss": 0.7624, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.9635824116536282, |
| "grad_norm": 15.543084120458476, |
| "learning_rate": 6.917853399277196e-09, |
| "loss": 0.8241, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.9639420915385307, |
| "grad_norm": 7.202723902595622, |
| "learning_rate": 6.781698636974531e-09, |
| "loss": 0.737, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.9643017714234331, |
| "grad_norm": 15.979553321957102, |
| "learning_rate": 6.646892541370408e-09, |
| "loss": 0.7976, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.9646614513083356, |
| "grad_norm": 45.082630303411385, |
| "learning_rate": 6.513435295514402e-09, |
| "loss": 0.774, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.9650211311932381, |
| "grad_norm": 9.931768652353941, |
| "learning_rate": 6.381327080625109e-09, |
| "loss": 0.8188, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.9653808110781404, |
| "grad_norm": 8.624359023921118, |
| "learning_rate": 6.250568076088814e-09, |
| "loss": 0.8211, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.9657404909630429, |
| "grad_norm": 15.123515096117305, |
| "learning_rate": 6.121158459460041e-09, |
| "loss": 0.8049, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.9661001708479453, |
| "grad_norm": 11.989707609085379, |
| "learning_rate": 5.9930984064608946e-09, |
| "loss": 0.7931, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.9664598507328478, |
| "grad_norm": 8.768179389271808, |
| "learning_rate": 5.866388090980945e-09, |
| "loss": 0.7865, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.9668195306177502, |
| "grad_norm": 18.247534235534676, |
| "learning_rate": 5.741027685077005e-09, |
| "loss": 0.716, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.9671792105026527, |
| "grad_norm": 13.960685103084211, |
| "learning_rate": 5.61701735897302e-09, |
| "loss": 0.8079, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.967538890387555, |
| "grad_norm": 10.893739858824933, |
| "learning_rate": 5.494357281059403e-09, |
| "loss": 0.8029, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.9678985702724575, |
| "grad_norm": 20.614764178998072, |
| "learning_rate": 5.373047617893478e-09, |
| "loss": 0.7982, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.96825825015736, |
| "grad_norm": 11.193102120432794, |
| "learning_rate": 5.253088534198258e-09, |
| "loss": 0.772, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.9686179300422624, |
| "grad_norm": 9.543764352530816, |
| "learning_rate": 5.134480192863666e-09, |
| "loss": 0.7926, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.9689776099271649, |
| "grad_norm": 10.39901130663553, |
| "learning_rate": 5.01722275494465e-09, |
| "loss": 0.8168, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.9693372898120672, |
| "grad_norm": 20.38370112308751, |
| "learning_rate": 4.9013163796626234e-09, |
| "loss": 0.8422, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 16.007176538189192, |
| "learning_rate": 4.78676122440369e-09, |
| "loss": 0.8267, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.9700566495818721, |
| "grad_norm": 10.072443968941924, |
| "learning_rate": 4.6735574447195335e-09, |
| "loss": 0.8246, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.9704163294667746, |
| "grad_norm": 14.511353166731903, |
| "learning_rate": 4.561705194326748e-09, |
| "loss": 0.843, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.970776009351677, |
| "grad_norm": 13.514019727821859, |
| "learning_rate": 4.4512046251070636e-09, |
| "loss": 0.803, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.9711356892365794, |
| "grad_norm": 7.232606061135063, |
| "learning_rate": 4.342055887106011e-09, |
| "loss": 0.7693, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9714953691214819, |
| "grad_norm": 8.87084975584444, |
| "learning_rate": 4.234259128534368e-09, |
| "loss": 0.7394, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.9718550490063843, |
| "grad_norm": 10.156862890036118, |
| "learning_rate": 4.12781449576638e-09, |
| "loss": 0.8209, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.9722147288912868, |
| "grad_norm": 9.317641827650974, |
| "learning_rate": 4.022722133340873e-09, |
| "loss": 0.7536, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.9725744087761892, |
| "grad_norm": 16.558863139113416, |
| "learning_rate": 3.918982183960029e-09, |
| "loss": 0.6953, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.9729340886610917, |
| "grad_norm": 14.54925389895523, |
| "learning_rate": 3.816594788489835e-09, |
| "loss": 0.7382, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.973293768545994, |
| "grad_norm": 10.300577211253348, |
| "learning_rate": 3.715560085959524e-09, |
| "loss": 0.7824, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.9736534484308965, |
| "grad_norm": 49.51727220703054, |
| "learning_rate": 3.615878213561796e-09, |
| "loss": 0.7771, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.9740131283157989, |
| "grad_norm": 19.386477701053455, |
| "learning_rate": 3.5175493066521566e-09, |
| "loss": 0.7306, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.9743728082007014, |
| "grad_norm": 10.11442472138445, |
| "learning_rate": 3.4205734987488022e-09, |
| "loss": 0.8273, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.9747324880856039, |
| "grad_norm": 9.448581140496838, |
| "learning_rate": 3.3249509215330653e-09, |
| "loss": 0.7171, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.9750921679705062, |
| "grad_norm": 10.047085026998909, |
| "learning_rate": 3.230681704848415e-09, |
| "loss": 0.7713, |
| "step": 2711 |
| }, |
| { |
| "epoch": 0.9754518478554087, |
| "grad_norm": 28.940520628953887, |
| "learning_rate": 3.137765976700679e-09, |
| "loss": 0.7832, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.9758115277403111, |
| "grad_norm": 9.672876193912304, |
| "learning_rate": 3.0462038632577126e-09, |
| "loss": 0.8147, |
| "step": 2713 |
| }, |
| { |
| "epoch": 0.9761712076252136, |
| "grad_norm": 21.468780176500793, |
| "learning_rate": 2.9559954888497273e-09, |
| "loss": 0.8169, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.976530887510116, |
| "grad_norm": 8.441217808841902, |
| "learning_rate": 2.867140975968185e-09, |
| "loss": 0.7538, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.9768905673950184, |
| "grad_norm": 23.627273043331684, |
| "learning_rate": 2.7796404452666842e-09, |
| "loss": 0.8236, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.9772502472799208, |
| "grad_norm": 14.339270513718278, |
| "learning_rate": 2.6934940155598497e-09, |
| "loss": 0.783, |
| "step": 2717 |
| }, |
| { |
| "epoch": 0.9776099271648233, |
| "grad_norm": 42.18713808719393, |
| "learning_rate": 2.608701803823998e-09, |
| "loss": 0.8042, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.9779696070497258, |
| "grad_norm": 58.728909365298485, |
| "learning_rate": 2.5252639251961415e-09, |
| "loss": 0.7737, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.9783292869346282, |
| "grad_norm": 47.30809751767778, |
| "learning_rate": 2.4431804929746504e-09, |
| "loss": 0.7958, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.9786889668195307, |
| "grad_norm": 9.998818970646738, |
| "learning_rate": 2.3624516186186994e-09, |
| "loss": 0.7902, |
| "step": 2721 |
| }, |
| { |
| "epoch": 0.979048646704433, |
| "grad_norm": 54.8025412208281, |
| "learning_rate": 2.2830774117478245e-09, |
| "loss": 0.7751, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.9794083265893355, |
| "grad_norm": 9.736425047874677, |
| "learning_rate": 2.2050579801424772e-09, |
| "loss": 0.7783, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.9797680064742379, |
| "grad_norm": 15.104438925217062, |
| "learning_rate": 2.128393429743247e-09, |
| "loss": 0.8454, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.9801276863591404, |
| "grad_norm": 34.20483043624472, |
| "learning_rate": 2.053083864651084e-09, |
| "loss": 0.8331, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.9804873662440428, |
| "grad_norm": 9.107948696828554, |
| "learning_rate": 1.9791293871269655e-09, |
| "loss": 0.7865, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.9808470461289452, |
| "grad_norm": 10.612138564329928, |
| "learning_rate": 1.9065300975917853e-09, |
| "loss": 0.7857, |
| "step": 2727 |
| }, |
| { |
| "epoch": 0.9812067260138477, |
| "grad_norm": 12.557314133584349, |
| "learning_rate": 1.8352860946265758e-09, |
| "loss": 0.8363, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.9815664058987501, |
| "grad_norm": 21.292533606395093, |
| "learning_rate": 1.7653974749715083e-09, |
| "loss": 0.7834, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.9819260857836526, |
| "grad_norm": 10.652717987422117, |
| "learning_rate": 1.696864333526893e-09, |
| "loss": 0.8064, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.982285765668555, |
| "grad_norm": 14.96513417679561, |
| "learning_rate": 1.6296867633519561e-09, |
| "loss": 0.8403, |
| "step": 2731 |
| }, |
| { |
| "epoch": 0.9826454455534575, |
| "grad_norm": 10.511363408813793, |
| "learning_rate": 1.5638648556656199e-09, |
| "loss": 0.8613, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.9830051254383598, |
| "grad_norm": 16.133962245000316, |
| "learning_rate": 1.499398699845722e-09, |
| "loss": 0.8369, |
| "step": 2733 |
| }, |
| { |
| "epoch": 0.9833648053232623, |
| "grad_norm": 12.810716009645525, |
| "learning_rate": 1.4362883834294625e-09, |
| "loss": 0.7847, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.9837244852081647, |
| "grad_norm": 13.472511295730412, |
| "learning_rate": 1.3745339921126253e-09, |
| "loss": 0.7475, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.9840841650930672, |
| "grad_norm": 15.788414497991974, |
| "learning_rate": 1.3141356097500222e-09, |
| "loss": 0.8171, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.9844438449779697, |
| "grad_norm": 110.75185154489911, |
| "learning_rate": 1.2550933183550494e-09, |
| "loss": 0.8388, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.984803524862872, |
| "grad_norm": 20.327737292045065, |
| "learning_rate": 1.1974071980999089e-09, |
| "loss": 0.8154, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.9851632047477745, |
| "grad_norm": 13.453153627263834, |
| "learning_rate": 1.1410773273151653e-09, |
| "loss": 0.7984, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.9855228846326769, |
| "grad_norm": 25.942566003142183, |
| "learning_rate": 1.0861037824896336e-09, |
| "loss": 0.8358, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.9858825645175794, |
| "grad_norm": 10.841796001652206, |
| "learning_rate": 1.0324866382707131e-09, |
| "loss": 0.7878, |
| "step": 2741 |
| }, |
| { |
| "epoch": 0.9862422444024818, |
| "grad_norm": 20.25552578544338, |
| "learning_rate": 9.802259674637214e-10, |
| "loss": 0.791, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.9866019242873842, |
| "grad_norm": 9.384407310708053, |
| "learning_rate": 9.293218410320047e-10, |
| "loss": 0.8044, |
| "step": 2743 |
| }, |
| { |
| "epoch": 0.9869616041722866, |
| "grad_norm": 10.552914433769093, |
| "learning_rate": 8.797743280972714e-10, |
| "loss": 0.7534, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.9873212840571891, |
| "grad_norm": 9.132184340166688, |
| "learning_rate": 8.315834959385925e-10, |
| "loss": 0.7398, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.9876809639420915, |
| "grad_norm": 10.817060944381932, |
| "learning_rate": 7.847494099934015e-10, |
| "loss": 0.7376, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.988040643826994, |
| "grad_norm": 7.404812161515181, |
| "learning_rate": 7.392721338563834e-10, |
| "loss": 0.7866, |
| "step": 2747 |
| }, |
| { |
| "epoch": 0.9884003237118965, |
| "grad_norm": 13.024550676718645, |
| "learning_rate": 6.951517292800301e-10, |
| "loss": 0.7846, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.9887600035967988, |
| "grad_norm": 9.171386738230428, |
| "learning_rate": 6.523882561744188e-10, |
| "loss": 0.8258, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.9891196834817013, |
| "grad_norm": 8.571326039535148, |
| "learning_rate": 6.109817726068778e-10, |
| "loss": 0.7831, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.9894793633666037, |
| "grad_norm": 14.004779110089878, |
| "learning_rate": 5.70932334802432e-10, |
| "loss": 0.799, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.9898390432515062, |
| "grad_norm": 12.068703242749297, |
| "learning_rate": 5.322399971431357e-10, |
| "loss": 0.7628, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.9901987231364086, |
| "grad_norm": 30.981918985778567, |
| "learning_rate": 4.949048121682953e-10, |
| "loss": 0.8533, |
| "step": 2753 |
| }, |
| { |
| "epoch": 0.990558403021311, |
| "grad_norm": 11.118356722937763, |
| "learning_rate": 4.589268305745797e-10, |
| "loss": 0.7183, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.9909180829062134, |
| "grad_norm": 10.271196213470246, |
| "learning_rate": 4.2430610121546586e-10, |
| "loss": 0.7859, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.9912777627911159, |
| "grad_norm": 6.525791514753714, |
| "learning_rate": 3.9104267110168234e-10, |
| "loss": 0.8111, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.9916374426760184, |
| "grad_norm": 20.043156103334308, |
| "learning_rate": 3.5913658540087654e-10, |
| "loss": 0.853, |
| "step": 2757 |
| }, |
| { |
| "epoch": 0.9919971225609208, |
| "grad_norm": 69.46079333495119, |
| "learning_rate": 3.285878874373926e-10, |
| "loss": 0.7862, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.9923568024458232, |
| "grad_norm": 9.504945412841762, |
| "learning_rate": 2.9939661869260445e-10, |
| "loss": 0.8189, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.9927164823307256, |
| "grad_norm": 13.469627197959296, |
| "learning_rate": 2.7156281880469366e-10, |
| "loss": 0.7905, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.9930761622156281, |
| "grad_norm": 13.210194487635937, |
| "learning_rate": 2.4508652556842756e-10, |
| "loss": 0.7695, |
| "step": 2761 |
| }, |
| { |
| "epoch": 0.9934358421005305, |
| "grad_norm": 8.594677550896781, |
| "learning_rate": 2.199677749352702e-10, |
| "loss": 0.7899, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.993795521985433, |
| "grad_norm": 30.38091428233754, |
| "learning_rate": 1.962066010134933e-10, |
| "loss": 0.7273, |
| "step": 2763 |
| }, |
| { |
| "epoch": 0.9941552018703353, |
| "grad_norm": 18.508392049584348, |
| "learning_rate": 1.7380303606773227e-10, |
| "loss": 0.848, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.9945148817552378, |
| "grad_norm": 10.467248005948727, |
| "learning_rate": 1.5275711051909724e-10, |
| "loss": 0.7759, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.9948745616401403, |
| "grad_norm": 10.146614594409671, |
| "learning_rate": 1.3306885294561697e-10, |
| "loss": 0.7582, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.9952342415250427, |
| "grad_norm": 8.67161970446411, |
| "learning_rate": 1.1473829008123992e-10, |
| "loss": 0.7286, |
| "step": 2767 |
| }, |
| { |
| "epoch": 0.9955939214099452, |
| "grad_norm": 9.589313135228506, |
| "learning_rate": 9.776544681672216e-11, |
| "loss": 0.7611, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.9959536012948476, |
| "grad_norm": 9.980806412038273, |
| "learning_rate": 8.215034619907247e-11, |
| "loss": 0.7889, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.99631328117975, |
| "grad_norm": 21.0842961965178, |
| "learning_rate": 6.789300943155218e-11, |
| "loss": 0.8279, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.9966729610646524, |
| "grad_norm": 28.22526942472408, |
| "learning_rate": 5.499345587389736e-11, |
| "loss": 0.8025, |
| "step": 2771 |
| }, |
| { |
| "epoch": 0.9970326409495549, |
| "grad_norm": 41.860819728538175, |
| "learning_rate": 4.345170304220769e-11, |
| "loss": 0.818, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.9973923208344573, |
| "grad_norm": 21.07573778196831, |
| "learning_rate": 3.326776660850239e-11, |
| "loss": 0.8747, |
| "step": 2773 |
| }, |
| { |
| "epoch": 0.9977520007193598, |
| "grad_norm": 15.028474676120968, |
| "learning_rate": 2.4441660401608444e-11, |
| "loss": 0.8014, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.9981116806042623, |
| "grad_norm": 15.303493993823228, |
| "learning_rate": 1.6973396405939312e-11, |
| "loss": 0.7715, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.9984713604891646, |
| "grad_norm": 12.09864455306113, |
| "learning_rate": 1.0862984762716187e-11, |
| "loss": 0.8467, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.9988310403740671, |
| "grad_norm": 8.12788452017707, |
| "learning_rate": 6.110433769079826e-12, |
| "loss": 0.8188, |
| "step": 2777 |
| }, |
| { |
| "epoch": 0.9991907202589695, |
| "grad_norm": 9.770055403147271, |
| "learning_rate": 2.715749878312579e-12, |
| "loss": 0.8117, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.999550400143872, |
| "grad_norm": 8.763906114496235, |
| "learning_rate": 6.789377000604446e-13, |
| "loss": 0.7695, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.9999100800287744, |
| "grad_norm": 36.349351007850856, |
| "learning_rate": 0.0, |
| "loss": 0.7575, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.9999100800287744, |
| "step": 2780, |
| "total_flos": 807392659931136.0, |
| "train_loss": 0.8599558137303633, |
| "train_runtime": 15742.0767, |
| "train_samples_per_second": 22.606, |
| "train_steps_per_second": 0.177 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2780, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 807392659931136.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |