| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9996983408748115, |
| "eval_steps": 208, |
| "global_step": 1657, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006033182503770739, |
| "grad_norm": 2.140331984013226, |
| "learning_rate": 0.0, |
| "loss": 2.4164, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0006033182503770739, |
| "eval_loss": 2.440932273864746, |
| "eval_runtime": 21.8578, |
| "eval_samples_per_second": 4.026, |
| "eval_steps_per_second": 0.503, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0012066365007541479, |
| "grad_norm": 2.131881596246372, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 2.3859, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0018099547511312218, |
| "grad_norm": 2.0329130314081363, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.4842, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0024132730015082957, |
| "grad_norm": 2.0136673779808563, |
| "learning_rate": 1.5e-06, |
| "loss": 2.2674, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0030165912518853697, |
| "grad_norm": 2.301421361800185, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.4779, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0036199095022624436, |
| "grad_norm": 2.1048944577756146, |
| "learning_rate": 2.5e-06, |
| "loss": 2.4403, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004223227752639517, |
| "grad_norm": 2.2857994912312045, |
| "learning_rate": 3e-06, |
| "loss": 2.3985, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0048265460030165915, |
| "grad_norm": 1.9373222563858792, |
| "learning_rate": 3.5e-06, |
| "loss": 2.4469, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.005429864253393665, |
| "grad_norm": 1.8828732336223377, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.4803, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.006033182503770739, |
| "grad_norm": 1.770391428824802, |
| "learning_rate": 4.5e-06, |
| "loss": 2.4667, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006636500754147813, |
| "grad_norm": 1.7040163364876169, |
| "learning_rate": 5e-06, |
| "loss": 2.3847, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.007239819004524887, |
| "grad_norm": 1.6348390447246268, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 2.4091, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00784313725490196, |
| "grad_norm": 1.8132153740408123, |
| "learning_rate": 6e-06, |
| "loss": 2.3761, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.008446455505279034, |
| "grad_norm": 1.7688677203718561, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 2.3668, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00904977375565611, |
| "grad_norm": 2.012566995500888, |
| "learning_rate": 7e-06, |
| "loss": 2.541, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.009653092006033183, |
| "grad_norm": 2.344552551150623, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 2.4033, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.010256410256410256, |
| "grad_norm": 2.239204038709957, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.4575, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01085972850678733, |
| "grad_norm": 1.7688865688548743, |
| "learning_rate": 8.5e-06, |
| "loss": 2.3797, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.011463046757164403, |
| "grad_norm": 1.6318435610254944, |
| "learning_rate": 9e-06, |
| "loss": 2.4044, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.012066365007541479, |
| "grad_norm": 1.8510626760551105, |
| "learning_rate": 9.5e-06, |
| "loss": 2.4719, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.012669683257918552, |
| "grad_norm": 1.7111956301378386, |
| "learning_rate": 1e-05, |
| "loss": 2.3634, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.013273001508295626, |
| "grad_norm": 1.9210730110130552, |
| "learning_rate": 1.0500000000000001e-05, |
| "loss": 2.4445, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.013876319758672699, |
| "grad_norm": 1.817913869449993, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 2.4591, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.014479638009049774, |
| "grad_norm": 1.870987381889623, |
| "learning_rate": 1.15e-05, |
| "loss": 2.4188, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.015082956259426848, |
| "grad_norm": 1.7931111751340785, |
| "learning_rate": 1.2e-05, |
| "loss": 2.3321, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01568627450980392, |
| "grad_norm": 1.8300547920846246, |
| "learning_rate": 1.25e-05, |
| "loss": 2.5083, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.016289592760180997, |
| "grad_norm": 1.886339441644327, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 2.4205, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01689291101055807, |
| "grad_norm": 1.672397817180637, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 2.3449, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.017496229260935144, |
| "grad_norm": 1.701493800333666, |
| "learning_rate": 1.4e-05, |
| "loss": 2.3802, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01809954751131222, |
| "grad_norm": 1.7101969275215947, |
| "learning_rate": 1.45e-05, |
| "loss": 2.4433, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01870286576168929, |
| "grad_norm": 1.622468433014236, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 2.3849, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.019306184012066366, |
| "grad_norm": 1.7244402516216317, |
| "learning_rate": 1.55e-05, |
| "loss": 2.3494, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.019909502262443438, |
| "grad_norm": 1.6968966331071, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 2.3825, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.020512820512820513, |
| "grad_norm": 1.7580609504607645, |
| "learning_rate": 1.65e-05, |
| "loss": 2.4379, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.021116138763197588, |
| "grad_norm": 1.764259902209323, |
| "learning_rate": 1.7e-05, |
| "loss": 2.4232, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02171945701357466, |
| "grad_norm": 1.8164208533771704, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 2.364, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.022322775263951735, |
| "grad_norm": 1.689659414180739, |
| "learning_rate": 1.8e-05, |
| "loss": 2.5181, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.022926093514328807, |
| "grad_norm": 1.7385790082200887, |
| "learning_rate": 1.8500000000000002e-05, |
| "loss": 2.4695, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.023529411764705882, |
| "grad_norm": 1.635856548315033, |
| "learning_rate": 1.9e-05, |
| "loss": 2.3484, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.024132730015082957, |
| "grad_norm": 1.7607010990183498, |
| "learning_rate": 1.95e-05, |
| "loss": 2.4922, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02473604826546003, |
| "grad_norm": 1.6736592370180061, |
| "learning_rate": 2e-05, |
| "loss": 2.3143, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.025339366515837104, |
| "grad_norm": 1.7437431992559804, |
| "learning_rate": 1.999998112662482e-05, |
| "loss": 2.36, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02594268476621418, |
| "grad_norm": 1.7788761951573173, |
| "learning_rate": 1.999992450657051e-05, |
| "loss": 2.3651, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02654600301659125, |
| "grad_norm": 1.74387066132697, |
| "learning_rate": 1.9999830140050802e-05, |
| "loss": 2.3465, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.027149321266968326, |
| "grad_norm": 1.7474556357496291, |
| "learning_rate": 1.9999698027421894e-05, |
| "loss": 2.3534, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.027752639517345398, |
| "grad_norm": 1.7289399191537893, |
| "learning_rate": 1.9999528169182472e-05, |
| "loss": 2.3961, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.028355957767722473, |
| "grad_norm": 1.7709476810067486, |
| "learning_rate": 1.999932056597369e-05, |
| "loss": 2.364, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02895927601809955, |
| "grad_norm": 1.7002848185604087, |
| "learning_rate": 1.9999075218579184e-05, |
| "loss": 2.4127, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.02956259426847662, |
| "grad_norm": 1.6668830763317524, |
| "learning_rate": 1.9998792127925066e-05, |
| "loss": 2.4382, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.030165912518853696, |
| "grad_norm": 1.8216191873465934, |
| "learning_rate": 1.9998471295079908e-05, |
| "loss": 2.4013, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03076923076923077, |
| "grad_norm": 1.7185319302782023, |
| "learning_rate": 1.999811272125474e-05, |
| "loss": 2.4387, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03137254901960784, |
| "grad_norm": 1.7446573922589135, |
| "learning_rate": 1.999771640780308e-05, |
| "loss": 2.4049, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.031975867269984914, |
| "grad_norm": 1.7533551196856423, |
| "learning_rate": 1.999728235622087e-05, |
| "loss": 2.383, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03257918552036199, |
| "grad_norm": 2.4428720219194875, |
| "learning_rate": 1.999681056814652e-05, |
| "loss": 2.3984, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.033182503770739065, |
| "grad_norm": 1.9117067623356128, |
| "learning_rate": 1.9996301045360874e-05, |
| "loss": 2.3987, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03378582202111614, |
| "grad_norm": 1.6871395703400287, |
| "learning_rate": 1.9995753789787212e-05, |
| "loss": 2.4025, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.034389140271493215, |
| "grad_norm": 1.806353695603544, |
| "learning_rate": 1.9995168803491246e-05, |
| "loss": 2.285, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03499245852187029, |
| "grad_norm": 1.7210953438365815, |
| "learning_rate": 1.9994546088681116e-05, |
| "loss": 2.4058, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03559577677224736, |
| "grad_norm": 1.6710134197503792, |
| "learning_rate": 1.9993885647707363e-05, |
| "loss": 2.4324, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03619909502262444, |
| "grad_norm": 1.6682273145402082, |
| "learning_rate": 1.9993187483062935e-05, |
| "loss": 2.4344, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03680241327300151, |
| "grad_norm": 1.9225767969813456, |
| "learning_rate": 1.999245159738318e-05, |
| "loss": 2.4027, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03740573152337858, |
| "grad_norm": 1.7589628857228554, |
| "learning_rate": 1.9991677993445832e-05, |
| "loss": 2.4609, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03800904977375565, |
| "grad_norm": 1.9221108803705138, |
| "learning_rate": 1.9990866674170984e-05, |
| "loss": 2.3392, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.03861236802413273, |
| "grad_norm": 1.9187569364342647, |
| "learning_rate": 1.999001764262111e-05, |
| "loss": 2.3234, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0392156862745098, |
| "grad_norm": 1.731376317705545, |
| "learning_rate": 1.9989130902001025e-05, |
| "loss": 2.344, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.039819004524886875, |
| "grad_norm": 1.6530803542103498, |
| "learning_rate": 1.9988206455657887e-05, |
| "loss": 2.4121, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.040422322775263954, |
| "grad_norm": 1.8599525275677855, |
| "learning_rate": 1.9987244307081184e-05, |
| "loss": 2.4073, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.041025641025641026, |
| "grad_norm": 1.871690372257642, |
| "learning_rate": 1.998624445990271e-05, |
| "loss": 2.3929, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0416289592760181, |
| "grad_norm": 1.9038530342428834, |
| "learning_rate": 1.9985206917896563e-05, |
| "loss": 2.4434, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.042232277526395176, |
| "grad_norm": 1.7143239850083882, |
| "learning_rate": 1.9984131684979134e-05, |
| "loss": 2.4668, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04283559577677225, |
| "grad_norm": 1.8822886351415713, |
| "learning_rate": 1.9983018765209067e-05, |
| "loss": 2.4079, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04343891402714932, |
| "grad_norm": 1.8419494425726253, |
| "learning_rate": 1.9981868162787283e-05, |
| "loss": 2.3937, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0440422322775264, |
| "grad_norm": 1.710334016900471, |
| "learning_rate": 1.9980679882056925e-05, |
| "loss": 2.3719, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.04464555052790347, |
| "grad_norm": 1.7409130841301985, |
| "learning_rate": 1.9979453927503366e-05, |
| "loss": 2.5024, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04524886877828054, |
| "grad_norm": 1.9118707931356833, |
| "learning_rate": 1.997819030375419e-05, |
| "loss": 2.3466, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.045852187028657614, |
| "grad_norm": 1.9263333015328319, |
| "learning_rate": 1.9976889015579167e-05, |
| "loss": 2.397, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04645550527903469, |
| "grad_norm": 1.9416725485562814, |
| "learning_rate": 1.997555006789023e-05, |
| "loss": 2.4406, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.047058823529411764, |
| "grad_norm": 1.8059449377845525, |
| "learning_rate": 1.997417346574148e-05, |
| "loss": 2.3462, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.047662141779788836, |
| "grad_norm": 2.182819038622641, |
| "learning_rate": 1.9972759214329142e-05, |
| "loss": 2.3722, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.048265460030165915, |
| "grad_norm": 2.134570927089691, |
| "learning_rate": 1.9971307318991546e-05, |
| "loss": 2.3632, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.048868778280542986, |
| "grad_norm": 2.081827313081015, |
| "learning_rate": 1.9969817785209137e-05, |
| "loss": 2.3737, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04947209653092006, |
| "grad_norm": 1.7992613052147577, |
| "learning_rate": 1.9968290618604413e-05, |
| "loss": 2.4025, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05007541478129714, |
| "grad_norm": 1.7171671224778233, |
| "learning_rate": 1.9966725824941933e-05, |
| "loss": 2.4723, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05067873303167421, |
| "grad_norm": 1.705969280625204, |
| "learning_rate": 1.9965123410128287e-05, |
| "loss": 2.3856, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 1.9104369520615112, |
| "learning_rate": 1.996348338021207e-05, |
| "loss": 2.4186, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05188536953242836, |
| "grad_norm": 2.042807057903928, |
| "learning_rate": 1.9961805741383862e-05, |
| "loss": 2.4081, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05248868778280543, |
| "grad_norm": 2.07582997320187, |
| "learning_rate": 1.99600904999762e-05, |
| "loss": 2.3573, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0530920060331825, |
| "grad_norm": 1.7304234841275201, |
| "learning_rate": 1.995833766246357e-05, |
| "loss": 2.3617, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.053695324283559574, |
| "grad_norm": 1.827279827289296, |
| "learning_rate": 1.995654723546236e-05, |
| "loss": 2.3151, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.05429864253393665, |
| "grad_norm": 1.795039145615716, |
| "learning_rate": 1.9954719225730847e-05, |
| "loss": 2.3447, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.054901960784313725, |
| "grad_norm": 1.9487853189804405, |
| "learning_rate": 1.995285364016918e-05, |
| "loss": 2.3791, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.055505279034690796, |
| "grad_norm": 1.8727113692711528, |
| "learning_rate": 1.9950950485819334e-05, |
| "loss": 2.3648, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.056108597285067875, |
| "grad_norm": 1.796921337605627, |
| "learning_rate": 1.99490097698651e-05, |
| "loss": 2.377, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05671191553544495, |
| "grad_norm": 1.8406515054937658, |
| "learning_rate": 1.994703149963205e-05, |
| "loss": 2.3786, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.05731523378582202, |
| "grad_norm": 1.9102332156136683, |
| "learning_rate": 1.9945015682587512e-05, |
| "loss": 2.4431, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0579185520361991, |
| "grad_norm": 1.74427627997967, |
| "learning_rate": 1.994296232634054e-05, |
| "loss": 2.3747, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05852187028657617, |
| "grad_norm": 1.916801794748654, |
| "learning_rate": 1.994087143864188e-05, |
| "loss": 2.3677, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.05912518853695324, |
| "grad_norm": 1.9361095372662218, |
| "learning_rate": 1.9938743027383966e-05, |
| "loss": 2.3377, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.05972850678733032, |
| "grad_norm": 1.6654143015011817, |
| "learning_rate": 1.9936577100600848e-05, |
| "loss": 2.3843, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06033182503770739, |
| "grad_norm": 1.739958437002719, |
| "learning_rate": 1.9934373666468203e-05, |
| "loss": 2.4005, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06093514328808446, |
| "grad_norm": 1.903097711755015, |
| "learning_rate": 1.9932132733303273e-05, |
| "loss": 2.3827, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06153846153846154, |
| "grad_norm": 1.8795398203703997, |
| "learning_rate": 1.9929854309564858e-05, |
| "loss": 2.3317, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.062141779788838614, |
| "grad_norm": 1.7431133865940558, |
| "learning_rate": 1.992753840385326e-05, |
| "loss": 2.4836, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06274509803921569, |
| "grad_norm": 1.9216277777624893, |
| "learning_rate": 1.992518502491028e-05, |
| "loss": 2.4088, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06334841628959276, |
| "grad_norm": 1.7436250240847813, |
| "learning_rate": 1.992279418161915e-05, |
| "loss": 2.309, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06395173453996983, |
| "grad_norm": 1.80540945976844, |
| "learning_rate": 1.992036588300453e-05, |
| "loss": 2.3845, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06455505279034691, |
| "grad_norm": 1.8598471706172288, |
| "learning_rate": 1.991790013823246e-05, |
| "loss": 2.3712, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.06515837104072399, |
| "grad_norm": 1.7251014111069356, |
| "learning_rate": 1.9915396956610328e-05, |
| "loss": 2.3913, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.06576168929110106, |
| "grad_norm": 1.8148853902478392, |
| "learning_rate": 1.991285634758682e-05, |
| "loss": 2.3982, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.06636500754147813, |
| "grad_norm": 1.7498689126618199, |
| "learning_rate": 1.991027832075192e-05, |
| "loss": 2.4465, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0669683257918552, |
| "grad_norm": 1.7457643777959446, |
| "learning_rate": 1.9907662885836836e-05, |
| "loss": 2.4567, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06757164404223227, |
| "grad_norm": 1.7733087443387425, |
| "learning_rate": 1.9905010052713988e-05, |
| "loss": 2.3656, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.06817496229260935, |
| "grad_norm": 1.78400076670591, |
| "learning_rate": 1.9902319831396956e-05, |
| "loss": 2.4547, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06877828054298643, |
| "grad_norm": 1.8501073003356572, |
| "learning_rate": 1.9899592232040454e-05, |
| "loss": 2.3982, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0693815987933635, |
| "grad_norm": 1.7939030174137776, |
| "learning_rate": 1.989682726494028e-05, |
| "loss": 2.4242, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06998491704374057, |
| "grad_norm": 1.739031017675765, |
| "learning_rate": 1.989402494053329e-05, |
| "loss": 2.309, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07058823529411765, |
| "grad_norm": 1.726265958686578, |
| "learning_rate": 1.9891185269397347e-05, |
| "loss": 2.3979, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07119155354449472, |
| "grad_norm": 1.8241387788082768, |
| "learning_rate": 1.9888308262251286e-05, |
| "loss": 2.4293, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07179487179487179, |
| "grad_norm": 1.796999661875299, |
| "learning_rate": 1.9885393929954876e-05, |
| "loss": 2.4281, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.07239819004524888, |
| "grad_norm": 1.8359034488503743, |
| "learning_rate": 1.988244228350877e-05, |
| "loss": 2.3915, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07300150829562595, |
| "grad_norm": 1.8354333144775479, |
| "learning_rate": 1.9879453334054476e-05, |
| "loss": 2.3626, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07360482654600302, |
| "grad_norm": 1.7464715815370597, |
| "learning_rate": 1.987642709287431e-05, |
| "loss": 2.3856, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07420814479638009, |
| "grad_norm": 1.7365107395117005, |
| "learning_rate": 1.9873363571391344e-05, |
| "loss": 2.3996, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.07481146304675716, |
| "grad_norm": 1.7674652876441117, |
| "learning_rate": 1.9870262781169378e-05, |
| "loss": 2.4401, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07541478129713423, |
| "grad_norm": 1.731649233262622, |
| "learning_rate": 1.986712473391289e-05, |
| "loss": 2.4364, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0760180995475113, |
| "grad_norm": 1.7269243496040354, |
| "learning_rate": 1.9863949441466988e-05, |
| "loss": 2.3828, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07662141779788839, |
| "grad_norm": 1.732468058395144, |
| "learning_rate": 1.9860736915817365e-05, |
| "loss": 2.3844, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07722473604826546, |
| "grad_norm": 1.7808885852323488, |
| "learning_rate": 1.9857487169090265e-05, |
| "loss": 2.4133, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07782805429864253, |
| "grad_norm": 1.9023595705344876, |
| "learning_rate": 1.9854200213552426e-05, |
| "loss": 2.4064, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 1.7783293299159864, |
| "learning_rate": 1.9850876061611036e-05, |
| "loss": 2.4363, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07903469079939668, |
| "grad_norm": 1.874231784614395, |
| "learning_rate": 1.984751472581369e-05, |
| "loss": 2.3655, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.07963800904977375, |
| "grad_norm": 1.7499368726095257, |
| "learning_rate": 1.9844116218848335e-05, |
| "loss": 2.335, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08024132730015084, |
| "grad_norm": 2.6022340689179075, |
| "learning_rate": 1.984068055354323e-05, |
| "loss": 2.347, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08084464555052791, |
| "grad_norm": 1.9347849143168379, |
| "learning_rate": 1.98372077428669e-05, |
| "loss": 2.4458, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08144796380090498, |
| "grad_norm": 1.8916873969542793, |
| "learning_rate": 1.9833697799928074e-05, |
| "loss": 2.419, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08205128205128205, |
| "grad_norm": 1.980019343205032, |
| "learning_rate": 1.9830150737975648e-05, |
| "loss": 2.3749, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08265460030165912, |
| "grad_norm": 1.7724669285888033, |
| "learning_rate": 1.9826566570398622e-05, |
| "loss": 2.4208, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0832579185520362, |
| "grad_norm": 1.749167988541911, |
| "learning_rate": 1.982294531072607e-05, |
| "loss": 2.429, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08386123680241327, |
| "grad_norm": 1.6677676621185875, |
| "learning_rate": 1.9819286972627066e-05, |
| "loss": 2.3928, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.08446455505279035, |
| "grad_norm": 1.9850127221560177, |
| "learning_rate": 1.9815591569910654e-05, |
| "loss": 2.3877, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08506787330316742, |
| "grad_norm": 1.7617725554904797, |
| "learning_rate": 1.9811859116525774e-05, |
| "loss": 2.3771, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0856711915535445, |
| "grad_norm": 1.91372872316542, |
| "learning_rate": 1.9808089626561226e-05, |
| "loss": 2.3356, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08627450980392157, |
| "grad_norm": 1.7024663523887813, |
| "learning_rate": 1.9804283114245605e-05, |
| "loss": 2.4353, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.08687782805429864, |
| "grad_norm": 1.9337653881090566, |
| "learning_rate": 1.9800439593947262e-05, |
| "loss": 2.3856, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08748114630467571, |
| "grad_norm": 1.9735612724311176, |
| "learning_rate": 1.979655908017424e-05, |
| "loss": 2.4385, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0880844645550528, |
| "grad_norm": 1.7880500112263782, |
| "learning_rate": 1.9792641587574212e-05, |
| "loss": 2.4161, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08868778280542987, |
| "grad_norm": 1.9465027868919351, |
| "learning_rate": 1.9788687130934445e-05, |
| "loss": 2.3453, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08929110105580694, |
| "grad_norm": 1.7593991336221384, |
| "learning_rate": 1.9784695725181722e-05, |
| "loss": 2.339, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.08989441930618401, |
| "grad_norm": 1.738025895590932, |
| "learning_rate": 1.9780667385382303e-05, |
| "loss": 2.3519, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.09049773755656108, |
| "grad_norm": 2.024610431953343, |
| "learning_rate": 1.9776602126741867e-05, |
| "loss": 2.3997, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09110105580693816, |
| "grad_norm": 1.7286397557211362, |
| "learning_rate": 1.977249996460544e-05, |
| "loss": 2.4231, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.09170437405731523, |
| "grad_norm": 1.9340147315431204, |
| "learning_rate": 1.9768360914457355e-05, |
| "loss": 2.3326, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09230769230769231, |
| "grad_norm": 1.7528407442090708, |
| "learning_rate": 1.9764184991921178e-05, |
| "loss": 2.4433, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.09291101055806938, |
| "grad_norm": 1.7779252807636488, |
| "learning_rate": 1.9759972212759657e-05, |
| "loss": 2.3569, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.09351432880844646, |
| "grad_norm": 1.8010667696730225, |
| "learning_rate": 1.975572259287467e-05, |
| "loss": 2.3723, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.09411764705882353, |
| "grad_norm": 1.9083856537037562, |
| "learning_rate": 1.9751436148307145e-05, |
| "loss": 2.4458, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0947209653092006, |
| "grad_norm": 1.760952758078511, |
| "learning_rate": 1.9747112895237025e-05, |
| "loss": 2.3695, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.09532428355957767, |
| "grad_norm": 1.7712302777595437, |
| "learning_rate": 1.974275284998318e-05, |
| "loss": 2.3216, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09592760180995476, |
| "grad_norm": 1.8823126275701794, |
| "learning_rate": 1.9738356029003367e-05, |
| "loss": 2.4338, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.09653092006033183, |
| "grad_norm": 1.6853694604703957, |
| "learning_rate": 1.973392244889415e-05, |
| "loss": 2.3782, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0971342383107089, |
| "grad_norm": 1.9145605199325024, |
| "learning_rate": 1.972945212639086e-05, |
| "loss": 2.3911, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.09773755656108597, |
| "grad_norm": 1.8510508481654304, |
| "learning_rate": 1.9724945078367513e-05, |
| "loss": 2.4365, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.09834087481146304, |
| "grad_norm": 1.7055487785395052, |
| "learning_rate": 1.9720401321836742e-05, |
| "loss": 2.3917, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09894419306184012, |
| "grad_norm": 1.8561955372145713, |
| "learning_rate": 1.971582087394976e-05, |
| "loss": 2.4272, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.09954751131221719, |
| "grad_norm": 1.828842762360334, |
| "learning_rate": 1.9711203751996267e-05, |
| "loss": 2.3263, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.10015082956259427, |
| "grad_norm": 2.1464668211309377, |
| "learning_rate": 1.9706549973404394e-05, |
| "loss": 2.4101, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10075414781297135, |
| "grad_norm": 1.7924458294359513, |
| "learning_rate": 1.9701859555740647e-05, |
| "loss": 2.3067, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.10135746606334842, |
| "grad_norm": 1.9914703905505418, |
| "learning_rate": 1.9697132516709826e-05, |
| "loss": 2.4465, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.10196078431372549, |
| "grad_norm": 2.1516167037282234, |
| "learning_rate": 1.9692368874154966e-05, |
| "loss": 2.3477, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 2.2682594453717195, |
| "learning_rate": 1.9687568646057277e-05, |
| "loss": 2.4296, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10316742081447963, |
| "grad_norm": 1.7325214885723506, |
| "learning_rate": 1.9682731850536054e-05, |
| "loss": 2.3712, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.10377073906485672, |
| "grad_norm": 1.8188841894294345, |
| "learning_rate": 1.9677858505848627e-05, |
| "loss": 2.3909, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.10437405731523379, |
| "grad_norm": 1.8475101959381366, |
| "learning_rate": 1.9672948630390296e-05, |
| "loss": 2.3942, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.10497737556561086, |
| "grad_norm": 2.0699573667060194, |
| "learning_rate": 1.966800224269424e-05, |
| "loss": 2.4203, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.10558069381598793, |
| "grad_norm": 1.8970948583101985, |
| "learning_rate": 1.966301936143146e-05, |
| "loss": 2.378, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.106184012066365, |
| "grad_norm": 1.918771810482122, |
| "learning_rate": 1.965800000541072e-05, |
| "loss": 2.3355, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10678733031674208, |
| "grad_norm": 1.9104732128969586, |
| "learning_rate": 1.965294419357846e-05, |
| "loss": 2.3714, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.10739064856711915, |
| "grad_norm": 1.9623062607289763, |
| "learning_rate": 1.9647851945018723e-05, |
| "loss": 2.3975, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.10799396681749623, |
| "grad_norm": 1.7978825611011124, |
| "learning_rate": 1.9642723278953097e-05, |
| "loss": 2.3132, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.1085972850678733, |
| "grad_norm": 1.8053266937593022, |
| "learning_rate": 1.9637558214740618e-05, |
| "loss": 2.465, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10920060331825038, |
| "grad_norm": 1.889779034267551, |
| "learning_rate": 1.9632356771877735e-05, |
| "loss": 2.3872, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.10980392156862745, |
| "grad_norm": 1.8803842729323514, |
| "learning_rate": 1.9627118969998204e-05, |
| "loss": 2.3778, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11040723981900452, |
| "grad_norm": 1.9505864821578314, |
| "learning_rate": 1.9621844828873024e-05, |
| "loss": 2.4465, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11101055806938159, |
| "grad_norm": 1.815874396814083, |
| "learning_rate": 1.9616534368410364e-05, |
| "loss": 2.376, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.11161387631975868, |
| "grad_norm": 2.0291364738085362, |
| "learning_rate": 1.9611187608655484e-05, |
| "loss": 2.4539, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.11221719457013575, |
| "grad_norm": 1.7293291522046839, |
| "learning_rate": 1.9605804569790667e-05, |
| "loss": 2.3774, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.11282051282051282, |
| "grad_norm": 1.8271832077043337, |
| "learning_rate": 1.9600385272135133e-05, |
| "loss": 2.3419, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.1134238310708899, |
| "grad_norm": 1.9073281870355994, |
| "learning_rate": 1.9594929736144978e-05, |
| "loss": 2.4058, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.11402714932126697, |
| "grad_norm": 1.8920093693507751, |
| "learning_rate": 1.958943798241306e-05, |
| "loss": 2.3835, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.11463046757164404, |
| "grad_norm": 1.9721387572671636, |
| "learning_rate": 1.9583910031668984e-05, |
| "loss": 2.4108, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11523378582202111, |
| "grad_norm": 1.8457198854743908, |
| "learning_rate": 1.9578345904778956e-05, |
| "loss": 2.4417, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.1158371040723982, |
| "grad_norm": 2.111648646737359, |
| "learning_rate": 1.957274562274575e-05, |
| "loss": 2.4391, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.11644042232277527, |
| "grad_norm": 2.3730141778195293, |
| "learning_rate": 1.9567109206708615e-05, |
| "loss": 2.3304, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.11704374057315234, |
| "grad_norm": 2.0315724564258373, |
| "learning_rate": 1.9561436677943183e-05, |
| "loss": 2.3952, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 1.681575439138192, |
| "learning_rate": 1.955572805786141e-05, |
| "loss": 2.3506, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.11825037707390648, |
| "grad_norm": 1.788871747181257, |
| "learning_rate": 1.954998336801148e-05, |
| "loss": 2.3203, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.11885369532428355, |
| "grad_norm": 1.8503621082397483, |
| "learning_rate": 1.9544202630077733e-05, |
| "loss": 2.5021, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.11945701357466064, |
| "grad_norm": 1.9772710060608363, |
| "learning_rate": 1.9538385865880574e-05, |
| "loss": 2.3863, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12006033182503771, |
| "grad_norm": 1.7920396633477924, |
| "learning_rate": 1.95325330973764e-05, |
| "loss": 2.3542, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.12066365007541478, |
| "grad_norm": 1.9252370879367382, |
| "learning_rate": 1.9526644346657508e-05, |
| "loss": 2.388, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12126696832579185, |
| "grad_norm": 1.7917196698282978, |
| "learning_rate": 1.9520719635952015e-05, |
| "loss": 2.4271, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.12187028657616893, |
| "grad_norm": 1.7496514390158402, |
| "learning_rate": 1.9514758987623784e-05, |
| "loss": 2.443, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.122473604826546, |
| "grad_norm": 1.9458516540089497, |
| "learning_rate": 1.9508762424172326e-05, |
| "loss": 2.4568, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.12307692307692308, |
| "grad_norm": 1.8063666366956526, |
| "learning_rate": 1.9502729968232718e-05, |
| "loss": 2.3891, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.12368024132730016, |
| "grad_norm": 1.7536255425649965, |
| "learning_rate": 1.9496661642575517e-05, |
| "loss": 2.3578, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.12428355957767723, |
| "grad_norm": 1.8450234221910247, |
| "learning_rate": 1.949055747010669e-05, |
| "loss": 2.3632, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1248868778280543, |
| "grad_norm": 1.8760594417203018, |
| "learning_rate": 1.9484417473867493e-05, |
| "loss": 2.3541, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.12549019607843137, |
| "grad_norm": 1.7663666525781663, |
| "learning_rate": 1.9478241677034422e-05, |
| "loss": 2.3477, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.12549019607843137, |
| "eval_loss": 2.3970625400543213, |
| "eval_runtime": 22.1187, |
| "eval_samples_per_second": 3.979, |
| "eval_steps_per_second": 0.497, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.12609351432880844, |
| "grad_norm": 1.592698799935792, |
| "learning_rate": 1.9472030102919102e-05, |
| "loss": 2.3408, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.12669683257918551, |
| "grad_norm": 1.7453187285581997, |
| "learning_rate": 1.946578277496821e-05, |
| "loss": 2.2977, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.12730015082956259, |
| "grad_norm": 1.7030576182449984, |
| "learning_rate": 1.9459499716763376e-05, |
| "loss": 2.3966, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.12790346907993966, |
| "grad_norm": 1.9418163249698999, |
| "learning_rate": 1.94531809520211e-05, |
| "loss": 2.3932, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.12850678733031673, |
| "grad_norm": 1.9115180621965062, |
| "learning_rate": 1.944682650459267e-05, |
| "loss": 2.4014, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.12911010558069383, |
| "grad_norm": 1.8587614833043946, |
| "learning_rate": 1.944043639846406e-05, |
| "loss": 2.4471, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1297134238310709, |
| "grad_norm": 2.015076007850878, |
| "learning_rate": 1.943401065775584e-05, |
| "loss": 2.4381, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.13031674208144797, |
| "grad_norm": 1.915288111900487, |
| "learning_rate": 1.94275493067231e-05, |
| "loss": 2.2686, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.13092006033182504, |
| "grad_norm": 1.9180430163339584, |
| "learning_rate": 1.9421052369755335e-05, |
| "loss": 2.3931, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.13152337858220212, |
| "grad_norm": 1.9604487273843694, |
| "learning_rate": 1.9414519871376373e-05, |
| "loss": 2.4332, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1321266968325792, |
| "grad_norm": 1.9174581601925882, |
| "learning_rate": 1.940795183624427e-05, |
| "loss": 2.4211, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.13273001508295626, |
| "grad_norm": 1.943376592580462, |
| "learning_rate": 1.940134828915123e-05, |
| "loss": 2.3691, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 1.8170672134115446, |
| "learning_rate": 1.9394709255023488e-05, |
| "loss": 2.3705, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1339366515837104, |
| "grad_norm": 1.8996487790159449, |
| "learning_rate": 1.9388034758921247e-05, |
| "loss": 2.3874, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.13453996983408748, |
| "grad_norm": 2.0789020898005166, |
| "learning_rate": 1.938132482603856e-05, |
| "loss": 2.4122, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.13514328808446455, |
| "grad_norm": 2.3124371595134243, |
| "learning_rate": 1.9374579481703244e-05, |
| "loss": 2.3854, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.13574660633484162, |
| "grad_norm": 2.2679685622803123, |
| "learning_rate": 1.936779875137678e-05, |
| "loss": 2.4774, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.1363499245852187, |
| "grad_norm": 1.9820900351023627, |
| "learning_rate": 1.936098266065422e-05, |
| "loss": 2.4126, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1369532428355958, |
| "grad_norm": 1.7859284260510715, |
| "learning_rate": 1.93541312352641e-05, |
| "loss": 2.367, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.13755656108597286, |
| "grad_norm": 1.7574332004410753, |
| "learning_rate": 1.934724450106831e-05, |
| "loss": 2.3353, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.13815987933634993, |
| "grad_norm": 1.8454300169336622, |
| "learning_rate": 1.934032248406205e-05, |
| "loss": 2.4519, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.138763197586727, |
| "grad_norm": 2.1927204914704, |
| "learning_rate": 1.9333365210373668e-05, |
| "loss": 2.5473, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.13936651583710408, |
| "grad_norm": 2.0054427145982707, |
| "learning_rate": 1.9326372706264625e-05, |
| "loss": 2.3596, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.13996983408748115, |
| "grad_norm": 1.8243562565754317, |
| "learning_rate": 1.9319344998129344e-05, |
| "loss": 2.3503, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.14057315233785822, |
| "grad_norm": 2.002408622083468, |
| "learning_rate": 1.9312282112495146e-05, |
| "loss": 2.3835, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.1411764705882353, |
| "grad_norm": 2.067448917796081, |
| "learning_rate": 1.9305184076022117e-05, |
| "loss": 2.4941, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.14177978883861236, |
| "grad_norm": 2.071059748009206, |
| "learning_rate": 1.9298050915503053e-05, |
| "loss": 2.3011, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.14238310708898944, |
| "grad_norm": 1.8223690330691251, |
| "learning_rate": 1.929088265786331e-05, |
| "loss": 2.3448, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1429864253393665, |
| "grad_norm": 1.9649289609379725, |
| "learning_rate": 1.9283679330160726e-05, |
| "loss": 2.404, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.14358974358974358, |
| "grad_norm": 1.8402195523194098, |
| "learning_rate": 1.9276440959585533e-05, |
| "loss": 2.2817, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.14419306184012065, |
| "grad_norm": 1.8343107897406508, |
| "learning_rate": 1.926916757346022e-05, |
| "loss": 2.3819, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.14479638009049775, |
| "grad_norm": 1.990742200302091, |
| "learning_rate": 1.926185919923946e-05, |
| "loss": 2.3836, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.14539969834087482, |
| "grad_norm": 2.18088582410677, |
| "learning_rate": 1.9254515864509982e-05, |
| "loss": 2.4103, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.1460030165912519, |
| "grad_norm": 1.87907229653626, |
| "learning_rate": 1.92471375969905e-05, |
| "loss": 2.3973, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.14660633484162897, |
| "grad_norm": 1.8475576105015994, |
| "learning_rate": 1.9239724424531575e-05, |
| "loss": 2.3917, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.14720965309200604, |
| "grad_norm": 2.055658433766295, |
| "learning_rate": 1.9232276375115517e-05, |
| "loss": 2.3383, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1478129713423831, |
| "grad_norm": 2.1041231379062935, |
| "learning_rate": 1.9224793476856293e-05, |
| "loss": 2.3808, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.14841628959276018, |
| "grad_norm": 2.072538470281817, |
| "learning_rate": 1.9217275757999418e-05, |
| "loss": 2.3157, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.14901960784313725, |
| "grad_norm": 1.8558398330207053, |
| "learning_rate": 1.9209723246921837e-05, |
| "loss": 2.4278, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.14962292609351432, |
| "grad_norm": 1.8255106731337276, |
| "learning_rate": 1.920213597213182e-05, |
| "loss": 2.396, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1502262443438914, |
| "grad_norm": 2.29854070332297, |
| "learning_rate": 1.9194513962268865e-05, |
| "loss": 2.3833, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.15082956259426847, |
| "grad_norm": 1.997730060781554, |
| "learning_rate": 1.9186857246103586e-05, |
| "loss": 2.4327, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15143288084464554, |
| "grad_norm": 1.8277428582585862, |
| "learning_rate": 1.9179165852537596e-05, |
| "loss": 2.4328, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1520361990950226, |
| "grad_norm": 1.8068053318064188, |
| "learning_rate": 1.9171439810603406e-05, |
| "loss": 2.3604, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1526395173453997, |
| "grad_norm": 1.9030580310166223, |
| "learning_rate": 1.9163679149464313e-05, |
| "loss": 2.4525, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.15324283559577678, |
| "grad_norm": 2.148689163299343, |
| "learning_rate": 1.9155883898414292e-05, |
| "loss": 2.4487, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 2.636326374028854, |
| "learning_rate": 1.9148054086877884e-05, |
| "loss": 2.4019, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.15444947209653093, |
| "grad_norm": 2.2875442599864138, |
| "learning_rate": 1.914018974441008e-05, |
| "loss": 2.3296, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.155052790346908, |
| "grad_norm": 1.9038065532836297, |
| "learning_rate": 1.913229090069622e-05, |
| "loss": 2.3768, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.15565610859728507, |
| "grad_norm": 1.799523347575496, |
| "learning_rate": 1.9124357585551872e-05, |
| "loss": 2.3565, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.15625942684766214, |
| "grad_norm": 2.554360566381463, |
| "learning_rate": 1.9116389828922717e-05, |
| "loss": 2.501, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 2.1864975862465603, |
| "learning_rate": 1.9108387660884456e-05, |
| "loss": 2.3677, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.15746606334841629, |
| "grad_norm": 1.9958565133131163, |
| "learning_rate": 1.9100351111642666e-05, |
| "loss": 2.41, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.15806938159879336, |
| "grad_norm": 1.7747256573037626, |
| "learning_rate": 1.9092280211532715e-05, |
| "loss": 2.3605, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.15867269984917043, |
| "grad_norm": 1.6053666596618383, |
| "learning_rate": 1.9084174991019622e-05, |
| "loss": 2.3649, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1592760180995475, |
| "grad_norm": 1.7529886672743595, |
| "learning_rate": 1.9076035480697964e-05, |
| "loss": 2.3464, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.15987933634992457, |
| "grad_norm": 1.8311834709458998, |
| "learning_rate": 1.9067861711291744e-05, |
| "loss": 2.4533, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.16048265460030167, |
| "grad_norm": 1.7726486534434671, |
| "learning_rate": 1.905965371365429e-05, |
| "loss": 2.4056, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.16108597285067874, |
| "grad_norm": 2.0550030984404213, |
| "learning_rate": 1.9051411518768126e-05, |
| "loss": 2.394, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.16168929110105582, |
| "grad_norm": 1.7669710679260708, |
| "learning_rate": 1.9043135157744853e-05, |
| "loss": 2.3723, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1622926093514329, |
| "grad_norm": 1.8777975727584983, |
| "learning_rate": 1.9034824661825048e-05, |
| "loss": 2.3603, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.16289592760180996, |
| "grad_norm": 1.935796347721567, |
| "learning_rate": 1.9026480062378136e-05, |
| "loss": 2.3601, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16349924585218703, |
| "grad_norm": 2.010363683111041, |
| "learning_rate": 1.9018101390902262e-05, |
| "loss": 2.4271, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.1641025641025641, |
| "grad_norm": 1.8516623115874755, |
| "learning_rate": 1.900968867902419e-05, |
| "loss": 2.3637, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.16470588235294117, |
| "grad_norm": 1.788273551595415, |
| "learning_rate": 1.900124195849918e-05, |
| "loss": 2.3673, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.16530920060331825, |
| "grad_norm": 1.811773390862476, |
| "learning_rate": 1.8992761261210848e-05, |
| "loss": 2.4597, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.16591251885369532, |
| "grad_norm": 2.0646022057034723, |
| "learning_rate": 1.8984246619171075e-05, |
| "loss": 2.3945, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1665158371040724, |
| "grad_norm": 1.8228637291719356, |
| "learning_rate": 1.8975698064519865e-05, |
| "loss": 2.3747, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.16711915535444946, |
| "grad_norm": 2.0357715504851033, |
| "learning_rate": 1.8967115629525238e-05, |
| "loss": 2.4305, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.16772247360482653, |
| "grad_norm": 1.8806892592912532, |
| "learning_rate": 1.8958499346583092e-05, |
| "loss": 2.4659, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.16832579185520363, |
| "grad_norm": 1.8299663755931939, |
| "learning_rate": 1.89498492482171e-05, |
| "loss": 2.2796, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1689291101055807, |
| "grad_norm": 1.8549052690023438, |
| "learning_rate": 1.894116536707857e-05, |
| "loss": 2.4846, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.16953242835595778, |
| "grad_norm": 2.01979326739457, |
| "learning_rate": 1.8932447735946332e-05, |
| "loss": 2.3653, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.17013574660633485, |
| "grad_norm": 1.921195088621264, |
| "learning_rate": 1.892369638772661e-05, |
| "loss": 2.401, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.17073906485671192, |
| "grad_norm": 2.1371043076225087, |
| "learning_rate": 1.8914911355452895e-05, |
| "loss": 2.3625, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.171342383107089, |
| "grad_norm": 1.7344453161696207, |
| "learning_rate": 1.8906092672285842e-05, |
| "loss": 2.3967, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.17194570135746606, |
| "grad_norm": 1.8690649602037965, |
| "learning_rate": 1.8897240371513098e-05, |
| "loss": 2.4613, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.17254901960784313, |
| "grad_norm": 1.7407275349017972, |
| "learning_rate": 1.8888354486549238e-05, |
| "loss": 2.3812, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1731523378582202, |
| "grad_norm": 2.04205458370752, |
| "learning_rate": 1.8879435050935577e-05, |
| "loss": 2.3417, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.17375565610859728, |
| "grad_norm": 1.7278652356135291, |
| "learning_rate": 1.887048209834009e-05, |
| "loss": 2.3437, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.17435897435897435, |
| "grad_norm": 1.7583874021257075, |
| "learning_rate": 1.8861495662557264e-05, |
| "loss": 2.4033, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.17496229260935142, |
| "grad_norm": 1.863241146572609, |
| "learning_rate": 1.8852475777507983e-05, |
| "loss": 2.3677, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1755656108597285, |
| "grad_norm": 1.8653641337953326, |
| "learning_rate": 1.8843422477239362e-05, |
| "loss": 2.456, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1761689291101056, |
| "grad_norm": 1.7776126571499877, |
| "learning_rate": 1.8834335795924686e-05, |
| "loss": 2.4091, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.17677224736048266, |
| "grad_norm": 1.7570559673454416, |
| "learning_rate": 1.8825215767863215e-05, |
| "loss": 2.4307, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.17737556561085974, |
| "grad_norm": 1.9052460328013197, |
| "learning_rate": 1.881606242748009e-05, |
| "loss": 2.407, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1779788838612368, |
| "grad_norm": 1.972445114283892, |
| "learning_rate": 1.8806875809326204e-05, |
| "loss": 2.3553, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.17858220211161388, |
| "grad_norm": 1.7997390190238367, |
| "learning_rate": 1.879765594807805e-05, |
| "loss": 2.4215, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.17918552036199095, |
| "grad_norm": 2.058979279045622, |
| "learning_rate": 1.878840287853761e-05, |
| "loss": 2.3919, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.17978883861236802, |
| "grad_norm": 1.7717249063838723, |
| "learning_rate": 1.877911663563221e-05, |
| "loss": 2.4244, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1803921568627451, |
| "grad_norm": 2.0093236997931463, |
| "learning_rate": 1.8769797254414406e-05, |
| "loss": 2.4132, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.18099547511312217, |
| "grad_norm": 1.8173301125856807, |
| "learning_rate": 1.876044477006183e-05, |
| "loss": 2.4322, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18159879336349924, |
| "grad_norm": 2.152436375631988, |
| "learning_rate": 1.875105921787707e-05, |
| "loss": 2.3923, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1822021116138763, |
| "grad_norm": 1.8521342794831184, |
| "learning_rate": 1.874164063328754e-05, |
| "loss": 2.346, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.18280542986425338, |
| "grad_norm": 1.8949500548076719, |
| "learning_rate": 1.8732189051845328e-05, |
| "loss": 2.4535, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.18340874811463045, |
| "grad_norm": 1.8548204739343872, |
| "learning_rate": 1.8722704509227094e-05, |
| "loss": 2.3634, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.18401206636500755, |
| "grad_norm": 2.0044853401637748, |
| "learning_rate": 1.8713187041233896e-05, |
| "loss": 2.353, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.18461538461538463, |
| "grad_norm": 1.8962192812988332, |
| "learning_rate": 1.8703636683791084e-05, |
| "loss": 2.347, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1852187028657617, |
| "grad_norm": 1.8762931478739766, |
| "learning_rate": 1.8694053472948154e-05, |
| "loss": 2.4613, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.18582202111613877, |
| "grad_norm": 1.775862031858868, |
| "learning_rate": 1.868443744487862e-05, |
| "loss": 2.36, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.18642533936651584, |
| "grad_norm": 1.7944201096764627, |
| "learning_rate": 1.8674788635879848e-05, |
| "loss": 2.449, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1870286576168929, |
| "grad_norm": 1.8128871531412751, |
| "learning_rate": 1.866510708237297e-05, |
| "loss": 2.4367, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.18763197586726998, |
| "grad_norm": 1.7802612520365133, |
| "learning_rate": 1.8655392820902695e-05, |
| "loss": 2.3369, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.18823529411764706, |
| "grad_norm": 2.2309703895171364, |
| "learning_rate": 1.8645645888137213e-05, |
| "loss": 2.4264, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.18883861236802413, |
| "grad_norm": 1.7727549439698453, |
| "learning_rate": 1.8635866320868023e-05, |
| "loss": 2.3847, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.1894419306184012, |
| "grad_norm": 1.8039382080690232, |
| "learning_rate": 1.8626054156009807e-05, |
| "loss": 2.325, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.19004524886877827, |
| "grad_norm": 1.877849837240779, |
| "learning_rate": 1.861620943060031e-05, |
| "loss": 2.4105, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.19064856711915534, |
| "grad_norm": 1.8964244614031414, |
| "learning_rate": 1.8606332181800165e-05, |
| "loss": 2.3869, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.19125188536953242, |
| "grad_norm": 1.7787737145042568, |
| "learning_rate": 1.8596422446892774e-05, |
| "loss": 2.3289, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.19185520361990951, |
| "grad_norm": 1.8520073770815342, |
| "learning_rate": 1.8586480263284174e-05, |
| "loss": 2.3654, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1924585218702866, |
| "grad_norm": 2.0189483743742196, |
| "learning_rate": 1.8576505668502872e-05, |
| "loss": 2.4469, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.19306184012066366, |
| "grad_norm": 1.739253673937665, |
| "learning_rate": 1.856649870019972e-05, |
| "loss": 2.3625, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.19366515837104073, |
| "grad_norm": 1.773893768635032, |
| "learning_rate": 1.8556459396147777e-05, |
| "loss": 2.4035, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1942684766214178, |
| "grad_norm": 1.7946696459095224, |
| "learning_rate": 1.8546387794242148e-05, |
| "loss": 2.376, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.19487179487179487, |
| "grad_norm": 1.8538835498926256, |
| "learning_rate": 1.853628393249986e-05, |
| "loss": 2.3625, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.19547511312217195, |
| "grad_norm": 1.797967680017188, |
| "learning_rate": 1.8526147849059705e-05, |
| "loss": 2.4747, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 1.9224740564061376, |
| "learning_rate": 1.8515979582182112e-05, |
| "loss": 2.3916, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1966817496229261, |
| "grad_norm": 2.0564977386941985, |
| "learning_rate": 1.8505779170248978e-05, |
| "loss": 2.3166, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.19728506787330316, |
| "grad_norm": 1.808289138294294, |
| "learning_rate": 1.849554665176354e-05, |
| "loss": 2.4381, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.19788838612368023, |
| "grad_norm": 1.8842853314907695, |
| "learning_rate": 1.8485282065350237e-05, |
| "loss": 2.3487, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1984917043740573, |
| "grad_norm": 1.8142033636317016, |
| "learning_rate": 1.8474985449754543e-05, |
| "loss": 2.4145, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.19909502262443438, |
| "grad_norm": 2.034938580816979, |
| "learning_rate": 1.8464656843842837e-05, |
| "loss": 2.3543, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.19969834087481148, |
| "grad_norm": 2.2995536408204083, |
| "learning_rate": 1.845429628660225e-05, |
| "loss": 2.4052, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.20030165912518855, |
| "grad_norm": 1.9107922335599887, |
| "learning_rate": 1.8443903817140517e-05, |
| "loss": 2.331, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.20090497737556562, |
| "grad_norm": 1.8530530610478275, |
| "learning_rate": 1.8433479474685837e-05, |
| "loss": 2.391, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2015082956259427, |
| "grad_norm": 1.7772675731075585, |
| "learning_rate": 1.8423023298586716e-05, |
| "loss": 2.3647, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.20211161387631976, |
| "grad_norm": 1.7438120884628647, |
| "learning_rate": 1.8412535328311813e-05, |
| "loss": 2.4638, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.20271493212669683, |
| "grad_norm": 1.939231045048208, |
| "learning_rate": 1.8402015603449814e-05, |
| "loss": 2.3401, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.2033182503770739, |
| "grad_norm": 2.1156637551867075, |
| "learning_rate": 1.839146416370926e-05, |
| "loss": 2.3731, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.20392156862745098, |
| "grad_norm": 1.721959556886391, |
| "learning_rate": 1.8380881048918406e-05, |
| "loss": 2.3552, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.20452488687782805, |
| "grad_norm": 1.7740805853861341, |
| "learning_rate": 1.8370266299025076e-05, |
| "loss": 2.3339, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 1.8222550385292156, |
| "learning_rate": 1.8359619954096497e-05, |
| "loss": 2.3466, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2057315233785822, |
| "grad_norm": 1.9260210550895165, |
| "learning_rate": 1.8348942054319164e-05, |
| "loss": 2.3875, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.20633484162895926, |
| "grad_norm": 2.050552910276077, |
| "learning_rate": 1.8338232639998672e-05, |
| "loss": 2.3296, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.20693815987933634, |
| "grad_norm": 1.9365149897495877, |
| "learning_rate": 1.832749175155959e-05, |
| "loss": 2.3629, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.20754147812971344, |
| "grad_norm": 1.7559234344283723, |
| "learning_rate": 1.8316719429545277e-05, |
| "loss": 2.4042, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.2081447963800905, |
| "grad_norm": 1.9011593642730806, |
| "learning_rate": 1.8305915714617745e-05, |
| "loss": 2.4089, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.20874811463046758, |
| "grad_norm": 1.8581574541544932, |
| "learning_rate": 1.8295080647557507e-05, |
| "loss": 2.4676, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.20935143288084465, |
| "grad_norm": 1.669412385871575, |
| "learning_rate": 1.828421426926343e-05, |
| "loss": 2.4181, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.20995475113122172, |
| "grad_norm": 1.7111028859089021, |
| "learning_rate": 1.8273316620752548e-05, |
| "loss": 2.3814, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.2105580693815988, |
| "grad_norm": 1.8094190923284823, |
| "learning_rate": 1.826238774315995e-05, |
| "loss": 2.368, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.21116138763197587, |
| "grad_norm": 1.9349580280278726, |
| "learning_rate": 1.8251427677738596e-05, |
| "loss": 2.4115, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21176470588235294, |
| "grad_norm": 1.9056928493906655, |
| "learning_rate": 1.824043646585917e-05, |
| "loss": 2.3545, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.21236802413273, |
| "grad_norm": 1.6630940301503085, |
| "learning_rate": 1.822941414900993e-05, |
| "loss": 2.3402, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.21297134238310708, |
| "grad_norm": 1.7549145614861923, |
| "learning_rate": 1.8218360768796534e-05, |
| "loss": 2.4039, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.21357466063348415, |
| "grad_norm": 1.8427236861415082, |
| "learning_rate": 1.8207276366941905e-05, |
| "loss": 2.3381, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.21417797888386123, |
| "grad_norm": 2.235373185504055, |
| "learning_rate": 1.8196160985286052e-05, |
| "loss": 2.3765, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.2147812971342383, |
| "grad_norm": 1.770871765959977, |
| "learning_rate": 1.8185014665785936e-05, |
| "loss": 2.3681, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.2153846153846154, |
| "grad_norm": 1.8385630389005978, |
| "learning_rate": 1.8173837450515286e-05, |
| "loss": 2.4128, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.21598793363499247, |
| "grad_norm": 1.7678911062682983, |
| "learning_rate": 1.816262938166446e-05, |
| "loss": 2.3674, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.21659125188536954, |
| "grad_norm": 1.9123835544861427, |
| "learning_rate": 1.8151390501540276e-05, |
| "loss": 2.3439, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2171945701357466, |
| "grad_norm": 2.0786843839491533, |
| "learning_rate": 1.814012085256585e-05, |
| "loss": 2.4165, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.21779788838612368, |
| "grad_norm": 1.8542968655837673, |
| "learning_rate": 1.812882047728045e-05, |
| "loss": 2.3766, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.21840120663650076, |
| "grad_norm": 1.9816967730585244, |
| "learning_rate": 1.8117489418339317e-05, |
| "loss": 2.4764, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.21900452488687783, |
| "grad_norm": 1.7957823857988076, |
| "learning_rate": 1.810612771851352e-05, |
| "loss": 2.3854, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.2196078431372549, |
| "grad_norm": 1.6610433514447112, |
| "learning_rate": 1.8094735420689776e-05, |
| "loss": 2.3259, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.22021116138763197, |
| "grad_norm": 1.8308638860416302, |
| "learning_rate": 1.8083312567870315e-05, |
| "loss": 2.3721, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.22081447963800904, |
| "grad_norm": 1.8325626958437702, |
| "learning_rate": 1.8071859203172694e-05, |
| "loss": 2.377, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.22141779788838611, |
| "grad_norm": 1.9547508634110777, |
| "learning_rate": 1.8060375369829634e-05, |
| "loss": 2.4247, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.22202111613876319, |
| "grad_norm": 1.8649123365121487, |
| "learning_rate": 1.8048861111188886e-05, |
| "loss": 2.3844, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.22262443438914026, |
| "grad_norm": 1.74624962518136, |
| "learning_rate": 1.803731647071303e-05, |
| "loss": 2.4366, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.22322775263951736, |
| "grad_norm": 1.8219871819075906, |
| "learning_rate": 1.8025741491979326e-05, |
| "loss": 2.3105, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.22383107088989443, |
| "grad_norm": 2.1182021323134954, |
| "learning_rate": 1.8014136218679566e-05, |
| "loss": 2.318, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2244343891402715, |
| "grad_norm": 1.7494056659050516, |
| "learning_rate": 1.8002500694619884e-05, |
| "loss": 2.3206, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.22503770739064857, |
| "grad_norm": 2.2164869617170266, |
| "learning_rate": 1.79908349637206e-05, |
| "loss": 2.4067, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.22564102564102564, |
| "grad_norm": 1.9630568714796657, |
| "learning_rate": 1.7979139070016054e-05, |
| "loss": 2.3345, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 1.994047154202555, |
| "learning_rate": 1.7967413057654452e-05, |
| "loss": 2.4362, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.2268476621417798, |
| "grad_norm": 1.9394175558405136, |
| "learning_rate": 1.7955656970897673e-05, |
| "loss": 2.427, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.22745098039215686, |
| "grad_norm": 2.143445611041672, |
| "learning_rate": 1.7943870854121126e-05, |
| "loss": 2.4238, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.22805429864253393, |
| "grad_norm": 2.3004226826510754, |
| "learning_rate": 1.7932054751813574e-05, |
| "loss": 2.3988, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.228657616892911, |
| "grad_norm": 1.7127745874017524, |
| "learning_rate": 1.7920208708576962e-05, |
| "loss": 2.3783, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.22926093514328807, |
| "grad_norm": 1.8221494141382277, |
| "learning_rate": 1.7908332769126255e-05, |
| "loss": 2.4106, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.22986425339366515, |
| "grad_norm": 2.0184574556961485, |
| "learning_rate": 1.7896426978289266e-05, |
| "loss": 2.3633, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.23046757164404222, |
| "grad_norm": 1.8606731026968055, |
| "learning_rate": 1.788449138100648e-05, |
| "loss": 2.4626, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.23107088989441932, |
| "grad_norm": 1.882291435136788, |
| "learning_rate": 1.7872526022330902e-05, |
| "loss": 2.4771, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.2316742081447964, |
| "grad_norm": 1.899729423493797, |
| "learning_rate": 1.7860530947427878e-05, |
| "loss": 2.4091, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.23227752639517346, |
| "grad_norm": 1.706959359533271, |
| "learning_rate": 1.784850620157491e-05, |
| "loss": 2.3219, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.23288084464555053, |
| "grad_norm": 1.9187456706408401, |
| "learning_rate": 1.7836451830161508e-05, |
| "loss": 2.3767, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2334841628959276, |
| "grad_norm": 1.9626088909394706, |
| "learning_rate": 1.782436787868901e-05, |
| "loss": 2.3429, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.23408748114630468, |
| "grad_norm": 1.9942190856904858, |
| "learning_rate": 1.7812254392770404e-05, |
| "loss": 2.4339, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.23469079939668175, |
| "grad_norm": 1.7936311787007881, |
| "learning_rate": 1.7800111418130157e-05, |
| "loss": 2.4009, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 1.7321179132501958, |
| "learning_rate": 1.7787939000604063e-05, |
| "loss": 2.3799, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2358974358974359, |
| "grad_norm": 1.862467975513408, |
| "learning_rate": 1.777573718613904e-05, |
| "loss": 2.5027, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.23650075414781296, |
| "grad_norm": 1.9080097798672786, |
| "learning_rate": 1.7763506020792968e-05, |
| "loss": 2.3809, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.23710407239819004, |
| "grad_norm": 1.8141940878625136, |
| "learning_rate": 1.775124555073452e-05, |
| "loss": 2.3633, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.2377073906485671, |
| "grad_norm": 1.906573233645706, |
| "learning_rate": 1.773895582224299e-05, |
| "loss": 2.4482, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2383107088989442, |
| "grad_norm": 1.957367683643951, |
| "learning_rate": 1.7726636881708114e-05, |
| "loss": 2.2908, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.23891402714932128, |
| "grad_norm": 1.7675271917750348, |
| "learning_rate": 1.771428877562988e-05, |
| "loss": 2.3769, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.23951734539969835, |
| "grad_norm": 2.042449439254776, |
| "learning_rate": 1.7701911550618383e-05, |
| "loss": 2.3632, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.24012066365007542, |
| "grad_norm": 2.3068254018217065, |
| "learning_rate": 1.768950525339362e-05, |
| "loss": 2.4363, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2407239819004525, |
| "grad_norm": 2.0732986884998224, |
| "learning_rate": 1.7677069930785338e-05, |
| "loss": 2.3961, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.24132730015082957, |
| "grad_norm": 1.7517108830779222, |
| "learning_rate": 1.7664605629732832e-05, |
| "loss": 2.3616, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24193061840120664, |
| "grad_norm": 1.76566025562354, |
| "learning_rate": 1.765211239728479e-05, |
| "loss": 2.3751, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2425339366515837, |
| "grad_norm": 1.9790147630767292, |
| "learning_rate": 1.7639590280599107e-05, |
| "loss": 2.2897, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.24313725490196078, |
| "grad_norm": 1.9354803283654394, |
| "learning_rate": 1.7627039326942702e-05, |
| "loss": 2.3675, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.24374057315233785, |
| "grad_norm": 2.0127286733426613, |
| "learning_rate": 1.7614459583691346e-05, |
| "loss": 2.3644, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.24434389140271492, |
| "grad_norm": 2.0183875068444004, |
| "learning_rate": 1.7601851098329484e-05, |
| "loss": 2.3722, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.244947209653092, |
| "grad_norm": 1.7880232703698695, |
| "learning_rate": 1.758921391845005e-05, |
| "loss": 2.3411, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.24555052790346907, |
| "grad_norm": 1.9587130275464881, |
| "learning_rate": 1.757654809175429e-05, |
| "loss": 2.4508, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.24615384615384617, |
| "grad_norm": 1.62012638567783, |
| "learning_rate": 1.7563853666051586e-05, |
| "loss": 2.3503, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.24675716440422324, |
| "grad_norm": 1.883736203148245, |
| "learning_rate": 1.7551130689259272e-05, |
| "loss": 2.3349, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.2473604826546003, |
| "grad_norm": 2.0210348824514885, |
| "learning_rate": 1.7538379209402442e-05, |
| "loss": 2.4396, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.24796380090497738, |
| "grad_norm": 1.922814654639588, |
| "learning_rate": 1.7525599274613798e-05, |
| "loss": 2.3656, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.24856711915535445, |
| "grad_norm": 1.8260728794891592, |
| "learning_rate": 1.7512790933133435e-05, |
| "loss": 2.3812, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.24917043740573153, |
| "grad_norm": 2.2119732212101426, |
| "learning_rate": 1.7499954233308686e-05, |
| "loss": 2.3182, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2497737556561086, |
| "grad_norm": 1.7730977493628404, |
| "learning_rate": 1.7487089223593913e-05, |
| "loss": 2.3445, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.25037707390648567, |
| "grad_norm": 1.9928063308228516, |
| "learning_rate": 1.7474195952550355e-05, |
| "loss": 2.3702, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.25098039215686274, |
| "grad_norm": 1.9034844033014646, |
| "learning_rate": 1.7461274468845917e-05, |
| "loss": 2.4197, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25098039215686274, |
| "eval_loss": 2.3942031860351562, |
| "eval_runtime": 21.4634, |
| "eval_samples_per_second": 4.1, |
| "eval_steps_per_second": 0.513, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2515837104072398, |
| "grad_norm": 1.8947314580332886, |
| "learning_rate": 1.7448324821255e-05, |
| "loss": 2.3497, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2521870286576169, |
| "grad_norm": 1.709832121306641, |
| "learning_rate": 1.7435347058658317e-05, |
| "loss": 2.3237, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.25279034690799396, |
| "grad_norm": 1.9912783646331091, |
| "learning_rate": 1.74223412300427e-05, |
| "loss": 2.4166, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.25339366515837103, |
| "grad_norm": 1.7460156364263784, |
| "learning_rate": 1.7409307384500932e-05, |
| "loss": 2.3867, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2539969834087481, |
| "grad_norm": 1.8538623245046475, |
| "learning_rate": 1.7396245571231546e-05, |
| "loss": 2.3734, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.25460030165912517, |
| "grad_norm": 1.846509710074018, |
| "learning_rate": 1.7383155839538634e-05, |
| "loss": 2.4071, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.25520361990950224, |
| "grad_norm": 1.7825358596959273, |
| "learning_rate": 1.7370038238831682e-05, |
| "loss": 2.3877, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.2558069381598793, |
| "grad_norm": 1.678243810735703, |
| "learning_rate": 1.7356892818625374e-05, |
| "loss": 2.3365, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 1.748549957580436, |
| "learning_rate": 1.7343719628539396e-05, |
| "loss": 2.4936, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.25701357466063346, |
| "grad_norm": 1.8214141054998045, |
| "learning_rate": 1.7330518718298263e-05, |
| "loss": 2.4665, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.25761689291101053, |
| "grad_norm": 1.8466245931683942, |
| "learning_rate": 1.7317290137731122e-05, |
| "loss": 2.4653, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.25822021116138766, |
| "grad_norm": 1.9016215729218684, |
| "learning_rate": 1.7304033936771557e-05, |
| "loss": 2.3672, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.25882352941176473, |
| "grad_norm": 1.853880516402256, |
| "learning_rate": 1.729075016545743e-05, |
| "loss": 2.3238, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2594268476621418, |
| "grad_norm": 1.734071118881534, |
| "learning_rate": 1.7277438873930654e-05, |
| "loss": 2.431, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2600301659125189, |
| "grad_norm": 1.9892858435111747, |
| "learning_rate": 1.726410011243703e-05, |
| "loss": 2.3572, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.26063348416289595, |
| "grad_norm": 1.7704805245487907, |
| "learning_rate": 1.725073393132605e-05, |
| "loss": 2.4709, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.261236802413273, |
| "grad_norm": 1.7240494225737328, |
| "learning_rate": 1.72373403810507e-05, |
| "loss": 2.4048, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2618401206636501, |
| "grad_norm": 1.781399562041848, |
| "learning_rate": 1.7223919512167292e-05, |
| "loss": 2.3077, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.26244343891402716, |
| "grad_norm": 1.944568269606338, |
| "learning_rate": 1.7210471375335225e-05, |
| "loss": 2.3961, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.26304675716440423, |
| "grad_norm": 1.9648161722720134, |
| "learning_rate": 1.7196996021316862e-05, |
| "loss": 2.3704, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2636500754147813, |
| "grad_norm": 1.7752870309421316, |
| "learning_rate": 1.7183493500977277e-05, |
| "loss": 2.3925, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2642533936651584, |
| "grad_norm": 1.9406708220706959, |
| "learning_rate": 1.71699638652841e-05, |
| "loss": 2.4408, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.26485671191553545, |
| "grad_norm": 1.8139653351292817, |
| "learning_rate": 1.715640716530731e-05, |
| "loss": 2.2928, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2654600301659125, |
| "grad_norm": 1.82683639815915, |
| "learning_rate": 1.7142823452219036e-05, |
| "loss": 2.3599, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2660633484162896, |
| "grad_norm": 1.7707719003945261, |
| "learning_rate": 1.7129212777293392e-05, |
| "loss": 2.4122, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 1.7857253124157222, |
| "learning_rate": 1.7115575191906245e-05, |
| "loss": 2.3878, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.26726998491704373, |
| "grad_norm": 1.9293969953243704, |
| "learning_rate": 1.7101910747535054e-05, |
| "loss": 2.4097, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2678733031674208, |
| "grad_norm": 2.066827352214268, |
| "learning_rate": 1.7088219495758652e-05, |
| "loss": 2.3335, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2684766214177979, |
| "grad_norm": 2.044784403214522, |
| "learning_rate": 1.7074501488257062e-05, |
| "loss": 2.3789, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.26907993966817495, |
| "grad_norm": 1.8907127176156884, |
| "learning_rate": 1.706075677681131e-05, |
| "loss": 2.3815, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.269683257918552, |
| "grad_norm": 1.852455283235919, |
| "learning_rate": 1.7046985413303215e-05, |
| "loss": 2.3542, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.2702865761689291, |
| "grad_norm": 1.7754703332492303, |
| "learning_rate": 1.7033187449715195e-05, |
| "loss": 2.3499, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.27088989441930617, |
| "grad_norm": 2.06022512348094, |
| "learning_rate": 1.7019362938130085e-05, |
| "loss": 2.324, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.27149321266968324, |
| "grad_norm": 1.9527487591276138, |
| "learning_rate": 1.700551193073092e-05, |
| "loss": 2.4478, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2720965309200603, |
| "grad_norm": 2.043185174930958, |
| "learning_rate": 1.699163447980075e-05, |
| "loss": 2.4106, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2726998491704374, |
| "grad_norm": 1.86188494052839, |
| "learning_rate": 1.6977730637722446e-05, |
| "loss": 2.4203, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.27330316742081445, |
| "grad_norm": 1.8173005419840083, |
| "learning_rate": 1.6963800456978495e-05, |
| "loss": 2.4529, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2739064856711916, |
| "grad_norm": 1.926288288051369, |
| "learning_rate": 1.6949843990150798e-05, |
| "loss": 2.3295, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.27450980392156865, |
| "grad_norm": 1.9443063715328432, |
| "learning_rate": 1.693586128992048e-05, |
| "loss": 2.3609, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2751131221719457, |
| "grad_norm": 2.328188386516258, |
| "learning_rate": 1.6921852409067698e-05, |
| "loss": 2.3975, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2757164404223228, |
| "grad_norm": 2.1483376873907245, |
| "learning_rate": 1.6907817400471422e-05, |
| "loss": 2.3472, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.27631975867269987, |
| "grad_norm": 2.0212484018314205, |
| "learning_rate": 1.689375631710924e-05, |
| "loss": 2.3998, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.27692307692307694, |
| "grad_norm": 1.7768919412269368, |
| "learning_rate": 1.6879669212057187e-05, |
| "loss": 2.3576, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.277526395173454, |
| "grad_norm": 1.940782585527019, |
| "learning_rate": 1.6865556138489497e-05, |
| "loss": 2.4266, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2781297134238311, |
| "grad_norm": 1.825130165036442, |
| "learning_rate": 1.6851417149678442e-05, |
| "loss": 2.4597, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.27873303167420815, |
| "grad_norm": 1.8767052262788935, |
| "learning_rate": 1.6837252298994107e-05, |
| "loss": 2.4077, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2793363499245852, |
| "grad_norm": 2.093256354258747, |
| "learning_rate": 1.68230616399042e-05, |
| "loss": 2.3917, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2799396681749623, |
| "grad_norm": 1.8064129694457856, |
| "learning_rate": 1.680884522597385e-05, |
| "loss": 2.4043, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.28054298642533937, |
| "grad_norm": 1.7734169912562705, |
| "learning_rate": 1.6794603110865396e-05, |
| "loss": 2.3579, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.28114630467571644, |
| "grad_norm": 1.7758372258527604, |
| "learning_rate": 1.6780335348338195e-05, |
| "loss": 2.453, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2817496229260935, |
| "grad_norm": 1.7745279291014808, |
| "learning_rate": 1.6766041992248415e-05, |
| "loss": 2.3205, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2823529411764706, |
| "grad_norm": 2.0349417614230694, |
| "learning_rate": 1.6751723096548834e-05, |
| "loss": 2.3715, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.28295625942684766, |
| "grad_norm": 1.8709834820124767, |
| "learning_rate": 1.6737378715288627e-05, |
| "loss": 2.3246, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.28355957767722473, |
| "grad_norm": 1.754382359490896, |
| "learning_rate": 1.672300890261317e-05, |
| "loss": 2.364, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2841628959276018, |
| "grad_norm": 1.8716249102630769, |
| "learning_rate": 1.670861371276384e-05, |
| "loss": 2.3305, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.28476621417797887, |
| "grad_norm": 1.8767204933617139, |
| "learning_rate": 1.6694193200077796e-05, |
| "loss": 2.4049, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.28536953242835594, |
| "grad_norm": 1.8614514890660463, |
| "learning_rate": 1.667974741898779e-05, |
| "loss": 2.3556, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.285972850678733, |
| "grad_norm": 1.7008268918437945, |
| "learning_rate": 1.6665276424021955e-05, |
| "loss": 2.4237, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2865761689291101, |
| "grad_norm": 1.913791072383469, |
| "learning_rate": 1.6650780269803587e-05, |
| "loss": 2.3659, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.28717948717948716, |
| "grad_norm": 1.6601117504241991, |
| "learning_rate": 1.663625901105096e-05, |
| "loss": 2.4241, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.28778280542986423, |
| "grad_norm": 1.837686989315102, |
| "learning_rate": 1.6621712702577116e-05, |
| "loss": 2.4188, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2883861236802413, |
| "grad_norm": 1.8421165580221608, |
| "learning_rate": 1.6607141399289628e-05, |
| "loss": 2.3939, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2889894419306184, |
| "grad_norm": 1.8192642232304175, |
| "learning_rate": 1.6592545156190437e-05, |
| "loss": 2.2918, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2895927601809955, |
| "grad_norm": 1.6952460022357256, |
| "learning_rate": 1.6577924028375622e-05, |
| "loss": 2.3662, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2901960784313726, |
| "grad_norm": 1.9577770501552076, |
| "learning_rate": 1.6563278071035182e-05, |
| "loss": 2.3789, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.29079939668174964, |
| "grad_norm": 1.9639779173712133, |
| "learning_rate": 1.6548607339452853e-05, |
| "loss": 2.4703, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2914027149321267, |
| "grad_norm": 2.0419999930141826, |
| "learning_rate": 1.6533911889005874e-05, |
| "loss": 2.4667, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.2920060331825038, |
| "grad_norm": 1.8963556387959155, |
| "learning_rate": 1.6519191775164795e-05, |
| "loss": 2.2996, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.29260935143288086, |
| "grad_norm": 1.934074324015012, |
| "learning_rate": 1.6504447053493264e-05, |
| "loss": 2.4242, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.29321266968325793, |
| "grad_norm": 1.8545213632058337, |
| "learning_rate": 1.6489677779647813e-05, |
| "loss": 2.3063, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.293815987933635, |
| "grad_norm": 3.973702407940097, |
| "learning_rate": 1.6474884009377658e-05, |
| "loss": 2.3574, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2944193061840121, |
| "grad_norm": 1.9537109220075501, |
| "learning_rate": 1.6460065798524464e-05, |
| "loss": 2.3463, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.29502262443438915, |
| "grad_norm": 1.9389244940154626, |
| "learning_rate": 1.644522320302217e-05, |
| "loss": 2.3892, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2956259426847662, |
| "grad_norm": 1.927684899495583, |
| "learning_rate": 1.643035627889674e-05, |
| "loss": 2.3459, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2962292609351433, |
| "grad_norm": 1.8034588471166737, |
| "learning_rate": 1.641546508226599e-05, |
| "loss": 2.4316, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.29683257918552036, |
| "grad_norm": 1.8514486122667233, |
| "learning_rate": 1.640054966933935e-05, |
| "loss": 2.3838, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.29743589743589743, |
| "grad_norm": 1.827240704511863, |
| "learning_rate": 1.6385610096417654e-05, |
| "loss": 2.448, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.2980392156862745, |
| "grad_norm": 1.9560055670404755, |
| "learning_rate": 1.637064641989293e-05, |
| "loss": 2.3373, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2986425339366516, |
| "grad_norm": 1.8968484574591564, |
| "learning_rate": 1.63556586962482e-05, |
| "loss": 2.4617, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.29924585218702865, |
| "grad_norm": 1.8874111567645753, |
| "learning_rate": 1.634064698205725e-05, |
| "loss": 2.4268, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2998491704374057, |
| "grad_norm": 1.8988827502210233, |
| "learning_rate": 1.632561133398442e-05, |
| "loss": 2.3495, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3004524886877828, |
| "grad_norm": 1.7580087811549536, |
| "learning_rate": 1.6310551808784394e-05, |
| "loss": 2.3844, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.30105580693815986, |
| "grad_norm": 1.8050290057379497, |
| "learning_rate": 1.6295468463301993e-05, |
| "loss": 2.3541, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.30165912518853694, |
| "grad_norm": 1.673799931237698, |
| "learning_rate": 1.628036135447194e-05, |
| "loss": 2.3608, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.302262443438914, |
| "grad_norm": 1.8311291235848792, |
| "learning_rate": 1.6265230539318658e-05, |
| "loss": 2.3797, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3028657616892911, |
| "grad_norm": 1.9367398724614582, |
| "learning_rate": 1.6250076074956066e-05, |
| "loss": 2.4683, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.30346907993966815, |
| "grad_norm": 1.7572907260857697, |
| "learning_rate": 1.6234898018587336e-05, |
| "loss": 2.3569, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.3040723981900452, |
| "grad_norm": 2.1156012270415445, |
| "learning_rate": 1.6219696427504703e-05, |
| "loss": 2.3643, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3046757164404223, |
| "grad_norm": 1.6954259066781059, |
| "learning_rate": 1.6204471359089224e-05, |
| "loss": 2.4835, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.3052790346907994, |
| "grad_norm": 1.880357726030102, |
| "learning_rate": 1.6189222870810596e-05, |
| "loss": 2.4242, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3058823529411765, |
| "grad_norm": 1.743051972913388, |
| "learning_rate": 1.61739510202269e-05, |
| "loss": 2.4604, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.30648567119155357, |
| "grad_norm": 1.7096306081262191, |
| "learning_rate": 1.6158655864984413e-05, |
| "loss": 2.3306, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.30708898944193064, |
| "grad_norm": 1.8969099344419835, |
| "learning_rate": 1.6143337462817372e-05, |
| "loss": 2.403, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 1.8973160763468697, |
| "learning_rate": 1.612799587154777e-05, |
| "loss": 2.4471, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3082956259426848, |
| "grad_norm": 1.8915447769270686, |
| "learning_rate": 1.6112631149085128e-05, |
| "loss": 2.3624, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.30889894419306185, |
| "grad_norm": 1.6567739781051283, |
| "learning_rate": 1.609724335342628e-05, |
| "loss": 2.3836, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.3095022624434389, |
| "grad_norm": 2.0548142027771186, |
| "learning_rate": 1.6081832542655154e-05, |
| "loss": 2.4051, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.310105580693816, |
| "grad_norm": 2.0154305540780637, |
| "learning_rate": 1.6066398774942556e-05, |
| "loss": 2.3679, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.31070889894419307, |
| "grad_norm": 1.6683133670779435, |
| "learning_rate": 1.6050942108545938e-05, |
| "loss": 2.3651, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.31131221719457014, |
| "grad_norm": 1.9261114209064358, |
| "learning_rate": 1.6035462601809193e-05, |
| "loss": 2.3811, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.3119155354449472, |
| "grad_norm": 1.9055368841091447, |
| "learning_rate": 1.6019960313162436e-05, |
| "loss": 2.3471, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3125188536953243, |
| "grad_norm": 1.7619898676546586, |
| "learning_rate": 1.6004435301121762e-05, |
| "loss": 2.3532, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.31312217194570136, |
| "grad_norm": 1.7379302950765867, |
| "learning_rate": 1.5988887624289045e-05, |
| "loss": 2.4402, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 1.7861658019735307, |
| "learning_rate": 1.5973317341351725e-05, |
| "loss": 2.3356, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3143288084464555, |
| "grad_norm": 1.7406388213681667, |
| "learning_rate": 1.595772451108254e-05, |
| "loss": 2.3595, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.31493212669683257, |
| "grad_norm": 1.8740480929030214, |
| "learning_rate": 1.5942109192339375e-05, |
| "loss": 2.4009, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.31553544494720964, |
| "grad_norm": 1.6669644613043895, |
| "learning_rate": 1.592647144406498e-05, |
| "loss": 2.383, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3161387631975867, |
| "grad_norm": 1.7563204578628122, |
| "learning_rate": 1.5910811325286768e-05, |
| "loss": 2.3741, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3167420814479638, |
| "grad_norm": 1.7027904743606184, |
| "learning_rate": 1.58951288951166e-05, |
| "loss": 2.3938, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.31734539969834086, |
| "grad_norm": 1.7906099932440336, |
| "learning_rate": 1.5879424212750554e-05, |
| "loss": 2.3705, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.31794871794871793, |
| "grad_norm": 1.7898710643832698, |
| "learning_rate": 1.5863697337468704e-05, |
| "loss": 2.3321, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.318552036199095, |
| "grad_norm": 3.435785300962325, |
| "learning_rate": 1.5847948328634895e-05, |
| "loss": 2.3694, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.3191553544494721, |
| "grad_norm": 1.858360689105569, |
| "learning_rate": 1.583217724569651e-05, |
| "loss": 2.4358, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.31975867269984914, |
| "grad_norm": 1.7287161321640319, |
| "learning_rate": 1.5816384148184273e-05, |
| "loss": 2.313, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.32036199095022627, |
| "grad_norm": 1.970600858577929, |
| "learning_rate": 1.5800569095711983e-05, |
| "loss": 2.35, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.32096530920060334, |
| "grad_norm": 1.9064390487389395, |
| "learning_rate": 1.5784732147976333e-05, |
| "loss": 2.4124, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3215686274509804, |
| "grad_norm": 1.7527577771779475, |
| "learning_rate": 1.5768873364756653e-05, |
| "loss": 2.3539, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.3221719457013575, |
| "grad_norm": 2.0432674010861214, |
| "learning_rate": 1.575299280591469e-05, |
| "loss": 2.3665, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.32277526395173456, |
| "grad_norm": 1.7149420503457244, |
| "learning_rate": 1.57370905313944e-05, |
| "loss": 2.3832, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.32337858220211163, |
| "grad_norm": 1.6986453672998818, |
| "learning_rate": 1.5721166601221697e-05, |
| "loss": 2.3081, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3239819004524887, |
| "grad_norm": 1.6812118408175234, |
| "learning_rate": 1.5705221075504247e-05, |
| "loss": 2.4513, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3245852187028658, |
| "grad_norm": 1.910283619404697, |
| "learning_rate": 1.5689254014431225e-05, |
| "loss": 2.3518, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.32518853695324285, |
| "grad_norm": 1.8424382759411888, |
| "learning_rate": 1.56732654782731e-05, |
| "loss": 2.2766, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.3257918552036199, |
| "grad_norm": 1.8701280171641446, |
| "learning_rate": 1.5657255527381395e-05, |
| "loss": 2.3568, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.326395173453997, |
| "grad_norm": 1.7703149577050399, |
| "learning_rate": 1.5641224222188476e-05, |
| "loss": 2.4348, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.32699849170437406, |
| "grad_norm": 1.699314651054947, |
| "learning_rate": 1.562517162320731e-05, |
| "loss": 2.4006, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.32760180995475113, |
| "grad_norm": 1.992395374798314, |
| "learning_rate": 1.5609097791031243e-05, |
| "loss": 2.4042, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.3282051282051282, |
| "grad_norm": 1.8182004021843592, |
| "learning_rate": 1.559300278633377e-05, |
| "loss": 2.4339, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3288084464555053, |
| "grad_norm": 1.9273546760508904, |
| "learning_rate": 1.5576886669868297e-05, |
| "loss": 2.3665, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.32941176470588235, |
| "grad_norm": 1.9311811042716147, |
| "learning_rate": 1.556074950246793e-05, |
| "loss": 2.387, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3300150829562594, |
| "grad_norm": 2.3722660221407588, |
| "learning_rate": 1.554459134504523e-05, |
| "loss": 2.3884, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3306184012066365, |
| "grad_norm": 1.797532989222507, |
| "learning_rate": 1.5528412258591994e-05, |
| "loss": 2.3019, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.33122171945701356, |
| "grad_norm": 1.7199496012123132, |
| "learning_rate": 1.5512212304179015e-05, |
| "loss": 2.4037, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.33182503770739064, |
| "grad_norm": 1.7484659894079446, |
| "learning_rate": 1.5495991542955855e-05, |
| "loss": 2.4882, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3324283559577677, |
| "grad_norm": 11.886778043932564, |
| "learning_rate": 1.5479750036150614e-05, |
| "loss": 2.3168, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.3330316742081448, |
| "grad_norm": 2.0257322511655422, |
| "learning_rate": 1.5463487845069708e-05, |
| "loss": 2.4386, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.33363499245852185, |
| "grad_norm": 2.104527806500846, |
| "learning_rate": 1.544720503109762e-05, |
| "loss": 2.3966, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3342383107088989, |
| "grad_norm": 1.8881704290067898, |
| "learning_rate": 1.5430901655696683e-05, |
| "loss": 2.4835, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.334841628959276, |
| "grad_norm": 1.7313204083862717, |
| "learning_rate": 1.541457778040684e-05, |
| "loss": 2.3059, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.33544494720965307, |
| "grad_norm": 1.7885236223630756, |
| "learning_rate": 1.539823346684542e-05, |
| "loss": 2.3921, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3360482654600302, |
| "grad_norm": 1.7461855308005563, |
| "learning_rate": 1.5381868776706883e-05, |
| "loss": 2.3616, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.33665158371040727, |
| "grad_norm": 1.9361450007910532, |
| "learning_rate": 1.536548377176263e-05, |
| "loss": 2.3722, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.33725490196078434, |
| "grad_norm": 1.9336429106855157, |
| "learning_rate": 1.5349078513860728e-05, |
| "loss": 2.4162, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.3378582202111614, |
| "grad_norm": 1.761755498515305, |
| "learning_rate": 1.5332653064925683e-05, |
| "loss": 2.4606, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3384615384615385, |
| "grad_norm": 1.7505505528301577, |
| "learning_rate": 1.5316207486958242e-05, |
| "loss": 2.4586, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.33906485671191555, |
| "grad_norm": 1.779820283923665, |
| "learning_rate": 1.5299741842035108e-05, |
| "loss": 2.3328, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3396681749622926, |
| "grad_norm": 1.8162808883880843, |
| "learning_rate": 1.5283256192308744e-05, |
| "loss": 2.3907, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.3402714932126697, |
| "grad_norm": 1.8002288685975787, |
| "learning_rate": 1.5266750600007122e-05, |
| "loss": 2.3733, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.34087481146304677, |
| "grad_norm": 1.7074592980093009, |
| "learning_rate": 1.5250225127433485e-05, |
| "loss": 2.3787, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.34147812971342384, |
| "grad_norm": 1.727791102751961, |
| "learning_rate": 1.5233679836966122e-05, |
| "loss": 2.4609, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3420814479638009, |
| "grad_norm": 1.7409151277373238, |
| "learning_rate": 1.5217114791058129e-05, |
| "loss": 2.3501, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.342684766214178, |
| "grad_norm": 1.8502483538705539, |
| "learning_rate": 1.5200530052237174e-05, |
| "loss": 2.4225, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.34328808446455505, |
| "grad_norm": 1.9323986008652658, |
| "learning_rate": 1.5183925683105254e-05, |
| "loss": 2.4579, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3438914027149321, |
| "grad_norm": 1.6959428286232177, |
| "learning_rate": 1.5167301746338466e-05, |
| "loss": 2.3903, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3444947209653092, |
| "grad_norm": 1.785549938873077, |
| "learning_rate": 1.5150658304686766e-05, |
| "loss": 2.3733, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.34509803921568627, |
| "grad_norm": 1.8119824004389333, |
| "learning_rate": 1.5133995420973746e-05, |
| "loss": 2.3818, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.34570135746606334, |
| "grad_norm": 1.8260773713603422, |
| "learning_rate": 1.5117313158096371e-05, |
| "loss": 2.3361, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3463046757164404, |
| "grad_norm": 2.06836980707018, |
| "learning_rate": 1.510061157902477e-05, |
| "loss": 2.4415, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3469079939668175, |
| "grad_norm": 2.086356686499412, |
| "learning_rate": 1.5083890746801962e-05, |
| "loss": 2.3938, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.34751131221719456, |
| "grad_norm": 1.8906423761627238, |
| "learning_rate": 1.5067150724543669e-05, |
| "loss": 2.3683, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.34811463046757163, |
| "grad_norm": 1.871834489314333, |
| "learning_rate": 1.5050391575438026e-05, |
| "loss": 2.3848, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.3487179487179487, |
| "grad_norm": 1.8101661578969148, |
| "learning_rate": 1.503361336274538e-05, |
| "loss": 2.4243, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.34932126696832577, |
| "grad_norm": 1.7689845298062439, |
| "learning_rate": 1.5016816149798033e-05, |
| "loss": 2.3667, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.34992458521870284, |
| "grad_norm": 1.7995447114490488, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 2.2974, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3505279034690799, |
| "grad_norm": 2.160922337798451, |
| "learning_rate": 1.4983164976826788e-05, |
| "loss": 2.3526, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.351131221719457, |
| "grad_norm": 2.0689488964902902, |
| "learning_rate": 1.4966311143825132e-05, |
| "loss": 2.4311, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3517345399698341, |
| "grad_norm": 1.7389091824888516, |
| "learning_rate": 1.4949438564612778e-05, |
| "loss": 2.4222, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3523378582202112, |
| "grad_norm": 1.8173600434790154, |
| "learning_rate": 1.4932547302878228e-05, |
| "loss": 2.3584, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 1.8292488414493129, |
| "learning_rate": 1.491563742238051e-05, |
| "loss": 2.3919, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.35354449472096533, |
| "grad_norm": 1.6564271315606274, |
| "learning_rate": 1.4898708986948925e-05, |
| "loss": 2.4269, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3541478129713424, |
| "grad_norm": 1.8221323736787778, |
| "learning_rate": 1.4881762060482814e-05, |
| "loss": 2.291, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3547511312217195, |
| "grad_norm": 1.7436753903680235, |
| "learning_rate": 1.486479670695132e-05, |
| "loss": 2.3669, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.35535444947209655, |
| "grad_norm": 1.7523543866066271, |
| "learning_rate": 1.4847812990393138e-05, |
| "loss": 2.4397, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3559577677224736, |
| "grad_norm": 1.8682538834622033, |
| "learning_rate": 1.483081097491628e-05, |
| "loss": 2.409, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3565610859728507, |
| "grad_norm": 1.745032073795883, |
| "learning_rate": 1.4813790724697832e-05, |
| "loss": 2.3007, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.35716440422322776, |
| "grad_norm": 1.7186408434763945, |
| "learning_rate": 1.47967523039837e-05, |
| "loss": 2.4034, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.35776772247360483, |
| "grad_norm": 1.7483375800449275, |
| "learning_rate": 1.4779695777088392e-05, |
| "loss": 2.3844, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3583710407239819, |
| "grad_norm": 1.7819426598449446, |
| "learning_rate": 1.476262120839475e-05, |
| "loss": 2.4795, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 1.7139242136095565, |
| "learning_rate": 1.4745528662353728e-05, |
| "loss": 2.3507, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.35957767722473605, |
| "grad_norm": 1.8436276797329483, |
| "learning_rate": 1.4728418203484125e-05, |
| "loss": 2.4585, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3601809954751131, |
| "grad_norm": 1.8598700014175478, |
| "learning_rate": 1.471128989637237e-05, |
| "loss": 2.4102, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3607843137254902, |
| "grad_norm": 1.6533400176564714, |
| "learning_rate": 1.4694143805672254e-05, |
| "loss": 2.3843, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.36138763197586726, |
| "grad_norm": 2.1098729029957215, |
| "learning_rate": 1.4676979996104694e-05, |
| "loss": 2.3878, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.36199095022624433, |
| "grad_norm": 2.4340141500482684, |
| "learning_rate": 1.4659798532457497e-05, |
| "loss": 2.4534, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3625942684766214, |
| "grad_norm": 1.9471998072388452, |
| "learning_rate": 1.4642599479585106e-05, |
| "loss": 2.3896, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3631975867269985, |
| "grad_norm": 2.0019981913767744, |
| "learning_rate": 1.4625382902408356e-05, |
| "loss": 2.33, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.36380090497737555, |
| "grad_norm": 1.6948730617309857, |
| "learning_rate": 1.4608148865914226e-05, |
| "loss": 2.3983, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3644042232277526, |
| "grad_norm": 1.8123228593449345, |
| "learning_rate": 1.4590897435155609e-05, |
| "loss": 2.3558, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3650075414781297, |
| "grad_norm": 1.8752512293969732, |
| "learning_rate": 1.4573628675251051e-05, |
| "loss": 2.4097, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.36561085972850677, |
| "grad_norm": 1.7681963283069855, |
| "learning_rate": 1.4556342651384503e-05, |
| "loss": 2.4081, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.36621417797888384, |
| "grad_norm": 1.742690929021266, |
| "learning_rate": 1.453903942880509e-05, |
| "loss": 2.3479, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3668174962292609, |
| "grad_norm": 1.99882414897086, |
| "learning_rate": 1.4521719072826858e-05, |
| "loss": 2.3713, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.36742081447963804, |
| "grad_norm": 1.9029689038645006, |
| "learning_rate": 1.4504381648828518e-05, |
| "loss": 2.4505, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3680241327300151, |
| "grad_norm": 2.0476458036895218, |
| "learning_rate": 1.4487027222253216e-05, |
| "loss": 2.4079, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3686274509803922, |
| "grad_norm": 1.776067769797596, |
| "learning_rate": 1.4469655858608267e-05, |
| "loss": 2.3776, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.36923076923076925, |
| "grad_norm": 1.9672318792751764, |
| "learning_rate": 1.445226762346493e-05, |
| "loss": 2.3368, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.3698340874811463, |
| "grad_norm": 1.855562519423061, |
| "learning_rate": 1.4434862582458136e-05, |
| "loss": 2.3117, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.3704374057315234, |
| "grad_norm": 1.8894861586454959, |
| "learning_rate": 1.4417440801286263e-05, |
| "loss": 2.3204, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.37104072398190047, |
| "grad_norm": 1.6694812676346151, |
| "learning_rate": 1.4400002345710871e-05, |
| "loss": 2.3575, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.37164404223227754, |
| "grad_norm": 1.7479003447288977, |
| "learning_rate": 1.4382547281556464e-05, |
| "loss": 2.3689, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3722473604826546, |
| "grad_norm": 2.3948984633240036, |
| "learning_rate": 1.4365075674710238e-05, |
| "loss": 2.3109, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3728506787330317, |
| "grad_norm": 1.887014513355491, |
| "learning_rate": 1.434758759112183e-05, |
| "loss": 2.4637, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.37345399698340875, |
| "grad_norm": 1.7882548264987235, |
| "learning_rate": 1.4330083096803073e-05, |
| "loss": 2.3452, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3740573152337858, |
| "grad_norm": 1.848141313490385, |
| "learning_rate": 1.4312562257827742e-05, |
| "loss": 2.4144, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3746606334841629, |
| "grad_norm": 1.9944819383375783, |
| "learning_rate": 1.4295025140331317e-05, |
| "loss": 2.3772, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.37526395173453997, |
| "grad_norm": 1.8430443308071278, |
| "learning_rate": 1.427747181051071e-05, |
| "loss": 2.3471, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.37586726998491704, |
| "grad_norm": 1.7064478636418337, |
| "learning_rate": 1.4259902334624043e-05, |
| "loss": 2.3765, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "grad_norm": 1.8467921519847166, |
| "learning_rate": 1.4242316778990373e-05, |
| "loss": 2.4057, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "eval_loss": 2.3918943405151367, |
| "eval_runtime": 21.7913, |
| "eval_samples_per_second": 4.038, |
| "eval_steps_per_second": 0.505, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3770739064856712, |
| "grad_norm": 2.0029681453109047, |
| "learning_rate": 1.4224715209989463e-05, |
| "loss": 2.3662, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.37767722473604826, |
| "grad_norm": 1.7665174192031903, |
| "learning_rate": 1.4207097694061514e-05, |
| "loss": 2.3706, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3782805429864253, |
| "grad_norm": 1.7876219359582222, |
| "learning_rate": 1.418946429770692e-05, |
| "loss": 2.3525, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3788838612368024, |
| "grad_norm": 1.7700044933464636, |
| "learning_rate": 1.4171815087486026e-05, |
| "loss": 2.4189, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.37948717948717947, |
| "grad_norm": 1.86529649557736, |
| "learning_rate": 1.4154150130018867e-05, |
| "loss": 2.3815, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.38009049773755654, |
| "grad_norm": 2.006317051920272, |
| "learning_rate": 1.4136469491984913e-05, |
| "loss": 2.4124, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3806938159879336, |
| "grad_norm": 1.772114382724889, |
| "learning_rate": 1.4118773240122825e-05, |
| "loss": 2.3476, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3812971342383107, |
| "grad_norm": 1.7056526828735088, |
| "learning_rate": 1.4101061441230209e-05, |
| "loss": 2.4368, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.38190045248868776, |
| "grad_norm": 1.8707670004402084, |
| "learning_rate": 1.4083334162163347e-05, |
| "loss": 2.4275, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.38250377073906483, |
| "grad_norm": 1.7861302749515335, |
| "learning_rate": 1.4065591469836958e-05, |
| "loss": 2.3732, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.38310708898944196, |
| "grad_norm": 1.818683066931359, |
| "learning_rate": 1.4047833431223938e-05, |
| "loss": 2.3422, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.38371040723981903, |
| "grad_norm": 1.6885761613630272, |
| "learning_rate": 1.4030060113355118e-05, |
| "loss": 2.3495, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3843137254901961, |
| "grad_norm": 7.180336365886139, |
| "learning_rate": 1.4012271583318989e-05, |
| "loss": 2.353, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3849170437405732, |
| "grad_norm": 1.7340067613190102, |
| "learning_rate": 1.3994467908261474e-05, |
| "loss": 2.2981, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.38552036199095024, |
| "grad_norm": 1.7030954630468456, |
| "learning_rate": 1.397664915538566e-05, |
| "loss": 2.3274, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3861236802413273, |
| "grad_norm": 1.800533489062573, |
| "learning_rate": 1.3958815391951552e-05, |
| "loss": 2.5047, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3867269984917044, |
| "grad_norm": 1.7827540232861019, |
| "learning_rate": 1.3940966685275812e-05, |
| "loss": 2.2918, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.38733031674208146, |
| "grad_norm": 1.7797398302982137, |
| "learning_rate": 1.3923103102731504e-05, |
| "loss": 2.38, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.38793363499245853, |
| "grad_norm": 1.6684389473922723, |
| "learning_rate": 1.3905224711747844e-05, |
| "loss": 2.3335, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3885369532428356, |
| "grad_norm": 1.8443665423108562, |
| "learning_rate": 1.3887331579809958e-05, |
| "loss": 2.3327, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3891402714932127, |
| "grad_norm": 1.9000405287193112, |
| "learning_rate": 1.3869423774458594e-05, |
| "loss": 2.4339, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.38974358974358975, |
| "grad_norm": 1.7335394014606145, |
| "learning_rate": 1.3851501363289907e-05, |
| "loss": 2.3429, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3903469079939668, |
| "grad_norm": 1.939117624820807, |
| "learning_rate": 1.3833564413955171e-05, |
| "loss": 2.4046, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3909502262443439, |
| "grad_norm": 1.8243037308209973, |
| "learning_rate": 1.3815612994160544e-05, |
| "loss": 2.4171, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.39155354449472096, |
| "grad_norm": 1.6139052960785931, |
| "learning_rate": 1.3797647171666792e-05, |
| "loss": 2.3557, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 1.8121247464252912, |
| "learning_rate": 1.3779667014289067e-05, |
| "loss": 2.3947, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3927601809954751, |
| "grad_norm": 1.7575043092638487, |
| "learning_rate": 1.3761672589896615e-05, |
| "loss": 2.4148, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3933634992458522, |
| "grad_norm": 1.8471659624255694, |
| "learning_rate": 1.3743663966412547e-05, |
| "loss": 2.3923, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.39396681749622925, |
| "grad_norm": 1.8481281201046837, |
| "learning_rate": 1.3725641211813557e-05, |
| "loss": 2.4083, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.3945701357466063, |
| "grad_norm": 1.783889419462132, |
| "learning_rate": 1.3707604394129687e-05, |
| "loss": 2.3017, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3951734539969834, |
| "grad_norm": 1.8916835677064565, |
| "learning_rate": 1.3689553581444069e-05, |
| "loss": 2.35, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.39577677224736046, |
| "grad_norm": 1.8370602445589161, |
| "learning_rate": 1.3671488841892648e-05, |
| "loss": 2.3432, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.39638009049773754, |
| "grad_norm": 1.8327493008799256, |
| "learning_rate": 1.3653410243663953e-05, |
| "loss": 2.3983, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3969834087481146, |
| "grad_norm": 1.7620328988966256, |
| "learning_rate": 1.3635317854998809e-05, |
| "loss": 2.3972, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3975867269984917, |
| "grad_norm": 1.8446532445383867, |
| "learning_rate": 1.361721174419011e-05, |
| "loss": 2.4332, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.39819004524886875, |
| "grad_norm": 1.7192647983726854, |
| "learning_rate": 1.3599091979582537e-05, |
| "loss": 2.4035, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3987933634992459, |
| "grad_norm": 1.8612813664426646, |
| "learning_rate": 1.3580958629572316e-05, |
| "loss": 2.3399, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.39939668174962295, |
| "grad_norm": 1.6726700821557248, |
| "learning_rate": 1.356281176260695e-05, |
| "loss": 2.3465, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.7445257737899353, |
| "learning_rate": 1.3544651447184961e-05, |
| "loss": 2.3113, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4006033182503771, |
| "grad_norm": 1.8653411246265055, |
| "learning_rate": 1.3526477751855645e-05, |
| "loss": 2.4037, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.40120663650075417, |
| "grad_norm": 1.8347872489948513, |
| "learning_rate": 1.3508290745218789e-05, |
| "loss": 2.318, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.40180995475113124, |
| "grad_norm": 1.8918225555087078, |
| "learning_rate": 1.3490090495924437e-05, |
| "loss": 2.3722, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4024132730015083, |
| "grad_norm": 2.010943861471511, |
| "learning_rate": 1.3471877072672617e-05, |
| "loss": 2.3639, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.4030165912518854, |
| "grad_norm": 1.8980686640668742, |
| "learning_rate": 1.3453650544213078e-05, |
| "loss": 2.3517, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.40361990950226245, |
| "grad_norm": 1.840946209795983, |
| "learning_rate": 1.3435410979345048e-05, |
| "loss": 2.4486, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4042232277526395, |
| "grad_norm": 1.977220556338354, |
| "learning_rate": 1.341715844691695e-05, |
| "loss": 2.4036, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4048265460030166, |
| "grad_norm": 1.835226836663754, |
| "learning_rate": 1.3398893015826166e-05, |
| "loss": 2.3811, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.40542986425339367, |
| "grad_norm": 1.8478422310537772, |
| "learning_rate": 1.338061475501877e-05, |
| "loss": 2.4005, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.40603318250377074, |
| "grad_norm": 1.7154610218519464, |
| "learning_rate": 1.3362323733489247e-05, |
| "loss": 2.3651, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.4066365007541478, |
| "grad_norm": 1.9555464431886362, |
| "learning_rate": 1.3344020020280262e-05, |
| "loss": 2.3635, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.4072398190045249, |
| "grad_norm": 1.9659968540183852, |
| "learning_rate": 1.3325703684482383e-05, |
| "loss": 2.5214, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.40784313725490196, |
| "grad_norm": 1.7901769953025661, |
| "learning_rate": 1.330737479523383e-05, |
| "loss": 2.4147, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.408446455505279, |
| "grad_norm": 1.925446038371978, |
| "learning_rate": 1.32890334217202e-05, |
| "loss": 2.3693, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4090497737556561, |
| "grad_norm": 1.887938807896764, |
| "learning_rate": 1.3270679633174219e-05, |
| "loss": 2.396, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.40965309200603317, |
| "grad_norm": 1.9909687777389626, |
| "learning_rate": 1.3252313498875473e-05, |
| "loss": 2.4342, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 1.8345463519611278, |
| "learning_rate": 1.3233935088150154e-05, |
| "loss": 2.3493, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4108597285067873, |
| "grad_norm": 1.8258441712601658, |
| "learning_rate": 1.3215544470370785e-05, |
| "loss": 2.3791, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.4114630467571644, |
| "grad_norm": 1.779542065553901, |
| "learning_rate": 1.3197141714955977e-05, |
| "loss": 2.3066, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.41206636500754146, |
| "grad_norm": 1.9061971700529505, |
| "learning_rate": 1.317872689137015e-05, |
| "loss": 2.4178, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.41266968325791853, |
| "grad_norm": 2.151947023896646, |
| "learning_rate": 1.3160300069123277e-05, |
| "loss": 2.3833, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.4132730015082956, |
| "grad_norm": 2.1725967166655233, |
| "learning_rate": 1.3141861317770628e-05, |
| "loss": 2.4076, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.4138763197586727, |
| "grad_norm": 1.7110768486865224, |
| "learning_rate": 1.312341070691249e-05, |
| "loss": 2.4303, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.4144796380090498, |
| "grad_norm": 1.8876018712519784, |
| "learning_rate": 1.3104948306193932e-05, |
| "loss": 2.4082, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.41508295625942687, |
| "grad_norm": 1.7138291223841546, |
| "learning_rate": 1.308647418530451e-05, |
| "loss": 2.4395, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.41568627450980394, |
| "grad_norm": 1.6819798162077328, |
| "learning_rate": 1.3067988413978032e-05, |
| "loss": 2.398, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.416289592760181, |
| "grad_norm": 1.9776754872059599, |
| "learning_rate": 1.3049491061992274e-05, |
| "loss": 2.3758, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4168929110105581, |
| "grad_norm": 2.4185263540111133, |
| "learning_rate": 1.3030982199168732e-05, |
| "loss": 2.342, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.41749622926093516, |
| "grad_norm": 1.9732286019963168, |
| "learning_rate": 1.3012461895372343e-05, |
| "loss": 2.4523, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.41809954751131223, |
| "grad_norm": 1.866052215173324, |
| "learning_rate": 1.2993930220511245e-05, |
| "loss": 2.32, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4187028657616893, |
| "grad_norm": 1.7385777573811818, |
| "learning_rate": 1.2975387244536478e-05, |
| "loss": 2.3619, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4193061840120664, |
| "grad_norm": 1.7561905475842445, |
| "learning_rate": 1.2956833037441756e-05, |
| "loss": 2.3834, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.41990950226244345, |
| "grad_norm": 1.9463356269336127, |
| "learning_rate": 1.2938267669263179e-05, |
| "loss": 2.4704, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4205128205128205, |
| "grad_norm": 1.801091105836512, |
| "learning_rate": 1.2919691210078982e-05, |
| "loss": 2.3265, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4211161387631976, |
| "grad_norm": 1.943220111113144, |
| "learning_rate": 1.2901103730009261e-05, |
| "loss": 2.3718, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.42171945701357466, |
| "grad_norm": 1.836242175707625, |
| "learning_rate": 1.2882505299215711e-05, |
| "loss": 2.33, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.42232277526395173, |
| "grad_norm": 1.6868877589339009, |
| "learning_rate": 1.2863895987901364e-05, |
| "loss": 2.3869, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4229260935143288, |
| "grad_norm": 1.7970188659789745, |
| "learning_rate": 1.2845275866310325e-05, |
| "loss": 2.4001, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4235294117647059, |
| "grad_norm": 1.95275973209963, |
| "learning_rate": 1.2826645004727503e-05, |
| "loss": 2.3717, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.42413273001508295, |
| "grad_norm": 1.7680186752612674, |
| "learning_rate": 1.2808003473478343e-05, |
| "loss": 2.3729, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.42473604826546, |
| "grad_norm": 1.8149329130672966, |
| "learning_rate": 1.278935134292857e-05, |
| "loss": 2.3976, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4253393665158371, |
| "grad_norm": 2.088358936591777, |
| "learning_rate": 1.2770688683483914e-05, |
| "loss": 2.3096, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.42594268476621416, |
| "grad_norm": 2.111711796208285, |
| "learning_rate": 1.2752015565589852e-05, |
| "loss": 2.4244, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.42654600301659124, |
| "grad_norm": 1.7477535636939239, |
| "learning_rate": 1.2733332059731333e-05, |
| "loss": 2.3133, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.4271493212669683, |
| "grad_norm": 1.8511085641604008, |
| "learning_rate": 1.2714638236432526e-05, |
| "loss": 2.4276, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.4277526395173454, |
| "grad_norm": 1.9150387848439585, |
| "learning_rate": 1.2695934166256528e-05, |
| "loss": 2.5002, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.42835595776772245, |
| "grad_norm": 1.74427260027451, |
| "learning_rate": 1.2677219919805137e-05, |
| "loss": 2.3458, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4289592760180995, |
| "grad_norm": 1.805483491725746, |
| "learning_rate": 1.2658495567718543e-05, |
| "loss": 2.3962, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.4295625942684766, |
| "grad_norm": 1.7916901194265042, |
| "learning_rate": 1.2639761180675098e-05, |
| "loss": 2.4087, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.4301659125188537, |
| "grad_norm": 1.8014131588828444, |
| "learning_rate": 1.2621016829391022e-05, |
| "loss": 2.402, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4307692307692308, |
| "grad_norm": 1.9360536162458533, |
| "learning_rate": 1.2602262584620154e-05, |
| "loss": 2.36, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.43137254901960786, |
| "grad_norm": 1.8462103917561457, |
| "learning_rate": 1.2583498517153662e-05, |
| "loss": 2.3282, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.43197586726998494, |
| "grad_norm": 1.9550039911148358, |
| "learning_rate": 1.2564724697819814e-05, |
| "loss": 2.3885, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.432579185520362, |
| "grad_norm": 1.7547962323016189, |
| "learning_rate": 1.254594119748367e-05, |
| "loss": 2.3607, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4331825037707391, |
| "grad_norm": 1.7133242570223588, |
| "learning_rate": 1.2527148087046847e-05, |
| "loss": 2.3203, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.43378582202111615, |
| "grad_norm": 1.7841657659406196, |
| "learning_rate": 1.2508345437447226e-05, |
| "loss": 2.4431, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.4343891402714932, |
| "grad_norm": 1.6718263361727208, |
| "learning_rate": 1.2489533319658703e-05, |
| "loss": 2.372, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4349924585218703, |
| "grad_norm": 1.747207962107338, |
| "learning_rate": 1.2470711804690901e-05, |
| "loss": 2.4386, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.43559577677224737, |
| "grad_norm": 1.7786322597485218, |
| "learning_rate": 1.2451880963588927e-05, |
| "loss": 2.3613, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.43619909502262444, |
| "grad_norm": 1.7709982000618676, |
| "learning_rate": 1.2433040867433087e-05, |
| "loss": 2.3374, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.4368024132730015, |
| "grad_norm": 1.8274123875511648, |
| "learning_rate": 1.2414191587338627e-05, |
| "loss": 2.4121, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4374057315233786, |
| "grad_norm": 1.8239929149081342, |
| "learning_rate": 1.2395333194455444e-05, |
| "loss": 2.4182, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.43800904977375565, |
| "grad_norm": 1.8398198214129082, |
| "learning_rate": 1.2376465759967849e-05, |
| "loss": 2.3402, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4386123680241327, |
| "grad_norm": 1.9183852343383927, |
| "learning_rate": 1.2357589355094275e-05, |
| "loss": 2.3468, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.4392156862745098, |
| "grad_norm": 1.965693182428336, |
| "learning_rate": 1.2338704051087014e-05, |
| "loss": 2.3226, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.43981900452488687, |
| "grad_norm": 1.649936113840551, |
| "learning_rate": 1.2319809919231957e-05, |
| "loss": 2.4072, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.44042232277526394, |
| "grad_norm": 1.8341340410648517, |
| "learning_rate": 1.2300907030848307e-05, |
| "loss": 2.2409, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.441025641025641, |
| "grad_norm": 1.6910528946384642, |
| "learning_rate": 1.2281995457288324e-05, |
| "loss": 2.4764, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4416289592760181, |
| "grad_norm": 1.8929128640012716, |
| "learning_rate": 1.2263075269937057e-05, |
| "loss": 2.374, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.44223227752639516, |
| "grad_norm": 1.9914643440790407, |
| "learning_rate": 1.2244146540212063e-05, |
| "loss": 2.3981, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.44283559577677223, |
| "grad_norm": 1.8208263878089117, |
| "learning_rate": 1.2225209339563144e-05, |
| "loss": 2.3715, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.4434389140271493, |
| "grad_norm": 1.7264998580660669, |
| "learning_rate": 1.2206263739472085e-05, |
| "loss": 2.3765, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.44404223227752637, |
| "grad_norm": 1.7059386365866958, |
| "learning_rate": 1.2187309811452357e-05, |
| "loss": 2.4202, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.44464555052790344, |
| "grad_norm": 1.9473821855556346, |
| "learning_rate": 1.2168347627048891e-05, |
| "loss": 2.3991, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.4452488687782805, |
| "grad_norm": 1.752167824857159, |
| "learning_rate": 1.2149377257837767e-05, |
| "loss": 2.4284, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.44585218702865764, |
| "grad_norm": 1.8153328418105037, |
| "learning_rate": 1.2130398775425964e-05, |
| "loss": 2.3456, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4464555052790347, |
| "grad_norm": 2.073143137177712, |
| "learning_rate": 1.2111412251451085e-05, |
| "loss": 2.3302, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4470588235294118, |
| "grad_norm": 1.8523051441295324, |
| "learning_rate": 1.2092417757581085e-05, |
| "loss": 2.4034, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.44766214177978886, |
| "grad_norm": 1.8264910950212179, |
| "learning_rate": 1.2073415365514014e-05, |
| "loss": 2.3729, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.44826546003016593, |
| "grad_norm": 1.886363437229635, |
| "learning_rate": 1.2054405146977719e-05, |
| "loss": 2.2983, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.448868778280543, |
| "grad_norm": 1.914473698777555, |
| "learning_rate": 1.2035387173729606e-05, |
| "loss": 2.3985, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4494720965309201, |
| "grad_norm": 1.7371950120576183, |
| "learning_rate": 1.2016361517556334e-05, |
| "loss": 2.3949, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.45007541478129715, |
| "grad_norm": 1.8051498258025656, |
| "learning_rate": 1.1997328250273582e-05, |
| "loss": 2.3474, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4506787330316742, |
| "grad_norm": 1.8453416553348072, |
| "learning_rate": 1.1978287443725737e-05, |
| "loss": 2.3895, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4512820512820513, |
| "grad_norm": 1.871938613961509, |
| "learning_rate": 1.1959239169785668e-05, |
| "loss": 2.4942, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.45188536953242836, |
| "grad_norm": 1.9069542302070057, |
| "learning_rate": 1.194018350035441e-05, |
| "loss": 2.3946, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 1.8230490419481198, |
| "learning_rate": 1.1921120507360934e-05, |
| "loss": 2.3828, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4530920060331825, |
| "grad_norm": 1.7100130596372602, |
| "learning_rate": 1.190205026276183e-05, |
| "loss": 2.4444, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.4536953242835596, |
| "grad_norm": 2.037831829444688, |
| "learning_rate": 1.1882972838541084e-05, |
| "loss": 2.3874, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.45429864253393665, |
| "grad_norm": 1.8278872762113902, |
| "learning_rate": 1.1863888306709772e-05, |
| "loss": 2.4134, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4549019607843137, |
| "grad_norm": 1.7853622553097093, |
| "learning_rate": 1.1844796739305792e-05, |
| "loss": 2.4366, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.4555052790346908, |
| "grad_norm": 1.7502372984758952, |
| "learning_rate": 1.182569820839362e-05, |
| "loss": 2.3502, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.45610859728506786, |
| "grad_norm": 1.8953013191810872, |
| "learning_rate": 1.1806592786063991e-05, |
| "loss": 2.3261, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.45671191553544493, |
| "grad_norm": 1.8474649604357705, |
| "learning_rate": 1.1787480544433673e-05, |
| "loss": 2.4107, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.457315233785822, |
| "grad_norm": 1.9813399740850826, |
| "learning_rate": 1.1768361555645164e-05, |
| "loss": 2.3633, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.4579185520361991, |
| "grad_norm": 1.8305808245169044, |
| "learning_rate": 1.1749235891866437e-05, |
| "loss": 2.4417, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.45852187028657615, |
| "grad_norm": 1.835264821449839, |
| "learning_rate": 1.1730103625290658e-05, |
| "loss": 2.3976, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4591251885369532, |
| "grad_norm": 1.7498847639131172, |
| "learning_rate": 1.1710964828135913e-05, |
| "loss": 2.4323, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.4597285067873303, |
| "grad_norm": 1.7819698121609244, |
| "learning_rate": 1.1691819572644941e-05, |
| "loss": 2.3824, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.46033182503770737, |
| "grad_norm": 1.6605141840526678, |
| "learning_rate": 1.1672667931084862e-05, |
| "loss": 2.3819, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.46093514328808444, |
| "grad_norm": 1.7855281123097961, |
| "learning_rate": 1.1653509975746899e-05, |
| "loss": 2.3018, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 1.8601026398729645, |
| "learning_rate": 1.1634345778946112e-05, |
| "loss": 2.3745, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.46214177978883864, |
| "grad_norm": 1.7547970617468687, |
| "learning_rate": 1.1615175413021107e-05, |
| "loss": 2.3493, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4627450980392157, |
| "grad_norm": 1.9548991514666338, |
| "learning_rate": 1.1595998950333794e-05, |
| "loss": 2.3649, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4633484162895928, |
| "grad_norm": 1.8357021323354088, |
| "learning_rate": 1.1576816463269083e-05, |
| "loss": 2.4205, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.46395173453996985, |
| "grad_norm": 1.7260048621415132, |
| "learning_rate": 1.155762802423463e-05, |
| "loss": 2.3972, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.4645550527903469, |
| "grad_norm": 1.8773223271613868, |
| "learning_rate": 1.1538433705660561e-05, |
| "loss": 2.4437, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.465158371040724, |
| "grad_norm": 1.8937934512459285, |
| "learning_rate": 1.1519233579999187e-05, |
| "loss": 2.3078, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.46576168929110107, |
| "grad_norm": 1.8347615261731738, |
| "learning_rate": 1.1500027719724745e-05, |
| "loss": 2.3686, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.46636500754147814, |
| "grad_norm": 1.8991250931263675, |
| "learning_rate": 1.148081619733311e-05, |
| "loss": 2.4149, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.4669683257918552, |
| "grad_norm": 1.7068569233280024, |
| "learning_rate": 1.1461599085341549e-05, |
| "loss": 2.417, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.4675716440422323, |
| "grad_norm": 1.8546938193616271, |
| "learning_rate": 1.1442376456288402e-05, |
| "loss": 2.496, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.46817496229260935, |
| "grad_norm": 1.7282335446656245, |
| "learning_rate": 1.1423148382732854e-05, |
| "loss": 2.3496, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4687782805429864, |
| "grad_norm": 1.8993396930857818, |
| "learning_rate": 1.140391493725463e-05, |
| "loss": 2.3287, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4693815987933635, |
| "grad_norm": 1.7742417433579973, |
| "learning_rate": 1.138467619245374e-05, |
| "loss": 2.4362, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.46998491704374057, |
| "grad_norm": 1.6863824684148747, |
| "learning_rate": 1.1365432220950195e-05, |
| "loss": 2.3075, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 1.9755312265214005, |
| "learning_rate": 1.1346183095383731e-05, |
| "loss": 2.4258, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4711915535444947, |
| "grad_norm": 1.7659437015031778, |
| "learning_rate": 1.1326928888413539e-05, |
| "loss": 2.3789, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.4717948717948718, |
| "grad_norm": 1.9953934429422493, |
| "learning_rate": 1.1307669672718e-05, |
| "loss": 2.4341, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.47239819004524886, |
| "grad_norm": 1.6759600117800293, |
| "learning_rate": 1.128840552099439e-05, |
| "loss": 2.38, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.4730015082956259, |
| "grad_norm": 1.854728287765269, |
| "learning_rate": 1.1269136505958623e-05, |
| "loss": 2.3623, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.473604826546003, |
| "grad_norm": 1.892879776282185, |
| "learning_rate": 1.1249862700344969e-05, |
| "loss": 2.4379, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.47420814479638007, |
| "grad_norm": 1.842873393468782, |
| "learning_rate": 1.1230584176905784e-05, |
| "loss": 2.3336, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.47481146304675714, |
| "grad_norm": 1.7678588758287328, |
| "learning_rate": 1.1211301008411222e-05, |
| "loss": 2.3832, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.4754147812971342, |
| "grad_norm": 1.7957652718067842, |
| "learning_rate": 1.1192013267648982e-05, |
| "loss": 2.478, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4760180995475113, |
| "grad_norm": 2.0496571931310164, |
| "learning_rate": 1.1172721027424021e-05, |
| "loss": 2.3345, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4766214177978884, |
| "grad_norm": 1.8206515205346219, |
| "learning_rate": 1.1153424360558268e-05, |
| "loss": 2.3796, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4772247360482655, |
| "grad_norm": 1.9139906768147443, |
| "learning_rate": 1.1134123339890376e-05, |
| "loss": 2.3923, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.47782805429864256, |
| "grad_norm": 1.749531594471372, |
| "learning_rate": 1.1114818038275428e-05, |
| "loss": 2.5011, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.47843137254901963, |
| "grad_norm": 3.724545765526481, |
| "learning_rate": 1.109550852858466e-05, |
| "loss": 2.451, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4790346907993967, |
| "grad_norm": 1.7207858054874157, |
| "learning_rate": 1.1076194883705194e-05, |
| "loss": 2.4097, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.4796380090497738, |
| "grad_norm": 1.9401644882234703, |
| "learning_rate": 1.1056877176539767e-05, |
| "loss": 2.4028, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.48024132730015084, |
| "grad_norm": 1.8070053767404293, |
| "learning_rate": 1.1037555480006445e-05, |
| "loss": 2.3567, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4808446455505279, |
| "grad_norm": 1.8091050025141635, |
| "learning_rate": 1.1018229867038358e-05, |
| "loss": 2.4448, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.481447963800905, |
| "grad_norm": 1.78424566269542, |
| "learning_rate": 1.0998900410583404e-05, |
| "loss": 2.3909, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.48205128205128206, |
| "grad_norm": 1.7452068371419385, |
| "learning_rate": 1.0979567183604009e-05, |
| "loss": 2.3607, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.48265460030165913, |
| "grad_norm": 1.8685395323481204, |
| "learning_rate": 1.0960230259076819e-05, |
| "loss": 2.3596, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4832579185520362, |
| "grad_norm": 1.7681779054376958, |
| "learning_rate": 1.0940889709992441e-05, |
| "loss": 2.4577, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.4838612368024133, |
| "grad_norm": 1.7900733532064816, |
| "learning_rate": 1.0921545609355162e-05, |
| "loss": 2.3578, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.48446455505279035, |
| "grad_norm": 1.8719430790020382, |
| "learning_rate": 1.0902198030182677e-05, |
| "loss": 2.4568, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.4850678733031674, |
| "grad_norm": 1.7120170734056872, |
| "learning_rate": 1.0882847045505809e-05, |
| "loss": 2.3494, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.4856711915535445, |
| "grad_norm": 1.6197324052113928, |
| "learning_rate": 1.086349272836824e-05, |
| "loss": 2.3106, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.48627450980392156, |
| "grad_norm": 1.8174715253639449, |
| "learning_rate": 1.084413515182622e-05, |
| "loss": 2.3496, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.48687782805429863, |
| "grad_norm": 2.0172167263611653, |
| "learning_rate": 1.0824774388948321e-05, |
| "loss": 2.4097, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.4874811463046757, |
| "grad_norm": 1.7907222804659544, |
| "learning_rate": 1.0805410512815123e-05, |
| "loss": 2.2761, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4880844645550528, |
| "grad_norm": 1.7182248007974297, |
| "learning_rate": 1.0786043596518964e-05, |
| "loss": 2.3949, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.48868778280542985, |
| "grad_norm": 1.761572833600802, |
| "learning_rate": 1.0766673713163667e-05, |
| "loss": 2.3268, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4892911010558069, |
| "grad_norm": 1.978086578514091, |
| "learning_rate": 1.0747300935864245e-05, |
| "loss": 2.4644, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.489894419306184, |
| "grad_norm": 2.179344097420728, |
| "learning_rate": 1.0727925337746633e-05, |
| "loss": 2.3845, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.49049773755656106, |
| "grad_norm": 1.8078731760776887, |
| "learning_rate": 1.0708546991947422e-05, |
| "loss": 2.3472, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.49110105580693814, |
| "grad_norm": 1.7449102909226242, |
| "learning_rate": 1.0689165971613566e-05, |
| "loss": 2.3945, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4917043740573152, |
| "grad_norm": 1.739318963481491, |
| "learning_rate": 1.0669782349902122e-05, |
| "loss": 2.3701, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.49230769230769234, |
| "grad_norm": 1.6488397607877079, |
| "learning_rate": 1.0650396199979964e-05, |
| "loss": 2.2992, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4929110105580694, |
| "grad_norm": 1.7709600436471968, |
| "learning_rate": 1.0631007595023504e-05, |
| "loss": 2.425, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4935143288084465, |
| "grad_norm": 1.7076343907698315, |
| "learning_rate": 1.0611616608218429e-05, |
| "loss": 2.426, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.49411764705882355, |
| "grad_norm": 1.8481580796609003, |
| "learning_rate": 1.0592223312759409e-05, |
| "loss": 2.365, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4947209653092006, |
| "grad_norm": 1.8715006232035116, |
| "learning_rate": 1.0572827781849835e-05, |
| "loss": 2.5006, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4953242835595777, |
| "grad_norm": 1.6925435107980902, |
| "learning_rate": 1.0553430088701533e-05, |
| "loss": 2.3887, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.49592760180995477, |
| "grad_norm": 13.753810679352204, |
| "learning_rate": 1.0534030306534491e-05, |
| "loss": 2.3408, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.49653092006033184, |
| "grad_norm": 1.7063797659828734, |
| "learning_rate": 1.051462850857658e-05, |
| "loss": 2.3331, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.4971342383107089, |
| "grad_norm": 1.7178057305930887, |
| "learning_rate": 1.0495224768063288e-05, |
| "loss": 2.3707, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.497737556561086, |
| "grad_norm": 1.7992237998948943, |
| "learning_rate": 1.0475819158237426e-05, |
| "loss": 2.5099, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.49834087481146305, |
| "grad_norm": 1.7416788690847198, |
| "learning_rate": 1.045641175234886e-05, |
| "loss": 2.3624, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4989441930618401, |
| "grad_norm": 1.8216770572714687, |
| "learning_rate": 1.0437002623654256e-05, |
| "loss": 2.3162, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4995475113122172, |
| "grad_norm": 1.7366014386825241, |
| "learning_rate": 1.0417591845416748e-05, |
| "loss": 2.3976, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5001508295625943, |
| "grad_norm": 1.7235867220953933, |
| "learning_rate": 1.0398179490905731e-05, |
| "loss": 2.2961, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5007541478129713, |
| "grad_norm": 1.6477357985522785, |
| "learning_rate": 1.0378765633396526e-05, |
| "loss": 2.4396, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5013574660633484, |
| "grad_norm": 1.6818154245812602, |
| "learning_rate": 1.0359350346170142e-05, |
| "loss": 2.3604, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.5019607843137255, |
| "grad_norm": 1.843044729927334, |
| "learning_rate": 1.0339933702512978e-05, |
| "loss": 2.3084, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5019607843137255, |
| "eval_loss": 2.387592077255249, |
| "eval_runtime": 22.7208, |
| "eval_samples_per_second": 3.873, |
| "eval_steps_per_second": 0.484, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5025641025641026, |
| "grad_norm": 1.7184520976508428, |
| "learning_rate": 1.0320515775716556e-05, |
| "loss": 2.4301, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5031674208144796, |
| "grad_norm": 1.6890268292680732, |
| "learning_rate": 1.0301096639077229e-05, |
| "loss": 2.4126, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5037707390648567, |
| "grad_norm": 1.7252703313236202, |
| "learning_rate": 1.0281676365895939e-05, |
| "loss": 2.2905, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5043740573152338, |
| "grad_norm": 1.7486803185975979, |
| "learning_rate": 1.02622550294779e-05, |
| "loss": 2.4323, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5049773755656108, |
| "grad_norm": 1.8603836336573905, |
| "learning_rate": 1.0242832703132353e-05, |
| "loss": 2.3632, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5055806938159879, |
| "grad_norm": 1.9392259883391494, |
| "learning_rate": 1.022340946017226e-05, |
| "loss": 2.3691, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.506184012066365, |
| "grad_norm": 1.806362355351131, |
| "learning_rate": 1.0203985373914056e-05, |
| "loss": 2.4253, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5067873303167421, |
| "grad_norm": 1.753984778463969, |
| "learning_rate": 1.0184560517677353e-05, |
| "loss": 2.3383, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5073906485671191, |
| "grad_norm": 1.743749696683883, |
| "learning_rate": 1.0165134964784669e-05, |
| "loss": 2.3721, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5079939668174962, |
| "grad_norm": 1.8626358872488646, |
| "learning_rate": 1.0145708788561157e-05, |
| "loss": 2.4944, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5085972850678733, |
| "grad_norm": 1.7659918339275127, |
| "learning_rate": 1.012628206233432e-05, |
| "loss": 2.3927, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5092006033182503, |
| "grad_norm": 1.768537603828139, |
| "learning_rate": 1.0106854859433734e-05, |
| "loss": 2.3068, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.5098039215686274, |
| "grad_norm": 1.6921895602019934, |
| "learning_rate": 1.0087427253190775e-05, |
| "loss": 2.3904, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5104072398190045, |
| "grad_norm": 1.8514273394892835, |
| "learning_rate": 1.0067999316938348e-05, |
| "loss": 2.3424, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5110105580693816, |
| "grad_norm": 1.793772258787467, |
| "learning_rate": 1.0048571124010597e-05, |
| "loss": 2.398, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5116138763197586, |
| "grad_norm": 1.6918053539167304, |
| "learning_rate": 1.0029142747742637e-05, |
| "loss": 2.3746, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5122171945701357, |
| "grad_norm": 1.6101656310591637, |
| "learning_rate": 1.0009714261470274e-05, |
| "loss": 2.3165, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 1.700912056066946, |
| "learning_rate": 9.990285738529733e-06, |
| "loss": 2.4027, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5134238310708898, |
| "grad_norm": 1.7795095656194206, |
| "learning_rate": 9.970857252257368e-06, |
| "loss": 2.4301, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5140271493212669, |
| "grad_norm": 1.8166978303180596, |
| "learning_rate": 9.951428875989408e-06, |
| "loss": 2.3943, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.514630467571644, |
| "grad_norm": 1.7906994439446355, |
| "learning_rate": 9.932000683061654e-06, |
| "loss": 2.3666, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5152337858220211, |
| "grad_norm": 1.8507567275198866, |
| "learning_rate": 9.912572746809228e-06, |
| "loss": 2.4303, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5158371040723982, |
| "grad_norm": 1.7029190223017787, |
| "learning_rate": 9.89314514056627e-06, |
| "loss": 2.3407, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.5164404223227753, |
| "grad_norm": 1.8029171374885462, |
| "learning_rate": 9.873717937665683e-06, |
| "loss": 2.3434, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5170437405731524, |
| "grad_norm": 1.8949933952050677, |
| "learning_rate": 9.854291211438846e-06, |
| "loss": 2.451, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.5176470588235295, |
| "grad_norm": 1.85164457739374, |
| "learning_rate": 9.834865035215333e-06, |
| "loss": 2.3654, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5182503770739065, |
| "grad_norm": 1.7585113182468484, |
| "learning_rate": 9.81543948232265e-06, |
| "loss": 2.4026, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5188536953242836, |
| "grad_norm": 1.8861048330861192, |
| "learning_rate": 9.79601462608595e-06, |
| "loss": 2.3372, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5194570135746607, |
| "grad_norm": 1.7958907530874708, |
| "learning_rate": 9.776590539827745e-06, |
| "loss": 2.4609, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.5200603318250377, |
| "grad_norm": 1.7290587905372026, |
| "learning_rate": 9.757167296867652e-06, |
| "loss": 2.331, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5206636500754148, |
| "grad_norm": 1.7389613818950007, |
| "learning_rate": 9.737744970522101e-06, |
| "loss": 2.3763, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5212669683257919, |
| "grad_norm": 1.8264438820594222, |
| "learning_rate": 9.718323634104063e-06, |
| "loss": 2.3895, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.521870286576169, |
| "grad_norm": 1.786930557626034, |
| "learning_rate": 9.698903360922773e-06, |
| "loss": 2.3234, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.522473604826546, |
| "grad_norm": 2.0432045735745468, |
| "learning_rate": 9.67948422428345e-06, |
| "loss": 2.4544, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5230769230769231, |
| "grad_norm": 1.7232243886550707, |
| "learning_rate": 9.660066297487024e-06, |
| "loss": 2.4616, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5236802413273002, |
| "grad_norm": 1.7777195779321493, |
| "learning_rate": 9.640649653829856e-06, |
| "loss": 2.3366, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5242835595776772, |
| "grad_norm": 1.7329928051061616, |
| "learning_rate": 9.621234366603474e-06, |
| "loss": 2.3406, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5248868778280543, |
| "grad_norm": 1.6947835637273732, |
| "learning_rate": 9.601820509094272e-06, |
| "loss": 2.399, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5254901960784314, |
| "grad_norm": 1.7646228167407467, |
| "learning_rate": 9.582408154583256e-06, |
| "loss": 2.4245, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5260935143288085, |
| "grad_norm": 1.7818475873068609, |
| "learning_rate": 9.56299737634575e-06, |
| "loss": 2.4695, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5266968325791855, |
| "grad_norm": 1.6020743028303097, |
| "learning_rate": 9.543588247651141e-06, |
| "loss": 2.3758, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5273001508295626, |
| "grad_norm": 2.0059468394494795, |
| "learning_rate": 9.524180841762577e-06, |
| "loss": 2.3622, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5279034690799397, |
| "grad_norm": 1.8138883017044576, |
| "learning_rate": 9.504775231936716e-06, |
| "loss": 2.439, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5285067873303168, |
| "grad_norm": 1.6606861902316774, |
| "learning_rate": 9.485371491423421e-06, |
| "loss": 2.3756, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5291101055806938, |
| "grad_norm": 1.9247118890514288, |
| "learning_rate": 9.46596969346551e-06, |
| "loss": 2.4956, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5297134238310709, |
| "grad_norm": 1.6729495810971284, |
| "learning_rate": 9.446569911298469e-06, |
| "loss": 2.4496, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.530316742081448, |
| "grad_norm": 1.721608451090351, |
| "learning_rate": 9.427172218150166e-06, |
| "loss": 2.4336, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.530920060331825, |
| "grad_norm": 1.7440028090249518, |
| "learning_rate": 9.407776687240591e-06, |
| "loss": 2.399, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5315233785822021, |
| "grad_norm": 1.852395281031802, |
| "learning_rate": 9.388383391781576e-06, |
| "loss": 2.3752, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5321266968325792, |
| "grad_norm": 2.063001040819312, |
| "learning_rate": 9.3689924049765e-06, |
| "loss": 2.4234, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5327300150829563, |
| "grad_norm": 2.1374286448897055, |
| "learning_rate": 9.34960380002004e-06, |
| "loss": 2.4208, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.7187678468330105, |
| "learning_rate": 9.330217650097881e-06, |
| "loss": 2.3219, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5339366515837104, |
| "grad_norm": 1.7650084385659235, |
| "learning_rate": 9.310834028386436e-06, |
| "loss": 2.3753, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5345399698340875, |
| "grad_norm": 1.7280929880181042, |
| "learning_rate": 9.291453008052582e-06, |
| "loss": 2.3902, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5351432880844645, |
| "grad_norm": 1.9002978359745568, |
| "learning_rate": 9.272074662253368e-06, |
| "loss": 2.3117, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5357466063348416, |
| "grad_norm": 1.95189657556873, |
| "learning_rate": 9.252699064135759e-06, |
| "loss": 2.4177, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5363499245852187, |
| "grad_norm": 2.014163602151102, |
| "learning_rate": 9.233326286836335e-06, |
| "loss": 2.4068, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.5369532428355958, |
| "grad_norm": 1.8364320842962563, |
| "learning_rate": 9.213956403481037e-06, |
| "loss": 2.3912, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5375565610859728, |
| "grad_norm": 1.7499298359418458, |
| "learning_rate": 9.194589487184884e-06, |
| "loss": 2.3843, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5381598793363499, |
| "grad_norm": 1.686625665723383, |
| "learning_rate": 9.175225611051684e-06, |
| "loss": 2.3425, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.538763197586727, |
| "grad_norm": 1.7193751821408507, |
| "learning_rate": 9.155864848173782e-06, |
| "loss": 2.3955, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.539366515837104, |
| "grad_norm": 1.7894758836039961, |
| "learning_rate": 9.136507271631764e-06, |
| "loss": 2.3973, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5399698340874811, |
| "grad_norm": 1.8264782691225658, |
| "learning_rate": 9.117152954494195e-06, |
| "loss": 2.3688, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.5405731523378582, |
| "grad_norm": 1.7326254631321008, |
| "learning_rate": 9.097801969817324e-06, |
| "loss": 2.328, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5411764705882353, |
| "grad_norm": 1.6970752681437251, |
| "learning_rate": 9.078454390644841e-06, |
| "loss": 2.3562, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5417797888386123, |
| "grad_norm": 1.874581417655975, |
| "learning_rate": 9.05911029000756e-06, |
| "loss": 2.3387, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5423831070889894, |
| "grad_norm": 1.7409066719499708, |
| "learning_rate": 9.039769740923183e-06, |
| "loss": 2.3901, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5429864253393665, |
| "grad_norm": 1.9842969144165807, |
| "learning_rate": 9.020432816395993e-06, |
| "loss": 2.4248, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5435897435897435, |
| "grad_norm": 1.9292319261176405, |
| "learning_rate": 9.001099589416602e-06, |
| "loss": 2.3535, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.5441930618401206, |
| "grad_norm": 1.7634105099688444, |
| "learning_rate": 8.981770132961649e-06, |
| "loss": 2.3952, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5447963800904977, |
| "grad_norm": 1.7210160546146538, |
| "learning_rate": 8.962444519993558e-06, |
| "loss": 2.4644, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5453996983408748, |
| "grad_norm": 2.0164293222578227, |
| "learning_rate": 8.943122823460235e-06, |
| "loss": 2.4605, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5460030165912518, |
| "grad_norm": 1.640145013684435, |
| "learning_rate": 8.92380511629481e-06, |
| "loss": 2.3656, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5466063348416289, |
| "grad_norm": 1.7608736859619678, |
| "learning_rate": 8.904491471415343e-06, |
| "loss": 2.4277, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5472096530920061, |
| "grad_norm": 1.689731655791089, |
| "learning_rate": 8.885181961724575e-06, |
| "loss": 2.3845, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5478129713423832, |
| "grad_norm": 2.006248612420757, |
| "learning_rate": 8.865876660109625e-06, |
| "loss": 2.3648, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5484162895927602, |
| "grad_norm": 2.0601382092003004, |
| "learning_rate": 8.846575639441732e-06, |
| "loss": 2.3658, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 1.758520194938707, |
| "learning_rate": 8.827278972575984e-06, |
| "loss": 2.4306, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5496229260935144, |
| "grad_norm": 1.8296491728786637, |
| "learning_rate": 8.807986732351018e-06, |
| "loss": 2.3984, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5502262443438914, |
| "grad_norm": 1.8016195740772143, |
| "learning_rate": 8.788698991588782e-06, |
| "loss": 2.3319, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5508295625942685, |
| "grad_norm": 1.9273005935291878, |
| "learning_rate": 8.769415823094221e-06, |
| "loss": 2.373, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5514328808446456, |
| "grad_norm": 1.7813908784365993, |
| "learning_rate": 8.750137299655034e-06, |
| "loss": 2.3778, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5520361990950227, |
| "grad_norm": 1.9254300444919479, |
| "learning_rate": 8.730863494041379e-06, |
| "loss": 2.3988, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5526395173453997, |
| "grad_norm": 1.934896760614601, |
| "learning_rate": 8.711594479005614e-06, |
| "loss": 2.3483, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5532428355957768, |
| "grad_norm": 1.8095850637153221, |
| "learning_rate": 8.692330327282003e-06, |
| "loss": 2.3025, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5538461538461539, |
| "grad_norm": 1.7458152707487324, |
| "learning_rate": 8.673071111586463e-06, |
| "loss": 2.3381, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.554449472096531, |
| "grad_norm": 1.72088810393769, |
| "learning_rate": 8.653816904616272e-06, |
| "loss": 2.3856, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.555052790346908, |
| "grad_norm": 1.7299264791576274, |
| "learning_rate": 8.634567779049807e-06, |
| "loss": 2.3233, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5556561085972851, |
| "grad_norm": 1.8697572352042986, |
| "learning_rate": 8.615323807546258e-06, |
| "loss": 2.4076, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5562594268476622, |
| "grad_norm": 1.65992529283014, |
| "learning_rate": 8.596085062745375e-06, |
| "loss": 2.3409, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5568627450980392, |
| "grad_norm": 1.842335383881053, |
| "learning_rate": 8.576851617267151e-06, |
| "loss": 2.3512, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5574660633484163, |
| "grad_norm": 1.8043016583386247, |
| "learning_rate": 8.557623543711603e-06, |
| "loss": 2.4132, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5580693815987934, |
| "grad_norm": 1.8017666828160193, |
| "learning_rate": 8.538400914658456e-06, |
| "loss": 2.4248, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.5586726998491705, |
| "grad_norm": 1.8679972521400925, |
| "learning_rate": 8.519183802666891e-06, |
| "loss": 2.411, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5592760180995475, |
| "grad_norm": 1.7033984464574257, |
| "learning_rate": 8.499972280275259e-06, |
| "loss": 2.3492, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5598793363499246, |
| "grad_norm": 1.8740497521935295, |
| "learning_rate": 8.480766420000815e-06, |
| "loss": 2.3457, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5604826546003017, |
| "grad_norm": 1.6508049361676014, |
| "learning_rate": 8.46156629433944e-06, |
| "loss": 2.3904, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5610859728506787, |
| "grad_norm": 1.916157350258544, |
| "learning_rate": 8.442371975765368e-06, |
| "loss": 2.506, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5616892911010558, |
| "grad_norm": 1.6300920878061467, |
| "learning_rate": 8.423183536730919e-06, |
| "loss": 2.3701, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5622926093514329, |
| "grad_norm": 1.7723202663180058, |
| "learning_rate": 8.404001049666211e-06, |
| "loss": 2.3965, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.56289592760181, |
| "grad_norm": 1.8619013776832338, |
| "learning_rate": 8.384824586978896e-06, |
| "loss": 2.4014, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.563499245852187, |
| "grad_norm": 1.9071332908477077, |
| "learning_rate": 8.365654221053894e-06, |
| "loss": 2.3833, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 1.691778794010645, |
| "learning_rate": 8.346490024253103e-06, |
| "loss": 2.3487, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5647058823529412, |
| "grad_norm": 1.787146332821833, |
| "learning_rate": 8.327332068915141e-06, |
| "loss": 2.2985, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5653092006033182, |
| "grad_norm": 2.1950823456779345, |
| "learning_rate": 8.308180427355062e-06, |
| "loss": 2.3126, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5659125188536953, |
| "grad_norm": 1.775970367583555, |
| "learning_rate": 8.28903517186409e-06, |
| "loss": 2.3708, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5665158371040724, |
| "grad_norm": 1.8089771718898564, |
| "learning_rate": 8.269896374709345e-06, |
| "loss": 2.3944, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5671191553544495, |
| "grad_norm": 1.78584822525857, |
| "learning_rate": 8.250764108133562e-06, |
| "loss": 2.3171, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5677224736048265, |
| "grad_norm": 1.9530454117081384, |
| "learning_rate": 8.231638444354836e-06, |
| "loss": 2.4186, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.5683257918552036, |
| "grad_norm": 1.768100357370704, |
| "learning_rate": 8.212519455566328e-06, |
| "loss": 2.3517, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5689291101055807, |
| "grad_norm": 1.7791100202829597, |
| "learning_rate": 8.193407213936014e-06, |
| "loss": 2.3895, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5695324283559577, |
| "grad_norm": 1.8015643530446919, |
| "learning_rate": 8.174301791606384e-06, |
| "loss": 2.4025, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5701357466063348, |
| "grad_norm": 1.731486155885199, |
| "learning_rate": 8.15520326069421e-06, |
| "loss": 2.4665, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5707390648567119, |
| "grad_norm": 1.8081292377643385, |
| "learning_rate": 8.136111693290231e-06, |
| "loss": 2.2807, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.571342383107089, |
| "grad_norm": 1.712750921195952, |
| "learning_rate": 8.117027161458917e-06, |
| "loss": 2.3717, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.571945701357466, |
| "grad_norm": 1.8003483361496844, |
| "learning_rate": 8.097949737238172e-06, |
| "loss": 2.2899, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5725490196078431, |
| "grad_norm": 1.7573744920295975, |
| "learning_rate": 8.078879492639069e-06, |
| "loss": 2.274, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5731523378582202, |
| "grad_norm": 1.983102632284402, |
| "learning_rate": 8.05981649964559e-06, |
| "loss": 2.4041, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5737556561085972, |
| "grad_norm": 1.7185303365501152, |
| "learning_rate": 8.040760830214334e-06, |
| "loss": 2.4131, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5743589743589743, |
| "grad_norm": 1.8744380753797991, |
| "learning_rate": 8.021712556274264e-06, |
| "loss": 2.3407, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5749622926093514, |
| "grad_norm": 1.9017253226675055, |
| "learning_rate": 8.002671749726425e-06, |
| "loss": 2.3663, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5755656108597285, |
| "grad_norm": 1.6585789142483918, |
| "learning_rate": 7.983638482443671e-06, |
| "loss": 2.325, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5761689291101055, |
| "grad_norm": 1.7262944652057712, |
| "learning_rate": 7.964612826270399e-06, |
| "loss": 2.3591, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5767722473604826, |
| "grad_norm": 1.760968270313303, |
| "learning_rate": 7.945594853022283e-06, |
| "loss": 2.4155, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5773755656108597, |
| "grad_norm": 1.6969107057603918, |
| "learning_rate": 7.926584634485988e-06, |
| "loss": 2.4495, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5779788838612367, |
| "grad_norm": 1.7402844415180172, |
| "learning_rate": 7.907582242418916e-06, |
| "loss": 2.3239, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5785822021116139, |
| "grad_norm": 1.7038508656877012, |
| "learning_rate": 7.888587748548918e-06, |
| "loss": 2.415, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.579185520361991, |
| "grad_norm": 1.7929534294396436, |
| "learning_rate": 7.86960122457404e-06, |
| "loss": 2.4077, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5797888386123681, |
| "grad_norm": 1.7460741900975971, |
| "learning_rate": 7.850622742162236e-06, |
| "loss": 2.3493, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5803921568627451, |
| "grad_norm": 1.703203253822099, |
| "learning_rate": 7.831652372951109e-06, |
| "loss": 2.3821, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5809954751131222, |
| "grad_norm": 1.7364171065552723, |
| "learning_rate": 7.812690188547645e-06, |
| "loss": 2.3724, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5815987933634993, |
| "grad_norm": 1.9274640570228776, |
| "learning_rate": 7.793736260527922e-06, |
| "loss": 2.4338, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5822021116138764, |
| "grad_norm": 1.706481470208011, |
| "learning_rate": 7.774790660436857e-06, |
| "loss": 2.4833, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5828054298642534, |
| "grad_norm": 1.8296308204177585, |
| "learning_rate": 7.75585345978794e-06, |
| "loss": 2.4156, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5834087481146305, |
| "grad_norm": 1.8143140751021931, |
| "learning_rate": 7.736924730062947e-06, |
| "loss": 2.3107, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5840120663650076, |
| "grad_norm": 2.1068370693786953, |
| "learning_rate": 7.718004542711677e-06, |
| "loss": 2.4055, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5846153846153846, |
| "grad_norm": 1.6564875004624282, |
| "learning_rate": 7.699092969151698e-06, |
| "loss": 2.3797, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5852187028657617, |
| "grad_norm": 1.712883405875796, |
| "learning_rate": 7.680190080768046e-06, |
| "loss": 2.4207, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5858220211161388, |
| "grad_norm": 1.7983081675568953, |
| "learning_rate": 7.661295948912988e-06, |
| "loss": 2.38, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5864253393665159, |
| "grad_norm": 1.669670969733644, |
| "learning_rate": 7.642410644905726e-06, |
| "loss": 2.3756, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5870286576168929, |
| "grad_norm": 1.7493034103928633, |
| "learning_rate": 7.623534240032153e-06, |
| "loss": 2.364, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.58763197586727, |
| "grad_norm": 1.7740033814156886, |
| "learning_rate": 7.604666805544561e-06, |
| "loss": 2.3382, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 1.9391123215492103, |
| "learning_rate": 7.585808412661379e-06, |
| "loss": 2.3416, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5888386123680242, |
| "grad_norm": 1.8938327666813242, |
| "learning_rate": 7.566959132566914e-06, |
| "loss": 2.4275, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5894419306184012, |
| "grad_norm": 1.6461158841876589, |
| "learning_rate": 7.548119036411077e-06, |
| "loss": 2.3116, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5900452488687783, |
| "grad_norm": 1.7377089925600602, |
| "learning_rate": 7.529288195309102e-06, |
| "loss": 2.4299, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5906485671191554, |
| "grad_norm": 1.7021528327172468, |
| "learning_rate": 7.5104666803413015e-06, |
| "loss": 2.4079, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5912518853695324, |
| "grad_norm": 2.549582267427392, |
| "learning_rate": 7.4916545625527745e-06, |
| "loss": 2.4804, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5918552036199095, |
| "grad_norm": 1.731330521143831, |
| "learning_rate": 7.472851912953152e-06, |
| "loss": 2.3502, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5924585218702866, |
| "grad_norm": 1.750250274307276, |
| "learning_rate": 7.45405880251633e-06, |
| "loss": 2.3992, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5930618401206637, |
| "grad_norm": 1.722284719736197, |
| "learning_rate": 7.435275302180187e-06, |
| "loss": 2.3836, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5936651583710407, |
| "grad_norm": 1.761282630994155, |
| "learning_rate": 7.416501482846341e-06, |
| "loss": 2.4412, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5942684766214178, |
| "grad_norm": 1.8234928253520493, |
| "learning_rate": 7.397737415379853e-06, |
| "loss": 2.4086, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5948717948717949, |
| "grad_norm": 1.933085444592592, |
| "learning_rate": 7.378983170608982e-06, |
| "loss": 2.3915, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5954751131221719, |
| "grad_norm": 1.6863684524777538, |
| "learning_rate": 7.360238819324903e-06, |
| "loss": 2.3106, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.596078431372549, |
| "grad_norm": 1.654594116317452, |
| "learning_rate": 7.341504432281459e-06, |
| "loss": 2.4465, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5966817496229261, |
| "grad_norm": 1.7758673763312907, |
| "learning_rate": 7.322780080194867e-06, |
| "loss": 2.4278, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5972850678733032, |
| "grad_norm": 1.6980250876911802, |
| "learning_rate": 7.304065833743475e-06, |
| "loss": 2.3902, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5978883861236802, |
| "grad_norm": 1.7745567427843472, |
| "learning_rate": 7.285361763567477e-06, |
| "loss": 2.4236, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5984917043740573, |
| "grad_norm": 1.652280491444713, |
| "learning_rate": 7.266667940268668e-06, |
| "loss": 2.3634, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5990950226244344, |
| "grad_norm": 1.8981724529225397, |
| "learning_rate": 7.24798443441015e-06, |
| "loss": 2.3727, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5996983408748114, |
| "grad_norm": 1.7079496543700798, |
| "learning_rate": 7.22931131651609e-06, |
| "loss": 2.3931, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6003016591251885, |
| "grad_norm": 1.9997843698547484, |
| "learning_rate": 7.210648657071433e-06, |
| "loss": 2.4107, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6009049773755656, |
| "grad_norm": 1.8744026362961528, |
| "learning_rate": 7.191996526521661e-06, |
| "loss": 2.3737, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6015082956259427, |
| "grad_norm": 1.7267988037294835, |
| "learning_rate": 7.173354995272499e-06, |
| "loss": 2.4609, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.6021116138763197, |
| "grad_norm": 1.7375446041528633, |
| "learning_rate": 7.154724133689677e-06, |
| "loss": 2.3567, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6027149321266968, |
| "grad_norm": 1.789874796279508, |
| "learning_rate": 7.1361040120986394e-06, |
| "loss": 2.3626, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.6033182503770739, |
| "grad_norm": 1.8360650580584557, |
| "learning_rate": 7.117494700784292e-06, |
| "loss": 2.3746, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6039215686274509, |
| "grad_norm": 1.8647510973629962, |
| "learning_rate": 7.098896269990743e-06, |
| "loss": 2.4365, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.604524886877828, |
| "grad_norm": 1.6999258224571074, |
| "learning_rate": 7.080308789921019e-06, |
| "loss": 2.385, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.6051282051282051, |
| "grad_norm": 1.7305006652986321, |
| "learning_rate": 7.061732330736823e-06, |
| "loss": 2.4122, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.6057315233785822, |
| "grad_norm": 1.8268769233660138, |
| "learning_rate": 7.04316696255825e-06, |
| "loss": 2.3716, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6063348416289592, |
| "grad_norm": 1.680563312051675, |
| "learning_rate": 7.024612755463529e-06, |
| "loss": 2.453, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.6069381598793363, |
| "grad_norm": 1.647602536203344, |
| "learning_rate": 7.006069779488761e-06, |
| "loss": 2.3768, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6075414781297134, |
| "grad_norm": 1.8306490002639166, |
| "learning_rate": 6.9875381046276605e-06, |
| "loss": 2.3631, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.6081447963800904, |
| "grad_norm": 1.713462266890918, |
| "learning_rate": 6.969017800831273e-06, |
| "loss": 2.3453, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6087481146304675, |
| "grad_norm": 2.08603045309057, |
| "learning_rate": 6.95050893800773e-06, |
| "loss": 2.3204, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.6093514328808446, |
| "grad_norm": 1.9474114701647294, |
| "learning_rate": 6.9320115860219705e-06, |
| "loss": 2.3946, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6099547511312218, |
| "grad_norm": 1.8838133566846818, |
| "learning_rate": 6.913525814695492e-06, |
| "loss": 2.4846, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.6105580693815988, |
| "grad_norm": 1.837197570579041, |
| "learning_rate": 6.8950516938060716e-06, |
| "loss": 2.4237, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.6111613876319759, |
| "grad_norm": 1.800008780512233, |
| "learning_rate": 6.87658929308751e-06, |
| "loss": 2.3863, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.611764705882353, |
| "grad_norm": 1.8475263728724154, |
| "learning_rate": 6.8581386822293765e-06, |
| "loss": 2.3558, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.6123680241327301, |
| "grad_norm": 1.8487028902348472, |
| "learning_rate": 6.839699930876727e-06, |
| "loss": 2.2994, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.6129713423831071, |
| "grad_norm": 1.8095359265876, |
| "learning_rate": 6.821273108629853e-06, |
| "loss": 2.3966, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6135746606334842, |
| "grad_norm": 2.1655842902381837, |
| "learning_rate": 6.802858285044025e-06, |
| "loss": 2.3741, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.6141779788838613, |
| "grad_norm": 1.9282607392510218, |
| "learning_rate": 6.784455529629218e-06, |
| "loss": 2.411, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.6147812971342383, |
| "grad_norm": 2.0907716606886937, |
| "learning_rate": 6.76606491184985e-06, |
| "loss": 2.4071, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 1.8869508977324307, |
| "learning_rate": 6.747686501124531e-06, |
| "loss": 2.421, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6159879336349925, |
| "grad_norm": 1.7052193424579607, |
| "learning_rate": 6.729320366825785e-06, |
| "loss": 2.3511, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.6165912518853696, |
| "grad_norm": 1.7106004296023014, |
| "learning_rate": 6.710966578279802e-06, |
| "loss": 2.3493, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6171945701357466, |
| "grad_norm": 1.6618114403839686, |
| "learning_rate": 6.692625204766172e-06, |
| "loss": 2.3097, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.6177978883861237, |
| "grad_norm": 1.7345051493958623, |
| "learning_rate": 6.6742963155176185e-06, |
| "loss": 2.426, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6184012066365008, |
| "grad_norm": 1.9925628567257796, |
| "learning_rate": 6.655979979719744e-06, |
| "loss": 2.3454, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.6190045248868778, |
| "grad_norm": 1.8951801846690113, |
| "learning_rate": 6.63767626651076e-06, |
| "loss": 2.4046, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6196078431372549, |
| "grad_norm": 1.8958330452775232, |
| "learning_rate": 6.619385244981233e-06, |
| "loss": 2.3169, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.620211161387632, |
| "grad_norm": 1.850986974478954, |
| "learning_rate": 6.601106984173835e-06, |
| "loss": 2.3384, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.6208144796380091, |
| "grad_norm": 1.830337165876901, |
| "learning_rate": 6.582841553083053e-06, |
| "loss": 2.3646, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.6214177978883861, |
| "grad_norm": 1.7035092444406967, |
| "learning_rate": 6.5645890206549566e-06, |
| "loss": 2.4757, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6220211161387632, |
| "grad_norm": 1.7086779285604496, |
| "learning_rate": 6.546349455786926e-06, |
| "loss": 2.3828, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.6226244343891403, |
| "grad_norm": 1.8135018764370858, |
| "learning_rate": 6.528122927327386e-06, |
| "loss": 2.4015, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6232277526395174, |
| "grad_norm": 1.7536779084006537, |
| "learning_rate": 6.5099095040755645e-06, |
| "loss": 2.423, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.6238310708898944, |
| "grad_norm": 1.8292406773895364, |
| "learning_rate": 6.491709254781211e-06, |
| "loss": 2.3724, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.6244343891402715, |
| "grad_norm": 1.7869792886207858, |
| "learning_rate": 6.473522248144359e-06, |
| "loss": 2.3563, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.6250377073906486, |
| "grad_norm": 1.8002600022182031, |
| "learning_rate": 6.455348552815042e-06, |
| "loss": 2.4471, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.6256410256410256, |
| "grad_norm": 1.8555130033856473, |
| "learning_rate": 6.437188237393055e-06, |
| "loss": 2.3658, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.6262443438914027, |
| "grad_norm": 1.7962582816756645, |
| "learning_rate": 6.419041370427686e-06, |
| "loss": 2.3816, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.6268476621417798, |
| "grad_norm": 1.6546276512104834, |
| "learning_rate": 6.400908020417466e-06, |
| "loss": 2.4006, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 1.8099892370074893, |
| "learning_rate": 6.382788255809893e-06, |
| "loss": 2.3307, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "eval_loss": 2.383073091506958, |
| "eval_runtime": 22.1846, |
| "eval_samples_per_second": 3.967, |
| "eval_steps_per_second": 0.496, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6280542986425339, |
| "grad_norm": 1.7763186085366514, |
| "learning_rate": 6.364682145001193e-06, |
| "loss": 2.4669, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.628657616892911, |
| "grad_norm": 1.7270436347923916, |
| "learning_rate": 6.34658975633605e-06, |
| "loss": 2.4226, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6292609351432881, |
| "grad_norm": 1.7457663835716066, |
| "learning_rate": 6.3285111581073535e-06, |
| "loss": 2.4119, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.6298642533936651, |
| "grad_norm": 1.871543890238202, |
| "learning_rate": 6.310446418555934e-06, |
| "loss": 2.3105, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6304675716440422, |
| "grad_norm": 1.6143421614477536, |
| "learning_rate": 6.292395605870314e-06, |
| "loss": 2.3267, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.6310708898944193, |
| "grad_norm": 1.7964895997705217, |
| "learning_rate": 6.2743587881864485e-06, |
| "loss": 2.4736, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6316742081447964, |
| "grad_norm": 1.7328805184139204, |
| "learning_rate": 6.256336033587459e-06, |
| "loss": 2.3039, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.6322775263951734, |
| "grad_norm": 1.7477853637357237, |
| "learning_rate": 6.2383274101033865e-06, |
| "loss": 2.3596, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.6328808446455505, |
| "grad_norm": 1.7330713096361468, |
| "learning_rate": 6.220332985710936e-06, |
| "loss": 2.4127, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.6334841628959276, |
| "grad_norm": 1.9186686697353552, |
| "learning_rate": 6.202352828333211e-06, |
| "loss": 2.3919, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6340874811463046, |
| "grad_norm": 1.8655237304210366, |
| "learning_rate": 6.18438700583946e-06, |
| "loss": 2.3999, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.6346907993966817, |
| "grad_norm": 1.820718197399878, |
| "learning_rate": 6.16643558604483e-06, |
| "loss": 2.3537, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.6352941176470588, |
| "grad_norm": 1.8837447292622485, |
| "learning_rate": 6.148498636710092e-06, |
| "loss": 2.4198, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.6358974358974359, |
| "grad_norm": 1.6822630207864002, |
| "learning_rate": 6.130576225541405e-06, |
| "loss": 2.3893, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.6365007541478129, |
| "grad_norm": 1.7791618232922595, |
| "learning_rate": 6.112668420190042e-06, |
| "loss": 2.371, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.63710407239819, |
| "grad_norm": 1.6994048296111355, |
| "learning_rate": 6.094775288252157e-06, |
| "loss": 2.3775, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6377073906485671, |
| "grad_norm": 1.7134892091362488, |
| "learning_rate": 6.076896897268503e-06, |
| "loss": 2.3201, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.6383107088989441, |
| "grad_norm": 1.9344148280860245, |
| "learning_rate": 6.059033314724194e-06, |
| "loss": 2.3657, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6389140271493212, |
| "grad_norm": 1.7314061386065116, |
| "learning_rate": 6.041184608048452e-06, |
| "loss": 2.4049, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6395173453996983, |
| "grad_norm": 1.7161590279229737, |
| "learning_rate": 6.023350844614344e-06, |
| "loss": 2.3644, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6401206636500754, |
| "grad_norm": 1.62133335141988, |
| "learning_rate": 6.0055320917385305e-06, |
| "loss": 2.3621, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.6407239819004525, |
| "grad_norm": 1.7234099751611929, |
| "learning_rate": 5.987728416681015e-06, |
| "loss": 2.3857, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.6413273001508296, |
| "grad_norm": 1.975399590971045, |
| "learning_rate": 5.9699398866448846e-06, |
| "loss": 2.379, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6419306184012067, |
| "grad_norm": 1.7347993415134486, |
| "learning_rate": 5.952166568776062e-06, |
| "loss": 2.4556, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6425339366515838, |
| "grad_norm": 1.7369197590791692, |
| "learning_rate": 5.9344085301630425e-06, |
| "loss": 2.3327, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.6431372549019608, |
| "grad_norm": 1.7195622807621243, |
| "learning_rate": 5.916665837836657e-06, |
| "loss": 2.361, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6437405731523379, |
| "grad_norm": 1.9257307776473533, |
| "learning_rate": 5.8989385587697936e-06, |
| "loss": 2.3611, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.644343891402715, |
| "grad_norm": 1.7840387108031497, |
| "learning_rate": 5.881226759877179e-06, |
| "loss": 2.3426, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.644947209653092, |
| "grad_norm": 1.7327317147015306, |
| "learning_rate": 5.8635305080150916e-06, |
| "loss": 2.4682, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6455505279034691, |
| "grad_norm": 1.8944105633833825, |
| "learning_rate": 5.845849869981137e-06, |
| "loss": 2.3418, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6461538461538462, |
| "grad_norm": 1.8040897971811702, |
| "learning_rate": 5.828184912513974e-06, |
| "loss": 2.3958, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.6467571644042233, |
| "grad_norm": 1.9119298329179213, |
| "learning_rate": 5.810535702293081e-06, |
| "loss": 2.3984, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6473604826546003, |
| "grad_norm": 1.6761960978767056, |
| "learning_rate": 5.792902305938491e-06, |
| "loss": 2.4212, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.6479638009049774, |
| "grad_norm": 1.883720966722848, |
| "learning_rate": 5.77528479001054e-06, |
| "loss": 2.3655, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6485671191553545, |
| "grad_norm": 1.924326515284571, |
| "learning_rate": 5.757683221009625e-06, |
| "loss": 2.3697, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6491704374057315, |
| "grad_norm": 1.7604122143051846, |
| "learning_rate": 5.740097665375956e-06, |
| "loss": 2.339, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6497737556561086, |
| "grad_norm": 1.7198858746018695, |
| "learning_rate": 5.722528189489294e-06, |
| "loss": 2.3814, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.6503770739064857, |
| "grad_norm": 1.705416286375821, |
| "learning_rate": 5.7049748596686884e-06, |
| "loss": 2.4044, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6509803921568628, |
| "grad_norm": 1.7533272673200686, |
| "learning_rate": 5.687437742172258e-06, |
| "loss": 2.3606, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.6515837104072398, |
| "grad_norm": 1.9879675721501724, |
| "learning_rate": 5.669916903196931e-06, |
| "loss": 2.2996, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6521870286576169, |
| "grad_norm": 1.7554867269724397, |
| "learning_rate": 5.652412408878173e-06, |
| "loss": 2.397, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.652790346907994, |
| "grad_norm": 1.6659668606646618, |
| "learning_rate": 5.634924325289766e-06, |
| "loss": 2.4034, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.653393665158371, |
| "grad_norm": 1.7416386848131735, |
| "learning_rate": 5.617452718443539e-06, |
| "loss": 2.3319, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6539969834087481, |
| "grad_norm": 1.7245860454063235, |
| "learning_rate": 5.599997654289129e-06, |
| "loss": 2.3469, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6546003016591252, |
| "grad_norm": 1.7431292906249471, |
| "learning_rate": 5.58255919871374e-06, |
| "loss": 2.4416, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6552036199095023, |
| "grad_norm": 1.6656660538319628, |
| "learning_rate": 5.565137417541866e-06, |
| "loss": 2.4012, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6558069381598793, |
| "grad_norm": 1.652853454891257, |
| "learning_rate": 5.547732376535073e-06, |
| "loss": 2.2344, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.6564102564102564, |
| "grad_norm": 1.8033291285488744, |
| "learning_rate": 5.530344141391735e-06, |
| "loss": 2.319, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6570135746606335, |
| "grad_norm": 1.5871742155343276, |
| "learning_rate": 5.512972777746788e-06, |
| "loss": 2.3877, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.6576168929110106, |
| "grad_norm": 1.8718927560904637, |
| "learning_rate": 5.495618351171484e-06, |
| "loss": 2.4203, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6582202111613876, |
| "grad_norm": 1.6655319360040397, |
| "learning_rate": 5.478280927173145e-06, |
| "loss": 2.4034, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6588235294117647, |
| "grad_norm": 1.7438510013325328, |
| "learning_rate": 5.46096057119491e-06, |
| "loss": 2.4229, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6594268476621418, |
| "grad_norm": 1.891566092957791, |
| "learning_rate": 5.443657348615499e-06, |
| "loss": 2.4016, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6600301659125188, |
| "grad_norm": 1.794552188228555, |
| "learning_rate": 5.4263713247489525e-06, |
| "loss": 2.4229, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6606334841628959, |
| "grad_norm": 1.871493745282659, |
| "learning_rate": 5.409102564844393e-06, |
| "loss": 2.3732, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.661236802413273, |
| "grad_norm": 1.687434162032868, |
| "learning_rate": 5.391851134085777e-06, |
| "loss": 2.383, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6618401206636501, |
| "grad_norm": 1.7303740770734184, |
| "learning_rate": 5.37461709759165e-06, |
| "loss": 2.4028, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6624434389140271, |
| "grad_norm": 1.7189510745367969, |
| "learning_rate": 5.357400520414898e-06, |
| "loss": 2.3981, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6630467571644042, |
| "grad_norm": 1.8096301053467294, |
| "learning_rate": 5.340201467542507e-06, |
| "loss": 2.3628, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6636500754147813, |
| "grad_norm": 1.7165713960764049, |
| "learning_rate": 5.323020003895307e-06, |
| "loss": 2.3966, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6642533936651583, |
| "grad_norm": 2.137276625245083, |
| "learning_rate": 5.30585619432775e-06, |
| "loss": 2.3458, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6648567119155354, |
| "grad_norm": 1.9805084995758804, |
| "learning_rate": 5.2887101036276326e-06, |
| "loss": 2.3472, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6654600301659125, |
| "grad_norm": 1.7370499167243267, |
| "learning_rate": 5.271581796515877e-06, |
| "loss": 2.3599, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6660633484162896, |
| "grad_norm": 1.990509263132044, |
| "learning_rate": 5.254471337646277e-06, |
| "loss": 2.3297, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.817469601955063, |
| "learning_rate": 5.237378791605249e-06, |
| "loss": 2.3826, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6672699849170437, |
| "grad_norm": 1.7081728667896796, |
| "learning_rate": 5.22030422291161e-06, |
| "loss": 2.3599, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6678733031674208, |
| "grad_norm": 1.6649837042112172, |
| "learning_rate": 5.203247696016304e-06, |
| "loss": 2.2998, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6684766214177978, |
| "grad_norm": 2.0879657423509252, |
| "learning_rate": 5.186209275302175e-06, |
| "loss": 2.3596, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6690799396681749, |
| "grad_norm": 1.8790127017717781, |
| "learning_rate": 5.169189025083721e-06, |
| "loss": 2.3778, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.669683257918552, |
| "grad_norm": 1.9364344405317626, |
| "learning_rate": 5.152187009606864e-06, |
| "loss": 2.496, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6702865761689291, |
| "grad_norm": 1.8162698780455198, |
| "learning_rate": 5.135203293048683e-06, |
| "loss": 2.3594, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6708898944193061, |
| "grad_norm": 1.7365875388281704, |
| "learning_rate": 5.11823793951719e-06, |
| "loss": 2.4206, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.6714932126696832, |
| "grad_norm": 1.7389301743219594, |
| "learning_rate": 5.101291013051076e-06, |
| "loss": 2.4577, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6720965309200604, |
| "grad_norm": 1.801725591550719, |
| "learning_rate": 5.08436257761949e-06, |
| "loss": 2.36, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6726998491704375, |
| "grad_norm": 1.6246554556969792, |
| "learning_rate": 5.067452697121773e-06, |
| "loss": 2.4463, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.6733031674208145, |
| "grad_norm": 1.6391920742953727, |
| "learning_rate": 5.050561435387225e-06, |
| "loss": 2.3824, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6739064856711916, |
| "grad_norm": 1.7957501126838844, |
| "learning_rate": 5.033688856174872e-06, |
| "loss": 2.3333, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6745098039215687, |
| "grad_norm": 1.900643070460856, |
| "learning_rate": 5.016835023173216e-06, |
| "loss": 2.393, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6751131221719457, |
| "grad_norm": 1.743212484710362, |
| "learning_rate": 5.000000000000003e-06, |
| "loss": 2.368, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6757164404223228, |
| "grad_norm": 1.7167273166225816, |
| "learning_rate": 4.98318385020197e-06, |
| "loss": 2.3213, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6763197586726999, |
| "grad_norm": 1.7430619282647748, |
| "learning_rate": 4.966386637254619e-06, |
| "loss": 2.4359, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.676923076923077, |
| "grad_norm": 1.7339856018167896, |
| "learning_rate": 4.949608424561974e-06, |
| "loss": 2.3696, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.677526395173454, |
| "grad_norm": 1.6797421152789709, |
| "learning_rate": 4.932849275456334e-06, |
| "loss": 2.3274, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6781297134238311, |
| "grad_norm": 1.700903017111767, |
| "learning_rate": 4.91610925319804e-06, |
| "loss": 2.4187, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 1.8361798532146734, |
| "learning_rate": 4.8993884209752364e-06, |
| "loss": 2.3666, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6793363499245852, |
| "grad_norm": 1.816892302835707, |
| "learning_rate": 4.882686841903627e-06, |
| "loss": 2.3812, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6799396681749623, |
| "grad_norm": 1.7175271186337646, |
| "learning_rate": 4.866004579026254e-06, |
| "loss": 2.3617, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6805429864253394, |
| "grad_norm": 1.6205294077884607, |
| "learning_rate": 4.8493416953132375e-06, |
| "loss": 2.3892, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6811463046757165, |
| "grad_norm": 1.6716762176030333, |
| "learning_rate": 4.832698253661542e-06, |
| "loss": 2.4494, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6817496229260935, |
| "grad_norm": 1.6898145097930017, |
| "learning_rate": 4.81607431689475e-06, |
| "loss": 2.3703, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6823529411764706, |
| "grad_norm": 1.6545729812453533, |
| "learning_rate": 4.799469947762829e-06, |
| "loss": 2.3776, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6829562594268477, |
| "grad_norm": 1.627605596598923, |
| "learning_rate": 4.782885208941873e-06, |
| "loss": 2.3966, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6835595776772248, |
| "grad_norm": 1.7911359160166918, |
| "learning_rate": 4.766320163033882e-06, |
| "loss": 2.3174, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6841628959276018, |
| "grad_norm": 1.7161660139511359, |
| "learning_rate": 4.749774872566516e-06, |
| "loss": 2.3946, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6847662141779789, |
| "grad_norm": 1.7354626205952686, |
| "learning_rate": 4.7332493999928785e-06, |
| "loss": 2.4666, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.685369532428356, |
| "grad_norm": 1.688627517329885, |
| "learning_rate": 4.716743807691255e-06, |
| "loss": 2.4739, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.685972850678733, |
| "grad_norm": 1.730630827281108, |
| "learning_rate": 4.700258157964892e-06, |
| "loss": 2.3249, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6865761689291101, |
| "grad_norm": 1.6111061207035091, |
| "learning_rate": 4.68379251304176e-06, |
| "loss": 2.3399, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6871794871794872, |
| "grad_norm": 1.724226418639754, |
| "learning_rate": 4.667346935074317e-06, |
| "loss": 2.3608, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6877828054298643, |
| "grad_norm": 1.6986068675165844, |
| "learning_rate": 4.6509214861392785e-06, |
| "loss": 2.4214, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6883861236802413, |
| "grad_norm": 1.6645298951693541, |
| "learning_rate": 4.634516228237372e-06, |
| "loss": 2.3376, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6889894419306184, |
| "grad_norm": 1.6189114356863668, |
| "learning_rate": 4.618131223293119e-06, |
| "loss": 2.5135, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.6895927601809955, |
| "grad_norm": 1.7083736473449533, |
| "learning_rate": 4.6017665331545845e-06, |
| "loss": 2.3635, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6901960784313725, |
| "grad_norm": 1.877655085744349, |
| "learning_rate": 4.585422219593161e-06, |
| "loss": 2.3677, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6907993966817496, |
| "grad_norm": 1.6987263814462694, |
| "learning_rate": 4.569098344303319e-06, |
| "loss": 2.3695, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6914027149321267, |
| "grad_norm": 1.825635519300492, |
| "learning_rate": 4.552794968902382e-06, |
| "loss": 2.4652, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6920060331825038, |
| "grad_norm": 1.6935317131155125, |
| "learning_rate": 4.5365121549302916e-06, |
| "loss": 2.4444, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6926093514328808, |
| "grad_norm": 1.6512716687547069, |
| "learning_rate": 4.520249963849386e-06, |
| "loss": 2.374, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6932126696832579, |
| "grad_norm": 1.702035225234864, |
| "learning_rate": 4.504008457044151e-06, |
| "loss": 2.3455, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.693815987933635, |
| "grad_norm": 1.8068570190141913, |
| "learning_rate": 4.487787695820991e-06, |
| "loss": 2.4141, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.694419306184012, |
| "grad_norm": 1.7162440248028858, |
| "learning_rate": 4.471587741408008e-06, |
| "loss": 2.4136, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6950226244343891, |
| "grad_norm": 1.8312298352488556, |
| "learning_rate": 4.455408654954771e-06, |
| "loss": 2.3802, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6956259426847662, |
| "grad_norm": 1.6692722099624584, |
| "learning_rate": 4.439250497532074e-06, |
| "loss": 2.3668, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6962292609351433, |
| "grad_norm": 1.7023852481076565, |
| "learning_rate": 4.423113330131708e-06, |
| "loss": 2.3927, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6968325791855203, |
| "grad_norm": 1.8669640411601005, |
| "learning_rate": 4.406997213666236e-06, |
| "loss": 2.4098, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6974358974358974, |
| "grad_norm": 1.7473073763109117, |
| "learning_rate": 4.390902208968756e-06, |
| "loss": 2.3939, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.6980392156862745, |
| "grad_norm": 1.7800610206406344, |
| "learning_rate": 4.3748283767926895e-06, |
| "loss": 2.4372, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6986425339366515, |
| "grad_norm": 1.634490779211142, |
| "learning_rate": 4.3587757778115255e-06, |
| "loss": 2.3264, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.6992458521870286, |
| "grad_norm": 1.77926534676703, |
| "learning_rate": 4.342744472618609e-06, |
| "loss": 2.37, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6998491704374057, |
| "grad_norm": 1.7462074976007822, |
| "learning_rate": 4.326734521726905e-06, |
| "loss": 2.416, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7004524886877828, |
| "grad_norm": 1.6554836021696273, |
| "learning_rate": 4.310745985568779e-06, |
| "loss": 2.3602, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.7010558069381598, |
| "grad_norm": 1.7946895000231815, |
| "learning_rate": 4.294778924495756e-06, |
| "loss": 2.2787, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.7016591251885369, |
| "grad_norm": 1.7454537713750038, |
| "learning_rate": 4.278833398778306e-06, |
| "loss": 2.3994, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.702262443438914, |
| "grad_norm": 1.7980490954890451, |
| "learning_rate": 4.262909468605602e-06, |
| "loss": 2.3779, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.702865761689291, |
| "grad_norm": 1.7044519866326602, |
| "learning_rate": 4.24700719408531e-06, |
| "loss": 2.3362, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.7034690799396682, |
| "grad_norm": 1.7347177996543848, |
| "learning_rate": 4.231126635243351e-06, |
| "loss": 2.3837, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7040723981900453, |
| "grad_norm": 1.6524797749017155, |
| "learning_rate": 4.215267852023669e-06, |
| "loss": 2.36, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.7046757164404224, |
| "grad_norm": 1.537259068333786, |
| "learning_rate": 4.19943090428802e-06, |
| "loss": 2.2915, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7052790346907994, |
| "grad_norm": 1.6467961805341684, |
| "learning_rate": 4.1836158518157335e-06, |
| "loss": 2.3372, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 1.6557291928485045, |
| "learning_rate": 4.167822754303493e-06, |
| "loss": 2.308, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7064856711915536, |
| "grad_norm": 1.6836785725181491, |
| "learning_rate": 4.152051671365111e-06, |
| "loss": 2.4054, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.7070889894419307, |
| "grad_norm": 1.6787180661459031, |
| "learning_rate": 4.136302662531297e-06, |
| "loss": 2.335, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7076923076923077, |
| "grad_norm": 1.643848874676066, |
| "learning_rate": 4.120575787249448e-06, |
| "loss": 2.4118, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.7082956259426848, |
| "grad_norm": 1.7616396709398916, |
| "learning_rate": 4.104871104883403e-06, |
| "loss": 2.3463, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7088989441930619, |
| "grad_norm": 1.7903610252942221, |
| "learning_rate": 4.0891886747132356e-06, |
| "loss": 2.3359, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.709502262443439, |
| "grad_norm": 1.7098349377567204, |
| "learning_rate": 4.073528555935023e-06, |
| "loss": 2.3234, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.710105580693816, |
| "grad_norm": 1.7698002503180297, |
| "learning_rate": 4.057890807660623e-06, |
| "loss": 2.3478, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.7107088989441931, |
| "grad_norm": 1.7223602812887717, |
| "learning_rate": 4.042275488917457e-06, |
| "loss": 2.3879, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.7113122171945702, |
| "grad_norm": 1.7463132678483144, |
| "learning_rate": 4.026682658648279e-06, |
| "loss": 2.3691, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.7119155354449472, |
| "grad_norm": 1.7311429231481532, |
| "learning_rate": 4.011112375710958e-06, |
| "loss": 2.4077, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7125188536953243, |
| "grad_norm": 1.8325302347981212, |
| "learning_rate": 3.995564698878242e-06, |
| "loss": 2.3488, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.7131221719457014, |
| "grad_norm": 1.68718592965624, |
| "learning_rate": 3.9800396868375675e-06, |
| "loss": 2.3314, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.7137254901960784, |
| "grad_norm": 1.6227069192861296, |
| "learning_rate": 3.964537398190809e-06, |
| "loss": 2.4016, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.7143288084464555, |
| "grad_norm": 1.6767707875017006, |
| "learning_rate": 3.949057891454067e-06, |
| "loss": 2.3296, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7149321266968326, |
| "grad_norm": 1.7535276237480546, |
| "learning_rate": 3.933601225057446e-06, |
| "loss": 2.4307, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.7155354449472097, |
| "grad_norm": 1.793638115657222, |
| "learning_rate": 3.918167457344846e-06, |
| "loss": 2.3967, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.7161387631975867, |
| "grad_norm": 1.699285645702112, |
| "learning_rate": 3.902756646573721e-06, |
| "loss": 2.4461, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.7167420814479638, |
| "grad_norm": 1.6733211026986439, |
| "learning_rate": 3.887368850914873e-06, |
| "loss": 2.3596, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.7173453996983409, |
| "grad_norm": 1.7635125169609782, |
| "learning_rate": 3.872004128452231e-06, |
| "loss": 2.2959, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 1.706941005183369, |
| "learning_rate": 3.85666253718263e-06, |
| "loss": 2.4371, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.718552036199095, |
| "grad_norm": 1.7770281662637257, |
| "learning_rate": 3.841344135015591e-06, |
| "loss": 2.3395, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.7191553544494721, |
| "grad_norm": 1.5833608069906788, |
| "learning_rate": 3.826048979773104e-06, |
| "loss": 2.4091, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7197586726998492, |
| "grad_norm": 1.6958121990199075, |
| "learning_rate": 3.8107771291894092e-06, |
| "loss": 2.362, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.7203619909502262, |
| "grad_norm": 1.8301505796048145, |
| "learning_rate": 3.795528640910776e-06, |
| "loss": 2.39, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.7209653092006033, |
| "grad_norm": 1.6750258822027213, |
| "learning_rate": 3.7803035724953007e-06, |
| "loss": 2.4088, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.7215686274509804, |
| "grad_norm": 1.8697843559056373, |
| "learning_rate": 3.7651019814126656e-06, |
| "loss": 2.3559, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.7221719457013575, |
| "grad_norm": 1.7345691144446933, |
| "learning_rate": 3.7499239250439358e-06, |
| "loss": 2.4212, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.7227752639517345, |
| "grad_norm": 1.7015198165790977, |
| "learning_rate": 3.73476946068134e-06, |
| "loss": 2.3812, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.7233785822021116, |
| "grad_norm": 1.8583945749056379, |
| "learning_rate": 3.719638645528061e-06, |
| "loss": 2.3947, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.7239819004524887, |
| "grad_norm": 1.8719157111099178, |
| "learning_rate": 3.704531536698012e-06, |
| "loss": 2.3613, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7245852187028657, |
| "grad_norm": 1.6366629687175498, |
| "learning_rate": 3.68944819121561e-06, |
| "loss": 2.4264, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.7251885369532428, |
| "grad_norm": 1.6694031295876548, |
| "learning_rate": 3.674388666015584e-06, |
| "loss": 2.3218, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.7257918552036199, |
| "grad_norm": 1.6815494848334889, |
| "learning_rate": 3.659353017942754e-06, |
| "loss": 2.3919, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.726395173453997, |
| "grad_norm": 1.7510552907903105, |
| "learning_rate": 3.644341303751804e-06, |
| "loss": 2.3933, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.726998491704374, |
| "grad_norm": 1.6587047957707022, |
| "learning_rate": 3.6293535801070735e-06, |
| "loss": 2.4262, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.7276018099547511, |
| "grad_norm": 1.7383291104545608, |
| "learning_rate": 3.6143899035823516e-06, |
| "loss": 2.2937, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.7282051282051282, |
| "grad_norm": 1.613937185194106, |
| "learning_rate": 3.5994503306606497e-06, |
| "loss": 2.4775, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.7288084464555052, |
| "grad_norm": 1.7978965594601073, |
| "learning_rate": 3.5845349177340083e-06, |
| "loss": 2.3361, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.7294117647058823, |
| "grad_norm": 1.945082423342847, |
| "learning_rate": 3.5696437211032607e-06, |
| "loss": 2.4762, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.7300150829562594, |
| "grad_norm": 1.6728640229319411, |
| "learning_rate": 3.5547767969778355e-06, |
| "loss": 2.3713, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7306184012066365, |
| "grad_norm": 1.813104500880649, |
| "learning_rate": 3.5399342014755388e-06, |
| "loss": 2.3755, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.7312217194570135, |
| "grad_norm": 1.6597104880097602, |
| "learning_rate": 3.5251159906223453e-06, |
| "loss": 2.3724, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.7318250377073906, |
| "grad_norm": 1.65223185764697, |
| "learning_rate": 3.510322220352188e-06, |
| "loss": 2.4131, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.7324283559577677, |
| "grad_norm": 1.576921560711547, |
| "learning_rate": 3.4955529465067394e-06, |
| "loss": 2.3482, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.7330316742081447, |
| "grad_norm": 1.779960713070471, |
| "learning_rate": 3.4808082248352058e-06, |
| "loss": 2.364, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.7336349924585218, |
| "grad_norm": 1.6919838533539924, |
| "learning_rate": 3.466088110994129e-06, |
| "loss": 2.3319, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7342383107088989, |
| "grad_norm": 1.7997004290448038, |
| "learning_rate": 3.4513926605471504e-06, |
| "loss": 2.3316, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.7348416289592761, |
| "grad_norm": 1.6733906358624668, |
| "learning_rate": 3.4367219289648192e-06, |
| "loss": 2.4196, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7354449472096531, |
| "grad_norm": 1.6985536840122628, |
| "learning_rate": 3.42207597162438e-06, |
| "loss": 2.5372, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.7360482654600302, |
| "grad_norm": 1.7863639968736682, |
| "learning_rate": 3.40745484380956e-06, |
| "loss": 2.4425, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7366515837104073, |
| "grad_norm": 1.61404911543304, |
| "learning_rate": 3.392858600710376e-06, |
| "loss": 2.3413, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.7372549019607844, |
| "grad_norm": 1.7141058073808544, |
| "learning_rate": 3.3782872974228896e-06, |
| "loss": 2.3926, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7378582202111614, |
| "grad_norm": 1.6705147616730136, |
| "learning_rate": 3.363740988949038e-06, |
| "loss": 2.3701, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.7384615384615385, |
| "grad_norm": 1.709270311327098, |
| "learning_rate": 3.3492197301964145e-06, |
| "loss": 2.4238, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.7390648567119156, |
| "grad_norm": 1.645818471528142, |
| "learning_rate": 3.3347235759780483e-06, |
| "loss": 2.2983, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.7396681749622926, |
| "grad_norm": 1.70357964847191, |
| "learning_rate": 3.320252581012212e-06, |
| "loss": 2.3661, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7402714932126697, |
| "grad_norm": 1.7149434102428434, |
| "learning_rate": 3.3058067999222075e-06, |
| "loss": 2.4988, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.7408748114630468, |
| "grad_norm": 1.7463338130667014, |
| "learning_rate": 3.2913862872361624e-06, |
| "loss": 2.4385, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7414781297134239, |
| "grad_norm": 1.6395449989057074, |
| "learning_rate": 3.2769910973868314e-06, |
| "loss": 2.3395, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.7420814479638009, |
| "grad_norm": 1.8064334638366089, |
| "learning_rate": 3.262621284711376e-06, |
| "loss": 2.38, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.742684766214178, |
| "grad_norm": 1.6510309870602204, |
| "learning_rate": 3.248276903451171e-06, |
| "loss": 2.422, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.7432880844645551, |
| "grad_norm": 1.8146336246679287, |
| "learning_rate": 3.2339580077515864e-06, |
| "loss": 2.3916, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7438914027149321, |
| "grad_norm": 1.703832113781218, |
| "learning_rate": 3.219664651661808e-06, |
| "loss": 2.3852, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.7444947209653092, |
| "grad_norm": 1.6845182669842365, |
| "learning_rate": 3.2053968891346087e-06, |
| "loss": 2.261, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.7450980392156863, |
| "grad_norm": 1.753880542406519, |
| "learning_rate": 3.191154774026156e-06, |
| "loss": 2.2682, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.7457013574660634, |
| "grad_norm": 1.639422373413466, |
| "learning_rate": 3.1769383600958005e-06, |
| "loss": 2.3552, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7463046757164404, |
| "grad_norm": 1.7878659580421807, |
| "learning_rate": 3.1627477010058936e-06, |
| "loss": 2.3888, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.7469079939668175, |
| "grad_norm": 1.707074502643292, |
| "learning_rate": 3.1485828503215588e-06, |
| "loss": 2.3579, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7475113122171946, |
| "grad_norm": 1.6154045829514472, |
| "learning_rate": 3.1344438615105023e-06, |
| "loss": 2.3499, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.7481146304675717, |
| "grad_norm": 1.9133709112277255, |
| "learning_rate": 3.1203307879428146e-06, |
| "loss": 2.369, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7487179487179487, |
| "grad_norm": 1.6141320071125547, |
| "learning_rate": 3.1062436828907605e-06, |
| "loss": 2.3186, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.7493212669683258, |
| "grad_norm": 1.7956916560691374, |
| "learning_rate": 3.092182599528585e-06, |
| "loss": 2.3688, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.7499245852187029, |
| "grad_norm": 1.7711534803830473, |
| "learning_rate": 3.0781475909323066e-06, |
| "loss": 2.3732, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.7505279034690799, |
| "grad_norm": 1.7181406266567723, |
| "learning_rate": 3.0641387100795237e-06, |
| "loss": 2.4178, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.751131221719457, |
| "grad_norm": 2.2656899500092478, |
| "learning_rate": 3.0501560098492056e-06, |
| "loss": 2.4398, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.7517345399698341, |
| "grad_norm": 1.541135482751282, |
| "learning_rate": 3.0361995430215087e-06, |
| "loss": 2.3834, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.7523378582202112, |
| "grad_norm": 1.7382581186222361, |
| "learning_rate": 3.0222693622775544e-06, |
| "loss": 2.4601, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "grad_norm": 1.9075571974715178, |
| "learning_rate": 3.008365520199251e-06, |
| "loss": 2.3921, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "eval_loss": 2.38016414642334, |
| "eval_runtime": 22.1293, |
| "eval_samples_per_second": 3.977, |
| "eval_steps_per_second": 0.497, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7535444947209653, |
| "grad_norm": 1.7905575217759446, |
| "learning_rate": 2.994488069269079e-06, |
| "loss": 2.3063, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.7541478129713424, |
| "grad_norm": 1.7748462108982017, |
| "learning_rate": 2.9806370618699142e-06, |
| "loss": 2.3667, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7547511312217194, |
| "grad_norm": 1.6917719110640708, |
| "learning_rate": 2.9668125502848035e-06, |
| "loss": 2.3628, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.7553544494720965, |
| "grad_norm": 1.5787368628352945, |
| "learning_rate": 2.9530145866967897e-06, |
| "loss": 2.3794, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7559577677224736, |
| "grad_norm": 1.619923189656748, |
| "learning_rate": 2.9392432231886914e-06, |
| "loss": 2.3134, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.7565610859728507, |
| "grad_norm": 1.7237975265243972, |
| "learning_rate": 2.9254985117429415e-06, |
| "loss": 2.3619, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.7571644042232277, |
| "grad_norm": 1.886525917985081, |
| "learning_rate": 2.911780504241354e-06, |
| "loss": 2.3792, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.7577677224736048, |
| "grad_norm": 1.6579917254662444, |
| "learning_rate": 2.8980892524649506e-06, |
| "loss": 2.3537, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7583710407239819, |
| "grad_norm": 1.7615250599662575, |
| "learning_rate": 2.8844248080937543e-06, |
| "loss": 2.4131, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.7589743589743589, |
| "grad_norm": 1.5993153277369443, |
| "learning_rate": 2.870787222706609e-06, |
| "loss": 2.3332, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.759577677224736, |
| "grad_norm": 1.7633809449855375, |
| "learning_rate": 2.8571765477809645e-06, |
| "loss": 2.4275, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.7601809954751131, |
| "grad_norm": 1.7752019583671115, |
| "learning_rate": 2.8435928346926945e-06, |
| "loss": 2.3932, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7607843137254902, |
| "grad_norm": 1.6600684023130048, |
| "learning_rate": 2.830036134715902e-06, |
| "loss": 2.3767, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.7613876319758672, |
| "grad_norm": 1.6177485230843274, |
| "learning_rate": 2.8165064990227255e-06, |
| "loss": 2.3334, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7619909502262443, |
| "grad_norm": 1.718986673782489, |
| "learning_rate": 2.803003978683142e-06, |
| "loss": 2.3863, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.7625942684766214, |
| "grad_norm": 1.697123205087847, |
| "learning_rate": 2.789528624664778e-06, |
| "loss": 2.3717, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7631975867269984, |
| "grad_norm": 1.8768955126798301, |
| "learning_rate": 2.776080487832715e-06, |
| "loss": 2.3315, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7638009049773755, |
| "grad_norm": 1.7028416533958965, |
| "learning_rate": 2.7626596189492983e-06, |
| "loss": 2.3685, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7644042232277526, |
| "grad_norm": 1.6899932080641498, |
| "learning_rate": 2.7492660686739513e-06, |
| "loss": 2.3124, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.7650075414781297, |
| "grad_norm": 1.7265124545392858, |
| "learning_rate": 2.7358998875629716e-06, |
| "loss": 2.304, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7656108597285067, |
| "grad_norm": 1.6878514865004561, |
| "learning_rate": 2.7225611260693485e-06, |
| "loss": 2.3384, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7662141779788839, |
| "grad_norm": 1.8156324278376936, |
| "learning_rate": 2.70924983454257e-06, |
| "loss": 2.3312, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.766817496229261, |
| "grad_norm": 1.6906563413337043, |
| "learning_rate": 2.695966063228442e-06, |
| "loss": 2.3785, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7674208144796381, |
| "grad_norm": 1.6438022037492663, |
| "learning_rate": 2.682709862268883e-06, |
| "loss": 2.3599, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7680241327300151, |
| "grad_norm": 1.6535431923407702, |
| "learning_rate": 2.669481281701739e-06, |
| "loss": 2.2519, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.7686274509803922, |
| "grad_norm": 1.6926489077110243, |
| "learning_rate": 2.6562803714606033e-06, |
| "loss": 2.3282, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 1.72212806620652, |
| "learning_rate": 2.6431071813746277e-06, |
| "loss": 2.3748, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7698340874811463, |
| "grad_norm": 1.6096452402637356, |
| "learning_rate": 2.62996176116832e-06, |
| "loss": 2.3074, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7704374057315234, |
| "grad_norm": 1.6456536712636463, |
| "learning_rate": 2.6168441604613706e-06, |
| "loss": 2.3434, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7710407239819005, |
| "grad_norm": 1.6644054476758603, |
| "learning_rate": 2.6037544287684603e-06, |
| "loss": 2.4145, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7716440422322776, |
| "grad_norm": 1.7176809008066278, |
| "learning_rate": 2.5906926154990676e-06, |
| "loss": 2.4399, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7722473604826546, |
| "grad_norm": 1.6886873855562976, |
| "learning_rate": 2.5776587699573007e-06, |
| "loss": 2.3975, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7728506787330317, |
| "grad_norm": 1.6825199363623684, |
| "learning_rate": 2.5646529413416864e-06, |
| "loss": 2.2743, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7734539969834088, |
| "grad_norm": 1.726936978113641, |
| "learning_rate": 2.551675178745003e-06, |
| "loss": 2.3887, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7740573152337858, |
| "grad_norm": 1.7424095587164001, |
| "learning_rate": 2.538725531154087e-06, |
| "loss": 2.3657, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.7746606334841629, |
| "grad_norm": 1.6742037882262482, |
| "learning_rate": 2.5258040474496483e-06, |
| "loss": 2.3799, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.77526395173454, |
| "grad_norm": 1.8169398307903717, |
| "learning_rate": 2.512910776406089e-06, |
| "loss": 2.3991, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7758672699849171, |
| "grad_norm": 1.6743349016789828, |
| "learning_rate": 2.500045766691319e-06, |
| "loss": 2.3413, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7764705882352941, |
| "grad_norm": 1.7015287344998682, |
| "learning_rate": 2.487209066866565e-06, |
| "loss": 2.4439, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7770739064856712, |
| "grad_norm": 1.6878482036002078, |
| "learning_rate": 2.4744007253862046e-06, |
| "loss": 2.4143, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7776772247360483, |
| "grad_norm": 1.629791277018855, |
| "learning_rate": 2.46162079059756e-06, |
| "loss": 2.3622, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.7782805429864253, |
| "grad_norm": 1.64498956245565, |
| "learning_rate": 2.4488693107407335e-06, |
| "loss": 2.2916, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7788838612368024, |
| "grad_norm": 1.587955075191154, |
| "learning_rate": 2.436146333948416e-06, |
| "loss": 2.3719, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7794871794871795, |
| "grad_norm": 1.7976619934058362, |
| "learning_rate": 2.4234519082457096e-06, |
| "loss": 2.3873, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7800904977375566, |
| "grad_norm": 1.6815612433609544, |
| "learning_rate": 2.410786081549954e-06, |
| "loss": 2.3841, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7806938159879336, |
| "grad_norm": 1.7102442917631429, |
| "learning_rate": 2.398148901670521e-06, |
| "loss": 2.2898, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7812971342383107, |
| "grad_norm": 1.5996194988101127, |
| "learning_rate": 2.3855404163086558e-06, |
| "loss": 2.3665, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7819004524886878, |
| "grad_norm": 1.690930485646474, |
| "learning_rate": 2.372960673057301e-06, |
| "loss": 2.421, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7825037707390649, |
| "grad_norm": 1.6395486346485142, |
| "learning_rate": 2.3604097194008957e-06, |
| "loss": 2.4242, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.7831070889894419, |
| "grad_norm": 1.7291499453127324, |
| "learning_rate": 2.347887602715213e-06, |
| "loss": 2.3369, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.783710407239819, |
| "grad_norm": 1.7533383876291355, |
| "learning_rate": 2.3353943702671722e-06, |
| "loss": 2.3686, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 1.7910834697851878, |
| "learning_rate": 2.322930069214664e-06, |
| "loss": 2.3576, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7849170437405731, |
| "grad_norm": 1.5708555637587793, |
| "learning_rate": 2.3104947466063785e-06, |
| "loss": 2.3296, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7855203619909502, |
| "grad_norm": 1.729646722845939, |
| "learning_rate": 2.298088449381618e-06, |
| "loss": 2.4129, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.7861236802413273, |
| "grad_norm": 1.7080334843001908, |
| "learning_rate": 2.285711224370123e-06, |
| "loss": 2.4113, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7867269984917044, |
| "grad_norm": 1.7346094079358705, |
| "learning_rate": 2.273363118291889e-06, |
| "loss": 2.4141, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7873303167420814, |
| "grad_norm": 1.671715470945606, |
| "learning_rate": 2.2610441777570104e-06, |
| "loss": 2.4082, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7879336349924585, |
| "grad_norm": 1.6161721547944843, |
| "learning_rate": 2.2487544492654832e-06, |
| "loss": 2.3392, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7885369532428356, |
| "grad_norm": 1.6230809291834614, |
| "learning_rate": 2.2364939792070385e-06, |
| "loss": 2.3649, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.7891402714932126, |
| "grad_norm": 1.7871076592596147, |
| "learning_rate": 2.224262813860962e-06, |
| "loss": 2.274, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7897435897435897, |
| "grad_norm": 1.7193363412008316, |
| "learning_rate": 2.2120609993959376e-06, |
| "loss": 2.3066, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7903469079939668, |
| "grad_norm": 1.641785651926145, |
| "learning_rate": 2.1998885818698434e-06, |
| "loss": 2.3099, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7909502262443439, |
| "grad_norm": 1.734997074775059, |
| "learning_rate": 2.187745607229601e-06, |
| "loss": 2.4392, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7915535444947209, |
| "grad_norm": 1.6745699343545057, |
| "learning_rate": 2.1756321213109944e-06, |
| "loss": 2.3645, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.792156862745098, |
| "grad_norm": 1.745294391010054, |
| "learning_rate": 2.163548169838495e-06, |
| "loss": 2.3745, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.7927601809954751, |
| "grad_norm": 1.6435958547913674, |
| "learning_rate": 2.151493798425095e-06, |
| "loss": 2.3365, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.7933634992458521, |
| "grad_norm": 1.7601330578805139, |
| "learning_rate": 2.1394690525721275e-06, |
| "loss": 2.4231, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.7939668174962292, |
| "grad_norm": 1.7385461679888394, |
| "learning_rate": 2.1274739776691013e-06, |
| "loss": 2.3454, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.7945701357466063, |
| "grad_norm": 1.577322158373456, |
| "learning_rate": 2.1155086189935227e-06, |
| "loss": 2.3731, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.7951734539969834, |
| "grad_norm": 1.5972905038230132, |
| "learning_rate": 2.1035730217107385e-06, |
| "loss": 2.3499, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.7957767722473604, |
| "grad_norm": 1.595018031186073, |
| "learning_rate": 2.0916672308737464e-06, |
| "loss": 2.4249, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.7963800904977375, |
| "grad_norm": 1.7621017326525585, |
| "learning_rate": 2.079791291423039e-06, |
| "loss": 2.3145, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7969834087481147, |
| "grad_norm": 1.6264135452299817, |
| "learning_rate": 2.0679452481864247e-06, |
| "loss": 2.2997, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.7975867269984918, |
| "grad_norm": 1.6580584494866133, |
| "learning_rate": 2.0561291458788736e-06, |
| "loss": 2.3622, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.7981900452488688, |
| "grad_norm": 1.5566118261930884, |
| "learning_rate": 2.044343029102328e-06, |
| "loss": 2.3974, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.7987933634992459, |
| "grad_norm": 1.7250474143269023, |
| "learning_rate": 2.0325869423455523e-06, |
| "loss": 2.3415, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.799396681749623, |
| "grad_norm": 1.626325032827294, |
| "learning_rate": 2.0208609299839465e-06, |
| "loss": 2.3513, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.774403154637252, |
| "learning_rate": 2.0091650362794035e-06, |
| "loss": 2.3231, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.8006033182503771, |
| "grad_norm": 1.7206279247637468, |
| "learning_rate": 1.9974993053801186e-06, |
| "loss": 2.3612, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.8012066365007542, |
| "grad_norm": 1.8044368171035148, |
| "learning_rate": 1.9858637813204352e-06, |
| "loss": 2.4704, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.8018099547511313, |
| "grad_norm": 1.7163985106653774, |
| "learning_rate": 1.9742585080206754e-06, |
| "loss": 2.3197, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.8024132730015083, |
| "grad_norm": 1.661643572576915, |
| "learning_rate": 1.962683529286973e-06, |
| "loss": 2.4173, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8030165912518854, |
| "grad_norm": 1.729300657247654, |
| "learning_rate": 1.951138888811115e-06, |
| "loss": 2.3567, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.8036199095022625, |
| "grad_norm": 1.5633312280127363, |
| "learning_rate": 1.939624630170367e-06, |
| "loss": 2.3826, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.8042232277526395, |
| "grad_norm": 1.775527155985604, |
| "learning_rate": 1.9281407968273115e-06, |
| "loss": 2.3891, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.8048265460030166, |
| "grad_norm": 1.540034243183988, |
| "learning_rate": 1.916687432129688e-06, |
| "loss": 2.3909, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.8054298642533937, |
| "grad_norm": 1.651033910298305, |
| "learning_rate": 1.9052645793102277e-06, |
| "loss": 2.4138, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.8060331825037708, |
| "grad_norm": 1.6787312602876567, |
| "learning_rate": 1.8938722814864863e-06, |
| "loss": 2.4045, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.8066365007541478, |
| "grad_norm": 1.6738748640963028, |
| "learning_rate": 1.882510581660687e-06, |
| "loss": 2.3668, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.8072398190045249, |
| "grad_norm": 1.8596276668931182, |
| "learning_rate": 1.8711795227195528e-06, |
| "loss": 2.4065, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.807843137254902, |
| "grad_norm": 1.6277462587992833, |
| "learning_rate": 1.8598791474341516e-06, |
| "loss": 2.3414, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.808446455505279, |
| "grad_norm": 1.7241134762025054, |
| "learning_rate": 1.8486094984597268e-06, |
| "loss": 2.3492, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8090497737556561, |
| "grad_norm": 1.6923648374789768, |
| "learning_rate": 1.8373706183355423e-06, |
| "loss": 2.4431, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.8096530920060332, |
| "grad_norm": 1.7463635386431937, |
| "learning_rate": 1.8261625494847156e-06, |
| "loss": 2.3816, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.8102564102564103, |
| "grad_norm": 1.699154495364708, |
| "learning_rate": 1.8149853342140644e-06, |
| "loss": 2.3481, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.8108597285067873, |
| "grad_norm": 1.692016876163523, |
| "learning_rate": 1.8038390147139506e-06, |
| "loss": 2.367, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.8114630467571644, |
| "grad_norm": 1.7018531425762653, |
| "learning_rate": 1.7927236330581e-06, |
| "loss": 2.3712, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.8120663650075415, |
| "grad_norm": 1.7025540938243773, |
| "learning_rate": 1.781639231203467e-06, |
| "loss": 2.4019, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.8126696832579186, |
| "grad_norm": 1.6716228928422712, |
| "learning_rate": 1.770585850990072e-06, |
| "loss": 2.4047, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.8132730015082956, |
| "grad_norm": 1.655058366571219, |
| "learning_rate": 1.7595635341408302e-06, |
| "loss": 2.3402, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.8138763197586727, |
| "grad_norm": 1.6490662154522266, |
| "learning_rate": 1.7485723222614059e-06, |
| "loss": 2.3871, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.8144796380090498, |
| "grad_norm": 1.6062153254680276, |
| "learning_rate": 1.7376122568400533e-06, |
| "loss": 2.4266, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8150829562594268, |
| "grad_norm": 1.663003025297214, |
| "learning_rate": 1.7266833792474536e-06, |
| "loss": 2.3385, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.8156862745098039, |
| "grad_norm": 1.5846299172862364, |
| "learning_rate": 1.7157857307365733e-06, |
| "loss": 2.3776, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.816289592760181, |
| "grad_norm": 1.7929078747126366, |
| "learning_rate": 1.7049193524424922e-06, |
| "loss": 2.4375, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.816892911010558, |
| "grad_norm": 1.6503047291415613, |
| "learning_rate": 1.6940842853822582e-06, |
| "loss": 2.3582, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.8174962292609351, |
| "grad_norm": 1.7197416263684178, |
| "learning_rate": 1.6832805704547272e-06, |
| "loss": 2.3355, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.8180995475113122, |
| "grad_norm": 1.761539196901066, |
| "learning_rate": 1.6725082484404132e-06, |
| "loss": 2.3332, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.8187028657616893, |
| "grad_norm": 1.729763987929638, |
| "learning_rate": 1.6617673600013295e-06, |
| "loss": 2.4059, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.8193061840120663, |
| "grad_norm": 1.6870548202068116, |
| "learning_rate": 1.6510579456808417e-06, |
| "loss": 2.3768, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.8199095022624434, |
| "grad_norm": 1.6953755837989941, |
| "learning_rate": 1.6403800459035046e-06, |
| "loss": 2.3077, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 1.6342968081775047, |
| "learning_rate": 1.6297337009749249e-06, |
| "loss": 2.4096, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8211161387631976, |
| "grad_norm": 1.80156853216978, |
| "learning_rate": 1.6191189510815942e-06, |
| "loss": 2.2616, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.8217194570135746, |
| "grad_norm": 1.6421938563723806, |
| "learning_rate": 1.6085358362907423e-06, |
| "loss": 2.3517, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.8223227752639517, |
| "grad_norm": 1.6856507493913957, |
| "learning_rate": 1.5979843965501885e-06, |
| "loss": 2.4184, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.8229260935143288, |
| "grad_norm": 1.6662976991524578, |
| "learning_rate": 1.587464671688187e-06, |
| "loss": 2.365, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 1.801209558500375, |
| "learning_rate": 1.5769767014132885e-06, |
| "loss": 2.3553, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.8241327300150829, |
| "grad_norm": 1.7660705340536775, |
| "learning_rate": 1.5665205253141647e-06, |
| "loss": 2.4239, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.82473604826546, |
| "grad_norm": 1.8381496275503855, |
| "learning_rate": 1.5560961828594845e-06, |
| "loss": 2.4207, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.8253393665158371, |
| "grad_norm": 1.7129726266530854, |
| "learning_rate": 1.5457037133977515e-06, |
| "loss": 2.4011, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.8259426847662141, |
| "grad_norm": 1.6162587223613567, |
| "learning_rate": 1.5353431561571653e-06, |
| "loss": 2.4168, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.8265460030165912, |
| "grad_norm": 1.8119562213217464, |
| "learning_rate": 1.5250145502454594e-06, |
| "loss": 2.4459, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8271493212669683, |
| "grad_norm": 1.7252802585436375, |
| "learning_rate": 1.5147179346497665e-06, |
| "loss": 2.4109, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.8277526395173453, |
| "grad_norm": 1.6087075158634467, |
| "learning_rate": 1.504453348236461e-06, |
| "loss": 2.4332, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.8283559577677225, |
| "grad_norm": 1.7356484029383643, |
| "learning_rate": 1.4942208297510252e-06, |
| "loss": 2.271, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.8289592760180996, |
| "grad_norm": 5.655572111782827, |
| "learning_rate": 1.4840204178178897e-06, |
| "loss": 2.4529, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.8295625942684767, |
| "grad_norm": 1.6446113205280009, |
| "learning_rate": 1.473852150940297e-06, |
| "loss": 2.3765, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.8301659125188537, |
| "grad_norm": 1.7789716974806704, |
| "learning_rate": 1.4637160675001427e-06, |
| "loss": 2.4082, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.8307692307692308, |
| "grad_norm": 1.755320567961607, |
| "learning_rate": 1.453612205757855e-06, |
| "loss": 2.3835, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.8313725490196079, |
| "grad_norm": 1.741950402553732, |
| "learning_rate": 1.443540603852227e-06, |
| "loss": 2.4143, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.831975867269985, |
| "grad_norm": 1.638206384727529, |
| "learning_rate": 1.433501299800283e-06, |
| "loss": 2.3397, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.832579185520362, |
| "grad_norm": 1.6525001416131908, |
| "learning_rate": 1.4234943314971328e-06, |
| "loss": 2.4176, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8331825037707391, |
| "grad_norm": 1.636977385908322, |
| "learning_rate": 1.413519736715827e-06, |
| "loss": 2.3485, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.8337858220211162, |
| "grad_norm": 1.5618943455158865, |
| "learning_rate": 1.4035775531072259e-06, |
| "loss": 2.4065, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.8343891402714932, |
| "grad_norm": 1.5734395328774438, |
| "learning_rate": 1.3936678181998376e-06, |
| "loss": 2.2659, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.8349924585218703, |
| "grad_norm": 1.8179891942394608, |
| "learning_rate": 1.3837905693996922e-06, |
| "loss": 2.3585, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.8355957767722474, |
| "grad_norm": 1.6722431204654182, |
| "learning_rate": 1.373945843990192e-06, |
| "loss": 2.3467, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.8361990950226245, |
| "grad_norm": 1.6316885953843028, |
| "learning_rate": 1.3641336791319814e-06, |
| "loss": 2.3139, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.8368024132730015, |
| "grad_norm": 1.6498294540673093, |
| "learning_rate": 1.35435411186279e-06, |
| "loss": 2.4035, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.8374057315233786, |
| "grad_norm": 1.663069087036581, |
| "learning_rate": 1.3446071790973058e-06, |
| "loss": 2.268, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.8380090497737557, |
| "grad_norm": 1.5896047232548594, |
| "learning_rate": 1.334892917627033e-06, |
| "loss": 2.4154, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.8386123680241327, |
| "grad_norm": 1.5609131592393046, |
| "learning_rate": 1.3252113641201537e-06, |
| "loss": 2.3732, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8392156862745098, |
| "grad_norm": 1.661564956740201, |
| "learning_rate": 1.3155625551213857e-06, |
| "loss": 2.2994, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.8398190045248869, |
| "grad_norm": 1.6410595605624814, |
| "learning_rate": 1.3059465270518469e-06, |
| "loss": 2.4214, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.840422322775264, |
| "grad_norm": 1.602523309837128, |
| "learning_rate": 1.2963633162089174e-06, |
| "loss": 2.3991, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.841025641025641, |
| "grad_norm": 1.6334270239786723, |
| "learning_rate": 1.286812958766106e-06, |
| "loss": 2.3418, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.8416289592760181, |
| "grad_norm": 1.7860198422911693, |
| "learning_rate": 1.2772954907729074e-06, |
| "loss": 2.3447, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.8422322775263952, |
| "grad_norm": 1.6682612826514833, |
| "learning_rate": 1.267810948154674e-06, |
| "loss": 2.4333, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.8428355957767723, |
| "grad_norm": 1.6290333910999475, |
| "learning_rate": 1.2583593667124638e-06, |
| "loss": 2.3509, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.8434389140271493, |
| "grad_norm": 1.6691854260224455, |
| "learning_rate": 1.2489407821229326e-06, |
| "loss": 2.3983, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.8440422322775264, |
| "grad_norm": 1.741145842002938, |
| "learning_rate": 1.2395552299381742e-06, |
| "loss": 2.3382, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.8446455505279035, |
| "grad_norm": 1.6289594974977173, |
| "learning_rate": 1.2302027455855969e-06, |
| "loss": 2.3712, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8452488687782805, |
| "grad_norm": 1.6425484443639187, |
| "learning_rate": 1.220883364367792e-06, |
| "loss": 2.4272, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.8458521870286576, |
| "grad_norm": 1.8073588625784758, |
| "learning_rate": 1.2115971214623923e-06, |
| "loss": 2.3826, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.8464555052790347, |
| "grad_norm": 1.6222889871033748, |
| "learning_rate": 1.2023440519219508e-06, |
| "loss": 2.3432, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.8470588235294118, |
| "grad_norm": 1.7205116329534476, |
| "learning_rate": 1.1931241906737966e-06, |
| "loss": 2.3555, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.8476621417797888, |
| "grad_norm": 1.5973272142066768, |
| "learning_rate": 1.1839375725199098e-06, |
| "loss": 2.4325, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.8482654600301659, |
| "grad_norm": 1.6364231863924403, |
| "learning_rate": 1.1747842321367886e-06, |
| "loss": 2.4447, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.848868778280543, |
| "grad_norm": 1.7216509921725922, |
| "learning_rate": 1.1656642040753174e-06, |
| "loss": 2.4644, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.84947209653092, |
| "grad_norm": 1.882302558304212, |
| "learning_rate": 1.156577522760639e-06, |
| "loss": 2.3529, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.8500754147812971, |
| "grad_norm": 1.67946358471103, |
| "learning_rate": 1.1475242224920234e-06, |
| "loss": 2.3677, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.8506787330316742, |
| "grad_norm": 1.7014475741055364, |
| "learning_rate": 1.1385043374427341e-06, |
| "loss": 2.3374, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8512820512820513, |
| "grad_norm": 1.6288323501069348, |
| "learning_rate": 1.129517901659911e-06, |
| "loss": 2.3707, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.8518853695324283, |
| "grad_norm": 1.715269118664735, |
| "learning_rate": 1.1205649490644255e-06, |
| "loss": 2.351, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.8524886877828054, |
| "grad_norm": 1.7131818295241272, |
| "learning_rate": 1.1116455134507665e-06, |
| "loss": 2.3191, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.8530920060331825, |
| "grad_norm": 1.6242702822484834, |
| "learning_rate": 1.1027596284869024e-06, |
| "loss": 2.4025, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.8536953242835595, |
| "grad_norm": 1.6766455089052343, |
| "learning_rate": 1.0939073277141598e-06, |
| "loss": 2.3731, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.8542986425339366, |
| "grad_norm": 1.673925183108547, |
| "learning_rate": 1.0850886445471055e-06, |
| "loss": 2.4059, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.8549019607843137, |
| "grad_norm": 1.8068773405643075, |
| "learning_rate": 1.076303612273395e-06, |
| "loss": 2.4252, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.8555052790346908, |
| "grad_norm": 1.6512291093394074, |
| "learning_rate": 1.0675522640536706e-06, |
| "loss": 2.3158, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.8561085972850678, |
| "grad_norm": 1.7090234334662167, |
| "learning_rate": 1.0588346329214316e-06, |
| "loss": 2.3748, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.8567119155354449, |
| "grad_norm": 1.6948548741381664, |
| "learning_rate": 1.0501507517829012e-06, |
| "loss": 2.3942, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.857315233785822, |
| "grad_norm": 1.7826231466250537, |
| "learning_rate": 1.0415006534169092e-06, |
| "loss": 2.3419, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.857918552036199, |
| "grad_norm": 1.7591224301384984, |
| "learning_rate": 1.0328843704747649e-06, |
| "loss": 2.3298, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.8585218702865761, |
| "grad_norm": 1.679495270548289, |
| "learning_rate": 1.0243019354801353e-06, |
| "loss": 2.3149, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.8591251885369532, |
| "grad_norm": 1.7069415551851421, |
| "learning_rate": 1.0157533808289265e-06, |
| "loss": 2.3253, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8597285067873304, |
| "grad_norm": 1.8331634766626557, |
| "learning_rate": 1.0072387387891535e-06, |
| "loss": 2.438, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.8603318250377074, |
| "grad_norm": 1.791237994709679, |
| "learning_rate": 9.987580415008224e-07, |
| "loss": 2.4065, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.8609351432880845, |
| "grad_norm": 1.6788734645769163, |
| "learning_rate": 9.903113209758098e-07, |
| "loss": 2.3797, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.8615384615384616, |
| "grad_norm": 1.65803920979439, |
| "learning_rate": 9.8189860909774e-07, |
| "loss": 2.3822, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.8621417797888387, |
| "grad_norm": 1.6380863804172392, |
| "learning_rate": 9.735199376218673e-07, |
| "loss": 2.3546, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 1.6992202984146336, |
| "learning_rate": 9.65175338174954e-07, |
| "loss": 2.4007, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8633484162895928, |
| "grad_norm": 1.746741845341246, |
| "learning_rate": 9.568648422551486e-07, |
| "loss": 2.371, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.8639517345399699, |
| "grad_norm": 1.7520256764048072, |
| "learning_rate": 9.485884812318769e-07, |
| "loss": 2.3613, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.864555052790347, |
| "grad_norm": 1.709025973421567, |
| "learning_rate": 9.403462863457113e-07, |
| "loss": 2.4001, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.865158371040724, |
| "grad_norm": 1.5659271803992887, |
| "learning_rate": 9.321382887082564e-07, |
| "loss": 2.4279, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.8657616892911011, |
| "grad_norm": 1.6574233931237368, |
| "learning_rate": 9.239645193020386e-07, |
| "loss": 2.4045, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.8663650075414782, |
| "grad_norm": 1.7567684546745939, |
| "learning_rate": 9.158250089803789e-07, |
| "loss": 2.3661, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.8669683257918552, |
| "grad_norm": 1.6579040791726805, |
| "learning_rate": 9.077197884672884e-07, |
| "loss": 2.3595, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.8675716440422323, |
| "grad_norm": 1.672146465058933, |
| "learning_rate": 8.996488883573351e-07, |
| "loss": 2.3272, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.8681749622926094, |
| "grad_norm": 1.7982516402454005, |
| "learning_rate": 8.916123391155473e-07, |
| "loss": 2.3342, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.8687782805429864, |
| "grad_norm": 1.6513082922110383, |
| "learning_rate": 8.836101710772826e-07, |
| "loss": 2.4187, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8693815987933635, |
| "grad_norm": 1.5801424653470257, |
| "learning_rate": 8.756424144481313e-07, |
| "loss": 2.3546, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.8699849170437406, |
| "grad_norm": 1.6204238318617161, |
| "learning_rate": 8.677090993037817e-07, |
| "loss": 2.3551, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.8705882352941177, |
| "grad_norm": 1.819858713479699, |
| "learning_rate": 8.598102555899224e-07, |
| "loss": 2.4483, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.8711915535444947, |
| "grad_norm": 1.8786719832489334, |
| "learning_rate": 8.519459131221175e-07, |
| "loss": 2.3604, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 1.667217198114092, |
| "learning_rate": 8.441161015857092e-07, |
| "loss": 2.3518, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.8723981900452489, |
| "grad_norm": 1.732586131005076, |
| "learning_rate": 8.36320850535689e-07, |
| "loss": 2.3817, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.873001508295626, |
| "grad_norm": 1.632569270479025, |
| "learning_rate": 8.285601893965989e-07, |
| "loss": 2.2941, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.873604826546003, |
| "grad_norm": 1.7481093246691317, |
| "learning_rate": 8.208341474624071e-07, |
| "loss": 2.3644, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8742081447963801, |
| "grad_norm": 1.592396538771664, |
| "learning_rate": 8.131427538964165e-07, |
| "loss": 2.3092, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.8748114630467572, |
| "grad_norm": 1.7076427444522029, |
| "learning_rate": 8.054860377311368e-07, |
| "loss": 2.3829, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8754147812971342, |
| "grad_norm": 1.624933821901664, |
| "learning_rate": 7.978640278681838e-07, |
| "loss": 2.3219, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8760180995475113, |
| "grad_norm": 1.5590949529060047, |
| "learning_rate": 7.902767530781664e-07, |
| "loss": 2.3748, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8766214177978884, |
| "grad_norm": 1.6724019664102798, |
| "learning_rate": 7.82724242000581e-07, |
| "loss": 2.4011, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.8772247360482655, |
| "grad_norm": 1.7230129361136048, |
| "learning_rate": 7.752065231437067e-07, |
| "loss": 2.2988, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8778280542986425, |
| "grad_norm": 1.7025219674521455, |
| "learning_rate": 7.677236248844855e-07, |
| "loss": 2.3598, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.8784313725490196, |
| "grad_norm": 1.762003177975733, |
| "learning_rate": 7.602755754684277e-07, |
| "loss": 2.3904, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8784313725490196, |
| "eval_loss": 2.378657341003418, |
| "eval_runtime": 21.8258, |
| "eval_samples_per_second": 4.032, |
| "eval_steps_per_second": 0.504, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8790346907993967, |
| "grad_norm": 1.6970670634162761, |
| "learning_rate": 7.528624030094978e-07, |
| "loss": 2.3864, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.8796380090497737, |
| "grad_norm": 1.7350810480457661, |
| "learning_rate": 7.454841354900177e-07, |
| "loss": 2.2923, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8802413273001508, |
| "grad_norm": 1.6909883458124622, |
| "learning_rate": 7.38140800760545e-07, |
| "loss": 2.3932, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.8808446455505279, |
| "grad_norm": 1.7254478671591273, |
| "learning_rate": 7.308324265397837e-07, |
| "loss": 2.4299, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.881447963800905, |
| "grad_norm": 1.6323170982575828, |
| "learning_rate": 7.235590404144688e-07, |
| "loss": 2.4528, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.882051282051282, |
| "grad_norm": 1.725888739611964, |
| "learning_rate": 7.163206698392744e-07, |
| "loss": 2.4173, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.8826546003016591, |
| "grad_norm": 1.5964294056930346, |
| "learning_rate": 7.091173421366937e-07, |
| "loss": 2.3595, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.8832579185520362, |
| "grad_norm": 1.6073598119187729, |
| "learning_rate": 7.01949084496949e-07, |
| "loss": 2.3126, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8838612368024132, |
| "grad_norm": 1.649407476234432, |
| "learning_rate": 6.948159239778829e-07, |
| "loss": 2.2887, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.8844645550527903, |
| "grad_norm": 1.7340350529598598, |
| "learning_rate": 6.877178875048573e-07, |
| "loss": 2.3362, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8850678733031674, |
| "grad_norm": 1.6839092408802154, |
| "learning_rate": 6.80655001870657e-07, |
| "loss": 2.3465, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8856711915535445, |
| "grad_norm": 1.6218961606812634, |
| "learning_rate": 6.736272937353782e-07, |
| "loss": 2.3734, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8862745098039215, |
| "grad_norm": 1.6983984598193318, |
| "learning_rate": 6.666347896263326e-07, |
| "loss": 2.4695, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.8868778280542986, |
| "grad_norm": 1.6624539260672893, |
| "learning_rate": 6.596775159379543e-07, |
| "loss": 2.4142, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8874811463046757, |
| "grad_norm": 1.8405004468613892, |
| "learning_rate": 6.527554989316898e-07, |
| "loss": 2.3249, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8880844645550527, |
| "grad_norm": 1.653966697394111, |
| "learning_rate": 6.458687647359041e-07, |
| "loss": 2.3264, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8886877828054298, |
| "grad_norm": 1.57539011035201, |
| "learning_rate": 6.3901733934578e-07, |
| "loss": 2.3421, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.8892911010558069, |
| "grad_norm": 1.5954240002596058, |
| "learning_rate": 6.322012486232209e-07, |
| "loss": 2.401, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.889894419306184, |
| "grad_norm": 1.6355649776928247, |
| "learning_rate": 6.254205182967566e-07, |
| "loss": 2.3746, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.890497737556561, |
| "grad_norm": 1.6844563379748747, |
| "learning_rate": 6.186751739614405e-07, |
| "loss": 2.3419, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8911010558069382, |
| "grad_norm": 1.631699028891681, |
| "learning_rate": 6.119652410787546e-07, |
| "loss": 2.4116, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.8917043740573153, |
| "grad_norm": 1.7577623661059945, |
| "learning_rate": 6.052907449765144e-07, |
| "loss": 2.3307, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8923076923076924, |
| "grad_norm": 1.9272993274326495, |
| "learning_rate": 5.986517108487754e-07, |
| "loss": 2.4229, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8929110105580694, |
| "grad_norm": 1.731338520723134, |
| "learning_rate": 5.920481637557318e-07, |
| "loss": 2.4008, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8935143288084465, |
| "grad_norm": 1.7062510402870246, |
| "learning_rate": 5.8548012862363e-07, |
| "loss": 2.3974, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.8941176470588236, |
| "grad_norm": 1.5686806047724653, |
| "learning_rate": 5.789476302446662e-07, |
| "loss": 2.3423, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.8947209653092006, |
| "grad_norm": 1.709116996155703, |
| "learning_rate": 5.724506932769014e-07, |
| "loss": 2.4025, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.8953242835595777, |
| "grad_norm": 1.712797698360303, |
| "learning_rate": 5.659893422441598e-07, |
| "loss": 2.3405, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.8959276018099548, |
| "grad_norm": 1.760912337787795, |
| "learning_rate": 5.59563601535943e-07, |
| "loss": 2.3576, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.8965309200603319, |
| "grad_norm": 1.655187155662018, |
| "learning_rate": 5.53173495407332e-07, |
| "loss": 2.3894, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.8971342383107089, |
| "grad_norm": 1.6356081181884141, |
| "learning_rate": 5.468190479789015e-07, |
| "loss": 2.3583, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.897737556561086, |
| "grad_norm": 1.5988218833944716, |
| "learning_rate": 5.40500283236628e-07, |
| "loss": 2.3008, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.8983408748114631, |
| "grad_norm": 1.669649706786489, |
| "learning_rate": 5.342172250317946e-07, |
| "loss": 2.3583, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.8989441930618401, |
| "grad_norm": 1.6851067875014922, |
| "learning_rate": 5.279698970809011e-07, |
| "loss": 2.341, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8995475113122172, |
| "grad_norm": 1.714939456296494, |
| "learning_rate": 5.21758322965581e-07, |
| "loss": 2.3949, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.9001508295625943, |
| "grad_norm": 1.6680686230304222, |
| "learning_rate": 5.155825261325099e-07, |
| "loss": 2.352, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.9007541478129714, |
| "grad_norm": 1.694172379851801, |
| "learning_rate": 5.094425298933136e-07, |
| "loss": 2.3659, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.9013574660633484, |
| "grad_norm": 1.5458043855268861, |
| "learning_rate": 5.033383574244832e-07, |
| "loss": 2.3697, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.9019607843137255, |
| "grad_norm": 1.6316763489085158, |
| "learning_rate": 4.972700317672829e-07, |
| "loss": 2.3894, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.9025641025641026, |
| "grad_norm": 1.7347252339740011, |
| "learning_rate": 4.912375758276744e-07, |
| "loss": 2.4098, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.9031674208144796, |
| "grad_norm": 1.6488961343325694, |
| "learning_rate": 4.852410123762164e-07, |
| "loss": 2.4116, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.9037707390648567, |
| "grad_norm": 1.7413362686027694, |
| "learning_rate": 4.792803640479871e-07, |
| "loss": 2.4963, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.9043740573152338, |
| "grad_norm": 1.5131014161067307, |
| "learning_rate": 4.7335565334249767e-07, |
| "loss": 2.3327, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 1.6485978846587896, |
| "learning_rate": 4.674669026236045e-07, |
| "loss": 2.3153, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9055806938159879, |
| "grad_norm": 1.648121715180723, |
| "learning_rate": 4.6161413411942913e-07, |
| "loss": 2.3907, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.906184012066365, |
| "grad_norm": 1.5954459345960825, |
| "learning_rate": 4.557973699222706e-07, |
| "loss": 2.4178, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.9067873303167421, |
| "grad_norm": 1.7890764532981966, |
| "learning_rate": 4.500166319885235e-07, |
| "loss": 2.3997, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.9073906485671192, |
| "grad_norm": 1.6434691831960049, |
| "learning_rate": 4.4427194213859216e-07, |
| "loss": 2.3621, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9079939668174962, |
| "grad_norm": 1.6893382016076397, |
| "learning_rate": 4.385633220568186e-07, |
| "loss": 2.4687, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.9085972850678733, |
| "grad_norm": 1.6875837245052094, |
| "learning_rate": 4.328907932913873e-07, |
| "loss": 2.4183, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.9092006033182504, |
| "grad_norm": 1.7217829130925384, |
| "learning_rate": 4.2725437725424923e-07, |
| "loss": 2.3603, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.9098039215686274, |
| "grad_norm": 1.6624234725771576, |
| "learning_rate": 4.216540952210435e-07, |
| "loss": 2.3669, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.9104072398190045, |
| "grad_norm": 1.8787811617905237, |
| "learning_rate": 4.160899683310171e-07, |
| "loss": 2.4136, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.9110105580693816, |
| "grad_norm": 1.8583772875085336, |
| "learning_rate": 4.1056201758693957e-07, |
| "loss": 2.2831, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9116138763197587, |
| "grad_norm": 1.6368612883465103, |
| "learning_rate": 4.0507026385502747e-07, |
| "loss": 2.3401, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.9122171945701357, |
| "grad_norm": 1.6299438372055655, |
| "learning_rate": 3.9961472786486655e-07, |
| "loss": 2.3577, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.9128205128205128, |
| "grad_norm": 1.7111536524933928, |
| "learning_rate": 3.9419543020933426e-07, |
| "loss": 2.4232, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.9134238310708899, |
| "grad_norm": 1.7191711135202754, |
| "learning_rate": 3.888123913445174e-07, |
| "loss": 2.5138, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.9140271493212669, |
| "grad_norm": 1.6802486623040969, |
| "learning_rate": 3.834656315896379e-07, |
| "loss": 2.3612, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.914630467571644, |
| "grad_norm": 1.5822887932572551, |
| "learning_rate": 3.7815517112697707e-07, |
| "loss": 2.4312, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.9152337858220211, |
| "grad_norm": 1.6833890185021834, |
| "learning_rate": 3.728810300017949e-07, |
| "loss": 2.433, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.9158371040723982, |
| "grad_norm": 1.750558917564341, |
| "learning_rate": 3.6764322812226416e-07, |
| "loss": 2.3951, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.9164404223227752, |
| "grad_norm": 1.566804602225607, |
| "learning_rate": 3.624417852593842e-07, |
| "loss": 2.414, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.9170437405731523, |
| "grad_norm": 1.7643675825250036, |
| "learning_rate": 3.572767210469086e-07, |
| "loss": 2.3622, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9176470588235294, |
| "grad_norm": 1.6153326061930553, |
| "learning_rate": 3.521480549812784e-07, |
| "loss": 2.4519, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.9182503770739064, |
| "grad_norm": 1.6247385009617024, |
| "learning_rate": 3.4705580642154126e-07, |
| "loss": 2.4414, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.9188536953242835, |
| "grad_norm": 1.6397317973016956, |
| "learning_rate": 3.4199999458928045e-07, |
| "loss": 2.3467, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.9194570135746606, |
| "grad_norm": 1.608345000582214, |
| "learning_rate": 3.3698063856854257e-07, |
| "loss": 2.3482, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.9200603318250377, |
| "grad_norm": 1.6313694914216899, |
| "learning_rate": 3.319977573057642e-07, |
| "loss": 2.3654, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.9206636500754147, |
| "grad_norm": 1.6034585133542865, |
| "learning_rate": 3.2705136960970554e-07, |
| "loss": 2.4205, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.9212669683257918, |
| "grad_norm": 1.7012160770662008, |
| "learning_rate": 3.221414941513723e-07, |
| "loss": 2.4297, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.9218702865761689, |
| "grad_norm": 1.6314068122413963, |
| "learning_rate": 3.1726814946394736e-07, |
| "loss": 2.3526, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.9224736048265461, |
| "grad_norm": 1.7604298285793514, |
| "learning_rate": 3.124313539427226e-07, |
| "loss": 2.3841, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 1.6897111047811944, |
| "learning_rate": 3.0763112584503264e-07, |
| "loss": 2.3596, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9236802413273002, |
| "grad_norm": 1.901856272346252, |
| "learning_rate": 3.028674832901757e-07, |
| "loss": 2.3238, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.9242835595776773, |
| "grad_norm": 1.6867131472505243, |
| "learning_rate": 2.9814044425935605e-07, |
| "loss": 2.419, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.9248868778280543, |
| "grad_norm": 1.83379043500035, |
| "learning_rate": 2.934500265956075e-07, |
| "loss": 2.3716, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.9254901960784314, |
| "grad_norm": 1.8719709347158549, |
| "learning_rate": 2.887962480037354e-07, |
| "loss": 2.4137, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.9260935143288085, |
| "grad_norm": 1.6300473777324955, |
| "learning_rate": 2.841791260502402e-07, |
| "loss": 2.3371, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.9266968325791856, |
| "grad_norm": 1.6432377193732035, |
| "learning_rate": 2.7959867816325756e-07, |
| "loss": 2.4116, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.9273001508295626, |
| "grad_norm": 1.6374974156620303, |
| "learning_rate": 2.750549216324894e-07, |
| "loss": 2.3411, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.9279034690799397, |
| "grad_norm": 1.6801637904838937, |
| "learning_rate": 2.7054787360913825e-07, |
| "loss": 2.414, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.9285067873303168, |
| "grad_norm": 1.6856307351719522, |
| "learning_rate": 2.6607755110584886e-07, |
| "loss": 2.3434, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.9291101055806938, |
| "grad_norm": 1.603414777890947, |
| "learning_rate": 2.6164397099663676e-07, |
| "loss": 2.4188, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.9297134238310709, |
| "grad_norm": 1.5895687457039656, |
| "learning_rate": 2.5724715001682053e-07, |
| "loss": 2.4484, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.930316742081448, |
| "grad_norm": 1.685584255381596, |
| "learning_rate": 2.5288710476297553e-07, |
| "loss": 2.3443, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.9309200603318251, |
| "grad_norm": 1.6565581708347004, |
| "learning_rate": 2.4856385169285457e-07, |
| "loss": 2.3593, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.9315233785822021, |
| "grad_norm": 1.817733041298196, |
| "learning_rate": 2.442774071253329e-07, |
| "loss": 2.4111, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.9321266968325792, |
| "grad_norm": 1.6109618863028958, |
| "learning_rate": 2.4002778724034447e-07, |
| "loss": 2.3405, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.9327300150829563, |
| "grad_norm": 1.6820631065521003, |
| "learning_rate": 2.3581500807882462e-07, |
| "loss": 2.4116, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 1.6802462851437703, |
| "learning_rate": 2.3163908554264646e-07, |
| "loss": 2.4147, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.9339366515837104, |
| "grad_norm": 1.5529306934293425, |
| "learning_rate": 2.2750003539456e-07, |
| "loss": 2.368, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.9345399698340875, |
| "grad_norm": 1.7540477373608447, |
| "learning_rate": 2.2339787325813323e-07, |
| "loss": 2.3551, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.9351432880844646, |
| "grad_norm": 1.5684347320472798, |
| "learning_rate": 2.1933261461769772e-07, |
| "loss": 2.3646, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9357466063348416, |
| "grad_norm": 1.6975612580460133, |
| "learning_rate": 2.15304274818281e-07, |
| "loss": 2.424, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.9363499245852187, |
| "grad_norm": 1.6439050320927016, |
| "learning_rate": 2.1131286906555859e-07, |
| "loss": 2.4373, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.9369532428355958, |
| "grad_norm": 1.6463439573689524, |
| "learning_rate": 2.0735841242578992e-07, |
| "loss": 2.3877, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.9375565610859729, |
| "grad_norm": 1.596036485060441, |
| "learning_rate": 2.034409198257614e-07, |
| "loss": 2.3444, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.9381598793363499, |
| "grad_norm": 1.60735589310518, |
| "learning_rate": 1.9956040605273784e-07, |
| "loss": 2.382, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.938763197586727, |
| "grad_norm": 1.7502524053387238, |
| "learning_rate": 1.9571688575439672e-07, |
| "loss": 2.3612, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.9393665158371041, |
| "grad_norm": 1.695777678767089, |
| "learning_rate": 1.9191037343877729e-07, |
| "loss": 2.4561, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.9399698340874811, |
| "grad_norm": 1.7535784624939774, |
| "learning_rate": 1.8814088347422822e-07, |
| "loss": 2.4411, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.9405731523378582, |
| "grad_norm": 1.7687315605771063, |
| "learning_rate": 1.844084300893456e-07, |
| "loss": 2.3016, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 1.6191879781315448, |
| "learning_rate": 1.8071302737293294e-07, |
| "loss": 2.4139, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9417797888386124, |
| "grad_norm": 1.7873729255837811, |
| "learning_rate": 1.770546892739322e-07, |
| "loss": 2.413, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.9423831070889894, |
| "grad_norm": 1.619912308221573, |
| "learning_rate": 1.7343342960138064e-07, |
| "loss": 2.3733, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.9429864253393665, |
| "grad_norm": 1.6108954711011358, |
| "learning_rate": 1.6984926202435527e-07, |
| "loss": 2.3484, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.9435897435897436, |
| "grad_norm": 1.7174024797225025, |
| "learning_rate": 1.6630220007192722e-07, |
| "loss": 2.4189, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.9441930618401206, |
| "grad_norm": 1.6005639732300854, |
| "learning_rate": 1.6279225713310088e-07, |
| "loss": 2.3632, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.9447963800904977, |
| "grad_norm": 1.7260576555936502, |
| "learning_rate": 1.5931944645677043e-07, |
| "loss": 2.2744, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.9453996983408748, |
| "grad_norm": 1.6490451332752685, |
| "learning_rate": 1.558837811516667e-07, |
| "loss": 2.4731, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.9460030165912519, |
| "grad_norm": 1.6988860268737969, |
| "learning_rate": 1.5248527418631254e-07, |
| "loss": 2.3633, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.9466063348416289, |
| "grad_norm": 1.7173844921390578, |
| "learning_rate": 1.4912393838896422e-07, |
| "loss": 2.3386, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.947209653092006, |
| "grad_norm": 1.7545899522448907, |
| "learning_rate": 1.4579978644757463e-07, |
| "loss": 2.3882, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.9478129713423831, |
| "grad_norm": 1.72621481584512, |
| "learning_rate": 1.4251283090973567e-07, |
| "loss": 2.3771, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.9484162895927601, |
| "grad_norm": 1.642167082784339, |
| "learning_rate": 1.392630841826359e-07, |
| "loss": 2.3513, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.9490196078431372, |
| "grad_norm": 1.6632894815030224, |
| "learning_rate": 1.360505585330152e-07, |
| "loss": 2.4374, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.9496229260935143, |
| "grad_norm": 1.7481181022414323, |
| "learning_rate": 1.3287526608711132e-07, |
| "loss": 2.411, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.9502262443438914, |
| "grad_norm": 1.6299990975986602, |
| "learning_rate": 1.297372188306234e-07, |
| "loss": 2.3651, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.9508295625942684, |
| "grad_norm": 1.632744134449333, |
| "learning_rate": 1.2663642860865854e-07, |
| "loss": 2.3385, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.9514328808446455, |
| "grad_norm": 1.669311029172285, |
| "learning_rate": 1.2357290712569304e-07, |
| "loss": 2.371, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.9520361990950226, |
| "grad_norm": 1.6777638968211144, |
| "learning_rate": 1.2054666594552568e-07, |
| "loss": 2.3964, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.9526395173453996, |
| "grad_norm": 1.8704079528291009, |
| "learning_rate": 1.1755771649123337e-07, |
| "loss": 2.331, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.9532428355957768, |
| "grad_norm": 1.6167520099218842, |
| "learning_rate": 1.1460607004512681e-07, |
| "loss": 2.3648, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9538461538461539, |
| "grad_norm": 1.5402727403200858, |
| "learning_rate": 1.1169173774871478e-07, |
| "loss": 2.3065, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.954449472096531, |
| "grad_norm": 1.7323323663200998, |
| "learning_rate": 1.0881473060265325e-07, |
| "loss": 2.4157, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.955052790346908, |
| "grad_norm": 1.6250278954640458, |
| "learning_rate": 1.0597505946670972e-07, |
| "loss": 2.4076, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.9556561085972851, |
| "grad_norm": 1.7877603632530947, |
| "learning_rate": 1.0317273505972003e-07, |
| "loss": 2.4501, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.9562594268476622, |
| "grad_norm": 1.690873762437158, |
| "learning_rate": 1.004077679595472e-07, |
| "loss": 2.3116, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.9568627450980393, |
| "grad_norm": 1.7188774311515, |
| "learning_rate": 9.768016860304485e-08, |
| "loss": 2.3585, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.9574660633484163, |
| "grad_norm": 1.612172189966338, |
| "learning_rate": 9.498994728601386e-08, |
| "loss": 2.3611, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.9580693815987934, |
| "grad_norm": 1.7076734282514445, |
| "learning_rate": 9.233711416316571e-08, |
| "loss": 2.3298, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.9586726998491705, |
| "grad_norm": 1.6271830112713341, |
| "learning_rate": 8.972167924808151e-08, |
| "loss": 2.4282, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.9592760180995475, |
| "grad_norm": 1.6452718258981232, |
| "learning_rate": 8.714365241318079e-08, |
| "loss": 2.2814, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9598793363499246, |
| "grad_norm": 1.736858370561717, |
| "learning_rate": 8.460304338967496e-08, |
| "loss": 2.2471, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.9604826546003017, |
| "grad_norm": 1.698600906604979, |
| "learning_rate": 8.209986176753947e-08, |
| "loss": 2.3503, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.9610859728506788, |
| "grad_norm": 1.7115650921235999, |
| "learning_rate": 7.963411699546952e-08, |
| "loss": 2.3511, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.9616892911010558, |
| "grad_norm": 1.676613995847439, |
| "learning_rate": 7.720581838085106e-08, |
| "loss": 2.4299, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.9622926093514329, |
| "grad_norm": 1.5996939948140017, |
| "learning_rate": 7.481497508972313e-08, |
| "loss": 2.2491, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.96289592760181, |
| "grad_norm": 1.6337951191726159, |
| "learning_rate": 7.24615961467412e-08, |
| "loss": 2.4536, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.963499245852187, |
| "grad_norm": 1.7162528403403121, |
| "learning_rate": 7.014569043514496e-08, |
| "loss": 2.4588, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.9641025641025641, |
| "grad_norm": 1.701535912798598, |
| "learning_rate": 6.78672666967295e-08, |
| "loss": 2.339, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.9647058823529412, |
| "grad_norm": 1.7375073029608583, |
| "learning_rate": 6.562633353180081e-08, |
| "loss": 2.3682, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.9653092006033183, |
| "grad_norm": 1.6157005142344734, |
| "learning_rate": 6.342289939915369e-08, |
| "loss": 2.3839, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9659125188536953, |
| "grad_norm": 1.7080860462253225, |
| "learning_rate": 6.125697261603725e-08, |
| "loss": 2.4579, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.9665158371040724, |
| "grad_norm": 1.6596429838292508, |
| "learning_rate": 5.912856135812051e-08, |
| "loss": 2.4166, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.9671191553544495, |
| "grad_norm": 1.6281663411559284, |
| "learning_rate": 5.7037673659464664e-08, |
| "loss": 2.3515, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.9677224736048265, |
| "grad_norm": 1.6615869318672554, |
| "learning_rate": 5.498431741249089e-08, |
| "loss": 2.4207, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.9683257918552036, |
| "grad_norm": 1.6928639425812697, |
| "learning_rate": 5.2968500367951425e-08, |
| "loss": 2.3342, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.9689291101055807, |
| "grad_norm": 1.6711375673234121, |
| "learning_rate": 5.0990230134900786e-08, |
| "loss": 2.3565, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.9695324283559578, |
| "grad_norm": 1.6097613979206937, |
| "learning_rate": 4.904951418066684e-08, |
| "loss": 2.3441, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.9701357466063348, |
| "grad_norm": 1.8307288771308332, |
| "learning_rate": 4.7146359830821944e-08, |
| "loss": 2.3499, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.9707390648567119, |
| "grad_norm": 1.6421462675997172, |
| "learning_rate": 4.528077426915412e-08, |
| "loss": 2.4347, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.971342383107089, |
| "grad_norm": 1.5882185826913309, |
| "learning_rate": 4.345276453764258e-08, |
| "loss": 2.3434, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.971945701357466, |
| "grad_norm": 1.6558426293124853, |
| "learning_rate": 4.166233753643112e-08, |
| "loss": 2.3193, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.9725490196078431, |
| "grad_norm": 1.7560871448672373, |
| "learning_rate": 3.990950002380034e-08, |
| "loss": 2.3671, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.9731523378582202, |
| "grad_norm": 1.6400996276397237, |
| "learning_rate": 3.81942586161399e-08, |
| "loss": 2.3296, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.9737556561085973, |
| "grad_norm": 1.5216875785599606, |
| "learning_rate": 3.651661978793075e-08, |
| "loss": 2.4094, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 1.5635756439509034, |
| "learning_rate": 3.487658987171294e-08, |
| "loss": 2.4104, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.9749622926093514, |
| "grad_norm": 1.641632266579705, |
| "learning_rate": 3.327417505806785e-08, |
| "loss": 2.4169, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.9755656108597285, |
| "grad_norm": 1.7298559985631585, |
| "learning_rate": 3.170938139558932e-08, |
| "loss": 2.3705, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.9761689291101056, |
| "grad_norm": 1.6921930674125782, |
| "learning_rate": 3.0182214790865915e-08, |
| "loss": 2.3795, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.9767722473604826, |
| "grad_norm": 1.771755697713286, |
| "learning_rate": 2.8692681008454238e-08, |
| "loss": 2.3752, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.9773755656108597, |
| "grad_norm": 1.6033324873008887, |
| "learning_rate": 2.724078567086119e-08, |
| "loss": 2.3251, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9779788838612368, |
| "grad_norm": 1.803718606228007, |
| "learning_rate": 2.5826534258520663e-08, |
| "loss": 2.3912, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.9785822021116138, |
| "grad_norm": 1.655323213505876, |
| "learning_rate": 2.44499321097702e-08, |
| "loss": 2.3988, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.9791855203619909, |
| "grad_norm": 1.6389264444134706, |
| "learning_rate": 2.311098442083659e-08, |
| "loss": 2.3493, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.979788838612368, |
| "grad_norm": 1.9126115252359497, |
| "learning_rate": 2.180969624581253e-08, |
| "loss": 2.4097, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 1.6775223298076922, |
| "learning_rate": 2.054607249663665e-08, |
| "loss": 2.4115, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.9809954751131221, |
| "grad_norm": 1.6034357462290654, |
| "learning_rate": 1.9320117943080198e-08, |
| "loss": 2.418, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.9815987933634992, |
| "grad_norm": 1.614030224153856, |
| "learning_rate": 1.813183721272038e-08, |
| "loss": 2.3146, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.9822021116138763, |
| "grad_norm": 1.577279995047101, |
| "learning_rate": 1.698123479093372e-08, |
| "loss": 2.3794, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.9828054298642533, |
| "grad_norm": 1.6247769579619815, |
| "learning_rate": 1.5868315020868276e-08, |
| "loss": 2.3719, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.9834087481146304, |
| "grad_norm": 1.7166433701178447, |
| "learning_rate": 1.4793082103435885e-08, |
| "loss": 2.4619, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9840120663650075, |
| "grad_norm": 1.7219302287966303, |
| "learning_rate": 1.3755540097291076e-08, |
| "loss": 2.3699, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.9846153846153847, |
| "grad_norm": 1.6258356906532427, |
| "learning_rate": 1.275569291881662e-08, |
| "loss": 2.3593, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.9852187028657617, |
| "grad_norm": 1.6647030705810246, |
| "learning_rate": 1.179354434211355e-08, |
| "loss": 2.4539, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.9858220211161388, |
| "grad_norm": 1.8941466485318852, |
| "learning_rate": 1.0869097998976729e-08, |
| "loss": 2.372, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.9864253393665159, |
| "grad_norm": 1.694320915923539, |
| "learning_rate": 9.982357378891528e-09, |
| "loss": 2.3551, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.987028657616893, |
| "grad_norm": 1.5883863869845254, |
| "learning_rate": 9.13332582901716e-09, |
| "loss": 2.3607, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.98763197586727, |
| "grad_norm": 1.657927151109981, |
| "learning_rate": 8.322006554171147e-09, |
| "loss": 2.4434, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.9882352941176471, |
| "grad_norm": 1.5879679104330986, |
| "learning_rate": 7.548402616819328e-09, |
| "loss": 2.4031, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.9888386123680242, |
| "grad_norm": 1.6918171007969323, |
| "learning_rate": 6.812516937065861e-09, |
| "loss": 2.4065, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.9894419306184012, |
| "grad_norm": 1.7232747761996445, |
| "learning_rate": 6.114352292639902e-09, |
| "loss": 2.3715, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9900452488687783, |
| "grad_norm": 1.7276585553136063, |
| "learning_rate": 5.453911318886729e-09, |
| "loss": 2.385, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.9906485671191554, |
| "grad_norm": 1.7987368522109572, |
| "learning_rate": 4.83119650875552e-09, |
| "loss": 2.3348, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.9912518853695325, |
| "grad_norm": 1.6681326236785403, |
| "learning_rate": 4.246210212791591e-09, |
| "loss": 2.3628, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.9918552036199095, |
| "grad_norm": 1.6883734713107637, |
| "learning_rate": 3.698954639129726e-09, |
| "loss": 2.3348, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.9924585218702866, |
| "grad_norm": 1.6356203453610036, |
| "learning_rate": 3.1894318534819725e-09, |
| "loss": 2.3339, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.9930618401206637, |
| "grad_norm": 1.6389219683246756, |
| "learning_rate": 2.717643779129864e-09, |
| "loss": 2.3864, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.9936651583710407, |
| "grad_norm": 1.7109810853278875, |
| "learning_rate": 2.2835921969210917e-09, |
| "loss": 2.3851, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.9942684766214178, |
| "grad_norm": 1.6983534471695187, |
| "learning_rate": 1.8872787452584028e-09, |
| "loss": 2.4063, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.9948717948717949, |
| "grad_norm": 1.8209800366157713, |
| "learning_rate": 1.5287049200962688e-09, |
| "loss": 2.4157, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.995475113122172, |
| "grad_norm": 1.711823043494049, |
| "learning_rate": 1.2078720749364447e-09, |
| "loss": 2.3617, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.996078431372549, |
| "grad_norm": 1.6751671226929132, |
| "learning_rate": 9.24781420816867e-10, |
| "loss": 2.384, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.9966817496229261, |
| "grad_norm": 1.6900768872777132, |
| "learning_rate": 6.794340263127641e-10, |
| "loss": 2.4084, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.9972850678733032, |
| "grad_norm": 1.6710422234638216, |
| "learning_rate": 4.718308175311049e-10, |
| "loss": 2.4462, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.9978883861236802, |
| "grad_norm": 1.6582342788101545, |
| "learning_rate": 3.0197257810615774e-10, |
| "loss": 2.392, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.9984917043740573, |
| "grad_norm": 1.7272268608762122, |
| "learning_rate": 1.69859949198381e-10, |
| "loss": 2.3052, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.9990950226244344, |
| "grad_norm": 1.770451843913821, |
| "learning_rate": 7.549342948887095e-11, |
| "loss": 2.4259, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.9996983408748115, |
| "grad_norm": 1.6154000753848161, |
| "learning_rate": 1.8873375182693054e-11, |
| "loss": 2.3743, |
| "step": 1657 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1657, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 829, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 540265949429760.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|