| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1152, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0017362995116657625, |
| "grad_norm": 1.9245674193476585, |
| "learning_rate": 0.0, |
| "loss": 0.5116, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.003472599023331525, |
| "grad_norm": 1.9643021408198185, |
| "learning_rate": 8.620689655172414e-08, |
| "loss": 0.4842, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005208898534997287, |
| "grad_norm": 1.9648005065921663, |
| "learning_rate": 1.7241379310344828e-07, |
| "loss": 0.5066, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00694519804666305, |
| "grad_norm": 1.9491551990062834, |
| "learning_rate": 2.5862068965517245e-07, |
| "loss": 0.5129, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008681497558328812, |
| "grad_norm": 2.1220950808108245, |
| "learning_rate": 3.4482758620689656e-07, |
| "loss": 0.5027, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.010417797069994574, |
| "grad_norm": 2.07218102117925, |
| "learning_rate": 4.3103448275862073e-07, |
| "loss": 0.5025, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.012154096581660336, |
| "grad_norm": 2.0995935631323217, |
| "learning_rate": 5.172413793103449e-07, |
| "loss": 0.489, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0138903960933261, |
| "grad_norm": 1.8768353178221435, |
| "learning_rate": 6.034482758620691e-07, |
| "loss": 0.4919, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01562669560499186, |
| "grad_norm": 1.9966804449499742, |
| "learning_rate": 6.896551724137931e-07, |
| "loss": 0.4811, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.017362995116657624, |
| "grad_norm": 1.8763897444655444, |
| "learning_rate": 7.758620689655173e-07, |
| "loss": 0.5173, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019099294628323386, |
| "grad_norm": 1.9849388307797002, |
| "learning_rate": 8.620689655172415e-07, |
| "loss": 0.5029, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.020835594139989148, |
| "grad_norm": 1.8516809060978145, |
| "learning_rate": 9.482758620689655e-07, |
| "loss": 0.4963, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02257189365165491, |
| "grad_norm": 1.9602745515172928, |
| "learning_rate": 1.0344827586206898e-06, |
| "loss": 0.5193, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02430819316332067, |
| "grad_norm": 1.8697001604531807, |
| "learning_rate": 1.120689655172414e-06, |
| "loss": 0.5064, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.026044492674986434, |
| "grad_norm": 1.6394017102026306, |
| "learning_rate": 1.2068965517241381e-06, |
| "loss": 0.4735, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0277807921866522, |
| "grad_norm": 1.6749588224557876, |
| "learning_rate": 1.2931034482758623e-06, |
| "loss": 0.4982, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02951709169831796, |
| "grad_norm": 1.6539213113264153, |
| "learning_rate": 1.3793103448275862e-06, |
| "loss": 0.4837, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03125339120998372, |
| "grad_norm": 1.6831347937004577, |
| "learning_rate": 1.4655172413793104e-06, |
| "loss": 0.4926, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.032989690721649485, |
| "grad_norm": 1.6783335621868025, |
| "learning_rate": 1.5517241379310346e-06, |
| "loss": 0.4853, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03472599023331525, |
| "grad_norm": 1.5470450238435818, |
| "learning_rate": 1.6379310344827587e-06, |
| "loss": 0.4912, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03646228974498101, |
| "grad_norm": 1.2016269254575944, |
| "learning_rate": 1.724137931034483e-06, |
| "loss": 0.4956, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03819858925664677, |
| "grad_norm": 1.1241956354180678, |
| "learning_rate": 1.810344827586207e-06, |
| "loss": 0.4803, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03993488876831253, |
| "grad_norm": 1.005571825200285, |
| "learning_rate": 1.896551724137931e-06, |
| "loss": 0.4837, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.041671188279978295, |
| "grad_norm": 0.8622973028025744, |
| "learning_rate": 1.982758620689655e-06, |
| "loss": 0.4927, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04340748779164406, |
| "grad_norm": 0.9266904408010402, |
| "learning_rate": 2.0689655172413796e-06, |
| "loss": 0.4639, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04514378730330982, |
| "grad_norm": 0.8473052639588573, |
| "learning_rate": 2.1551724137931035e-06, |
| "loss": 0.482, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04688008681497558, |
| "grad_norm": 0.7307713142390112, |
| "learning_rate": 2.241379310344828e-06, |
| "loss": 0.4986, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.04861638632664134, |
| "grad_norm": 0.634004932033382, |
| "learning_rate": 2.327586206896552e-06, |
| "loss": 0.465, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.050352685838307105, |
| "grad_norm": 0.6554907050735821, |
| "learning_rate": 2.4137931034482762e-06, |
| "loss": 0.4693, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05208898534997287, |
| "grad_norm": 0.9610831996283411, |
| "learning_rate": 2.5e-06, |
| "loss": 0.4788, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05382528486163863, |
| "grad_norm": 0.9636064060253327, |
| "learning_rate": 2.5862068965517246e-06, |
| "loss": 0.4563, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0555615843733044, |
| "grad_norm": 0.9591103406834719, |
| "learning_rate": 2.672413793103448e-06, |
| "loss": 0.4516, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05729788388497016, |
| "grad_norm": 1.0323468312914004, |
| "learning_rate": 2.7586206896551725e-06, |
| "loss": 0.4584, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.05903418339663592, |
| "grad_norm": 0.858020140191911, |
| "learning_rate": 2.844827586206897e-06, |
| "loss": 0.438, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.060770482908301685, |
| "grad_norm": 0.8960956717385824, |
| "learning_rate": 2.931034482758621e-06, |
| "loss": 0.4744, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06250678241996745, |
| "grad_norm": 0.7458992924003728, |
| "learning_rate": 3.017241379310345e-06, |
| "loss": 0.4567, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06424308193163321, |
| "grad_norm": 0.8185303607292461, |
| "learning_rate": 3.103448275862069e-06, |
| "loss": 0.4872, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06597938144329897, |
| "grad_norm": 0.5822441053966084, |
| "learning_rate": 3.1896551724137935e-06, |
| "loss": 0.459, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06771568095496473, |
| "grad_norm": 0.5481383087123407, |
| "learning_rate": 3.2758620689655175e-06, |
| "loss": 0.4623, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0694519804666305, |
| "grad_norm": 0.41776886298603355, |
| "learning_rate": 3.362068965517242e-06, |
| "loss": 0.4543, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07118827997829626, |
| "grad_norm": 0.2938251335852544, |
| "learning_rate": 3.448275862068966e-06, |
| "loss": 0.4532, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07292457948996202, |
| "grad_norm": 0.2737664486009171, |
| "learning_rate": 3.5344827586206898e-06, |
| "loss": 0.4314, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07466087900162778, |
| "grad_norm": 0.3109574122181629, |
| "learning_rate": 3.620689655172414e-06, |
| "loss": 0.4327, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.07639717851329354, |
| "grad_norm": 0.3666431324176173, |
| "learning_rate": 3.7068965517241385e-06, |
| "loss": 0.4549, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0781334780249593, |
| "grad_norm": 0.4053783490688968, |
| "learning_rate": 3.793103448275862e-06, |
| "loss": 0.4563, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.07986977753662507, |
| "grad_norm": 0.4155281249495721, |
| "learning_rate": 3.8793103448275865e-06, |
| "loss": 0.4543, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08160607704829083, |
| "grad_norm": 0.4453663795611955, |
| "learning_rate": 3.96551724137931e-06, |
| "loss": 0.4464, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.08334237655995659, |
| "grad_norm": 0.39170643684961, |
| "learning_rate": 4.051724137931034e-06, |
| "loss": 0.441, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08507867607162235, |
| "grad_norm": 0.29498910001689194, |
| "learning_rate": 4.137931034482759e-06, |
| "loss": 0.4247, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08681497558328811, |
| "grad_norm": 0.3353170488362428, |
| "learning_rate": 4.224137931034483e-06, |
| "loss": 0.4411, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08855127509495388, |
| "grad_norm": 0.26783380276886826, |
| "learning_rate": 4.310344827586207e-06, |
| "loss": 0.4546, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09028757460661964, |
| "grad_norm": 0.28388985863752725, |
| "learning_rate": 4.396551724137931e-06, |
| "loss": 0.4597, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0920238741182854, |
| "grad_norm": 0.20735134006190523, |
| "learning_rate": 4.482758620689656e-06, |
| "loss": 0.4444, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.09376017362995116, |
| "grad_norm": 0.21901348164598217, |
| "learning_rate": 4.56896551724138e-06, |
| "loss": 0.4752, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09549647314161692, |
| "grad_norm": 0.25595144274452897, |
| "learning_rate": 4.655172413793104e-06, |
| "loss": 0.4339, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.09723277265328269, |
| "grad_norm": 0.19446449161885673, |
| "learning_rate": 4.741379310344828e-06, |
| "loss": 0.4312, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.09896907216494845, |
| "grad_norm": 0.22806613782923416, |
| "learning_rate": 4.8275862068965525e-06, |
| "loss": 0.4544, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.10070537167661421, |
| "grad_norm": 0.18835606666142574, |
| "learning_rate": 4.9137931034482765e-06, |
| "loss": 0.4319, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.10244167118827997, |
| "grad_norm": 0.24977581681440297, |
| "learning_rate": 5e-06, |
| "loss": 0.4416, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.10417797069994574, |
| "grad_norm": 0.19750282506802236, |
| "learning_rate": 5.086206896551724e-06, |
| "loss": 0.4257, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1059142702116115, |
| "grad_norm": 0.19110900242177456, |
| "learning_rate": 5.172413793103449e-06, |
| "loss": 0.4177, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.10765056972327726, |
| "grad_norm": 0.17213185107606405, |
| "learning_rate": 5.258620689655173e-06, |
| "loss": 0.4327, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.10938686923494302, |
| "grad_norm": 0.15630107177842684, |
| "learning_rate": 5.344827586206896e-06, |
| "loss": 0.415, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1111231687466088, |
| "grad_norm": 0.17029523143593536, |
| "learning_rate": 5.431034482758621e-06, |
| "loss": 0.4453, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11285946825827456, |
| "grad_norm": 0.19659475939270915, |
| "learning_rate": 5.517241379310345e-06, |
| "loss": 0.4584, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.11459576776994032, |
| "grad_norm": 0.16860964327696285, |
| "learning_rate": 5.603448275862069e-06, |
| "loss": 0.4493, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11633206728160608, |
| "grad_norm": 0.22957081775328345, |
| "learning_rate": 5.689655172413794e-06, |
| "loss": 0.456, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.11806836679327185, |
| "grad_norm": 0.22307260970140155, |
| "learning_rate": 5.775862068965518e-06, |
| "loss": 0.4542, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.11980466630493761, |
| "grad_norm": 0.1761287403232541, |
| "learning_rate": 5.862068965517242e-06, |
| "loss": 0.4357, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.12154096581660337, |
| "grad_norm": 0.17806731622716251, |
| "learning_rate": 5.9482758620689665e-06, |
| "loss": 0.441, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12327726532826913, |
| "grad_norm": 0.16595870061584297, |
| "learning_rate": 6.03448275862069e-06, |
| "loss": 0.4478, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1250135648399349, |
| "grad_norm": 0.17683622113941452, |
| "learning_rate": 6.1206896551724135e-06, |
| "loss": 0.4439, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.12674986435160066, |
| "grad_norm": 0.1728852455905528, |
| "learning_rate": 6.206896551724138e-06, |
| "loss": 0.468, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.12848616386326642, |
| "grad_norm": 0.17067325519254972, |
| "learning_rate": 6.293103448275862e-06, |
| "loss": 0.44, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.13022246337493218, |
| "grad_norm": 0.17049371114039943, |
| "learning_rate": 6.379310344827587e-06, |
| "loss": 0.4301, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.13195876288659794, |
| "grad_norm": 0.16183277888470998, |
| "learning_rate": 6.465517241379311e-06, |
| "loss": 0.4438, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1336950623982637, |
| "grad_norm": 0.1849741022931392, |
| "learning_rate": 6.551724137931035e-06, |
| "loss": 0.436, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.13543136190992947, |
| "grad_norm": 0.15302845702506, |
| "learning_rate": 6.63793103448276e-06, |
| "loss": 0.4522, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.13716766142159523, |
| "grad_norm": 0.1779999440937052, |
| "learning_rate": 6.724137931034484e-06, |
| "loss": 0.4469, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.138903960933261, |
| "grad_norm": 0.14785801477595045, |
| "learning_rate": 6.810344827586207e-06, |
| "loss": 0.4339, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14064026044492675, |
| "grad_norm": 0.14779342419552097, |
| "learning_rate": 6.896551724137932e-06, |
| "loss": 0.445, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1423765599565925, |
| "grad_norm": 0.17525194430534158, |
| "learning_rate": 6.982758620689656e-06, |
| "loss": 0.4426, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.14411285946825828, |
| "grad_norm": 0.16728639548623878, |
| "learning_rate": 7.0689655172413796e-06, |
| "loss": 0.4301, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.14584915897992404, |
| "grad_norm": 0.14448921047681842, |
| "learning_rate": 7.155172413793104e-06, |
| "loss": 0.4398, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1475854584915898, |
| "grad_norm": 0.17217160852240143, |
| "learning_rate": 7.241379310344828e-06, |
| "loss": 0.4452, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.14932175800325556, |
| "grad_norm": 0.15054846492214635, |
| "learning_rate": 7.327586206896552e-06, |
| "loss": 0.4402, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15105805751492132, |
| "grad_norm": 0.18058844867837248, |
| "learning_rate": 7.413793103448277e-06, |
| "loss": 0.4249, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.15279435702658709, |
| "grad_norm": 0.16522103718686243, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.439, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.15453065653825285, |
| "grad_norm": 0.17085898069141625, |
| "learning_rate": 7.586206896551724e-06, |
| "loss": 0.4615, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1562669560499186, |
| "grad_norm": 0.1352941909359962, |
| "learning_rate": 7.672413793103449e-06, |
| "loss": 0.4366, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15800325556158437, |
| "grad_norm": 0.14834678829341033, |
| "learning_rate": 7.758620689655173e-06, |
| "loss": 0.4212, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.15973955507325013, |
| "grad_norm": 0.1520079978715368, |
| "learning_rate": 7.844827586206897e-06, |
| "loss": 0.4365, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1614758545849159, |
| "grad_norm": 0.1371331686782148, |
| "learning_rate": 7.93103448275862e-06, |
| "loss": 0.4216, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.16321215409658166, |
| "grad_norm": 0.13976693670870327, |
| "learning_rate": 8.017241379310345e-06, |
| "loss": 0.4492, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.16494845360824742, |
| "grad_norm": 0.15160516329676715, |
| "learning_rate": 8.103448275862069e-06, |
| "loss": 0.4418, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.16668475311991318, |
| "grad_norm": 0.17142907121339737, |
| "learning_rate": 8.189655172413794e-06, |
| "loss": 0.4383, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.16842105263157894, |
| "grad_norm": 0.16648106630358045, |
| "learning_rate": 8.275862068965518e-06, |
| "loss": 0.4374, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1701573521432447, |
| "grad_norm": 0.16111838676750498, |
| "learning_rate": 8.362068965517242e-06, |
| "loss": 0.4568, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.17189365165491047, |
| "grad_norm": 0.14581097572550494, |
| "learning_rate": 8.448275862068966e-06, |
| "loss": 0.4255, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.17362995116657623, |
| "grad_norm": 0.1378644644499854, |
| "learning_rate": 8.53448275862069e-06, |
| "loss": 0.4235, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.175366250678242, |
| "grad_norm": 0.15888598262640966, |
| "learning_rate": 8.620689655172414e-06, |
| "loss": 0.4347, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.17710255018990775, |
| "grad_norm": 0.14832980896853906, |
| "learning_rate": 8.706896551724138e-06, |
| "loss": 0.4256, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.17883884970157352, |
| "grad_norm": 0.16331329070890865, |
| "learning_rate": 8.793103448275862e-06, |
| "loss": 0.4146, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.18057514921323928, |
| "grad_norm": 0.16254414896072908, |
| "learning_rate": 8.879310344827588e-06, |
| "loss": 0.4538, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.18231144872490504, |
| "grad_norm": 0.20628714572870552, |
| "learning_rate": 8.965517241379312e-06, |
| "loss": 0.4545, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1840477482365708, |
| "grad_norm": 0.14672329302866716, |
| "learning_rate": 9.051724137931036e-06, |
| "loss": 0.4409, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.18578404774823656, |
| "grad_norm": 0.17704727106288587, |
| "learning_rate": 9.13793103448276e-06, |
| "loss": 0.4569, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.18752034725990233, |
| "grad_norm": 0.1671108515367416, |
| "learning_rate": 9.224137931034484e-06, |
| "loss": 0.4419, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1892566467715681, |
| "grad_norm": 0.1401024359628494, |
| "learning_rate": 9.310344827586207e-06, |
| "loss": 0.4399, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.19099294628323385, |
| "grad_norm": 0.17514966479813296, |
| "learning_rate": 9.396551724137931e-06, |
| "loss": 0.4323, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1927292457948996, |
| "grad_norm": 0.1813009132122757, |
| "learning_rate": 9.482758620689655e-06, |
| "loss": 0.4465, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.19446554530656537, |
| "grad_norm": 0.14595421920726084, |
| "learning_rate": 9.56896551724138e-06, |
| "loss": 0.4378, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.19620184481823114, |
| "grad_norm": 0.15465278812299593, |
| "learning_rate": 9.655172413793105e-06, |
| "loss": 0.4381, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1979381443298969, |
| "grad_norm": 0.15186254208439187, |
| "learning_rate": 9.741379310344829e-06, |
| "loss": 0.4231, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.19967444384156266, |
| "grad_norm": 0.15058276592497516, |
| "learning_rate": 9.827586206896553e-06, |
| "loss": 0.431, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.20141074335322842, |
| "grad_norm": 0.16119079928649604, |
| "learning_rate": 9.913793103448277e-06, |
| "loss": 0.452, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.20314704286489418, |
| "grad_norm": 0.14095989647979248, |
| "learning_rate": 1e-05, |
| "loss": 0.4182, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.20488334237655995, |
| "grad_norm": 0.1746147114354537, |
| "learning_rate": 9.999977011008992e-06, |
| "loss": 0.4404, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2066196418882257, |
| "grad_norm": 0.17509978960441983, |
| "learning_rate": 9.999908044247359e-06, |
| "loss": 0.4376, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.20835594139989147, |
| "grad_norm": 0.15056060709141128, |
| "learning_rate": 9.999793100349294e-06, |
| "loss": 0.4058, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.21009224091155723, |
| "grad_norm": 0.16195757110570655, |
| "learning_rate": 9.999632180371776e-06, |
| "loss": 0.4461, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.211828540423223, |
| "grad_norm": 0.13882873497505904, |
| "learning_rate": 9.999425285794557e-06, |
| "loss": 0.4405, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.21356483993488876, |
| "grad_norm": 0.16965441460666414, |
| "learning_rate": 9.999172418520159e-06, |
| "loss": 0.4322, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.21530113944655452, |
| "grad_norm": 0.16599733161228727, |
| "learning_rate": 9.998873580873848e-06, |
| "loss": 0.4375, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.21703743895822028, |
| "grad_norm": 0.17776524728753473, |
| "learning_rate": 9.998528775603612e-06, |
| "loss": 0.4404, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.21877373846988604, |
| "grad_norm": 0.14928214841596088, |
| "learning_rate": 9.99813800588014e-06, |
| "loss": 0.4028, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2205100379815518, |
| "grad_norm": 0.19711135086768242, |
| "learning_rate": 9.997701275296796e-06, |
| "loss": 0.4457, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2222463374932176, |
| "grad_norm": 0.15782967723076521, |
| "learning_rate": 9.997218587869577e-06, |
| "loss": 0.444, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.22398263700488336, |
| "grad_norm": 0.16659222906938462, |
| "learning_rate": 9.996689948037081e-06, |
| "loss": 0.4371, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.22571893651654912, |
| "grad_norm": 0.1499268597695575, |
| "learning_rate": 9.996115360660466e-06, |
| "loss": 0.4393, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.22745523602821488, |
| "grad_norm": 0.1646819022624862, |
| "learning_rate": 9.99549483102341e-06, |
| "loss": 0.4539, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.22919153553988064, |
| "grad_norm": 0.16685025404219458, |
| "learning_rate": 9.994828364832045e-06, |
| "loss": 0.4081, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2309278350515464, |
| "grad_norm": 0.15466930085043173, |
| "learning_rate": 9.994115968214933e-06, |
| "loss": 0.4378, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.23266413456321217, |
| "grad_norm": 0.15508073913810053, |
| "learning_rate": 9.993357647722982e-06, |
| "loss": 0.439, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.23440043407487793, |
| "grad_norm": 0.1539119346782389, |
| "learning_rate": 9.9925534103294e-06, |
| "loss": 0.4265, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2361367335865437, |
| "grad_norm": 0.14426900874434997, |
| "learning_rate": 9.991703263429633e-06, |
| "loss": 0.4244, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.23787303309820945, |
| "grad_norm": 0.16870400577528086, |
| "learning_rate": 9.990807214841288e-06, |
| "loss": 0.419, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.23960933260987521, |
| "grad_norm": 0.20462475693369803, |
| "learning_rate": 9.989865272804064e-06, |
| "loss": 0.4246, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.24134563212154098, |
| "grad_norm": 0.1451712253771932, |
| "learning_rate": 9.988877445979681e-06, |
| "loss": 0.4381, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.24308193163320674, |
| "grad_norm": 0.15891534718210473, |
| "learning_rate": 9.987843743451796e-06, |
| "loss": 0.4235, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2448182311448725, |
| "grad_norm": 0.17815822351007055, |
| "learning_rate": 9.98676417472592e-06, |
| "loss": 0.4425, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.24655453065653826, |
| "grad_norm": 0.1654797111641331, |
| "learning_rate": 9.985638749729331e-06, |
| "loss": 0.4323, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.24829083016820402, |
| "grad_norm": 0.15859047009407842, |
| "learning_rate": 9.984467478810985e-06, |
| "loss": 0.4441, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2500271296798698, |
| "grad_norm": 0.16268622271549055, |
| "learning_rate": 9.983250372741412e-06, |
| "loss": 0.4458, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.25176342919153555, |
| "grad_norm": 0.14671156779493466, |
| "learning_rate": 9.981987442712634e-06, |
| "loss": 0.4299, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2534997287032013, |
| "grad_norm": 0.13279882931945028, |
| "learning_rate": 9.980678700338043e-06, |
| "loss": 0.4234, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2552360282148671, |
| "grad_norm": 0.13969849126911693, |
| "learning_rate": 9.979324157652303e-06, |
| "loss": 0.412, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.25697232772653283, |
| "grad_norm": 0.1488892574422794, |
| "learning_rate": 9.977923827111247e-06, |
| "loss": 0.4238, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2587086272381986, |
| "grad_norm": 0.16686316447713223, |
| "learning_rate": 9.976477721591746e-06, |
| "loss": 0.4629, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.26044492674986436, |
| "grad_norm": 0.16230014013728034, |
| "learning_rate": 9.974985854391606e-06, |
| "loss": 0.4411, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2621812262615301, |
| "grad_norm": 0.16069337866094202, |
| "learning_rate": 9.973448239229431e-06, |
| "loss": 0.4239, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2639175257731959, |
| "grad_norm": 0.1506322062881885, |
| "learning_rate": 9.971864890244514e-06, |
| "loss": 0.4517, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.26565382528486164, |
| "grad_norm": 0.171740710512301, |
| "learning_rate": 9.97023582199669e-06, |
| "loss": 0.4374, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.2673901247965274, |
| "grad_norm": 0.1419738588320865, |
| "learning_rate": 9.968561049466214e-06, |
| "loss": 0.4199, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.26912642430819317, |
| "grad_norm": 0.1606660762569778, |
| "learning_rate": 9.966840588053618e-06, |
| "loss": 0.4372, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.27086272381985893, |
| "grad_norm": 0.14846716499090143, |
| "learning_rate": 9.965074453579573e-06, |
| "loss": 0.4454, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2725990233315247, |
| "grad_norm": 0.1655209351236852, |
| "learning_rate": 9.963262662284735e-06, |
| "loss": 0.4467, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.27433532284319045, |
| "grad_norm": 0.17149849857801977, |
| "learning_rate": 9.96140523082961e-06, |
| "loss": 0.4325, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2760716223548562, |
| "grad_norm": 0.1410596039887854, |
| "learning_rate": 9.959502176294384e-06, |
| "loss": 0.4337, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.277807921866522, |
| "grad_norm": 0.1512531292193184, |
| "learning_rate": 9.957553516178782e-06, |
| "loss": 0.4278, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.27954422137818774, |
| "grad_norm": 0.14446839709527778, |
| "learning_rate": 9.955559268401893e-06, |
| "loss": 0.4262, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2812805208898535, |
| "grad_norm": 0.14669453705746469, |
| "learning_rate": 9.953519451302016e-06, |
| "loss": 0.4271, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.28301682040151926, |
| "grad_norm": 0.16500067161261647, |
| "learning_rate": 9.951434083636484e-06, |
| "loss": 0.4353, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.284753119913185, |
| "grad_norm": 0.14619596732513318, |
| "learning_rate": 9.9493031845815e-06, |
| "loss": 0.4326, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.2864894194248508, |
| "grad_norm": 0.16853726289967258, |
| "learning_rate": 9.947126773731949e-06, |
| "loss": 0.4407, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.28822571893651655, |
| "grad_norm": 0.15112267673295302, |
| "learning_rate": 9.944904871101227e-06, |
| "loss": 0.4391, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2899620184481823, |
| "grad_norm": 0.1651599790331637, |
| "learning_rate": 9.942637497121055e-06, |
| "loss": 0.4448, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.2916983179598481, |
| "grad_norm": 0.15953370847696652, |
| "learning_rate": 9.940324672641289e-06, |
| "loss": 0.4327, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.29343461747151384, |
| "grad_norm": 0.19265474992006965, |
| "learning_rate": 9.937966418929725e-06, |
| "loss": 0.4491, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.2951709169831796, |
| "grad_norm": 0.16933292810141926, |
| "learning_rate": 9.93556275767192e-06, |
| "loss": 0.4286, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.29690721649484536, |
| "grad_norm": 0.15237594797568174, |
| "learning_rate": 9.933113710970967e-06, |
| "loss": 0.4283, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2986435160065111, |
| "grad_norm": 0.18265650880814457, |
| "learning_rate": 9.930619301347312e-06, |
| "loss": 0.4546, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3003798155181769, |
| "grad_norm": 0.18099070598137104, |
| "learning_rate": 9.928079551738542e-06, |
| "loss": 0.4092, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.30211611502984265, |
| "grad_norm": 0.1815346079160872, |
| "learning_rate": 9.925494485499167e-06, |
| "loss": 0.4478, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3038524145415084, |
| "grad_norm": 0.1524482370964297, |
| "learning_rate": 9.922864126400414e-06, |
| "loss": 0.4545, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.30558871405317417, |
| "grad_norm": 0.16293570594409254, |
| "learning_rate": 9.920188498630003e-06, |
| "loss": 0.4208, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.30732501356483993, |
| "grad_norm": 0.1675570035616688, |
| "learning_rate": 9.917467626791925e-06, |
| "loss": 0.4626, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3090613130765057, |
| "grad_norm": 0.15825453091697014, |
| "learning_rate": 9.914701535906224e-06, |
| "loss": 0.4512, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.31079761258817146, |
| "grad_norm": 0.15630202329564072, |
| "learning_rate": 9.91189025140875e-06, |
| "loss": 0.4343, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.3125339120998372, |
| "grad_norm": 0.16078903742852474, |
| "learning_rate": 9.909033799150947e-06, |
| "loss": 0.4408, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.314270211611503, |
| "grad_norm": 0.16012556685600268, |
| "learning_rate": 9.90613220539959e-06, |
| "loss": 0.4139, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.31600651112316874, |
| "grad_norm": 0.14729320163626394, |
| "learning_rate": 9.90318549683657e-06, |
| "loss": 0.4365, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3177428106348345, |
| "grad_norm": 0.1397968636888418, |
| "learning_rate": 9.900193700558626e-06, |
| "loss": 0.4247, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.31947911014650027, |
| "grad_norm": 0.16626985308479, |
| "learning_rate": 9.897156844077111e-06, |
| "loss": 0.4288, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.32121540965816603, |
| "grad_norm": 0.15854235536164976, |
| "learning_rate": 9.89407495531773e-06, |
| "loss": 0.4148, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3229517091698318, |
| "grad_norm": 0.1622205483358838, |
| "learning_rate": 9.890948062620289e-06, |
| "loss": 0.438, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.32468800868149755, |
| "grad_norm": 0.16252071337796425, |
| "learning_rate": 9.887776194738433e-06, |
| "loss": 0.4501, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3264243081931633, |
| "grad_norm": 0.1710876752747644, |
| "learning_rate": 9.884559380839374e-06, |
| "loss": 0.4283, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3281606077048291, |
| "grad_norm": 0.18377407394807166, |
| "learning_rate": 9.881297650503641e-06, |
| "loss": 0.4413, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.32989690721649484, |
| "grad_norm": 0.140886519345969, |
| "learning_rate": 9.877991033724782e-06, |
| "loss": 0.4323, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3316332067281606, |
| "grad_norm": 0.14923852434950355, |
| "learning_rate": 9.874639560909118e-06, |
| "loss": 0.4422, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.33336950623982636, |
| "grad_norm": 0.14200004213993497, |
| "learning_rate": 9.871243262875437e-06, |
| "loss": 0.4258, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3351058057514921, |
| "grad_norm": 0.1589219977289536, |
| "learning_rate": 9.867802170854724e-06, |
| "loss": 0.435, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3368421052631579, |
| "grad_norm": 0.1590427030427661, |
| "learning_rate": 9.864316316489873e-06, |
| "loss": 0.4327, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.33857840477482365, |
| "grad_norm": 0.17338035825567785, |
| "learning_rate": 9.860785731835397e-06, |
| "loss": 0.4489, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3403147042864894, |
| "grad_norm": 0.1499845982176126, |
| "learning_rate": 9.857210449357121e-06, |
| "loss": 0.4333, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3420510037981552, |
| "grad_norm": 0.15231120130052084, |
| "learning_rate": 9.853590501931905e-06, |
| "loss": 0.4502, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.34378730330982094, |
| "grad_norm": 0.18719106483657583, |
| "learning_rate": 9.849925922847323e-06, |
| "loss": 0.4525, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3455236028214867, |
| "grad_norm": 0.17712759736915118, |
| "learning_rate": 9.846216745801366e-06, |
| "loss": 0.4441, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.34725990233315246, |
| "grad_norm": 0.14227690714934854, |
| "learning_rate": 9.842463004902127e-06, |
| "loss": 0.4278, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3489962018448182, |
| "grad_norm": 0.1420001146482775, |
| "learning_rate": 9.838664734667496e-06, |
| "loss": 0.4311, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.350732501356484, |
| "grad_norm": 0.13874177478200764, |
| "learning_rate": 9.834821970024828e-06, |
| "loss": 0.4232, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.35246880086814975, |
| "grad_norm": 0.16110035201959844, |
| "learning_rate": 9.83093474631064e-06, |
| "loss": 0.4178, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3542051003798155, |
| "grad_norm": 0.1477258063517038, |
| "learning_rate": 9.827003099270272e-06, |
| "loss": 0.4333, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.35594139989148127, |
| "grad_norm": 0.14916525553791554, |
| "learning_rate": 9.82302706505756e-06, |
| "loss": 0.4336, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.35767769940314703, |
| "grad_norm": 0.1573789515947177, |
| "learning_rate": 9.819006680234513e-06, |
| "loss": 0.44, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3594139989148128, |
| "grad_norm": 0.1814058728192214, |
| "learning_rate": 9.814941981770966e-06, |
| "loss": 0.4364, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.36115029842647856, |
| "grad_norm": 0.14326603797247606, |
| "learning_rate": 9.810833007044247e-06, |
| "loss": 0.4206, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3628865979381443, |
| "grad_norm": 0.14817920662769005, |
| "learning_rate": 9.806679793838829e-06, |
| "loss": 0.4407, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.3646228974498101, |
| "grad_norm": 0.1585265768266505, |
| "learning_rate": 9.802482380345983e-06, |
| "loss": 0.4752, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.36635919696147584, |
| "grad_norm": 0.16492885340552949, |
| "learning_rate": 9.79824080516343e-06, |
| "loss": 0.4321, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.3680954964731416, |
| "grad_norm": 0.15885428732313536, |
| "learning_rate": 9.793955107294983e-06, |
| "loss": 0.4182, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.36983179598480737, |
| "grad_norm": 0.1851374561328918, |
| "learning_rate": 9.78962532615019e-06, |
| "loss": 0.4359, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3715680954964731, |
| "grad_norm": 0.16410510241800808, |
| "learning_rate": 9.785251501543973e-06, |
| "loss": 0.4525, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.3733043950081389, |
| "grad_norm": 0.15343002176408385, |
| "learning_rate": 9.780833673696255e-06, |
| "loss": 0.4064, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.37504069451980465, |
| "grad_norm": 0.1830058368195266, |
| "learning_rate": 9.7763718832316e-06, |
| "loss": 0.4068, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3767769940314704, |
| "grad_norm": 0.15700071691973735, |
| "learning_rate": 9.771866171178832e-06, |
| "loss": 0.4218, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3785132935431362, |
| "grad_norm": 0.12876055954547613, |
| "learning_rate": 9.767316578970658e-06, |
| "loss": 0.4211, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.38024959305480194, |
| "grad_norm": 0.15442537490509584, |
| "learning_rate": 9.762723148443297e-06, |
| "loss": 0.435, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.3819858925664677, |
| "grad_norm": 0.15580345792713624, |
| "learning_rate": 9.758085921836076e-06, |
| "loss": 0.4369, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.38372219207813346, |
| "grad_norm": 0.1361428319295974, |
| "learning_rate": 9.753404941791063e-06, |
| "loss": 0.4313, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3854584915897992, |
| "grad_norm": 0.14653318408406776, |
| "learning_rate": 9.74868025135266e-06, |
| "loss": 0.4533, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.387194791101465, |
| "grad_norm": 0.1565754671263572, |
| "learning_rate": 9.743911893967216e-06, |
| "loss": 0.4322, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.38893109061313075, |
| "grad_norm": 0.16399056677484686, |
| "learning_rate": 9.739099913482616e-06, |
| "loss": 0.4466, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.3906673901247965, |
| "grad_norm": 0.14157252484905397, |
| "learning_rate": 9.734244354147897e-06, |
| "loss": 0.4295, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.39240368963646227, |
| "grad_norm": 0.1603171745561666, |
| "learning_rate": 9.729345260612817e-06, |
| "loss": 0.4372, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.39413998914812803, |
| "grad_norm": 0.14321084701941422, |
| "learning_rate": 9.724402677927466e-06, |
| "loss": 0.4325, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.3958762886597938, |
| "grad_norm": 0.1473692730338607, |
| "learning_rate": 9.719416651541839e-06, |
| "loss": 0.4309, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.39761258817145956, |
| "grad_norm": 0.14956009373534798, |
| "learning_rate": 9.714387227305422e-06, |
| "loss": 0.4261, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.3993488876831253, |
| "grad_norm": 0.14848206159225216, |
| "learning_rate": 9.70931445146677e-06, |
| "loss": 0.4397, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4010851871947911, |
| "grad_norm": 0.14241520833339297, |
| "learning_rate": 9.704198370673084e-06, |
| "loss": 0.4245, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.40282148670645684, |
| "grad_norm": 0.14596741528366003, |
| "learning_rate": 9.699039031969776e-06, |
| "loss": 0.4396, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4045577862181226, |
| "grad_norm": 0.17062314124311329, |
| "learning_rate": 9.693836482800044e-06, |
| "loss": 0.4359, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.40629408572978837, |
| "grad_norm": 0.1345681574448004, |
| "learning_rate": 9.68859077100443e-06, |
| "loss": 0.4019, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.40803038524145413, |
| "grad_norm": 0.15564851444441055, |
| "learning_rate": 9.683301944820382e-06, |
| "loss": 0.432, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4097666847531199, |
| "grad_norm": 0.15674408774253595, |
| "learning_rate": 9.677970052881811e-06, |
| "loss": 0.4431, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.41150298426478565, |
| "grad_norm": 0.147235124546805, |
| "learning_rate": 9.672595144218646e-06, |
| "loss": 0.4229, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4132392837764514, |
| "grad_norm": 0.15359262893739295, |
| "learning_rate": 9.667177268256373e-06, |
| "loss": 0.4489, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4149755832881172, |
| "grad_norm": 0.1936836685893336, |
| "learning_rate": 9.661716474815597e-06, |
| "loss": 0.4482, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.41671188279978294, |
| "grad_norm": 0.15707058757983838, |
| "learning_rate": 9.656212814111567e-06, |
| "loss": 0.4324, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4184481823114487, |
| "grad_norm": 0.14905279461924087, |
| "learning_rate": 9.65066633675373e-06, |
| "loss": 0.4332, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.42018448182311446, |
| "grad_norm": 0.15002700768694846, |
| "learning_rate": 9.645077093745248e-06, |
| "loss": 0.4317, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4219207813347802, |
| "grad_norm": 0.15705106694199372, |
| "learning_rate": 9.639445136482549e-06, |
| "loss": 0.4331, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.423657080846446, |
| "grad_norm": 0.13539219399033092, |
| "learning_rate": 9.633770516754834e-06, |
| "loss": 0.4172, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.42539338035811175, |
| "grad_norm": 0.15065870855174054, |
| "learning_rate": 9.628053286743619e-06, |
| "loss": 0.4344, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4271296798697775, |
| "grad_norm": 0.17437059505759828, |
| "learning_rate": 9.622293499022243e-06, |
| "loss": 0.4245, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4288659793814433, |
| "grad_norm": 0.17919029116585017, |
| "learning_rate": 9.61649120655539e-06, |
| "loss": 0.4221, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.43060227889310904, |
| "grad_norm": 0.1334976875908364, |
| "learning_rate": 9.610646462698598e-06, |
| "loss": 0.403, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.4323385784047748, |
| "grad_norm": 0.16277200115474594, |
| "learning_rate": 9.604759321197775e-06, |
| "loss": 0.4424, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.43407487791644056, |
| "grad_norm": 0.13988502194177868, |
| "learning_rate": 9.598829836188694e-06, |
| "loss": 0.4476, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4358111774281063, |
| "grad_norm": 0.15784858998250612, |
| "learning_rate": 9.59285806219651e-06, |
| "loss": 0.4209, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4375474769397721, |
| "grad_norm": 0.1324213031630464, |
| "learning_rate": 9.586844054135248e-06, |
| "loss": 0.4133, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.43928377645143785, |
| "grad_norm": 0.15300021033544062, |
| "learning_rate": 9.580787867307293e-06, |
| "loss": 0.4384, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4410200759631036, |
| "grad_norm": 0.143838210306649, |
| "learning_rate": 9.574689557402899e-06, |
| "loss": 0.4248, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.44275637547476937, |
| "grad_norm": 0.13871482863771145, |
| "learning_rate": 9.56854918049966e-06, |
| "loss": 0.4405, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4444926749864352, |
| "grad_norm": 0.14460323057850463, |
| "learning_rate": 9.562366793062007e-06, |
| "loss": 0.4088, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.44622897449810095, |
| "grad_norm": 0.1516286738769081, |
| "learning_rate": 9.55614245194068e-06, |
| "loss": 0.426, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4479652740097667, |
| "grad_norm": 0.15740523731737546, |
| "learning_rate": 9.549876214372203e-06, |
| "loss": 0.4042, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4497015735214325, |
| "grad_norm": 0.13541372433807597, |
| "learning_rate": 9.543568137978373e-06, |
| "loss": 0.4266, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.45143787303309824, |
| "grad_norm": 0.13720170783679506, |
| "learning_rate": 9.53721828076571e-06, |
| "loss": 0.4192, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.453174172544764, |
| "grad_norm": 0.15636037632592015, |
| "learning_rate": 9.53082670112494e-06, |
| "loss": 0.413, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.45491047205642976, |
| "grad_norm": 0.1545294587942669, |
| "learning_rate": 9.524393457830452e-06, |
| "loss": 0.4494, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4566467715680955, |
| "grad_norm": 0.15627701909179118, |
| "learning_rate": 9.51791861003975e-06, |
| "loss": 0.4224, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.4583830710797613, |
| "grad_norm": 0.1492359535781047, |
| "learning_rate": 9.511402217292927e-06, |
| "loss": 0.4262, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.46011937059142705, |
| "grad_norm": 0.1353127405505104, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.4165, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4618556701030928, |
| "grad_norm": 0.13373371028131661, |
| "learning_rate": 9.498245037000857e-06, |
| "loss": 0.4243, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.46359196961475857, |
| "grad_norm": 0.14814709309461124, |
| "learning_rate": 9.491604370443732e-06, |
| "loss": 0.4295, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.46532826912642433, |
| "grad_norm": 0.15693342961674778, |
| "learning_rate": 9.484922400905608e-06, |
| "loss": 0.4296, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4670645686380901, |
| "grad_norm": 0.13500362769857513, |
| "learning_rate": 9.478199189831184e-06, |
| "loss": 0.4356, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.46880086814975586, |
| "grad_norm": 0.1479397146934287, |
| "learning_rate": 9.471434799044392e-06, |
| "loss": 0.4342, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4705371676614216, |
| "grad_norm": 0.14658155644327392, |
| "learning_rate": 9.464629290747844e-06, |
| "loss": 0.417, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4722734671730874, |
| "grad_norm": 0.1547841209657364, |
| "learning_rate": 9.457782727522242e-06, |
| "loss": 0.4468, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.47400976668475314, |
| "grad_norm": 0.140566594459949, |
| "learning_rate": 9.450895172325822e-06, |
| "loss": 0.4144, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.4757460661964189, |
| "grad_norm": 0.14904014949184202, |
| "learning_rate": 9.443966688493762e-06, |
| "loss": 0.4267, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.47748236570808467, |
| "grad_norm": 0.1618365518683853, |
| "learning_rate": 9.4369973397376e-06, |
| "loss": 0.4278, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.47921866521975043, |
| "grad_norm": 0.15223749430970504, |
| "learning_rate": 9.429987190144659e-06, |
| "loss": 0.4321, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4809549647314162, |
| "grad_norm": 0.1539731787632902, |
| "learning_rate": 9.422936304177439e-06, |
| "loss": 0.428, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.48269126424308195, |
| "grad_norm": 0.14879656683456302, |
| "learning_rate": 9.415844746673047e-06, |
| "loss": 0.4245, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4844275637547477, |
| "grad_norm": 0.14532316486603897, |
| "learning_rate": 9.408712582842583e-06, |
| "loss": 0.4301, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.4861638632664135, |
| "grad_norm": 0.1707471215126368, |
| "learning_rate": 9.401539878270545e-06, |
| "loss": 0.4349, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.48790016277807924, |
| "grad_norm": 0.1482363199845621, |
| "learning_rate": 9.394326698914229e-06, |
| "loss": 0.432, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.489636462289745, |
| "grad_norm": 0.16631121800186766, |
| "learning_rate": 9.387073111103124e-06, |
| "loss": 0.4522, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.49137276180141076, |
| "grad_norm": 0.15399106664401388, |
| "learning_rate": 9.379779181538294e-06, |
| "loss": 0.4177, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4931090613130765, |
| "grad_norm": 0.15933705725540492, |
| "learning_rate": 9.372444977291772e-06, |
| "loss": 0.4331, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.4948453608247423, |
| "grad_norm": 0.1558071947319395, |
| "learning_rate": 9.365070565805941e-06, |
| "loss": 0.4413, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.49658166033640805, |
| "grad_norm": 0.15869210462435573, |
| "learning_rate": 9.357656014892913e-06, |
| "loss": 0.4501, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4983179598480738, |
| "grad_norm": 0.14836343303572178, |
| "learning_rate": 9.350201392733902e-06, |
| "loss": 0.4398, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5000542593597396, |
| "grad_norm": 0.1446486130902907, |
| "learning_rate": 9.342706767878609e-06, |
| "loss": 0.4134, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5017905588714053, |
| "grad_norm": 0.16780837684964714, |
| "learning_rate": 9.335172209244577e-06, |
| "loss": 0.4354, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5035268583830711, |
| "grad_norm": 0.19016077034051496, |
| "learning_rate": 9.327597786116567e-06, |
| "loss": 0.4439, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5052631578947369, |
| "grad_norm": 0.12843881816321912, |
| "learning_rate": 9.319983568145919e-06, |
| "loss": 0.4044, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5069994574064026, |
| "grad_norm": 0.18172343997071713, |
| "learning_rate": 9.312329625349903e-06, |
| "loss": 0.439, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5087357569180684, |
| "grad_norm": 0.1533109565783212, |
| "learning_rate": 9.304636028111093e-06, |
| "loss": 0.4605, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5104720564297341, |
| "grad_norm": 0.1500997412539675, |
| "learning_rate": 9.296902847176703e-06, |
| "loss": 0.4346, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5122083559413999, |
| "grad_norm": 0.1434536596131837, |
| "learning_rate": 9.289130153657944e-06, |
| "loss": 0.442, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5139446554530657, |
| "grad_norm": 0.1631022083156658, |
| "learning_rate": 9.281318019029366e-06, |
| "loss": 0.4243, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5156809549647314, |
| "grad_norm": 0.15745451993950932, |
| "learning_rate": 9.273466515128209e-06, |
| "loss": 0.435, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5174172544763972, |
| "grad_norm": 0.15528300554164817, |
| "learning_rate": 9.265575714153732e-06, |
| "loss": 0.4335, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.519153553988063, |
| "grad_norm": 0.18075251075323473, |
| "learning_rate": 9.257645688666557e-06, |
| "loss": 0.4293, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5208898534997287, |
| "grad_norm": 0.16706193153429877, |
| "learning_rate": 9.249676511588e-06, |
| "loss": 0.425, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5226261530113945, |
| "grad_norm": 0.15762590559281106, |
| "learning_rate": 9.241668256199392e-06, |
| "loss": 0.4572, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.5243624525230602, |
| "grad_norm": 0.16766422353990898, |
| "learning_rate": 9.233620996141421e-06, |
| "loss": 0.4421, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.526098752034726, |
| "grad_norm": 0.15326556435477035, |
| "learning_rate": 9.225534805413443e-06, |
| "loss": 0.4382, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5278350515463918, |
| "grad_norm": 0.15594005545981307, |
| "learning_rate": 9.217409758372805e-06, |
| "loss": 0.4306, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5295713510580575, |
| "grad_norm": 0.15658249902105414, |
| "learning_rate": 9.209245929734156e-06, |
| "loss": 0.4276, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5313076505697233, |
| "grad_norm": 0.1688752124377244, |
| "learning_rate": 9.201043394568773e-06, |
| "loss": 0.4431, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.533043950081389, |
| "grad_norm": 0.16298343992138783, |
| "learning_rate": 9.192802228303858e-06, |
| "loss": 0.432, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5347802495930548, |
| "grad_norm": 0.14067039648645466, |
| "learning_rate": 9.184522506721848e-06, |
| "loss": 0.4268, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5365165491047206, |
| "grad_norm": 0.13618995591727004, |
| "learning_rate": 9.176204305959727e-06, |
| "loss": 0.4267, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.5382528486163863, |
| "grad_norm": 0.13359346371022174, |
| "learning_rate": 9.167847702508304e-06, |
| "loss": 0.3988, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5399891481280521, |
| "grad_norm": 0.1696549417930708, |
| "learning_rate": 9.159452773211537e-06, |
| "loss": 0.423, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.5417254476397179, |
| "grad_norm": 0.14323074167209557, |
| "learning_rate": 9.151019595265805e-06, |
| "loss": 0.4093, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5434617471513836, |
| "grad_norm": 0.16685931065308446, |
| "learning_rate": 9.142548246219212e-06, |
| "loss": 0.4416, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5451980466630494, |
| "grad_norm": 0.1487456457447269, |
| "learning_rate": 9.134038803970861e-06, |
| "loss": 0.4451, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5469343461747151, |
| "grad_norm": 0.1566773867653797, |
| "learning_rate": 9.12549134677015e-06, |
| "loss": 0.4205, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5486706456863809, |
| "grad_norm": 0.14167677739491838, |
| "learning_rate": 9.116905953216048e-06, |
| "loss": 0.4267, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5504069451980467, |
| "grad_norm": 0.15472423361672563, |
| "learning_rate": 9.108282702256366e-06, |
| "loss": 0.4271, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5521432447097124, |
| "grad_norm": 0.13110251374929036, |
| "learning_rate": 9.09962167318704e-06, |
| "loss": 0.4112, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5538795442213782, |
| "grad_norm": 0.15032138262922598, |
| "learning_rate": 9.090922945651399e-06, |
| "loss": 0.4448, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.555615843733044, |
| "grad_norm": 0.14551624676182287, |
| "learning_rate": 9.082186599639429e-06, |
| "loss": 0.4201, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5573521432447097, |
| "grad_norm": 0.14465301780155224, |
| "learning_rate": 9.073412715487045e-06, |
| "loss": 0.4267, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.5590884427563755, |
| "grad_norm": 0.14551487118151804, |
| "learning_rate": 9.064601373875341e-06, |
| "loss": 0.421, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5608247422680412, |
| "grad_norm": 0.1457948287955944, |
| "learning_rate": 9.05575265582986e-06, |
| "loss": 0.4445, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.562561041779707, |
| "grad_norm": 0.16067858876663518, |
| "learning_rate": 9.04686664271984e-06, |
| "loss": 0.4434, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5642973412913728, |
| "grad_norm": 0.1541984433913847, |
| "learning_rate": 9.037943416257475e-06, |
| "loss": 0.4306, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5660336408030385, |
| "grad_norm": 0.14316636284034773, |
| "learning_rate": 9.028983058497152e-06, |
| "loss": 0.414, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5677699403147043, |
| "grad_norm": 0.17397044853565383, |
| "learning_rate": 9.019985651834703e-06, |
| "loss": 0.4432, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.56950623982637, |
| "grad_norm": 0.1627074833616904, |
| "learning_rate": 9.010951279006652e-06, |
| "loss": 0.448, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5712425393380358, |
| "grad_norm": 0.1940870487142773, |
| "learning_rate": 9.001880023089442e-06, |
| "loss": 0.4425, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.5729788388497016, |
| "grad_norm": 0.159598948177848, |
| "learning_rate": 8.992771967498682e-06, |
| "loss": 0.4406, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5747151383613673, |
| "grad_norm": 0.1498699860214419, |
| "learning_rate": 8.983627195988376e-06, |
| "loss": 0.4388, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5764514378730331, |
| "grad_norm": 0.16951652153250185, |
| "learning_rate": 8.974445792650152e-06, |
| "loss": 0.4423, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5781877373846989, |
| "grad_norm": 0.19455142439514098, |
| "learning_rate": 8.96522784191249e-06, |
| "loss": 0.4111, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5799240368963646, |
| "grad_norm": 0.13973563527713942, |
| "learning_rate": 8.955973428539943e-06, |
| "loss": 0.4096, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5816603364080304, |
| "grad_norm": 0.16050181521060788, |
| "learning_rate": 8.946682637632362e-06, |
| "loss": 0.4245, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5833966359196961, |
| "grad_norm": 0.15909571259070338, |
| "learning_rate": 8.937355554624111e-06, |
| "loss": 0.4072, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5851329354313619, |
| "grad_norm": 0.15430883571122384, |
| "learning_rate": 8.927992265283282e-06, |
| "loss": 0.4143, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5868692349430277, |
| "grad_norm": 0.14165670203508623, |
| "learning_rate": 8.9185928557109e-06, |
| "loss": 0.4317, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5886055344546934, |
| "grad_norm": 0.14467120823031193, |
| "learning_rate": 8.90915741234015e-06, |
| "loss": 0.4484, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.5903418339663592, |
| "grad_norm": 0.15194753933755567, |
| "learning_rate": 8.899686021935554e-06, |
| "loss": 0.409, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.592078133478025, |
| "grad_norm": 0.1909028572126585, |
| "learning_rate": 8.890178771592198e-06, |
| "loss": 0.4323, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5938144329896907, |
| "grad_norm": 0.16563174108323855, |
| "learning_rate": 8.88063574873492e-06, |
| "loss": 0.4382, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5955507325013565, |
| "grad_norm": 0.1535707289828906, |
| "learning_rate": 8.871057041117505e-06, |
| "loss": 0.4219, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5972870320130222, |
| "grad_norm": 0.17733027347423158, |
| "learning_rate": 8.861442736821883e-06, |
| "loss": 0.4229, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.599023331524688, |
| "grad_norm": 0.15784389767547816, |
| "learning_rate": 8.851792924257316e-06, |
| "loss": 0.4155, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6007596310363538, |
| "grad_norm": 0.16406056888797899, |
| "learning_rate": 8.842107692159587e-06, |
| "loss": 0.4389, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6024959305480195, |
| "grad_norm": 0.16632382879168614, |
| "learning_rate": 8.83238712959018e-06, |
| "loss": 0.432, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6042322300596853, |
| "grad_norm": 0.16932428537112926, |
| "learning_rate": 8.822631325935463e-06, |
| "loss": 0.4179, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6059685295713511, |
| "grad_norm": 0.20272370572709011, |
| "learning_rate": 8.812840370905872e-06, |
| "loss": 0.4289, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.6077048290830168, |
| "grad_norm": 0.16321136969174183, |
| "learning_rate": 8.80301435453508e-06, |
| "loss": 0.4346, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6094411285946826, |
| "grad_norm": 0.1695889443045464, |
| "learning_rate": 8.793153367179164e-06, |
| "loss": 0.4087, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6111774281063483, |
| "grad_norm": 0.1717524474479854, |
| "learning_rate": 8.783257499515785e-06, |
| "loss": 0.4175, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6129137276180141, |
| "grad_norm": 0.19238932735231165, |
| "learning_rate": 8.773326842543348e-06, |
| "loss": 0.427, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6146500271296799, |
| "grad_norm": 0.15756445720282503, |
| "learning_rate": 8.763361487580167e-06, |
| "loss": 0.4316, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6163863266413456, |
| "grad_norm": 0.1863059885455897, |
| "learning_rate": 8.753361526263622e-06, |
| "loss": 0.441, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6181226261530114, |
| "grad_norm": 0.14994819207944002, |
| "learning_rate": 8.743327050549326e-06, |
| "loss": 0.4265, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6198589256646772, |
| "grad_norm": 0.17062218033006715, |
| "learning_rate": 8.733258152710262e-06, |
| "loss": 0.454, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6215952251763429, |
| "grad_norm": 0.16850908560763084, |
| "learning_rate": 8.723154925335957e-06, |
| "loss": 0.4344, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6233315246880087, |
| "grad_norm": 0.16709271250050742, |
| "learning_rate": 8.713017461331608e-06, |
| "loss": 0.4362, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6250678241996744, |
| "grad_norm": 0.17296909553562448, |
| "learning_rate": 8.702845853917242e-06, |
| "loss": 0.4391, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6268041237113402, |
| "grad_norm": 0.1634801663010388, |
| "learning_rate": 8.692640196626859e-06, |
| "loss": 0.418, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.628540423223006, |
| "grad_norm": 0.21089231489074947, |
| "learning_rate": 8.682400583307562e-06, |
| "loss": 0.4521, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6302767227346717, |
| "grad_norm": 0.16024795309892262, |
| "learning_rate": 8.672127108118702e-06, |
| "loss": 0.4298, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.6320130222463375, |
| "grad_norm": 0.1387299903351098, |
| "learning_rate": 8.661819865531014e-06, |
| "loss": 0.4267, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6337493217580032, |
| "grad_norm": 0.16720213909194867, |
| "learning_rate": 8.651478950325739e-06, |
| "loss": 0.427, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.635485621269669, |
| "grad_norm": 0.18886296533854524, |
| "learning_rate": 8.641104457593756e-06, |
| "loss": 0.4302, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6372219207813348, |
| "grad_norm": 0.1435746148298888, |
| "learning_rate": 8.630696482734718e-06, |
| "loss": 0.4216, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6389582202930005, |
| "grad_norm": 0.16965303549071564, |
| "learning_rate": 8.620255121456157e-06, |
| "loss": 0.4425, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6406945198046663, |
| "grad_norm": 0.15830472472009197, |
| "learning_rate": 8.609780469772623e-06, |
| "loss": 0.4198, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.6424308193163321, |
| "grad_norm": 0.15576341914865044, |
| "learning_rate": 8.59927262400478e-06, |
| "loss": 0.4029, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6441671188279978, |
| "grad_norm": 0.1502744242798527, |
| "learning_rate": 8.588731680778541e-06, |
| "loss": 0.4266, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.6459034183396636, |
| "grad_norm": 0.1613768907477256, |
| "learning_rate": 8.578157737024161e-06, |
| "loss": 0.4198, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6476397178513293, |
| "grad_norm": 0.14851747702098045, |
| "learning_rate": 8.567550889975362e-06, |
| "loss": 0.4293, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.6493760173629951, |
| "grad_norm": 0.16651395276661102, |
| "learning_rate": 8.556911237168428e-06, |
| "loss": 0.4267, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6511123168746609, |
| "grad_norm": 0.1503438279696906, |
| "learning_rate": 8.546238876441313e-06, |
| "loss": 0.4315, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6528486163863266, |
| "grad_norm": 0.13894070033687952, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.4361, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6545849158979924, |
| "grad_norm": 0.15374759492249587, |
| "learning_rate": 8.524796424081291e-06, |
| "loss": 0.4295, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6563212154096582, |
| "grad_norm": 0.16379040041737855, |
| "learning_rate": 8.514026529624523e-06, |
| "loss": 0.4278, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6580575149213239, |
| "grad_norm": 0.13727637457553227, |
| "learning_rate": 8.503224321598035e-06, |
| "loss": 0.4233, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.6597938144329897, |
| "grad_norm": 0.1681213503893215, |
| "learning_rate": 8.492389899334572e-06, |
| "loss": 0.4249, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6615301139446554, |
| "grad_norm": 0.155321814222258, |
| "learning_rate": 8.481523362463111e-06, |
| "loss": 0.4131, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6632664134563212, |
| "grad_norm": 0.15655768137446305, |
| "learning_rate": 8.470624810907936e-06, |
| "loss": 0.4339, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.665002712967987, |
| "grad_norm": 0.13927963207883687, |
| "learning_rate": 8.459694344887732e-06, |
| "loss": 0.4342, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6667390124796527, |
| "grad_norm": 0.16226244072961146, |
| "learning_rate": 8.44873206491465e-06, |
| "loss": 0.4273, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6684753119913185, |
| "grad_norm": 0.1695557602850689, |
| "learning_rate": 8.437738071793394e-06, |
| "loss": 0.4202, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6702116115029843, |
| "grad_norm": 0.15576962380070136, |
| "learning_rate": 8.426712466620288e-06, |
| "loss": 0.4288, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.67194791101465, |
| "grad_norm": 0.158261704332491, |
| "learning_rate": 8.415655350782346e-06, |
| "loss": 0.4433, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6736842105263158, |
| "grad_norm": 0.15193331842541377, |
| "learning_rate": 8.404566825956341e-06, |
| "loss": 0.4155, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6754205100379815, |
| "grad_norm": 0.174426066510097, |
| "learning_rate": 8.393446994107876e-06, |
| "loss": 0.4404, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.6771568095496473, |
| "grad_norm": 0.1325367145926275, |
| "learning_rate": 8.382295957490435e-06, |
| "loss": 0.4224, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6788931090613131, |
| "grad_norm": 0.13452032528615257, |
| "learning_rate": 8.371113818644449e-06, |
| "loss": 0.4185, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6806294085729788, |
| "grad_norm": 0.16147477287089293, |
| "learning_rate": 8.359900680396356e-06, |
| "loss": 0.4424, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6823657080846446, |
| "grad_norm": 0.15553772122957388, |
| "learning_rate": 8.348656645857648e-06, |
| "loss": 0.4252, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6841020075963103, |
| "grad_norm": 0.14572880321282852, |
| "learning_rate": 8.33738181842393e-06, |
| "loss": 0.4155, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6858383071079761, |
| "grad_norm": 0.14969559128512425, |
| "learning_rate": 8.326076301773964e-06, |
| "loss": 0.4358, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6875746066196419, |
| "grad_norm": 0.1520543652203451, |
| "learning_rate": 8.314740199868716e-06, |
| "loss": 0.4179, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6893109061313076, |
| "grad_norm": 0.14932155425139088, |
| "learning_rate": 8.303373616950408e-06, |
| "loss": 0.4417, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6910472056429734, |
| "grad_norm": 0.1436390734221764, |
| "learning_rate": 8.291976657541545e-06, |
| "loss": 0.4357, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6927835051546392, |
| "grad_norm": 0.16562398124925534, |
| "learning_rate": 8.28054942644397e-06, |
| "loss": 0.4485, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.6945198046663049, |
| "grad_norm": 0.16781942148844772, |
| "learning_rate": 8.269092028737885e-06, |
| "loss": 0.4341, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6962561041779707, |
| "grad_norm": 0.14039547328113816, |
| "learning_rate": 8.257604569780898e-06, |
| "loss": 0.414, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6979924036896364, |
| "grad_norm": 0.13511539909730524, |
| "learning_rate": 8.246087155207041e-06, |
| "loss": 0.4151, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6997287032013022, |
| "grad_norm": 0.1749036371486231, |
| "learning_rate": 8.234539890925812e-06, |
| "loss": 0.4149, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.701465002712968, |
| "grad_norm": 0.1386923926618094, |
| "learning_rate": 8.222962883121196e-06, |
| "loss": 0.429, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7032013022246337, |
| "grad_norm": 0.18428690503297337, |
| "learning_rate": 8.21135623825068e-06, |
| "loss": 0.44, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7049376017362995, |
| "grad_norm": 0.1452278087108832, |
| "learning_rate": 8.19972006304429e-06, |
| "loss": 0.4179, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7066739012479653, |
| "grad_norm": 0.15917166403787675, |
| "learning_rate": 8.188054464503591e-06, |
| "loss": 0.4289, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.708410200759631, |
| "grad_norm": 0.15011317963396884, |
| "learning_rate": 8.176359549900725e-06, |
| "loss": 0.4149, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7101465002712968, |
| "grad_norm": 0.1652053561456685, |
| "learning_rate": 8.164635426777404e-06, |
| "loss": 0.4412, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7118827997829625, |
| "grad_norm": 0.16677143872271724, |
| "learning_rate": 8.152882202943933e-06, |
| "loss": 0.4397, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7136190992946283, |
| "grad_norm": 0.1404122061810744, |
| "learning_rate": 8.141099986478212e-06, |
| "loss": 0.4193, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.7153553988062941, |
| "grad_norm": 0.15212329324056417, |
| "learning_rate": 8.129288885724752e-06, |
| "loss": 0.4374, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7170916983179598, |
| "grad_norm": 0.173228829337398, |
| "learning_rate": 8.117449009293668e-06, |
| "loss": 0.4075, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7188279978296256, |
| "grad_norm": 0.14887921410638563, |
| "learning_rate": 8.105580466059685e-06, |
| "loss": 0.421, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7205642973412913, |
| "grad_norm": 0.1341819963564282, |
| "learning_rate": 8.093683365161135e-06, |
| "loss": 0.4312, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7223005968529571, |
| "grad_norm": 0.15095004881427365, |
| "learning_rate": 8.081757815998958e-06, |
| "loss": 0.4348, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7240368963646229, |
| "grad_norm": 0.14673177767107912, |
| "learning_rate": 8.069803928235689e-06, |
| "loss": 0.4273, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7257731958762886, |
| "grad_norm": 0.1822633455744125, |
| "learning_rate": 8.057821811794457e-06, |
| "loss": 0.4479, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7275094953879544, |
| "grad_norm": 0.14795548056912192, |
| "learning_rate": 8.045811576857969e-06, |
| "loss": 0.425, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7292457948996202, |
| "grad_norm": 0.14905122363244133, |
| "learning_rate": 8.033773333867498e-06, |
| "loss": 0.4213, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7309820944112859, |
| "grad_norm": 0.1576303606399092, |
| "learning_rate": 8.021707193521865e-06, |
| "loss": 0.4537, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.7327183939229517, |
| "grad_norm": 0.15229537034665708, |
| "learning_rate": 8.009613266776433e-06, |
| "loss": 0.4328, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7344546934346174, |
| "grad_norm": 0.14146675627654046, |
| "learning_rate": 7.997491664842067e-06, |
| "loss": 0.4228, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.7361909929462832, |
| "grad_norm": 0.1494345927721846, |
| "learning_rate": 7.985342499184125e-06, |
| "loss": 0.4472, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.737927292457949, |
| "grad_norm": 0.15638464912362302, |
| "learning_rate": 7.973165881521435e-06, |
| "loss": 0.4654, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7396635919696147, |
| "grad_norm": 0.1581657562507735, |
| "learning_rate": 7.960961923825255e-06, |
| "loss": 0.4307, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7413998914812805, |
| "grad_norm": 0.1597652030369101, |
| "learning_rate": 7.948730738318255e-06, |
| "loss": 0.4467, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.7431361909929463, |
| "grad_norm": 0.13680894316088868, |
| "learning_rate": 7.936472437473482e-06, |
| "loss": 0.4196, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.744872490504612, |
| "grad_norm": 0.13931230444695578, |
| "learning_rate": 7.924187134013323e-06, |
| "loss": 0.4139, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7466087900162778, |
| "grad_norm": 0.13099637928584668, |
| "learning_rate": 7.91187494090847e-06, |
| "loss": 0.4088, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7483450895279435, |
| "grad_norm": 0.14334829413945557, |
| "learning_rate": 7.899535971376881e-06, |
| "loss": 0.4164, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.7500813890396093, |
| "grad_norm": 0.16796267070198684, |
| "learning_rate": 7.887170338882742e-06, |
| "loss": 0.4359, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.7518176885512751, |
| "grad_norm": 0.16242036776843566, |
| "learning_rate": 7.874778157135416e-06, |
| "loss": 0.4258, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.7535539880629408, |
| "grad_norm": 0.1385070248558403, |
| "learning_rate": 7.862359540088404e-06, |
| "loss": 0.4277, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7552902875746066, |
| "grad_norm": 0.13314274636073767, |
| "learning_rate": 7.849914601938302e-06, |
| "loss": 0.4084, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7570265870862724, |
| "grad_norm": 0.18197820963349937, |
| "learning_rate": 7.837443457123732e-06, |
| "loss": 0.4496, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7587628865979381, |
| "grad_norm": 0.14847810455059401, |
| "learning_rate": 7.824946220324313e-06, |
| "loss": 0.4249, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7604991861096039, |
| "grad_norm": 0.14144146539360863, |
| "learning_rate": 7.812423006459588e-06, |
| "loss": 0.4073, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7622354856212696, |
| "grad_norm": 0.16954210592768115, |
| "learning_rate": 7.799873930687979e-06, |
| "loss": 0.4162, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.7639717851329354, |
| "grad_norm": 0.1567807710460927, |
| "learning_rate": 7.78729910840572e-06, |
| "loss": 0.4266, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7657080846446012, |
| "grad_norm": 0.13338113485510952, |
| "learning_rate": 7.774698655245802e-06, |
| "loss": 0.4221, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7674443841562669, |
| "grad_norm": 0.13011809766956603, |
| "learning_rate": 7.762072687076911e-06, |
| "loss": 0.4333, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7691806836679327, |
| "grad_norm": 0.1565450908841742, |
| "learning_rate": 7.749421320002349e-06, |
| "loss": 0.4406, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7709169831795984, |
| "grad_norm": 0.15302860162984966, |
| "learning_rate": 7.736744670358985e-06, |
| "loss": 0.449, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7726532826912642, |
| "grad_norm": 0.15249501154388767, |
| "learning_rate": 7.724042854716169e-06, |
| "loss": 0.4298, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.77438958220293, |
| "grad_norm": 0.14043726604769854, |
| "learning_rate": 7.711315989874677e-06, |
| "loss": 0.4162, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7761258817145957, |
| "grad_norm": 0.15320635053093767, |
| "learning_rate": 7.698564192865617e-06, |
| "loss": 0.4242, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7778621812262615, |
| "grad_norm": 0.17989485147230902, |
| "learning_rate": 7.68578758094937e-06, |
| "loss": 0.4418, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7795984807379273, |
| "grad_norm": 0.1684510261895026, |
| "learning_rate": 7.6729862716145e-06, |
| "loss": 0.4337, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.781334780249593, |
| "grad_norm": 0.15069039960003242, |
| "learning_rate": 7.660160382576683e-06, |
| "loss": 0.426, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7830710797612588, |
| "grad_norm": 0.13273283070755218, |
| "learning_rate": 7.64731003177762e-06, |
| "loss": 0.4251, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7848073792729245, |
| "grad_norm": 0.15560617906201166, |
| "learning_rate": 7.634435337383948e-06, |
| "loss": 0.4519, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7865436787845903, |
| "grad_norm": 0.14351368157137256, |
| "learning_rate": 7.621536417786159e-06, |
| "loss": 0.4314, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7882799782962561, |
| "grad_norm": 0.1599564797673948, |
| "learning_rate": 7.608613391597514e-06, |
| "loss": 0.4314, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.7900162778079218, |
| "grad_norm": 0.13912046111141438, |
| "learning_rate": 7.595666377652948e-06, |
| "loss": 0.4195, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7917525773195876, |
| "grad_norm": 0.1686980909185228, |
| "learning_rate": 7.582695495007974e-06, |
| "loss": 0.4328, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7934888768312534, |
| "grad_norm": 0.16273258235117594, |
| "learning_rate": 7.56970086293759e-06, |
| "loss": 0.4572, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7952251763429191, |
| "grad_norm": 0.1512893886108004, |
| "learning_rate": 7.556682600935194e-06, |
| "loss": 0.4288, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7969614758545849, |
| "grad_norm": 0.15124774797909318, |
| "learning_rate": 7.543640828711467e-06, |
| "loss": 0.4157, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.7986977753662506, |
| "grad_norm": 0.15932778545097698, |
| "learning_rate": 7.530575666193283e-06, |
| "loss": 0.4152, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8004340748779164, |
| "grad_norm": 0.13827246944553215, |
| "learning_rate": 7.5174872335226e-06, |
| "loss": 0.4267, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.8021703743895822, |
| "grad_norm": 0.1633340969790764, |
| "learning_rate": 7.504375651055369e-06, |
| "loss": 0.43, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8039066739012479, |
| "grad_norm": 0.148737493055638, |
| "learning_rate": 7.491241039360404e-06, |
| "loss": 0.4201, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8056429734129137, |
| "grad_norm": 0.17090144278144342, |
| "learning_rate": 7.478083519218297e-06, |
| "loss": 0.4646, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8073792729245794, |
| "grad_norm": 0.16543183078759685, |
| "learning_rate": 7.464903211620291e-06, |
| "loss": 0.431, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8091155724362452, |
| "grad_norm": 0.14450967489218555, |
| "learning_rate": 7.451700237767177e-06, |
| "loss": 0.4409, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.810851871947911, |
| "grad_norm": 0.14314840673680362, |
| "learning_rate": 7.438474719068174e-06, |
| "loss": 0.4355, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.8125881714595767, |
| "grad_norm": 0.14758334398446482, |
| "learning_rate": 7.425226777139811e-06, |
| "loss": 0.4459, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8143244709712425, |
| "grad_norm": 0.14270670380654457, |
| "learning_rate": 7.4119565338048195e-06, |
| "loss": 0.4062, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.8160607704829083, |
| "grad_norm": 0.13060107623226094, |
| "learning_rate": 7.3986641110909975e-06, |
| "loss": 0.4248, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.817797069994574, |
| "grad_norm": 0.1388192439723675, |
| "learning_rate": 7.385349631230102e-06, |
| "loss": 0.4229, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8195333695062398, |
| "grad_norm": 0.14822306068748878, |
| "learning_rate": 7.372013216656715e-06, |
| "loss": 0.4241, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8212696690179055, |
| "grad_norm": 0.1383157739435485, |
| "learning_rate": 7.358654990007123e-06, |
| "loss": 0.4315, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.8230059685295713, |
| "grad_norm": 0.16449865250363682, |
| "learning_rate": 7.3452750741181855e-06, |
| "loss": 0.4391, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.8247422680412371, |
| "grad_norm": 0.14207031927514444, |
| "learning_rate": 7.331873592026212e-06, |
| "loss": 0.4232, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8264785675529028, |
| "grad_norm": 0.14589681162317597, |
| "learning_rate": 7.31845066696582e-06, |
| "loss": 0.4167, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8282148670645686, |
| "grad_norm": 0.1422796225146136, |
| "learning_rate": 7.305006422368811e-06, |
| "loss": 0.4309, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.8299511665762344, |
| "grad_norm": 0.13818720279721158, |
| "learning_rate": 7.291540981863034e-06, |
| "loss": 0.4283, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8316874660879001, |
| "grad_norm": 0.14285698185091358, |
| "learning_rate": 7.278054469271245e-06, |
| "loss": 0.4273, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.8334237655995659, |
| "grad_norm": 0.13966266346160003, |
| "learning_rate": 7.26454700860997e-06, |
| "loss": 0.4367, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8351600651112316, |
| "grad_norm": 0.1331588540524562, |
| "learning_rate": 7.251018724088367e-06, |
| "loss": 0.431, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.8368963646228974, |
| "grad_norm": 0.1425328854150848, |
| "learning_rate": 7.237469740107078e-06, |
| "loss": 0.4335, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8386326641345632, |
| "grad_norm": 0.13148396093005682, |
| "learning_rate": 7.223900181257094e-06, |
| "loss": 0.3987, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.8403689636462289, |
| "grad_norm": 0.13046232046429795, |
| "learning_rate": 7.2103101723186e-06, |
| "loss": 0.4059, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.1443365626310558, |
| "learning_rate": 7.196699838259834e-06, |
| "loss": 0.4386, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8438415626695605, |
| "grad_norm": 0.14089367813530887, |
| "learning_rate": 7.183069304235935e-06, |
| "loss": 0.4219, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8455778621812262, |
| "grad_norm": 0.13167205606546367, |
| "learning_rate": 7.169418695587791e-06, |
| "loss": 0.4174, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.847314161692892, |
| "grad_norm": 0.13933768890571246, |
| "learning_rate": 7.155748137840892e-06, |
| "loss": 0.4278, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8490504612045577, |
| "grad_norm": 0.12724653360437108, |
| "learning_rate": 7.142057756704168e-06, |
| "loss": 0.4293, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.8507867607162235, |
| "grad_norm": 0.13861402433503245, |
| "learning_rate": 7.128347678068841e-06, |
| "loss": 0.4304, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8525230602278893, |
| "grad_norm": 0.1420863969782023, |
| "learning_rate": 7.1146180280072584e-06, |
| "loss": 0.4284, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.854259359739555, |
| "grad_norm": 0.14172250449900636, |
| "learning_rate": 7.100868932771741e-06, |
| "loss": 0.4461, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8559956592512208, |
| "grad_norm": 0.16114613280891626, |
| "learning_rate": 7.087100518793421e-06, |
| "loss": 0.4233, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.8577319587628865, |
| "grad_norm": 0.1471442217924641, |
| "learning_rate": 7.073312912681074e-06, |
| "loss": 0.4236, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8594682582745523, |
| "grad_norm": 0.14412078891445385, |
| "learning_rate": 7.059506241219964e-06, |
| "loss": 0.4331, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.8612045577862181, |
| "grad_norm": 0.14590273185782915, |
| "learning_rate": 7.045680631370668e-06, |
| "loss": 0.409, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8629408572978838, |
| "grad_norm": 0.15307053625347286, |
| "learning_rate": 7.031836210267915e-06, |
| "loss": 0.4255, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.8646771568095496, |
| "grad_norm": 0.15633948129523692, |
| "learning_rate": 7.0179731052194134e-06, |
| "loss": 0.4205, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8664134563212154, |
| "grad_norm": 0.1569427893027917, |
| "learning_rate": 7.004091443704681e-06, |
| "loss": 0.438, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8681497558328811, |
| "grad_norm": 0.14493419488810694, |
| "learning_rate": 6.990191353373876e-06, |
| "loss": 0.432, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8698860553445469, |
| "grad_norm": 0.13166025057509068, |
| "learning_rate": 6.976272962046619e-06, |
| "loss": 0.4263, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8716223548562126, |
| "grad_norm": 0.1487366571198014, |
| "learning_rate": 6.962336397710819e-06, |
| "loss": 0.4326, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8733586543678784, |
| "grad_norm": 0.15151379193315279, |
| "learning_rate": 6.948381788521498e-06, |
| "loss": 0.421, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.8750949538795442, |
| "grad_norm": 0.1446369210011889, |
| "learning_rate": 6.9344092627996075e-06, |
| "loss": 0.4357, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.8768312533912099, |
| "grad_norm": 0.13526985128108113, |
| "learning_rate": 6.920418949030856e-06, |
| "loss": 0.4226, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8785675529028757, |
| "grad_norm": 0.14209678942328663, |
| "learning_rate": 6.906410975864522e-06, |
| "loss": 0.436, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8803038524145415, |
| "grad_norm": 0.15545453377773016, |
| "learning_rate": 6.892385472112275e-06, |
| "loss": 0.4223, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8820401519262072, |
| "grad_norm": 0.160449156940923, |
| "learning_rate": 6.878342566746985e-06, |
| "loss": 0.4256, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.883776451437873, |
| "grad_norm": 0.15483085985532163, |
| "learning_rate": 6.864282388901544e-06, |
| "loss": 0.434, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.8855127509495387, |
| "grad_norm": 0.14591180973133458, |
| "learning_rate": 6.85020506786767e-06, |
| "loss": 0.4234, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8872490504612045, |
| "grad_norm": 0.13585311221734875, |
| "learning_rate": 6.836110733094728e-06, |
| "loss": 0.4287, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8889853499728704, |
| "grad_norm": 0.15429791758689432, |
| "learning_rate": 6.821999514188532e-06, |
| "loss": 0.4338, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8907216494845361, |
| "grad_norm": 0.13417432207738678, |
| "learning_rate": 6.807871540910155e-06, |
| "loss": 0.4147, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.8924579489962019, |
| "grad_norm": 0.12318186174299123, |
| "learning_rate": 6.793726943174737e-06, |
| "loss": 0.4155, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.8941942485078677, |
| "grad_norm": 0.14032720007338734, |
| "learning_rate": 6.779565851050292e-06, |
| "loss": 0.406, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8959305480195334, |
| "grad_norm": 0.1604151317342483, |
| "learning_rate": 6.765388394756504e-06, |
| "loss": 0.448, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8976668475311992, |
| "grad_norm": 0.13753780470400695, |
| "learning_rate": 6.751194704663544e-06, |
| "loss": 0.4327, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.899403147042865, |
| "grad_norm": 0.16004678522601554, |
| "learning_rate": 6.736984911290853e-06, |
| "loss": 0.3995, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9011394465545307, |
| "grad_norm": 0.13803186927904562, |
| "learning_rate": 6.722759145305959e-06, |
| "loss": 0.431, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.9028757460661965, |
| "grad_norm": 0.15500469621691793, |
| "learning_rate": 6.708517537523264e-06, |
| "loss": 0.4391, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9046120455778622, |
| "grad_norm": 0.15834488288721668, |
| "learning_rate": 6.694260218902845e-06, |
| "loss": 0.4096, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.906348345089528, |
| "grad_norm": 0.14874399244800696, |
| "learning_rate": 6.6799873205492485e-06, |
| "loss": 0.4286, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9080846446011938, |
| "grad_norm": 0.12429177101740557, |
| "learning_rate": 6.665698973710289e-06, |
| "loss": 0.4072, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.9098209441128595, |
| "grad_norm": 0.13519014730031864, |
| "learning_rate": 6.651395309775837e-06, |
| "loss": 0.4153, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9115572436245253, |
| "grad_norm": 0.16632287295859693, |
| "learning_rate": 6.637076460276612e-06, |
| "loss": 0.4248, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.913293543136191, |
| "grad_norm": 0.17329750345670855, |
| "learning_rate": 6.622742556882976e-06, |
| "loss": 0.4352, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.9150298426478568, |
| "grad_norm": 0.12671659925618628, |
| "learning_rate": 6.608393731403721e-06, |
| "loss": 0.4062, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.9167661421595226, |
| "grad_norm": 0.14740755648196183, |
| "learning_rate": 6.5940301157848505e-06, |
| "loss": 0.4168, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.9185024416711883, |
| "grad_norm": 0.14660381330269254, |
| "learning_rate": 6.579651842108381e-06, |
| "loss": 0.4154, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.9202387411828541, |
| "grad_norm": 0.13079068927881393, |
| "learning_rate": 6.565259042591112e-06, |
| "loss": 0.4068, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9219750406945199, |
| "grad_norm": 0.16047124875668828, |
| "learning_rate": 6.5508518495834214e-06, |
| "loss": 0.447, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.9237113402061856, |
| "grad_norm": 0.16056398869904115, |
| "learning_rate": 6.536430395568037e-06, |
| "loss": 0.4402, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9254476397178514, |
| "grad_norm": 0.14379051689279518, |
| "learning_rate": 6.521994813158834e-06, |
| "loss": 0.4255, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.9271839392295171, |
| "grad_norm": 0.14092695024536514, |
| "learning_rate": 6.507545235099601e-06, |
| "loss": 0.4318, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9289202387411829, |
| "grad_norm": 0.12525720188556722, |
| "learning_rate": 6.493081794262823e-06, |
| "loss": 0.4099, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9306565382528487, |
| "grad_norm": 0.13733388828680687, |
| "learning_rate": 6.478604623648468e-06, |
| "loss": 0.4562, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9323928377645144, |
| "grad_norm": 0.15169527782692627, |
| "learning_rate": 6.464113856382752e-06, |
| "loss": 0.4388, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.9341291372761802, |
| "grad_norm": 0.15659833497207304, |
| "learning_rate": 6.449609625716924e-06, |
| "loss": 0.4385, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.935865436787846, |
| "grad_norm": 0.14056083766486266, |
| "learning_rate": 6.435092065026035e-06, |
| "loss": 0.4365, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.9376017362995117, |
| "grad_norm": 0.14777123442183437, |
| "learning_rate": 6.420561307807713e-06, |
| "loss": 0.4399, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9393380358111775, |
| "grad_norm": 0.1638663833379513, |
| "learning_rate": 6.406017487680938e-06, |
| "loss": 0.4607, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9410743353228432, |
| "grad_norm": 0.1729654923685193, |
| "learning_rate": 6.391460738384808e-06, |
| "loss": 0.436, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.942810634834509, |
| "grad_norm": 0.14203433714494784, |
| "learning_rate": 6.376891193777317e-06, |
| "loss": 0.4491, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.9445469343461748, |
| "grad_norm": 0.13595845029555445, |
| "learning_rate": 6.3623089878341146e-06, |
| "loss": 0.4246, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.9462832338578405, |
| "grad_norm": 0.14926991946937673, |
| "learning_rate": 6.3477142546472836e-06, |
| "loss": 0.4307, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.9480195333695063, |
| "grad_norm": 0.14848175859544074, |
| "learning_rate": 6.333107128424098e-06, |
| "loss": 0.4285, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.949755832881172, |
| "grad_norm": 0.13823091677225982, |
| "learning_rate": 6.318487743485797e-06, |
| "loss": 0.4129, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.9514921323928378, |
| "grad_norm": 0.14290107135288815, |
| "learning_rate": 6.303856234266344e-06, |
| "loss": 0.4269, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9532284319045036, |
| "grad_norm": 0.15407365570533135, |
| "learning_rate": 6.28921273531119e-06, |
| "loss": 0.4332, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.9549647314161693, |
| "grad_norm": 0.1478352145654114, |
| "learning_rate": 6.274557381276045e-06, |
| "loss": 0.4278, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9567010309278351, |
| "grad_norm": 0.13620053200124488, |
| "learning_rate": 6.259890306925627e-06, |
| "loss": 0.425, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.9584373304395009, |
| "grad_norm": 0.14784390477062037, |
| "learning_rate": 6.245211647132433e-06, |
| "loss": 0.4354, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9601736299511666, |
| "grad_norm": 0.13789859062305407, |
| "learning_rate": 6.230521536875494e-06, |
| "loss": 0.4119, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.9619099294628324, |
| "grad_norm": 0.13714602492626404, |
| "learning_rate": 6.215820111239137e-06, |
| "loss": 0.4407, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9636462289744981, |
| "grad_norm": 0.1352836476917812, |
| "learning_rate": 6.201107505411736e-06, |
| "loss": 0.4262, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.9653825284861639, |
| "grad_norm": 0.14817555775463528, |
| "learning_rate": 6.186383854684479e-06, |
| "loss": 0.4263, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9671188279978297, |
| "grad_norm": 0.13028353307404736, |
| "learning_rate": 6.171649294450113e-06, |
| "loss": 0.4369, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.9688551275094954, |
| "grad_norm": 0.129005775473133, |
| "learning_rate": 6.156903960201709e-06, |
| "loss": 0.4037, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.9705914270211612, |
| "grad_norm": 0.14131727755712598, |
| "learning_rate": 6.142147987531407e-06, |
| "loss": 0.4363, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.972327726532827, |
| "grad_norm": 0.1492506986288016, |
| "learning_rate": 6.12738151212918e-06, |
| "loss": 0.4313, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9740640260444927, |
| "grad_norm": 0.14245819902062826, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.4424, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.9758003255561585, |
| "grad_norm": 0.1443083695557364, |
| "learning_rate": 6.097817596370465e-06, |
| "loss": 0.4295, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9775366250678242, |
| "grad_norm": 0.13360371562359302, |
| "learning_rate": 6.083020427871818e-06, |
| "loss": 0.4347, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.97927292457949, |
| "grad_norm": 0.1374915267856843, |
| "learning_rate": 6.0682133003544165e-06, |
| "loss": 0.4355, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.9810092240911558, |
| "grad_norm": 0.1392477071475812, |
| "learning_rate": 6.053396349978632e-06, |
| "loss": 0.4294, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.9827455236028215, |
| "grad_norm": 0.14836695769751443, |
| "learning_rate": 6.038569712995161e-06, |
| "loss": 0.4125, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9844818231144873, |
| "grad_norm": 0.14807766260570912, |
| "learning_rate": 6.02373352574377e-06, |
| "loss": 0.4117, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.986218122626153, |
| "grad_norm": 0.1383315377874369, |
| "learning_rate": 6.008887924652053e-06, |
| "loss": 0.4412, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9879544221378188, |
| "grad_norm": 0.14253538031780663, |
| "learning_rate": 5.994033046234163e-06, |
| "loss": 0.4326, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.9896907216494846, |
| "grad_norm": 0.14622116436494234, |
| "learning_rate": 5.979169027089568e-06, |
| "loss": 0.4067, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9914270211611503, |
| "grad_norm": 0.14488845841033232, |
| "learning_rate": 5.9642960039017875e-06, |
| "loss": 0.4374, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9931633206728161, |
| "grad_norm": 0.1362868479844263, |
| "learning_rate": 5.949414113437142e-06, |
| "loss": 0.4358, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9948996201844819, |
| "grad_norm": 0.1383976044726672, |
| "learning_rate": 5.934523492543489e-06, |
| "loss": 0.4185, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9966359196961476, |
| "grad_norm": 0.13345431171412198, |
| "learning_rate": 5.919624278148969e-06, |
| "loss": 0.4216, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.9983722192078134, |
| "grad_norm": 0.13508419957425205, |
| "learning_rate": 5.904716607260743e-06, |
| "loss": 0.4113, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.14703530488547564, |
| "learning_rate": 5.889800616963738e-06, |
| "loss": 0.4309, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.0017362995116659, |
| "grad_norm": 0.13820635061977254, |
| "learning_rate": 5.874876444419377e-06, |
| "loss": 0.4281, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.0034725990233315, |
| "grad_norm": 0.1399834662944234, |
| "learning_rate": 5.8599442268643325e-06, |
| "loss": 0.4093, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.0052088985349974, |
| "grad_norm": 0.14740466502395172, |
| "learning_rate": 5.8450041016092465e-06, |
| "loss": 0.4362, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.006945198046663, |
| "grad_norm": 0.14722268227038388, |
| "learning_rate": 5.830056206037482e-06, |
| "loss": 0.4148, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.008681497558329, |
| "grad_norm": 0.1489173191512877, |
| "learning_rate": 5.815100677603854e-06, |
| "loss": 0.4079, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.0104177970699946, |
| "grad_norm": 0.13701264794437995, |
| "learning_rate": 5.800137653833368e-06, |
| "loss": 0.4104, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.0121540965816604, |
| "grad_norm": 0.14149209374235472, |
| "learning_rate": 5.785167272319948e-06, |
| "loss": 0.4143, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.013890396093326, |
| "grad_norm": 0.13608710059669457, |
| "learning_rate": 5.7701896707251824e-06, |
| "loss": 0.4133, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.015626695604992, |
| "grad_norm": 0.127667185519214, |
| "learning_rate": 5.75520498677705e-06, |
| "loss": 0.3983, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.0173629951166576, |
| "grad_norm": 0.14952422157976003, |
| "learning_rate": 5.740213358268658e-06, |
| "loss": 0.4063, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.0190992946283235, |
| "grad_norm": 0.17330839118924732, |
| "learning_rate": 5.72521492305697e-06, |
| "loss": 0.4265, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.0208355941399891, |
| "grad_norm": 0.1369542169535847, |
| "learning_rate": 5.710209819061544e-06, |
| "loss": 0.4085, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.022571893651655, |
| "grad_norm": 0.14021766398680688, |
| "learning_rate": 5.695198184263259e-06, |
| "loss": 0.402, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.0243081931633207, |
| "grad_norm": 0.1273275336014157, |
| "learning_rate": 5.680180156703052e-06, |
| "loss": 0.4158, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0260444926749865, |
| "grad_norm": 0.1447814022061172, |
| "learning_rate": 5.665155874480639e-06, |
| "loss": 0.402, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.0277807921866522, |
| "grad_norm": 0.13493488857954397, |
| "learning_rate": 5.65012547575326e-06, |
| "loss": 0.4124, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.029517091698318, |
| "grad_norm": 0.1419298182476474, |
| "learning_rate": 5.635089098734394e-06, |
| "loss": 0.4328, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.0312533912099837, |
| "grad_norm": 0.1259898887932693, |
| "learning_rate": 5.620046881692496e-06, |
| "loss": 0.4082, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.0329896907216496, |
| "grad_norm": 0.14566690869149043, |
| "learning_rate": 5.604998962949721e-06, |
| "loss": 0.4082, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.0347259902333152, |
| "grad_norm": 0.1437692130612652, |
| "learning_rate": 5.5899454808806604e-06, |
| "loss": 0.4336, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.0364622897449811, |
| "grad_norm": 0.13609460091136136, |
| "learning_rate": 5.574886573911056e-06, |
| "loss": 0.4329, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.0381985892566468, |
| "grad_norm": 0.13077463591633914, |
| "learning_rate": 5.559822380516539e-06, |
| "loss": 0.397, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.0399348887683126, |
| "grad_norm": 0.14243098008221436, |
| "learning_rate": 5.5447530392213545e-06, |
| "loss": 0.4047, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.0416711882799783, |
| "grad_norm": 0.1344847481406696, |
| "learning_rate": 5.529678688597081e-06, |
| "loss": 0.4043, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0434074877916442, |
| "grad_norm": 0.14785465016750507, |
| "learning_rate": 5.514599467261363e-06, |
| "loss": 0.4023, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.0451437873033098, |
| "grad_norm": 0.14094496078005248, |
| "learning_rate": 5.4995155138766345e-06, |
| "loss": 0.4009, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0468800868149757, |
| "grad_norm": 0.12656563162402626, |
| "learning_rate": 5.484426967148843e-06, |
| "loss": 0.4124, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.0486163863266413, |
| "grad_norm": 0.14436518143142452, |
| "learning_rate": 5.469333965826174e-06, |
| "loss": 0.4091, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.0503526858383072, |
| "grad_norm": 0.1266094695639352, |
| "learning_rate": 5.454236648697776e-06, |
| "loss": 0.4238, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.0520889853499729, |
| "grad_norm": 0.1312935404022273, |
| "learning_rate": 5.439135154592486e-06, |
| "loss": 0.4017, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.0538252848616387, |
| "grad_norm": 0.14228976847722097, |
| "learning_rate": 5.4240296223775465e-06, |
| "loss": 0.4155, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.0555615843733044, |
| "grad_norm": 0.12839472792831685, |
| "learning_rate": 5.4089201909573376e-06, |
| "loss": 0.3921, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.0572978838849703, |
| "grad_norm": 0.12265551035079586, |
| "learning_rate": 5.3938069992720894e-06, |
| "loss": 0.3843, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.059034183396636, |
| "grad_norm": 0.12358555247985234, |
| "learning_rate": 5.378690186296617e-06, |
| "loss": 0.403, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.0607704829083018, |
| "grad_norm": 0.13923611661731283, |
| "learning_rate": 5.363569891039027e-06, |
| "loss": 0.4303, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.0625067824199674, |
| "grad_norm": 0.1466000832580553, |
| "learning_rate": 5.348446252539457e-06, |
| "loss": 0.4136, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.0642430819316333, |
| "grad_norm": 0.13908426064129698, |
| "learning_rate": 5.333319409868777e-06, |
| "loss": 0.4287, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.065979381443299, |
| "grad_norm": 0.1268183562981206, |
| "learning_rate": 5.318189502127332e-06, |
| "loss": 0.3916, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.0677156809549648, |
| "grad_norm": 0.13537891316641507, |
| "learning_rate": 5.303056668443645e-06, |
| "loss": 0.4132, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.0694519804666305, |
| "grad_norm": 0.12929905525099036, |
| "learning_rate": 5.287921047973149e-06, |
| "loss": 0.4098, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.0711882799782964, |
| "grad_norm": 0.13243953145888157, |
| "learning_rate": 5.272782779896898e-06, |
| "loss": 0.4243, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.072924579489962, |
| "grad_norm": 0.1399328685213442, |
| "learning_rate": 5.257642003420298e-06, |
| "loss": 0.4159, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.0746608790016279, |
| "grad_norm": 0.13181582787899612, |
| "learning_rate": 5.242498857771816e-06, |
| "loss": 0.4061, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.0763971785132935, |
| "grad_norm": 0.15243301626899763, |
| "learning_rate": 5.2273534822017105e-06, |
| "loss": 0.4104, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.0781334780249594, |
| "grad_norm": 0.13165320866583394, |
| "learning_rate": 5.212206015980742e-06, |
| "loss": 0.3981, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.079869777536625, |
| "grad_norm": 0.14715452781968524, |
| "learning_rate": 5.197056598398897e-06, |
| "loss": 0.4168, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.081606077048291, |
| "grad_norm": 0.13599260807794022, |
| "learning_rate": 5.181905368764102e-06, |
| "loss": 0.4326, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.0833423765599566, |
| "grad_norm": 0.14589346570188805, |
| "learning_rate": 5.166752466400954e-06, |
| "loss": 0.4112, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.0850786760716224, |
| "grad_norm": 0.12995220413381026, |
| "learning_rate": 5.151598030649425e-06, |
| "loss": 0.3986, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.086814975583288, |
| "grad_norm": 0.1329355849474862, |
| "learning_rate": 5.13644220086359e-06, |
| "loss": 0.4012, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.088551275094954, |
| "grad_norm": 0.13589628456048708, |
| "learning_rate": 5.121285116410344e-06, |
| "loss": 0.4008, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.0902875746066196, |
| "grad_norm": 0.14045644426905074, |
| "learning_rate": 5.106126916668118e-06, |
| "loss": 0.4111, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.0920238741182855, |
| "grad_norm": 0.14551962000345375, |
| "learning_rate": 5.090967741025599e-06, |
| "loss": 0.4006, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.0937601736299511, |
| "grad_norm": 0.13705310011406346, |
| "learning_rate": 5.075807728880447e-06, |
| "loss": 0.4281, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.095496473141617, |
| "grad_norm": 0.12740292576758674, |
| "learning_rate": 5.060647019638016e-06, |
| "loss": 0.4007, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.0972327726532827, |
| "grad_norm": 0.12099875797969191, |
| "learning_rate": 5.04548575271007e-06, |
| "loss": 0.415, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0989690721649485, |
| "grad_norm": 0.13398256820011353, |
| "learning_rate": 5.030324067513499e-06, |
| "loss": 0.3973, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.1007053716766142, |
| "grad_norm": 0.1348779537529425, |
| "learning_rate": 5.015162103469042e-06, |
| "loss": 0.4065, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.10244167118828, |
| "grad_norm": 0.12678399726307263, |
| "learning_rate": 5e-06, |
| "loss": 0.4141, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.1041779706999457, |
| "grad_norm": 0.13577535863115883, |
| "learning_rate": 4.984837896530959e-06, |
| "loss": 0.4109, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.1059142702116116, |
| "grad_norm": 0.13470321160616094, |
| "learning_rate": 4.969675932486503e-06, |
| "loss": 0.4086, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.1076505697232772, |
| "grad_norm": 0.14286962783803234, |
| "learning_rate": 4.954514247289931e-06, |
| "loss": 0.403, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.1093868692349431, |
| "grad_norm": 0.12644018098118473, |
| "learning_rate": 4.939352980361985e-06, |
| "loss": 0.4201, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.1111231687466088, |
| "grad_norm": 0.1325504479364434, |
| "learning_rate": 4.924192271119554e-06, |
| "loss": 0.4013, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1128594682582746, |
| "grad_norm": 0.1291532073174894, |
| "learning_rate": 4.909032258974403e-06, |
| "loss": 0.4086, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.1145957677699403, |
| "grad_norm": 0.13268107816932617, |
| "learning_rate": 4.8938730833318825e-06, |
| "loss": 0.4229, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.1163320672816062, |
| "grad_norm": 0.14551167466783432, |
| "learning_rate": 4.878714883589657e-06, |
| "loss": 0.4049, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.1180683667932718, |
| "grad_norm": 0.1458795187912891, |
| "learning_rate": 4.863557799136411e-06, |
| "loss": 0.4252, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.1198046663049377, |
| "grad_norm": 0.16140943317028875, |
| "learning_rate": 4.848401969350577e-06, |
| "loss": 0.4115, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.1215409658166033, |
| "grad_norm": 0.17325457371109407, |
| "learning_rate": 4.833247533599047e-06, |
| "loss": 0.4121, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.1232772653282692, |
| "grad_norm": 0.13364198145727132, |
| "learning_rate": 4.8180946312359e-06, |
| "loss": 0.4039, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.1250135648399349, |
| "grad_norm": 0.1281682816584959, |
| "learning_rate": 4.802943401601105e-06, |
| "loss": 0.4182, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.1267498643516007, |
| "grad_norm": 0.14174013597508733, |
| "learning_rate": 4.78779398401926e-06, |
| "loss": 0.4263, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.1284861638632664, |
| "grad_norm": 0.13774912972454795, |
| "learning_rate": 4.77264651779829e-06, |
| "loss": 0.4421, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.1302224633749323, |
| "grad_norm": 0.14207068241290607, |
| "learning_rate": 4.757501142228186e-06, |
| "loss": 0.4089, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.131958762886598, |
| "grad_norm": 0.12903740995178237, |
| "learning_rate": 4.742357996579704e-06, |
| "loss": 0.4163, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.1336950623982638, |
| "grad_norm": 0.1350820578224657, |
| "learning_rate": 4.7272172201031055e-06, |
| "loss": 0.4041, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.1354313619099294, |
| "grad_norm": 0.1329350427266318, |
| "learning_rate": 4.712078952026853e-06, |
| "loss": 0.4035, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.1371676614215953, |
| "grad_norm": 0.14408458515913283, |
| "learning_rate": 4.696943331556357e-06, |
| "loss": 0.4153, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.138903960933261, |
| "grad_norm": 0.1382968249352353, |
| "learning_rate": 4.6818104978726685e-06, |
| "loss": 0.4167, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.1406402604449268, |
| "grad_norm": 0.1337490824001829, |
| "learning_rate": 4.666680590131225e-06, |
| "loss": 0.4021, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.1423765599565925, |
| "grad_norm": 0.13624081308831007, |
| "learning_rate": 4.651553747460545e-06, |
| "loss": 0.4008, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.1441128594682584, |
| "grad_norm": 0.1301703680610189, |
| "learning_rate": 4.6364301089609755e-06, |
| "loss": 0.4201, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.145849158979924, |
| "grad_norm": 0.12869809645079283, |
| "learning_rate": 4.621309813703385e-06, |
| "loss": 0.4221, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.1475854584915899, |
| "grad_norm": 0.15594955604265287, |
| "learning_rate": 4.606193000727913e-06, |
| "loss": 0.423, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.1493217580032555, |
| "grad_norm": 0.14731027076680392, |
| "learning_rate": 4.591079809042664e-06, |
| "loss": 0.4244, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.1510580575149214, |
| "grad_norm": 0.13957336559394706, |
| "learning_rate": 4.575970377622456e-06, |
| "loss": 0.4203, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.152794357026587, |
| "grad_norm": 0.11562182093236165, |
| "learning_rate": 4.560864845407515e-06, |
| "loss": 0.4134, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.154530656538253, |
| "grad_norm": 0.12916966436454652, |
| "learning_rate": 4.545763351302224e-06, |
| "loss": 0.4273, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.1562669560499186, |
| "grad_norm": 0.1279961071644105, |
| "learning_rate": 4.530666034173827e-06, |
| "loss": 0.3993, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1580032555615845, |
| "grad_norm": 0.12327967071387225, |
| "learning_rate": 4.515573032851158e-06, |
| "loss": 0.4015, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.15973955507325, |
| "grad_norm": 0.13127349833439605, |
| "learning_rate": 4.500484486123367e-06, |
| "loss": 0.4069, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.161475854584916, |
| "grad_norm": 0.13059532586105568, |
| "learning_rate": 4.485400532738638e-06, |
| "loss": 0.4166, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.1632121540965816, |
| "grad_norm": 0.12508095784691065, |
| "learning_rate": 4.47032131140292e-06, |
| "loss": 0.4104, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.1649484536082475, |
| "grad_norm": 0.1515418268364036, |
| "learning_rate": 4.455246960778646e-06, |
| "loss": 0.4098, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.1666847531199132, |
| "grad_norm": 0.12573956573460845, |
| "learning_rate": 4.4401776194834615e-06, |
| "loss": 0.3935, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.168421052631579, |
| "grad_norm": 0.14929151845493635, |
| "learning_rate": 4.425113426088945e-06, |
| "loss": 0.4119, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.1701573521432447, |
| "grad_norm": 0.14187289444160336, |
| "learning_rate": 4.410054519119341e-06, |
| "loss": 0.418, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.1718936516549106, |
| "grad_norm": 0.14862945555637808, |
| "learning_rate": 4.395001037050278e-06, |
| "loss": 0.4257, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.1736299511665762, |
| "grad_norm": 0.13996175733356186, |
| "learning_rate": 4.379953118307505e-06, |
| "loss": 0.4003, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.175366250678242, |
| "grad_norm": 0.1255053214493474, |
| "learning_rate": 4.364910901265607e-06, |
| "loss": 0.4241, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.1771025501899077, |
| "grad_norm": 0.13373405771489325, |
| "learning_rate": 4.3498745242467415e-06, |
| "loss": 0.4052, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.1788388497015736, |
| "grad_norm": 0.1300518944364487, |
| "learning_rate": 4.334844125519363e-06, |
| "loss": 0.4237, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.1805751492132392, |
| "grad_norm": 0.13269874204216286, |
| "learning_rate": 4.319819843296952e-06, |
| "loss": 0.4171, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.1823114487249051, |
| "grad_norm": 0.1492214358278627, |
| "learning_rate": 4.3048018157367435e-06, |
| "loss": 0.4084, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.1840477482365708, |
| "grad_norm": 0.1329430046793398, |
| "learning_rate": 4.289790180938459e-06, |
| "loss": 0.4217, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.1857840477482366, |
| "grad_norm": 0.12941948682816526, |
| "learning_rate": 4.274785076943031e-06, |
| "loss": 0.4178, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.1875203472599023, |
| "grad_norm": 0.13202812035471978, |
| "learning_rate": 4.259786641731344e-06, |
| "loss": 0.4095, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.1892566467715682, |
| "grad_norm": 0.14494857456759286, |
| "learning_rate": 4.244795013222951e-06, |
| "loss": 0.4224, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.1909929462832338, |
| "grad_norm": 0.13462792345729818, |
| "learning_rate": 4.229810329274819e-06, |
| "loss": 0.4239, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.1927292457948997, |
| "grad_norm": 0.14566055483265097, |
| "learning_rate": 4.214832727680054e-06, |
| "loss": 0.4348, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.1944655453065653, |
| "grad_norm": 0.13632018344344551, |
| "learning_rate": 4.199862346166635e-06, |
| "loss": 0.4142, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.1962018448182312, |
| "grad_norm": 0.14011949501328627, |
| "learning_rate": 4.184899322396147e-06, |
| "loss": 0.4031, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.1979381443298969, |
| "grad_norm": 0.14232733717051585, |
| "learning_rate": 4.16994379396252e-06, |
| "loss": 0.3988, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.1996744438415627, |
| "grad_norm": 0.12566943014411622, |
| "learning_rate": 4.154995898390756e-06, |
| "loss": 0.4054, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.2014107433532284, |
| "grad_norm": 0.14134431965453162, |
| "learning_rate": 4.140055773135671e-06, |
| "loss": 0.4208, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.2031470428648943, |
| "grad_norm": 0.1331920255111448, |
| "learning_rate": 4.125123555580624e-06, |
| "loss": 0.4179, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.20488334237656, |
| "grad_norm": 0.1302865467669287, |
| "learning_rate": 4.110199383036263e-06, |
| "loss": 0.4185, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.2066196418882258, |
| "grad_norm": 0.119878175170804, |
| "learning_rate": 4.0952833927392585e-06, |
| "loss": 0.3943, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.2083559413998914, |
| "grad_norm": 0.1289332139821464, |
| "learning_rate": 4.080375721851031e-06, |
| "loss": 0.4317, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.2100922409115573, |
| "grad_norm": 0.1489851485752332, |
| "learning_rate": 4.0654765074565125e-06, |
| "loss": 0.4147, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.211828540423223, |
| "grad_norm": 0.1359535916414997, |
| "learning_rate": 4.050585886562858e-06, |
| "loss": 0.4098, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.2135648399348888, |
| "grad_norm": 0.13399659588917587, |
| "learning_rate": 4.035703996098214e-06, |
| "loss": 0.4073, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.2153011394465545, |
| "grad_norm": 0.14010727123563935, |
| "learning_rate": 4.020830972910433e-06, |
| "loss": 0.4208, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2170374389582204, |
| "grad_norm": 0.13938234625949356, |
| "learning_rate": 4.00596695376584e-06, |
| "loss": 0.4115, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.218773738469886, |
| "grad_norm": 0.1353808767288146, |
| "learning_rate": 3.991112075347948e-06, |
| "loss": 0.4068, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.2205100379815519, |
| "grad_norm": 0.14275599742668682, |
| "learning_rate": 3.976266474256232e-06, |
| "loss": 0.4165, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.2222463374932175, |
| "grad_norm": 0.1378621771922673, |
| "learning_rate": 3.96143028700484e-06, |
| "loss": 0.416, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.2239826370048834, |
| "grad_norm": 0.13218109257570979, |
| "learning_rate": 3.94660365002137e-06, |
| "loss": 0.4118, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.225718936516549, |
| "grad_norm": 0.13836744351326696, |
| "learning_rate": 3.931786699645584e-06, |
| "loss": 0.4158, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.227455236028215, |
| "grad_norm": 0.12863460316637662, |
| "learning_rate": 3.916979572128185e-06, |
| "loss": 0.4145, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.2291915355398806, |
| "grad_norm": 0.1487138212974801, |
| "learning_rate": 3.9021824036295355e-06, |
| "loss": 0.4462, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.2309278350515465, |
| "grad_norm": 0.15238329064789952, |
| "learning_rate": 3.887395330218429e-06, |
| "loss": 0.4329, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.232664134563212, |
| "grad_norm": 0.14000705269862404, |
| "learning_rate": 3.872618487870822e-06, |
| "loss": 0.4208, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.234400434074878, |
| "grad_norm": 0.1337473888423507, |
| "learning_rate": 3.857852012468594e-06, |
| "loss": 0.3966, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.2361367335865436, |
| "grad_norm": 0.13831080512228816, |
| "learning_rate": 3.843096039798293e-06, |
| "loss": 0.4136, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.2378730330982095, |
| "grad_norm": 0.13958883535341216, |
| "learning_rate": 3.8283507055498886e-06, |
| "loss": 0.4088, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.2396093326098752, |
| "grad_norm": 0.12998244574393739, |
| "learning_rate": 3.8136161453155225e-06, |
| "loss": 0.4082, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.241345632121541, |
| "grad_norm": 0.12023935229932833, |
| "learning_rate": 3.798892494588265e-06, |
| "loss": 0.406, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.2430819316332067, |
| "grad_norm": 0.13329454708178137, |
| "learning_rate": 3.784179888760864e-06, |
| "loss": 0.4108, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.2448182311448726, |
| "grad_norm": 0.14362657202555368, |
| "learning_rate": 3.7694784631245066e-06, |
| "loss": 0.4153, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.2465545306565382, |
| "grad_norm": 0.14234958856936142, |
| "learning_rate": 3.754788352867568e-06, |
| "loss": 0.4257, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.248290830168204, |
| "grad_norm": 0.13113466271767915, |
| "learning_rate": 3.7401096930743753e-06, |
| "loss": 0.4068, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.2500271296798697, |
| "grad_norm": 0.14575529914592233, |
| "learning_rate": 3.7254426187239567e-06, |
| "loss": 0.4272, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2517634291915356, |
| "grad_norm": 0.13600829322582764, |
| "learning_rate": 3.7107872646888115e-06, |
| "loss": 0.4162, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.2534997287032013, |
| "grad_norm": 0.148400333733331, |
| "learning_rate": 3.696143765733658e-06, |
| "loss": 0.4298, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.2552360282148671, |
| "grad_norm": 0.13942444090666167, |
| "learning_rate": 3.6815122565142034e-06, |
| "loss": 0.4056, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.2569723277265328, |
| "grad_norm": 0.1445262305649223, |
| "learning_rate": 3.666892871575903e-06, |
| "loss": 0.4211, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2587086272381987, |
| "grad_norm": 0.12435903954866685, |
| "learning_rate": 3.6522857453527172e-06, |
| "loss": 0.4049, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.2604449267498643, |
| "grad_norm": 0.14897501276229344, |
| "learning_rate": 3.6376910121658867e-06, |
| "loss": 0.4319, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.2621812262615302, |
| "grad_norm": 0.1581301795554515, |
| "learning_rate": 3.623108806222684e-06, |
| "loss": 0.4227, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.2639175257731958, |
| "grad_norm": 0.16087358209515815, |
| "learning_rate": 3.608539261615194e-06, |
| "loss": 0.4129, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.2656538252848617, |
| "grad_norm": 0.14218267776525048, |
| "learning_rate": 3.5939825123190637e-06, |
| "loss": 0.4125, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.2673901247965274, |
| "grad_norm": 0.1557900583782951, |
| "learning_rate": 3.5794386921922885e-06, |
| "loss": 0.427, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.2691264243081932, |
| "grad_norm": 0.13287959721455264, |
| "learning_rate": 3.5649079349739656e-06, |
| "loss": 0.4337, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.2708627238198589, |
| "grad_norm": 0.13684064235737806, |
| "learning_rate": 3.550390374283077e-06, |
| "loss": 0.4201, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.2725990233315247, |
| "grad_norm": 0.13717376028853762, |
| "learning_rate": 3.5358861436172487e-06, |
| "loss": 0.4245, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.2743353228431904, |
| "grad_norm": 0.12673772460863922, |
| "learning_rate": 3.521395376351534e-06, |
| "loss": 0.4302, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.2760716223548563, |
| "grad_norm": 0.1452730674929177, |
| "learning_rate": 3.506918205737179e-06, |
| "loss": 0.4167, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.277807921866522, |
| "grad_norm": 0.12966559464754768, |
| "learning_rate": 3.492454764900402e-06, |
| "loss": 0.4096, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.2795442213781878, |
| "grad_norm": 0.126491624200071, |
| "learning_rate": 3.478005186841167e-06, |
| "loss": 0.4111, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.2812805208898534, |
| "grad_norm": 0.1648290795025507, |
| "learning_rate": 3.4635696044319644e-06, |
| "loss": 0.4265, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.2830168204015193, |
| "grad_norm": 0.13792714906019224, |
| "learning_rate": 3.4491481504165802e-06, |
| "loss": 0.4086, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.284753119913185, |
| "grad_norm": 0.12851901581350092, |
| "learning_rate": 3.4347409574088896e-06, |
| "loss": 0.4153, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.2864894194248508, |
| "grad_norm": 0.14055982724287153, |
| "learning_rate": 3.4203481578916197e-06, |
| "loss": 0.3958, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.2882257189365165, |
| "grad_norm": 0.14211923802570475, |
| "learning_rate": 3.4059698842151516e-06, |
| "loss": 0.4253, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.2899620184481824, |
| "grad_norm": 0.14505225134761637, |
| "learning_rate": 3.3916062685962813e-06, |
| "loss": 0.4086, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.291698317959848, |
| "grad_norm": 0.1378622302172767, |
| "learning_rate": 3.377257443117027e-06, |
| "loss": 0.4145, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.293434617471514, |
| "grad_norm": 0.12570469560505018, |
| "learning_rate": 3.3629235397233894e-06, |
| "loss": 0.4028, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.2951709169831795, |
| "grad_norm": 0.1465008931089143, |
| "learning_rate": 3.3486046902241663e-06, |
| "loss": 0.4385, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.2969072164948454, |
| "grad_norm": 0.13318963224172997, |
| "learning_rate": 3.3343010262897125e-06, |
| "loss": 0.4129, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.298643516006511, |
| "grad_norm": 0.13025976099827086, |
| "learning_rate": 3.3200126794507544e-06, |
| "loss": 0.4254, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.300379815518177, |
| "grad_norm": 0.13522951465238042, |
| "learning_rate": 3.305739781097157e-06, |
| "loss": 0.4358, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.3021161150298426, |
| "grad_norm": 0.13796244449987072, |
| "learning_rate": 3.2914824624767384e-06, |
| "loss": 0.4173, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.3038524145415085, |
| "grad_norm": 0.12998370880403234, |
| "learning_rate": 3.2772408546940413e-06, |
| "loss": 0.4258, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.3055887140531741, |
| "grad_norm": 0.12608502139488215, |
| "learning_rate": 3.263015088709147e-06, |
| "loss": 0.3888, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.30732501356484, |
| "grad_norm": 0.12057418198065353, |
| "learning_rate": 3.248805295336458e-06, |
| "loss": 0.4178, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.3090613130765056, |
| "grad_norm": 0.13684112107904475, |
| "learning_rate": 3.234611605243496e-06, |
| "loss": 0.4212, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.3107976125881715, |
| "grad_norm": 0.12993759903594548, |
| "learning_rate": 3.2204341489497098e-06, |
| "loss": 0.4131, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.3125339120998372, |
| "grad_norm": 0.13405591819120782, |
| "learning_rate": 3.206273056825263e-06, |
| "loss": 0.4193, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.314270211611503, |
| "grad_norm": 0.11848443460254894, |
| "learning_rate": 3.192128459089846e-06, |
| "loss": 0.407, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.3160065111231687, |
| "grad_norm": 0.1283026303176609, |
| "learning_rate": 3.178000485811469e-06, |
| "loss": 0.4043, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.3177428106348346, |
| "grad_norm": 0.134503429002085, |
| "learning_rate": 3.1638892669052725e-06, |
| "loss": 0.4233, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.3194791101465002, |
| "grad_norm": 0.1304647334011823, |
| "learning_rate": 3.149794932132331e-06, |
| "loss": 0.3977, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.321215409658166, |
| "grad_norm": 0.12921517217947945, |
| "learning_rate": 3.1357176110984578e-06, |
| "loss": 0.4148, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.3229517091698317, |
| "grad_norm": 0.13650111546563745, |
| "learning_rate": 3.1216574332530153e-06, |
| "loss": 0.4322, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.3246880086814976, |
| "grad_norm": 0.12200301128989274, |
| "learning_rate": 3.107614527887727e-06, |
| "loss": 0.4208, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.3264243081931633, |
| "grad_norm": 0.1385711764976705, |
| "learning_rate": 3.093589024135478e-06, |
| "loss": 0.4124, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.3281606077048291, |
| "grad_norm": 0.12826365451603902, |
| "learning_rate": 3.079581050969146e-06, |
| "loss": 0.42, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.3298969072164948, |
| "grad_norm": 0.1353306325888454, |
| "learning_rate": 3.0655907372003945e-06, |
| "loss": 0.4164, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.3316332067281607, |
| "grad_norm": 0.1378424644755559, |
| "learning_rate": 3.0516182114785044e-06, |
| "loss": 0.4159, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.3333695062398263, |
| "grad_norm": 0.1285283517829389, |
| "learning_rate": 3.0376636022891813e-06, |
| "loss": 0.4126, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.3351058057514922, |
| "grad_norm": 0.13426392864303094, |
| "learning_rate": 3.0237270379533823e-06, |
| "loss": 0.4115, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.3368421052631578, |
| "grad_norm": 0.12430141285628485, |
| "learning_rate": 3.0098086466261244e-06, |
| "loss": 0.4074, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.3385784047748237, |
| "grad_norm": 0.1296003321288165, |
| "learning_rate": 2.9959085562953207e-06, |
| "loss": 0.4074, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.3403147042864894, |
| "grad_norm": 0.12858505693047656, |
| "learning_rate": 2.9820268947805886e-06, |
| "loss": 0.4152, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.3420510037981552, |
| "grad_norm": 0.12197621327889531, |
| "learning_rate": 2.968163789732087e-06, |
| "loss": 0.3858, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.3437873033098209, |
| "grad_norm": 0.12881358416491176, |
| "learning_rate": 2.954319368629333e-06, |
| "loss": 0.4001, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.3455236028214868, |
| "grad_norm": 0.130976313094142, |
| "learning_rate": 2.9404937587800374e-06, |
| "loss": 0.3965, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.3472599023331524, |
| "grad_norm": 0.14058834884031382, |
| "learning_rate": 2.9266870873189275e-06, |
| "loss": 0.4283, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.3489962018448183, |
| "grad_norm": 0.14263920518768747, |
| "learning_rate": 2.912899481206582e-06, |
| "loss": 0.4043, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.350732501356484, |
| "grad_norm": 0.12084831525686957, |
| "learning_rate": 2.89913106722826e-06, |
| "loss": 0.4154, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.3524688008681498, |
| "grad_norm": 0.1354875286137426, |
| "learning_rate": 2.8853819719927432e-06, |
| "loss": 0.4206, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.3542051003798155, |
| "grad_norm": 0.13251361102306228, |
| "learning_rate": 2.871652321931161e-06, |
| "loss": 0.4007, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3559413998914813, |
| "grad_norm": 0.13099944844294423, |
| "learning_rate": 2.8579422432958316e-06, |
| "loss": 0.4207, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.357677699403147, |
| "grad_norm": 0.12769218412087335, |
| "learning_rate": 2.8442518621591085e-06, |
| "loss": 0.4183, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.3594139989148128, |
| "grad_norm": 0.14309184331634017, |
| "learning_rate": 2.83058130441221e-06, |
| "loss": 0.4097, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.3611502984264785, |
| "grad_norm": 0.16868092956785954, |
| "learning_rate": 2.8169306957640675e-06, |
| "loss": 0.4255, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.3628865979381444, |
| "grad_norm": 0.14472137695357806, |
| "learning_rate": 2.803300161740166e-06, |
| "loss": 0.3991, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.36462289744981, |
| "grad_norm": 0.1260246338457178, |
| "learning_rate": 2.7896898276814005e-06, |
| "loss": 0.4251, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.366359196961476, |
| "grad_norm": 0.12995354011841623, |
| "learning_rate": 2.7760998187429067e-06, |
| "loss": 0.3945, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.3680954964731415, |
| "grad_norm": 0.13800691020421174, |
| "learning_rate": 2.7625302598929226e-06, |
| "loss": 0.4121, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.3698317959848074, |
| "grad_norm": 0.12589367394524512, |
| "learning_rate": 2.748981275911633e-06, |
| "loss": 0.4116, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.371568095496473, |
| "grad_norm": 0.13086859376723847, |
| "learning_rate": 2.73545299139003e-06, |
| "loss": 0.417, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.373304395008139, |
| "grad_norm": 0.12779840511323343, |
| "learning_rate": 2.7219455307287557e-06, |
| "loss": 0.4007, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.3750406945198046, |
| "grad_norm": 0.13743539379081776, |
| "learning_rate": 2.7084590181369675e-06, |
| "loss": 0.4165, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.3767769940314705, |
| "grad_norm": 0.14005665536498818, |
| "learning_rate": 2.69499357763119e-06, |
| "loss": 0.4112, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.3785132935431361, |
| "grad_norm": 0.12255718957071185, |
| "learning_rate": 2.6815493330341822e-06, |
| "loss": 0.4191, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.380249593054802, |
| "grad_norm": 0.13565587855667666, |
| "learning_rate": 2.6681264079737907e-06, |
| "loss": 0.4065, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.3819858925664676, |
| "grad_norm": 0.13419326992967473, |
| "learning_rate": 2.6547249258818162e-06, |
| "loss": 0.4119, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.3837221920781335, |
| "grad_norm": 0.13136822894753178, |
| "learning_rate": 2.641345009992878e-06, |
| "loss": 0.4153, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.3854584915897992, |
| "grad_norm": 0.14176326400487876, |
| "learning_rate": 2.627986783343287e-06, |
| "loss": 0.4096, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.387194791101465, |
| "grad_norm": 0.12839978347380843, |
| "learning_rate": 2.6146503687699005e-06, |
| "loss": 0.4044, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.3889310906131307, |
| "grad_norm": 0.1355048536747864, |
| "learning_rate": 2.601335888909005e-06, |
| "loss": 0.4163, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.3906673901247966, |
| "grad_norm": 0.12545466668714947, |
| "learning_rate": 2.5880434661951826e-06, |
| "loss": 0.4119, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.3924036896364622, |
| "grad_norm": 0.13036763191690007, |
| "learning_rate": 2.5747732228601903e-06, |
| "loss": 0.4047, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.394139989148128, |
| "grad_norm": 0.1379598785530406, |
| "learning_rate": 2.5615252809318287e-06, |
| "loss": 0.4326, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.3958762886597937, |
| "grad_norm": 0.14077141190929993, |
| "learning_rate": 2.5482997622328252e-06, |
| "loss": 0.3938, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.3976125881714596, |
| "grad_norm": 0.1468546409601843, |
| "learning_rate": 2.5350967883797095e-06, |
| "loss": 0.4301, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.3993488876831253, |
| "grad_norm": 0.13961508755488275, |
| "learning_rate": 2.5219164807817055e-06, |
| "loss": 0.4132, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.4010851871947911, |
| "grad_norm": 0.12741589875103143, |
| "learning_rate": 2.508758960639599e-06, |
| "loss": 0.4286, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.4028214867064568, |
| "grad_norm": 0.1322065820346883, |
| "learning_rate": 2.495624348944633e-06, |
| "loss": 0.4, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.4045577862181227, |
| "grad_norm": 0.1279223022976378, |
| "learning_rate": 2.4825127664774008e-06, |
| "loss": 0.4032, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.4062940857297883, |
| "grad_norm": 0.1299005206195399, |
| "learning_rate": 2.469424333806718e-06, |
| "loss": 0.4145, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.4080303852414542, |
| "grad_norm": 0.12318119514845602, |
| "learning_rate": 2.456359171288534e-06, |
| "loss": 0.4119, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.4097666847531198, |
| "grad_norm": 0.11803503465881546, |
| "learning_rate": 2.443317399064806e-06, |
| "loss": 0.3971, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.4115029842647857, |
| "grad_norm": 0.12120636381131922, |
| "learning_rate": 2.4302991370624106e-06, |
| "loss": 0.4154, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.4132392837764514, |
| "grad_norm": 0.14357692984029835, |
| "learning_rate": 2.4173045049920276e-06, |
| "loss": 0.4153, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.4149755832881172, |
| "grad_norm": 0.13853494299667582, |
| "learning_rate": 2.4043336223470525e-06, |
| "loss": 0.4122, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.4167118827997829, |
| "grad_norm": 0.12762107651550092, |
| "learning_rate": 2.3913866084024857e-06, |
| "loss": 0.3934, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.4184481823114488, |
| "grad_norm": 0.14007255270207977, |
| "learning_rate": 2.3784635822138424e-06, |
| "loss": 0.4069, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.4201844818231144, |
| "grad_norm": 0.13584088672858124, |
| "learning_rate": 2.365564662616053e-06, |
| "loss": 0.4198, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.4219207813347803, |
| "grad_norm": 0.12697303599490736, |
| "learning_rate": 2.3526899682223813e-06, |
| "loss": 0.3899, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.423657080846446, |
| "grad_norm": 0.12679484994880091, |
| "learning_rate": 2.339839617423318e-06, |
| "loss": 0.4197, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.4253933803581118, |
| "grad_norm": 0.12876866513716562, |
| "learning_rate": 2.3270137283855022e-06, |
| "loss": 0.3991, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.4271296798697775, |
| "grad_norm": 0.12836114905719587, |
| "learning_rate": 2.3142124190506315e-06, |
| "loss": 0.4021, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.4288659793814433, |
| "grad_norm": 0.12995546010077, |
| "learning_rate": 2.3014358071343844e-06, |
| "loss": 0.414, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.430602278893109, |
| "grad_norm": 0.13519331292147682, |
| "learning_rate": 2.288684010125325e-06, |
| "loss": 0.4246, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.4323385784047749, |
| "grad_norm": 0.12538832218788362, |
| "learning_rate": 2.2759571452838325e-06, |
| "loss": 0.3945, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.4340748779164405, |
| "grad_norm": 0.12565420481547468, |
| "learning_rate": 2.2632553296410172e-06, |
| "loss": 0.3893, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.4358111774281064, |
| "grad_norm": 0.14011228845933368, |
| "learning_rate": 2.2505786799976527e-06, |
| "loss": 0.4024, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.437547476939772, |
| "grad_norm": 0.13362405741803038, |
| "learning_rate": 2.2379273129230916e-06, |
| "loss": 0.4104, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.439283776451438, |
| "grad_norm": 0.12278040688438624, |
| "learning_rate": 2.2253013447541993e-06, |
| "loss": 0.3996, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.4410200759631036, |
| "grad_norm": 0.12707888516785454, |
| "learning_rate": 2.2127008915942817e-06, |
| "loss": 0.4256, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.4427563754747694, |
| "grad_norm": 0.12297647001266902, |
| "learning_rate": 2.2001260693120236e-06, |
| "loss": 0.4116, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.444492674986435, |
| "grad_norm": 0.1316400193577434, |
| "learning_rate": 2.1875769935404145e-06, |
| "loss": 0.4257, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.446228974498101, |
| "grad_norm": 0.12592051019559328, |
| "learning_rate": 2.17505377967569e-06, |
| "loss": 0.4002, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.4479652740097668, |
| "grad_norm": 0.13307011942276165, |
| "learning_rate": 2.1625565428762687e-06, |
| "loss": 0.4143, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.4497015735214325, |
| "grad_norm": 0.13261132323467073, |
| "learning_rate": 2.1500853980616997e-06, |
| "loss": 0.4103, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.4514378730330981, |
| "grad_norm": 0.13736208640453593, |
| "learning_rate": 2.1376404599115963e-06, |
| "loss": 0.4076, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.453174172544764, |
| "grad_norm": 0.14043076559730674, |
| "learning_rate": 2.125221842864585e-06, |
| "loss": 0.4208, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.4549104720564299, |
| "grad_norm": 0.12533168615565918, |
| "learning_rate": 2.1128296611172593e-06, |
| "loss": 0.4137, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.4566467715680955, |
| "grad_norm": 0.11887754452981136, |
| "learning_rate": 2.10046402862312e-06, |
| "loss": 0.4174, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.4583830710797612, |
| "grad_norm": 0.1365498648247696, |
| "learning_rate": 2.0881250590915316e-06, |
| "loss": 0.4125, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.460119370591427, |
| "grad_norm": 0.12267517652888206, |
| "learning_rate": 2.075812865986677e-06, |
| "loss": 0.4091, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.461855670103093, |
| "grad_norm": 0.13012211466376844, |
| "learning_rate": 2.0635275625265187e-06, |
| "loss": 0.4311, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.4635919696147586, |
| "grad_norm": 0.129498733682449, |
| "learning_rate": 2.051269261681745e-06, |
| "loss": 0.4224, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.4653282691264242, |
| "grad_norm": 0.12190967445012241, |
| "learning_rate": 2.039038076174748e-06, |
| "loss": 0.4155, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.46706456863809, |
| "grad_norm": 0.13087222637882714, |
| "learning_rate": 2.0268341184785674e-06, |
| "loss": 0.4118, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.468800868149756, |
| "grad_norm": 0.134418294508292, |
| "learning_rate": 2.0146575008158765e-06, |
| "loss": 0.4269, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.4705371676614216, |
| "grad_norm": 0.12743480518428843, |
| "learning_rate": 2.0025083351579337e-06, |
| "loss": 0.4006, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.4722734671730873, |
| "grad_norm": 0.12625799127877016, |
| "learning_rate": 1.990386733223569e-06, |
| "loss": 0.4217, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.4740097666847531, |
| "grad_norm": 0.13796255034449323, |
| "learning_rate": 1.978292806478134e-06, |
| "loss": 0.4329, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.475746066196419, |
| "grad_norm": 0.13577869591390213, |
| "learning_rate": 1.9662266661325038e-06, |
| "loss": 0.4108, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.4774823657080847, |
| "grad_norm": 0.12374125989207808, |
| "learning_rate": 1.9541884231420304e-06, |
| "loss": 0.4058, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.4792186652197503, |
| "grad_norm": 0.13167015482453784, |
| "learning_rate": 1.9421781882055447e-06, |
| "loss": 0.4146, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.4809549647314162, |
| "grad_norm": 0.1332394699372241, |
| "learning_rate": 1.930196071764312e-06, |
| "loss": 0.425, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.482691264243082, |
| "grad_norm": 0.1268455605647913, |
| "learning_rate": 1.918242184001044e-06, |
| "loss": 0.3957, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.4844275637547477, |
| "grad_norm": 0.13517298465365898, |
| "learning_rate": 1.906316634838865e-06, |
| "loss": 0.4179, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.4861638632664134, |
| "grad_norm": 0.12480497326109305, |
| "learning_rate": 1.8944195339403176e-06, |
| "loss": 0.4163, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.4879001627780792, |
| "grad_norm": 0.15655713241339458, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.418, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.4896364622897451, |
| "grad_norm": 0.12127548342469688, |
| "learning_rate": 1.8707111142752493e-06, |
| "loss": 0.395, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.4913727618014108, |
| "grad_norm": 0.12456923098315939, |
| "learning_rate": 1.8589000135217882e-06, |
| "loss": 0.4027, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.4931090613130764, |
| "grad_norm": 0.11957509588071376, |
| "learning_rate": 1.8471177970560712e-06, |
| "loss": 0.409, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.4948453608247423, |
| "grad_norm": 0.12209458380394814, |
| "learning_rate": 1.8353645732225977e-06, |
| "loss": 0.3863, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.4965816603364082, |
| "grad_norm": 0.1273098707036207, |
| "learning_rate": 1.8236404500992767e-06, |
| "loss": 0.3968, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.4983179598480738, |
| "grad_norm": 0.12369245264759446, |
| "learning_rate": 1.8119455354964088e-06, |
| "loss": 0.4233, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.5000542593597395, |
| "grad_norm": 0.13398660997805997, |
| "learning_rate": 1.8002799369557121e-06, |
| "loss": 0.4153, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.5017905588714053, |
| "grad_norm": 0.1384801086046812, |
| "learning_rate": 1.7886437617493206e-06, |
| "loss": 0.4333, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.5035268583830712, |
| "grad_norm": 0.11746726161406841, |
| "learning_rate": 1.7770371168788042e-06, |
| "loss": 0.4087, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.5052631578947369, |
| "grad_norm": 0.12377311929628279, |
| "learning_rate": 1.765460109074188e-06, |
| "loss": 0.4066, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.5069994574064025, |
| "grad_norm": 0.12518207143985072, |
| "learning_rate": 1.7539128447929603e-06, |
| "loss": 0.4275, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.5087357569180684, |
| "grad_norm": 0.12617569143392077, |
| "learning_rate": 1.7423954302191047e-06, |
| "loss": 0.4083, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.5104720564297343, |
| "grad_norm": 0.1236554713478632, |
| "learning_rate": 1.7309079712621152e-06, |
| "loss": 0.3962, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.5122083559414, |
| "grad_norm": 0.1246822344076014, |
| "learning_rate": 1.7194505735560307e-06, |
| "loss": 0.4099, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.5139446554530656, |
| "grad_norm": 0.1299322275534694, |
| "learning_rate": 1.7080233424584553e-06, |
| "loss": 0.4215, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.5156809549647314, |
| "grad_norm": 0.13649358878274787, |
| "learning_rate": 1.6966263830495939e-06, |
| "loss": 0.429, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.5174172544763973, |
| "grad_norm": 0.13493202861295495, |
| "learning_rate": 1.6852598001312836e-06, |
| "loss": 0.4203, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.519153553988063, |
| "grad_norm": 0.1300026060628514, |
| "learning_rate": 1.6739236982260377e-06, |
| "loss": 0.4176, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.5208898534997286, |
| "grad_norm": 0.13491527997309305, |
| "learning_rate": 1.662618181576071e-06, |
| "loss": 0.4156, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.5226261530113945, |
| "grad_norm": 0.11300836408342123, |
| "learning_rate": 1.6513433541423529e-06, |
| "loss": 0.3902, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.5243624525230604, |
| "grad_norm": 0.12174204501469876, |
| "learning_rate": 1.6400993196036441e-06, |
| "loss": 0.4242, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.526098752034726, |
| "grad_norm": 0.13546333027439583, |
| "learning_rate": 1.6288861813555511e-06, |
| "loss": 0.4227, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.5278350515463917, |
| "grad_norm": 0.12127618299527383, |
| "learning_rate": 1.6177040425095664e-06, |
| "loss": 0.414, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.5295713510580575, |
| "grad_norm": 0.13012169318108746, |
| "learning_rate": 1.6065530058921253e-06, |
| "loss": 0.4277, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.5313076505697234, |
| "grad_norm": 0.1423957843203789, |
| "learning_rate": 1.5954331740436591e-06, |
| "loss": 0.4005, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.533043950081389, |
| "grad_norm": 0.12506933416673377, |
| "learning_rate": 1.5843446492176562e-06, |
| "loss": 0.3961, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.5347802495930547, |
| "grad_norm": 0.13366751188467238, |
| "learning_rate": 1.5732875333797143e-06, |
| "loss": 0.4149, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.5365165491047206, |
| "grad_norm": 0.12554028072879378, |
| "learning_rate": 1.562261928206608e-06, |
| "loss": 0.4123, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.5382528486163864, |
| "grad_norm": 0.12208173618819541, |
| "learning_rate": 1.551267935085351e-06, |
| "loss": 0.3962, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.539989148128052, |
| "grad_norm": 0.12016508007857718, |
| "learning_rate": 1.5403056551122697e-06, |
| "loss": 0.4124, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.5417254476397177, |
| "grad_norm": 0.13559374517942055, |
| "learning_rate": 1.5293751890920649e-06, |
| "loss": 0.4134, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.5434617471513836, |
| "grad_norm": 0.11686815344911052, |
| "learning_rate": 1.5184766375368914e-06, |
| "loss": 0.4016, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.5451980466630495, |
| "grad_norm": 0.13384389935838778, |
| "learning_rate": 1.5076101006654286e-06, |
| "loss": 0.405, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.5469343461747151, |
| "grad_norm": 0.13607774222673785, |
| "learning_rate": 1.4967756784019666e-06, |
| "loss": 0.4131, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.5486706456863808, |
| "grad_norm": 0.12143950640173919, |
| "learning_rate": 1.485973470375479e-06, |
| "loss": 0.4393, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.5504069451980467, |
| "grad_norm": 0.13447482018669238, |
| "learning_rate": 1.4752035759187106e-06, |
| "loss": 0.3989, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.5521432447097125, |
| "grad_norm": 0.12219257242260986, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.4136, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.5538795442213782, |
| "grad_norm": 0.11918499493508868, |
| "learning_rate": 1.4537611235586863e-06, |
| "loss": 0.4105, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.5556158437330438, |
| "grad_norm": 0.12248886593105629, |
| "learning_rate": 1.4430887628315715e-06, |
| "loss": 0.4122, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.5573521432447097, |
| "grad_norm": 0.1220004487241072, |
| "learning_rate": 1.4324491100246386e-06, |
| "loss": 0.3947, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.5590884427563756, |
| "grad_norm": 0.11598397128698863, |
| "learning_rate": 1.4218422629758405e-06, |
| "loss": 0.4004, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.5608247422680412, |
| "grad_norm": 0.12691720000176912, |
| "learning_rate": 1.4112683192214598e-06, |
| "loss": 0.4338, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.562561041779707, |
| "grad_norm": 0.13316611138281842, |
| "learning_rate": 1.40072737599522e-06, |
| "loss": 0.4132, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5642973412913728, |
| "grad_norm": 0.13213292659724496, |
| "learning_rate": 1.390219530227378e-06, |
| "loss": 0.4159, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.5660336408030386, |
| "grad_norm": 0.12083065391743368, |
| "learning_rate": 1.3797448785438433e-06, |
| "loss": 0.4142, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.5677699403147043, |
| "grad_norm": 0.12191223406786662, |
| "learning_rate": 1.369303517265283e-06, |
| "loss": 0.4132, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.56950623982637, |
| "grad_norm": 0.13900771994376313, |
| "learning_rate": 1.358895542406245e-06, |
| "loss": 0.4261, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.5712425393380358, |
| "grad_norm": 0.1278302066364733, |
| "learning_rate": 1.348521049674264e-06, |
| "loss": 0.4066, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.5729788388497017, |
| "grad_norm": 0.12945116378479313, |
| "learning_rate": 1.3381801344689876e-06, |
| "loss": 0.4089, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.5747151383613673, |
| "grad_norm": 0.12916747174933954, |
| "learning_rate": 1.3278728918812978e-06, |
| "loss": 0.4225, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.576451437873033, |
| "grad_norm": 0.11927642012866999, |
| "learning_rate": 1.3175994166924394e-06, |
| "loss": 0.4294, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.5781877373846989, |
| "grad_norm": 0.1206412178443493, |
| "learning_rate": 1.3073598033731427e-06, |
| "loss": 0.4026, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.5799240368963647, |
| "grad_norm": 0.13593801989098223, |
| "learning_rate": 1.2971541460827597e-06, |
| "loss": 0.4259, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.5816603364080304, |
| "grad_norm": 0.13696333152462678, |
| "learning_rate": 1.2869825386683938e-06, |
| "loss": 0.4084, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.583396635919696, |
| "grad_norm": 0.13284306946184443, |
| "learning_rate": 1.2768450746640448e-06, |
| "loss": 0.4192, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.585132935431362, |
| "grad_norm": 0.13243455602958631, |
| "learning_rate": 1.2667418472897386e-06, |
| "loss": 0.4193, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.5868692349430278, |
| "grad_norm": 0.1225346669572518, |
| "learning_rate": 1.2566729494506768e-06, |
| "loss": 0.3819, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.5886055344546934, |
| "grad_norm": 0.12964877271023698, |
| "learning_rate": 1.246638473736378e-06, |
| "loss": 0.4087, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.590341833966359, |
| "grad_norm": 0.1341324486851053, |
| "learning_rate": 1.236638512419835e-06, |
| "loss": 0.3998, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.592078133478025, |
| "grad_norm": 0.12992187668983388, |
| "learning_rate": 1.2266731574566536e-06, |
| "loss": 0.4248, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.5938144329896908, |
| "grad_norm": 0.1282491585530033, |
| "learning_rate": 1.2167425004842171e-06, |
| "loss": 0.4162, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.5955507325013565, |
| "grad_norm": 0.12186709446939265, |
| "learning_rate": 1.2068466328208368e-06, |
| "loss": 0.4291, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.5972870320130221, |
| "grad_norm": 0.14142890451759685, |
| "learning_rate": 1.196985645464921e-06, |
| "loss": 0.419, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.599023331524688, |
| "grad_norm": 0.14338876457298588, |
| "learning_rate": 1.1871596290941278e-06, |
| "loss": 0.4077, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.6007596310363539, |
| "grad_norm": 0.1305962229366433, |
| "learning_rate": 1.1773686740645384e-06, |
| "loss": 0.4071, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.6024959305480195, |
| "grad_norm": 0.11435580814232883, |
| "learning_rate": 1.1676128704098222e-06, |
| "loss": 0.4057, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.6042322300596852, |
| "grad_norm": 0.11670856676886189, |
| "learning_rate": 1.1578923078404152e-06, |
| "loss": 0.4058, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.605968529571351, |
| "grad_norm": 0.124581473605881, |
| "learning_rate": 1.1482070757426855e-06, |
| "loss": 0.418, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.607704829083017, |
| "grad_norm": 0.11983010369768266, |
| "learning_rate": 1.1385572631781178e-06, |
| "loss": 0.4045, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.6094411285946826, |
| "grad_norm": 0.124700646599233, |
| "learning_rate": 1.1289429588824962e-06, |
| "loss": 0.3989, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.6111774281063482, |
| "grad_norm": 0.13817353344869943, |
| "learning_rate": 1.1193642512650805e-06, |
| "loss": 0.4086, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.612913727618014, |
| "grad_norm": 0.12049426083605701, |
| "learning_rate": 1.1098212284078037e-06, |
| "loss": 0.3997, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.61465002712968, |
| "grad_norm": 0.11944095499572413, |
| "learning_rate": 1.1003139780644467e-06, |
| "loss": 0.4146, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.6163863266413456, |
| "grad_norm": 0.12874490049668783, |
| "learning_rate": 1.0908425876598512e-06, |
| "loss": 0.4078, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.6181226261530113, |
| "grad_norm": 0.1200731058980104, |
| "learning_rate": 1.0814071442890983e-06, |
| "loss": 0.4134, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.6198589256646772, |
| "grad_norm": 0.12656380110216298, |
| "learning_rate": 1.07200773471672e-06, |
| "loss": 0.4168, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.621595225176343, |
| "grad_norm": 0.12268627183607625, |
| "learning_rate": 1.0626444453758895e-06, |
| "loss": 0.4098, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.6233315246880087, |
| "grad_norm": 0.12230549063662255, |
| "learning_rate": 1.053317362367639e-06, |
| "loss": 0.4099, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.6250678241996743, |
| "grad_norm": 0.12897798951819442, |
| "learning_rate": 1.0440265714600573e-06, |
| "loss": 0.4109, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.6268041237113402, |
| "grad_norm": 0.1296337132331582, |
| "learning_rate": 1.0347721580875125e-06, |
| "loss": 0.4344, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.628540423223006, |
| "grad_norm": 0.12301012837827231, |
| "learning_rate": 1.0255542073498487e-06, |
| "loss": 0.4119, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.6302767227346717, |
| "grad_norm": 0.13806847181548138, |
| "learning_rate": 1.016372804011625e-06, |
| "loss": 0.4224, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.6320130222463374, |
| "grad_norm": 0.11912380035465843, |
| "learning_rate": 1.0072280325013185e-06, |
| "loss": 0.3886, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.6337493217580032, |
| "grad_norm": 0.12481979295903664, |
| "learning_rate": 9.981199769105605e-07, |
| "loss": 0.411, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.6354856212696691, |
| "grad_norm": 0.12010701170379462, |
| "learning_rate": 9.890487209933497e-07, |
| "loss": 0.4158, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.6372219207813348, |
| "grad_norm": 0.1271131768562924, |
| "learning_rate": 9.80014348165298e-07, |
| "loss": 0.4135, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.6389582202930004, |
| "grad_norm": 0.11686752698079612, |
| "learning_rate": 9.710169415028492e-07, |
| "loss": 0.3941, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.6406945198046663, |
| "grad_norm": 0.13442063233496024, |
| "learning_rate": 9.62056583742527e-07, |
| "loss": 0.428, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.6424308193163322, |
| "grad_norm": 0.1367443089530073, |
| "learning_rate": 9.531333572801604e-07, |
| "loss": 0.4306, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.6441671188279978, |
| "grad_norm": 0.12926844185744127, |
| "learning_rate": 9.442473441701422e-07, |
| "loss": 0.4075, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.6459034183396635, |
| "grad_norm": 0.138290795761905, |
| "learning_rate": 9.353986261246606e-07, |
| "loss": 0.4068, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.6476397178513293, |
| "grad_norm": 0.13448081054207545, |
| "learning_rate": 9.26587284512957e-07, |
| "loss": 0.4116, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.6493760173629952, |
| "grad_norm": 0.12689121517003302, |
| "learning_rate": 9.178134003605721e-07, |
| "loss": 0.4095, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.6511123168746609, |
| "grad_norm": 0.11431606376406304, |
| "learning_rate": 9.090770543486033e-07, |
| "loss": 0.4055, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.6528486163863265, |
| "grad_norm": 0.129398563963692, |
| "learning_rate": 9.003783268129612e-07, |
| "loss": 0.42, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.6545849158979924, |
| "grad_norm": 0.12584472294549326, |
| "learning_rate": 8.917172977436356e-07, |
| "loss": 0.4099, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.6563212154096583, |
| "grad_norm": 0.13257419772829573, |
| "learning_rate": 8.830940467839538e-07, |
| "loss": 0.4181, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.658057514921324, |
| "grad_norm": 0.12623087863431037, |
| "learning_rate": 8.745086532298497e-07, |
| "loss": 0.4154, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.6597938144329896, |
| "grad_norm": 0.1323049844837645, |
| "learning_rate": 8.659611960291397e-07, |
| "loss": 0.4227, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.6615301139446554, |
| "grad_norm": 0.11871513898323233, |
| "learning_rate": 8.574517537807897e-07, |
| "loss": 0.3906, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.6632664134563213, |
| "grad_norm": 0.13398389612030392, |
| "learning_rate": 8.48980404734196e-07, |
| "loss": 0.4231, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.665002712967987, |
| "grad_norm": 0.12019923307656202, |
| "learning_rate": 8.40547226788464e-07, |
| "loss": 0.4232, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.6667390124796526, |
| "grad_norm": 0.1275078036550687, |
| "learning_rate": 8.321522974916968e-07, |
| "loss": 0.4127, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.6684753119913185, |
| "grad_norm": 0.12244677864657005, |
| "learning_rate": 8.237956940402758e-07, |
| "loss": 0.4091, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.6702116115029844, |
| "grad_norm": 0.12771023412092483, |
| "learning_rate": 8.154774932781523e-07, |
| "loss": 0.4232, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.67194791101465, |
| "grad_norm": 0.1279342671525142, |
| "learning_rate": 8.071977716961432e-07, |
| "loss": 0.4164, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.6736842105263157, |
| "grad_norm": 0.11698740747617209, |
| "learning_rate": 7.989566054312286e-07, |
| "loss": 0.3993, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.6754205100379815, |
| "grad_norm": 0.11905130215832907, |
| "learning_rate": 7.907540702658456e-07, |
| "loss": 0.3869, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.6771568095496474, |
| "grad_norm": 0.1299614359355832, |
| "learning_rate": 7.82590241627198e-07, |
| "loss": 0.416, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.678893109061313, |
| "grad_norm": 0.11874306176345516, |
| "learning_rate": 7.744651945865572e-07, |
| "loss": 0.4272, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.6806294085729787, |
| "grad_norm": 0.12183634974576847, |
| "learning_rate": 7.663790038585794e-07, |
| "loss": 0.3941, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.6823657080846446, |
| "grad_norm": 0.11711707630925917, |
| "learning_rate": 7.583317438006094e-07, |
| "loss": 0.4115, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.6841020075963105, |
| "grad_norm": 0.11537841404891344, |
| "learning_rate": 7.503234884120031e-07, |
| "loss": 0.4166, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.685838307107976, |
| "grad_norm": 0.12352881324092035, |
| "learning_rate": 7.423543113334436e-07, |
| "loss": 0.3987, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.6875746066196418, |
| "grad_norm": 0.13446570147296136, |
| "learning_rate": 7.344242858462697e-07, |
| "loss": 0.4293, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.6893109061313076, |
| "grad_norm": 0.12555237261456514, |
| "learning_rate": 7.265334848717931e-07, |
| "loss": 0.4024, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.6910472056429735, |
| "grad_norm": 0.12022020939360528, |
| "learning_rate": 7.186819809706358e-07, |
| "loss": 0.4082, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.6927835051546392, |
| "grad_norm": 0.12022778005709792, |
| "learning_rate": 7.108698463420577e-07, |
| "loss": 0.4206, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.6945198046663048, |
| "grad_norm": 0.13192095245849325, |
| "learning_rate": 7.030971528232983e-07, |
| "loss": 0.4258, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.6962561041779707, |
| "grad_norm": 0.1288027515410068, |
| "learning_rate": 6.953639718889077e-07, |
| "loss": 0.4277, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.6979924036896366, |
| "grad_norm": 0.1257765886436919, |
| "learning_rate": 6.876703746500984e-07, |
| "loss": 0.4241, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.6997287032013022, |
| "grad_norm": 0.11829898374820907, |
| "learning_rate": 6.800164318540836e-07, |
| "loss": 0.3969, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.7014650027129679, |
| "grad_norm": 0.12343280352654452, |
| "learning_rate": 6.724022138834341e-07, |
| "loss": 0.4394, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.7032013022246337, |
| "grad_norm": 0.12584810369777363, |
| "learning_rate": 6.648277907554235e-07, |
| "loss": 0.4086, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.7049376017362996, |
| "grad_norm": 0.12220526205664348, |
| "learning_rate": 6.572932321213921e-07, |
| "loss": 0.4149, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.7066739012479653, |
| "grad_norm": 0.11264723449198615, |
| "learning_rate": 6.497986072660989e-07, |
| "loss": 0.3946, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.708410200759631, |
| "grad_norm": 0.11486638829519887, |
| "learning_rate": 6.423439851070884e-07, |
| "loss": 0.4029, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.7101465002712968, |
| "grad_norm": 0.12297059784409017, |
| "learning_rate": 6.349294341940593e-07, |
| "loss": 0.4178, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.7118827997829626, |
| "grad_norm": 0.12946935380664815, |
| "learning_rate": 6.275550227082278e-07, |
| "loss": 0.4286, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.7136190992946283, |
| "grad_norm": 0.13174117409809644, |
| "learning_rate": 6.202208184617065e-07, |
| "loss": 0.4129, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.715355398806294, |
| "grad_norm": 0.1260009778700492, |
| "learning_rate": 6.129268888968759e-07, |
| "loss": 0.4226, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.7170916983179598, |
| "grad_norm": 0.13334490497918267, |
| "learning_rate": 6.056733010857713e-07, |
| "loss": 0.4169, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.7188279978296257, |
| "grad_norm": 0.12492912736636083, |
| "learning_rate": 5.984601217294567e-07, |
| "loss": 0.4414, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.7205642973412913, |
| "grad_norm": 0.12227556231870458, |
| "learning_rate": 5.91287417157419e-07, |
| "loss": 0.4155, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.722300596852957, |
| "grad_norm": 0.12166850206869084, |
| "learning_rate": 5.841552533269534e-07, |
| "loss": 0.4155, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.7240368963646229, |
| "grad_norm": 0.11635001857121037, |
| "learning_rate": 5.770636958225617e-07, |
| "loss": 0.4153, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.7257731958762887, |
| "grad_norm": 0.12318020561314444, |
| "learning_rate": 5.700128098553436e-07, |
| "loss": 0.4072, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.7275094953879544, |
| "grad_norm": 0.12603305151655275, |
| "learning_rate": 5.630026602624011e-07, |
| "loss": 0.4222, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.72924579489962, |
| "grad_norm": 0.11621559170055032, |
| "learning_rate": 5.560333115062389e-07, |
| "loss": 0.3941, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.730982094411286, |
| "grad_norm": 0.12012197885101744, |
| "learning_rate": 5.491048276741784e-07, |
| "loss": 0.4274, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.7327183939229518, |
| "grad_norm": 0.13911959663978574, |
| "learning_rate": 5.422172724777586e-07, |
| "loss": 0.4252, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.7344546934346174, |
| "grad_norm": 0.1248001547802355, |
| "learning_rate": 5.353707092521581e-07, |
| "loss": 0.4057, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.736190992946283, |
| "grad_norm": 0.12501251772698127, |
| "learning_rate": 5.285652009556075e-07, |
| "loss": 0.4318, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.737927292457949, |
| "grad_norm": 0.13319821322804037, |
| "learning_rate": 5.218008101688172e-07, |
| "loss": 0.4211, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.7396635919696148, |
| "grad_norm": 0.13793608628093898, |
| "learning_rate": 5.150775990943924e-07, |
| "loss": 0.4161, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.7413998914812805, |
| "grad_norm": 0.11965611546509496, |
| "learning_rate": 5.083956295562704e-07, |
| "loss": 0.4147, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.7431361909929461, |
| "grad_norm": 0.11638856925820777, |
| "learning_rate": 5.017549629991437e-07, |
| "loss": 0.4156, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.744872490504612, |
| "grad_norm": 0.12049432353418484, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 0.4076, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.746608790016278, |
| "grad_norm": 0.12657454111882746, |
| "learning_rate": 4.885977827070748e-07, |
| "loss": 0.436, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.7483450895279435, |
| "grad_norm": 0.1271471881131416, |
| "learning_rate": 4.820813899602506e-07, |
| "loss": 0.4137, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.7500813890396092, |
| "grad_norm": 0.1194948081487038, |
| "learning_rate": 4.756065421695499e-07, |
| "loss": 0.3982, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.751817688551275, |
| "grad_norm": 0.12495098812494451, |
| "learning_rate": 4.6917329887506133e-07, |
| "loss": 0.4353, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.753553988062941, |
| "grad_norm": 0.1151883405922238, |
| "learning_rate": 4.6278171923429207e-07, |
| "loss": 0.391, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.7552902875746066, |
| "grad_norm": 0.1331087193647889, |
| "learning_rate": 4.5643186202162904e-07, |
| "loss": 0.412, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.7570265870862722, |
| "grad_norm": 0.11920565550399728, |
| "learning_rate": 4.501237856277979e-07, |
| "loss": 0.407, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.7587628865979381, |
| "grad_norm": 0.12336162708275682, |
| "learning_rate": 4.43857548059321e-07, |
| "loss": 0.4104, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.760499186109604, |
| "grad_norm": 0.1177681245029087, |
| "learning_rate": 4.376332069379929e-07, |
| "loss": 0.4033, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.7622354856212696, |
| "grad_norm": 0.11923987199445626, |
| "learning_rate": 4.3145081950033915e-07, |
| "loss": 0.406, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.7639717851329353, |
| "grad_norm": 0.12030800995449681, |
| "learning_rate": 4.2531044259710217e-07, |
| "loss": 0.4142, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.7657080846446012, |
| "grad_norm": 0.13356196556406277, |
| "learning_rate": 4.192121326927073e-07, |
| "loss": 0.4157, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.767444384156267, |
| "grad_norm": 0.11767092243929073, |
| "learning_rate": 4.131559458647544e-07, |
| "loss": 0.3966, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.7691806836679327, |
| "grad_norm": 0.11976922663082386, |
| "learning_rate": 4.0714193780348965e-07, |
| "loss": 0.3953, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.7709169831795983, |
| "grad_norm": 0.11569192386039726, |
| "learning_rate": 4.0117016381130636e-07, |
| "loss": 0.4009, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.7726532826912642, |
| "grad_norm": 0.12306395373992947, |
| "learning_rate": 3.952406788022267e-07, |
| "loss": 0.3994, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.77438958220293, |
| "grad_norm": 0.12551376699711242, |
| "learning_rate": 3.89353537301404e-07, |
| "loss": 0.4179, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.7761258817145957, |
| "grad_norm": 0.1242165395810953, |
| "learning_rate": 3.8350879344461134e-07, |
| "loss": 0.4278, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.7778621812262614, |
| "grad_norm": 0.11672137446032638, |
| "learning_rate": 3.7770650097775805e-07, |
| "loss": 0.4059, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.7795984807379273, |
| "grad_norm": 0.14417998618582262, |
| "learning_rate": 3.71946713256382e-07, |
| "loss": 0.4018, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.7813347802495931, |
| "grad_norm": 0.11215087093514516, |
| "learning_rate": 3.6622948324516796e-07, |
| "loss": 0.4339, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.7830710797612588, |
| "grad_norm": 0.11670782056107784, |
| "learning_rate": 3.6055486351745327e-07, |
| "loss": 0.4165, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.7848073792729244, |
| "grad_norm": 0.11642631630152035, |
| "learning_rate": 3.549229062547532e-07, |
| "loss": 0.3922, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.7865436787845903, |
| "grad_norm": 0.13084447236943444, |
| "learning_rate": 3.4933366324627183e-07, |
| "loss": 0.4372, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.7882799782962562, |
| "grad_norm": 0.12019833156490904, |
| "learning_rate": 3.4378718588843395e-07, |
| "loss": 0.3918, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.7900162778079218, |
| "grad_norm": 0.12401178225076169, |
| "learning_rate": 3.3828352518440464e-07, |
| "loss": 0.4288, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.7917525773195875, |
| "grad_norm": 0.11609111078950146, |
| "learning_rate": 3.328227317436278e-07, |
| "loss": 0.4066, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.7934888768312534, |
| "grad_norm": 0.12936793468009347, |
| "learning_rate": 3.274048557813553e-07, |
| "loss": 0.4284, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.7952251763429192, |
| "grad_norm": 0.13160087563042192, |
| "learning_rate": 3.220299471181898e-07, |
| "loss": 0.4141, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.7969614758545849, |
| "grad_norm": 0.12367898334775716, |
| "learning_rate": 3.1669805517961896e-07, |
| "loss": 0.4163, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.7986977753662505, |
| "grad_norm": 0.11825101838963192, |
| "learning_rate": 3.1140922899557115e-07, |
| "loss": 0.3847, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.8004340748779164, |
| "grad_norm": 0.11677455293491364, |
| "learning_rate": 3.061635171999566e-07, |
| "loss": 0.3883, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.8021703743895823, |
| "grad_norm": 0.12161061205938878, |
| "learning_rate": 3.0096096803022445e-07, |
| "loss": 0.4208, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.803906673901248, |
| "grad_norm": 0.11698975276704002, |
| "learning_rate": 2.9580162932691726e-07, |
| "loss": 0.4057, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.8056429734129136, |
| "grad_norm": 0.11411535406581336, |
| "learning_rate": 2.906855485332305e-07, |
| "loss": 0.4118, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.8073792729245794, |
| "grad_norm": 0.11562855972013814, |
| "learning_rate": 2.85612772694579e-07, |
| "loss": 0.3985, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.8091155724362453, |
| "grad_norm": 0.1180653895313076, |
| "learning_rate": 2.8058334845816214e-07, |
| "loss": 0.4206, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.810851871947911, |
| "grad_norm": 0.1131823910445962, |
| "learning_rate": 2.7559732207253554e-07, |
| "loss": 0.3917, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.8125881714595766, |
| "grad_norm": 0.11671701219857006, |
| "learning_rate": 2.706547393871839e-07, |
| "loss": 0.427, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.8143244709712425, |
| "grad_norm": 0.12812859020701226, |
| "learning_rate": 2.6575564585210487e-07, |
| "loss": 0.4095, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.8160607704829084, |
| "grad_norm": 0.12292614122195747, |
| "learning_rate": 2.609000865173844e-07, |
| "loss": 0.4074, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.817797069994574, |
| "grad_norm": 0.11918461838119136, |
| "learning_rate": 2.5608810603278634e-07, |
| "loss": 0.4144, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.8195333695062397, |
| "grad_norm": 0.12189620357247144, |
| "learning_rate": 2.5131974864734063e-07, |
| "loss": 0.4082, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.8212696690179055, |
| "grad_norm": 0.11828054439793066, |
| "learning_rate": 2.4659505820893827e-07, |
| "loss": 0.4083, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.8230059685295714, |
| "grad_norm": 0.1158006635449286, |
| "learning_rate": 2.4191407816392565e-07, |
| "loss": 0.3963, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.824742268041237, |
| "grad_norm": 0.12599491824541956, |
| "learning_rate": 2.3727685155670587e-07, |
| "loss": 0.4281, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.8264785675529027, |
| "grad_norm": 0.1298331569600777, |
| "learning_rate": 2.3268342102934216e-07, |
| "loss": 0.4222, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.8282148670645686, |
| "grad_norm": 0.126821957156595, |
| "learning_rate": 2.2813382882116986e-07, |
| "loss": 0.4139, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.8299511665762345, |
| "grad_norm": 0.13079207049274877, |
| "learning_rate": 2.2362811676840123e-07, |
| "loss": 0.4451, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.8316874660879001, |
| "grad_norm": 0.12114459628543015, |
| "learning_rate": 2.1916632630374579e-07, |
| "loss": 0.3974, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.8334237655995658, |
| "grad_norm": 0.1140732521419423, |
| "learning_rate": 2.1474849845602773e-07, |
| "loss": 0.4047, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.8351600651112316, |
| "grad_norm": 0.11526093835726821, |
| "learning_rate": 2.1037467384981024e-07, |
| "loss": 0.3992, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.8368963646228975, |
| "grad_norm": 0.12228612221156158, |
| "learning_rate": 2.0604489270501847e-07, |
| "loss": 0.4059, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.8386326641345632, |
| "grad_norm": 0.12477417041122149, |
| "learning_rate": 2.0175919483657213e-07, |
| "loss": 0.4116, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.8403689636462288, |
| "grad_norm": 0.12862208203106815, |
| "learning_rate": 1.975176196540185e-07, |
| "loss": 0.4177, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 0.12578544940193018, |
| "learning_rate": 1.933202061611722e-07, |
| "loss": 0.4095, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.8438415626695606, |
| "grad_norm": 0.12325651839838077, |
| "learning_rate": 1.8916699295575324e-07, |
| "loss": 0.4175, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.8455778621812262, |
| "grad_norm": 0.12464746665679002, |
| "learning_rate": 1.8505801822903459e-07, |
| "loss": 0.4196, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.8473141616928919, |
| "grad_norm": 0.13032362019407107, |
| "learning_rate": 1.8099331976548785e-07, |
| "loss": 0.3897, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.8490504612045577, |
| "grad_norm": 0.11289571970838852, |
| "learning_rate": 1.769729349424415e-07, |
| "loss": 0.3845, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.8507867607162236, |
| "grad_norm": 0.11655854527319932, |
| "learning_rate": 1.729969007297305e-07, |
| "loss": 0.4305, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.8525230602278893, |
| "grad_norm": 0.1261126469926069, |
| "learning_rate": 1.6906525368936055e-07, |
| "loss": 0.4262, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.854259359739555, |
| "grad_norm": 0.12541632651842094, |
| "learning_rate": 1.6517802997517262e-07, |
| "loss": 0.4013, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.8559956592512208, |
| "grad_norm": 0.12921619899241918, |
| "learning_rate": 1.6133526533250566e-07, |
| "loss": 0.4262, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.8577319587628867, |
| "grad_norm": 0.11908846405888013, |
| "learning_rate": 1.5753699509787336e-07, |
| "loss": 0.4355, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.8594682582745523, |
| "grad_norm": 0.12440968519612089, |
| "learning_rate": 1.5378325419863504e-07, |
| "loss": 0.4248, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.861204557786218, |
| "grad_norm": 0.11926491117578981, |
| "learning_rate": 1.5007407715267762e-07, |
| "loss": 0.4058, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.8629408572978838, |
| "grad_norm": 0.142881003325884, |
| "learning_rate": 1.4640949806809523e-07, |
| "loss": 0.4413, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.8646771568095497, |
| "grad_norm": 0.11865749007897276, |
| "learning_rate": 1.4278955064287948e-07, |
| "loss": 0.4345, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.8664134563212154, |
| "grad_norm": 0.11784186117435934, |
| "learning_rate": 1.3921426816460525e-07, |
| "loss": 0.43, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.868149755832881, |
| "grad_norm": 0.12206259247615907, |
| "learning_rate": 1.3568368351012718e-07, |
| "loss": 0.4194, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.8698860553445469, |
| "grad_norm": 0.1281570795041499, |
| "learning_rate": 1.3219782914527633e-07, |
| "loss": 0.4145, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.8716223548562128, |
| "grad_norm": 0.11970320146740036, |
| "learning_rate": 1.287567371245635e-07, |
| "loss": 0.4027, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.8733586543678784, |
| "grad_norm": 0.12848469392555553, |
| "learning_rate": 1.253604390908819e-07, |
| "loss": 0.4321, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.875094953879544, |
| "grad_norm": 0.12770177634383664, |
| "learning_rate": 1.2200896627521718e-07, |
| "loss": 0.4386, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.87683125339121, |
| "grad_norm": 0.12151906694858564, |
| "learning_rate": 1.1870234949636072e-07, |
| "loss": 0.4159, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.8785675529028758, |
| "grad_norm": 0.127017831403236, |
| "learning_rate": 1.154406191606261e-07, |
| "loss": 0.3994, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.8803038524145415, |
| "grad_norm": 0.1262661612491107, |
| "learning_rate": 1.1222380526156929e-07, |
| "loss": 0.4342, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.882040151926207, |
| "grad_norm": 0.1171509178238538, |
| "learning_rate": 1.090519373797122e-07, |
| "loss": 0.4226, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.883776451437873, |
| "grad_norm": 0.13097472029104612, |
| "learning_rate": 1.0592504468227127e-07, |
| "loss": 0.4126, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.8855127509495389, |
| "grad_norm": 0.1187689132650043, |
| "learning_rate": 1.0284315592289041e-07, |
| "loss": 0.4216, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.8872490504612045, |
| "grad_norm": 0.11632907820386926, |
| "learning_rate": 9.98062994413751e-08, |
| "loss": 0.4083, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.8889853499728704, |
| "grad_norm": 0.11874619451388094, |
| "learning_rate": 9.681450316343155e-08, |
| "loss": 0.4076, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.890721649484536, |
| "grad_norm": 0.11553420260775356, |
| "learning_rate": 9.386779460041018e-08, |
| "loss": 0.4022, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.892457948996202, |
| "grad_norm": 0.12777258119761628, |
| "learning_rate": 9.096620084905472e-08, |
| "loss": 0.4215, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.8941942485078678, |
| "grad_norm": 0.1299790103661572, |
| "learning_rate": 8.810974859124966e-08, |
| "loss": 0.4195, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.8959305480195334, |
| "grad_norm": 0.12870008807740843, |
| "learning_rate": 8.529846409377707e-08, |
| "loss": 0.4183, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.897666847531199, |
| "grad_norm": 0.11557600699360503, |
| "learning_rate": 8.253237320807461e-08, |
| "loss": 0.4012, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.899403147042865, |
| "grad_norm": 0.1295692835277435, |
| "learning_rate": 7.981150136999793e-08, |
| "loss": 0.4205, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.9011394465545308, |
| "grad_norm": 0.11763339137349027, |
| "learning_rate": 7.71358735995864e-08, |
| "loss": 0.4137, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.9028757460661965, |
| "grad_norm": 0.12209014747781179, |
| "learning_rate": 7.450551450083277e-08, |
| "loss": 0.415, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.9046120455778621, |
| "grad_norm": 0.11983015977982606, |
| "learning_rate": 7.192044826145772e-08, |
| "loss": 0.3991, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.906348345089528, |
| "grad_norm": 0.1207622356665182, |
| "learning_rate": 6.938069865268737e-08, |
| "loss": 0.4126, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.9080846446011939, |
| "grad_norm": 0.12025369875642083, |
| "learning_rate": 6.688628902903393e-08, |
| "loss": 0.4117, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.9098209441128595, |
| "grad_norm": 0.12948153340332494, |
| "learning_rate": 6.443724232808146e-08, |
| "loss": 0.4252, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.9115572436245252, |
| "grad_norm": 0.11631844929689528, |
| "learning_rate": 6.203358107027491e-08, |
| "loss": 0.4177, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.913293543136191, |
| "grad_norm": 0.12021451757580208, |
| "learning_rate": 5.967532735871306e-08, |
| "loss": 0.42, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.915029842647857, |
| "grad_norm": 0.12120333242584432, |
| "learning_rate": 5.736250287894651e-08, |
| "loss": 0.4301, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.9167661421595226, |
| "grad_norm": 0.11959409905136235, |
| "learning_rate": 5.509512889877333e-08, |
| "loss": 0.4068, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.9185024416711882, |
| "grad_norm": 0.12331314438716552, |
| "learning_rate": 5.2873226268052026e-08, |
| "loss": 0.4026, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.920238741182854, |
| "grad_norm": 0.12692833064254694, |
| "learning_rate": 5.069681541850058e-08, |
| "loss": 0.421, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.92197504069452, |
| "grad_norm": 0.11247262830318959, |
| "learning_rate": 4.856591636351604e-08, |
| "loss": 0.4053, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.9237113402061856, |
| "grad_norm": 0.12245938252177607, |
| "learning_rate": 4.648054869798524e-08, |
| "loss": 0.4167, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.9254476397178513, |
| "grad_norm": 0.12475135088194052, |
| "learning_rate": 4.444073159810769e-08, |
| "loss": 0.4021, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.9271839392295171, |
| "grad_norm": 0.11574725410724895, |
| "learning_rate": 4.244648382121852e-08, |
| "loss": 0.408, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.928920238741183, |
| "grad_norm": 0.12368144526647107, |
| "learning_rate": 4.0497823705615836e-08, |
| "loss": 0.4117, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.9306565382528487, |
| "grad_norm": 0.1206154036080435, |
| "learning_rate": 3.859476917039029e-08, |
| "loss": 0.4091, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.9323928377645143, |
| "grad_norm": 0.11695836630567202, |
| "learning_rate": 3.673733771526466e-08, |
| "loss": 0.4138, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.9341291372761802, |
| "grad_norm": 0.11933510110855337, |
| "learning_rate": 3.492554642042789e-08, |
| "loss": 0.3976, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.935865436787846, |
| "grad_norm": 0.11621387774141514, |
| "learning_rate": 3.315941194638239e-08, |
| "loss": 0.4192, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.9376017362995117, |
| "grad_norm": 0.11785093875152831, |
| "learning_rate": 3.143895053378698e-08, |
| "loss": 0.4088, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.9393380358111774, |
| "grad_norm": 0.11954445365824508, |
| "learning_rate": 2.976417800331144e-08, |
| "loss": 0.4071, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.9410743353228432, |
| "grad_norm": 0.11557274305022142, |
| "learning_rate": 2.8135109755487723e-08, |
| "loss": 0.3902, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.942810634834509, |
| "grad_norm": 0.1298782178121965, |
| "learning_rate": 2.6551760770569534e-08, |
| "loss": 0.4134, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.9445469343461748, |
| "grad_norm": 0.1179201998294052, |
| "learning_rate": 2.501414560839577e-08, |
| "loss": 0.415, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.9462832338578404, |
| "grad_norm": 0.12529243637432458, |
| "learning_rate": 2.352227840825394e-08, |
| "loss": 0.4207, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.9480195333695063, |
| "grad_norm": 0.11761944161526862, |
| "learning_rate": 2.2076172888753632e-08, |
| "loss": 0.4171, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.9497558328811722, |
| "grad_norm": 0.13326349889687525, |
| "learning_rate": 2.067584234769715e-08, |
| "loss": 0.4397, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.9514921323928378, |
| "grad_norm": 0.12285157394273083, |
| "learning_rate": 1.9321299661959614e-08, |
| "loss": 0.4151, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.9532284319045035, |
| "grad_norm": 0.11927335798670852, |
| "learning_rate": 1.8012557287367394e-08, |
| "loss": 0.399, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.9549647314161693, |
| "grad_norm": 0.11652679857972266, |
| "learning_rate": 1.674962725858875e-08, |
| "loss": 0.4194, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.9567010309278352, |
| "grad_norm": 0.11860055221544642, |
| "learning_rate": 1.553252118901727e-08, |
| "loss": 0.4179, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.9584373304395009, |
| "grad_norm": 0.11695805242109508, |
| "learning_rate": 1.4361250270670257e-08, |
| "loss": 0.4183, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.9601736299511665, |
| "grad_norm": 0.13215770831912252, |
| "learning_rate": 1.3235825274081626e-08, |
| "loss": 0.4293, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.9619099294628324, |
| "grad_norm": 0.12055150409768445, |
| "learning_rate": 1.2156256548205292e-08, |
| "loss": 0.4051, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.9636462289744983, |
| "grad_norm": 0.11537922403279285, |
| "learning_rate": 1.1122554020320252e-08, |
| "loss": 0.4317, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.965382528486164, |
| "grad_norm": 0.13083088141242724, |
| "learning_rate": 1.0134727195937332e-08, |
| "loss": 0.4079, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.9671188279978296, |
| "grad_norm": 0.11720738515436362, |
| "learning_rate": 9.192785158713691e-09, |
| "loss": 0.3998, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.9688551275094954, |
| "grad_norm": 0.11709302268501304, |
| "learning_rate": 8.296736570367337e-09, |
| "loss": 0.4006, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.9705914270211613, |
| "grad_norm": 0.11420680025829363, |
| "learning_rate": 7.446589670599968e-09, |
| "loss": 0.3982, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.972327726532827, |
| "grad_norm": 0.1260826669146985, |
| "learning_rate": 6.642352277019815e-09, |
| "loss": 0.4159, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.9740640260444926, |
| "grad_norm": 0.12070921112427768, |
| "learning_rate": 5.884031785068356e-09, |
| "loss": 0.4111, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.9758003255561585, |
| "grad_norm": 0.12073001784468888, |
| "learning_rate": 5.17163516795538e-09, |
| "loss": 0.4108, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.9775366250678243, |
| "grad_norm": 0.12518510846802877, |
| "learning_rate": 4.505168976592922e-09, |
| "loss": 0.4103, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.97927292457949, |
| "grad_norm": 0.12321739657536239, |
| "learning_rate": 3.884639339534202e-09, |
| "loss": 0.4066, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.9810092240911557, |
| "grad_norm": 0.11741501996048638, |
| "learning_rate": 3.3100519629203353e-09, |
| "loss": 0.3731, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.9827455236028215, |
| "grad_norm": 0.11667218226770629, |
| "learning_rate": 2.781412130424266e-09, |
| "loss": 0.3942, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.9844818231144874, |
| "grad_norm": 0.11589729089722366, |
| "learning_rate": 2.298724703204691e-09, |
| "loss": 0.3943, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.986218122626153, |
| "grad_norm": 0.12645976313375637, |
| "learning_rate": 1.861994119860544e-09, |
| "loss": 0.4108, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.9879544221378187, |
| "grad_norm": 0.12047556198437237, |
| "learning_rate": 1.471224396389359e-09, |
| "loss": 0.4021, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.9896907216494846, |
| "grad_norm": 0.11686013457027536, |
| "learning_rate": 1.1264191261528557e-09, |
| "loss": 0.4092, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.9914270211611504, |
| "grad_norm": 0.1321783887220322, |
| "learning_rate": 8.275814798408554e-10, |
| "loss": 0.418, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.993163320672816, |
| "grad_norm": 0.1178482673584452, |
| "learning_rate": 5.747142054429722e-10, |
| "loss": 0.4088, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.9948996201844817, |
| "grad_norm": 0.1205943485930681, |
| "learning_rate": 3.678196282252966e-10, |
| "loss": 0.4181, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.9966359196961476, |
| "grad_norm": 0.1216930863569034, |
| "learning_rate": 2.0689965070652686e-10, |
| "loss": 0.4379, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.9983722192078135, |
| "grad_norm": 0.12024784790971259, |
| "learning_rate": 9.195575264242529e-11, |
| "loss": 0.3954, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.12663974138485112, |
| "learning_rate": 2.2988991009720295e-11, |
| "loss": 0.4082, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1152, |
| "total_flos": 2415371294343168.0, |
| "train_loss": 0.4241238099574629, |
| "train_runtime": 41892.0315, |
| "train_samples_per_second": 1.76, |
| "train_steps_per_second": 0.027 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1152, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2415371294343168.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |