diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,36266 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.9994207375941304, - "eval_steps": 500, - "global_step": 5176, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0007723498744931454, - "grad_norm": 2.1837761402130127, - "learning_rate": 0.0, - "loss": 0.1449, - "step": 1 - }, - { - "epoch": 0.0015446997489862908, - "grad_norm": 1.8389215469360352, - "learning_rate": 1.5455950540958268e-07, - "loss": 0.1331, - "step": 2 - }, - { - "epoch": 0.0023170496234794363, - "grad_norm": 1.9792014360427856, - "learning_rate": 3.0911901081916536e-07, - "loss": 0.1353, - "step": 3 - }, - { - "epoch": 0.0030893994979725816, - "grad_norm": 1.6553817987442017, - "learning_rate": 4.636785162287481e-07, - "loss": 0.1451, - "step": 4 - }, - { - "epoch": 0.003861749372465727, - "grad_norm": 2.098407745361328, - "learning_rate": 6.182380216383307e-07, - "loss": 0.1415, - "step": 5 - }, - { - "epoch": 0.004634099246958873, - "grad_norm": 1.5416218042373657, - "learning_rate": 7.727975270479134e-07, - "loss": 0.1229, - "step": 6 - }, - { - "epoch": 0.0054064491214520175, - "grad_norm": 1.732174038887024, - "learning_rate": 9.273570324574961e-07, - "loss": 0.1284, - "step": 7 - }, - { - "epoch": 0.006178798995945163, - "grad_norm": 1.7713077068328857, - "learning_rate": 1.0819165378670788e-06, - "loss": 0.1171, - "step": 8 - }, - { - "epoch": 0.006951148870438309, - "grad_norm": 1.6853700876235962, - "learning_rate": 1.2364760432766615e-06, - "loss": 0.123, - "step": 9 - }, - { - "epoch": 0.007723498744931454, - "grad_norm": 1.5785168409347534, - "learning_rate": 1.3910355486862442e-06, - "loss": 0.1232, - "step": 10 - }, - { - "epoch": 0.008495848619424599, - "grad_norm": 1.4248701333999634, - "learning_rate": 1.5455950540958269e-06, - "loss": 0.118, - "step": 11 - }, - { - "epoch": 0.009268198493917745, - "grad_norm": 1.0691180229187012, - "learning_rate": 1.7001545595054098e-06, - "loss": 0.0743, - "step": 12 - }, - { - "epoch": 0.01004054836841089, - "grad_norm": 0.9088413119316101, - "learning_rate": 1.8547140649149923e-06, - "loss": 0.077, - "step": 13 - }, - { - "epoch": 0.010812898242904035, - "grad_norm": 0.9975239634513855, - "learning_rate": 2.009273570324575e-06, - "loss": 0.0719, - "step": 14 - }, - { - "epoch": 0.011585248117397182, - "grad_norm": 0.7566964626312256, - "learning_rate": 2.1638330757341575e-06, - "loss": 0.0665, - "step": 15 - }, - { - "epoch": 0.012357597991890326, - "grad_norm": 0.4156074523925781, - "learning_rate": 2.3183925811437404e-06, - "loss": 0.0436, - "step": 16 - }, - { - "epoch": 0.013129947866383471, - "grad_norm": 0.5193804502487183, - "learning_rate": 2.472952086553323e-06, - "loss": 0.0428, - "step": 17 - }, - { - "epoch": 0.013902297740876618, - "grad_norm": 0.4834356904029846, - "learning_rate": 2.627511591962906e-06, - "loss": 0.0419, - "step": 18 - }, - { - "epoch": 0.014674647615369763, - "grad_norm": 0.44509536027908325, - "learning_rate": 2.7820710973724883e-06, - "loss": 0.0392, - "step": 19 - }, - { - "epoch": 0.015446997489862908, - "grad_norm": 0.4127598702907562, - "learning_rate": 2.9366306027820713e-06, - "loss": 0.0372, - "step": 20 - }, - { - "epoch": 0.016219347364356054, - "grad_norm": 0.3719303607940674, - "learning_rate": 3.0911901081916538e-06, - "loss": 0.0352, - "step": 21 - }, - { - "epoch": 0.016991697238849197, - "grad_norm": 0.2237921953201294, - "learning_rate": 3.2457496136012367e-06, - "loss": 0.03, - "step": 22 - }, - { - "epoch": 0.017764047113342344, - "grad_norm": 0.2025945633649826, - "learning_rate": 3.4003091190108196e-06, - "loss": 0.0254, - "step": 23 - }, - { - "epoch": 0.01853639698783549, - "grad_norm": 0.25628677010536194, - "learning_rate": 3.554868624420402e-06, - "loss": 0.0223, - "step": 24 - }, - { - "epoch": 0.019308746862328634, - "grad_norm": 0.21528829634189606, - "learning_rate": 3.7094281298299846e-06, - "loss": 0.0245, - "step": 25 - }, - { - "epoch": 0.02008109673682178, - "grad_norm": 0.17562495172023773, - "learning_rate": 3.863987635239567e-06, - "loss": 0.0215, - "step": 26 - }, - { - "epoch": 0.020853446611314927, - "grad_norm": 0.15615582466125488, - "learning_rate": 4.01854714064915e-06, - "loss": 0.0226, - "step": 27 - }, - { - "epoch": 0.02162579648580807, - "grad_norm": 0.17141973972320557, - "learning_rate": 4.173106646058733e-06, - "loss": 0.0211, - "step": 28 - }, - { - "epoch": 0.022398146360301217, - "grad_norm": 0.14127251505851746, - "learning_rate": 4.327666151468315e-06, - "loss": 0.0202, - "step": 29 - }, - { - "epoch": 0.023170496234794363, - "grad_norm": 0.12122655659914017, - "learning_rate": 4.482225656877898e-06, - "loss": 0.0181, - "step": 30 - }, - { - "epoch": 0.023942846109287506, - "grad_norm": 0.1447962075471878, - "learning_rate": 4.636785162287481e-06, - "loss": 0.0175, - "step": 31 - }, - { - "epoch": 0.024715195983780653, - "grad_norm": 0.11168921738862991, - "learning_rate": 4.791344667697063e-06, - "loss": 0.0175, - "step": 32 - }, - { - "epoch": 0.0254875458582738, - "grad_norm": 0.17067140340805054, - "learning_rate": 4.945904173106646e-06, - "loss": 0.0172, - "step": 33 - }, - { - "epoch": 0.026259895732766943, - "grad_norm": 0.21642448008060455, - "learning_rate": 5.100463678516229e-06, - "loss": 0.016, - "step": 34 - }, - { - "epoch": 0.02703224560726009, - "grad_norm": 0.1376991868019104, - "learning_rate": 5.255023183925812e-06, - "loss": 0.0172, - "step": 35 - }, - { - "epoch": 0.027804595481753236, - "grad_norm": 0.1457221657037735, - "learning_rate": 5.409582689335394e-06, - "loss": 0.0147, - "step": 36 - }, - { - "epoch": 0.02857694535624638, - "grad_norm": 0.12801788747310638, - "learning_rate": 5.564142194744977e-06, - "loss": 0.0137, - "step": 37 - }, - { - "epoch": 0.029349295230739526, - "grad_norm": 0.12587693333625793, - "learning_rate": 5.71870170015456e-06, - "loss": 0.0135, - "step": 38 - }, - { - "epoch": 0.03012164510523267, - "grad_norm": 0.16374649107456207, - "learning_rate": 5.8732612055641425e-06, - "loss": 0.0143, - "step": 39 - }, - { - "epoch": 0.030893994979725815, - "grad_norm": 0.09200599044561386, - "learning_rate": 6.0278207109737254e-06, - "loss": 0.0137, - "step": 40 - }, - { - "epoch": 0.03166634485421896, - "grad_norm": 0.15025416016578674, - "learning_rate": 6.1823802163833075e-06, - "loss": 0.0124, - "step": 41 - }, - { - "epoch": 0.03243869472871211, - "grad_norm": 0.09628334641456604, - "learning_rate": 6.3369397217928904e-06, - "loss": 0.0122, - "step": 42 - }, - { - "epoch": 0.03321104460320525, - "grad_norm": 0.1101807951927185, - "learning_rate": 6.491499227202473e-06, - "loss": 0.0134, - "step": 43 - }, - { - "epoch": 0.033983394477698395, - "grad_norm": 0.2027738243341446, - "learning_rate": 6.646058732612056e-06, - "loss": 0.0127, - "step": 44 - }, - { - "epoch": 0.034755744352191545, - "grad_norm": 0.06605881452560425, - "learning_rate": 6.800618238021639e-06, - "loss": 0.0119, - "step": 45 - }, - { - "epoch": 0.03552809422668469, - "grad_norm": 0.07640182971954346, - "learning_rate": 6.955177743431221e-06, - "loss": 0.0109, - "step": 46 - }, - { - "epoch": 0.03630044410117783, - "grad_norm": 0.16407518088817596, - "learning_rate": 7.109737248840804e-06, - "loss": 0.0121, - "step": 47 - }, - { - "epoch": 0.03707279397567098, - "grad_norm": 0.09163492918014526, - "learning_rate": 7.264296754250387e-06, - "loss": 0.01, - "step": 48 - }, - { - "epoch": 0.037845143850164124, - "grad_norm": 0.05860032141208649, - "learning_rate": 7.418856259659969e-06, - "loss": 0.0119, - "step": 49 - }, - { - "epoch": 0.03861749372465727, - "grad_norm": 0.10120034962892532, - "learning_rate": 7.573415765069553e-06, - "loss": 0.012, - "step": 50 - }, - { - "epoch": 0.03938984359915042, - "grad_norm": 0.08744475245475769, - "learning_rate": 7.727975270479134e-06, - "loss": 0.0105, - "step": 51 - }, - { - "epoch": 0.04016219347364356, - "grad_norm": 0.10905009508132935, - "learning_rate": 7.882534775888716e-06, - "loss": 0.0113, - "step": 52 - }, - { - "epoch": 0.040934543348136704, - "grad_norm": 0.08753761649131775, - "learning_rate": 8.0370942812983e-06, - "loss": 0.0105, - "step": 53 - }, - { - "epoch": 0.041706893222629854, - "grad_norm": 0.07518894970417023, - "learning_rate": 8.191653786707882e-06, - "loss": 0.0104, - "step": 54 - }, - { - "epoch": 0.042479243097123, - "grad_norm": 0.10365454107522964, - "learning_rate": 8.346213292117466e-06, - "loss": 0.0112, - "step": 55 - }, - { - "epoch": 0.04325159297161614, - "grad_norm": 0.0678032711148262, - "learning_rate": 8.500772797527048e-06, - "loss": 0.0108, - "step": 56 - }, - { - "epoch": 0.04402394284610929, - "grad_norm": 0.10868978500366211, - "learning_rate": 8.65533230293663e-06, - "loss": 0.0102, - "step": 57 - }, - { - "epoch": 0.04479629272060243, - "grad_norm": 0.1246650293469429, - "learning_rate": 8.809891808346214e-06, - "loss": 0.0098, - "step": 58 - }, - { - "epoch": 0.045568642595095576, - "grad_norm": 0.06522880494594574, - "learning_rate": 8.964451313755796e-06, - "loss": 0.0101, - "step": 59 - }, - { - "epoch": 0.04634099246958873, - "grad_norm": 0.053914472460746765, - "learning_rate": 9.119010819165378e-06, - "loss": 0.0093, - "step": 60 - }, - { - "epoch": 0.04711334234408187, - "grad_norm": 0.062161996960639954, - "learning_rate": 9.273570324574962e-06, - "loss": 0.0089, - "step": 61 - }, - { - "epoch": 0.04788569221857501, - "grad_norm": 0.055369071662425995, - "learning_rate": 9.428129829984544e-06, - "loss": 0.0085, - "step": 62 - }, - { - "epoch": 0.04865804209306816, - "grad_norm": 0.08797524124383926, - "learning_rate": 9.582689335394126e-06, - "loss": 0.0095, - "step": 63 - }, - { - "epoch": 0.049430391967561306, - "grad_norm": 0.06364715099334717, - "learning_rate": 9.73724884080371e-06, - "loss": 0.0101, - "step": 64 - }, - { - "epoch": 0.05020274184205445, - "grad_norm": 0.09843257814645767, - "learning_rate": 9.891808346213292e-06, - "loss": 0.009, - "step": 65 - }, - { - "epoch": 0.0509750917165476, - "grad_norm": 0.06403323262929916, - "learning_rate": 1.0046367851622875e-05, - "loss": 0.0087, - "step": 66 - }, - { - "epoch": 0.05174744159104074, - "grad_norm": 0.05529443547129631, - "learning_rate": 1.0200927357032458e-05, - "loss": 0.0089, - "step": 67 - }, - { - "epoch": 0.052519791465533885, - "grad_norm": 0.08181367814540863, - "learning_rate": 1.035548686244204e-05, - "loss": 0.0093, - "step": 68 - }, - { - "epoch": 0.053292141340027036, - "grad_norm": 0.060869909822940826, - "learning_rate": 1.0510046367851623e-05, - "loss": 0.0089, - "step": 69 - }, - { - "epoch": 0.05406449121452018, - "grad_norm": 0.06601981818675995, - "learning_rate": 1.0664605873261205e-05, - "loss": 0.0091, - "step": 70 - }, - { - "epoch": 0.05483684108901332, - "grad_norm": 0.10082995146512985, - "learning_rate": 1.0819165378670788e-05, - "loss": 0.0096, - "step": 71 - }, - { - "epoch": 0.05560919096350647, - "grad_norm": 0.046674828976392746, - "learning_rate": 1.0973724884080371e-05, - "loss": 0.0081, - "step": 72 - }, - { - "epoch": 0.056381540837999615, - "grad_norm": 0.06928626447916031, - "learning_rate": 1.1128284389489953e-05, - "loss": 0.0091, - "step": 73 - }, - { - "epoch": 0.05715389071249276, - "grad_norm": 0.1223256066441536, - "learning_rate": 1.1282843894899537e-05, - "loss": 0.0088, - "step": 74 - }, - { - "epoch": 0.0579262405869859, - "grad_norm": 0.06588178128004074, - "learning_rate": 1.143740340030912e-05, - "loss": 0.0086, - "step": 75 - }, - { - "epoch": 0.05869859046147905, - "grad_norm": 0.06649603694677353, - "learning_rate": 1.1591962905718701e-05, - "loss": 0.0082, - "step": 76 - }, - { - "epoch": 0.059470940335972194, - "grad_norm": 0.11596790701150894, - "learning_rate": 1.1746522411128285e-05, - "loss": 0.0092, - "step": 77 - }, - { - "epoch": 0.06024329021046534, - "grad_norm": 0.05574265122413635, - "learning_rate": 1.1901081916537867e-05, - "loss": 0.0088, - "step": 78 - }, - { - "epoch": 0.06101564008495849, - "grad_norm": 0.1380206048488617, - "learning_rate": 1.2055641421947451e-05, - "loss": 0.0081, - "step": 79 - }, - { - "epoch": 0.06178798995945163, - "grad_norm": 0.09319712966680527, - "learning_rate": 1.2210200927357033e-05, - "loss": 0.0092, - "step": 80 - }, - { - "epoch": 0.06256033983394478, - "grad_norm": 0.04295811802148819, - "learning_rate": 1.2364760432766615e-05, - "loss": 0.0085, - "step": 81 - }, - { - "epoch": 0.06333268970843792, - "grad_norm": 0.18484055995941162, - "learning_rate": 1.2519319938176199e-05, - "loss": 0.0089, - "step": 82 - }, - { - "epoch": 0.06410503958293107, - "grad_norm": 0.059900783002376556, - "learning_rate": 1.2673879443585781e-05, - "loss": 0.0079, - "step": 83 - }, - { - "epoch": 0.06487738945742422, - "grad_norm": 0.1664331704378128, - "learning_rate": 1.2828438948995365e-05, - "loss": 0.0086, - "step": 84 - }, - { - "epoch": 0.06564973933191735, - "grad_norm": 0.05938958376646042, - "learning_rate": 1.2982998454404947e-05, - "loss": 0.0083, - "step": 85 - }, - { - "epoch": 0.0664220892064105, - "grad_norm": 0.13517208397388458, - "learning_rate": 1.3137557959814529e-05, - "loss": 0.0086, - "step": 86 - }, - { - "epoch": 0.06719443908090365, - "grad_norm": 0.06283359974622726, - "learning_rate": 1.3292117465224113e-05, - "loss": 0.0081, - "step": 87 - }, - { - "epoch": 0.06796678895539679, - "grad_norm": 0.1026938259601593, - "learning_rate": 1.3446676970633695e-05, - "loss": 0.0077, - "step": 88 - }, - { - "epoch": 0.06873913882988994, - "grad_norm": 0.05881831422448158, - "learning_rate": 1.3601236476043278e-05, - "loss": 0.0085, - "step": 89 - }, - { - "epoch": 0.06951148870438309, - "grad_norm": 0.06477896869182587, - "learning_rate": 1.375579598145286e-05, - "loss": 0.0074, - "step": 90 - }, - { - "epoch": 0.07028383857887623, - "grad_norm": 0.10522706806659698, - "learning_rate": 1.3910355486862443e-05, - "loss": 0.0083, - "step": 91 - }, - { - "epoch": 0.07105618845336938, - "grad_norm": 0.05898185446858406, - "learning_rate": 1.4064914992272025e-05, - "loss": 0.0086, - "step": 92 - }, - { - "epoch": 0.07182853832786253, - "grad_norm": 0.09011103957891464, - "learning_rate": 1.4219474497681608e-05, - "loss": 0.0075, - "step": 93 - }, - { - "epoch": 0.07260088820235566, - "grad_norm": 0.06274860352277756, - "learning_rate": 1.4374034003091192e-05, - "loss": 0.0067, - "step": 94 - }, - { - "epoch": 0.07337323807684881, - "grad_norm": 0.05327846109867096, - "learning_rate": 1.4528593508500774e-05, - "loss": 0.008, - "step": 95 - }, - { - "epoch": 0.07414558795134196, - "grad_norm": 0.1118331104516983, - "learning_rate": 1.4683153013910356e-05, - "loss": 0.0081, - "step": 96 - }, - { - "epoch": 0.0749179378258351, - "grad_norm": 0.05044018477201462, - "learning_rate": 1.4837712519319938e-05, - "loss": 0.0072, - "step": 97 - }, - { - "epoch": 0.07569028770032825, - "grad_norm": 0.11157859861850739, - "learning_rate": 1.4992272024729522e-05, - "loss": 0.0075, - "step": 98 - }, - { - "epoch": 0.0764626375748214, - "grad_norm": 0.0844530537724495, - "learning_rate": 1.5146831530139106e-05, - "loss": 0.0073, - "step": 99 - }, - { - "epoch": 0.07723498744931453, - "grad_norm": 0.06750793009996414, - "learning_rate": 1.5301391035548686e-05, - "loss": 0.0082, - "step": 100 - }, - { - "epoch": 0.07800733732380769, - "grad_norm": 0.09606797993183136, - "learning_rate": 1.545595054095827e-05, - "loss": 0.0079, - "step": 101 - }, - { - "epoch": 0.07877968719830084, - "grad_norm": 0.06051672250032425, - "learning_rate": 1.561051004636785e-05, - "loss": 0.0075, - "step": 102 - }, - { - "epoch": 0.07955203707279397, - "grad_norm": 0.05748724564909935, - "learning_rate": 1.5765069551777432e-05, - "loss": 0.0077, - "step": 103 - }, - { - "epoch": 0.08032438694728712, - "grad_norm": 0.034181009978055954, - "learning_rate": 1.5919629057187018e-05, - "loss": 0.007, - "step": 104 - }, - { - "epoch": 0.08109673682178027, - "grad_norm": 0.0854635238647461, - "learning_rate": 1.60741885625966e-05, - "loss": 0.0075, - "step": 105 - }, - { - "epoch": 0.08186908669627341, - "grad_norm": 0.049009062349796295, - "learning_rate": 1.6228748068006182e-05, - "loss": 0.0074, - "step": 106 - }, - { - "epoch": 0.08264143657076656, - "grad_norm": 0.0906350389122963, - "learning_rate": 1.6383307573415764e-05, - "loss": 0.0079, - "step": 107 - }, - { - "epoch": 0.08341378644525971, - "grad_norm": 0.05115756392478943, - "learning_rate": 1.6537867078825346e-05, - "loss": 0.0072, - "step": 108 - }, - { - "epoch": 0.08418613631975284, - "grad_norm": 0.05932699888944626, - "learning_rate": 1.6692426584234932e-05, - "loss": 0.0071, - "step": 109 - }, - { - "epoch": 0.084958486194246, - "grad_norm": 0.0629432424902916, - "learning_rate": 1.6846986089644514e-05, - "loss": 0.0068, - "step": 110 - }, - { - "epoch": 0.08573083606873914, - "grad_norm": 0.07591287791728973, - "learning_rate": 1.7001545595054096e-05, - "loss": 0.0067, - "step": 111 - }, - { - "epoch": 0.08650318594323228, - "grad_norm": 0.03689542040228844, - "learning_rate": 1.7156105100463678e-05, - "loss": 0.0073, - "step": 112 - }, - { - "epoch": 0.08727553581772543, - "grad_norm": 0.10098189860582352, - "learning_rate": 1.731066460587326e-05, - "loss": 0.0071, - "step": 113 - }, - { - "epoch": 0.08804788569221858, - "grad_norm": 0.05658024176955223, - "learning_rate": 1.7465224111282842e-05, - "loss": 0.0063, - "step": 114 - }, - { - "epoch": 0.08882023556671172, - "grad_norm": 0.05307658389210701, - "learning_rate": 1.7619783616692428e-05, - "loss": 0.0063, - "step": 115 - }, - { - "epoch": 0.08959258544120487, - "grad_norm": 0.08351606130599976, - "learning_rate": 1.777434312210201e-05, - "loss": 0.0075, - "step": 116 - }, - { - "epoch": 0.09036493531569802, - "grad_norm": 0.10230844467878342, - "learning_rate": 1.792890262751159e-05, - "loss": 0.007, - "step": 117 - }, - { - "epoch": 0.09113728519019115, - "grad_norm": 0.03776973858475685, - "learning_rate": 1.8083462132921174e-05, - "loss": 0.0068, - "step": 118 - }, - { - "epoch": 0.0919096350646843, - "grad_norm": 0.0671847015619278, - "learning_rate": 1.8238021638330756e-05, - "loss": 0.0059, - "step": 119 - }, - { - "epoch": 0.09268198493917745, - "grad_norm": 0.03916226327419281, - "learning_rate": 1.839258114374034e-05, - "loss": 0.0066, - "step": 120 - }, - { - "epoch": 0.09345433481367059, - "grad_norm": 0.04165134206414223, - "learning_rate": 1.8547140649149923e-05, - "loss": 0.0076, - "step": 121 - }, - { - "epoch": 0.09422668468816374, - "grad_norm": 0.06339821964502335, - "learning_rate": 1.8701700154559505e-05, - "loss": 0.0071, - "step": 122 - }, - { - "epoch": 0.09499903456265689, - "grad_norm": 0.03353743627667427, - "learning_rate": 1.8856259659969088e-05, - "loss": 0.0074, - "step": 123 - }, - { - "epoch": 0.09577138443715003, - "grad_norm": 0.09131729602813721, - "learning_rate": 1.901081916537867e-05, - "loss": 0.0074, - "step": 124 - }, - { - "epoch": 0.09654373431164318, - "grad_norm": 0.04216662794351578, - "learning_rate": 1.916537867078825e-05, - "loss": 0.0066, - "step": 125 - }, - { - "epoch": 0.09731608418613633, - "grad_norm": 0.11223969608545303, - "learning_rate": 1.9319938176197837e-05, - "loss": 0.0076, - "step": 126 - }, - { - "epoch": 0.09808843406062946, - "grad_norm": 0.04586590453982353, - "learning_rate": 1.947449768160742e-05, - "loss": 0.0065, - "step": 127 - }, - { - "epoch": 0.09886078393512261, - "grad_norm": 0.13881778717041016, - "learning_rate": 1.9629057187017e-05, - "loss": 0.0069, - "step": 128 - }, - { - "epoch": 0.09963313380961576, - "grad_norm": 0.042186565697193146, - "learning_rate": 1.9783616692426583e-05, - "loss": 0.0071, - "step": 129 - }, - { - "epoch": 0.1004054836841089, - "grad_norm": 0.11093481630086899, - "learning_rate": 1.9938176197836165e-05, - "loss": 0.0074, - "step": 130 - }, - { - "epoch": 0.10117783355860205, - "grad_norm": 0.053297363221645355, - "learning_rate": 2.009273570324575e-05, - "loss": 0.0057, - "step": 131 - }, - { - "epoch": 0.1019501834330952, - "grad_norm": 0.10928916931152344, - "learning_rate": 2.0247295208655333e-05, - "loss": 0.0071, - "step": 132 - }, - { - "epoch": 0.10272253330758833, - "grad_norm": 0.06282244622707367, - "learning_rate": 2.0401854714064915e-05, - "loss": 0.0071, - "step": 133 - }, - { - "epoch": 0.10349488318208148, - "grad_norm": 0.08216606825590134, - "learning_rate": 2.0556414219474497e-05, - "loss": 0.0066, - "step": 134 - }, - { - "epoch": 0.10426723305657463, - "grad_norm": 0.03244255110621452, - "learning_rate": 2.071097372488408e-05, - "loss": 0.0064, - "step": 135 - }, - { - "epoch": 0.10503958293106777, - "grad_norm": 0.11407410353422165, - "learning_rate": 2.086553323029366e-05, - "loss": 0.0068, - "step": 136 - }, - { - "epoch": 0.10581193280556092, - "grad_norm": 0.05050407722592354, - "learning_rate": 2.1020092735703247e-05, - "loss": 0.0059, - "step": 137 - }, - { - "epoch": 0.10658428268005407, - "grad_norm": 0.07172229140996933, - "learning_rate": 2.117465224111283e-05, - "loss": 0.0064, - "step": 138 - }, - { - "epoch": 0.10735663255454721, - "grad_norm": 0.0523892417550087, - "learning_rate": 2.132921174652241e-05, - "loss": 0.0064, - "step": 139 - }, - { - "epoch": 0.10812898242904036, - "grad_norm": 0.06238657608628273, - "learning_rate": 2.1483771251931993e-05, - "loss": 0.0063, - "step": 140 - }, - { - "epoch": 0.10890133230353351, - "grad_norm": 0.12138430774211884, - "learning_rate": 2.1638330757341575e-05, - "loss": 0.0074, - "step": 141 - }, - { - "epoch": 0.10967368217802664, - "grad_norm": 0.06384667754173279, - "learning_rate": 2.179289026275116e-05, - "loss": 0.0069, - "step": 142 - }, - { - "epoch": 0.1104460320525198, - "grad_norm": 0.17014510929584503, - "learning_rate": 2.1947449768160743e-05, - "loss": 0.0077, - "step": 143 - }, - { - "epoch": 0.11121838192701294, - "grad_norm": 0.0351419635117054, - "learning_rate": 2.2102009273570325e-05, - "loss": 0.0067, - "step": 144 - }, - { - "epoch": 0.11199073180150608, - "grad_norm": 0.2364615947008133, - "learning_rate": 2.2256568778979907e-05, - "loss": 0.0083, - "step": 145 - }, - { - "epoch": 0.11276308167599923, - "grad_norm": 0.05774795636534691, - "learning_rate": 2.241112828438949e-05, - "loss": 0.0068, - "step": 146 - }, - { - "epoch": 0.11353543155049237, - "grad_norm": 0.23769724369049072, - "learning_rate": 2.2565687789799074e-05, - "loss": 0.0076, - "step": 147 - }, - { - "epoch": 0.11430778142498552, - "grad_norm": 0.048934392631053925, - "learning_rate": 2.2720247295208656e-05, - "loss": 0.0066, - "step": 148 - }, - { - "epoch": 0.11508013129947867, - "grad_norm": 0.22198757529258728, - "learning_rate": 2.287480680061824e-05, - "loss": 0.0083, - "step": 149 - }, - { - "epoch": 0.1158524811739718, - "grad_norm": 0.0632915124297142, - "learning_rate": 2.302936630602782e-05, - "loss": 0.007, - "step": 150 - }, - { - "epoch": 0.11662483104846495, - "grad_norm": 0.17810183763504028, - "learning_rate": 2.3183925811437403e-05, - "loss": 0.008, - "step": 151 - }, - { - "epoch": 0.1173971809229581, - "grad_norm": 0.11858183145523071, - "learning_rate": 2.3338485316846988e-05, - "loss": 0.0073, - "step": 152 - }, - { - "epoch": 0.11816953079745124, - "grad_norm": 0.12697774171829224, - "learning_rate": 2.349304482225657e-05, - "loss": 0.0077, - "step": 153 - }, - { - "epoch": 0.11894188067194439, - "grad_norm": 0.08780393749475479, - "learning_rate": 2.3647604327666152e-05, - "loss": 0.0071, - "step": 154 - }, - { - "epoch": 0.11971423054643754, - "grad_norm": 0.038997404277324677, - "learning_rate": 2.3802163833075734e-05, - "loss": 0.0068, - "step": 155 - }, - { - "epoch": 0.12048658042093068, - "grad_norm": 0.044348638504743576, - "learning_rate": 2.3956723338485316e-05, - "loss": 0.007, - "step": 156 - }, - { - "epoch": 0.12125893029542383, - "grad_norm": 0.09429153054952621, - "learning_rate": 2.4111282843894902e-05, - "loss": 0.008, - "step": 157 - }, - { - "epoch": 0.12203128016991698, - "grad_norm": 0.04546340927481651, - "learning_rate": 2.4265842349304484e-05, - "loss": 0.0062, - "step": 158 - }, - { - "epoch": 0.12280363004441011, - "grad_norm": 0.09436047822237015, - "learning_rate": 2.4420401854714066e-05, - "loss": 0.0065, - "step": 159 - }, - { - "epoch": 0.12357597991890326, - "grad_norm": 0.08909037709236145, - "learning_rate": 2.4574961360123648e-05, - "loss": 0.0069, - "step": 160 - }, - { - "epoch": 0.12434832979339641, - "grad_norm": 0.10167445987462997, - "learning_rate": 2.472952086553323e-05, - "loss": 0.0067, - "step": 161 - }, - { - "epoch": 0.12512067966788956, - "grad_norm": 0.10640830546617508, - "learning_rate": 2.4884080370942815e-05, - "loss": 0.0068, - "step": 162 - }, - { - "epoch": 0.1258930295423827, - "grad_norm": 0.04076463729143143, - "learning_rate": 2.5038639876352398e-05, - "loss": 0.0073, - "step": 163 - }, - { - "epoch": 0.12666537941687583, - "grad_norm": 0.08261235058307648, - "learning_rate": 2.519319938176198e-05, - "loss": 0.007, - "step": 164 - }, - { - "epoch": 0.12743772929136898, - "grad_norm": 0.13118483126163483, - "learning_rate": 2.5347758887171562e-05, - "loss": 0.0074, - "step": 165 - }, - { - "epoch": 0.12821007916586213, - "grad_norm": 0.07147473096847534, - "learning_rate": 2.5502318392581144e-05, - "loss": 0.0064, - "step": 166 - }, - { - "epoch": 0.12898242904035528, - "grad_norm": 0.11924638599157333, - "learning_rate": 2.565687789799073e-05, - "loss": 0.0065, - "step": 167 - }, - { - "epoch": 0.12975477891484843, - "grad_norm": 0.07254443317651749, - "learning_rate": 2.581143740340031e-05, - "loss": 0.0068, - "step": 168 - }, - { - "epoch": 0.13052712878934158, - "grad_norm": 0.1017264872789383, - "learning_rate": 2.5965996908809893e-05, - "loss": 0.0073, - "step": 169 - }, - { - "epoch": 0.1312994786638347, - "grad_norm": 0.10837958008050919, - "learning_rate": 2.6120556414219475e-05, - "loss": 0.0065, - "step": 170 - }, - { - "epoch": 0.13207182853832786, - "grad_norm": 0.043897368013858795, - "learning_rate": 2.6275115919629058e-05, - "loss": 0.0068, - "step": 171 - }, - { - "epoch": 0.132844178412821, - "grad_norm": 0.08516182005405426, - "learning_rate": 2.6429675425038643e-05, - "loss": 0.0071, - "step": 172 - }, - { - "epoch": 0.13361652828731416, - "grad_norm": 0.08410457521677017, - "learning_rate": 2.6584234930448225e-05, - "loss": 0.0072, - "step": 173 - }, - { - "epoch": 0.1343888781618073, - "grad_norm": 0.04611523821949959, - "learning_rate": 2.6738794435857807e-05, - "loss": 0.0058, - "step": 174 - }, - { - "epoch": 0.13516122803630046, - "grad_norm": 0.08548586070537567, - "learning_rate": 2.689335394126739e-05, - "loss": 0.0068, - "step": 175 - }, - { - "epoch": 0.13593357791079358, - "grad_norm": 0.0654478371143341, - "learning_rate": 2.704791344667697e-05, - "loss": 0.0076, - "step": 176 - }, - { - "epoch": 0.13670592778528673, - "grad_norm": 0.09992239624261856, - "learning_rate": 2.7202472952086557e-05, - "loss": 0.0068, - "step": 177 - }, - { - "epoch": 0.13747827765977988, - "grad_norm": 0.06172487512230873, - "learning_rate": 2.735703245749614e-05, - "loss": 0.0073, - "step": 178 - }, - { - "epoch": 0.13825062753427303, - "grad_norm": 0.09481542557477951, - "learning_rate": 2.751159196290572e-05, - "loss": 0.0075, - "step": 179 - }, - { - "epoch": 0.13902297740876618, - "grad_norm": 0.055736932903528214, - "learning_rate": 2.7666151468315303e-05, - "loss": 0.0055, - "step": 180 - }, - { - "epoch": 0.13979532728325933, - "grad_norm": 0.05875783413648605, - "learning_rate": 2.7820710973724885e-05, - "loss": 0.0067, - "step": 181 - }, - { - "epoch": 0.14056767715775245, - "grad_norm": 0.07550349831581116, - "learning_rate": 2.797527047913447e-05, - "loss": 0.0066, - "step": 182 - }, - { - "epoch": 0.1413400270322456, - "grad_norm": 0.03867774456739426, - "learning_rate": 2.812982998454405e-05, - "loss": 0.0063, - "step": 183 - }, - { - "epoch": 0.14211237690673875, - "grad_norm": 0.05367950350046158, - "learning_rate": 2.8284389489953635e-05, - "loss": 0.0065, - "step": 184 - }, - { - "epoch": 0.1428847267812319, - "grad_norm": 0.06944329291582108, - "learning_rate": 2.8438948995363217e-05, - "loss": 0.0065, - "step": 185 - }, - { - "epoch": 0.14365707665572505, - "grad_norm": 0.044338397681713104, - "learning_rate": 2.85935085007728e-05, - "loss": 0.0059, - "step": 186 - }, - { - "epoch": 0.1444294265302182, - "grad_norm": 0.07662731409072876, - "learning_rate": 2.8748068006182384e-05, - "loss": 0.0057, - "step": 187 - }, - { - "epoch": 0.14520177640471132, - "grad_norm": 0.03504624217748642, - "learning_rate": 2.8902627511591963e-05, - "loss": 0.0051, - "step": 188 - }, - { - "epoch": 0.14597412627920447, - "grad_norm": 0.05816268175840378, - "learning_rate": 2.905718701700155e-05, - "loss": 0.0063, - "step": 189 - }, - { - "epoch": 0.14674647615369762, - "grad_norm": 0.05528225749731064, - "learning_rate": 2.921174652241113e-05, - "loss": 0.0063, - "step": 190 - }, - { - "epoch": 0.14751882602819077, - "grad_norm": 0.03650132939219475, - "learning_rate": 2.9366306027820713e-05, - "loss": 0.0063, - "step": 191 - }, - { - "epoch": 0.14829117590268392, - "grad_norm": 0.03839438036084175, - "learning_rate": 2.9520865533230298e-05, - "loss": 0.0056, - "step": 192 - }, - { - "epoch": 0.14906352577717707, - "grad_norm": 0.02672256901860237, - "learning_rate": 2.9675425038639877e-05, - "loss": 0.0063, - "step": 193 - }, - { - "epoch": 0.1498358756516702, - "grad_norm": 0.04782088100910187, - "learning_rate": 2.9829984544049462e-05, - "loss": 0.0059, - "step": 194 - }, - { - "epoch": 0.15060822552616335, - "grad_norm": 0.03413840010762215, - "learning_rate": 2.9984544049459044e-05, - "loss": 0.0053, - "step": 195 - }, - { - "epoch": 0.1513805754006565, - "grad_norm": 0.05395448952913284, - "learning_rate": 3.0139103554868626e-05, - "loss": 0.006, - "step": 196 - }, - { - "epoch": 0.15215292527514965, - "grad_norm": 0.04196440801024437, - "learning_rate": 3.0293663060278212e-05, - "loss": 0.0064, - "step": 197 - }, - { - "epoch": 0.1529252751496428, - "grad_norm": 0.06425611674785614, - "learning_rate": 3.044822256568779e-05, - "loss": 0.0064, - "step": 198 - }, - { - "epoch": 0.15369762502413592, - "grad_norm": 0.053349222987890244, - "learning_rate": 3.060278207109737e-05, - "loss": 0.0058, - "step": 199 - }, - { - "epoch": 0.15446997489862907, - "grad_norm": 0.05441872030496597, - "learning_rate": 3.075734157650695e-05, - "loss": 0.0058, - "step": 200 - }, - { - "epoch": 0.15524232477312222, - "grad_norm": 0.03474319726228714, - "learning_rate": 3.091190108191654e-05, - "loss": 0.006, - "step": 201 - }, - { - "epoch": 0.15601467464761537, - "grad_norm": 0.03509390726685524, - "learning_rate": 3.106646058732612e-05, - "loss": 0.0058, - "step": 202 - }, - { - "epoch": 0.15678702452210852, - "grad_norm": 0.03735740855336189, - "learning_rate": 3.12210200927357e-05, - "loss": 0.0057, - "step": 203 - }, - { - "epoch": 0.15755937439660167, - "grad_norm": 0.049221672117710114, - "learning_rate": 3.1375579598145286e-05, - "loss": 0.0068, - "step": 204 - }, - { - "epoch": 0.1583317242710948, - "grad_norm": 0.056076932698488235, - "learning_rate": 3.1530139103554865e-05, - "loss": 0.0063, - "step": 205 - }, - { - "epoch": 0.15910407414558794, - "grad_norm": 0.033057134598493576, - "learning_rate": 3.168469860896445e-05, - "loss": 0.0057, - "step": 206 - }, - { - "epoch": 0.1598764240200811, - "grad_norm": 0.030060870572924614, - "learning_rate": 3.1839258114374036e-05, - "loss": 0.0062, - "step": 207 - }, - { - "epoch": 0.16064877389457424, - "grad_norm": 0.0711582824587822, - "learning_rate": 3.1993817619783615e-05, - "loss": 0.006, - "step": 208 - }, - { - "epoch": 0.1614211237690674, - "grad_norm": 0.03385183587670326, - "learning_rate": 3.21483771251932e-05, - "loss": 0.0053, - "step": 209 - }, - { - "epoch": 0.16219347364356054, - "grad_norm": 0.08179536461830139, - "learning_rate": 3.230293663060278e-05, - "loss": 0.0056, - "step": 210 - }, - { - "epoch": 0.16296582351805367, - "grad_norm": 0.03334837406873703, - "learning_rate": 3.2457496136012364e-05, - "loss": 0.005, - "step": 211 - }, - { - "epoch": 0.16373817339254682, - "grad_norm": 0.0625922828912735, - "learning_rate": 3.261205564142195e-05, - "loss": 0.006, - "step": 212 - }, - { - "epoch": 0.16451052326703997, - "grad_norm": 0.03503840044140816, - "learning_rate": 3.276661514683153e-05, - "loss": 0.0064, - "step": 213 - }, - { - "epoch": 0.16528287314153312, - "grad_norm": 0.07841236144304276, - "learning_rate": 3.2921174652241114e-05, - "loss": 0.0064, - "step": 214 - }, - { - "epoch": 0.16605522301602627, - "grad_norm": 0.03800879791378975, - "learning_rate": 3.307573415765069e-05, - "loss": 0.0061, - "step": 215 - }, - { - "epoch": 0.16682757289051942, - "grad_norm": 0.04207073524594307, - "learning_rate": 3.323029366306028e-05, - "loss": 0.0055, - "step": 216 - }, - { - "epoch": 0.16759992276501254, - "grad_norm": 0.03547835350036621, - "learning_rate": 3.3384853168469863e-05, - "loss": 0.0059, - "step": 217 - }, - { - "epoch": 0.1683722726395057, - "grad_norm": 0.04894952476024628, - "learning_rate": 3.353941267387944e-05, - "loss": 0.0053, - "step": 218 - }, - { - "epoch": 0.16914462251399884, - "grad_norm": 0.03328663110733032, - "learning_rate": 3.369397217928903e-05, - "loss": 0.0062, - "step": 219 - }, - { - "epoch": 0.169916972388492, - "grad_norm": 0.04965837672352791, - "learning_rate": 3.3848531684698606e-05, - "loss": 0.0062, - "step": 220 - }, - { - "epoch": 0.17068932226298514, - "grad_norm": 0.04981934279203415, - "learning_rate": 3.400309119010819e-05, - "loss": 0.006, - "step": 221 - }, - { - "epoch": 0.1714616721374783, - "grad_norm": 0.07044146209955215, - "learning_rate": 3.415765069551777e-05, - "loss": 0.0054, - "step": 222 - }, - { - "epoch": 0.1722340220119714, - "grad_norm": 0.03921792656183243, - "learning_rate": 3.4312210200927356e-05, - "loss": 0.0051, - "step": 223 - }, - { - "epoch": 0.17300637188646456, - "grad_norm": 0.038964755833148956, - "learning_rate": 3.446676970633694e-05, - "loss": 0.0066, - "step": 224 - }, - { - "epoch": 0.1737787217609577, - "grad_norm": 0.03377487137913704, - "learning_rate": 3.462132921174652e-05, - "loss": 0.0061, - "step": 225 - }, - { - "epoch": 0.17455107163545086, - "grad_norm": 0.04873465746641159, - "learning_rate": 3.4775888717156105e-05, - "loss": 0.0067, - "step": 226 - }, - { - "epoch": 0.175323421509944, - "grad_norm": 0.041648928076028824, - "learning_rate": 3.4930448222565684e-05, - "loss": 0.0064, - "step": 227 - }, - { - "epoch": 0.17609577138443716, - "grad_norm": 0.0782196968793869, - "learning_rate": 3.508500772797527e-05, - "loss": 0.0069, - "step": 228 - }, - { - "epoch": 0.17686812125893028, - "grad_norm": 0.05687787011265755, - "learning_rate": 3.5239567233384855e-05, - "loss": 0.0064, - "step": 229 - }, - { - "epoch": 0.17764047113342343, - "grad_norm": 0.07424864917993546, - "learning_rate": 3.5394126738794434e-05, - "loss": 0.006, - "step": 230 - }, - { - "epoch": 0.17841282100791658, - "grad_norm": 0.09180114418268204, - "learning_rate": 3.554868624420402e-05, - "loss": 0.0058, - "step": 231 - }, - { - "epoch": 0.17918517088240973, - "grad_norm": 0.045950617641210556, - "learning_rate": 3.57032457496136e-05, - "loss": 0.0055, - "step": 232 - }, - { - "epoch": 0.17995752075690288, - "grad_norm": 0.053155139088630676, - "learning_rate": 3.585780525502318e-05, - "loss": 0.0059, - "step": 233 - }, - { - "epoch": 0.18072987063139603, - "grad_norm": 0.03958377242088318, - "learning_rate": 3.601236476043277e-05, - "loss": 0.0062, - "step": 234 - }, - { - "epoch": 0.18150222050588916, - "grad_norm": 0.09630659967660904, - "learning_rate": 3.616692426584235e-05, - "loss": 0.0054, - "step": 235 - }, - { - "epoch": 0.1822745703803823, - "grad_norm": 0.06993494927883148, - "learning_rate": 3.632148377125193e-05, - "loss": 0.0062, - "step": 236 - }, - { - "epoch": 0.18304692025487546, - "grad_norm": 0.07463036477565765, - "learning_rate": 3.647604327666151e-05, - "loss": 0.0065, - "step": 237 - }, - { - "epoch": 0.1838192701293686, - "grad_norm": 0.08071650564670563, - "learning_rate": 3.66306027820711e-05, - "loss": 0.0069, - "step": 238 - }, - { - "epoch": 0.18459162000386176, - "grad_norm": 0.07690216600894928, - "learning_rate": 3.678516228748068e-05, - "loss": 0.0062, - "step": 239 - }, - { - "epoch": 0.1853639698783549, - "grad_norm": 0.02988547831773758, - "learning_rate": 3.693972179289026e-05, - "loss": 0.0058, - "step": 240 - }, - { - "epoch": 0.18613631975284803, - "grad_norm": 0.059305574744939804, - "learning_rate": 3.709428129829985e-05, - "loss": 0.0067, - "step": 241 - }, - { - "epoch": 0.18690866962734118, - "grad_norm": 0.04826758801937103, - "learning_rate": 3.7248840803709425e-05, - "loss": 0.0067, - "step": 242 - }, - { - "epoch": 0.18768101950183433, - "grad_norm": 0.0522642657160759, - "learning_rate": 3.740340030911901e-05, - "loss": 0.0065, - "step": 243 - }, - { - "epoch": 0.18845336937632748, - "grad_norm": 0.10103264451026917, - "learning_rate": 3.755795981452859e-05, - "loss": 0.006, - "step": 244 - }, - { - "epoch": 0.18922571925082063, - "grad_norm": 0.03253243863582611, - "learning_rate": 3.7712519319938175e-05, - "loss": 0.0053, - "step": 245 - }, - { - "epoch": 0.18999806912531378, - "grad_norm": 0.06003435328602791, - "learning_rate": 3.786707882534776e-05, - "loss": 0.0064, - "step": 246 - }, - { - "epoch": 0.1907704189998069, - "grad_norm": 0.08122384548187256, - "learning_rate": 3.802163833075734e-05, - "loss": 0.0062, - "step": 247 - }, - { - "epoch": 0.19154276887430005, - "grad_norm": 0.08827490359544754, - "learning_rate": 3.8176197836166925e-05, - "loss": 0.0067, - "step": 248 - }, - { - "epoch": 0.1923151187487932, - "grad_norm": 0.09226624667644501, - "learning_rate": 3.83307573415765e-05, - "loss": 0.0066, - "step": 249 - }, - { - "epoch": 0.19308746862328635, - "grad_norm": 0.05763600394129753, - "learning_rate": 3.848531684698609e-05, - "loss": 0.0061, - "step": 250 - }, - { - "epoch": 0.1938598184977795, - "grad_norm": 0.02883163094520569, - "learning_rate": 3.8639876352395674e-05, - "loss": 0.0057, - "step": 251 - }, - { - "epoch": 0.19463216837227265, - "grad_norm": 0.09011675417423248, - "learning_rate": 3.879443585780525e-05, - "loss": 0.006, - "step": 252 - }, - { - "epoch": 0.19540451824676577, - "grad_norm": 0.03947719186544418, - "learning_rate": 3.894899536321484e-05, - "loss": 0.0061, - "step": 253 - }, - { - "epoch": 0.19617686812125892, - "grad_norm": 0.10283095389604568, - "learning_rate": 3.910355486862442e-05, - "loss": 0.007, - "step": 254 - }, - { - "epoch": 0.19694921799575207, - "grad_norm": 0.05724601447582245, - "learning_rate": 3.9258114374034e-05, - "loss": 0.0062, - "step": 255 - }, - { - "epoch": 0.19772156787024522, - "grad_norm": 0.10304121673107147, - "learning_rate": 3.941267387944359e-05, - "loss": 0.0067, - "step": 256 - }, - { - "epoch": 0.19849391774473837, - "grad_norm": 0.04029000177979469, - "learning_rate": 3.956723338485317e-05, - "loss": 0.0062, - "step": 257 - }, - { - "epoch": 0.19926626761923152, - "grad_norm": 0.07087238132953644, - "learning_rate": 3.972179289026275e-05, - "loss": 0.0067, - "step": 258 - }, - { - "epoch": 0.20003861749372465, - "grad_norm": 0.02597912587225437, - "learning_rate": 3.987635239567233e-05, - "loss": 0.0054, - "step": 259 - }, - { - "epoch": 0.2008109673682178, - "grad_norm": 0.11193391680717468, - "learning_rate": 4.0030911901081916e-05, - "loss": 0.0071, - "step": 260 - }, - { - "epoch": 0.20158331724271095, - "grad_norm": 0.03230161592364311, - "learning_rate": 4.01854714064915e-05, - "loss": 0.0058, - "step": 261 - }, - { - "epoch": 0.2023556671172041, - "grad_norm": 0.056707777082920074, - "learning_rate": 4.034003091190108e-05, - "loss": 0.0063, - "step": 262 - }, - { - "epoch": 0.20312801699169725, - "grad_norm": 0.07985185086727142, - "learning_rate": 4.0494590417310666e-05, - "loss": 0.0066, - "step": 263 - }, - { - "epoch": 0.2039003668661904, - "grad_norm": 0.06168767064809799, - "learning_rate": 4.0649149922720245e-05, - "loss": 0.0059, - "step": 264 - }, - { - "epoch": 0.20467271674068352, - "grad_norm": 0.06294318288564682, - "learning_rate": 4.080370942812983e-05, - "loss": 0.006, - "step": 265 - }, - { - "epoch": 0.20544506661517667, - "grad_norm": 0.0714600458741188, - "learning_rate": 4.095826893353941e-05, - "loss": 0.0063, - "step": 266 - }, - { - "epoch": 0.20621741648966982, - "grad_norm": 0.04438761621713638, - "learning_rate": 4.1112828438948994e-05, - "loss": 0.0057, - "step": 267 - }, - { - "epoch": 0.20698976636416297, - "grad_norm": 0.06038789451122284, - "learning_rate": 4.126738794435858e-05, - "loss": 0.0067, - "step": 268 - }, - { - "epoch": 0.20776211623865612, - "grad_norm": 0.044002216309309006, - "learning_rate": 4.142194744976816e-05, - "loss": 0.0063, - "step": 269 - }, - { - "epoch": 0.20853446611314927, - "grad_norm": 0.06005591154098511, - "learning_rate": 4.1576506955177744e-05, - "loss": 0.0056, - "step": 270 - }, - { - "epoch": 0.2093068159876424, - "grad_norm": 0.026720965281128883, - "learning_rate": 4.173106646058732e-05, - "loss": 0.0057, - "step": 271 - }, - { - "epoch": 0.21007916586213554, - "grad_norm": 0.032197173684835434, - "learning_rate": 4.188562596599691e-05, - "loss": 0.0063, - "step": 272 - }, - { - "epoch": 0.2108515157366287, - "grad_norm": 0.02827630750834942, - "learning_rate": 4.2040185471406493e-05, - "loss": 0.0059, - "step": 273 - }, - { - "epoch": 0.21162386561112184, - "grad_norm": 0.03269730508327484, - "learning_rate": 4.219474497681607e-05, - "loss": 0.0063, - "step": 274 - }, - { - "epoch": 0.212396215485615, - "grad_norm": 0.03561446815729141, - "learning_rate": 4.234930448222566e-05, - "loss": 0.0062, - "step": 275 - }, - { - "epoch": 0.21316856536010814, - "grad_norm": 0.0301744993776083, - "learning_rate": 4.2503863987635236e-05, - "loss": 0.0061, - "step": 276 - }, - { - "epoch": 0.21394091523460126, - "grad_norm": 0.02276015095412731, - "learning_rate": 4.265842349304482e-05, - "loss": 0.0059, - "step": 277 - }, - { - "epoch": 0.21471326510909441, - "grad_norm": 0.025467311963438988, - "learning_rate": 4.281298299845441e-05, - "loss": 0.0053, - "step": 278 - }, - { - "epoch": 0.21548561498358756, - "grad_norm": 0.03545083850622177, - "learning_rate": 4.2967542503863986e-05, - "loss": 0.0051, - "step": 279 - }, - { - "epoch": 0.21625796485808071, - "grad_norm": 0.06959273666143417, - "learning_rate": 4.312210200927357e-05, - "loss": 0.0066, - "step": 280 - }, - { - "epoch": 0.21703031473257386, - "grad_norm": 0.03239291533827782, - "learning_rate": 4.327666151468315e-05, - "loss": 0.0053, - "step": 281 - }, - { - "epoch": 0.21780266460706701, - "grad_norm": 0.03332449123263359, - "learning_rate": 4.3431221020092735e-05, - "loss": 0.0065, - "step": 282 - }, - { - "epoch": 0.21857501448156014, - "grad_norm": 0.026551589369773865, - "learning_rate": 4.358578052550232e-05, - "loss": 0.0062, - "step": 283 - }, - { - "epoch": 0.2193473643560533, - "grad_norm": 0.0345880500972271, - "learning_rate": 4.37403400309119e-05, - "loss": 0.0057, - "step": 284 - }, - { - "epoch": 0.22011971423054644, - "grad_norm": 0.05873372405767441, - "learning_rate": 4.3894899536321485e-05, - "loss": 0.0059, - "step": 285 - }, - { - "epoch": 0.2208920641050396, - "grad_norm": 0.03387603163719177, - "learning_rate": 4.4049459041731064e-05, - "loss": 0.0056, - "step": 286 - }, - { - "epoch": 0.22166441397953274, - "grad_norm": 0.08423435688018799, - "learning_rate": 4.420401854714065e-05, - "loss": 0.0064, - "step": 287 - }, - { - "epoch": 0.2224367638540259, - "grad_norm": 0.04661883786320686, - "learning_rate": 4.4358578052550235e-05, - "loss": 0.0061, - "step": 288 - }, - { - "epoch": 0.223209113728519, - "grad_norm": 0.09756273031234741, - "learning_rate": 4.451313755795981e-05, - "loss": 0.0062, - "step": 289 - }, - { - "epoch": 0.22398146360301216, - "grad_norm": 0.0334688164293766, - "learning_rate": 4.46676970633694e-05, - "loss": 0.0056, - "step": 290 - }, - { - "epoch": 0.2247538134775053, - "grad_norm": 0.037672027945518494, - "learning_rate": 4.482225656877898e-05, - "loss": 0.0056, - "step": 291 - }, - { - "epoch": 0.22552616335199846, - "grad_norm": 0.05709156394004822, - "learning_rate": 4.497681607418856e-05, - "loss": 0.0064, - "step": 292 - }, - { - "epoch": 0.2262985132264916, - "grad_norm": 0.06096167117357254, - "learning_rate": 4.513137557959815e-05, - "loss": 0.0061, - "step": 293 - }, - { - "epoch": 0.22707086310098473, - "grad_norm": 0.04873086139559746, - "learning_rate": 4.528593508500773e-05, - "loss": 0.006, - "step": 294 - }, - { - "epoch": 0.22784321297547788, - "grad_norm": 0.08215577900409698, - "learning_rate": 4.544049459041731e-05, - "loss": 0.0065, - "step": 295 - }, - { - "epoch": 0.22861556284997103, - "grad_norm": 0.05225319415330887, - "learning_rate": 4.559505409582689e-05, - "loss": 0.0061, - "step": 296 - }, - { - "epoch": 0.22938791272446418, - "grad_norm": 0.0716412216424942, - "learning_rate": 4.574961360123648e-05, - "loss": 0.0056, - "step": 297 - }, - { - "epoch": 0.23016026259895733, - "grad_norm": 0.03296257182955742, - "learning_rate": 4.590417310664606e-05, - "loss": 0.0054, - "step": 298 - }, - { - "epoch": 0.23093261247345048, - "grad_norm": 0.06722760200500488, - "learning_rate": 4.605873261205564e-05, - "loss": 0.0055, - "step": 299 - }, - { - "epoch": 0.2317049623479436, - "grad_norm": 0.04291321709752083, - "learning_rate": 4.6213292117465226e-05, - "loss": 0.0063, - "step": 300 - }, - { - "epoch": 0.23247731222243675, - "grad_norm": 0.07270011305809021, - "learning_rate": 4.6367851622874805e-05, - "loss": 0.0063, - "step": 301 - }, - { - "epoch": 0.2332496620969299, - "grad_norm": 0.10372505336999893, - "learning_rate": 4.652241112828439e-05, - "loss": 0.0061, - "step": 302 - }, - { - "epoch": 0.23402201197142306, - "grad_norm": 0.034952372312545776, - "learning_rate": 4.6676970633693976e-05, - "loss": 0.0052, - "step": 303 - }, - { - "epoch": 0.2347943618459162, - "grad_norm": 0.12165629863739014, - "learning_rate": 4.6831530139103555e-05, - "loss": 0.0072, - "step": 304 - }, - { - "epoch": 0.23556671172040936, - "grad_norm": 0.03678225353360176, - "learning_rate": 4.698608964451314e-05, - "loss": 0.0062, - "step": 305 - }, - { - "epoch": 0.23633906159490248, - "grad_norm": 0.14635953307151794, - "learning_rate": 4.714064914992272e-05, - "loss": 0.0071, - "step": 306 - }, - { - "epoch": 0.23711141146939563, - "grad_norm": 0.05414648354053497, - "learning_rate": 4.7295208655332304e-05, - "loss": 0.0056, - "step": 307 - }, - { - "epoch": 0.23788376134388878, - "grad_norm": 0.09111734479665756, - "learning_rate": 4.744976816074189e-05, - "loss": 0.007, - "step": 308 - }, - { - "epoch": 0.23865611121838193, - "grad_norm": 0.01853596605360508, - "learning_rate": 4.760432766615147e-05, - "loss": 0.0055, - "step": 309 - }, - { - "epoch": 0.23942846109287508, - "grad_norm": 0.09702561795711517, - "learning_rate": 4.7758887171561054e-05, - "loss": 0.0063, - "step": 310 - }, - { - "epoch": 0.24020081096736823, - "grad_norm": 0.04423897713422775, - "learning_rate": 4.791344667697063e-05, - "loss": 0.0058, - "step": 311 - }, - { - "epoch": 0.24097316084186135, - "grad_norm": 0.1399751603603363, - "learning_rate": 4.806800618238022e-05, - "loss": 0.0068, - "step": 312 - }, - { - "epoch": 0.2417455107163545, - "grad_norm": 0.03153667598962784, - "learning_rate": 4.8222565687789803e-05, - "loss": 0.0056, - "step": 313 - }, - { - "epoch": 0.24251786059084765, - "grad_norm": 0.11411945521831512, - "learning_rate": 4.837712519319938e-05, - "loss": 0.0069, - "step": 314 - }, - { - "epoch": 0.2432902104653408, - "grad_norm": 0.031384337693452835, - "learning_rate": 4.853168469860897e-05, - "loss": 0.0052, - "step": 315 - }, - { - "epoch": 0.24406256033983395, - "grad_norm": 0.07609619945287704, - "learning_rate": 4.8686244204018546e-05, - "loss": 0.0064, - "step": 316 - }, - { - "epoch": 0.2448349102143271, - "grad_norm": 0.04893430694937706, - "learning_rate": 4.884080370942813e-05, - "loss": 0.0058, - "step": 317 - }, - { - "epoch": 0.24560726008882022, - "grad_norm": 0.09350036829710007, - "learning_rate": 4.899536321483772e-05, - "loss": 0.0059, - "step": 318 - }, - { - "epoch": 0.24637960996331337, - "grad_norm": 0.09150069952011108, - "learning_rate": 4.9149922720247296e-05, - "loss": 0.0059, - "step": 319 - }, - { - "epoch": 0.24715195983780652, - "grad_norm": 0.04132336005568504, - "learning_rate": 4.930448222565688e-05, - "loss": 0.0054, - "step": 320 - }, - { - "epoch": 0.24792430971229967, - "grad_norm": 0.07091164588928223, - "learning_rate": 4.945904173106646e-05, - "loss": 0.0053, - "step": 321 - }, - { - "epoch": 0.24869665958679282, - "grad_norm": 0.042904872447252274, - "learning_rate": 4.9613601236476046e-05, - "loss": 0.0059, - "step": 322 - }, - { - "epoch": 0.24946900946128597, - "grad_norm": 0.07893645018339157, - "learning_rate": 4.976816074188563e-05, - "loss": 0.0065, - "step": 323 - }, - { - "epoch": 0.2502413593357791, - "grad_norm": 0.03633784502744675, - "learning_rate": 4.992272024729521e-05, - "loss": 0.0062, - "step": 324 - }, - { - "epoch": 0.2510137092102723, - "grad_norm": 0.047179654240608215, - "learning_rate": 5.0077279752704795e-05, - "loss": 0.0057, - "step": 325 - }, - { - "epoch": 0.2517860590847654, - "grad_norm": 0.03592117503285408, - "learning_rate": 5.0231839258114374e-05, - "loss": 0.0066, - "step": 326 - }, - { - "epoch": 0.2525584089592585, - "grad_norm": 0.026356182992458344, - "learning_rate": 5.038639876352396e-05, - "loss": 0.0053, - "step": 327 - }, - { - "epoch": 0.25333075883375167, - "grad_norm": 0.05666210874915123, - "learning_rate": 5.0540958268933545e-05, - "loss": 0.006, - "step": 328 - }, - { - "epoch": 0.2541031087082448, - "grad_norm": 0.02799471653997898, - "learning_rate": 5.0695517774343123e-05, - "loss": 0.0053, - "step": 329 - }, - { - "epoch": 0.25487545858273797, - "grad_norm": 0.035501424223184586, - "learning_rate": 5.085007727975271e-05, - "loss": 0.0054, - "step": 330 - }, - { - "epoch": 0.2556478084572311, - "grad_norm": 0.03726429119706154, - "learning_rate": 5.100463678516229e-05, - "loss": 0.006, - "step": 331 - }, - { - "epoch": 0.25642015833172427, - "grad_norm": 0.04929223656654358, - "learning_rate": 5.115919629057187e-05, - "loss": 0.0062, - "step": 332 - }, - { - "epoch": 0.2571925082062174, - "grad_norm": 0.044612541794776917, - "learning_rate": 5.131375579598146e-05, - "loss": 0.0062, - "step": 333 - }, - { - "epoch": 0.25796485808071057, - "grad_norm": 0.05634415149688721, - "learning_rate": 5.146831530139104e-05, - "loss": 0.0066, - "step": 334 - }, - { - "epoch": 0.2587372079552037, - "grad_norm": 0.02783166617155075, - "learning_rate": 5.162287480680062e-05, - "loss": 0.0062, - "step": 335 - }, - { - "epoch": 0.25950955782969687, - "grad_norm": 0.07673410326242447, - "learning_rate": 5.17774343122102e-05, - "loss": 0.0068, - "step": 336 - }, - { - "epoch": 0.26028190770419, - "grad_norm": 0.03702232986688614, - "learning_rate": 5.193199381761979e-05, - "loss": 0.0061, - "step": 337 - }, - { - "epoch": 0.26105425757868317, - "grad_norm": 0.042356688529253006, - "learning_rate": 5.2086553323029365e-05, - "loss": 0.0053, - "step": 338 - }, - { - "epoch": 0.26182660745317626, - "grad_norm": 0.031067850068211555, - "learning_rate": 5.224111282843895e-05, - "loss": 0.0057, - "step": 339 - }, - { - "epoch": 0.2625989573276694, - "grad_norm": 0.03430347144603729, - "learning_rate": 5.2395672333848536e-05, - "loss": 0.0059, - "step": 340 - }, - { - "epoch": 0.26337130720216256, - "grad_norm": 0.076685830950737, - "learning_rate": 5.2550231839258115e-05, - "loss": 0.0061, - "step": 341 - }, - { - "epoch": 0.2641436570766557, - "grad_norm": 0.02771041728556156, - "learning_rate": 5.27047913446677e-05, - "loss": 0.0053, - "step": 342 - }, - { - "epoch": 0.26491600695114886, - "grad_norm": 0.08005037903785706, - "learning_rate": 5.2859350850077286e-05, - "loss": 0.0057, - "step": 343 - }, - { - "epoch": 0.265688356825642, - "grad_norm": 0.04103340208530426, - "learning_rate": 5.3013910355486865e-05, - "loss": 0.0054, - "step": 344 - }, - { - "epoch": 0.26646070670013516, - "grad_norm": 0.06979218870401382, - "learning_rate": 5.316846986089645e-05, - "loss": 0.0057, - "step": 345 - }, - { - "epoch": 0.2672330565746283, - "grad_norm": 0.060539260506629944, - "learning_rate": 5.332302936630603e-05, - "loss": 0.0058, - "step": 346 - }, - { - "epoch": 0.26800540644912146, - "grad_norm": 0.034149207174777985, - "learning_rate": 5.3477588871715614e-05, - "loss": 0.0056, - "step": 347 - }, - { - "epoch": 0.2687777563236146, - "grad_norm": 0.04316283389925957, - "learning_rate": 5.363214837712519e-05, - "loss": 0.0059, - "step": 348 - }, - { - "epoch": 0.26955010619810776, - "grad_norm": 0.07623675465583801, - "learning_rate": 5.378670788253478e-05, - "loss": 0.0067, - "step": 349 - }, - { - "epoch": 0.2703224560726009, - "grad_norm": 0.07627623528242111, - "learning_rate": 5.3941267387944364e-05, - "loss": 0.006, - "step": 350 - }, - { - "epoch": 0.271094805947094, - "grad_norm": 0.026273977011442184, - "learning_rate": 5.409582689335394e-05, - "loss": 0.0052, - "step": 351 - }, - { - "epoch": 0.27186715582158716, - "grad_norm": 0.06081831455230713, - "learning_rate": 5.425038639876353e-05, - "loss": 0.0058, - "step": 352 - }, - { - "epoch": 0.2726395056960803, - "grad_norm": 0.024972470477223396, - "learning_rate": 5.4404945904173114e-05, - "loss": 0.0052, - "step": 353 - }, - { - "epoch": 0.27341185557057346, - "grad_norm": 0.033721551299095154, - "learning_rate": 5.455950540958269e-05, - "loss": 0.006, - "step": 354 - }, - { - "epoch": 0.2741842054450666, - "grad_norm": 0.03825666382908821, - "learning_rate": 5.471406491499228e-05, - "loss": 0.0068, - "step": 355 - }, - { - "epoch": 0.27495655531955976, - "grad_norm": 0.019865261390805244, - "learning_rate": 5.4868624420401856e-05, - "loss": 0.0057, - "step": 356 - }, - { - "epoch": 0.2757289051940529, - "grad_norm": 0.05228486657142639, - "learning_rate": 5.502318392581144e-05, - "loss": 0.0056, - "step": 357 - }, - { - "epoch": 0.27650125506854606, - "grad_norm": 0.022940287366509438, - "learning_rate": 5.517774343122102e-05, - "loss": 0.0056, - "step": 358 - }, - { - "epoch": 0.2772736049430392, - "grad_norm": 0.031132381409406662, - "learning_rate": 5.5332302936630606e-05, - "loss": 0.0052, - "step": 359 - }, - { - "epoch": 0.27804595481753236, - "grad_norm": 0.029625840485095978, - "learning_rate": 5.548686244204019e-05, - "loss": 0.0062, - "step": 360 - }, - { - "epoch": 0.2788183046920255, - "grad_norm": 0.03403817117214203, - "learning_rate": 5.564142194744977e-05, - "loss": 0.0057, - "step": 361 - }, - { - "epoch": 0.27959065456651866, - "grad_norm": 0.020263448357582092, - "learning_rate": 5.5795981452859356e-05, - "loss": 0.006, - "step": 362 - }, - { - "epoch": 0.28036300444101175, - "grad_norm": 0.029121456667780876, - "learning_rate": 5.595054095826894e-05, - "loss": 0.0053, - "step": 363 - }, - { - "epoch": 0.2811353543155049, - "grad_norm": 0.053699836134910583, - "learning_rate": 5.610510046367852e-05, - "loss": 0.0048, - "step": 364 - }, - { - "epoch": 0.28190770418999805, - "grad_norm": 0.04384114220738411, - "learning_rate": 5.62596599690881e-05, - "loss": 0.006, - "step": 365 - }, - { - "epoch": 0.2826800540644912, - "grad_norm": 0.024282528087496758, - "learning_rate": 5.6414219474497684e-05, - "loss": 0.0052, - "step": 366 - }, - { - "epoch": 0.28345240393898435, - "grad_norm": 0.02922219969332218, - "learning_rate": 5.656877897990727e-05, - "loss": 0.005, - "step": 367 - }, - { - "epoch": 0.2842247538134775, - "grad_norm": 0.09684975445270538, - "learning_rate": 5.672333848531685e-05, - "loss": 0.0055, - "step": 368 - }, - { - "epoch": 0.28499710368797065, - "grad_norm": 0.03495265543460846, - "learning_rate": 5.6877897990726433e-05, - "loss": 0.0055, - "step": 369 - }, - { - "epoch": 0.2857694535624638, - "grad_norm": 0.18816547095775604, - "learning_rate": 5.703245749613602e-05, - "loss": 0.0084, - "step": 370 - }, - { - "epoch": 0.28654180343695695, - "grad_norm": 0.022471094503998756, - "learning_rate": 5.71870170015456e-05, - "loss": 0.0054, - "step": 371 - }, - { - "epoch": 0.2873141533114501, - "grad_norm": 0.8913330435752869, - "learning_rate": 5.734157650695518e-05, - "loss": 0.0188, - "step": 372 - }, - { - "epoch": 0.28808650318594325, - "grad_norm": 8.70426082611084, - "learning_rate": 5.749613601236477e-05, - "loss": 0.4367, - "step": 373 - }, - { - "epoch": 0.2888588530604364, - "grad_norm": 2.9028074741363525, - "learning_rate": 5.765069551777435e-05, - "loss": 0.2147, - "step": 374 - }, - { - "epoch": 0.2896312029349295, - "grad_norm": 2.0662808418273926, - "learning_rate": 5.7805255023183926e-05, - "loss": 0.0749, - "step": 375 - }, - { - "epoch": 0.29040355280942265, - "grad_norm": 11.876626968383789, - "learning_rate": 5.795981452859351e-05, - "loss": 0.3195, - "step": 376 - }, - { - "epoch": 0.2911759026839158, - "grad_norm": 8.665094375610352, - "learning_rate": 5.81143740340031e-05, - "loss": 0.2859, - "step": 377 - }, - { - "epoch": 0.29194825255840895, - "grad_norm": 4.957003593444824, - "learning_rate": 5.8268933539412676e-05, - "loss": 0.2408, - "step": 378 - }, - { - "epoch": 0.2927206024329021, - "grad_norm": 10.162996292114258, - "learning_rate": 5.842349304482226e-05, - "loss": 0.4644, - "step": 379 - }, - { - "epoch": 0.29349295230739525, - "grad_norm": 44.583221435546875, - "learning_rate": 5.8578052550231846e-05, - "loss": 1.6812, - "step": 380 - }, - { - "epoch": 0.2942653021818884, - "grad_norm": 5.381219863891602, - "learning_rate": 5.8732612055641425e-05, - "loss": 0.456, - "step": 381 - }, - { - "epoch": 0.29503765205638155, - "grad_norm": 5.965335369110107, - "learning_rate": 5.8887171561051004e-05, - "loss": 0.4624, - "step": 382 - }, - { - "epoch": 0.2958100019308747, - "grad_norm": 1.930649757385254, - "learning_rate": 5.9041731066460596e-05, - "loss": 0.1439, - "step": 383 - }, - { - "epoch": 0.29658235180536785, - "grad_norm": 1.6111472845077515, - "learning_rate": 5.9196290571870175e-05, - "loss": 0.256, - "step": 384 - }, - { - "epoch": 0.297354701679861, - "grad_norm": 3.4821295738220215, - "learning_rate": 5.9350850077279753e-05, - "loss": 0.3274, - "step": 385 - }, - { - "epoch": 0.29812705155435415, - "grad_norm": 1.2926265001296997, - "learning_rate": 5.950540958268934e-05, - "loss": 0.1754, - "step": 386 - }, - { - "epoch": 0.29889940142884724, - "grad_norm": 0.8014867305755615, - "learning_rate": 5.9659969088098924e-05, - "loss": 0.142, - "step": 387 - }, - { - "epoch": 0.2996717513033404, - "grad_norm": 8.201684951782227, - "learning_rate": 5.98145285935085e-05, - "loss": 0.4574, - "step": 388 - }, - { - "epoch": 0.30044410117783354, - "grad_norm": 0.9916243553161621, - "learning_rate": 5.996908809891809e-05, - "loss": 0.1185, - "step": 389 - }, - { - "epoch": 0.3012164510523267, - "grad_norm": 1.5478551387786865, - "learning_rate": 6.0123647604327674e-05, - "loss": 0.152, - "step": 390 - }, - { - "epoch": 0.30198880092681984, - "grad_norm": 0.656897783279419, - "learning_rate": 6.027820710973725e-05, - "loss": 0.1092, - "step": 391 - }, - { - "epoch": 0.302761150801313, - "grad_norm": 1.1630992889404297, - "learning_rate": 6.043276661514683e-05, - "loss": 0.1185, - "step": 392 - }, - { - "epoch": 0.30353350067580614, - "grad_norm": 0.7383008003234863, - "learning_rate": 6.0587326120556424e-05, - "loss": 0.0938, - "step": 393 - }, - { - "epoch": 0.3043058505502993, - "grad_norm": 1.26111900806427, - "learning_rate": 6.0741885625966e-05, - "loss": 0.1211, - "step": 394 - }, - { - "epoch": 0.30507820042479245, - "grad_norm": 0.7731469869613647, - "learning_rate": 6.089644513137558e-05, - "loss": 0.1022, - "step": 395 - }, - { - "epoch": 0.3058505502992856, - "grad_norm": 0.7344601154327393, - "learning_rate": 6.105100463678517e-05, - "loss": 0.1251, - "step": 396 - }, - { - "epoch": 0.30662290017377875, - "grad_norm": 0.5411269068717957, - "learning_rate": 6.120556414219475e-05, - "loss": 0.0928, - "step": 397 - }, - { - "epoch": 0.30739525004827184, - "grad_norm": 0.5494950413703918, - "learning_rate": 6.136012364760433e-05, - "loss": 0.0914, - "step": 398 - }, - { - "epoch": 0.308167599922765, - "grad_norm": 0.4900193512439728, - "learning_rate": 6.15146831530139e-05, - "loss": 0.1169, - "step": 399 - }, - { - "epoch": 0.30893994979725814, - "grad_norm": 0.43041563034057617, - "learning_rate": 6.16692426584235e-05, - "loss": 0.094, - "step": 400 - }, - { - "epoch": 0.3097122996717513, - "grad_norm": 0.5949247479438782, - "learning_rate": 6.182380216383307e-05, - "loss": 0.0922, - "step": 401 - }, - { - "epoch": 0.31048464954624444, - "grad_norm": 0.3937532305717468, - "learning_rate": 6.197836166924266e-05, - "loss": 0.0831, - "step": 402 - }, - { - "epoch": 0.3112569994207376, - "grad_norm": 0.3710464537143707, - "learning_rate": 6.213292117465224e-05, - "loss": 0.0836, - "step": 403 - }, - { - "epoch": 0.31202934929523074, - "grad_norm": 0.1914016306400299, - "learning_rate": 6.228748068006183e-05, - "loss": 0.0889, - "step": 404 - }, - { - "epoch": 0.3128016991697239, - "grad_norm": 0.3411383330821991, - "learning_rate": 6.24420401854714e-05, - "loss": 0.0853, - "step": 405 - }, - { - "epoch": 0.31357404904421704, - "grad_norm": 0.27261802554130554, - "learning_rate": 6.2596599690881e-05, - "loss": 0.0863, - "step": 406 - }, - { - "epoch": 0.3143463989187102, - "grad_norm": 0.26665183901786804, - "learning_rate": 6.275115919629057e-05, - "loss": 0.0803, - "step": 407 - }, - { - "epoch": 0.31511874879320334, - "grad_norm": 0.437089204788208, - "learning_rate": 6.290571870170016e-05, - "loss": 0.0794, - "step": 408 - }, - { - "epoch": 0.3158910986676965, - "grad_norm": 0.18276162445545197, - "learning_rate": 6.306027820710973e-05, - "loss": 0.0807, - "step": 409 - }, - { - "epoch": 0.3166634485421896, - "grad_norm": 0.35369428992271423, - "learning_rate": 6.321483771251933e-05, - "loss": 0.0796, - "step": 410 - }, - { - "epoch": 0.31743579841668274, - "grad_norm": 0.020447123795747757, - "learning_rate": 6.33693972179289e-05, - "loss": 0.0816, - "step": 411 - }, - { - "epoch": 0.3182081482911759, - "grad_norm": 0.34988123178482056, - "learning_rate": 6.352395672333849e-05, - "loss": 0.0843, - "step": 412 - }, - { - "epoch": 0.31898049816566904, - "grad_norm": 0.044847674667835236, - "learning_rate": 6.367851622874807e-05, - "loss": 0.0807, - "step": 413 - }, - { - "epoch": 0.3197528480401622, - "grad_norm": 0.2252042591571808, - "learning_rate": 6.383307573415766e-05, - "loss": 0.0786, - "step": 414 - }, - { - "epoch": 0.32052519791465534, - "grad_norm": 0.13536381721496582, - "learning_rate": 6.398763523956723e-05, - "loss": 0.0789, - "step": 415 - }, - { - "epoch": 0.3212975477891485, - "grad_norm": 0.1970210075378418, - "learning_rate": 6.414219474497683e-05, - "loss": 0.071, - "step": 416 - }, - { - "epoch": 0.32206989766364164, - "grad_norm": 0.13441815972328186, - "learning_rate": 6.42967542503864e-05, - "loss": 0.0782, - "step": 417 - }, - { - "epoch": 0.3228422475381348, - "grad_norm": 0.14775612950325012, - "learning_rate": 6.445131375579599e-05, - "loss": 0.0758, - "step": 418 - }, - { - "epoch": 0.32361459741262794, - "grad_norm": 0.0626678317785263, - "learning_rate": 6.460587326120556e-05, - "loss": 0.0745, - "step": 419 - }, - { - "epoch": 0.3243869472871211, - "grad_norm": 0.05171412229537964, - "learning_rate": 6.476043276661516e-05, - "loss": 0.0756, - "step": 420 - }, - { - "epoch": 0.32515929716161424, - "grad_norm": 0.14175881445407867, - "learning_rate": 6.491499227202473e-05, - "loss": 0.0746, - "step": 421 - }, - { - "epoch": 0.32593164703610733, - "grad_norm": 0.05274312198162079, - "learning_rate": 6.506955177743431e-05, - "loss": 0.0744, - "step": 422 - }, - { - "epoch": 0.3267039969106005, - "grad_norm": 0.1517016589641571, - "learning_rate": 6.52241112828439e-05, - "loss": 0.0766, - "step": 423 - }, - { - "epoch": 0.32747634678509363, - "grad_norm": 0.05368823930621147, - "learning_rate": 6.537867078825348e-05, - "loss": 0.0749, - "step": 424 - }, - { - "epoch": 0.3282486966595868, - "grad_norm": 0.14097322523593903, - "learning_rate": 6.553323029366306e-05, - "loss": 0.0764, - "step": 425 - }, - { - "epoch": 0.32902104653407993, - "grad_norm": 0.06039084121584892, - "learning_rate": 6.568778979907264e-05, - "loss": 0.0709, - "step": 426 - }, - { - "epoch": 0.3297933964085731, - "grad_norm": 0.07845164835453033, - "learning_rate": 6.584234930448223e-05, - "loss": 0.0743, - "step": 427 - }, - { - "epoch": 0.33056574628306623, - "grad_norm": 0.06691066920757294, - "learning_rate": 6.599690880989181e-05, - "loss": 0.0735, - "step": 428 - }, - { - "epoch": 0.3313380961575594, - "grad_norm": 0.02390989474952221, - "learning_rate": 6.615146831530138e-05, - "loss": 0.0713, - "step": 429 - }, - { - "epoch": 0.33211044603205253, - "grad_norm": 0.029864365234971046, - "learning_rate": 6.630602782071098e-05, - "loss": 0.0752, - "step": 430 - }, - { - "epoch": 0.3328827959065457, - "grad_norm": 0.09867202490568161, - "learning_rate": 6.646058732612056e-05, - "loss": 0.0742, - "step": 431 - }, - { - "epoch": 0.33365514578103883, - "grad_norm": 0.2730984687805176, - "learning_rate": 6.661514683153014e-05, - "loss": 0.0733, - "step": 432 - }, - { - "epoch": 0.334427495655532, - "grad_norm": 0.1675487458705902, - "learning_rate": 6.676970633693973e-05, - "loss": 0.0799, - "step": 433 - }, - { - "epoch": 0.3351998455300251, - "grad_norm": 0.25640159845352173, - "learning_rate": 6.692426584234931e-05, - "loss": 0.0725, - "step": 434 - }, - { - "epoch": 0.3359721954045182, - "grad_norm": 0.32263967394828796, - "learning_rate": 6.707882534775888e-05, - "loss": 0.0745, - "step": 435 - }, - { - "epoch": 0.3367445452790114, - "grad_norm": 0.10832744091749191, - "learning_rate": 6.723338485316847e-05, - "loss": 0.0689, - "step": 436 - }, - { - "epoch": 0.3375168951535045, - "grad_norm": 0.2080148458480835, - "learning_rate": 6.738794435857806e-05, - "loss": 0.0779, - "step": 437 - }, - { - "epoch": 0.3382892450279977, - "grad_norm": 0.14392602443695068, - "learning_rate": 6.754250386398764e-05, - "loss": 0.0709, - "step": 438 - }, - { - "epoch": 0.3390615949024908, - "grad_norm": 0.1571575552225113, - "learning_rate": 6.769706336939721e-05, - "loss": 0.0731, - "step": 439 - }, - { - "epoch": 0.339833944776984, - "grad_norm": 0.15750271081924438, - "learning_rate": 6.785162287480681e-05, - "loss": 0.0756, - "step": 440 - }, - { - "epoch": 0.3406062946514771, - "grad_norm": 0.1180683821439743, - "learning_rate": 6.800618238021638e-05, - "loss": 0.0737, - "step": 441 - }, - { - "epoch": 0.3413786445259703, - "grad_norm": 0.16251571476459503, - "learning_rate": 6.816074188562597e-05, - "loss": 0.0692, - "step": 442 - }, - { - "epoch": 0.3421509944004634, - "grad_norm": 0.07380519807338715, - "learning_rate": 6.831530139103554e-05, - "loss": 0.0722, - "step": 443 - }, - { - "epoch": 0.3429233442749566, - "grad_norm": 0.22383369505405426, - "learning_rate": 6.846986089644514e-05, - "loss": 0.0681, - "step": 444 - }, - { - "epoch": 0.3436956941494497, - "grad_norm": 0.1768750697374344, - "learning_rate": 6.862442040185471e-05, - "loss": 0.0693, - "step": 445 - }, - { - "epoch": 0.3444680440239428, - "grad_norm": 0.0998828187584877, - "learning_rate": 6.87789799072643e-05, - "loss": 0.076, - "step": 446 - }, - { - "epoch": 0.34524039389843597, - "grad_norm": 0.1407008320093155, - "learning_rate": 6.893353941267388e-05, - "loss": 0.0734, - "step": 447 - }, - { - "epoch": 0.3460127437729291, - "grad_norm": 0.24386470019817352, - "learning_rate": 6.908809891808347e-05, - "loss": 0.069, - "step": 448 - }, - { - "epoch": 0.34678509364742227, - "grad_norm": 0.17217887938022614, - "learning_rate": 6.924265842349304e-05, - "loss": 0.0718, - "step": 449 - }, - { - "epoch": 0.3475574435219154, - "grad_norm": 0.1998705416917801, - "learning_rate": 6.939721792890264e-05, - "loss": 0.0779, - "step": 450 - }, - { - "epoch": 0.34832979339640857, - "grad_norm": 0.15912608802318573, - "learning_rate": 6.955177743431221e-05, - "loss": 0.0738, - "step": 451 - }, - { - "epoch": 0.3491021432709017, - "grad_norm": 0.7292158603668213, - "learning_rate": 6.97063369397218e-05, - "loss": 0.0815, - "step": 452 - }, - { - "epoch": 0.34987449314539487, - "grad_norm": 0.16888517141342163, - "learning_rate": 6.986089644513137e-05, - "loss": 0.0757, - "step": 453 - }, - { - "epoch": 0.350646843019888, - "grad_norm": 0.22534602880477905, - "learning_rate": 7.001545595054097e-05, - "loss": 0.0881, - "step": 454 - }, - { - "epoch": 0.35141919289438117, - "grad_norm": 0.2260252833366394, - "learning_rate": 7.017001545595054e-05, - "loss": 0.0936, - "step": 455 - }, - { - "epoch": 0.3521915427688743, - "grad_norm": 0.16626620292663574, - "learning_rate": 7.032457496136012e-05, - "loss": 0.0894, - "step": 456 - }, - { - "epoch": 0.35296389264336747, - "grad_norm": 0.1906137317419052, - "learning_rate": 7.047913446676971e-05, - "loss": 0.0864, - "step": 457 - }, - { - "epoch": 0.35373624251786057, - "grad_norm": 0.16349031031131744, - "learning_rate": 7.06336939721793e-05, - "loss": 0.0781, - "step": 458 - }, - { - "epoch": 0.3545085923923537, - "grad_norm": 0.07731667160987854, - "learning_rate": 7.078825347758887e-05, - "loss": 0.0671, - "step": 459 - }, - { - "epoch": 0.35528094226684687, - "grad_norm": 0.818091094493866, - "learning_rate": 7.094281298299847e-05, - "loss": 0.0816, - "step": 460 - }, - { - "epoch": 0.35605329214134, - "grad_norm": 0.04961278289556503, - "learning_rate": 7.109737248840804e-05, - "loss": 0.0618, - "step": 461 - }, - { - "epoch": 0.35682564201583317, - "grad_norm": 0.14381299912929535, - "learning_rate": 7.125193199381762e-05, - "loss": 0.0698, - "step": 462 - }, - { - "epoch": 0.3575979918903263, - "grad_norm": 0.16918033361434937, - "learning_rate": 7.14064914992272e-05, - "loss": 0.0746, - "step": 463 - }, - { - "epoch": 0.35837034176481947, - "grad_norm": 0.14989601075649261, - "learning_rate": 7.15610510046368e-05, - "loss": 0.075, - "step": 464 - }, - { - "epoch": 0.3591426916393126, - "grad_norm": 0.15754370391368866, - "learning_rate": 7.171561051004637e-05, - "loss": 0.0706, - "step": 465 - }, - { - "epoch": 0.35991504151380577, - "grad_norm": 0.14635930955410004, - "learning_rate": 7.187017001545595e-05, - "loss": 0.0695, - "step": 466 - }, - { - "epoch": 0.3606873913882989, - "grad_norm": 0.12360066920518875, - "learning_rate": 7.202472952086554e-05, - "loss": 0.07, - "step": 467 - }, - { - "epoch": 0.36145974126279207, - "grad_norm": 0.0649237334728241, - "learning_rate": 7.217928902627512e-05, - "loss": 0.0675, - "step": 468 - }, - { - "epoch": 0.3622320911372852, - "grad_norm": 0.294392466545105, - "learning_rate": 7.23338485316847e-05, - "loss": 0.0634, - "step": 469 - }, - { - "epoch": 0.3630044410117783, - "grad_norm": 0.2768172323703766, - "learning_rate": 7.248840803709428e-05, - "loss": 0.0642, - "step": 470 - }, - { - "epoch": 0.36377679088627146, - "grad_norm": 0.054166581481695175, - "learning_rate": 7.264296754250387e-05, - "loss": 0.0653, - "step": 471 - }, - { - "epoch": 0.3645491407607646, - "grad_norm": 0.16986453533172607, - "learning_rate": 7.279752704791345e-05, - "loss": 0.059, - "step": 472 - }, - { - "epoch": 0.36532149063525776, - "grad_norm": 0.15569911897182465, - "learning_rate": 7.295208655332302e-05, - "loss": 0.063, - "step": 473 - }, - { - "epoch": 0.3660938405097509, - "grad_norm": 0.1545466035604477, - "learning_rate": 7.310664605873262e-05, - "loss": 0.0622, - "step": 474 - }, - { - "epoch": 0.36686619038424406, - "grad_norm": 0.1770275980234146, - "learning_rate": 7.32612055641422e-05, - "loss": 0.0692, - "step": 475 - }, - { - "epoch": 0.3676385402587372, - "grad_norm": 0.13061058521270752, - "learning_rate": 7.341576506955178e-05, - "loss": 0.0645, - "step": 476 - }, - { - "epoch": 0.36841089013323036, - "grad_norm": 0.19434262812137604, - "learning_rate": 7.357032457496137e-05, - "loss": 0.0619, - "step": 477 - }, - { - "epoch": 0.3691832400077235, - "grad_norm": 0.12423071265220642, - "learning_rate": 7.372488408037095e-05, - "loss": 0.0614, - "step": 478 - }, - { - "epoch": 0.36995558988221666, - "grad_norm": 0.1717289686203003, - "learning_rate": 7.387944358578052e-05, - "loss": 0.0652, - "step": 479 - }, - { - "epoch": 0.3707279397567098, - "grad_norm": 0.19073888659477234, - "learning_rate": 7.403400309119011e-05, - "loss": 0.0623, - "step": 480 - }, - { - "epoch": 0.37150028963120296, - "grad_norm": 0.09556687623262405, - "learning_rate": 7.41885625965997e-05, - "loss": 0.0585, - "step": 481 - }, - { - "epoch": 0.37227263950569606, - "grad_norm": 0.25466054677963257, - "learning_rate": 7.434312210200928e-05, - "loss": 0.0615, - "step": 482 - }, - { - "epoch": 0.3730449893801892, - "grad_norm": 0.11340200901031494, - "learning_rate": 7.449768160741885e-05, - "loss": 0.0597, - "step": 483 - }, - { - "epoch": 0.37381733925468236, - "grad_norm": 0.13622435927391052, - "learning_rate": 7.465224111282845e-05, - "loss": 0.0593, - "step": 484 - }, - { - "epoch": 0.3745896891291755, - "grad_norm": 0.04559488967061043, - "learning_rate": 7.480680061823802e-05, - "loss": 0.06, - "step": 485 - }, - { - "epoch": 0.37536203900366866, - "grad_norm": 0.20303906500339508, - "learning_rate": 7.496136012364761e-05, - "loss": 0.0539, - "step": 486 - }, - { - "epoch": 0.3761343888781618, - "grad_norm": 0.10952377319335938, - "learning_rate": 7.511591962905718e-05, - "loss": 0.0569, - "step": 487 - }, - { - "epoch": 0.37690673875265496, - "grad_norm": 0.09587670862674713, - "learning_rate": 7.527047913446678e-05, - "loss": 0.0555, - "step": 488 - }, - { - "epoch": 0.3776790886271481, - "grad_norm": 0.15245388448238373, - "learning_rate": 7.542503863987635e-05, - "loss": 0.0591, - "step": 489 - }, - { - "epoch": 0.37845143850164126, - "grad_norm": 0.03817014768719673, - "learning_rate": 7.557959814528594e-05, - "loss": 0.0539, - "step": 490 - }, - { - "epoch": 0.3792237883761344, - "grad_norm": 0.1699414998292923, - "learning_rate": 7.573415765069552e-05, - "loss": 0.0601, - "step": 491 - }, - { - "epoch": 0.37999613825062756, - "grad_norm": 0.14948895573616028, - "learning_rate": 7.58887171561051e-05, - "loss": 0.0614, - "step": 492 - }, - { - "epoch": 0.38076848812512065, - "grad_norm": 0.06657677888870239, - "learning_rate": 7.604327666151468e-05, - "loss": 0.0528, - "step": 493 - }, - { - "epoch": 0.3815408379996138, - "grad_norm": 0.07747476547956467, - "learning_rate": 7.619783616692428e-05, - "loss": 0.0536, - "step": 494 - }, - { - "epoch": 0.38231318787410695, - "grad_norm": 0.061604093760252, - "learning_rate": 7.635239567233385e-05, - "loss": 0.0495, - "step": 495 - }, - { - "epoch": 0.3830855377486001, - "grad_norm": 0.23023541271686554, - "learning_rate": 7.650695517774343e-05, - "loss": 0.0517, - "step": 496 - }, - { - "epoch": 0.38385788762309325, - "grad_norm": 0.3549231290817261, - "learning_rate": 7.6661514683153e-05, - "loss": 0.0531, - "step": 497 - }, - { - "epoch": 0.3846302374975864, - "grad_norm": 0.2388257384300232, - "learning_rate": 7.68160741885626e-05, - "loss": 0.0482, - "step": 498 - }, - { - "epoch": 0.38540258737207955, - "grad_norm": 0.05481262877583504, - "learning_rate": 7.697063369397218e-05, - "loss": 0.0466, - "step": 499 - }, - { - "epoch": 0.3861749372465727, - "grad_norm": 0.20280222594738007, - "learning_rate": 7.712519319938176e-05, - "loss": 0.0489, - "step": 500 - }, - { - "epoch": 0.38694728712106585, - "grad_norm": 0.10731515288352966, - "learning_rate": 7.727975270479135e-05, - "loss": 0.0402, - "step": 501 - }, - { - "epoch": 0.387719636995559, - "grad_norm": 0.14593768119812012, - "learning_rate": 7.743431221020093e-05, - "loss": 0.0402, - "step": 502 - }, - { - "epoch": 0.38849198687005215, - "grad_norm": 0.14538267254829407, - "learning_rate": 7.75888717156105e-05, - "loss": 0.0383, - "step": 503 - }, - { - "epoch": 0.3892643367445453, - "grad_norm": 0.07397224009037018, - "learning_rate": 7.774343122102009e-05, - "loss": 0.0332, - "step": 504 - }, - { - "epoch": 0.3900366866190384, - "grad_norm": 0.13446685671806335, - "learning_rate": 7.789799072642968e-05, - "loss": 0.0338, - "step": 505 - }, - { - "epoch": 0.39080903649353155, - "grad_norm": 0.1309434473514557, - "learning_rate": 7.805255023183926e-05, - "loss": 0.0336, - "step": 506 - }, - { - "epoch": 0.3915813863680247, - "grad_norm": 0.08193838596343994, - "learning_rate": 7.820710973724883e-05, - "loss": 0.0308, - "step": 507 - }, - { - "epoch": 0.39235373624251785, - "grad_norm": 0.06123901903629303, - "learning_rate": 7.836166924265843e-05, - "loss": 0.0282, - "step": 508 - }, - { - "epoch": 0.393126086117011, - "grad_norm": 0.08192218840122223, - "learning_rate": 7.8516228748068e-05, - "loss": 0.0263, - "step": 509 - }, - { - "epoch": 0.39389843599150415, - "grad_norm": 0.08452457934617996, - "learning_rate": 7.867078825347759e-05, - "loss": 0.024, - "step": 510 - }, - { - "epoch": 0.3946707858659973, - "grad_norm": 0.0647989958524704, - "learning_rate": 7.882534775888718e-05, - "loss": 0.0221, - "step": 511 - }, - { - "epoch": 0.39544313574049045, - "grad_norm": 0.07992154359817505, - "learning_rate": 7.897990726429676e-05, - "loss": 0.0218, - "step": 512 - }, - { - "epoch": 0.3962154856149836, - "grad_norm": 0.06557576358318329, - "learning_rate": 7.913446676970633e-05, - "loss": 0.0218, - "step": 513 - }, - { - "epoch": 0.39698783548947675, - "grad_norm": 0.07514405995607376, - "learning_rate": 7.928902627511592e-05, - "loss": 0.017, - "step": 514 - }, - { - "epoch": 0.3977601853639699, - "grad_norm": 0.05783172324299812, - "learning_rate": 7.94435857805255e-05, - "loss": 0.0162, - "step": 515 - }, - { - "epoch": 0.39853253523846305, - "grad_norm": 0.13857033848762512, - "learning_rate": 7.959814528593509e-05, - "loss": 0.0173, - "step": 516 - }, - { - "epoch": 0.39930488511295614, - "grad_norm": 0.26674067974090576, - "learning_rate": 7.975270479134466e-05, - "loss": 0.0206, - "step": 517 - }, - { - "epoch": 0.4000772349874493, - "grad_norm": 0.29622969031333923, - "learning_rate": 7.990726429675426e-05, - "loss": 0.0362, - "step": 518 - }, - { - "epoch": 0.40084958486194244, - "grad_norm": 0.07281139492988586, - "learning_rate": 8.006182380216383e-05, - "loss": 0.0168, - "step": 519 - }, - { - "epoch": 0.4016219347364356, - "grad_norm": 0.22175332903862, - "learning_rate": 8.021638330757342e-05, - "loss": 0.0215, - "step": 520 - }, - { - "epoch": 0.40239428461092874, - "grad_norm": 0.08515631407499313, - "learning_rate": 8.0370942812983e-05, - "loss": 0.0173, - "step": 521 - }, - { - "epoch": 0.4031666344854219, - "grad_norm": 0.056558165699243546, - "learning_rate": 8.052550231839259e-05, - "loss": 0.0143, - "step": 522 - }, - { - "epoch": 0.40393898435991504, - "grad_norm": 0.06582767516374588, - "learning_rate": 8.068006182380216e-05, - "loss": 0.0131, - "step": 523 - }, - { - "epoch": 0.4047113342344082, - "grad_norm": 0.04354550689458847, - "learning_rate": 8.083462132921175e-05, - "loss": 0.0135, - "step": 524 - }, - { - "epoch": 0.40548368410890134, - "grad_norm": 0.04626228287816048, - "learning_rate": 8.098918083462133e-05, - "loss": 0.0134, - "step": 525 - }, - { - "epoch": 0.4062560339833945, - "grad_norm": 0.056485529989004135, - "learning_rate": 8.114374034003092e-05, - "loss": 0.0114, - "step": 526 - }, - { - "epoch": 0.40702838385788764, - "grad_norm": 0.048203691840171814, - "learning_rate": 8.129829984544049e-05, - "loss": 0.0114, - "step": 527 - }, - { - "epoch": 0.4078007337323808, - "grad_norm": 0.04264072701334953, - "learning_rate": 8.145285935085009e-05, - "loss": 0.0114, - "step": 528 - }, - { - "epoch": 0.4085730836068739, - "grad_norm": 0.06230396404862404, - "learning_rate": 8.160741885625966e-05, - "loss": 0.0112, - "step": 529 - }, - { - "epoch": 0.40934543348136704, - "grad_norm": 0.048966314643621445, - "learning_rate": 8.176197836166925e-05, - "loss": 0.0098, - "step": 530 - }, - { - "epoch": 0.4101177833558602, - "grad_norm": 0.03883390873670578, - "learning_rate": 8.191653786707882e-05, - "loss": 0.0109, - "step": 531 - }, - { - "epoch": 0.41089013323035334, - "grad_norm": 0.0660616010427475, - "learning_rate": 8.207109737248842e-05, - "loss": 0.0109, - "step": 532 - }, - { - "epoch": 0.4116624831048465, - "grad_norm": 0.049630846828222275, - "learning_rate": 8.222565687789799e-05, - "loss": 0.0097, - "step": 533 - }, - { - "epoch": 0.41243483297933964, - "grad_norm": 0.051477570086717606, - "learning_rate": 8.238021638330757e-05, - "loss": 0.0101, - "step": 534 - }, - { - "epoch": 0.4132071828538328, - "grad_norm": 0.04098783805966377, - "learning_rate": 8.253477588871716e-05, - "loss": 0.0095, - "step": 535 - }, - { - "epoch": 0.41397953272832594, - "grad_norm": 0.03531495854258537, - "learning_rate": 8.268933539412674e-05, - "loss": 0.0093, - "step": 536 - }, - { - "epoch": 0.4147518826028191, - "grad_norm": 0.03453240916132927, - "learning_rate": 8.284389489953632e-05, - "loss": 0.01, - "step": 537 - }, - { - "epoch": 0.41552423247731224, - "grad_norm": 0.07704520225524902, - "learning_rate": 8.299845440494592e-05, - "loss": 0.0106, - "step": 538 - }, - { - "epoch": 0.4162965823518054, - "grad_norm": 0.05514024570584297, - "learning_rate": 8.315301391035549e-05, - "loss": 0.0088, - "step": 539 - }, - { - "epoch": 0.41706893222629854, - "grad_norm": 0.042679984122514725, - "learning_rate": 8.330757341576507e-05, - "loss": 0.0093, - "step": 540 - }, - { - "epoch": 0.41784128210079163, - "grad_norm": 0.059344884008169174, - "learning_rate": 8.346213292117464e-05, - "loss": 0.0092, - "step": 541 - }, - { - "epoch": 0.4186136319752848, - "grad_norm": 0.0364481545984745, - "learning_rate": 8.361669242658424e-05, - "loss": 0.0083, - "step": 542 - }, - { - "epoch": 0.41938598184977793, - "grad_norm": 0.03970944508910179, - "learning_rate": 8.377125193199382e-05, - "loss": 0.0091, - "step": 543 - }, - { - "epoch": 0.4201583317242711, - "grad_norm": 0.06593722850084305, - "learning_rate": 8.39258114374034e-05, - "loss": 0.0086, - "step": 544 - }, - { - "epoch": 0.42093068159876423, - "grad_norm": 0.054119762033224106, - "learning_rate": 8.408037094281299e-05, - "loss": 0.0089, - "step": 545 - }, - { - "epoch": 0.4217030314732574, - "grad_norm": 0.07380783557891846, - "learning_rate": 8.423493044822257e-05, - "loss": 0.0097, - "step": 546 - }, - { - "epoch": 0.42247538134775053, - "grad_norm": 0.04153510928153992, - "learning_rate": 8.438948995363214e-05, - "loss": 0.0083, - "step": 547 - }, - { - "epoch": 0.4232477312222437, - "grad_norm": 0.09071889519691467, - "learning_rate": 8.454404945904173e-05, - "loss": 0.0085, - "step": 548 - }, - { - "epoch": 0.42402008109673683, - "grad_norm": 0.02221975289285183, - "learning_rate": 8.469860896445132e-05, - "loss": 0.0076, - "step": 549 - }, - { - "epoch": 0.42479243097123, - "grad_norm": 0.0676327645778656, - "learning_rate": 8.48531684698609e-05, - "loss": 0.0095, - "step": 550 - }, - { - "epoch": 0.42556478084572313, - "grad_norm": 0.06018376350402832, - "learning_rate": 8.500772797527047e-05, - "loss": 0.0085, - "step": 551 - }, - { - "epoch": 0.4263371307202163, - "grad_norm": 0.03183077275753021, - "learning_rate": 8.516228748068007e-05, - "loss": 0.0077, - "step": 552 - }, - { - "epoch": 0.4271094805947094, - "grad_norm": 0.06584025919437408, - "learning_rate": 8.531684698608964e-05, - "loss": 0.009, - "step": 553 - }, - { - "epoch": 0.42788183046920253, - "grad_norm": 0.039773181080818176, - "learning_rate": 8.547140649149923e-05, - "loss": 0.0088, - "step": 554 - }, - { - "epoch": 0.4286541803436957, - "grad_norm": 0.05208640173077583, - "learning_rate": 8.562596599690881e-05, - "loss": 0.0076, - "step": 555 - }, - { - "epoch": 0.42942653021818883, - "grad_norm": 0.060254037380218506, - "learning_rate": 8.57805255023184e-05, - "loss": 0.0083, - "step": 556 - }, - { - "epoch": 0.430198880092682, - "grad_norm": 0.019793977960944176, - "learning_rate": 8.593508500772797e-05, - "loss": 0.0076, - "step": 557 - }, - { - "epoch": 0.43097122996717513, - "grad_norm": 0.04959573224186897, - "learning_rate": 8.608964451313756e-05, - "loss": 0.0087, - "step": 558 - }, - { - "epoch": 0.4317435798416683, - "grad_norm": 0.027046391740441322, - "learning_rate": 8.624420401854714e-05, - "loss": 0.0076, - "step": 559 - }, - { - "epoch": 0.43251592971616143, - "grad_norm": 0.030857374891638756, - "learning_rate": 8.639876352395673e-05, - "loss": 0.0077, - "step": 560 - }, - { - "epoch": 0.4332882795906546, - "grad_norm": 0.01885489746928215, - "learning_rate": 8.65533230293663e-05, - "loss": 0.0075, - "step": 561 - }, - { - "epoch": 0.43406062946514773, - "grad_norm": 0.020508000627160072, - "learning_rate": 8.67078825347759e-05, - "loss": 0.0082, - "step": 562 - }, - { - "epoch": 0.4348329793396409, - "grad_norm": 0.037663884460926056, - "learning_rate": 8.686244204018547e-05, - "loss": 0.0077, - "step": 563 - }, - { - "epoch": 0.43560532921413403, - "grad_norm": 0.020937500521540642, - "learning_rate": 8.701700154559506e-05, - "loss": 0.0078, - "step": 564 - }, - { - "epoch": 0.4363776790886271, - "grad_norm": 0.021033072844147682, - "learning_rate": 8.717156105100464e-05, - "loss": 0.0065, - "step": 565 - }, - { - "epoch": 0.4371500289631203, - "grad_norm": 0.029200492426753044, - "learning_rate": 8.732612055641423e-05, - "loss": 0.0079, - "step": 566 - }, - { - "epoch": 0.4379223788376134, - "grad_norm": 0.029047193005681038, - "learning_rate": 8.74806800618238e-05, - "loss": 0.0073, - "step": 567 - }, - { - "epoch": 0.4386947287121066, - "grad_norm": 0.02560579404234886, - "learning_rate": 8.763523956723338e-05, - "loss": 0.0073, - "step": 568 - }, - { - "epoch": 0.4394670785865997, - "grad_norm": 0.03898506984114647, - "learning_rate": 8.778979907264297e-05, - "loss": 0.0078, - "step": 569 - }, - { - "epoch": 0.4402394284610929, - "grad_norm": 0.055788278579711914, - "learning_rate": 8.794435857805256e-05, - "loss": 0.0083, - "step": 570 - }, - { - "epoch": 0.441011778335586, - "grad_norm": 0.026275061070919037, - "learning_rate": 8.809891808346213e-05, - "loss": 0.0076, - "step": 571 - }, - { - "epoch": 0.4417841282100792, - "grad_norm": 0.040119849145412445, - "learning_rate": 8.825347758887173e-05, - "loss": 0.007, - "step": 572 - }, - { - "epoch": 0.4425564780845723, - "grad_norm": 0.04549155756831169, - "learning_rate": 8.84080370942813e-05, - "loss": 0.0079, - "step": 573 - }, - { - "epoch": 0.4433288279590655, - "grad_norm": 0.025678303092718124, - "learning_rate": 8.856259659969088e-05, - "loss": 0.0068, - "step": 574 - }, - { - "epoch": 0.4441011778335586, - "grad_norm": 0.0724676251411438, - "learning_rate": 8.871715610510047e-05, - "loss": 0.008, - "step": 575 - }, - { - "epoch": 0.4448735277080518, - "grad_norm": 0.018808679655194283, - "learning_rate": 8.887171561051005e-05, - "loss": 0.0069, - "step": 576 - }, - { - "epoch": 0.44564587758254487, - "grad_norm": 0.044109608978033066, - "learning_rate": 8.902627511591963e-05, - "loss": 0.0066, - "step": 577 - }, - { - "epoch": 0.446418227457038, - "grad_norm": 0.04183276370167732, - "learning_rate": 8.918083462132921e-05, - "loss": 0.0067, - "step": 578 - }, - { - "epoch": 0.44719057733153117, - "grad_norm": 0.018278077244758606, - "learning_rate": 8.93353941267388e-05, - "loss": 0.0068, - "step": 579 - }, - { - "epoch": 0.4479629272060243, - "grad_norm": 0.040254537016153336, - "learning_rate": 8.948995363214838e-05, - "loss": 0.0073, - "step": 580 - }, - { - "epoch": 0.44873527708051747, - "grad_norm": 0.04067078232765198, - "learning_rate": 8.964451313755796e-05, - "loss": 0.0071, - "step": 581 - }, - { - "epoch": 0.4495076269550106, - "grad_norm": 0.021396074444055557, - "learning_rate": 8.979907264296755e-05, - "loss": 0.0064, - "step": 582 - }, - { - "epoch": 0.45027997682950377, - "grad_norm": 0.05812348425388336, - "learning_rate": 8.995363214837713e-05, - "loss": 0.0072, - "step": 583 - }, - { - "epoch": 0.4510523267039969, - "grad_norm": 0.052907440811395645, - "learning_rate": 9.010819165378671e-05, - "loss": 0.0075, - "step": 584 - }, - { - "epoch": 0.45182467657849007, - "grad_norm": 0.04013500362634659, - "learning_rate": 9.02627511591963e-05, - "loss": 0.007, - "step": 585 - }, - { - "epoch": 0.4525970264529832, - "grad_norm": 0.062313806265592575, - "learning_rate": 9.041731066460588e-05, - "loss": 0.0073, - "step": 586 - }, - { - "epoch": 0.45336937632747637, - "grad_norm": 0.020526016131043434, - "learning_rate": 9.057187017001545e-05, - "loss": 0.0073, - "step": 587 - }, - { - "epoch": 0.45414172620196946, - "grad_norm": 0.03165501728653908, - "learning_rate": 9.072642967542504e-05, - "loss": 0.0065, - "step": 588 - }, - { - "epoch": 0.4549140760764626, - "grad_norm": 0.08332403749227524, - "learning_rate": 9.088098918083463e-05, - "loss": 0.007, - "step": 589 - }, - { - "epoch": 0.45568642595095576, - "grad_norm": 0.031461749225854874, - "learning_rate": 9.103554868624421e-05, - "loss": 0.0066, - "step": 590 - }, - { - "epoch": 0.4564587758254489, - "grad_norm": 0.08399269729852676, - "learning_rate": 9.119010819165378e-05, - "loss": 0.0076, - "step": 591 - }, - { - "epoch": 0.45723112569994206, - "grad_norm": 0.07166516035795212, - "learning_rate": 9.134466769706337e-05, - "loss": 0.0081, - "step": 592 - }, - { - "epoch": 0.4580034755744352, - "grad_norm": 0.07119960337877274, - "learning_rate": 9.149922720247295e-05, - "loss": 0.0069, - "step": 593 - }, - { - "epoch": 0.45877582544892836, - "grad_norm": 0.06339021027088165, - "learning_rate": 9.165378670788254e-05, - "loss": 0.0071, - "step": 594 - }, - { - "epoch": 0.4595481753234215, - "grad_norm": 0.02564193122088909, - "learning_rate": 9.180834621329212e-05, - "loss": 0.0058, - "step": 595 - }, - { - "epoch": 0.46032052519791467, - "grad_norm": 0.022058872506022453, - "learning_rate": 9.196290571870171e-05, - "loss": 0.0062, - "step": 596 - }, - { - "epoch": 0.4610928750724078, - "grad_norm": 0.06470519304275513, - "learning_rate": 9.211746522411128e-05, - "loss": 0.0069, - "step": 597 - }, - { - "epoch": 0.46186522494690097, - "grad_norm": 0.04460231587290764, - "learning_rate": 9.227202472952087e-05, - "loss": 0.0066, - "step": 598 - }, - { - "epoch": 0.4626375748213941, - "grad_norm": 0.04173488914966583, - "learning_rate": 9.242658423493045e-05, - "loss": 0.0069, - "step": 599 - }, - { - "epoch": 0.4634099246958872, - "grad_norm": 0.06716244667768478, - "learning_rate": 9.258114374034004e-05, - "loss": 0.0074, - "step": 600 - }, - { - "epoch": 0.46418227457038036, - "grad_norm": 0.025468017905950546, - "learning_rate": 9.273570324574961e-05, - "loss": 0.0066, - "step": 601 - }, - { - "epoch": 0.4649546244448735, - "grad_norm": 0.045318953692913055, - "learning_rate": 9.28902627511592e-05, - "loss": 0.0069, - "step": 602 - }, - { - "epoch": 0.46572697431936666, - "grad_norm": 0.06828133761882782, - "learning_rate": 9.304482225656878e-05, - "loss": 0.008, - "step": 603 - }, - { - "epoch": 0.4664993241938598, - "grad_norm": 0.018730657175183296, - "learning_rate": 9.319938176197837e-05, - "loss": 0.0067, - "step": 604 - }, - { - "epoch": 0.46727167406835296, - "grad_norm": 0.052807360887527466, - "learning_rate": 9.335394126738795e-05, - "loss": 0.0064, - "step": 605 - }, - { - "epoch": 0.4680440239428461, - "grad_norm": 0.03708384931087494, - "learning_rate": 9.350850077279754e-05, - "loss": 0.0074, - "step": 606 - }, - { - "epoch": 0.46881637381733926, - "grad_norm": 0.034401170909404755, - "learning_rate": 9.366306027820711e-05, - "loss": 0.0071, - "step": 607 - }, - { - "epoch": 0.4695887236918324, - "grad_norm": 0.03672698140144348, - "learning_rate": 9.38176197836167e-05, - "loss": 0.0063, - "step": 608 - }, - { - "epoch": 0.47036107356632556, - "grad_norm": 0.04110847786068916, - "learning_rate": 9.397217928902628e-05, - "loss": 0.0066, - "step": 609 - }, - { - "epoch": 0.4711334234408187, - "grad_norm": 0.020947515964508057, - "learning_rate": 9.412673879443587e-05, - "loss": 0.0065, - "step": 610 - }, - { - "epoch": 0.47190577331531186, - "grad_norm": 0.023696739226579666, - "learning_rate": 9.428129829984544e-05, - "loss": 0.0062, - "step": 611 - }, - { - "epoch": 0.47267812318980496, - "grad_norm": 0.02793034166097641, - "learning_rate": 9.443585780525502e-05, - "loss": 0.0066, - "step": 612 - }, - { - "epoch": 0.4734504730642981, - "grad_norm": 0.036380648612976074, - "learning_rate": 9.459041731066461e-05, - "loss": 0.0065, - "step": 613 - }, - { - "epoch": 0.47422282293879126, - "grad_norm": 0.016912082210183144, - "learning_rate": 9.47449768160742e-05, - "loss": 0.0067, - "step": 614 - }, - { - "epoch": 0.4749951728132844, - "grad_norm": 0.0291866697371006, - "learning_rate": 9.489953632148378e-05, - "loss": 0.0067, - "step": 615 - }, - { - "epoch": 0.47576752268777756, - "grad_norm": 0.024140650406479836, - "learning_rate": 9.505409582689336e-05, - "loss": 0.0061, - "step": 616 - }, - { - "epoch": 0.4765398725622707, - "grad_norm": 0.018938008695840836, - "learning_rate": 9.520865533230294e-05, - "loss": 0.0069, - "step": 617 - }, - { - "epoch": 0.47731222243676386, - "grad_norm": 0.034017808735370636, - "learning_rate": 9.536321483771252e-05, - "loss": 0.0061, - "step": 618 - }, - { - "epoch": 0.478084572311257, - "grad_norm": 0.024729734286665916, - "learning_rate": 9.551777434312211e-05, - "loss": 0.0069, - "step": 619 - }, - { - "epoch": 0.47885692218575016, - "grad_norm": 0.02578096278011799, - "learning_rate": 9.567233384853169e-05, - "loss": 0.0056, - "step": 620 - }, - { - "epoch": 0.4796292720602433, - "grad_norm": 0.024975182488560677, - "learning_rate": 9.582689335394127e-05, - "loss": 0.0061, - "step": 621 - }, - { - "epoch": 0.48040162193473646, - "grad_norm": 0.016552217304706573, - "learning_rate": 9.598145285935085e-05, - "loss": 0.0066, - "step": 622 - }, - { - "epoch": 0.4811739718092296, - "grad_norm": 0.024094808846712112, - "learning_rate": 9.613601236476044e-05, - "loss": 0.0061, - "step": 623 - }, - { - "epoch": 0.4819463216837227, - "grad_norm": 0.03026052936911583, - "learning_rate": 9.629057187017002e-05, - "loss": 0.0063, - "step": 624 - }, - { - "epoch": 0.48271867155821585, - "grad_norm": 0.02954074554145336, - "learning_rate": 9.644513137557961e-05, - "loss": 0.0064, - "step": 625 - }, - { - "epoch": 0.483491021432709, - "grad_norm": 0.01834062486886978, - "learning_rate": 9.659969088098919e-05, - "loss": 0.0061, - "step": 626 - }, - { - "epoch": 0.48426337130720215, - "grad_norm": 0.026609912514686584, - "learning_rate": 9.675425038639876e-05, - "loss": 0.0067, - "step": 627 - }, - { - "epoch": 0.4850357211816953, - "grad_norm": 0.03172049671411514, - "learning_rate": 9.690880989180835e-05, - "loss": 0.0058, - "step": 628 - }, - { - "epoch": 0.48580807105618845, - "grad_norm": 0.03625147417187691, - "learning_rate": 9.706336939721794e-05, - "loss": 0.0063, - "step": 629 - }, - { - "epoch": 0.4865804209306816, - "grad_norm": 0.020125100389122963, - "learning_rate": 9.721792890262752e-05, - "loss": 0.0058, - "step": 630 - }, - { - "epoch": 0.48735277080517475, - "grad_norm": 0.07479507476091385, - "learning_rate": 9.737248840803709e-05, - "loss": 0.0074, - "step": 631 - }, - { - "epoch": 0.4881251206796679, - "grad_norm": 0.028870578855276108, - "learning_rate": 9.752704791344668e-05, - "loss": 0.0061, - "step": 632 - }, - { - "epoch": 0.48889747055416105, - "grad_norm": 0.04097558557987213, - "learning_rate": 9.768160741885626e-05, - "loss": 0.0066, - "step": 633 - }, - { - "epoch": 0.4896698204286542, - "grad_norm": 0.023863254114985466, - "learning_rate": 9.783616692426585e-05, - "loss": 0.0061, - "step": 634 - }, - { - "epoch": 0.49044217030314735, - "grad_norm": 0.037251200526952744, - "learning_rate": 9.799072642967543e-05, - "loss": 0.0064, - "step": 635 - }, - { - "epoch": 0.49121452017764045, - "grad_norm": 0.017656605690717697, - "learning_rate": 9.8145285935085e-05, - "loss": 0.0066, - "step": 636 - }, - { - "epoch": 0.4919868700521336, - "grad_norm": 0.013435768894851208, - "learning_rate": 9.829984544049459e-05, - "loss": 0.0064, - "step": 637 - }, - { - "epoch": 0.49275921992662675, - "grad_norm": 0.01997395232319832, - "learning_rate": 9.845440494590418e-05, - "loss": 0.0063, - "step": 638 - }, - { - "epoch": 0.4935315698011199, - "grad_norm": 0.049601081758737564, - "learning_rate": 9.860896445131376e-05, - "loss": 0.0065, - "step": 639 - }, - { - "epoch": 0.49430391967561305, - "grad_norm": 0.02850998379290104, - "learning_rate": 9.876352395672335e-05, - "loss": 0.0062, - "step": 640 - }, - { - "epoch": 0.4950762695501062, - "grad_norm": 0.0246591754257679, - "learning_rate": 9.891808346213292e-05, - "loss": 0.0062, - "step": 641 - }, - { - "epoch": 0.49584861942459935, - "grad_norm": 0.02419452928006649, - "learning_rate": 9.90726429675425e-05, - "loss": 0.0059, - "step": 642 - }, - { - "epoch": 0.4966209692990925, - "grad_norm": 0.024134468287229538, - "learning_rate": 9.922720247295209e-05, - "loss": 0.0066, - "step": 643 - }, - { - "epoch": 0.49739331917358565, - "grad_norm": 0.020593956112861633, - "learning_rate": 9.938176197836168e-05, - "loss": 0.0068, - "step": 644 - }, - { - "epoch": 0.4981656690480788, - "grad_norm": 0.030068911612033844, - "learning_rate": 9.953632148377126e-05, - "loss": 0.0058, - "step": 645 - }, - { - "epoch": 0.49893801892257195, - "grad_norm": 0.03695107623934746, - "learning_rate": 9.969088098918083e-05, - "loss": 0.0059, - "step": 646 - }, - { - "epoch": 0.4997103687970651, - "grad_norm": 0.01950264722108841, - "learning_rate": 9.984544049459042e-05, - "loss": 0.0058, - "step": 647 - }, - { - "epoch": 0.5004827186715582, - "grad_norm": 0.019529715180397034, - "learning_rate": 0.0001, - "loss": 0.0064, - "step": 648 - }, - { - "epoch": 0.5012550685460514, - "grad_norm": 0.01743905618786812, - "learning_rate": 9.999999272310408e-05, - "loss": 0.0069, - "step": 649 - }, - { - "epoch": 0.5020274184205445, - "grad_norm": 0.017296746373176575, - "learning_rate": 9.999997089241844e-05, - "loss": 0.0063, - "step": 650 - }, - { - "epoch": 0.5027997682950377, - "grad_norm": 0.021065376698970795, - "learning_rate": 9.999993450794945e-05, - "loss": 0.0065, - "step": 651 - }, - { - "epoch": 0.5035721181695308, - "grad_norm": 0.017609447240829468, - "learning_rate": 9.999988356970765e-05, - "loss": 0.0061, - "step": 652 - }, - { - "epoch": 0.504344468044024, - "grad_norm": 0.014331743121147156, - "learning_rate": 9.999981807770793e-05, - "loss": 0.0058, - "step": 653 - }, - { - "epoch": 0.505116817918517, - "grad_norm": 0.02973123826086521, - "learning_rate": 9.999973803196931e-05, - "loss": 0.0059, - "step": 654 - }, - { - "epoch": 0.5058891677930102, - "grad_norm": 0.06875317543745041, - "learning_rate": 9.99996434325151e-05, - "loss": 0.0066, - "step": 655 - }, - { - "epoch": 0.5066615176675033, - "grad_norm": 0.029321495443582535, - "learning_rate": 9.999953427937285e-05, - "loss": 0.0065, - "step": 656 - }, - { - "epoch": 0.5074338675419965, - "grad_norm": 0.02221604809165001, - "learning_rate": 9.999941057257431e-05, - "loss": 0.0061, - "step": 657 - }, - { - "epoch": 0.5082062174164896, - "grad_norm": 0.024562258273363113, - "learning_rate": 9.999927231215551e-05, - "loss": 0.0061, - "step": 658 - }, - { - "epoch": 0.5089785672909828, - "grad_norm": 0.05312330275774002, - "learning_rate": 9.999911949815668e-05, - "loss": 0.0057, - "step": 659 - }, - { - "epoch": 0.5097509171654759, - "grad_norm": 0.02948874980211258, - "learning_rate": 9.99989521306223e-05, - "loss": 0.007, - "step": 660 - }, - { - "epoch": 0.5105232670399691, - "grad_norm": 0.03101520985364914, - "learning_rate": 9.99987702096011e-05, - "loss": 0.0057, - "step": 661 - }, - { - "epoch": 0.5112956169144622, - "grad_norm": 0.027225030586123466, - "learning_rate": 9.999857373514601e-05, - "loss": 0.0069, - "step": 662 - }, - { - "epoch": 0.5120679667889554, - "grad_norm": 0.04047227278351784, - "learning_rate": 9.999836270731423e-05, - "loss": 0.0066, - "step": 663 - }, - { - "epoch": 0.5128403166634485, - "grad_norm": 0.0271292757242918, - "learning_rate": 9.999813712616719e-05, - "loss": 0.0066, - "step": 664 - }, - { - "epoch": 0.5136126665379417, - "grad_norm": 0.044453106820583344, - "learning_rate": 9.999789699177056e-05, - "loss": 0.0068, - "step": 665 - }, - { - "epoch": 0.5143850164124348, - "grad_norm": 0.03188185393810272, - "learning_rate": 9.999764230419422e-05, - "loss": 0.0062, - "step": 666 - }, - { - "epoch": 0.515157366286928, - "grad_norm": 0.033734098076820374, - "learning_rate": 9.999737306351232e-05, - "loss": 0.007, - "step": 667 - }, - { - "epoch": 0.5159297161614211, - "grad_norm": 0.035705890506505966, - "learning_rate": 9.99970892698032e-05, - "loss": 0.0058, - "step": 668 - }, - { - "epoch": 0.5167020660359143, - "grad_norm": 0.0311118196696043, - "learning_rate": 9.999679092314948e-05, - "loss": 0.0067, - "step": 669 - }, - { - "epoch": 0.5174744159104074, - "grad_norm": 0.04391526058316231, - "learning_rate": 9.999647802363803e-05, - "loss": 0.006, - "step": 670 - }, - { - "epoch": 0.5182467657849006, - "grad_norm": 0.01930868998169899, - "learning_rate": 9.999615057135989e-05, - "loss": 0.0063, - "step": 671 - }, - { - "epoch": 0.5190191156593937, - "grad_norm": 0.033360958099365234, - "learning_rate": 9.999580856641038e-05, - "loss": 0.0059, - "step": 672 - }, - { - "epoch": 0.5197914655338869, - "grad_norm": 0.020999480038881302, - "learning_rate": 9.999545200888907e-05, - "loss": 0.0063, - "step": 673 - }, - { - "epoch": 0.52056381540838, - "grad_norm": 0.05861787870526314, - "learning_rate": 9.999508089889971e-05, - "loss": 0.0062, - "step": 674 - }, - { - "epoch": 0.5213361652828732, - "grad_norm": 0.026002857834100723, - "learning_rate": 9.999469523655036e-05, - "loss": 0.0059, - "step": 675 - }, - { - "epoch": 0.5221085151573663, - "grad_norm": 0.017863426357507706, - "learning_rate": 9.999429502195326e-05, - "loss": 0.0058, - "step": 676 - }, - { - "epoch": 0.5228808650318595, - "grad_norm": 0.02639612928032875, - "learning_rate": 9.999388025522489e-05, - "loss": 0.0069, - "step": 677 - }, - { - "epoch": 0.5236532149063525, - "grad_norm": 0.025559836998581886, - "learning_rate": 9.9993450936486e-05, - "loss": 0.0054, - "step": 678 - }, - { - "epoch": 0.5244255647808457, - "grad_norm": 0.022854767739772797, - "learning_rate": 9.999300706586154e-05, - "loss": 0.0063, - "step": 679 - }, - { - "epoch": 0.5251979146553388, - "grad_norm": 0.016021009534597397, - "learning_rate": 9.999254864348073e-05, - "loss": 0.0055, - "step": 680 - }, - { - "epoch": 0.525970264529832, - "grad_norm": 0.016422376036643982, - "learning_rate": 9.999207566947698e-05, - "loss": 0.0061, - "step": 681 - }, - { - "epoch": 0.5267426144043251, - "grad_norm": 0.019435180351138115, - "learning_rate": 9.999158814398796e-05, - "loss": 0.0065, - "step": 682 - }, - { - "epoch": 0.5275149642788183, - "grad_norm": 0.022698726505041122, - "learning_rate": 9.999108606715561e-05, - "loss": 0.0061, - "step": 683 - }, - { - "epoch": 0.5282873141533114, - "grad_norm": 0.017409764230251312, - "learning_rate": 9.999056943912603e-05, - "loss": 0.0053, - "step": 684 - }, - { - "epoch": 0.5290596640278046, - "grad_norm": 0.019013019278645515, - "learning_rate": 9.999003826004964e-05, - "loss": 0.0061, - "step": 685 - }, - { - "epoch": 0.5298320139022977, - "grad_norm": 0.018017224967479706, - "learning_rate": 9.998949253008103e-05, - "loss": 0.0062, - "step": 686 - }, - { - "epoch": 0.5306043637767909, - "grad_norm": 0.042635902762413025, - "learning_rate": 9.998893224937904e-05, - "loss": 0.0063, - "step": 687 - }, - { - "epoch": 0.531376713651284, - "grad_norm": 0.01615188457071781, - "learning_rate": 9.998835741810677e-05, - "loss": 0.0056, - "step": 688 - }, - { - "epoch": 0.5321490635257772, - "grad_norm": 0.022535262629389763, - "learning_rate": 9.998776803643155e-05, - "loss": 0.0065, - "step": 689 - }, - { - "epoch": 0.5329214134002703, - "grad_norm": 0.034012194722890854, - "learning_rate": 9.99871641045249e-05, - "loss": 0.0061, - "step": 690 - }, - { - "epoch": 0.5336937632747635, - "grad_norm": 0.020539700984954834, - "learning_rate": 9.998654562256265e-05, - "loss": 0.005, - "step": 691 - }, - { - "epoch": 0.5344661131492566, - "grad_norm": 0.03489365801215172, - "learning_rate": 9.998591259072479e-05, - "loss": 0.0062, - "step": 692 - }, - { - "epoch": 0.5352384630237498, - "grad_norm": 0.016758328303694725, - "learning_rate": 9.998526500919558e-05, - "loss": 0.0057, - "step": 693 - }, - { - "epoch": 0.5360108128982429, - "grad_norm": 0.04345661774277687, - "learning_rate": 9.998460287816355e-05, - "loss": 0.0062, - "step": 694 - }, - { - "epoch": 0.5367831627727361, - "grad_norm": 0.028332583606243134, - "learning_rate": 9.998392619782142e-05, - "loss": 0.0061, - "step": 695 - }, - { - "epoch": 0.5375555126472292, - "grad_norm": 0.017350120469927788, - "learning_rate": 9.998323496836613e-05, - "loss": 0.0066, - "step": 696 - }, - { - "epoch": 0.5383278625217224, - "grad_norm": 0.05254344269633293, - "learning_rate": 9.99825291899989e-05, - "loss": 0.0069, - "step": 697 - }, - { - "epoch": 0.5391002123962155, - "grad_norm": 0.02194729447364807, - "learning_rate": 9.998180886292517e-05, - "loss": 0.0059, - "step": 698 - }, - { - "epoch": 0.5398725622707087, - "grad_norm": 0.02504734694957733, - "learning_rate": 9.998107398735459e-05, - "loss": 0.0061, - "step": 699 - }, - { - "epoch": 0.5406449121452018, - "grad_norm": 0.048992741852998734, - "learning_rate": 9.998032456350108e-05, - "loss": 0.0066, - "step": 700 - }, - { - "epoch": 0.5414172620196949, - "grad_norm": 0.016226578503847122, - "learning_rate": 9.997956059158278e-05, - "loss": 0.0062, - "step": 701 - }, - { - "epoch": 0.542189611894188, - "grad_norm": 0.03624337911605835, - "learning_rate": 9.997878207182205e-05, - "loss": 0.0065, - "step": 702 - }, - { - "epoch": 0.5429619617686812, - "grad_norm": 0.05948558449745178, - "learning_rate": 9.99779890044455e-05, - "loss": 0.0061, - "step": 703 - }, - { - "epoch": 0.5437343116431743, - "grad_norm": 0.01626715436577797, - "learning_rate": 9.9977181389684e-05, - "loss": 0.0058, - "step": 704 - }, - { - "epoch": 0.5445066615176675, - "grad_norm": 0.041707947850227356, - "learning_rate": 9.99763592277726e-05, - "loss": 0.0063, - "step": 705 - }, - { - "epoch": 0.5452790113921606, - "grad_norm": 0.05031125247478485, - "learning_rate": 9.997552251895061e-05, - "loss": 0.0062, - "step": 706 - }, - { - "epoch": 0.5460513612666538, - "grad_norm": 0.022725578397512436, - "learning_rate": 9.99746712634616e-05, - "loss": 0.0054, - "step": 707 - }, - { - "epoch": 0.5468237111411469, - "grad_norm": 0.042997218668460846, - "learning_rate": 9.997380546155333e-05, - "loss": 0.0067, - "step": 708 - }, - { - "epoch": 0.5475960610156401, - "grad_norm": 0.03495806083083153, - "learning_rate": 9.99729251134778e-05, - "loss": 0.006, - "step": 709 - }, - { - "epoch": 0.5483684108901332, - "grad_norm": 0.01554971281439066, - "learning_rate": 9.99720302194913e-05, - "loss": 0.0061, - "step": 710 - }, - { - "epoch": 0.5491407607646264, - "grad_norm": 0.027845371514558792, - "learning_rate": 9.997112077985428e-05, - "loss": 0.0064, - "step": 711 - }, - { - "epoch": 0.5499131106391195, - "grad_norm": 0.06307762861251831, - "learning_rate": 9.997019679483145e-05, - "loss": 0.0065, - "step": 712 - }, - { - "epoch": 0.5506854605136127, - "grad_norm": 0.024832166731357574, - "learning_rate": 9.99692582646918e-05, - "loss": 0.0057, - "step": 713 - }, - { - "epoch": 0.5514578103881058, - "grad_norm": 0.055046964436769485, - "learning_rate": 9.996830518970847e-05, - "loss": 0.0065, - "step": 714 - }, - { - "epoch": 0.552230160262599, - "grad_norm": 0.025708051398396492, - "learning_rate": 9.99673375701589e-05, - "loss": 0.006, - "step": 715 - }, - { - "epoch": 0.5530025101370921, - "grad_norm": 0.02331097424030304, - "learning_rate": 9.996635540632473e-05, - "loss": 0.006, - "step": 716 - }, - { - "epoch": 0.5537748600115853, - "grad_norm": 0.03324016556143761, - "learning_rate": 9.996535869849186e-05, - "loss": 0.0063, - "step": 717 - }, - { - "epoch": 0.5545472098860784, - "grad_norm": 0.03681979700922966, - "learning_rate": 9.996434744695038e-05, - "loss": 0.0061, - "step": 718 - }, - { - "epoch": 0.5553195597605716, - "grad_norm": 0.016809049993753433, - "learning_rate": 9.996332165199466e-05, - "loss": 0.0061, - "step": 719 - }, - { - "epoch": 0.5560919096350647, - "grad_norm": 0.0409129299223423, - "learning_rate": 9.996228131392329e-05, - "loss": 0.0058, - "step": 720 - }, - { - "epoch": 0.5568642595095579, - "grad_norm": 0.02070578932762146, - "learning_rate": 9.99612264330391e-05, - "loss": 0.0057, - "step": 721 - }, - { - "epoch": 0.557636609384051, - "grad_norm": 0.022130563855171204, - "learning_rate": 9.996015700964908e-05, - "loss": 0.0056, - "step": 722 - }, - { - "epoch": 0.5584089592585442, - "grad_norm": 0.04914633929729462, - "learning_rate": 9.995907304406457e-05, - "loss": 0.0066, - "step": 723 - }, - { - "epoch": 0.5591813091330373, - "grad_norm": 0.02824830636382103, - "learning_rate": 9.995797453660107e-05, - "loss": 0.0057, - "step": 724 - }, - { - "epoch": 0.5599536590075304, - "grad_norm": 0.01932264119386673, - "learning_rate": 9.995686148757833e-05, - "loss": 0.0058, - "step": 725 - }, - { - "epoch": 0.5607260088820235, - "grad_norm": 0.034268081188201904, - "learning_rate": 9.995573389732032e-05, - "loss": 0.0058, - "step": 726 - }, - { - "epoch": 0.5614983587565167, - "grad_norm": 0.018359249457716942, - "learning_rate": 9.995459176615527e-05, - "loss": 0.0065, - "step": 727 - }, - { - "epoch": 0.5622707086310098, - "grad_norm": 0.021127983927726746, - "learning_rate": 9.995343509441561e-05, - "loss": 0.0062, - "step": 728 - }, - { - "epoch": 0.563043058505503, - "grad_norm": 0.026875965297222137, - "learning_rate": 9.995226388243804e-05, - "loss": 0.006, - "step": 729 - }, - { - "epoch": 0.5638154083799961, - "grad_norm": 0.022183291614055634, - "learning_rate": 9.995107813056347e-05, - "loss": 0.0056, - "step": 730 - }, - { - "epoch": 0.5645877582544893, - "grad_norm": 0.025123730301856995, - "learning_rate": 9.994987783913704e-05, - "loss": 0.0061, - "step": 731 - }, - { - "epoch": 0.5653601081289824, - "grad_norm": 0.03460489585995674, - "learning_rate": 9.994866300850809e-05, - "loss": 0.0054, - "step": 732 - }, - { - "epoch": 0.5661324580034756, - "grad_norm": 0.012985138222575188, - "learning_rate": 9.994743363903028e-05, - "loss": 0.0052, - "step": 733 - }, - { - "epoch": 0.5669048078779687, - "grad_norm": 0.01988379657268524, - "learning_rate": 9.994618973106142e-05, - "loss": 0.0053, - "step": 734 - }, - { - "epoch": 0.5676771577524619, - "grad_norm": 0.03403865545988083, - "learning_rate": 9.994493128496359e-05, - "loss": 0.0057, - "step": 735 - }, - { - "epoch": 0.568449507626955, - "grad_norm": 0.019713513553142548, - "learning_rate": 9.994365830110311e-05, - "loss": 0.0052, - "step": 736 - }, - { - "epoch": 0.5692218575014482, - "grad_norm": 0.02123085968196392, - "learning_rate": 9.994237077985048e-05, - "loss": 0.0059, - "step": 737 - }, - { - "epoch": 0.5699942073759413, - "grad_norm": 0.021393541246652603, - "learning_rate": 9.99410687215805e-05, - "loss": 0.0047, - "step": 738 - }, - { - "epoch": 0.5707665572504345, - "grad_norm": 0.019972041249275208, - "learning_rate": 9.993975212667212e-05, - "loss": 0.0054, - "step": 739 - }, - { - "epoch": 0.5715389071249276, - "grad_norm": 0.01621190644800663, - "learning_rate": 9.993842099550863e-05, - "loss": 0.0054, - "step": 740 - }, - { - "epoch": 0.5723112569994208, - "grad_norm": 0.0192471481859684, - "learning_rate": 9.993707532847745e-05, - "loss": 0.005, - "step": 741 - }, - { - "epoch": 0.5730836068739139, - "grad_norm": 0.026484373956918716, - "learning_rate": 9.993571512597028e-05, - "loss": 0.0055, - "step": 742 - }, - { - "epoch": 0.5738559567484071, - "grad_norm": 0.01591717079281807, - "learning_rate": 9.993434038838306e-05, - "loss": 0.0055, - "step": 743 - }, - { - "epoch": 0.5746283066229002, - "grad_norm": 0.020151285454630852, - "learning_rate": 9.993295111611592e-05, - "loss": 0.0054, - "step": 744 - }, - { - "epoch": 0.5754006564973934, - "grad_norm": 0.015028299763798714, - "learning_rate": 9.993154730957326e-05, - "loss": 0.0059, - "step": 745 - }, - { - "epoch": 0.5761730063718865, - "grad_norm": 0.012499259784817696, - "learning_rate": 9.993012896916368e-05, - "loss": 0.0049, - "step": 746 - }, - { - "epoch": 0.5769453562463797, - "grad_norm": 0.019595742225646973, - "learning_rate": 9.992869609530001e-05, - "loss": 0.0053, - "step": 747 - }, - { - "epoch": 0.5777177061208728, - "grad_norm": 0.026846617460250854, - "learning_rate": 9.992724868839935e-05, - "loss": 0.0059, - "step": 748 - }, - { - "epoch": 0.5784900559953658, - "grad_norm": 0.043270599097013474, - "learning_rate": 9.992578674888302e-05, - "loss": 0.0059, - "step": 749 - }, - { - "epoch": 0.579262405869859, - "grad_norm": 0.03132093697786331, - "learning_rate": 9.992431027717652e-05, - "loss": 0.0055, - "step": 750 - }, - { - "epoch": 0.5800347557443521, - "grad_norm": 0.04177276790142059, - "learning_rate": 9.992281927370963e-05, - "loss": 0.0055, - "step": 751 - }, - { - "epoch": 0.5808071056188453, - "grad_norm": 0.018702374771237373, - "learning_rate": 9.992131373891635e-05, - "loss": 0.0056, - "step": 752 - }, - { - "epoch": 0.5815794554933384, - "grad_norm": 0.031128762289881706, - "learning_rate": 9.991979367323491e-05, - "loss": 0.0058, - "step": 753 - }, - { - "epoch": 0.5823518053678316, - "grad_norm": 0.028675846755504608, - "learning_rate": 9.991825907710775e-05, - "loss": 0.0051, - "step": 754 - }, - { - "epoch": 0.5831241552423247, - "grad_norm": 0.01913781464099884, - "learning_rate": 9.991670995098155e-05, - "loss": 0.0052, - "step": 755 - }, - { - "epoch": 0.5838965051168179, - "grad_norm": 0.02314949221909046, - "learning_rate": 9.991514629530723e-05, - "loss": 0.0058, - "step": 756 - }, - { - "epoch": 0.584668854991311, - "grad_norm": 0.0678134635090828, - "learning_rate": 9.991356811053994e-05, - "loss": 0.0058, - "step": 757 - }, - { - "epoch": 0.5854412048658042, - "grad_norm": 0.013736193999648094, - "learning_rate": 9.991197539713903e-05, - "loss": 0.0047, - "step": 758 - }, - { - "epoch": 0.5862135547402973, - "grad_norm": 0.059933267533779144, - "learning_rate": 9.991036815556814e-05, - "loss": 0.0062, - "step": 759 - }, - { - "epoch": 0.5869859046147905, - "grad_norm": 0.04519271478056908, - "learning_rate": 9.990874638629506e-05, - "loss": 0.006, - "step": 760 - }, - { - "epoch": 0.5877582544892836, - "grad_norm": 0.018265997990965843, - "learning_rate": 9.990711008979187e-05, - "loss": 0.0057, - "step": 761 - }, - { - "epoch": 0.5885306043637768, - "grad_norm": 0.04235892370343208, - "learning_rate": 9.990545926653485e-05, - "loss": 0.0055, - "step": 762 - }, - { - "epoch": 0.58930295423827, - "grad_norm": 0.04719007760286331, - "learning_rate": 9.990379391700451e-05, - "loss": 0.0057, - "step": 763 - }, - { - "epoch": 0.5900753041127631, - "grad_norm": 0.03028462827205658, - "learning_rate": 9.990211404168561e-05, - "loss": 0.0056, - "step": 764 - }, - { - "epoch": 0.5908476539872562, - "grad_norm": 0.02930932678282261, - "learning_rate": 9.990041964106708e-05, - "loss": 0.0054, - "step": 765 - }, - { - "epoch": 0.5916200038617494, - "grad_norm": 0.023444976657629013, - "learning_rate": 9.989871071564217e-05, - "loss": 0.0051, - "step": 766 - }, - { - "epoch": 0.5923923537362425, - "grad_norm": 0.021648840978741646, - "learning_rate": 9.989698726590829e-05, - "loss": 0.0056, - "step": 767 - }, - { - "epoch": 0.5931647036107357, - "grad_norm": 0.03562987968325615, - "learning_rate": 9.989524929236707e-05, - "loss": 0.0054, - "step": 768 - }, - { - "epoch": 0.5939370534852288, - "grad_norm": 0.0237098541110754, - "learning_rate": 9.989349679552441e-05, - "loss": 0.0056, - "step": 769 - }, - { - "epoch": 0.594709403359722, - "grad_norm": 0.032227423042058945, - "learning_rate": 9.989172977589043e-05, - "loss": 0.0057, - "step": 770 - }, - { - "epoch": 0.5954817532342151, - "grad_norm": 0.03126399964094162, - "learning_rate": 9.988994823397946e-05, - "loss": 0.0054, - "step": 771 - }, - { - "epoch": 0.5962541031087083, - "grad_norm": 0.012847068719565868, - "learning_rate": 9.988815217031005e-05, - "loss": 0.0054, - "step": 772 - }, - { - "epoch": 0.5970264529832013, - "grad_norm": 0.014000168070197105, - "learning_rate": 9.988634158540501e-05, - "loss": 0.005, - "step": 773 - }, - { - "epoch": 0.5977988028576945, - "grad_norm": 0.0342111699283123, - "learning_rate": 9.988451647979134e-05, - "loss": 0.0054, - "step": 774 - }, - { - "epoch": 0.5985711527321876, - "grad_norm": 0.017097115516662598, - "learning_rate": 9.98826768540003e-05, - "loss": 0.0056, - "step": 775 - }, - { - "epoch": 0.5993435026066808, - "grad_norm": 0.02337076887488365, - "learning_rate": 9.988082270856735e-05, - "loss": 0.0058, - "step": 776 - }, - { - "epoch": 0.6001158524811739, - "grad_norm": 0.020811082795262337, - "learning_rate": 9.98789540440322e-05, - "loss": 0.0048, - "step": 777 - }, - { - "epoch": 0.6008882023556671, - "grad_norm": 0.021415896713733673, - "learning_rate": 9.987707086093876e-05, - "loss": 0.0056, - "step": 778 - }, - { - "epoch": 0.6016605522301602, - "grad_norm": 0.022532187402248383, - "learning_rate": 9.987517315983517e-05, - "loss": 0.0052, - "step": 779 - }, - { - "epoch": 0.6024329021046534, - "grad_norm": 0.015836693346500397, - "learning_rate": 9.987326094127383e-05, - "loss": 0.0053, - "step": 780 - }, - { - "epoch": 0.6032052519791465, - "grad_norm": 0.026889432221651077, - "learning_rate": 9.987133420581133e-05, - "loss": 0.0051, - "step": 781 - }, - { - "epoch": 0.6039776018536397, - "grad_norm": 0.016825655475258827, - "learning_rate": 9.98693929540085e-05, - "loss": 0.0057, - "step": 782 - }, - { - "epoch": 0.6047499517281328, - "grad_norm": 0.013712005689740181, - "learning_rate": 9.986743718643037e-05, - "loss": 0.0052, - "step": 783 - }, - { - "epoch": 0.605522301602626, - "grad_norm": 0.014379930682480335, - "learning_rate": 9.986546690364625e-05, - "loss": 0.0055, - "step": 784 - }, - { - "epoch": 0.6062946514771191, - "grad_norm": 0.015972580760717392, - "learning_rate": 9.986348210622961e-05, - "loss": 0.0051, - "step": 785 - }, - { - "epoch": 0.6070670013516123, - "grad_norm": 0.012665828689932823, - "learning_rate": 9.98614827947582e-05, - "loss": 0.0052, - "step": 786 - }, - { - "epoch": 0.6078393512261054, - "grad_norm": 0.015817679464817047, - "learning_rate": 9.985946896981396e-05, - "loss": 0.0064, - "step": 787 - }, - { - "epoch": 0.6086117011005986, - "grad_norm": 0.02141885831952095, - "learning_rate": 9.985744063198305e-05, - "loss": 0.0057, - "step": 788 - }, - { - "epoch": 0.6093840509750917, - "grad_norm": 0.019856026396155357, - "learning_rate": 9.985539778185591e-05, - "loss": 0.0056, - "step": 789 - }, - { - "epoch": 0.6101564008495849, - "grad_norm": 0.014455040916800499, - "learning_rate": 9.985334042002714e-05, - "loss": 0.0058, - "step": 790 - }, - { - "epoch": 0.610928750724078, - "grad_norm": 0.012989351525902748, - "learning_rate": 9.985126854709559e-05, - "loss": 0.0055, - "step": 791 - }, - { - "epoch": 0.6117011005985712, - "grad_norm": 0.0151575468480587, - "learning_rate": 9.984918216366435e-05, - "loss": 0.0053, - "step": 792 - }, - { - "epoch": 0.6124734504730643, - "grad_norm": 0.018324896693229675, - "learning_rate": 9.984708127034067e-05, - "loss": 0.0052, - "step": 793 - }, - { - "epoch": 0.6132458003475575, - "grad_norm": 0.016394076868891716, - "learning_rate": 9.984496586773611e-05, - "loss": 0.0056, - "step": 794 - }, - { - "epoch": 0.6140181502220506, - "grad_norm": 0.016955038532614708, - "learning_rate": 9.98428359564664e-05, - "loss": 0.0058, - "step": 795 - }, - { - "epoch": 0.6147905000965437, - "grad_norm": 0.01672198437154293, - "learning_rate": 9.98406915371515e-05, - "loss": 0.005, - "step": 796 - }, - { - "epoch": 0.6155628499710368, - "grad_norm": 0.018940173089504242, - "learning_rate": 9.983853261041561e-05, - "loss": 0.0055, - "step": 797 - }, - { - "epoch": 0.61633519984553, - "grad_norm": 0.015106343664228916, - "learning_rate": 9.983635917688714e-05, - "loss": 0.0046, - "step": 798 - }, - { - "epoch": 0.6171075497200231, - "grad_norm": 0.03357703983783722, - "learning_rate": 9.983417123719872e-05, - "loss": 0.0055, - "step": 799 - }, - { - "epoch": 0.6178798995945163, - "grad_norm": 0.02359483204782009, - "learning_rate": 9.983196879198721e-05, - "loss": 0.005, - "step": 800 - }, - { - "epoch": 0.6186522494690094, - "grad_norm": 0.02851145900785923, - "learning_rate": 9.982975184189367e-05, - "loss": 0.0057, - "step": 801 - }, - { - "epoch": 0.6194245993435026, - "grad_norm": 0.05479593202471733, - "learning_rate": 9.982752038756344e-05, - "loss": 0.0054, - "step": 802 - }, - { - "epoch": 0.6201969492179957, - "grad_norm": 0.030343232676386833, - "learning_rate": 9.9825274429646e-05, - "loss": 0.0059, - "step": 803 - }, - { - "epoch": 0.6209692990924889, - "grad_norm": 0.03817446529865265, - "learning_rate": 9.982301396879512e-05, - "loss": 0.0053, - "step": 804 - }, - { - "epoch": 0.621741648966982, - "grad_norm": 0.02437039092183113, - "learning_rate": 9.982073900566876e-05, - "loss": 0.0058, - "step": 805 - }, - { - "epoch": 0.6225139988414752, - "grad_norm": 0.015008049085736275, - "learning_rate": 9.98184495409291e-05, - "loss": 0.0055, - "step": 806 - }, - { - "epoch": 0.6232863487159683, - "grad_norm": 0.03220735862851143, - "learning_rate": 9.981614557524254e-05, - "loss": 0.0056, - "step": 807 - }, - { - "epoch": 0.6240586985904615, - "grad_norm": 0.03285316750407219, - "learning_rate": 9.981382710927974e-05, - "loss": 0.0054, - "step": 808 - }, - { - "epoch": 0.6248310484649546, - "grad_norm": 0.028768053278326988, - "learning_rate": 9.981149414371553e-05, - "loss": 0.0049, - "step": 809 - }, - { - "epoch": 0.6256033983394478, - "grad_norm": 0.01450389251112938, - "learning_rate": 9.980914667922898e-05, - "loss": 0.0053, - "step": 810 - }, - { - "epoch": 0.6263757482139409, - "grad_norm": 0.025595488026738167, - "learning_rate": 9.980678471650337e-05, - "loss": 0.0054, - "step": 811 - }, - { - "epoch": 0.6271480980884341, - "grad_norm": 0.02557152882218361, - "learning_rate": 9.980440825622622e-05, - "loss": 0.0048, - "step": 812 - }, - { - "epoch": 0.6279204479629272, - "grad_norm": 0.013808295130729675, - "learning_rate": 9.980201729908926e-05, - "loss": 0.0052, - "step": 813 - }, - { - "epoch": 0.6286927978374204, - "grad_norm": 0.015615541487932205, - "learning_rate": 9.979961184578847e-05, - "loss": 0.0059, - "step": 814 - }, - { - "epoch": 0.6294651477119135, - "grad_norm": 0.01616467721760273, - "learning_rate": 9.979719189702397e-05, - "loss": 0.0051, - "step": 815 - }, - { - "epoch": 0.6302374975864067, - "grad_norm": 0.01760113425552845, - "learning_rate": 9.979475745350018e-05, - "loss": 0.005, - "step": 816 - }, - { - "epoch": 0.6310098474608998, - "grad_norm": 0.014776479452848434, - "learning_rate": 9.979230851592567e-05, - "loss": 0.005, - "step": 817 - }, - { - "epoch": 0.631782197335393, - "grad_norm": 0.014517302624881268, - "learning_rate": 9.978984508501332e-05, - "loss": 0.0053, - "step": 818 - }, - { - "epoch": 0.6325545472098861, - "grad_norm": 0.013414164073765278, - "learning_rate": 9.978736716148013e-05, - "loss": 0.0051, - "step": 819 - }, - { - "epoch": 0.6333268970843792, - "grad_norm": 0.018927980214357376, - "learning_rate": 9.978487474604741e-05, - "loss": 0.0053, - "step": 820 - }, - { - "epoch": 0.6340992469588723, - "grad_norm": 0.01567482389509678, - "learning_rate": 9.978236783944059e-05, - "loss": 0.0055, - "step": 821 - }, - { - "epoch": 0.6348715968333655, - "grad_norm": 0.015856629237532616, - "learning_rate": 9.97798464423894e-05, - "loss": 0.0057, - "step": 822 - }, - { - "epoch": 0.6356439467078586, - "grad_norm": 0.019539091736078262, - "learning_rate": 9.977731055562775e-05, - "loss": 0.0058, - "step": 823 - }, - { - "epoch": 0.6364162965823518, - "grad_norm": 0.015069976449012756, - "learning_rate": 9.977476017989377e-05, - "loss": 0.0058, - "step": 824 - }, - { - "epoch": 0.6371886464568449, - "grad_norm": 0.01712663099169731, - "learning_rate": 9.977219531592984e-05, - "loss": 0.0058, - "step": 825 - }, - { - "epoch": 0.6379609963313381, - "grad_norm": 0.015889057889580727, - "learning_rate": 9.97696159644825e-05, - "loss": 0.0053, - "step": 826 - }, - { - "epoch": 0.6387333462058312, - "grad_norm": 0.024051977321505547, - "learning_rate": 9.976702212630255e-05, - "loss": 0.0055, - "step": 827 - }, - { - "epoch": 0.6395056960803244, - "grad_norm": 0.018556464463472366, - "learning_rate": 9.976441380214499e-05, - "loss": 0.0054, - "step": 828 - }, - { - "epoch": 0.6402780459548175, - "grad_norm": 0.013441438786685467, - "learning_rate": 9.976179099276903e-05, - "loss": 0.0053, - "step": 829 - }, - { - "epoch": 0.6410503958293107, - "grad_norm": 0.010628352873027325, - "learning_rate": 9.975915369893813e-05, - "loss": 0.0051, - "step": 830 - }, - { - "epoch": 0.6418227457038038, - "grad_norm": 0.019367951899766922, - "learning_rate": 9.975650192141992e-05, - "loss": 0.0051, - "step": 831 - }, - { - "epoch": 0.642595095578297, - "grad_norm": 0.013856122270226479, - "learning_rate": 9.975383566098628e-05, - "loss": 0.0049, - "step": 832 - }, - { - "epoch": 0.6433674454527901, - "grad_norm": 0.02479681186378002, - "learning_rate": 9.975115491841329e-05, - "loss": 0.0054, - "step": 833 - }, - { - "epoch": 0.6441397953272833, - "grad_norm": 0.01671089604496956, - "learning_rate": 9.974845969448127e-05, - "loss": 0.0055, - "step": 834 - }, - { - "epoch": 0.6449121452017764, - "grad_norm": 0.03032640554010868, - "learning_rate": 9.974574998997471e-05, - "loss": 0.0061, - "step": 835 - }, - { - "epoch": 0.6456844950762696, - "grad_norm": 0.036496784538030624, - "learning_rate": 9.974302580568232e-05, - "loss": 0.0054, - "step": 836 - }, - { - "epoch": 0.6464568449507627, - "grad_norm": 0.014118451625108719, - "learning_rate": 9.974028714239709e-05, - "loss": 0.0054, - "step": 837 - }, - { - "epoch": 0.6472291948252559, - "grad_norm": 0.046305056661367416, - "learning_rate": 9.973753400091616e-05, - "loss": 0.0058, - "step": 838 - }, - { - "epoch": 0.648001544699749, - "grad_norm": 0.020002854987978935, - "learning_rate": 9.97347663820409e-05, - "loss": 0.0059, - "step": 839 - }, - { - "epoch": 0.6487738945742422, - "grad_norm": 0.03489231690764427, - "learning_rate": 9.973198428657688e-05, - "loss": 0.0056, - "step": 840 - }, - { - "epoch": 0.6495462444487353, - "grad_norm": 0.03892209753394127, - "learning_rate": 9.972918771533394e-05, - "loss": 0.0059, - "step": 841 - }, - { - "epoch": 0.6503185943232285, - "grad_norm": 0.011789434589445591, - "learning_rate": 9.972637666912607e-05, - "loss": 0.0049, - "step": 842 - }, - { - "epoch": 0.6510909441977216, - "grad_norm": 0.032112717628479004, - "learning_rate": 9.97235511487715e-05, - "loss": 0.0054, - "step": 843 - }, - { - "epoch": 0.6518632940722147, - "grad_norm": 0.050589319318532944, - "learning_rate": 9.972071115509266e-05, - "loss": 0.0058, - "step": 844 - }, - { - "epoch": 0.6526356439467078, - "grad_norm": 0.012880904600024223, - "learning_rate": 9.971785668891623e-05, - "loss": 0.005, - "step": 845 - }, - { - "epoch": 0.653407993821201, - "grad_norm": 0.046898335218429565, - "learning_rate": 9.971498775107305e-05, - "loss": 0.0057, - "step": 846 - }, - { - "epoch": 0.6541803436956941, - "grad_norm": 0.03865799680352211, - "learning_rate": 9.971210434239822e-05, - "loss": 0.0052, - "step": 847 - }, - { - "epoch": 0.6549526935701873, - "grad_norm": 0.015834303572773933, - "learning_rate": 9.9709206463731e-05, - "loss": 0.005, - "step": 848 - }, - { - "epoch": 0.6557250434446804, - "grad_norm": 0.016615983098745346, - "learning_rate": 9.970629411591494e-05, - "loss": 0.005, - "step": 849 - }, - { - "epoch": 0.6564973933191736, - "grad_norm": 0.0366445891559124, - "learning_rate": 9.970336729979772e-05, - "loss": 0.0053, - "step": 850 - }, - { - "epoch": 0.6572697431936667, - "grad_norm": 0.02086579240858555, - "learning_rate": 9.970042601623127e-05, - "loss": 0.0052, - "step": 851 - }, - { - "epoch": 0.6580420930681599, - "grad_norm": 0.021980177611112595, - "learning_rate": 9.969747026607172e-05, - "loss": 0.0053, - "step": 852 - }, - { - "epoch": 0.658814442942653, - "grad_norm": 0.029278066009283066, - "learning_rate": 9.969450005017944e-05, - "loss": 0.0052, - "step": 853 - }, - { - "epoch": 0.6595867928171462, - "grad_norm": 0.020770156756043434, - "learning_rate": 9.969151536941897e-05, - "loss": 0.0054, - "step": 854 - }, - { - "epoch": 0.6603591426916393, - "grad_norm": 0.014317609369754791, - "learning_rate": 9.968851622465907e-05, - "loss": 0.0049, - "step": 855 - }, - { - "epoch": 0.6611314925661325, - "grad_norm": 0.036996036767959595, - "learning_rate": 9.968550261677274e-05, - "loss": 0.0059, - "step": 856 - }, - { - "epoch": 0.6619038424406256, - "grad_norm": 0.017970040440559387, - "learning_rate": 9.968247454663717e-05, - "loss": 0.0048, - "step": 857 - }, - { - "epoch": 0.6626761923151188, - "grad_norm": 0.014349796809256077, - "learning_rate": 9.967943201513374e-05, - "loss": 0.0052, - "step": 858 - }, - { - "epoch": 0.6634485421896119, - "grad_norm": 0.023744618520140648, - "learning_rate": 9.967637502314806e-05, - "loss": 0.0049, - "step": 859 - }, - { - "epoch": 0.6642208920641051, - "grad_norm": 0.03156176209449768, - "learning_rate": 9.967330357156996e-05, - "loss": 0.0054, - "step": 860 - }, - { - "epoch": 0.6649932419385982, - "grad_norm": 0.019208727404475212, - "learning_rate": 9.967021766129345e-05, - "loss": 0.0052, - "step": 861 - }, - { - "epoch": 0.6657655918130914, - "grad_norm": 0.037500545382499695, - "learning_rate": 9.966711729321679e-05, - "loss": 0.005, - "step": 862 - }, - { - "epoch": 0.6665379416875845, - "grad_norm": 0.040097303688526154, - "learning_rate": 9.966400246824238e-05, - "loss": 0.0058, - "step": 863 - }, - { - "epoch": 0.6673102915620777, - "grad_norm": 0.020316198468208313, - "learning_rate": 9.966087318727691e-05, - "loss": 0.0049, - "step": 864 - }, - { - "epoch": 0.6680826414365708, - "grad_norm": 0.04022366181015968, - "learning_rate": 9.96577294512312e-05, - "loss": 0.0057, - "step": 865 - }, - { - "epoch": 0.668854991311064, - "grad_norm": 0.017211418598890305, - "learning_rate": 9.965457126102036e-05, - "loss": 0.0049, - "step": 866 - }, - { - "epoch": 0.6696273411855571, - "grad_norm": 0.018000515177845955, - "learning_rate": 9.965139861756362e-05, - "loss": 0.0047, - "step": 867 - }, - { - "epoch": 0.6703996910600502, - "grad_norm": 0.02290462702512741, - "learning_rate": 9.964821152178451e-05, - "loss": 0.0051, - "step": 868 - }, - { - "epoch": 0.6711720409345433, - "grad_norm": 0.020106492564082146, - "learning_rate": 9.964500997461065e-05, - "loss": 0.005, - "step": 869 - }, - { - "epoch": 0.6719443908090365, - "grad_norm": 0.05181401968002319, - "learning_rate": 9.9641793976974e-05, - "loss": 0.0054, - "step": 870 - }, - { - "epoch": 0.6727167406835296, - "grad_norm": 0.03370905667543411, - "learning_rate": 9.963856352981062e-05, - "loss": 0.0057, - "step": 871 - }, - { - "epoch": 0.6734890905580228, - "grad_norm": 0.02146386355161667, - "learning_rate": 9.963531863406082e-05, - "loss": 0.0056, - "step": 872 - }, - { - "epoch": 0.6742614404325159, - "grad_norm": 0.023943284526467323, - "learning_rate": 9.963205929066912e-05, - "loss": 0.0057, - "step": 873 - }, - { - "epoch": 0.675033790307009, - "grad_norm": 0.023869581520557404, - "learning_rate": 9.962878550058422e-05, - "loss": 0.0053, - "step": 874 - }, - { - "epoch": 0.6758061401815022, - "grad_norm": 0.025636158883571625, - "learning_rate": 9.962549726475906e-05, - "loss": 0.0055, - "step": 875 - }, - { - "epoch": 0.6765784900559954, - "grad_norm": 0.016352355480194092, - "learning_rate": 9.962219458415077e-05, - "loss": 0.0051, - "step": 876 - }, - { - "epoch": 0.6773508399304885, - "grad_norm": 0.017515957355499268, - "learning_rate": 9.961887745972065e-05, - "loss": 0.005, - "step": 877 - }, - { - "epoch": 0.6781231898049817, - "grad_norm": 0.021827103570103645, - "learning_rate": 9.961554589243424e-05, - "loss": 0.0051, - "step": 878 - }, - { - "epoch": 0.6788955396794748, - "grad_norm": 0.02612106315791607, - "learning_rate": 9.961219988326132e-05, - "loss": 0.0056, - "step": 879 - }, - { - "epoch": 0.679667889553968, - "grad_norm": 0.017447635531425476, - "learning_rate": 9.960883943317579e-05, - "loss": 0.0052, - "step": 880 - }, - { - "epoch": 0.6804402394284611, - "grad_norm": 0.023314133286476135, - "learning_rate": 9.960546454315582e-05, - "loss": 0.0051, - "step": 881 - }, - { - "epoch": 0.6812125893029543, - "grad_norm": 0.02142924815416336, - "learning_rate": 9.960207521418374e-05, - "loss": 0.0053, - "step": 882 - }, - { - "epoch": 0.6819849391774474, - "grad_norm": 0.022927336394786835, - "learning_rate": 9.959867144724611e-05, - "loss": 0.006, - "step": 883 - }, - { - "epoch": 0.6827572890519406, - "grad_norm": 0.01784409210085869, - "learning_rate": 9.95952532433337e-05, - "loss": 0.005, - "step": 884 - }, - { - "epoch": 0.6835296389264337, - "grad_norm": 0.017598478123545647, - "learning_rate": 9.959182060344144e-05, - "loss": 0.0057, - "step": 885 - }, - { - "epoch": 0.6843019888009269, - "grad_norm": 0.019970614463090897, - "learning_rate": 9.958837352856852e-05, - "loss": 0.0057, - "step": 886 - }, - { - "epoch": 0.68507433867542, - "grad_norm": 0.016816386952996254, - "learning_rate": 9.958491201971825e-05, - "loss": 0.0052, - "step": 887 - }, - { - "epoch": 0.6858466885499132, - "grad_norm": 0.02354409731924534, - "learning_rate": 9.958143607789823e-05, - "loss": 0.0057, - "step": 888 - }, - { - "epoch": 0.6866190384244063, - "grad_norm": 0.01257417444139719, - "learning_rate": 9.957794570412022e-05, - "loss": 0.0049, - "step": 889 - }, - { - "epoch": 0.6873913882988995, - "grad_norm": 0.015461335889995098, - "learning_rate": 9.957444089940018e-05, - "loss": 0.0051, - "step": 890 - }, - { - "epoch": 0.6881637381733925, - "grad_norm": 0.016879508271813393, - "learning_rate": 9.957092166475828e-05, - "loss": 0.0057, - "step": 891 - }, - { - "epoch": 0.6889360880478856, - "grad_norm": 0.020782092586159706, - "learning_rate": 9.956738800121886e-05, - "loss": 0.0051, - "step": 892 - }, - { - "epoch": 0.6897084379223788, - "grad_norm": 0.013319279067218304, - "learning_rate": 9.95638399098105e-05, - "loss": 0.0049, - "step": 893 - }, - { - "epoch": 0.6904807877968719, - "grad_norm": 0.039818745106458664, - "learning_rate": 9.956027739156596e-05, - "loss": 0.0055, - "step": 894 - }, - { - "epoch": 0.6912531376713651, - "grad_norm": 0.030000966042280197, - "learning_rate": 9.955670044752223e-05, - "loss": 0.0054, - "step": 895 - }, - { - "epoch": 0.6920254875458582, - "grad_norm": 0.019784117117524147, - "learning_rate": 9.955310907872044e-05, - "loss": 0.006, - "step": 896 - }, - { - "epoch": 0.6927978374203514, - "grad_norm": 0.03611806407570839, - "learning_rate": 9.954950328620596e-05, - "loss": 0.0055, - "step": 897 - }, - { - "epoch": 0.6935701872948445, - "grad_norm": 0.03365226835012436, - "learning_rate": 9.954588307102834e-05, - "loss": 0.0054, - "step": 898 - }, - { - "epoch": 0.6943425371693377, - "grad_norm": 0.016417738050222397, - "learning_rate": 9.954224843424136e-05, - "loss": 0.0049, - "step": 899 - }, - { - "epoch": 0.6951148870438308, - "grad_norm": 0.028995562344789505, - "learning_rate": 9.953859937690295e-05, - "loss": 0.005, - "step": 900 - }, - { - "epoch": 0.695887236918324, - "grad_norm": 0.03609693422913551, - "learning_rate": 9.953493590007528e-05, - "loss": 0.0053, - "step": 901 - }, - { - "epoch": 0.6966595867928171, - "grad_norm": 0.01953314244747162, - "learning_rate": 9.953125800482469e-05, - "loss": 0.0052, - "step": 902 - }, - { - "epoch": 0.6974319366673103, - "grad_norm": 0.03401225060224533, - "learning_rate": 9.952756569222173e-05, - "loss": 0.0046, - "step": 903 - }, - { - "epoch": 0.6982042865418034, - "grad_norm": 0.027537843212485313, - "learning_rate": 9.952385896334114e-05, - "loss": 0.005, - "step": 904 - }, - { - "epoch": 0.6989766364162966, - "grad_norm": 0.02060253545641899, - "learning_rate": 9.952013781926186e-05, - "loss": 0.006, - "step": 905 - }, - { - "epoch": 0.6997489862907897, - "grad_norm": 0.017162665724754333, - "learning_rate": 9.951640226106704e-05, - "loss": 0.0053, - "step": 906 - }, - { - "epoch": 0.7005213361652829, - "grad_norm": 0.016031889244914055, - "learning_rate": 9.951265228984398e-05, - "loss": 0.005, - "step": 907 - }, - { - "epoch": 0.701293686039776, - "grad_norm": 0.01761656627058983, - "learning_rate": 9.950888790668424e-05, - "loss": 0.005, - "step": 908 - }, - { - "epoch": 0.7020660359142692, - "grad_norm": 0.015069528482854366, - "learning_rate": 9.950510911268352e-05, - "loss": 0.0049, - "step": 909 - }, - { - "epoch": 0.7028383857887623, - "grad_norm": 0.019050147384405136, - "learning_rate": 9.950131590894173e-05, - "loss": 0.0056, - "step": 910 - }, - { - "epoch": 0.7036107356632555, - "grad_norm": 0.03884793817996979, - "learning_rate": 9.949750829656299e-05, - "loss": 0.0046, - "step": 911 - }, - { - "epoch": 0.7043830855377486, - "grad_norm": 0.01480827759951353, - "learning_rate": 9.949368627665561e-05, - "loss": 0.0053, - "step": 912 - }, - { - "epoch": 0.7051554354122418, - "grad_norm": 0.015345815569162369, - "learning_rate": 9.948984985033208e-05, - "loss": 0.0047, - "step": 913 - }, - { - "epoch": 0.7059277852867349, - "grad_norm": 0.014812378212809563, - "learning_rate": 9.94859990187091e-05, - "loss": 0.0051, - "step": 914 - }, - { - "epoch": 0.706700135161228, - "grad_norm": 0.013499701395630836, - "learning_rate": 9.948213378290754e-05, - "loss": 0.0052, - "step": 915 - }, - { - "epoch": 0.7074724850357211, - "grad_norm": 0.010988417081534863, - "learning_rate": 9.947825414405248e-05, - "loss": 0.0044, - "step": 916 - }, - { - "epoch": 0.7082448349102143, - "grad_norm": 0.016281627118587494, - "learning_rate": 9.94743601032732e-05, - "loss": 0.0048, - "step": 917 - }, - { - "epoch": 0.7090171847847074, - "grad_norm": 0.017756443470716476, - "learning_rate": 9.947045166170315e-05, - "loss": 0.005, - "step": 918 - }, - { - "epoch": 0.7097895346592006, - "grad_norm": 0.01533447951078415, - "learning_rate": 9.946652882047999e-05, - "loss": 0.0056, - "step": 919 - }, - { - "epoch": 0.7105618845336937, - "grad_norm": 0.01683143526315689, - "learning_rate": 9.946259158074556e-05, - "loss": 0.0054, - "step": 920 - }, - { - "epoch": 0.7113342344081869, - "grad_norm": 0.01568206399679184, - "learning_rate": 9.945863994364588e-05, - "loss": 0.0055, - "step": 921 - }, - { - "epoch": 0.71210658428268, - "grad_norm": 0.015111725777387619, - "learning_rate": 9.945467391033121e-05, - "loss": 0.0055, - "step": 922 - }, - { - "epoch": 0.7128789341571732, - "grad_norm": 0.02060648240149021, - "learning_rate": 9.945069348195595e-05, - "loss": 0.0052, - "step": 923 - }, - { - "epoch": 0.7136512840316663, - "grad_norm": 0.014100771397352219, - "learning_rate": 9.94466986596787e-05, - "loss": 0.0055, - "step": 924 - }, - { - "epoch": 0.7144236339061595, - "grad_norm": 0.012977558188140392, - "learning_rate": 9.944268944466226e-05, - "loss": 0.0043, - "step": 925 - }, - { - "epoch": 0.7151959837806526, - "grad_norm": 0.027847349643707275, - "learning_rate": 9.943866583807362e-05, - "loss": 0.0052, - "step": 926 - }, - { - "epoch": 0.7159683336551458, - "grad_norm": 0.021106727421283722, - "learning_rate": 9.943462784108396e-05, - "loss": 0.0058, - "step": 927 - }, - { - "epoch": 0.7167406835296389, - "grad_norm": 0.016337329521775246, - "learning_rate": 9.943057545486863e-05, - "loss": 0.0052, - "step": 928 - }, - { - "epoch": 0.7175130334041321, - "grad_norm": 0.01878957264125347, - "learning_rate": 9.942650868060716e-05, - "loss": 0.0051, - "step": 929 - }, - { - "epoch": 0.7182853832786252, - "grad_norm": 0.030198317021131516, - "learning_rate": 9.942242751948335e-05, - "loss": 0.0051, - "step": 930 - }, - { - "epoch": 0.7190577331531184, - "grad_norm": 0.027476893737912178, - "learning_rate": 9.941833197268509e-05, - "loss": 0.0059, - "step": 931 - }, - { - "epoch": 0.7198300830276115, - "grad_norm": 0.025040531530976295, - "learning_rate": 9.941422204140449e-05, - "loss": 0.0055, - "step": 932 - }, - { - "epoch": 0.7206024329021047, - "grad_norm": 0.024198127910494804, - "learning_rate": 9.941009772683786e-05, - "loss": 0.005, - "step": 933 - }, - { - "epoch": 0.7213747827765978, - "grad_norm": 0.015231741592288017, - "learning_rate": 9.94059590301857e-05, - "loss": 0.0047, - "step": 934 - }, - { - "epoch": 0.722147132651091, - "grad_norm": 0.014660035260021687, - "learning_rate": 9.940180595265266e-05, - "loss": 0.0053, - "step": 935 - }, - { - "epoch": 0.7229194825255841, - "grad_norm": 0.025344964116811752, - "learning_rate": 9.939763849544762e-05, - "loss": 0.0057, - "step": 936 - }, - { - "epoch": 0.7236918324000773, - "grad_norm": 0.011590663343667984, - "learning_rate": 9.939345665978361e-05, - "loss": 0.0049, - "step": 937 - }, - { - "epoch": 0.7244641822745704, - "grad_norm": 0.023815158754587173, - "learning_rate": 9.938926044687788e-05, - "loss": 0.0054, - "step": 938 - }, - { - "epoch": 0.7252365321490635, - "grad_norm": 0.023053664714097977, - "learning_rate": 9.938504985795184e-05, - "loss": 0.0044, - "step": 939 - }, - { - "epoch": 0.7260088820235566, - "grad_norm": 0.018284421414136887, - "learning_rate": 9.93808248942311e-05, - "loss": 0.0055, - "step": 940 - }, - { - "epoch": 0.7267812318980498, - "grad_norm": 0.031033355742692947, - "learning_rate": 9.937658555694541e-05, - "loss": 0.0052, - "step": 941 - }, - { - "epoch": 0.7275535817725429, - "grad_norm": 0.020948749035596848, - "learning_rate": 9.937233184732877e-05, - "loss": 0.0058, - "step": 942 - }, - { - "epoch": 0.7283259316470361, - "grad_norm": 0.016316091641783714, - "learning_rate": 9.936806376661932e-05, - "loss": 0.0056, - "step": 943 - }, - { - "epoch": 0.7290982815215292, - "grad_norm": 0.02747221291065216, - "learning_rate": 9.936378131605941e-05, - "loss": 0.0052, - "step": 944 - }, - { - "epoch": 0.7298706313960224, - "grad_norm": 0.03303531929850578, - "learning_rate": 9.935948449689553e-05, - "loss": 0.0049, - "step": 945 - }, - { - "epoch": 0.7306429812705155, - "grad_norm": 0.027890313416719437, - "learning_rate": 9.935517331037842e-05, - "loss": 0.0048, - "step": 946 - }, - { - "epoch": 0.7314153311450087, - "grad_norm": 0.0356268435716629, - "learning_rate": 9.935084775776292e-05, - "loss": 0.0053, - "step": 947 - }, - { - "epoch": 0.7321876810195018, - "grad_norm": 0.02592495270073414, - "learning_rate": 9.934650784030812e-05, - "loss": 0.0051, - "step": 948 - }, - { - "epoch": 0.732960030893995, - "grad_norm": 0.01498821098357439, - "learning_rate": 9.934215355927724e-05, - "loss": 0.0048, - "step": 949 - }, - { - "epoch": 0.7337323807684881, - "grad_norm": 0.020218007266521454, - "learning_rate": 9.933778491593776e-05, - "loss": 0.005, - "step": 950 - }, - { - "epoch": 0.7345047306429813, - "grad_norm": 0.040983304381370544, - "learning_rate": 9.933340191156123e-05, - "loss": 0.0053, - "step": 951 - }, - { - "epoch": 0.7352770805174744, - "grad_norm": 0.02097119390964508, - "learning_rate": 9.932900454742347e-05, - "loss": 0.0053, - "step": 952 - }, - { - "epoch": 0.7360494303919676, - "grad_norm": 0.05633474886417389, - "learning_rate": 9.932459282480442e-05, - "loss": 0.0053, - "step": 953 - }, - { - "epoch": 0.7368217802664607, - "grad_norm": 0.03562900051474571, - "learning_rate": 9.932016674498822e-05, - "loss": 0.0058, - "step": 954 - }, - { - "epoch": 0.7375941301409539, - "grad_norm": 0.023267168551683426, - "learning_rate": 9.931572630926324e-05, - "loss": 0.0046, - "step": 955 - }, - { - "epoch": 0.738366480015447, - "grad_norm": 0.030092468485236168, - "learning_rate": 9.931127151892197e-05, - "loss": 0.0054, - "step": 956 - }, - { - "epoch": 0.7391388298899402, - "grad_norm": 0.02708488330245018, - "learning_rate": 9.930680237526107e-05, - "loss": 0.0056, - "step": 957 - }, - { - "epoch": 0.7399111797644333, - "grad_norm": 0.011751326732337475, - "learning_rate": 9.93023188795814e-05, - "loss": 0.0047, - "step": 958 - }, - { - "epoch": 0.7406835296389265, - "grad_norm": 0.028150459751486778, - "learning_rate": 9.9297821033188e-05, - "loss": 0.0054, - "step": 959 - }, - { - "epoch": 0.7414558795134196, - "grad_norm": 0.017759088426828384, - "learning_rate": 9.929330883739011e-05, - "loss": 0.0054, - "step": 960 - }, - { - "epoch": 0.7422282293879128, - "grad_norm": 0.015162650495767593, - "learning_rate": 9.92887822935011e-05, - "loss": 0.005, - "step": 961 - }, - { - "epoch": 0.7430005792624059, - "grad_norm": 0.016975706443190575, - "learning_rate": 9.928424140283854e-05, - "loss": 0.0047, - "step": 962 - }, - { - "epoch": 0.743772929136899, - "grad_norm": 0.017186246812343597, - "learning_rate": 9.927968616672416e-05, - "loss": 0.0052, - "step": 963 - }, - { - "epoch": 0.7445452790113921, - "grad_norm": 0.010945098474621773, - "learning_rate": 9.927511658648389e-05, - "loss": 0.0044, - "step": 964 - }, - { - "epoch": 0.7453176288858853, - "grad_norm": 0.024373026564717293, - "learning_rate": 9.927053266344784e-05, - "loss": 0.0056, - "step": 965 - }, - { - "epoch": 0.7460899787603784, - "grad_norm": 0.03226197138428688, - "learning_rate": 9.926593439895027e-05, - "loss": 0.0053, - "step": 966 - }, - { - "epoch": 0.7468623286348716, - "grad_norm": 0.024539737030863762, - "learning_rate": 9.926132179432962e-05, - "loss": 0.0052, - "step": 967 - }, - { - "epoch": 0.7476346785093647, - "grad_norm": 0.04279707744717598, - "learning_rate": 9.92566948509285e-05, - "loss": 0.0055, - "step": 968 - }, - { - "epoch": 0.7484070283838579, - "grad_norm": 0.024123720824718475, - "learning_rate": 9.92520535700937e-05, - "loss": 0.0048, - "step": 969 - }, - { - "epoch": 0.749179378258351, - "grad_norm": 0.03971217945218086, - "learning_rate": 9.924739795317621e-05, - "loss": 0.0051, - "step": 970 - }, - { - "epoch": 0.7499517281328442, - "grad_norm": 0.04534037783741951, - "learning_rate": 9.924272800153117e-05, - "loss": 0.0058, - "step": 971 - }, - { - "epoch": 0.7507240780073373, - "grad_norm": 0.014068394899368286, - "learning_rate": 9.923804371651783e-05, - "loss": 0.0055, - "step": 972 - }, - { - "epoch": 0.7514964278818305, - "grad_norm": 0.020580369979143143, - "learning_rate": 9.923334509949973e-05, - "loss": 0.0048, - "step": 973 - }, - { - "epoch": 0.7522687777563236, - "grad_norm": 0.03742160275578499, - "learning_rate": 9.922863215184452e-05, - "loss": 0.005, - "step": 974 - }, - { - "epoch": 0.7530411276308168, - "grad_norm": 0.043946683406829834, - "learning_rate": 9.9223904874924e-05, - "loss": 0.0055, - "step": 975 - }, - { - "epoch": 0.7538134775053099, - "grad_norm": 0.04362013563513756, - "learning_rate": 9.921916327011418e-05, - "loss": 0.0049, - "step": 976 - }, - { - "epoch": 0.7545858273798031, - "grad_norm": 0.03981248289346695, - "learning_rate": 9.921440733879524e-05, - "loss": 0.0052, - "step": 977 - }, - { - "epoch": 0.7553581772542962, - "grad_norm": 0.023527968674898148, - "learning_rate": 9.920963708235148e-05, - "loss": 0.0051, - "step": 978 - }, - { - "epoch": 0.7561305271287894, - "grad_norm": 0.01712462492287159, - "learning_rate": 9.920485250217144e-05, - "loss": 0.0048, - "step": 979 - }, - { - "epoch": 0.7569028770032825, - "grad_norm": 0.020297260954976082, - "learning_rate": 9.920005359964778e-05, - "loss": 0.0055, - "step": 980 - }, - { - "epoch": 0.7576752268777757, - "grad_norm": 0.048822131007909775, - "learning_rate": 9.919524037617735e-05, - "loss": 0.0057, - "step": 981 - }, - { - "epoch": 0.7584475767522688, - "grad_norm": 0.015059332363307476, - "learning_rate": 9.919041283316116e-05, - "loss": 0.0051, - "step": 982 - }, - { - "epoch": 0.759219926626762, - "grad_norm": 0.021079909056425095, - "learning_rate": 9.918557097200441e-05, - "loss": 0.006, - "step": 983 - }, - { - "epoch": 0.7599922765012551, - "grad_norm": 0.04429293051362038, - "learning_rate": 9.918071479411642e-05, - "loss": 0.0057, - "step": 984 - }, - { - "epoch": 0.7607646263757483, - "grad_norm": 0.05578068643808365, - "learning_rate": 9.91758443009107e-05, - "loss": 0.0056, - "step": 985 - }, - { - "epoch": 0.7615369762502413, - "grad_norm": 0.039931271225214005, - "learning_rate": 9.917095949380497e-05, - "loss": 0.005, - "step": 986 - }, - { - "epoch": 0.7623093261247345, - "grad_norm": 0.07003474980592728, - "learning_rate": 9.916606037422105e-05, - "loss": 0.0056, - "step": 987 - }, - { - "epoch": 0.7630816759992276, - "grad_norm": 0.03523440286517143, - "learning_rate": 9.916114694358498e-05, - "loss": 0.0051, - "step": 988 - }, - { - "epoch": 0.7638540258737208, - "grad_norm": 0.028762444853782654, - "learning_rate": 9.915621920332691e-05, - "loss": 0.0053, - "step": 989 - }, - { - "epoch": 0.7646263757482139, - "grad_norm": 0.0738329142332077, - "learning_rate": 9.915127715488121e-05, - "loss": 0.0066, - "step": 990 - }, - { - "epoch": 0.765398725622707, - "grad_norm": 0.03267653286457062, - "learning_rate": 9.914632079968639e-05, - "loss": 0.0052, - "step": 991 - }, - { - "epoch": 0.7661710754972002, - "grad_norm": 0.02162010595202446, - "learning_rate": 9.914135013918511e-05, - "loss": 0.0051, - "step": 992 - }, - { - "epoch": 0.7669434253716934, - "grad_norm": 0.05322584509849548, - "learning_rate": 9.913636517482423e-05, - "loss": 0.0063, - "step": 993 - }, - { - "epoch": 0.7677157752461865, - "grad_norm": 0.04363081231713295, - "learning_rate": 9.913136590805472e-05, - "loss": 0.0053, - "step": 994 - }, - { - "epoch": 0.7684881251206797, - "grad_norm": 0.013890145346522331, - "learning_rate": 9.912635234033178e-05, - "loss": 0.0051, - "step": 995 - }, - { - "epoch": 0.7692604749951728, - "grad_norm": 0.044813696295022964, - "learning_rate": 9.912132447311472e-05, - "loss": 0.0061, - "step": 996 - }, - { - "epoch": 0.770032824869666, - "grad_norm": 0.06689228862524033, - "learning_rate": 9.911628230786703e-05, - "loss": 0.0056, - "step": 997 - }, - { - "epoch": 0.7708051747441591, - "grad_norm": 0.019164199009537697, - "learning_rate": 9.911122584605638e-05, - "loss": 0.0051, - "step": 998 - }, - { - "epoch": 0.7715775246186523, - "grad_norm": 0.039524856954813004, - "learning_rate": 9.910615508915457e-05, - "loss": 0.005, - "step": 999 - }, - { - "epoch": 0.7723498744931454, - "grad_norm": 0.07112497836351395, - "learning_rate": 9.910107003863755e-05, - "loss": 0.0063, - "step": 1000 - }, - { - "epoch": 0.7731222243676386, - "grad_norm": 0.018947944045066833, - "learning_rate": 9.909597069598552e-05, - "loss": 0.0049, - "step": 1001 - }, - { - "epoch": 0.7738945742421317, - "grad_norm": 0.035670891404151917, - "learning_rate": 9.909085706268272e-05, - "loss": 0.0053, - "step": 1002 - }, - { - "epoch": 0.7746669241166249, - "grad_norm": 0.062375158071517944, - "learning_rate": 9.908572914021762e-05, - "loss": 0.0055, - "step": 1003 - }, - { - "epoch": 0.775439273991118, - "grad_norm": 0.01835649274289608, - "learning_rate": 9.908058693008284e-05, - "loss": 0.0053, - "step": 1004 - }, - { - "epoch": 0.7762116238656112, - "grad_norm": 0.0214702058583498, - "learning_rate": 9.907543043377514e-05, - "loss": 0.0055, - "step": 1005 - }, - { - "epoch": 0.7769839737401043, - "grad_norm": 0.04028837010264397, - "learning_rate": 9.907025965279548e-05, - "loss": 0.0051, - "step": 1006 - }, - { - "epoch": 0.7777563236145975, - "grad_norm": 0.03422695770859718, - "learning_rate": 9.906507458864891e-05, - "loss": 0.0056, - "step": 1007 - }, - { - "epoch": 0.7785286734890906, - "grad_norm": 0.013215802609920502, - "learning_rate": 9.905987524284471e-05, - "loss": 0.0051, - "step": 1008 - }, - { - "epoch": 0.7793010233635838, - "grad_norm": 0.030311673879623413, - "learning_rate": 9.905466161689627e-05, - "loss": 0.0055, - "step": 1009 - }, - { - "epoch": 0.7800733732380768, - "grad_norm": 0.041229937225580215, - "learning_rate": 9.904943371232116e-05, - "loss": 0.0054, - "step": 1010 - }, - { - "epoch": 0.78084572311257, - "grad_norm": 0.0180194228887558, - "learning_rate": 9.904419153064107e-05, - "loss": 0.0051, - "step": 1011 - }, - { - "epoch": 0.7816180729870631, - "grad_norm": 0.027759356424212456, - "learning_rate": 9.90389350733819e-05, - "loss": 0.0052, - "step": 1012 - }, - { - "epoch": 0.7823904228615562, - "grad_norm": 0.02440088428556919, - "learning_rate": 9.903366434207367e-05, - "loss": 0.0054, - "step": 1013 - }, - { - "epoch": 0.7831627727360494, - "grad_norm": 0.017409170046448708, - "learning_rate": 9.902837933825055e-05, - "loss": 0.0052, - "step": 1014 - }, - { - "epoch": 0.7839351226105425, - "grad_norm": 0.018347326666116714, - "learning_rate": 9.902308006345091e-05, - "loss": 0.0059, - "step": 1015 - }, - { - "epoch": 0.7847074724850357, - "grad_norm": 0.030546607449650764, - "learning_rate": 9.90177665192172e-05, - "loss": 0.0052, - "step": 1016 - }, - { - "epoch": 0.7854798223595288, - "grad_norm": 0.029116885736584663, - "learning_rate": 9.901243870709609e-05, - "loss": 0.0049, - "step": 1017 - }, - { - "epoch": 0.786252172234022, - "grad_norm": 0.022032609209418297, - "learning_rate": 9.900709662863837e-05, - "loss": 0.0052, - "step": 1018 - }, - { - "epoch": 0.7870245221085151, - "grad_norm": 0.04383152723312378, - "learning_rate": 9.900174028539899e-05, - "loss": 0.0056, - "step": 1019 - }, - { - "epoch": 0.7877968719830083, - "grad_norm": 0.01003220397979021, - "learning_rate": 9.899636967893706e-05, - "loss": 0.0053, - "step": 1020 - }, - { - "epoch": 0.7885692218575014, - "grad_norm": 0.01930188573896885, - "learning_rate": 9.899098481081582e-05, - "loss": 0.0047, - "step": 1021 - }, - { - "epoch": 0.7893415717319946, - "grad_norm": 0.016947351396083832, - "learning_rate": 9.898558568260268e-05, - "loss": 0.0045, - "step": 1022 - }, - { - "epoch": 0.7901139216064877, - "grad_norm": 0.019604964181780815, - "learning_rate": 9.89801722958692e-05, - "loss": 0.0054, - "step": 1023 - }, - { - "epoch": 0.7908862714809809, - "grad_norm": 0.012611506506800652, - "learning_rate": 9.897474465219108e-05, - "loss": 0.0053, - "step": 1024 - }, - { - "epoch": 0.791658621355474, - "grad_norm": 0.022115463390946388, - "learning_rate": 9.896930275314819e-05, - "loss": 0.0055, - "step": 1025 - }, - { - "epoch": 0.7924309712299672, - "grad_norm": 0.010714237578213215, - "learning_rate": 9.896384660032452e-05, - "loss": 0.0041, - "step": 1026 - }, - { - "epoch": 0.7932033211044603, - "grad_norm": 0.021694796159863472, - "learning_rate": 9.895837619530822e-05, - "loss": 0.0048, - "step": 1027 - }, - { - "epoch": 0.7939756709789535, - "grad_norm": 0.026115277782082558, - "learning_rate": 9.895289153969161e-05, - "loss": 0.0049, - "step": 1028 - }, - { - "epoch": 0.7947480208534466, - "grad_norm": 0.0178262647241354, - "learning_rate": 9.894739263507113e-05, - "loss": 0.0056, - "step": 1029 - }, - { - "epoch": 0.7955203707279398, - "grad_norm": 0.0118311932310462, - "learning_rate": 9.894187948304737e-05, - "loss": 0.0055, - "step": 1030 - }, - { - "epoch": 0.796292720602433, - "grad_norm": 0.012592138722538948, - "learning_rate": 9.893635208522509e-05, - "loss": 0.0046, - "step": 1031 - }, - { - "epoch": 0.7970650704769261, - "grad_norm": 0.01537346187978983, - "learning_rate": 9.89308104432132e-05, - "loss": 0.005, - "step": 1032 - }, - { - "epoch": 0.7978374203514192, - "grad_norm": 0.013634511269629002, - "learning_rate": 9.892525455862469e-05, - "loss": 0.0052, - "step": 1033 - }, - { - "epoch": 0.7986097702259123, - "grad_norm": 0.017275096848607063, - "learning_rate": 9.891968443307678e-05, - "loss": 0.0052, - "step": 1034 - }, - { - "epoch": 0.7993821201004054, - "grad_norm": 0.014000173658132553, - "learning_rate": 9.891410006819079e-05, - "loss": 0.0051, - "step": 1035 - }, - { - "epoch": 0.8001544699748986, - "grad_norm": 0.01269017904996872, - "learning_rate": 9.890850146559219e-05, - "loss": 0.0041, - "step": 1036 - }, - { - "epoch": 0.8009268198493917, - "grad_norm": 0.013115121982991695, - "learning_rate": 9.890288862691059e-05, - "loss": 0.0052, - "step": 1037 - }, - { - "epoch": 0.8016991697238849, - "grad_norm": 0.0130803557112813, - "learning_rate": 9.889726155377979e-05, - "loss": 0.0052, - "step": 1038 - }, - { - "epoch": 0.802471519598378, - "grad_norm": 0.020898472517728806, - "learning_rate": 9.889162024783764e-05, - "loss": 0.0045, - "step": 1039 - }, - { - "epoch": 0.8032438694728712, - "grad_norm": 0.01687263883650303, - "learning_rate": 9.888596471072622e-05, - "loss": 0.005, - "step": 1040 - }, - { - "epoch": 0.8040162193473643, - "grad_norm": 0.016980089247226715, - "learning_rate": 9.888029494409172e-05, - "loss": 0.0051, - "step": 1041 - }, - { - "epoch": 0.8047885692218575, - "grad_norm": 0.029596013948321342, - "learning_rate": 9.887461094958445e-05, - "loss": 0.0051, - "step": 1042 - }, - { - "epoch": 0.8055609190963506, - "grad_norm": 0.010340928100049496, - "learning_rate": 9.886891272885893e-05, - "loss": 0.0047, - "step": 1043 - }, - { - "epoch": 0.8063332689708438, - "grad_norm": 0.01951519399881363, - "learning_rate": 9.886320028357372e-05, - "loss": 0.0049, - "step": 1044 - }, - { - "epoch": 0.8071056188453369, - "grad_norm": 0.013048755936324596, - "learning_rate": 9.885747361539162e-05, - "loss": 0.0055, - "step": 1045 - }, - { - "epoch": 0.8078779687198301, - "grad_norm": 0.015198012813925743, - "learning_rate": 9.885173272597949e-05, - "loss": 0.0053, - "step": 1046 - }, - { - "epoch": 0.8086503185943232, - "grad_norm": 0.015721959993243217, - "learning_rate": 9.884597761700838e-05, - "loss": 0.005, - "step": 1047 - }, - { - "epoch": 0.8094226684688164, - "grad_norm": 0.01544812973588705, - "learning_rate": 9.884020829015347e-05, - "loss": 0.005, - "step": 1048 - }, - { - "epoch": 0.8101950183433095, - "grad_norm": 0.013755733147263527, - "learning_rate": 9.883442474709406e-05, - "loss": 0.0043, - "step": 1049 - }, - { - "epoch": 0.8109673682178027, - "grad_norm": 0.024196283891797066, - "learning_rate": 9.882862698951361e-05, - "loss": 0.0047, - "step": 1050 - }, - { - "epoch": 0.8117397180922958, - "grad_norm": 0.01150796189904213, - "learning_rate": 9.882281501909968e-05, - "loss": 0.0048, - "step": 1051 - }, - { - "epoch": 0.812512067966789, - "grad_norm": 0.01957443729043007, - "learning_rate": 9.881698883754402e-05, - "loss": 0.005, - "step": 1052 - }, - { - "epoch": 0.8132844178412821, - "grad_norm": 0.029252223670482635, - "learning_rate": 9.881114844654249e-05, - "loss": 0.0052, - "step": 1053 - }, - { - "epoch": 0.8140567677157753, - "grad_norm": 0.014108984731137753, - "learning_rate": 9.880529384779508e-05, - "loss": 0.0052, - "step": 1054 - }, - { - "epoch": 0.8148291175902684, - "grad_norm": 0.025499513372778893, - "learning_rate": 9.879942504300593e-05, - "loss": 0.0052, - "step": 1055 - }, - { - "epoch": 0.8156014674647616, - "grad_norm": 0.018272900953888893, - "learning_rate": 9.87935420338833e-05, - "loss": 0.0045, - "step": 1056 - }, - { - "epoch": 0.8163738173392547, - "grad_norm": 0.014985686168074608, - "learning_rate": 9.878764482213959e-05, - "loss": 0.0049, - "step": 1057 - }, - { - "epoch": 0.8171461672137478, - "grad_norm": 0.020466111600399017, - "learning_rate": 9.878173340949136e-05, - "loss": 0.0049, - "step": 1058 - }, - { - "epoch": 0.8179185170882409, - "grad_norm": 0.013434127904474735, - "learning_rate": 9.877580779765922e-05, - "loss": 0.0044, - "step": 1059 - }, - { - "epoch": 0.8186908669627341, - "grad_norm": 0.014021494425833225, - "learning_rate": 9.876986798836803e-05, - "loss": 0.005, - "step": 1060 - }, - { - "epoch": 0.8194632168372272, - "grad_norm": 0.02708282321691513, - "learning_rate": 9.87639139833467e-05, - "loss": 0.0052, - "step": 1061 - }, - { - "epoch": 0.8202355667117204, - "grad_norm": 0.016625193879008293, - "learning_rate": 9.875794578432831e-05, - "loss": 0.0051, - "step": 1062 - }, - { - "epoch": 0.8210079165862135, - "grad_norm": 0.010641127824783325, - "learning_rate": 9.875196339305004e-05, - "loss": 0.0049, - "step": 1063 - }, - { - "epoch": 0.8217802664607067, - "grad_norm": 0.025936421006917953, - "learning_rate": 9.874596681125324e-05, - "loss": 0.0053, - "step": 1064 - }, - { - "epoch": 0.8225526163351998, - "grad_norm": 0.017523979768157005, - "learning_rate": 9.873995604068335e-05, - "loss": 0.0056, - "step": 1065 - }, - { - "epoch": 0.823324966209693, - "grad_norm": 0.018067866563796997, - "learning_rate": 9.873393108308999e-05, - "loss": 0.0048, - "step": 1066 - }, - { - "epoch": 0.8240973160841861, - "grad_norm": 0.026911109685897827, - "learning_rate": 9.872789194022684e-05, - "loss": 0.0047, - "step": 1067 - }, - { - "epoch": 0.8248696659586793, - "grad_norm": 0.02376358024775982, - "learning_rate": 9.872183861385177e-05, - "loss": 0.0049, - "step": 1068 - }, - { - "epoch": 0.8256420158331724, - "grad_norm": 0.023630045354366302, - "learning_rate": 9.871577110572679e-05, - "loss": 0.0054, - "step": 1069 - }, - { - "epoch": 0.8264143657076656, - "grad_norm": 0.03594619780778885, - "learning_rate": 9.870968941761793e-05, - "loss": 0.0054, - "step": 1070 - }, - { - "epoch": 0.8271867155821587, - "grad_norm": 0.013311091810464859, - "learning_rate": 9.870359355129548e-05, - "loss": 0.0047, - "step": 1071 - }, - { - "epoch": 0.8279590654566519, - "grad_norm": 0.018972985446453094, - "learning_rate": 9.869748350853378e-05, - "loss": 0.0053, - "step": 1072 - }, - { - "epoch": 0.828731415331145, - "grad_norm": 0.014069105498492718, - "learning_rate": 9.869135929111133e-05, - "loss": 0.0048, - "step": 1073 - }, - { - "epoch": 0.8295037652056382, - "grad_norm": 0.013836747035384178, - "learning_rate": 9.868522090081071e-05, - "loss": 0.0046, - "step": 1074 - }, - { - "epoch": 0.8302761150801313, - "grad_norm": 0.014035874046385288, - "learning_rate": 9.86790683394187e-05, - "loss": 0.0061, - "step": 1075 - }, - { - "epoch": 0.8310484649546245, - "grad_norm": 0.015665380284190178, - "learning_rate": 9.867290160872613e-05, - "loss": 0.005, - "step": 1076 - }, - { - "epoch": 0.8318208148291176, - "grad_norm": 0.010867977514863014, - "learning_rate": 9.866672071052798e-05, - "loss": 0.0045, - "step": 1077 - }, - { - "epoch": 0.8325931647036108, - "grad_norm": 0.010544631630182266, - "learning_rate": 9.86605256466234e-05, - "loss": 0.0045, - "step": 1078 - }, - { - "epoch": 0.8333655145781039, - "grad_norm": 0.018490837886929512, - "learning_rate": 9.865431641881558e-05, - "loss": 0.005, - "step": 1079 - }, - { - "epoch": 0.8341378644525971, - "grad_norm": 0.011083516292273998, - "learning_rate": 9.864809302891192e-05, - "loss": 0.0051, - "step": 1080 - }, - { - "epoch": 0.8349102143270901, - "grad_norm": 0.014008168131113052, - "learning_rate": 9.864185547872385e-05, - "loss": 0.0052, - "step": 1081 - }, - { - "epoch": 0.8356825642015833, - "grad_norm": 0.01850767433643341, - "learning_rate": 9.8635603770067e-05, - "loss": 0.0051, - "step": 1082 - }, - { - "epoch": 0.8364549140760764, - "grad_norm": 0.011455691419541836, - "learning_rate": 9.862933790476108e-05, - "loss": 0.0052, - "step": 1083 - }, - { - "epoch": 0.8372272639505696, - "grad_norm": 0.014069200493395329, - "learning_rate": 9.862305788462996e-05, - "loss": 0.005, - "step": 1084 - }, - { - "epoch": 0.8379996138250627, - "grad_norm": 0.02683812938630581, - "learning_rate": 9.861676371150154e-05, - "loss": 0.0047, - "step": 1085 - }, - { - "epoch": 0.8387719636995559, - "grad_norm": 0.016436012461781502, - "learning_rate": 9.861045538720798e-05, - "loss": 0.0055, - "step": 1086 - }, - { - "epoch": 0.839544313574049, - "grad_norm": 0.01961844600737095, - "learning_rate": 9.860413291358542e-05, - "loss": 0.0049, - "step": 1087 - }, - { - "epoch": 0.8403166634485422, - "grad_norm": 0.013608099892735481, - "learning_rate": 9.859779629247421e-05, - "loss": 0.0043, - "step": 1088 - }, - { - "epoch": 0.8410890133230353, - "grad_norm": 0.012145274318754673, - "learning_rate": 9.859144552571877e-05, - "loss": 0.0048, - "step": 1089 - }, - { - "epoch": 0.8418613631975285, - "grad_norm": 0.014922577887773514, - "learning_rate": 9.858508061516766e-05, - "loss": 0.0052, - "step": 1090 - }, - { - "epoch": 0.8426337130720216, - "grad_norm": 0.015595716424286366, - "learning_rate": 9.857870156267357e-05, - "loss": 0.0053, - "step": 1091 - }, - { - "epoch": 0.8434060629465148, - "grad_norm": 0.01833524741232395, - "learning_rate": 9.857230837009329e-05, - "loss": 0.0055, - "step": 1092 - }, - { - "epoch": 0.8441784128210079, - "grad_norm": 0.01634645089507103, - "learning_rate": 9.856590103928767e-05, - "loss": 0.0061, - "step": 1093 - }, - { - "epoch": 0.8449507626955011, - "grad_norm": 0.016310440376400948, - "learning_rate": 9.855947957212178e-05, - "loss": 0.0049, - "step": 1094 - }, - { - "epoch": 0.8457231125699942, - "grad_norm": 0.016290003433823586, - "learning_rate": 9.855304397046474e-05, - "loss": 0.005, - "step": 1095 - }, - { - "epoch": 0.8464954624444874, - "grad_norm": 0.013762637041509151, - "learning_rate": 9.85465942361898e-05, - "loss": 0.0049, - "step": 1096 - }, - { - "epoch": 0.8472678123189805, - "grad_norm": 0.017816467210650444, - "learning_rate": 9.854013037117431e-05, - "loss": 0.0052, - "step": 1097 - }, - { - "epoch": 0.8480401621934737, - "grad_norm": 0.019573671743273735, - "learning_rate": 9.853365237729976e-05, - "loss": 0.0046, - "step": 1098 - }, - { - "epoch": 0.8488125120679668, - "grad_norm": 0.013996942900121212, - "learning_rate": 9.852716025645175e-05, - "loss": 0.0049, - "step": 1099 - }, - { - "epoch": 0.84958486194246, - "grad_norm": 0.039246659725904465, - "learning_rate": 9.852065401051993e-05, - "loss": 0.0055, - "step": 1100 - }, - { - "epoch": 0.8503572118169531, - "grad_norm": 0.01461487915366888, - "learning_rate": 9.851413364139817e-05, - "loss": 0.0057, - "step": 1101 - }, - { - "epoch": 0.8511295616914463, - "grad_norm": 0.02236500196158886, - "learning_rate": 9.850759915098434e-05, - "loss": 0.0053, - "step": 1102 - }, - { - "epoch": 0.8519019115659394, - "grad_norm": 0.020510345697402954, - "learning_rate": 9.850105054118052e-05, - "loss": 0.0047, - "step": 1103 - }, - { - "epoch": 0.8526742614404326, - "grad_norm": 0.018675046041607857, - "learning_rate": 9.84944878138928e-05, - "loss": 0.0052, - "step": 1104 - }, - { - "epoch": 0.8534466113149256, - "grad_norm": 0.017054179683327675, - "learning_rate": 9.84879109710315e-05, - "loss": 0.0046, - "step": 1105 - }, - { - "epoch": 0.8542189611894188, - "grad_norm": 0.026352353394031525, - "learning_rate": 9.848132001451091e-05, - "loss": 0.0052, - "step": 1106 - }, - { - "epoch": 0.8549913110639119, - "grad_norm": 0.02260519377887249, - "learning_rate": 9.847471494624953e-05, - "loss": 0.005, - "step": 1107 - }, - { - "epoch": 0.8557636609384051, - "grad_norm": 0.03695909306406975, - "learning_rate": 9.846809576816996e-05, - "loss": 0.0051, - "step": 1108 - }, - { - "epoch": 0.8565360108128982, - "grad_norm": 0.030567850917577744, - "learning_rate": 9.846146248219882e-05, - "loss": 0.0049, - "step": 1109 - }, - { - "epoch": 0.8573083606873914, - "grad_norm": 0.010855203494429588, - "learning_rate": 9.845481509026697e-05, - "loss": 0.0051, - "step": 1110 - }, - { - "epoch": 0.8580807105618845, - "grad_norm": 0.017507346346974373, - "learning_rate": 9.844815359430926e-05, - "loss": 0.0054, - "step": 1111 - }, - { - "epoch": 0.8588530604363777, - "grad_norm": 0.03854590281844139, - "learning_rate": 9.84414779962647e-05, - "loss": 0.0051, - "step": 1112 - }, - { - "epoch": 0.8596254103108708, - "grad_norm": 0.014054159633815289, - "learning_rate": 9.843478829807639e-05, - "loss": 0.0048, - "step": 1113 - }, - { - "epoch": 0.860397760185364, - "grad_norm": 0.02449275553226471, - "learning_rate": 9.842808450169156e-05, - "loss": 0.0048, - "step": 1114 - }, - { - "epoch": 0.8611701100598571, - "grad_norm": 0.03688691556453705, - "learning_rate": 9.84213666090615e-05, - "loss": 0.0055, - "step": 1115 - }, - { - "epoch": 0.8619424599343503, - "grad_norm": 0.011802013963460922, - "learning_rate": 9.841463462214165e-05, - "loss": 0.0045, - "step": 1116 - }, - { - "epoch": 0.8627148098088434, - "grad_norm": 0.023805933073163033, - "learning_rate": 9.84078885428915e-05, - "loss": 0.005, - "step": 1117 - }, - { - "epoch": 0.8634871596833366, - "grad_norm": 0.01294466108083725, - "learning_rate": 9.840112837327469e-05, - "loss": 0.0045, - "step": 1118 - }, - { - "epoch": 0.8642595095578297, - "grad_norm": 0.017138268798589706, - "learning_rate": 9.839435411525892e-05, - "loss": 0.0051, - "step": 1119 - }, - { - "epoch": 0.8650318594323229, - "grad_norm": 0.00986329186707735, - "learning_rate": 9.838756577081605e-05, - "loss": 0.0043, - "step": 1120 - }, - { - "epoch": 0.865804209306816, - "grad_norm": 0.014220822602510452, - "learning_rate": 9.838076334192198e-05, - "loss": 0.005, - "step": 1121 - }, - { - "epoch": 0.8665765591813092, - "grad_norm": 0.017122695222496986, - "learning_rate": 9.837394683055675e-05, - "loss": 0.0044, - "step": 1122 - }, - { - "epoch": 0.8673489090558023, - "grad_norm": 0.01046262588351965, - "learning_rate": 9.836711623870445e-05, - "loss": 0.0043, - "step": 1123 - }, - { - "epoch": 0.8681212589302955, - "grad_norm": 0.014623397961258888, - "learning_rate": 9.836027156835332e-05, - "loss": 0.0049, - "step": 1124 - }, - { - "epoch": 0.8688936088047886, - "grad_norm": 0.01762223243713379, - "learning_rate": 9.835341282149568e-05, - "loss": 0.0041, - "step": 1125 - }, - { - "epoch": 0.8696659586792818, - "grad_norm": 0.02136484906077385, - "learning_rate": 9.834654000012796e-05, - "loss": 0.0053, - "step": 1126 - }, - { - "epoch": 0.8704383085537749, - "grad_norm": 0.03846302628517151, - "learning_rate": 9.833965310625063e-05, - "loss": 0.005, - "step": 1127 - }, - { - "epoch": 0.8712106584282681, - "grad_norm": 0.023591913282871246, - "learning_rate": 9.833275214186833e-05, - "loss": 0.0049, - "step": 1128 - }, - { - "epoch": 0.8719830083027611, - "grad_norm": 0.011188088916242123, - "learning_rate": 9.832583710898974e-05, - "loss": 0.0047, - "step": 1129 - }, - { - "epoch": 0.8727553581772542, - "grad_norm": 0.031315676867961884, - "learning_rate": 9.831890800962771e-05, - "loss": 0.005, - "step": 1130 - }, - { - "epoch": 0.8735277080517474, - "grad_norm": 0.02841937728226185, - "learning_rate": 9.831196484579908e-05, - "loss": 0.0052, - "step": 1131 - }, - { - "epoch": 0.8743000579262405, - "grad_norm": 0.016970042139291763, - "learning_rate": 9.830500761952484e-05, - "loss": 0.0043, - "step": 1132 - }, - { - "epoch": 0.8750724078007337, - "grad_norm": 0.017414122819900513, - "learning_rate": 9.82980363328301e-05, - "loss": 0.005, - "step": 1133 - }, - { - "epoch": 0.8758447576752268, - "grad_norm": 0.03681933134794235, - "learning_rate": 9.829105098774403e-05, - "loss": 0.0054, - "step": 1134 - }, - { - "epoch": 0.87661710754972, - "grad_norm": 0.03045729361474514, - "learning_rate": 9.828405158629987e-05, - "loss": 0.0046, - "step": 1135 - }, - { - "epoch": 0.8773894574242131, - "grad_norm": 0.02646372653543949, - "learning_rate": 9.827703813053499e-05, - "loss": 0.0051, - "step": 1136 - }, - { - "epoch": 0.8781618072987063, - "grad_norm": 0.0201679989695549, - "learning_rate": 9.827001062249086e-05, - "loss": 0.0049, - "step": 1137 - }, - { - "epoch": 0.8789341571731994, - "grad_norm": 0.022596973925828934, - "learning_rate": 9.826296906421298e-05, - "loss": 0.0051, - "step": 1138 - }, - { - "epoch": 0.8797065070476926, - "grad_norm": 0.01095764059573412, - "learning_rate": 9.8255913457751e-05, - "loss": 0.0045, - "step": 1139 - }, - { - "epoch": 0.8804788569221857, - "grad_norm": 0.02390890382230282, - "learning_rate": 9.824884380515862e-05, - "loss": 0.0051, - "step": 1140 - }, - { - "epoch": 0.8812512067966789, - "grad_norm": 0.024027172476053238, - "learning_rate": 9.824176010849367e-05, - "loss": 0.0043, - "step": 1141 - }, - { - "epoch": 0.882023556671172, - "grad_norm": 0.011508166790008545, - "learning_rate": 9.823466236981802e-05, - "loss": 0.0044, - "step": 1142 - }, - { - "epoch": 0.8827959065456652, - "grad_norm": 0.01273355819284916, - "learning_rate": 9.822755059119765e-05, - "loss": 0.0044, - "step": 1143 - }, - { - "epoch": 0.8835682564201583, - "grad_norm": 0.018790990114212036, - "learning_rate": 9.822042477470265e-05, - "loss": 0.0048, - "step": 1144 - }, - { - "epoch": 0.8843406062946515, - "grad_norm": 0.023220032453536987, - "learning_rate": 9.821328492240715e-05, - "loss": 0.005, - "step": 1145 - }, - { - "epoch": 0.8851129561691446, - "grad_norm": 0.029522253200411797, - "learning_rate": 9.820613103638941e-05, - "loss": 0.0045, - "step": 1146 - }, - { - "epoch": 0.8858853060436378, - "grad_norm": 0.014386980794370174, - "learning_rate": 9.819896311873174e-05, - "loss": 0.0045, - "step": 1147 - }, - { - "epoch": 0.886657655918131, - "grad_norm": 0.010787330567836761, - "learning_rate": 9.819178117152053e-05, - "loss": 0.0045, - "step": 1148 - }, - { - "epoch": 0.8874300057926241, - "grad_norm": 0.013695201836526394, - "learning_rate": 9.81845851968463e-05, - "loss": 0.0052, - "step": 1149 - }, - { - "epoch": 0.8882023556671172, - "grad_norm": 0.013468950055539608, - "learning_rate": 9.817737519680362e-05, - "loss": 0.004, - "step": 1150 - }, - { - "epoch": 0.8889747055416104, - "grad_norm": 0.013891457580029964, - "learning_rate": 9.817015117349113e-05, - "loss": 0.0051, - "step": 1151 - }, - { - "epoch": 0.8897470554161035, - "grad_norm": 0.011809339746832848, - "learning_rate": 9.816291312901159e-05, - "loss": 0.0043, - "step": 1152 - }, - { - "epoch": 0.8905194052905966, - "grad_norm": 0.021739143878221512, - "learning_rate": 9.81556610654718e-05, - "loss": 0.0047, - "step": 1153 - }, - { - "epoch": 0.8912917551650897, - "grad_norm": 0.013957880437374115, - "learning_rate": 9.814839498498268e-05, - "loss": 0.0047, - "step": 1154 - }, - { - "epoch": 0.8920641050395829, - "grad_norm": 0.011841529048979282, - "learning_rate": 9.814111488965918e-05, - "loss": 0.0047, - "step": 1155 - }, - { - "epoch": 0.892836454914076, - "grad_norm": 0.02467193454504013, - "learning_rate": 9.81338207816204e-05, - "loss": 0.0048, - "step": 1156 - }, - { - "epoch": 0.8936088047885692, - "grad_norm": 0.020380204543471336, - "learning_rate": 9.812651266298944e-05, - "loss": 0.0046, - "step": 1157 - }, - { - "epoch": 0.8943811546630623, - "grad_norm": 0.026750663295388222, - "learning_rate": 9.811919053589355e-05, - "loss": 0.0049, - "step": 1158 - }, - { - "epoch": 0.8951535045375555, - "grad_norm": 0.016972428187727928, - "learning_rate": 9.8111854402464e-05, - "loss": 0.0042, - "step": 1159 - }, - { - "epoch": 0.8959258544120486, - "grad_norm": 0.014942123554646969, - "learning_rate": 9.810450426483618e-05, - "loss": 0.0046, - "step": 1160 - }, - { - "epoch": 0.8966982042865418, - "grad_norm": 0.013228869996964931, - "learning_rate": 9.809714012514953e-05, - "loss": 0.0047, - "step": 1161 - }, - { - "epoch": 0.8974705541610349, - "grad_norm": 0.015319743193686008, - "learning_rate": 9.808976198554755e-05, - "loss": 0.0049, - "step": 1162 - }, - { - "epoch": 0.8982429040355281, - "grad_norm": 0.01305676344782114, - "learning_rate": 9.80823698481779e-05, - "loss": 0.0048, - "step": 1163 - }, - { - "epoch": 0.8990152539100212, - "grad_norm": 0.023310331627726555, - "learning_rate": 9.807496371519219e-05, - "loss": 0.0051, - "step": 1164 - }, - { - "epoch": 0.8997876037845144, - "grad_norm": 0.020194826647639275, - "learning_rate": 9.806754358874617e-05, - "loss": 0.0051, - "step": 1165 - }, - { - "epoch": 0.9005599536590075, - "grad_norm": 0.01612170971930027, - "learning_rate": 9.806010947099971e-05, - "loss": 0.004, - "step": 1166 - }, - { - "epoch": 0.9013323035335007, - "grad_norm": 0.02737812139093876, - "learning_rate": 9.805266136411663e-05, - "loss": 0.0049, - "step": 1167 - }, - { - "epoch": 0.9021046534079938, - "grad_norm": 0.011050846427679062, - "learning_rate": 9.804519927026496e-05, - "loss": 0.0045, - "step": 1168 - }, - { - "epoch": 0.902877003282487, - "grad_norm": 0.013702969066798687, - "learning_rate": 9.803772319161672e-05, - "loss": 0.0047, - "step": 1169 - }, - { - "epoch": 0.9036493531569801, - "grad_norm": 0.018601300194859505, - "learning_rate": 9.803023313034797e-05, - "loss": 0.0047, - "step": 1170 - }, - { - "epoch": 0.9044217030314733, - "grad_norm": 0.034467071294784546, - "learning_rate": 9.802272908863897e-05, - "loss": 0.0054, - "step": 1171 - }, - { - "epoch": 0.9051940529059664, - "grad_norm": 0.025881033390760422, - "learning_rate": 9.801521106867388e-05, - "loss": 0.0047, - "step": 1172 - }, - { - "epoch": 0.9059664027804596, - "grad_norm": 0.05163715407252312, - "learning_rate": 9.800767907264105e-05, - "loss": 0.0049, - "step": 1173 - }, - { - "epoch": 0.9067387526549527, - "grad_norm": 0.03238176926970482, - "learning_rate": 9.800013310273288e-05, - "loss": 0.0049, - "step": 1174 - }, - { - "epoch": 0.9075111025294459, - "grad_norm": 0.022428739815950394, - "learning_rate": 9.799257316114579e-05, - "loss": 0.0046, - "step": 1175 - }, - { - "epoch": 0.9082834524039389, - "grad_norm": 0.04021529480814934, - "learning_rate": 9.79849992500803e-05, - "loss": 0.0046, - "step": 1176 - }, - { - "epoch": 0.9090558022784321, - "grad_norm": 0.04667231813073158, - "learning_rate": 9.797741137174102e-05, - "loss": 0.0053, - "step": 1177 - }, - { - "epoch": 0.9098281521529252, - "grad_norm": 0.01686747372150421, - "learning_rate": 9.796980952833656e-05, - "loss": 0.0052, - "step": 1178 - }, - { - "epoch": 0.9106005020274184, - "grad_norm": 0.028423011302947998, - "learning_rate": 9.796219372207966e-05, - "loss": 0.0055, - "step": 1179 - }, - { - "epoch": 0.9113728519019115, - "grad_norm": 0.04666873812675476, - "learning_rate": 9.795456395518709e-05, - "loss": 0.0052, - "step": 1180 - }, - { - "epoch": 0.9121452017764047, - "grad_norm": 0.019545091316103935, - "learning_rate": 9.79469202298797e-05, - "loss": 0.0051, - "step": 1181 - }, - { - "epoch": 0.9129175516508978, - "grad_norm": 0.021862614899873734, - "learning_rate": 9.793926254838237e-05, - "loss": 0.0051, - "step": 1182 - }, - { - "epoch": 0.913689901525391, - "grad_norm": 0.0275779590010643, - "learning_rate": 9.793159091292408e-05, - "loss": 0.0054, - "step": 1183 - }, - { - "epoch": 0.9144622513998841, - "grad_norm": 0.04146139696240425, - "learning_rate": 9.792390532573786e-05, - "loss": 0.0051, - "step": 1184 - }, - { - "epoch": 0.9152346012743773, - "grad_norm": 0.010904265567660332, - "learning_rate": 9.791620578906079e-05, - "loss": 0.0043, - "step": 1185 - }, - { - "epoch": 0.9160069511488704, - "grad_norm": 0.03226253017783165, - "learning_rate": 9.790849230513402e-05, - "loss": 0.0047, - "step": 1186 - }, - { - "epoch": 0.9167793010233636, - "grad_norm": 0.03302263468503952, - "learning_rate": 9.790076487620276e-05, - "loss": 0.0048, - "step": 1187 - }, - { - "epoch": 0.9175516508978567, - "grad_norm": 0.016255497932434082, - "learning_rate": 9.78930235045163e-05, - "loss": 0.005, - "step": 1188 - }, - { - "epoch": 0.9183240007723499, - "grad_norm": 0.03711313381791115, - "learning_rate": 9.788526819232795e-05, - "loss": 0.0055, - "step": 1189 - }, - { - "epoch": 0.919096350646843, - "grad_norm": 0.0345834456384182, - "learning_rate": 9.787749894189507e-05, - "loss": 0.0059, - "step": 1190 - }, - { - "epoch": 0.9198687005213362, - "grad_norm": 0.01827995665371418, - "learning_rate": 9.786971575547914e-05, - "loss": 0.0047, - "step": 1191 - }, - { - "epoch": 0.9206410503958293, - "grad_norm": 0.02552584744989872, - "learning_rate": 9.786191863534563e-05, - "loss": 0.0044, - "step": 1192 - }, - { - "epoch": 0.9214134002703225, - "grad_norm": 0.05019800364971161, - "learning_rate": 9.78541075837641e-05, - "loss": 0.0055, - "step": 1193 - }, - { - "epoch": 0.9221857501448156, - "grad_norm": 0.012669118121266365, - "learning_rate": 9.784628260300817e-05, - "loss": 0.0045, - "step": 1194 - }, - { - "epoch": 0.9229581000193088, - "grad_norm": 0.022647053003311157, - "learning_rate": 9.783844369535549e-05, - "loss": 0.0044, - "step": 1195 - }, - { - "epoch": 0.9237304498938019, - "grad_norm": 0.02889241650700569, - "learning_rate": 9.783059086308779e-05, - "loss": 0.0049, - "step": 1196 - }, - { - "epoch": 0.9245027997682951, - "grad_norm": 0.03231604024767876, - "learning_rate": 9.782272410849083e-05, - "loss": 0.005, - "step": 1197 - }, - { - "epoch": 0.9252751496427882, - "grad_norm": 0.015890056267380714, - "learning_rate": 9.781484343385442e-05, - "loss": 0.0046, - "step": 1198 - }, - { - "epoch": 0.9260474995172814, - "grad_norm": 0.031044993549585342, - "learning_rate": 9.780694884147245e-05, - "loss": 0.0049, - "step": 1199 - }, - { - "epoch": 0.9268198493917744, - "grad_norm": 0.024014852941036224, - "learning_rate": 9.779904033364284e-05, - "loss": 0.0048, - "step": 1200 - }, - { - "epoch": 0.9275921992662676, - "grad_norm": 0.010703184641897678, - "learning_rate": 9.779111791266757e-05, - "loss": 0.0041, - "step": 1201 - }, - { - "epoch": 0.9283645491407607, - "grad_norm": 0.01483464427292347, - "learning_rate": 9.778318158085268e-05, - "loss": 0.0048, - "step": 1202 - }, - { - "epoch": 0.9291368990152539, - "grad_norm": 0.01868997886776924, - "learning_rate": 9.777523134050821e-05, - "loss": 0.0049, - "step": 1203 - }, - { - "epoch": 0.929909248889747, - "grad_norm": 0.0175771564245224, - "learning_rate": 9.776726719394831e-05, - "loss": 0.0043, - "step": 1204 - }, - { - "epoch": 0.9306815987642402, - "grad_norm": 0.010368075221776962, - "learning_rate": 9.775928914349113e-05, - "loss": 0.0042, - "step": 1205 - }, - { - "epoch": 0.9314539486387333, - "grad_norm": 0.017229681834578514, - "learning_rate": 9.775129719145891e-05, - "loss": 0.0047, - "step": 1206 - }, - { - "epoch": 0.9322262985132265, - "grad_norm": 0.02685217559337616, - "learning_rate": 9.774329134017788e-05, - "loss": 0.0049, - "step": 1207 - }, - { - "epoch": 0.9329986483877196, - "grad_norm": 0.01164172776043415, - "learning_rate": 9.77352715919784e-05, - "loss": 0.0049, - "step": 1208 - }, - { - "epoch": 0.9337709982622128, - "grad_norm": 0.027054371312260628, - "learning_rate": 9.772723794919478e-05, - "loss": 0.0045, - "step": 1209 - }, - { - "epoch": 0.9345433481367059, - "grad_norm": 0.022101471200585365, - "learning_rate": 9.771919041416544e-05, - "loss": 0.0054, - "step": 1210 - }, - { - "epoch": 0.9353156980111991, - "grad_norm": 0.015406518243253231, - "learning_rate": 9.771112898923283e-05, - "loss": 0.0045, - "step": 1211 - }, - { - "epoch": 0.9360880478856922, - "grad_norm": 0.028950640931725502, - "learning_rate": 9.770305367674341e-05, - "loss": 0.0047, - "step": 1212 - }, - { - "epoch": 0.9368603977601854, - "grad_norm": 0.01438950840383768, - "learning_rate": 9.769496447904774e-05, - "loss": 0.0044, - "step": 1213 - }, - { - "epoch": 0.9376327476346785, - "grad_norm": 0.011271512135863304, - "learning_rate": 9.768686139850037e-05, - "loss": 0.0045, - "step": 1214 - }, - { - "epoch": 0.9384050975091717, - "grad_norm": 0.02822425588965416, - "learning_rate": 9.76787444374599e-05, - "loss": 0.0049, - "step": 1215 - }, - { - "epoch": 0.9391774473836648, - "grad_norm": 0.014436332508921623, - "learning_rate": 9.767061359828899e-05, - "loss": 0.0051, - "step": 1216 - }, - { - "epoch": 0.939949797258158, - "grad_norm": 0.013423058204352856, - "learning_rate": 9.766246888335437e-05, - "loss": 0.0048, - "step": 1217 - }, - { - "epoch": 0.9407221471326511, - "grad_norm": 0.01095044706016779, - "learning_rate": 9.765431029502672e-05, - "loss": 0.0046, - "step": 1218 - }, - { - "epoch": 0.9414944970071443, - "grad_norm": 0.01047494262456894, - "learning_rate": 9.764613783568082e-05, - "loss": 0.0044, - "step": 1219 - }, - { - "epoch": 0.9422668468816374, - "grad_norm": 0.020056355744600296, - "learning_rate": 9.763795150769548e-05, - "loss": 0.0045, - "step": 1220 - }, - { - "epoch": 0.9430391967561306, - "grad_norm": 0.014146436005830765, - "learning_rate": 9.762975131345356e-05, - "loss": 0.005, - "step": 1221 - }, - { - "epoch": 0.9438115466306237, - "grad_norm": 0.02325545623898506, - "learning_rate": 9.76215372553419e-05, - "loss": 0.0056, - "step": 1222 - }, - { - "epoch": 0.9445838965051169, - "grad_norm": 0.023392075672745705, - "learning_rate": 9.761330933575145e-05, - "loss": 0.0056, - "step": 1223 - }, - { - "epoch": 0.9453562463796099, - "grad_norm": 0.012304428964853287, - "learning_rate": 9.760506755707713e-05, - "loss": 0.0051, - "step": 1224 - }, - { - "epoch": 0.9461285962541031, - "grad_norm": 0.023236092180013657, - "learning_rate": 9.759681192171795e-05, - "loss": 0.0045, - "step": 1225 - }, - { - "epoch": 0.9469009461285962, - "grad_norm": 0.020777378231287003, - "learning_rate": 9.758854243207689e-05, - "loss": 0.0046, - "step": 1226 - }, - { - "epoch": 0.9476732960030894, - "grad_norm": 0.013657379895448685, - "learning_rate": 9.758025909056103e-05, - "loss": 0.0047, - "step": 1227 - }, - { - "epoch": 0.9484456458775825, - "grad_norm": 0.024491876363754272, - "learning_rate": 9.757196189958145e-05, - "loss": 0.0048, - "step": 1228 - }, - { - "epoch": 0.9492179957520757, - "grad_norm": 0.015795817598700523, - "learning_rate": 9.756365086155325e-05, - "loss": 0.0045, - "step": 1229 - }, - { - "epoch": 0.9499903456265688, - "grad_norm": 0.011541806161403656, - "learning_rate": 9.755532597889558e-05, - "loss": 0.0049, - "step": 1230 - }, - { - "epoch": 0.950762695501062, - "grad_norm": 0.02976318635046482, - "learning_rate": 9.75469872540316e-05, - "loss": 0.0048, - "step": 1231 - }, - { - "epoch": 0.9515350453755551, - "grad_norm": 0.01473459042608738, - "learning_rate": 9.75386346893885e-05, - "loss": 0.0044, - "step": 1232 - }, - { - "epoch": 0.9523073952500483, - "grad_norm": 0.014375674538314342, - "learning_rate": 9.753026828739756e-05, - "loss": 0.0047, - "step": 1233 - }, - { - "epoch": 0.9530797451245414, - "grad_norm": 0.03567088767886162, - "learning_rate": 9.7521888050494e-05, - "loss": 0.0051, - "step": 1234 - }, - { - "epoch": 0.9538520949990346, - "grad_norm": 0.018942657858133316, - "learning_rate": 9.75134939811171e-05, - "loss": 0.0042, - "step": 1235 - }, - { - "epoch": 0.9546244448735277, - "grad_norm": 0.024158213287591934, - "learning_rate": 9.750508608171018e-05, - "loss": 0.0048, - "step": 1236 - }, - { - "epoch": 0.9553967947480209, - "grad_norm": 0.04132556542754173, - "learning_rate": 9.749666435472059e-05, - "loss": 0.0049, - "step": 1237 - }, - { - "epoch": 0.956169144622514, - "grad_norm": 0.013448309153318405, - "learning_rate": 9.748822880259967e-05, - "loss": 0.005, - "step": 1238 - }, - { - "epoch": 0.9569414944970072, - "grad_norm": 0.034693315625190735, - "learning_rate": 9.747977942780281e-05, - "loss": 0.0047, - "step": 1239 - }, - { - "epoch": 0.9577138443715003, - "grad_norm": 0.019551943987607956, - "learning_rate": 9.747131623278943e-05, - "loss": 0.0053, - "step": 1240 - }, - { - "epoch": 0.9584861942459935, - "grad_norm": 0.01473858579993248, - "learning_rate": 9.746283922002295e-05, - "loss": 0.0052, - "step": 1241 - }, - { - "epoch": 0.9592585441204866, - "grad_norm": 0.035735420882701874, - "learning_rate": 9.745434839197082e-05, - "loss": 0.004, - "step": 1242 - }, - { - "epoch": 0.9600308939949798, - "grad_norm": 0.03530476614832878, - "learning_rate": 9.744584375110453e-05, - "loss": 0.0049, - "step": 1243 - }, - { - "epoch": 0.9608032438694729, - "grad_norm": 0.013710936531424522, - "learning_rate": 9.743732529989958e-05, - "loss": 0.0047, - "step": 1244 - }, - { - "epoch": 0.9615755937439661, - "grad_norm": 0.037209782749414444, - "learning_rate": 9.742879304083546e-05, - "loss": 0.0052, - "step": 1245 - }, - { - "epoch": 0.9623479436184592, - "grad_norm": 0.027198487892746925, - "learning_rate": 9.742024697639573e-05, - "loss": 0.0049, - "step": 1246 - }, - { - "epoch": 0.9631202934929524, - "grad_norm": 0.009933719411492348, - "learning_rate": 9.741168710906792e-05, - "loss": 0.0046, - "step": 1247 - }, - { - "epoch": 0.9638926433674454, - "grad_norm": 0.013061133213341236, - "learning_rate": 9.74031134413436e-05, - "loss": 0.0048, - "step": 1248 - }, - { - "epoch": 0.9646649932419386, - "grad_norm": 0.04397881403565407, - "learning_rate": 9.739452597571839e-05, - "loss": 0.005, - "step": 1249 - }, - { - "epoch": 0.9654373431164317, - "grad_norm": 0.01409953273832798, - "learning_rate": 9.738592471469188e-05, - "loss": 0.0049, - "step": 1250 - }, - { - "epoch": 0.9662096929909249, - "grad_norm": 0.027452891692519188, - "learning_rate": 9.737730966076766e-05, - "loss": 0.0043, - "step": 1251 - }, - { - "epoch": 0.966982042865418, - "grad_norm": 0.040638476610183716, - "learning_rate": 9.736868081645339e-05, - "loss": 0.0053, - "step": 1252 - }, - { - "epoch": 0.9677543927399112, - "grad_norm": 0.0253109373152256, - "learning_rate": 9.736003818426073e-05, - "loss": 0.0051, - "step": 1253 - }, - { - "epoch": 0.9685267426144043, - "grad_norm": 0.012483520433306694, - "learning_rate": 9.735138176670531e-05, - "loss": 0.0044, - "step": 1254 - }, - { - "epoch": 0.9692990924888975, - "grad_norm": 0.0266621932387352, - "learning_rate": 9.734271156630683e-05, - "loss": 0.005, - "step": 1255 - }, - { - "epoch": 0.9700714423633906, - "grad_norm": 0.037746869027614594, - "learning_rate": 9.733402758558896e-05, - "loss": 0.0053, - "step": 1256 - }, - { - "epoch": 0.9708437922378838, - "grad_norm": 0.012163268402218819, - "learning_rate": 9.73253298270794e-05, - "loss": 0.0046, - "step": 1257 - }, - { - "epoch": 0.9716161421123769, - "grad_norm": 0.017105646431446075, - "learning_rate": 9.731661829330986e-05, - "loss": 0.0049, - "step": 1258 - }, - { - "epoch": 0.97238849198687, - "grad_norm": 0.05115858465433121, - "learning_rate": 9.730789298681607e-05, - "loss": 0.0052, - "step": 1259 - }, - { - "epoch": 0.9731608418613632, - "grad_norm": 0.014376375824213028, - "learning_rate": 9.72991539101377e-05, - "loss": 0.0052, - "step": 1260 - }, - { - "epoch": 0.9739331917358564, - "grad_norm": 0.022228136658668518, - "learning_rate": 9.729040106581858e-05, - "loss": 0.0051, - "step": 1261 - }, - { - "epoch": 0.9747055416103495, - "grad_norm": 0.04539450630545616, - "learning_rate": 9.728163445640636e-05, - "loss": 0.0056, - "step": 1262 - }, - { - "epoch": 0.9754778914848427, - "grad_norm": 0.015013706870377064, - "learning_rate": 9.727285408445285e-05, - "loss": 0.005, - "step": 1263 - }, - { - "epoch": 0.9762502413593358, - "grad_norm": 0.02358063869178295, - "learning_rate": 9.726405995251377e-05, - "loss": 0.0049, - "step": 1264 - }, - { - "epoch": 0.977022591233829, - "grad_norm": 0.02099069207906723, - "learning_rate": 9.72552520631489e-05, - "loss": 0.0049, - "step": 1265 - }, - { - "epoch": 0.9777949411083221, - "grad_norm": 0.031068965792655945, - "learning_rate": 9.724643041892199e-05, - "loss": 0.0048, - "step": 1266 - }, - { - "epoch": 0.9785672909828153, - "grad_norm": 0.019800299778580666, - "learning_rate": 9.72375950224008e-05, - "loss": 0.0043, - "step": 1267 - }, - { - "epoch": 0.9793396408573084, - "grad_norm": 0.015887726098299026, - "learning_rate": 9.722874587615711e-05, - "loss": 0.0042, - "step": 1268 - }, - { - "epoch": 0.9801119907318016, - "grad_norm": 0.020139768719673157, - "learning_rate": 9.72198829827667e-05, - "loss": 0.0052, - "step": 1269 - }, - { - "epoch": 0.9808843406062947, - "grad_norm": 0.012581721879541874, - "learning_rate": 9.721100634480934e-05, - "loss": 0.0047, - "step": 1270 - }, - { - "epoch": 0.9816566904807877, - "grad_norm": 0.014556423760950565, - "learning_rate": 9.72021159648688e-05, - "loss": 0.0043, - "step": 1271 - }, - { - "epoch": 0.9824290403552809, - "grad_norm": 0.01289236731827259, - "learning_rate": 9.719321184553286e-05, - "loss": 0.0048, - "step": 1272 - }, - { - "epoch": 0.983201390229774, - "grad_norm": 0.013001179322600365, - "learning_rate": 9.718429398939329e-05, - "loss": 0.0049, - "step": 1273 - }, - { - "epoch": 0.9839737401042672, - "grad_norm": 0.010636583901941776, - "learning_rate": 9.717536239904586e-05, - "loss": 0.0049, - "step": 1274 - }, - { - "epoch": 0.9847460899787603, - "grad_norm": 0.014941484667360783, - "learning_rate": 9.716641707709035e-05, - "loss": 0.0047, - "step": 1275 - }, - { - "epoch": 0.9855184398532535, - "grad_norm": 0.01230227667838335, - "learning_rate": 9.715745802613052e-05, - "loss": 0.0046, - "step": 1276 - }, - { - "epoch": 0.9862907897277466, - "grad_norm": 0.019549479708075523, - "learning_rate": 9.714848524877413e-05, - "loss": 0.0058, - "step": 1277 - }, - { - "epoch": 0.9870631396022398, - "grad_norm": 0.015148761682212353, - "learning_rate": 9.713949874763296e-05, - "loss": 0.0046, - "step": 1278 - }, - { - "epoch": 0.9878354894767329, - "grad_norm": 0.012779118493199348, - "learning_rate": 9.713049852532275e-05, - "loss": 0.0047, - "step": 1279 - }, - { - "epoch": 0.9886078393512261, - "grad_norm": 0.02783294767141342, - "learning_rate": 9.712148458446322e-05, - "loss": 0.005, - "step": 1280 - }, - { - "epoch": 0.9893801892257192, - "grad_norm": 0.017038974910974503, - "learning_rate": 9.711245692767814e-05, - "loss": 0.0042, - "step": 1281 - }, - { - "epoch": 0.9901525391002124, - "grad_norm": 0.015469277277588844, - "learning_rate": 9.710341555759523e-05, - "loss": 0.0042, - "step": 1282 - }, - { - "epoch": 0.9909248889747055, - "grad_norm": 0.015973426401615143, - "learning_rate": 9.709436047684624e-05, - "loss": 0.0045, - "step": 1283 - }, - { - "epoch": 0.9916972388491987, - "grad_norm": 0.02046266943216324, - "learning_rate": 9.708529168806686e-05, - "loss": 0.0051, - "step": 1284 - }, - { - "epoch": 0.9924695887236918, - "grad_norm": 0.01801963895559311, - "learning_rate": 9.70762091938968e-05, - "loss": 0.0048, - "step": 1285 - }, - { - "epoch": 0.993241938598185, - "grad_norm": 0.013758906163275242, - "learning_rate": 9.706711299697976e-05, - "loss": 0.0051, - "step": 1286 - }, - { - "epoch": 0.9940142884726781, - "grad_norm": 0.021490855142474174, - "learning_rate": 9.70580030999634e-05, - "loss": 0.0047, - "step": 1287 - }, - { - "epoch": 0.9947866383471713, - "grad_norm": 0.01400856114923954, - "learning_rate": 9.704887950549943e-05, - "loss": 0.0054, - "step": 1288 - }, - { - "epoch": 0.9955589882216644, - "grad_norm": 0.01435021311044693, - "learning_rate": 9.703974221624351e-05, - "loss": 0.0045, - "step": 1289 - }, - { - "epoch": 0.9963313380961576, - "grad_norm": 0.014206396415829659, - "learning_rate": 9.703059123485523e-05, - "loss": 0.0047, - "step": 1290 - }, - { - "epoch": 0.9971036879706507, - "grad_norm": 0.012883146293461323, - "learning_rate": 9.702142656399824e-05, - "loss": 0.0049, - "step": 1291 - }, - { - "epoch": 0.9978760378451439, - "grad_norm": 0.022087296470999718, - "learning_rate": 9.701224820634019e-05, - "loss": 0.0042, - "step": 1292 - }, - { - "epoch": 0.998648387719637, - "grad_norm": 0.01635371334850788, - "learning_rate": 9.700305616455266e-05, - "loss": 0.0043, - "step": 1293 - }, - { - "epoch": 0.9994207375941302, - "grad_norm": 0.016066590324044228, - "learning_rate": 9.69938504413112e-05, - "loss": 0.0054, - "step": 1294 - }, - { - "epoch": 1.0007723498744932, - "grad_norm": 0.03909831866621971, - "learning_rate": 9.698463103929542e-05, - "loss": 0.0095, - "step": 1295 - }, - { - "epoch": 1.0015446997489863, - "grad_norm": 0.009909950196743011, - "learning_rate": 9.697539796118884e-05, - "loss": 0.0042, - "step": 1296 - }, - { - "epoch": 1.0023170496234795, - "grad_norm": 0.011213305406272411, - "learning_rate": 9.6966151209679e-05, - "loss": 0.0047, - "step": 1297 - }, - { - "epoch": 1.0030893994979726, - "grad_norm": 0.03059287928044796, - "learning_rate": 9.695689078745737e-05, - "loss": 0.0049, - "step": 1298 - }, - { - "epoch": 1.0038617493724658, - "grad_norm": 0.030008280649781227, - "learning_rate": 9.694761669721947e-05, - "loss": 0.0049, - "step": 1299 - }, - { - "epoch": 1.004634099246959, - "grad_norm": 0.022881874814629555, - "learning_rate": 9.693832894166479e-05, - "loss": 0.0049, - "step": 1300 - }, - { - "epoch": 1.005406449121452, - "grad_norm": 0.021256916224956512, - "learning_rate": 9.69290275234967e-05, - "loss": 0.0048, - "step": 1301 - }, - { - "epoch": 1.0061787989959452, - "grad_norm": 0.020482858642935753, - "learning_rate": 9.691971244542266e-05, - "loss": 0.0049, - "step": 1302 - }, - { - "epoch": 1.0069511488704384, - "grad_norm": 0.015014131553471088, - "learning_rate": 9.691038371015406e-05, - "loss": 0.0043, - "step": 1303 - }, - { - "epoch": 1.0077234987449315, - "grad_norm": 0.022472646087408066, - "learning_rate": 9.690104132040627e-05, - "loss": 0.0047, - "step": 1304 - }, - { - "epoch": 1.0084958486194247, - "grad_norm": 0.01419077068567276, - "learning_rate": 9.689168527889863e-05, - "loss": 0.0048, - "step": 1305 - }, - { - "epoch": 1.0092681984939178, - "grad_norm": 0.016903279349207878, - "learning_rate": 9.688231558835445e-05, - "loss": 0.0044, - "step": 1306 - }, - { - "epoch": 1.010040548368411, - "grad_norm": 0.014411347918212414, - "learning_rate": 9.687293225150104e-05, - "loss": 0.0045, - "step": 1307 - }, - { - "epoch": 1.010812898242904, - "grad_norm": 0.010093619115650654, - "learning_rate": 9.686353527106967e-05, - "loss": 0.005, - "step": 1308 - }, - { - "epoch": 1.0115852481173973, - "grad_norm": 0.010465381667017937, - "learning_rate": 9.685412464979554e-05, - "loss": 0.0042, - "step": 1309 - }, - { - "epoch": 1.0123575979918904, - "grad_norm": 0.012797888368368149, - "learning_rate": 9.684470039041786e-05, - "loss": 0.0047, - "step": 1310 - }, - { - "epoch": 1.0131299478663836, - "grad_norm": 0.011513960547745228, - "learning_rate": 9.683526249567982e-05, - "loss": 0.0046, - "step": 1311 - }, - { - "epoch": 1.0139022977408767, - "grad_norm": 0.013557342812418938, - "learning_rate": 9.682581096832856e-05, - "loss": 0.0043, - "step": 1312 - }, - { - "epoch": 1.0146746476153699, - "grad_norm": 0.012859497219324112, - "learning_rate": 9.681634581111519e-05, - "loss": 0.0044, - "step": 1313 - }, - { - "epoch": 1.015446997489863, - "grad_norm": 0.018372129648923874, - "learning_rate": 9.68068670267948e-05, - "loss": 0.0043, - "step": 1314 - }, - { - "epoch": 1.0162193473643562, - "grad_norm": 0.012753891758620739, - "learning_rate": 9.679737461812641e-05, - "loss": 0.004, - "step": 1315 - }, - { - "epoch": 1.0169916972388493, - "grad_norm": 0.010481827892363071, - "learning_rate": 9.678786858787306e-05, - "loss": 0.0041, - "step": 1316 - }, - { - "epoch": 1.0177640471133425, - "grad_norm": 0.018584148958325386, - "learning_rate": 9.677834893880168e-05, - "loss": 0.0049, - "step": 1317 - }, - { - "epoch": 1.0185363969878356, - "grad_norm": 0.012177852913737297, - "learning_rate": 9.676881567368325e-05, - "loss": 0.0045, - "step": 1318 - }, - { - "epoch": 1.0193087468623285, - "grad_norm": 0.011105979792773724, - "learning_rate": 9.675926879529268e-05, - "loss": 0.0042, - "step": 1319 - }, - { - "epoch": 1.0200810967368217, - "grad_norm": 0.01322960201650858, - "learning_rate": 9.674970830640881e-05, - "loss": 0.0042, - "step": 1320 - }, - { - "epoch": 1.0208534466113148, - "grad_norm": 0.0165097676217556, - "learning_rate": 9.674013420981447e-05, - "loss": 0.0046, - "step": 1321 - }, - { - "epoch": 1.021625796485808, - "grad_norm": 0.011996297165751457, - "learning_rate": 9.673054650829645e-05, - "loss": 0.0038, - "step": 1322 - }, - { - "epoch": 1.0223981463603011, - "grad_norm": 0.011866395361721516, - "learning_rate": 9.672094520464552e-05, - "loss": 0.0044, - "step": 1323 - }, - { - "epoch": 1.0231704962347943, - "grad_norm": 0.010129679925739765, - "learning_rate": 9.671133030165635e-05, - "loss": 0.0041, - "step": 1324 - }, - { - "epoch": 1.0239428461092874, - "grad_norm": 0.01069657877087593, - "learning_rate": 9.670170180212764e-05, - "loss": 0.0041, - "step": 1325 - }, - { - "epoch": 1.0247151959837806, - "grad_norm": 0.014417791739106178, - "learning_rate": 9.669205970886197e-05, - "loss": 0.0048, - "step": 1326 - }, - { - "epoch": 1.0254875458582737, - "grad_norm": 0.011403602547943592, - "learning_rate": 9.668240402466597e-05, - "loss": 0.0046, - "step": 1327 - }, - { - "epoch": 1.0262598957327669, - "grad_norm": 0.0127126919105649, - "learning_rate": 9.667273475235017e-05, - "loss": 0.0048, - "step": 1328 - }, - { - "epoch": 1.02703224560726, - "grad_norm": 0.0228350218385458, - "learning_rate": 9.666305189472903e-05, - "loss": 0.0048, - "step": 1329 - }, - { - "epoch": 1.0278045954817532, - "grad_norm": 0.012411186471581459, - "learning_rate": 9.665335545462102e-05, - "loss": 0.0042, - "step": 1330 - }, - { - "epoch": 1.0285769453562463, - "grad_norm": 0.01859738491475582, - "learning_rate": 9.664364543484851e-05, - "loss": 0.0047, - "step": 1331 - }, - { - "epoch": 1.0293492952307395, - "grad_norm": 0.01865040697157383, - "learning_rate": 9.66339218382379e-05, - "loss": 0.0045, - "step": 1332 - }, - { - "epoch": 1.0301216451052326, - "grad_norm": 0.02105005457997322, - "learning_rate": 9.662418466761947e-05, - "loss": 0.0046, - "step": 1333 - }, - { - "epoch": 1.0308939949797258, - "grad_norm": 0.010967560112476349, - "learning_rate": 9.661443392582746e-05, - "loss": 0.0047, - "step": 1334 - }, - { - "epoch": 1.031666344854219, - "grad_norm": 0.040137216448783875, - "learning_rate": 9.66046696157001e-05, - "loss": 0.0046, - "step": 1335 - }, - { - "epoch": 1.032438694728712, - "grad_norm": 0.018981043249368668, - "learning_rate": 9.659489174007951e-05, - "loss": 0.0046, - "step": 1336 - }, - { - "epoch": 1.0332110446032052, - "grad_norm": 0.01990450546145439, - "learning_rate": 9.658510030181184e-05, - "loss": 0.0046, - "step": 1337 - }, - { - "epoch": 1.0339833944776984, - "grad_norm": 0.01513310894370079, - "learning_rate": 9.657529530374713e-05, - "loss": 0.0039, - "step": 1338 - }, - { - "epoch": 1.0347557443521915, - "grad_norm": 0.024439837783575058, - "learning_rate": 9.656547674873934e-05, - "loss": 0.0045, - "step": 1339 - }, - { - "epoch": 1.0355280942266847, - "grad_norm": 0.01174076646566391, - "learning_rate": 9.655564463964646e-05, - "loss": 0.0047, - "step": 1340 - }, - { - "epoch": 1.0363004441011778, - "grad_norm": 0.019211484119296074, - "learning_rate": 9.654579897933033e-05, - "loss": 0.0044, - "step": 1341 - }, - { - "epoch": 1.037072793975671, - "grad_norm": 0.01669737510383129, - "learning_rate": 9.653593977065685e-05, - "loss": 0.0045, - "step": 1342 - }, - { - "epoch": 1.0378451438501641, - "grad_norm": 0.01965293660759926, - "learning_rate": 9.652606701649574e-05, - "loss": 0.0045, - "step": 1343 - }, - { - "epoch": 1.0386174937246573, - "grad_norm": 0.013414140790700912, - "learning_rate": 9.651618071972075e-05, - "loss": 0.0044, - "step": 1344 - }, - { - "epoch": 1.0393898435991504, - "grad_norm": 0.016711724922060966, - "learning_rate": 9.650628088320953e-05, - "loss": 0.0045, - "step": 1345 - }, - { - "epoch": 1.0401621934736436, - "grad_norm": 0.025946568697690964, - "learning_rate": 9.649636750984368e-05, - "loss": 0.0045, - "step": 1346 - }, - { - "epoch": 1.0409345433481367, - "grad_norm": 0.010373227298259735, - "learning_rate": 9.648644060250875e-05, - "loss": 0.0044, - "step": 1347 - }, - { - "epoch": 1.0417068932226299, - "grad_norm": 0.015023860149085522, - "learning_rate": 9.647650016409421e-05, - "loss": 0.0043, - "step": 1348 - }, - { - "epoch": 1.042479243097123, - "grad_norm": 0.02561667561531067, - "learning_rate": 9.646654619749352e-05, - "loss": 0.0046, - "step": 1349 - }, - { - "epoch": 1.0432515929716162, - "grad_norm": 0.011588500812649727, - "learning_rate": 9.645657870560401e-05, - "loss": 0.0049, - "step": 1350 - }, - { - "epoch": 1.0440239428461093, - "grad_norm": 0.018138093873858452, - "learning_rate": 9.644659769132696e-05, - "loss": 0.0043, - "step": 1351 - }, - { - "epoch": 1.0447962927206025, - "grad_norm": 0.013827506452798843, - "learning_rate": 9.643660315756764e-05, - "loss": 0.0043, - "step": 1352 - }, - { - "epoch": 1.0455686425950956, - "grad_norm": 0.015662331134080887, - "learning_rate": 9.64265951072352e-05, - "loss": 0.0041, - "step": 1353 - }, - { - "epoch": 1.0463409924695888, - "grad_norm": 0.01205469761043787, - "learning_rate": 9.641657354324273e-05, - "loss": 0.004, - "step": 1354 - }, - { - "epoch": 1.047113342344082, - "grad_norm": 0.017667818814516068, - "learning_rate": 9.640653846850728e-05, - "loss": 0.0043, - "step": 1355 - }, - { - "epoch": 1.047885692218575, - "grad_norm": 0.039402734488248825, - "learning_rate": 9.63964898859498e-05, - "loss": 0.0049, - "step": 1356 - }, - { - "epoch": 1.0486580420930682, - "grad_norm": 0.01787903904914856, - "learning_rate": 9.638642779849523e-05, - "loss": 0.004, - "step": 1357 - }, - { - "epoch": 1.0494303919675614, - "grad_norm": 0.02071666158735752, - "learning_rate": 9.637635220907235e-05, - "loss": 0.0048, - "step": 1358 - }, - { - "epoch": 1.0502027418420545, - "grad_norm": 0.016656925901770592, - "learning_rate": 9.636626312061395e-05, - "loss": 0.0047, - "step": 1359 - }, - { - "epoch": 1.0509750917165477, - "grad_norm": 0.011745874769985676, - "learning_rate": 9.635616053605672e-05, - "loss": 0.0045, - "step": 1360 - }, - { - "epoch": 1.0517474415910408, - "grad_norm": 0.018410833552479744, - "learning_rate": 9.634604445834127e-05, - "loss": 0.0049, - "step": 1361 - }, - { - "epoch": 1.052519791465534, - "grad_norm": 0.01569872908294201, - "learning_rate": 9.633591489041213e-05, - "loss": 0.0047, - "step": 1362 - }, - { - "epoch": 1.0532921413400271, - "grad_norm": 0.014584588818252087, - "learning_rate": 9.632577183521782e-05, - "loss": 0.0046, - "step": 1363 - }, - { - "epoch": 1.0540644912145203, - "grad_norm": 0.023064883425831795, - "learning_rate": 9.631561529571069e-05, - "loss": 0.0047, - "step": 1364 - }, - { - "epoch": 1.0548368410890134, - "grad_norm": 0.015406741760671139, - "learning_rate": 9.630544527484708e-05, - "loss": 0.0042, - "step": 1365 - }, - { - "epoch": 1.0556091909635064, - "grad_norm": 0.014026117511093616, - "learning_rate": 9.629526177558725e-05, - "loss": 0.0046, - "step": 1366 - }, - { - "epoch": 1.0563815408379995, - "grad_norm": 0.025568680837750435, - "learning_rate": 9.628506480089535e-05, - "loss": 0.005, - "step": 1367 - }, - { - "epoch": 1.0571538907124927, - "grad_norm": 0.015331593342125416, - "learning_rate": 9.627485435373948e-05, - "loss": 0.0043, - "step": 1368 - }, - { - "epoch": 1.0579262405869858, - "grad_norm": 0.01583258807659149, - "learning_rate": 9.626463043709168e-05, - "loss": 0.0046, - "step": 1369 - }, - { - "epoch": 1.058698590461479, - "grad_norm": 0.018904291093349457, - "learning_rate": 9.625439305392784e-05, - "loss": 0.0044, - "step": 1370 - }, - { - "epoch": 1.0594709403359721, - "grad_norm": 0.01609903946518898, - "learning_rate": 9.624414220722784e-05, - "loss": 0.0044, - "step": 1371 - }, - { - "epoch": 1.0602432902104653, - "grad_norm": 0.019533008337020874, - "learning_rate": 9.623387789997547e-05, - "loss": 0.0045, - "step": 1372 - }, - { - "epoch": 1.0610156400849584, - "grad_norm": 0.021502437070012093, - "learning_rate": 9.622360013515838e-05, - "loss": 0.0046, - "step": 1373 - }, - { - "epoch": 1.0617879899594516, - "grad_norm": 0.033865705132484436, - "learning_rate": 9.62133089157682e-05, - "loss": 0.0051, - "step": 1374 - }, - { - "epoch": 1.0625603398339447, - "grad_norm": 0.021266119554638863, - "learning_rate": 9.620300424480046e-05, - "loss": 0.0048, - "step": 1375 - }, - { - "epoch": 1.0633326897084379, - "grad_norm": 0.029526852071285248, - "learning_rate": 9.619268612525461e-05, - "loss": 0.0044, - "step": 1376 - }, - { - "epoch": 1.064105039582931, - "grad_norm": 0.02488149330019951, - "learning_rate": 9.618235456013397e-05, - "loss": 0.0047, - "step": 1377 - }, - { - "epoch": 1.0648773894574242, - "grad_norm": 0.020261965692043304, - "learning_rate": 9.617200955244586e-05, - "loss": 0.0046, - "step": 1378 - }, - { - "epoch": 1.0656497393319173, - "grad_norm": 0.01376634556800127, - "learning_rate": 9.616165110520143e-05, - "loss": 0.0044, - "step": 1379 - }, - { - "epoch": 1.0664220892064105, - "grad_norm": 0.03926656395196915, - "learning_rate": 9.615127922141576e-05, - "loss": 0.0044, - "step": 1380 - }, - { - "epoch": 1.0671944390809036, - "grad_norm": 0.022135786712169647, - "learning_rate": 9.614089390410788e-05, - "loss": 0.0049, - "step": 1381 - }, - { - "epoch": 1.0679667889553968, - "grad_norm": 0.017217369750142097, - "learning_rate": 9.61304951563007e-05, - "loss": 0.0046, - "step": 1382 - }, - { - "epoch": 1.06873913882989, - "grad_norm": 0.02201268821954727, - "learning_rate": 9.612008298102104e-05, - "loss": 0.0046, - "step": 1383 - }, - { - "epoch": 1.069511488704383, - "grad_norm": 0.024450382217764854, - "learning_rate": 9.610965738129963e-05, - "loss": 0.0047, - "step": 1384 - }, - { - "epoch": 1.0702838385788762, - "grad_norm": 0.01855049841105938, - "learning_rate": 9.609921836017113e-05, - "loss": 0.0049, - "step": 1385 - }, - { - "epoch": 1.0710561884533694, - "grad_norm": 0.032038744539022446, - "learning_rate": 9.608876592067404e-05, - "loss": 0.0045, - "step": 1386 - }, - { - "epoch": 1.0718285383278625, - "grad_norm": 0.011334764771163464, - "learning_rate": 9.607830006585087e-05, - "loss": 0.0044, - "step": 1387 - }, - { - "epoch": 1.0726008882023557, - "grad_norm": 0.01075075939297676, - "learning_rate": 9.606782079874794e-05, - "loss": 0.0046, - "step": 1388 - }, - { - "epoch": 1.0733732380768488, - "grad_norm": 0.01240682415664196, - "learning_rate": 9.605732812241553e-05, - "loss": 0.0043, - "step": 1389 - }, - { - "epoch": 1.074145587951342, - "grad_norm": 0.014968945644795895, - "learning_rate": 9.604682203990778e-05, - "loss": 0.0039, - "step": 1390 - }, - { - "epoch": 1.0749179378258351, - "grad_norm": 0.014436611905694008, - "learning_rate": 9.60363025542828e-05, - "loss": 0.0041, - "step": 1391 - }, - { - "epoch": 1.0756902877003283, - "grad_norm": 0.011069850996136665, - "learning_rate": 9.602576966860251e-05, - "loss": 0.004, - "step": 1392 - }, - { - "epoch": 1.0764626375748214, - "grad_norm": 0.01463300921022892, - "learning_rate": 9.60152233859328e-05, - "loss": 0.0047, - "step": 1393 - }, - { - "epoch": 1.0772349874493146, - "grad_norm": 0.020886411890387535, - "learning_rate": 9.600466370934345e-05, - "loss": 0.004, - "step": 1394 - }, - { - "epoch": 1.0780073373238077, - "grad_norm": 0.013697480782866478, - "learning_rate": 9.599409064190811e-05, - "loss": 0.0046, - "step": 1395 - }, - { - "epoch": 1.0787796871983009, - "grad_norm": 0.01161142997443676, - "learning_rate": 9.598350418670434e-05, - "loss": 0.0039, - "step": 1396 - }, - { - "epoch": 1.079552037072794, - "grad_norm": 0.013734615407884121, - "learning_rate": 9.597290434681363e-05, - "loss": 0.0049, - "step": 1397 - }, - { - "epoch": 1.0803243869472872, - "grad_norm": 0.014882289804518223, - "learning_rate": 9.596229112532132e-05, - "loss": 0.0043, - "step": 1398 - }, - { - "epoch": 1.0810967368217803, - "grad_norm": 0.011839316226541996, - "learning_rate": 9.595166452531663e-05, - "loss": 0.0044, - "step": 1399 - }, - { - "epoch": 1.0818690866962735, - "grad_norm": 0.015712959691882133, - "learning_rate": 9.594102454989275e-05, - "loss": 0.0044, - "step": 1400 - }, - { - "epoch": 1.0826414365707666, - "grad_norm": 0.008323220536112785, - "learning_rate": 9.593037120214672e-05, - "loss": 0.0044, - "step": 1401 - }, - { - "epoch": 1.0834137864452598, - "grad_norm": 0.023797884583473206, - "learning_rate": 9.591970448517946e-05, - "loss": 0.0048, - "step": 1402 - }, - { - "epoch": 1.084186136319753, - "grad_norm": 0.021375704556703568, - "learning_rate": 9.590902440209577e-05, - "loss": 0.0051, - "step": 1403 - }, - { - "epoch": 1.084958486194246, - "grad_norm": 0.012305250391364098, - "learning_rate": 9.58983309560044e-05, - "loss": 0.0048, - "step": 1404 - }, - { - "epoch": 1.0857308360687392, - "grad_norm": 0.03434315323829651, - "learning_rate": 9.588762415001795e-05, - "loss": 0.0043, - "step": 1405 - }, - { - "epoch": 1.0865031859432324, - "grad_norm": 0.018410326912999153, - "learning_rate": 9.587690398725288e-05, - "loss": 0.005, - "step": 1406 - }, - { - "epoch": 1.0872755358177255, - "grad_norm": 0.02015906572341919, - "learning_rate": 9.586617047082962e-05, - "loss": 0.004, - "step": 1407 - }, - { - "epoch": 1.0880478856922187, - "grad_norm": 0.06614932417869568, - "learning_rate": 9.585542360387238e-05, - "loss": 0.0047, - "step": 1408 - }, - { - "epoch": 1.0888202355667118, - "grad_norm": 0.014235509559512138, - "learning_rate": 9.584466338950937e-05, - "loss": 0.0051, - "step": 1409 - }, - { - "epoch": 1.089592585441205, - "grad_norm": 0.01355188898742199, - "learning_rate": 9.583388983087258e-05, - "loss": 0.0045, - "step": 1410 - }, - { - "epoch": 1.0903649353156981, - "grad_norm": 0.022596238180994987, - "learning_rate": 9.582310293109798e-05, - "loss": 0.0045, - "step": 1411 - }, - { - "epoch": 1.0911372851901913, - "grad_norm": 0.02476441115140915, - "learning_rate": 9.581230269332533e-05, - "loss": 0.0049, - "step": 1412 - }, - { - "epoch": 1.0919096350646842, - "grad_norm": 0.013642823323607445, - "learning_rate": 9.580148912069836e-05, - "loss": 0.0044, - "step": 1413 - }, - { - "epoch": 1.0926819849391776, - "grad_norm": 0.026510091498494148, - "learning_rate": 9.579066221636459e-05, - "loss": 0.0038, - "step": 1414 - }, - { - "epoch": 1.0934543348136705, - "grad_norm": 0.036978624761104584, - "learning_rate": 9.57798219834755e-05, - "loss": 0.0052, - "step": 1415 - }, - { - "epoch": 1.0942266846881636, - "grad_norm": 0.030871255323290825, - "learning_rate": 9.576896842518643e-05, - "loss": 0.0047, - "step": 1416 - }, - { - "epoch": 1.0949990345626568, - "grad_norm": 0.028220554813742638, - "learning_rate": 9.575810154465658e-05, - "loss": 0.0047, - "step": 1417 - }, - { - "epoch": 1.09577138443715, - "grad_norm": 0.02838347852230072, - "learning_rate": 9.574722134504904e-05, - "loss": 0.0053, - "step": 1418 - }, - { - "epoch": 1.096543734311643, - "grad_norm": 0.017904307693243027, - "learning_rate": 9.573632782953075e-05, - "loss": 0.0046, - "step": 1419 - }, - { - "epoch": 1.0973160841861362, - "grad_norm": 0.018795479089021683, - "learning_rate": 9.572542100127258e-05, - "loss": 0.0049, - "step": 1420 - }, - { - "epoch": 1.0980884340606294, - "grad_norm": 0.021671850234270096, - "learning_rate": 9.571450086344922e-05, - "loss": 0.0045, - "step": 1421 - }, - { - "epoch": 1.0988607839351225, - "grad_norm": 0.014755095355212688, - "learning_rate": 9.570356741923927e-05, - "loss": 0.0043, - "step": 1422 - }, - { - "epoch": 1.0996331338096157, - "grad_norm": 0.03559665009379387, - "learning_rate": 9.569262067182518e-05, - "loss": 0.0048, - "step": 1423 - }, - { - "epoch": 1.1004054836841088, - "grad_norm": 0.021603044122457504, - "learning_rate": 9.56816606243933e-05, - "loss": 0.0048, - "step": 1424 - }, - { - "epoch": 1.101177833558602, - "grad_norm": 0.012603303417563438, - "learning_rate": 9.567068728013384e-05, - "loss": 0.0047, - "step": 1425 - }, - { - "epoch": 1.1019501834330951, - "grad_norm": 0.021283099427819252, - "learning_rate": 9.565970064224085e-05, - "loss": 0.005, - "step": 1426 - }, - { - "epoch": 1.1027225333075883, - "grad_norm": 0.028067365288734436, - "learning_rate": 9.56487007139123e-05, - "loss": 0.0046, - "step": 1427 - }, - { - "epoch": 1.1034948831820814, - "grad_norm": 0.013425813987851143, - "learning_rate": 9.563768749834998e-05, - "loss": 0.0042, - "step": 1428 - }, - { - "epoch": 1.1042672330565746, - "grad_norm": 0.022721700370311737, - "learning_rate": 9.562666099875959e-05, - "loss": 0.0043, - "step": 1429 - }, - { - "epoch": 1.1050395829310677, - "grad_norm": 0.03619357943534851, - "learning_rate": 9.561562121835066e-05, - "loss": 0.0051, - "step": 1430 - }, - { - "epoch": 1.105811932805561, - "grad_norm": 0.013147195801138878, - "learning_rate": 9.560456816033662e-05, - "loss": 0.0047, - "step": 1431 - }, - { - "epoch": 1.106584282680054, - "grad_norm": 0.03599505499005318, - "learning_rate": 9.559350182793475e-05, - "loss": 0.0047, - "step": 1432 - }, - { - "epoch": 1.1073566325545472, - "grad_norm": 0.01628255285322666, - "learning_rate": 9.558242222436617e-05, - "loss": 0.0045, - "step": 1433 - }, - { - "epoch": 1.1081289824290403, - "grad_norm": 0.019326291978359222, - "learning_rate": 9.557132935285591e-05, - "loss": 0.0045, - "step": 1434 - }, - { - "epoch": 1.1089013323035335, - "grad_norm": 0.01495333295315504, - "learning_rate": 9.556022321663283e-05, - "loss": 0.0046, - "step": 1435 - }, - { - "epoch": 1.1096736821780266, - "grad_norm": 0.014609484933316708, - "learning_rate": 9.554910381892964e-05, - "loss": 0.0045, - "step": 1436 - }, - { - "epoch": 1.1104460320525198, - "grad_norm": 0.01904207468032837, - "learning_rate": 9.553797116298295e-05, - "loss": 0.0035, - "step": 1437 - }, - { - "epoch": 1.111218381927013, - "grad_norm": 0.012693614698946476, - "learning_rate": 9.552682525203319e-05, - "loss": 0.0044, - "step": 1438 - }, - { - "epoch": 1.111990731801506, - "grad_norm": 0.010756377130746841, - "learning_rate": 9.551566608932467e-05, - "loss": 0.0043, - "step": 1439 - }, - { - "epoch": 1.1127630816759992, - "grad_norm": 0.013393844477832317, - "learning_rate": 9.550449367810557e-05, - "loss": 0.0049, - "step": 1440 - }, - { - "epoch": 1.1135354315504924, - "grad_norm": 0.0180951077491045, - "learning_rate": 9.549330802162789e-05, - "loss": 0.0055, - "step": 1441 - }, - { - "epoch": 1.1143077814249855, - "grad_norm": 0.013738658279180527, - "learning_rate": 9.54821091231475e-05, - "loss": 0.0052, - "step": 1442 - }, - { - "epoch": 1.1150801312994787, - "grad_norm": 0.01545040961354971, - "learning_rate": 9.547089698592416e-05, - "loss": 0.0044, - "step": 1443 - }, - { - "epoch": 1.1158524811739718, - "grad_norm": 0.012175020761787891, - "learning_rate": 9.545967161322141e-05, - "loss": 0.0045, - "step": 1444 - }, - { - "epoch": 1.116624831048465, - "grad_norm": 0.018132036551833153, - "learning_rate": 9.544843300830671e-05, - "loss": 0.0048, - "step": 1445 - }, - { - "epoch": 1.1173971809229581, - "grad_norm": 0.015559297055006027, - "learning_rate": 9.543718117445135e-05, - "loss": 0.0043, - "step": 1446 - }, - { - "epoch": 1.1181695307974513, - "grad_norm": 0.017296213656663895, - "learning_rate": 9.542591611493046e-05, - "loss": 0.0048, - "step": 1447 - }, - { - "epoch": 1.1189418806719444, - "grad_norm": 0.015066379681229591, - "learning_rate": 9.541463783302303e-05, - "loss": 0.004, - "step": 1448 - }, - { - "epoch": 1.1197142305464376, - "grad_norm": 0.010292529128491879, - "learning_rate": 9.540334633201186e-05, - "loss": 0.0046, - "step": 1449 - }, - { - "epoch": 1.1204865804209307, - "grad_norm": 0.0397217720746994, - "learning_rate": 9.53920416151837e-05, - "loss": 0.0041, - "step": 1450 - }, - { - "epoch": 1.121258930295424, - "grad_norm": 0.016435086727142334, - "learning_rate": 9.538072368582902e-05, - "loss": 0.0042, - "step": 1451 - }, - { - "epoch": 1.122031280169917, - "grad_norm": 0.0129328528419137, - "learning_rate": 9.536939254724222e-05, - "loss": 0.004, - "step": 1452 - }, - { - "epoch": 1.1228036300444102, - "grad_norm": 0.013893014751374722, - "learning_rate": 9.535804820272152e-05, - "loss": 0.0045, - "step": 1453 - }, - { - "epoch": 1.1235759799189033, - "grad_norm": 0.017391353845596313, - "learning_rate": 9.534669065556901e-05, - "loss": 0.0042, - "step": 1454 - }, - { - "epoch": 1.1243483297933965, - "grad_norm": 0.0244289580732584, - "learning_rate": 9.533531990909055e-05, - "loss": 0.0046, - "step": 1455 - }, - { - "epoch": 1.1251206796678896, - "grad_norm": 0.011779186315834522, - "learning_rate": 9.53239359665959e-05, - "loss": 0.0041, - "step": 1456 - }, - { - "epoch": 1.1258930295423828, - "grad_norm": 0.02714175544679165, - "learning_rate": 9.531253883139869e-05, - "loss": 0.0046, - "step": 1457 - }, - { - "epoch": 1.1266653794168757, - "grad_norm": 0.01168652344495058, - "learning_rate": 9.53011285068163e-05, - "loss": 0.0049, - "step": 1458 - }, - { - "epoch": 1.127437729291369, - "grad_norm": 0.021335959434509277, - "learning_rate": 9.528970499617003e-05, - "loss": 0.0041, - "step": 1459 - }, - { - "epoch": 1.128210079165862, - "grad_norm": 0.013770773075520992, - "learning_rate": 9.5278268302785e-05, - "loss": 0.004, - "step": 1460 - }, - { - "epoch": 1.1289824290403554, - "grad_norm": 0.011553505435585976, - "learning_rate": 9.526681842999011e-05, - "loss": 0.0046, - "step": 1461 - }, - { - "epoch": 1.1297547789148483, - "grad_norm": 0.018769564107060432, - "learning_rate": 9.525535538111818e-05, - "loss": 0.0046, - "step": 1462 - }, - { - "epoch": 1.1305271287893417, - "grad_norm": 0.014158394187688828, - "learning_rate": 9.524387915950581e-05, - "loss": 0.0044, - "step": 1463 - }, - { - "epoch": 1.1312994786638346, - "grad_norm": 0.021640509366989136, - "learning_rate": 9.523238976849344e-05, - "loss": 0.0043, - "step": 1464 - }, - { - "epoch": 1.1320718285383278, - "grad_norm": 0.015402225777506828, - "learning_rate": 9.522088721142539e-05, - "loss": 0.0047, - "step": 1465 - }, - { - "epoch": 1.132844178412821, - "grad_norm": 0.012613057158887386, - "learning_rate": 9.520937149164975e-05, - "loss": 0.0042, - "step": 1466 - }, - { - "epoch": 1.133616528287314, - "grad_norm": 0.010349423624575138, - "learning_rate": 9.519784261251847e-05, - "loss": 0.0047, - "step": 1467 - }, - { - "epoch": 1.1343888781618072, - "grad_norm": 0.014359497465193272, - "learning_rate": 9.518630057738733e-05, - "loss": 0.0054, - "step": 1468 - }, - { - "epoch": 1.1351612280363004, - "grad_norm": 0.02081706002354622, - "learning_rate": 9.517474538961595e-05, - "loss": 0.0043, - "step": 1469 - }, - { - "epoch": 1.1359335779107935, - "grad_norm": 0.015621309168636799, - "learning_rate": 9.516317705256774e-05, - "loss": 0.0046, - "step": 1470 - }, - { - "epoch": 1.1367059277852867, - "grad_norm": 0.01698329485952854, - "learning_rate": 9.515159556960998e-05, - "loss": 0.0046, - "step": 1471 - }, - { - "epoch": 1.1374782776597798, - "grad_norm": 0.03221715986728668, - "learning_rate": 9.514000094411377e-05, - "loss": 0.0049, - "step": 1472 - }, - { - "epoch": 1.138250627534273, - "grad_norm": 0.013817714527249336, - "learning_rate": 9.5128393179454e-05, - "loss": 0.0049, - "step": 1473 - }, - { - "epoch": 1.1390229774087661, - "grad_norm": 0.016748011112213135, - "learning_rate": 9.511677227900942e-05, - "loss": 0.0046, - "step": 1474 - }, - { - "epoch": 1.1397953272832593, - "grad_norm": 0.031057976186275482, - "learning_rate": 9.510513824616261e-05, - "loss": 0.0052, - "step": 1475 - }, - { - "epoch": 1.1405676771577524, - "grad_norm": 0.03200233355164528, - "learning_rate": 9.509349108429993e-05, - "loss": 0.0049, - "step": 1476 - }, - { - "epoch": 1.1413400270322456, - "grad_norm": 0.01874365098774433, - "learning_rate": 9.50818307968116e-05, - "loss": 0.0049, - "step": 1477 - }, - { - "epoch": 1.1421123769067387, - "grad_norm": 0.02029556781053543, - "learning_rate": 9.507015738709165e-05, - "loss": 0.0051, - "step": 1478 - }, - { - "epoch": 1.1428847267812319, - "grad_norm": 0.010294657200574875, - "learning_rate": 9.505847085853792e-05, - "loss": 0.0043, - "step": 1479 - }, - { - "epoch": 1.143657076655725, - "grad_norm": 0.011460382491350174, - "learning_rate": 9.504677121455208e-05, - "loss": 0.0047, - "step": 1480 - }, - { - "epoch": 1.1444294265302182, - "grad_norm": 0.015220578759908676, - "learning_rate": 9.503505845853963e-05, - "loss": 0.0046, - "step": 1481 - }, - { - "epoch": 1.1452017764047113, - "grad_norm": 0.07611880451440811, - "learning_rate": 9.502333259390984e-05, - "loss": 0.0049, - "step": 1482 - }, - { - "epoch": 1.1459741262792045, - "grad_norm": 0.01241256482899189, - "learning_rate": 9.501159362407584e-05, - "loss": 0.0048, - "step": 1483 - }, - { - "epoch": 1.1467464761536976, - "grad_norm": 0.16832369565963745, - "learning_rate": 9.499984155245457e-05, - "loss": 0.0067, - "step": 1484 - }, - { - "epoch": 1.1475188260281908, - "grad_norm": 0.011174335144460201, - "learning_rate": 9.498807638246676e-05, - "loss": 0.0041, - "step": 1485 - }, - { - "epoch": 1.148291175902684, - "grad_norm": 0.022033747285604477, - "learning_rate": 9.497629811753697e-05, - "loss": 0.0049, - "step": 1486 - }, - { - "epoch": 1.149063525777177, - "grad_norm": 0.018059583380818367, - "learning_rate": 9.496450676109359e-05, - "loss": 0.006, - "step": 1487 - }, - { - "epoch": 1.1498358756516702, - "grad_norm": 0.044133514165878296, - "learning_rate": 9.495270231656875e-05, - "loss": 0.0065, - "step": 1488 - }, - { - "epoch": 1.1506082255261634, - "grad_norm": 0.056028980761766434, - "learning_rate": 9.494088478739848e-05, - "loss": 0.0056, - "step": 1489 - }, - { - "epoch": 1.1513805754006565, - "grad_norm": 0.02036936953663826, - "learning_rate": 9.492905417702255e-05, - "loss": 0.0045, - "step": 1490 - }, - { - "epoch": 1.1521529252751497, - "grad_norm": 0.01621861197054386, - "learning_rate": 9.491721048888461e-05, - "loss": 0.0047, - "step": 1491 - }, - { - "epoch": 1.1529252751496428, - "grad_norm": 0.016505256295204163, - "learning_rate": 9.490535372643203e-05, - "loss": 0.0048, - "step": 1492 - }, - { - "epoch": 1.153697625024136, - "grad_norm": 0.019400065764784813, - "learning_rate": 9.489348389311603e-05, - "loss": 0.0038, - "step": 1493 - }, - { - "epoch": 1.1544699748986291, - "grad_norm": 0.03492862358689308, - "learning_rate": 9.488160099239164e-05, - "loss": 0.0051, - "step": 1494 - }, - { - "epoch": 1.1552423247731223, - "grad_norm": 0.03151748329401016, - "learning_rate": 9.486970502771769e-05, - "loss": 0.005, - "step": 1495 - }, - { - "epoch": 1.1560146746476154, - "grad_norm": 0.013358225114643574, - "learning_rate": 9.48577960025568e-05, - "loss": 0.0052, - "step": 1496 - }, - { - "epoch": 1.1567870245221086, - "grad_norm": 0.024633124470710754, - "learning_rate": 9.48458739203754e-05, - "loss": 0.005, - "step": 1497 - }, - { - "epoch": 1.1575593743966017, - "grad_norm": 0.030730850994586945, - "learning_rate": 9.483393878464372e-05, - "loss": 0.0051, - "step": 1498 - }, - { - "epoch": 1.1583317242710949, - "grad_norm": 0.021617265418171883, - "learning_rate": 9.482199059883581e-05, - "loss": 0.0049, - "step": 1499 - }, - { - "epoch": 1.159104074145588, - "grad_norm": 0.022437119856476784, - "learning_rate": 9.481002936642946e-05, - "loss": 0.0046, - "step": 1500 - }, - { - "epoch": 1.1598764240200812, - "grad_norm": 0.04322676733136177, - "learning_rate": 9.479805509090633e-05, - "loss": 0.005, - "step": 1501 - }, - { - "epoch": 1.1606487738945743, - "grad_norm": 0.019928233698010445, - "learning_rate": 9.478606777575183e-05, - "loss": 0.0048, - "step": 1502 - }, - { - "epoch": 1.1614211237690675, - "grad_norm": 0.029807768762111664, - "learning_rate": 9.477406742445516e-05, - "loss": 0.005, - "step": 1503 - }, - { - "epoch": 1.1621934736435606, - "grad_norm": 0.02389885112643242, - "learning_rate": 9.476205404050936e-05, - "loss": 0.0045, - "step": 1504 - }, - { - "epoch": 1.1629658235180536, - "grad_norm": 0.01405603438615799, - "learning_rate": 9.475002762741122e-05, - "loss": 0.0045, - "step": 1505 - }, - { - "epoch": 1.163738173392547, - "grad_norm": 0.02136818692088127, - "learning_rate": 9.473798818866134e-05, - "loss": 0.0049, - "step": 1506 - }, - { - "epoch": 1.1645105232670399, - "grad_norm": 0.026121748611330986, - "learning_rate": 9.472593572776411e-05, - "loss": 0.0048, - "step": 1507 - }, - { - "epoch": 1.1652828731415332, - "grad_norm": 0.022823212668299675, - "learning_rate": 9.471387024822773e-05, - "loss": 0.0049, - "step": 1508 - }, - { - "epoch": 1.1660552230160262, - "grad_norm": 0.019243579357862473, - "learning_rate": 9.470179175356413e-05, - "loss": 0.0049, - "step": 1509 - }, - { - "epoch": 1.1668275728905195, - "grad_norm": 0.014758034609258175, - "learning_rate": 9.468970024728911e-05, - "loss": 0.0047, - "step": 1510 - }, - { - "epoch": 1.1675999227650125, - "grad_norm": 0.013576776720583439, - "learning_rate": 9.467759573292217e-05, - "loss": 0.0051, - "step": 1511 - }, - { - "epoch": 1.1683722726395056, - "grad_norm": 0.03590305894613266, - "learning_rate": 9.466547821398668e-05, - "loss": 0.0049, - "step": 1512 - }, - { - "epoch": 1.1691446225139988, - "grad_norm": 0.0228645708411932, - "learning_rate": 9.465334769400975e-05, - "loss": 0.0047, - "step": 1513 - }, - { - "epoch": 1.169916972388492, - "grad_norm": 0.0239447969943285, - "learning_rate": 9.464120417652226e-05, - "loss": 0.0046, - "step": 1514 - }, - { - "epoch": 1.170689322262985, - "grad_norm": 0.038972727954387665, - "learning_rate": 9.462904766505893e-05, - "loss": 0.0051, - "step": 1515 - }, - { - "epoch": 1.1714616721374782, - "grad_norm": 0.01209209393709898, - "learning_rate": 9.46168781631582e-05, - "loss": 0.0051, - "step": 1516 - }, - { - "epoch": 1.1722340220119714, - "grad_norm": 0.01610351912677288, - "learning_rate": 9.460469567436232e-05, - "loss": 0.0046, - "step": 1517 - }, - { - "epoch": 1.1730063718864645, - "grad_norm": 0.036079291254282, - "learning_rate": 9.459250020221731e-05, - "loss": 0.005, - "step": 1518 - }, - { - "epoch": 1.1737787217609577, - "grad_norm": 0.01571802981197834, - "learning_rate": 9.458029175027301e-05, - "loss": 0.0049, - "step": 1519 - }, - { - "epoch": 1.1745510716354508, - "grad_norm": 0.018590154126286507, - "learning_rate": 9.456807032208298e-05, - "loss": 0.0047, - "step": 1520 - }, - { - "epoch": 1.175323421509944, - "grad_norm": 0.04737241193652153, - "learning_rate": 9.455583592120458e-05, - "loss": 0.005, - "step": 1521 - }, - { - "epoch": 1.176095771384437, - "grad_norm": 0.023734835907816887, - "learning_rate": 9.454358855119895e-05, - "loss": 0.0047, - "step": 1522 - }, - { - "epoch": 1.1768681212589303, - "grad_norm": 0.04579580947756767, - "learning_rate": 9.453132821563102e-05, - "loss": 0.0051, - "step": 1523 - }, - { - "epoch": 1.1776404711334234, - "grad_norm": 0.05348771810531616, - "learning_rate": 9.451905491806946e-05, - "loss": 0.0053, - "step": 1524 - }, - { - "epoch": 1.1784128210079166, - "grad_norm": 0.013545769266784191, - "learning_rate": 9.450676866208675e-05, - "loss": 0.0047, - "step": 1525 - }, - { - "epoch": 1.1791851708824097, - "grad_norm": 0.04317854717373848, - "learning_rate": 9.44944694512591e-05, - "loss": 0.0049, - "step": 1526 - }, - { - "epoch": 1.1799575207569029, - "grad_norm": 0.027513282373547554, - "learning_rate": 9.448215728916652e-05, - "loss": 0.0045, - "step": 1527 - }, - { - "epoch": 1.180729870631396, - "grad_norm": 0.020986218005418777, - "learning_rate": 9.446983217939278e-05, - "loss": 0.0046, - "step": 1528 - }, - { - "epoch": 1.1815022205058892, - "grad_norm": 0.03395180404186249, - "learning_rate": 9.445749412552544e-05, - "loss": 0.005, - "step": 1529 - }, - { - "epoch": 1.1822745703803823, - "grad_norm": 0.01142895594239235, - "learning_rate": 9.44451431311558e-05, - "loss": 0.0048, - "step": 1530 - }, - { - "epoch": 1.1830469202548755, - "grad_norm": 0.016486341133713722, - "learning_rate": 9.443277919987892e-05, - "loss": 0.0047, - "step": 1531 - }, - { - "epoch": 1.1838192701293686, - "grad_norm": 0.010691473260521889, - "learning_rate": 9.442040233529366e-05, - "loss": 0.0051, - "step": 1532 - }, - { - "epoch": 1.1845916200038618, - "grad_norm": 0.015164146199822426, - "learning_rate": 9.440801254100261e-05, - "loss": 0.0044, - "step": 1533 - }, - { - "epoch": 1.185363969878355, - "grad_norm": 0.01204696111381054, - "learning_rate": 9.439560982061215e-05, - "loss": 0.0049, - "step": 1534 - }, - { - "epoch": 1.186136319752848, - "grad_norm": 0.012924645096063614, - "learning_rate": 9.438319417773243e-05, - "loss": 0.0046, - "step": 1535 - }, - { - "epoch": 1.1869086696273412, - "grad_norm": 0.016666272655129433, - "learning_rate": 9.43707656159773e-05, - "loss": 0.0044, - "step": 1536 - }, - { - "epoch": 1.1876810195018344, - "grad_norm": 0.012084845453500748, - "learning_rate": 9.435832413896446e-05, - "loss": 0.0042, - "step": 1537 - }, - { - "epoch": 1.1884533693763275, - "grad_norm": 0.017669981345534325, - "learning_rate": 9.43458697503153e-05, - "loss": 0.0046, - "step": 1538 - }, - { - "epoch": 1.1892257192508207, - "grad_norm": 0.0243675597012043, - "learning_rate": 9.433340245365499e-05, - "loss": 0.0041, - "step": 1539 - }, - { - "epoch": 1.1899980691253138, - "grad_norm": 0.014637123793363571, - "learning_rate": 9.432092225261246e-05, - "loss": 0.0044, - "step": 1540 - }, - { - "epoch": 1.190770418999807, - "grad_norm": 0.01418989896774292, - "learning_rate": 9.430842915082042e-05, - "loss": 0.0042, - "step": 1541 - }, - { - "epoch": 1.1915427688743, - "grad_norm": 0.02326418273150921, - "learning_rate": 9.429592315191527e-05, - "loss": 0.0043, - "step": 1542 - }, - { - "epoch": 1.1923151187487933, - "grad_norm": 0.010195459239184856, - "learning_rate": 9.428340425953723e-05, - "loss": 0.0046, - "step": 1543 - }, - { - "epoch": 1.1930874686232864, - "grad_norm": 0.0273361224681139, - "learning_rate": 9.427087247733023e-05, - "loss": 0.0052, - "step": 1544 - }, - { - "epoch": 1.1938598184977796, - "grad_norm": 0.01398895401507616, - "learning_rate": 9.425832780894198e-05, - "loss": 0.0048, - "step": 1545 - }, - { - "epoch": 1.1946321683722727, - "grad_norm": 0.01838972046971321, - "learning_rate": 9.424577025802394e-05, - "loss": 0.0049, - "step": 1546 - }, - { - "epoch": 1.1954045182467659, - "grad_norm": 0.028748217970132828, - "learning_rate": 9.423319982823129e-05, - "loss": 0.0045, - "step": 1547 - }, - { - "epoch": 1.196176868121259, - "grad_norm": 0.012720394879579544, - "learning_rate": 9.422061652322298e-05, - "loss": 0.0038, - "step": 1548 - }, - { - "epoch": 1.1969492179957522, - "grad_norm": 0.011654634028673172, - "learning_rate": 9.420802034666172e-05, - "loss": 0.004, - "step": 1549 - }, - { - "epoch": 1.1977215678702453, - "grad_norm": 0.015816690400242805, - "learning_rate": 9.419541130221394e-05, - "loss": 0.0043, - "step": 1550 - }, - { - "epoch": 1.1984939177447385, - "grad_norm": 0.014582616277039051, - "learning_rate": 9.418278939354984e-05, - "loss": 0.0045, - "step": 1551 - }, - { - "epoch": 1.1992662676192316, - "grad_norm": 0.011749477125704288, - "learning_rate": 9.417015462434336e-05, - "loss": 0.0044, - "step": 1552 - }, - { - "epoch": 1.2000386174937248, - "grad_norm": 0.014693439938127995, - "learning_rate": 9.415750699827213e-05, - "loss": 0.0045, - "step": 1553 - }, - { - "epoch": 1.2008109673682177, - "grad_norm": 0.014372429810464382, - "learning_rate": 9.414484651901763e-05, - "loss": 0.0043, - "step": 1554 - }, - { - "epoch": 1.201583317242711, - "grad_norm": 0.01311397459357977, - "learning_rate": 9.413217319026497e-05, - "loss": 0.0042, - "step": 1555 - }, - { - "epoch": 1.202355667117204, - "grad_norm": 0.01853027381002903, - "learning_rate": 9.411948701570307e-05, - "loss": 0.0047, - "step": 1556 - }, - { - "epoch": 1.2031280169916974, - "grad_norm": 0.00959880743175745, - "learning_rate": 9.410678799902458e-05, - "loss": 0.0043, - "step": 1557 - }, - { - "epoch": 1.2039003668661903, - "grad_norm": 0.011557672172784805, - "learning_rate": 9.409407614392585e-05, - "loss": 0.0048, - "step": 1558 - }, - { - "epoch": 1.2046727167406834, - "grad_norm": 0.022469764575362206, - "learning_rate": 9.408135145410701e-05, - "loss": 0.0045, - "step": 1559 - }, - { - "epoch": 1.2054450666151766, - "grad_norm": 0.012665819376707077, - "learning_rate": 9.406861393327193e-05, - "loss": 0.0042, - "step": 1560 - }, - { - "epoch": 1.2062174164896697, - "grad_norm": 0.016224119812250137, - "learning_rate": 9.405586358512817e-05, - "loss": 0.0047, - "step": 1561 - }, - { - "epoch": 1.2069897663641629, - "grad_norm": 0.021541811525821686, - "learning_rate": 9.404310041338704e-05, - "loss": 0.0046, - "step": 1562 - }, - { - "epoch": 1.207762116238656, - "grad_norm": 0.017033765092492104, - "learning_rate": 9.40303244217636e-05, - "loss": 0.0043, - "step": 1563 - }, - { - "epoch": 1.2085344661131492, - "grad_norm": 0.02095952443778515, - "learning_rate": 9.401753561397664e-05, - "loss": 0.0043, - "step": 1564 - }, - { - "epoch": 1.2093068159876423, - "grad_norm": 0.013313321396708488, - "learning_rate": 9.400473399374868e-05, - "loss": 0.0039, - "step": 1565 - }, - { - "epoch": 1.2100791658621355, - "grad_norm": 0.016337791457772255, - "learning_rate": 9.399191956480594e-05, - "loss": 0.0042, - "step": 1566 - }, - { - "epoch": 1.2108515157366286, - "grad_norm": 0.01406900305300951, - "learning_rate": 9.397909233087839e-05, - "loss": 0.0049, - "step": 1567 - }, - { - "epoch": 1.2116238656111218, - "grad_norm": 0.013073080219328403, - "learning_rate": 9.396625229569975e-05, - "loss": 0.0042, - "step": 1568 - }, - { - "epoch": 1.212396215485615, - "grad_norm": 0.013312644325196743, - "learning_rate": 9.395339946300743e-05, - "loss": 0.0042, - "step": 1569 - }, - { - "epoch": 1.213168565360108, - "grad_norm": 0.014997152611613274, - "learning_rate": 9.394053383654258e-05, - "loss": 0.0049, - "step": 1570 - }, - { - "epoch": 1.2139409152346012, - "grad_norm": 0.014144647866487503, - "learning_rate": 9.392765542005008e-05, - "loss": 0.0043, - "step": 1571 - }, - { - "epoch": 1.2147132651090944, - "grad_norm": 0.01141285989433527, - "learning_rate": 9.391476421727853e-05, - "loss": 0.0044, - "step": 1572 - }, - { - "epoch": 1.2154856149835875, - "grad_norm": 0.020384816452860832, - "learning_rate": 9.390186023198022e-05, - "loss": 0.0043, - "step": 1573 - }, - { - "epoch": 1.2162579648580807, - "grad_norm": 0.01110097672790289, - "learning_rate": 9.388894346791121e-05, - "loss": 0.0044, - "step": 1574 - }, - { - "epoch": 1.2170303147325738, - "grad_norm": 0.015966853126883507, - "learning_rate": 9.387601392883128e-05, - "loss": 0.0044, - "step": 1575 - }, - { - "epoch": 1.217802664607067, - "grad_norm": 0.009524564258754253, - "learning_rate": 9.386307161850384e-05, - "loss": 0.004, - "step": 1576 - }, - { - "epoch": 1.2185750144815601, - "grad_norm": 0.01066622231155634, - "learning_rate": 9.385011654069615e-05, - "loss": 0.0044, - "step": 1577 - }, - { - "epoch": 1.2193473643560533, - "grad_norm": 0.010826393030583858, - "learning_rate": 9.38371486991791e-05, - "loss": 0.0045, - "step": 1578 - }, - { - "epoch": 1.2201197142305464, - "grad_norm": 0.01694653183221817, - "learning_rate": 9.38241680977273e-05, - "loss": 0.0048, - "step": 1579 - }, - { - "epoch": 1.2208920641050396, - "grad_norm": 0.022291971370577812, - "learning_rate": 9.38111747401191e-05, - "loss": 0.0045, - "step": 1580 - }, - { - "epoch": 1.2216644139795327, - "grad_norm": 0.01632075384259224, - "learning_rate": 9.379816863013655e-05, - "loss": 0.0045, - "step": 1581 - }, - { - "epoch": 1.2224367638540259, - "grad_norm": 0.01566249504685402, - "learning_rate": 9.378514977156543e-05, - "loss": 0.0047, - "step": 1582 - }, - { - "epoch": 1.223209113728519, - "grad_norm": 0.016212694346904755, - "learning_rate": 9.377211816819518e-05, - "loss": 0.0044, - "step": 1583 - }, - { - "epoch": 1.2239814636030122, - "grad_norm": 0.009922226890921593, - "learning_rate": 9.375907382381903e-05, - "loss": 0.0039, - "step": 1584 - }, - { - "epoch": 1.2247538134775053, - "grad_norm": 0.011142924427986145, - "learning_rate": 9.374601674223383e-05, - "loss": 0.0044, - "step": 1585 - }, - { - "epoch": 1.2255261633519985, - "grad_norm": 0.012572258710861206, - "learning_rate": 9.373294692724022e-05, - "loss": 0.0044, - "step": 1586 - }, - { - "epoch": 1.2262985132264916, - "grad_norm": 0.012604492716491222, - "learning_rate": 9.371986438264246e-05, - "loss": 0.0044, - "step": 1587 - }, - { - "epoch": 1.2270708631009848, - "grad_norm": 0.017465921118855476, - "learning_rate": 9.370676911224862e-05, - "loss": 0.0047, - "step": 1588 - }, - { - "epoch": 1.227843212975478, - "grad_norm": 0.019798975437879562, - "learning_rate": 9.369366111987037e-05, - "loss": 0.0048, - "step": 1589 - }, - { - "epoch": 1.228615562849971, - "grad_norm": 0.013204040005803108, - "learning_rate": 9.368054040932315e-05, - "loss": 0.0044, - "step": 1590 - }, - { - "epoch": 1.2293879127244642, - "grad_norm": 0.013707738369703293, - "learning_rate": 9.366740698442608e-05, - "loss": 0.0043, - "step": 1591 - }, - { - "epoch": 1.2301602625989574, - "grad_norm": 0.014138542115688324, - "learning_rate": 9.3654260849002e-05, - "loss": 0.0045, - "step": 1592 - }, - { - "epoch": 1.2309326124734505, - "grad_norm": 0.011442754417657852, - "learning_rate": 9.364110200687738e-05, - "loss": 0.0044, - "step": 1593 - }, - { - "epoch": 1.2317049623479437, - "grad_norm": 0.013380014337599277, - "learning_rate": 9.36279304618825e-05, - "loss": 0.0045, - "step": 1594 - }, - { - "epoch": 1.2324773122224368, - "grad_norm": 0.01104913279414177, - "learning_rate": 9.361474621785125e-05, - "loss": 0.004, - "step": 1595 - }, - { - "epoch": 1.23324966209693, - "grad_norm": 0.010025731287896633, - "learning_rate": 9.360154927862123e-05, - "loss": 0.0039, - "step": 1596 - }, - { - "epoch": 1.2340220119714231, - "grad_norm": 0.009694824926555157, - "learning_rate": 9.358833964803379e-05, - "loss": 0.004, - "step": 1597 - }, - { - "epoch": 1.2347943618459163, - "grad_norm": 0.012553083710372448, - "learning_rate": 9.357511732993392e-05, - "loss": 0.0047, - "step": 1598 - }, - { - "epoch": 1.2355667117204094, - "grad_norm": 0.012948554940521717, - "learning_rate": 9.356188232817029e-05, - "loss": 0.0048, - "step": 1599 - }, - { - "epoch": 1.2363390615949026, - "grad_norm": 0.013301286846399307, - "learning_rate": 9.354863464659532e-05, - "loss": 0.0049, - "step": 1600 - }, - { - "epoch": 1.2371114114693955, - "grad_norm": 0.0150552187114954, - "learning_rate": 9.353537428906508e-05, - "loss": 0.0043, - "step": 1601 - }, - { - "epoch": 1.2378837613438889, - "grad_norm": 0.01858876273036003, - "learning_rate": 9.352210125943934e-05, - "loss": 0.0048, - "step": 1602 - }, - { - "epoch": 1.2386561112183818, - "grad_norm": 0.016063978895545006, - "learning_rate": 9.350881556158155e-05, - "loss": 0.0051, - "step": 1603 - }, - { - "epoch": 1.2394284610928752, - "grad_norm": 0.010043537244200706, - "learning_rate": 9.349551719935887e-05, - "loss": 0.0039, - "step": 1604 - }, - { - "epoch": 1.2402008109673681, - "grad_norm": 0.009146731346845627, - "learning_rate": 9.348220617664212e-05, - "loss": 0.0038, - "step": 1605 - }, - { - "epoch": 1.2409731608418613, - "grad_norm": 0.011488694697618484, - "learning_rate": 9.346888249730583e-05, - "loss": 0.0048, - "step": 1606 - }, - { - "epoch": 1.2417455107163544, - "grad_norm": 0.014834541827440262, - "learning_rate": 9.345554616522818e-05, - "loss": 0.0049, - "step": 1607 - }, - { - "epoch": 1.2425178605908476, - "grad_norm": 0.010890133678913116, - "learning_rate": 9.344219718429108e-05, - "loss": 0.0043, - "step": 1608 - }, - { - "epoch": 1.2432902104653407, - "grad_norm": 0.019118288531899452, - "learning_rate": 9.342883555838007e-05, - "loss": 0.004, - "step": 1609 - }, - { - "epoch": 1.2440625603398339, - "grad_norm": 0.025927064940333366, - "learning_rate": 9.341546129138442e-05, - "loss": 0.0045, - "step": 1610 - }, - { - "epoch": 1.244834910214327, - "grad_norm": 0.019404688850045204, - "learning_rate": 9.340207438719703e-05, - "loss": 0.0044, - "step": 1611 - }, - { - "epoch": 1.2456072600888202, - "grad_norm": 0.01408322062343359, - "learning_rate": 9.338867484971454e-05, - "loss": 0.0046, - "step": 1612 - }, - { - "epoch": 1.2463796099633133, - "grad_norm": 0.01599975675344467, - "learning_rate": 9.33752626828372e-05, - "loss": 0.0045, - "step": 1613 - }, - { - "epoch": 1.2471519598378065, - "grad_norm": 0.012950134463608265, - "learning_rate": 9.336183789046899e-05, - "loss": 0.0042, - "step": 1614 - }, - { - "epoch": 1.2479243097122996, - "grad_norm": 0.012142792344093323, - "learning_rate": 9.334840047651752e-05, - "loss": 0.0043, - "step": 1615 - }, - { - "epoch": 1.2486966595867928, - "grad_norm": 0.011769046075642109, - "learning_rate": 9.33349504448941e-05, - "loss": 0.0042, - "step": 1616 - }, - { - "epoch": 1.249469009461286, - "grad_norm": 0.019216034561395645, - "learning_rate": 9.332148779951375e-05, - "loss": 0.0047, - "step": 1617 - }, - { - "epoch": 1.250241359335779, - "grad_norm": 0.012334473431110382, - "learning_rate": 9.330801254429507e-05, - "loss": 0.0044, - "step": 1618 - }, - { - "epoch": 1.2510137092102722, - "grad_norm": 0.01532732229679823, - "learning_rate": 9.32945246831604e-05, - "loss": 0.0049, - "step": 1619 - }, - { - "epoch": 1.2517860590847654, - "grad_norm": 0.01669018529355526, - "learning_rate": 9.328102422003572e-05, - "loss": 0.0044, - "step": 1620 - }, - { - "epoch": 1.2525584089592585, - "grad_norm": 0.011733302846550941, - "learning_rate": 9.326751115885071e-05, - "loss": 0.0043, - "step": 1621 - }, - { - "epoch": 1.2533307588337517, - "grad_norm": 0.011070352047681808, - "learning_rate": 9.325398550353868e-05, - "loss": 0.0043, - "step": 1622 - }, - { - "epoch": 1.2541031087082448, - "grad_norm": 0.00988784246146679, - "learning_rate": 9.324044725803662e-05, - "loss": 0.004, - "step": 1623 - }, - { - "epoch": 1.254875458582738, - "grad_norm": 0.009769883938133717, - "learning_rate": 9.322689642628519e-05, - "loss": 0.0039, - "step": 1624 - }, - { - "epoch": 1.2556478084572311, - "grad_norm": 0.009687105193734169, - "learning_rate": 9.321333301222872e-05, - "loss": 0.0042, - "step": 1625 - }, - { - "epoch": 1.2564201583317243, - "grad_norm": 0.014387411065399647, - "learning_rate": 9.319975701981519e-05, - "loss": 0.0046, - "step": 1626 - }, - { - "epoch": 1.2571925082062174, - "grad_norm": 0.009872864000499249, - "learning_rate": 9.318616845299622e-05, - "loss": 0.004, - "step": 1627 - }, - { - "epoch": 1.2579648580807106, - "grad_norm": 0.01052020862698555, - "learning_rate": 9.317256731572713e-05, - "loss": 0.0042, - "step": 1628 - }, - { - "epoch": 1.2587372079552037, - "grad_norm": 0.010644960217177868, - "learning_rate": 9.315895361196689e-05, - "loss": 0.0041, - "step": 1629 - }, - { - "epoch": 1.2595095578296969, - "grad_norm": 0.021633964031934738, - "learning_rate": 9.314532734567811e-05, - "loss": 0.0045, - "step": 1630 - }, - { - "epoch": 1.26028190770419, - "grad_norm": 0.011609155684709549, - "learning_rate": 9.313168852082708e-05, - "loss": 0.0046, - "step": 1631 - }, - { - "epoch": 1.2610542575786832, - "grad_norm": 0.016867250204086304, - "learning_rate": 9.311803714138372e-05, - "loss": 0.0047, - "step": 1632 - }, - { - "epoch": 1.2618266074531763, - "grad_norm": 0.026296664029359818, - "learning_rate": 9.310437321132161e-05, - "loss": 0.0044, - "step": 1633 - }, - { - "epoch": 1.2625989573276695, - "grad_norm": 0.010850663296878338, - "learning_rate": 9.3090696734618e-05, - "loss": 0.005, - "step": 1634 - }, - { - "epoch": 1.2633713072021626, - "grad_norm": 0.025731515139341354, - "learning_rate": 9.307700771525379e-05, - "loss": 0.0051, - "step": 1635 - }, - { - "epoch": 1.2641436570766558, - "grad_norm": 0.01730777695775032, - "learning_rate": 9.30633061572135e-05, - "loss": 0.0041, - "step": 1636 - }, - { - "epoch": 1.264916006951149, - "grad_norm": 0.014599094167351723, - "learning_rate": 9.304959206448534e-05, - "loss": 0.0047, - "step": 1637 - }, - { - "epoch": 1.265688356825642, - "grad_norm": 0.03559194132685661, - "learning_rate": 9.303586544106115e-05, - "loss": 0.0044, - "step": 1638 - }, - { - "epoch": 1.2664607067001352, - "grad_norm": 0.01836187206208706, - "learning_rate": 9.302212629093641e-05, - "loss": 0.0043, - "step": 1639 - }, - { - "epoch": 1.2672330565746284, - "grad_norm": 0.016208026558160782, - "learning_rate": 9.300837461811027e-05, - "loss": 0.0049, - "step": 1640 - }, - { - "epoch": 1.2680054064491215, - "grad_norm": 0.01692858897149563, - "learning_rate": 9.299461042658548e-05, - "loss": 0.0043, - "step": 1641 - }, - { - "epoch": 1.2687777563236147, - "grad_norm": 0.012160011567175388, - "learning_rate": 9.29808337203685e-05, - "loss": 0.0044, - "step": 1642 - }, - { - "epoch": 1.2695501061981078, - "grad_norm": 0.022362643852829933, - "learning_rate": 9.296704450346938e-05, - "loss": 0.0044, - "step": 1643 - }, - { - "epoch": 1.270322456072601, - "grad_norm": 0.010011281818151474, - "learning_rate": 9.295324277990183e-05, - "loss": 0.0041, - "step": 1644 - }, - { - "epoch": 1.2710948059470941, - "grad_norm": 0.020944936200976372, - "learning_rate": 9.293942855368318e-05, - "loss": 0.005, - "step": 1645 - }, - { - "epoch": 1.271867155821587, - "grad_norm": 0.012940247543156147, - "learning_rate": 9.292560182883444e-05, - "loss": 0.0051, - "step": 1646 - }, - { - "epoch": 1.2726395056960804, - "grad_norm": 0.010885242372751236, - "learning_rate": 9.291176260938023e-05, - "loss": 0.005, - "step": 1647 - }, - { - "epoch": 1.2734118555705733, - "grad_norm": 0.02365632727742195, - "learning_rate": 9.28979108993488e-05, - "loss": 0.0048, - "step": 1648 - }, - { - "epoch": 1.2741842054450667, - "grad_norm": 0.018599843606352806, - "learning_rate": 9.288404670277208e-05, - "loss": 0.004, - "step": 1649 - }, - { - "epoch": 1.2749565553195596, - "grad_norm": 0.0399547703564167, - "learning_rate": 9.287017002368557e-05, - "loss": 0.0053, - "step": 1650 - }, - { - "epoch": 1.275728905194053, - "grad_norm": 0.01006177719682455, - "learning_rate": 9.285628086612844e-05, - "loss": 0.0042, - "step": 1651 - }, - { - "epoch": 1.276501255068546, - "grad_norm": 0.009012104943394661, - "learning_rate": 9.284237923414351e-05, - "loss": 0.0042, - "step": 1652 - }, - { - "epoch": 1.2772736049430393, - "grad_norm": 0.010311353951692581, - "learning_rate": 9.282846513177718e-05, - "loss": 0.0049, - "step": 1653 - }, - { - "epoch": 1.2780459548175322, - "grad_norm": 0.020534196868538857, - "learning_rate": 9.281453856307953e-05, - "loss": 0.0048, - "step": 1654 - }, - { - "epoch": 1.2788183046920256, - "grad_norm": 0.020206743851304054, - "learning_rate": 9.280059953210425e-05, - "loss": 0.0051, - "step": 1655 - }, - { - "epoch": 1.2795906545665185, - "grad_norm": 0.021070044487714767, - "learning_rate": 9.278664804290864e-05, - "loss": 0.0049, - "step": 1656 - }, - { - "epoch": 1.2803630044410117, - "grad_norm": 0.014754528179764748, - "learning_rate": 9.277268409955364e-05, - "loss": 0.0044, - "step": 1657 - }, - { - "epoch": 1.2811353543155048, - "grad_norm": 0.012802411802113056, - "learning_rate": 9.275870770610382e-05, - "loss": 0.0044, - "step": 1658 - }, - { - "epoch": 1.281907704189998, - "grad_norm": 0.009704644791781902, - "learning_rate": 9.274471886662739e-05, - "loss": 0.0045, - "step": 1659 - }, - { - "epoch": 1.2826800540644911, - "grad_norm": 0.02821960113942623, - "learning_rate": 9.273071758519615e-05, - "loss": 0.0048, - "step": 1660 - }, - { - "epoch": 1.2834524039389843, - "grad_norm": 0.010638576932251453, - "learning_rate": 9.271670386588552e-05, - "loss": 0.0045, - "step": 1661 - }, - { - "epoch": 1.2842247538134774, - "grad_norm": 0.016380267217755318, - "learning_rate": 9.270267771277458e-05, - "loss": 0.0046, - "step": 1662 - }, - { - "epoch": 1.2849971036879706, - "grad_norm": 0.021698275581002235, - "learning_rate": 9.268863912994599e-05, - "loss": 0.0045, - "step": 1663 - }, - { - "epoch": 1.2857694535624637, - "grad_norm": 0.016744716092944145, - "learning_rate": 9.267458812148604e-05, - "loss": 0.0043, - "step": 1664 - }, - { - "epoch": 1.286541803436957, - "grad_norm": 0.018823161721229553, - "learning_rate": 9.266052469148463e-05, - "loss": 0.0043, - "step": 1665 - }, - { - "epoch": 1.28731415331145, - "grad_norm": 0.015203451737761497, - "learning_rate": 9.264644884403532e-05, - "loss": 0.0046, - "step": 1666 - }, - { - "epoch": 1.2880865031859432, - "grad_norm": 0.013647705316543579, - "learning_rate": 9.263236058323522e-05, - "loss": 0.0046, - "step": 1667 - }, - { - "epoch": 1.2888588530604363, - "grad_norm": 0.014722113497555256, - "learning_rate": 9.261825991318509e-05, - "loss": 0.0041, - "step": 1668 - }, - { - "epoch": 1.2896312029349295, - "grad_norm": 0.02481660805642605, - "learning_rate": 9.260414683798929e-05, - "loss": 0.0053, - "step": 1669 - }, - { - "epoch": 1.2904035528094226, - "grad_norm": 0.017250480130314827, - "learning_rate": 9.25900213617558e-05, - "loss": 0.0048, - "step": 1670 - }, - { - "epoch": 1.2911759026839158, - "grad_norm": 0.00951310619711876, - "learning_rate": 9.25758834885962e-05, - "loss": 0.004, - "step": 1671 - }, - { - "epoch": 1.291948252558409, - "grad_norm": 0.02184438891708851, - "learning_rate": 9.256173322262569e-05, - "loss": 0.0051, - "step": 1672 - }, - { - "epoch": 1.292720602432902, - "grad_norm": 0.01367892511188984, - "learning_rate": 9.254757056796305e-05, - "loss": 0.0048, - "step": 1673 - }, - { - "epoch": 1.2934929523073952, - "grad_norm": 0.017672359943389893, - "learning_rate": 9.253339552873074e-05, - "loss": 0.0046, - "step": 1674 - }, - { - "epoch": 1.2942653021818884, - "grad_norm": 0.011920403689146042, - "learning_rate": 9.251920810905473e-05, - "loss": 0.0102, - "step": 1675 - }, - { - "epoch": 1.2950376520563815, - "grad_norm": 0.016817884519696236, - "learning_rate": 9.250500831306462e-05, - "loss": 0.0043, - "step": 1676 - }, - { - "epoch": 1.2958100019308747, - "grad_norm": 0.015963738784193993, - "learning_rate": 9.249079614489364e-05, - "loss": 0.004, - "step": 1677 - }, - { - "epoch": 1.2965823518053678, - "grad_norm": 0.07387775182723999, - "learning_rate": 9.247657160867864e-05, - "loss": 0.0053, - "step": 1678 - }, - { - "epoch": 1.297354701679861, - "grad_norm": 0.30068767070770264, - "learning_rate": 9.246233470856e-05, - "loss": 0.006, - "step": 1679 - }, - { - "epoch": 1.2981270515543541, - "grad_norm": 0.06857229024171829, - "learning_rate": 9.244808544868177e-05, - "loss": 0.0045, - "step": 1680 - }, - { - "epoch": 1.2988994014288473, - "grad_norm": 0.012670093216001987, - "learning_rate": 9.243382383319154e-05, - "loss": 0.0043, - "step": 1681 - }, - { - "epoch": 1.2996717513033405, - "grad_norm": 0.03482682630419731, - "learning_rate": 9.241954986624052e-05, - "loss": 0.0047, - "step": 1682 - }, - { - "epoch": 1.3004441011778336, - "grad_norm": 0.03190697729587555, - "learning_rate": 9.240526355198353e-05, - "loss": 0.0126, - "step": 1683 - }, - { - "epoch": 1.3012164510523268, - "grad_norm": 0.14594252407550812, - "learning_rate": 9.239096489457898e-05, - "loss": 0.0054, - "step": 1684 - }, - { - "epoch": 1.30198880092682, - "grad_norm": 0.029185067862272263, - "learning_rate": 9.237665389818885e-05, - "loss": 0.006, - "step": 1685 - }, - { - "epoch": 1.302761150801313, - "grad_norm": 0.04388967528939247, - "learning_rate": 9.236233056697872e-05, - "loss": 0.0054, - "step": 1686 - }, - { - "epoch": 1.3035335006758062, - "grad_norm": 0.1524173617362976, - "learning_rate": 9.234799490511778e-05, - "loss": 0.0117, - "step": 1687 - }, - { - "epoch": 1.3043058505502994, - "grad_norm": 0.04995962977409363, - "learning_rate": 9.233364691677877e-05, - "loss": 0.0056, - "step": 1688 - }, - { - "epoch": 1.3050782004247925, - "grad_norm": 0.03565353527665138, - "learning_rate": 9.231928660613807e-05, - "loss": 0.0053, - "step": 1689 - }, - { - "epoch": 1.3058505502992857, - "grad_norm": 0.06701270490884781, - "learning_rate": 9.230491397737562e-05, - "loss": 0.0066, - "step": 1690 - }, - { - "epoch": 1.3066229001737788, - "grad_norm": 0.05576106533408165, - "learning_rate": 9.229052903467493e-05, - "loss": 0.0067, - "step": 1691 - }, - { - "epoch": 1.307395250048272, - "grad_norm": 0.4241020083427429, - "learning_rate": 9.22761317822231e-05, - "loss": 0.0152, - "step": 1692 - }, - { - "epoch": 1.3081675999227649, - "grad_norm": 0.02562202699482441, - "learning_rate": 9.226172222421083e-05, - "loss": 0.0045, - "step": 1693 - }, - { - "epoch": 1.3089399497972583, - "grad_norm": 0.06259538233280182, - "learning_rate": 9.224730036483241e-05, - "loss": 0.0065, - "step": 1694 - }, - { - "epoch": 1.3097122996717512, - "grad_norm": 0.055991485714912415, - "learning_rate": 9.223286620828569e-05, - "loss": 0.006, - "step": 1695 - }, - { - "epoch": 1.3104846495462446, - "grad_norm": 0.12314442545175552, - "learning_rate": 9.22184197587721e-05, - "loss": 0.0213, - "step": 1696 - }, - { - "epoch": 1.3112569994207375, - "grad_norm": 0.034334950149059296, - "learning_rate": 9.220396102049665e-05, - "loss": 0.0055, - "step": 1697 - }, - { - "epoch": 1.3120293492952309, - "grad_norm": 0.05128410831093788, - "learning_rate": 9.218948999766792e-05, - "loss": 0.0055, - "step": 1698 - }, - { - "epoch": 1.3128016991697238, - "grad_norm": 0.12215114384889603, - "learning_rate": 9.21750066944981e-05, - "loss": 0.0058, - "step": 1699 - }, - { - "epoch": 1.3135740490442172, - "grad_norm": 0.03655848279595375, - "learning_rate": 9.21605111152029e-05, - "loss": 0.0058, - "step": 1700 - }, - { - "epoch": 1.31434639891871, - "grad_norm": 0.07133016735315323, - "learning_rate": 9.214600326400165e-05, - "loss": 0.0062, - "step": 1701 - }, - { - "epoch": 1.3151187487932035, - "grad_norm": 0.03918299451470375, - "learning_rate": 9.213148314511723e-05, - "loss": 0.0053, - "step": 1702 - }, - { - "epoch": 1.3158910986676964, - "grad_norm": 0.01166351418942213, - "learning_rate": 9.211695076277611e-05, - "loss": 0.0053, - "step": 1703 - }, - { - "epoch": 1.3166634485421895, - "grad_norm": 0.03695222735404968, - "learning_rate": 9.210240612120831e-05, - "loss": 0.0053, - "step": 1704 - }, - { - "epoch": 1.3174357984166827, - "grad_norm": 0.07988899946212769, - "learning_rate": 9.208784922464742e-05, - "loss": 0.0057, - "step": 1705 - }, - { - "epoch": 1.3182081482911758, - "grad_norm": 0.023410560563206673, - "learning_rate": 9.207328007733059e-05, - "loss": 0.0059, - "step": 1706 - }, - { - "epoch": 1.318980498165669, - "grad_norm": 0.018493063747882843, - "learning_rate": 9.205869868349854e-05, - "loss": 0.0055, - "step": 1707 - }, - { - "epoch": 1.3197528480401621, - "grad_norm": 0.06907300651073456, - "learning_rate": 9.204410504739559e-05, - "loss": 0.0055, - "step": 1708 - }, - { - "epoch": 1.3205251979146553, - "grad_norm": 0.04984796419739723, - "learning_rate": 9.202949917326957e-05, - "loss": 0.0057, - "step": 1709 - }, - { - "epoch": 1.3212975477891484, - "grad_norm": 0.02336471527814865, - "learning_rate": 9.201488106537192e-05, - "loss": 0.005, - "step": 1710 - }, - { - "epoch": 1.3220698976636416, - "grad_norm": 0.026698529720306396, - "learning_rate": 9.200025072795762e-05, - "loss": 0.0059, - "step": 1711 - }, - { - "epoch": 1.3228422475381347, - "grad_norm": 0.020736917853355408, - "learning_rate": 9.198560816528519e-05, - "loss": 0.0058, - "step": 1712 - }, - { - "epoch": 1.3236145974126279, - "grad_norm": 0.030118491500616074, - "learning_rate": 9.197095338161671e-05, - "loss": 0.0058, - "step": 1713 - }, - { - "epoch": 1.324386947287121, - "grad_norm": 0.02043141797184944, - "learning_rate": 9.195628638121786e-05, - "loss": 0.0054, - "step": 1714 - }, - { - "epoch": 1.3251592971616142, - "grad_norm": 0.017679326236248016, - "learning_rate": 9.194160716835786e-05, - "loss": 0.0053, - "step": 1715 - }, - { - "epoch": 1.3259316470361073, - "grad_norm": 0.02373124659061432, - "learning_rate": 9.192691574730944e-05, - "loss": 0.0052, - "step": 1716 - }, - { - "epoch": 1.3267039969106005, - "grad_norm": 0.1194394901394844, - "learning_rate": 9.191221212234895e-05, - "loss": 0.0051, - "step": 1717 - }, - { - "epoch": 1.3274763467850936, - "grad_norm": 0.02503488026559353, - "learning_rate": 9.189749629775622e-05, - "loss": 0.0048, - "step": 1718 - }, - { - "epoch": 1.3282486966595868, - "grad_norm": 0.04794839769601822, - "learning_rate": 9.188276827781472e-05, - "loss": 0.0061, - "step": 1719 - }, - { - "epoch": 1.32902104653408, - "grad_norm": 0.021881457418203354, - "learning_rate": 9.186802806681139e-05, - "loss": 0.0048, - "step": 1720 - }, - { - "epoch": 1.329793396408573, - "grad_norm": 0.023368775844573975, - "learning_rate": 9.185327566903675e-05, - "loss": 0.0052, - "step": 1721 - }, - { - "epoch": 1.3305657462830662, - "grad_norm": 0.0751650407910347, - "learning_rate": 9.183851108878488e-05, - "loss": 0.0057, - "step": 1722 - }, - { - "epoch": 1.3313380961575594, - "grad_norm": 0.020163100212812424, - "learning_rate": 9.182373433035338e-05, - "loss": 0.0051, - "step": 1723 - }, - { - "epoch": 1.3321104460320525, - "grad_norm": 0.0644889622926712, - "learning_rate": 9.180894539804342e-05, - "loss": 0.0059, - "step": 1724 - }, - { - "epoch": 1.3328827959065457, - "grad_norm": 0.05060422047972679, - "learning_rate": 9.179414429615969e-05, - "loss": 0.0063, - "step": 1725 - }, - { - "epoch": 1.3336551457810388, - "grad_norm": 0.016556408256292343, - "learning_rate": 9.177933102901044e-05, - "loss": 0.0046, - "step": 1726 - }, - { - "epoch": 1.334427495655532, - "grad_norm": 0.054401129484176636, - "learning_rate": 9.176450560090745e-05, - "loss": 0.0052, - "step": 1727 - }, - { - "epoch": 1.3351998455300251, - "grad_norm": 0.014559631235897541, - "learning_rate": 9.174966801616603e-05, - "loss": 0.0047, - "step": 1728 - }, - { - "epoch": 1.3359721954045183, - "grad_norm": 0.02533547952771187, - "learning_rate": 9.173481827910508e-05, - "loss": 0.0057, - "step": 1729 - }, - { - "epoch": 1.3367445452790114, - "grad_norm": 0.021959854289889336, - "learning_rate": 9.171995639404696e-05, - "loss": 0.0052, - "step": 1730 - }, - { - "epoch": 1.3375168951535046, - "grad_norm": 0.017806829884648323, - "learning_rate": 9.170508236531763e-05, - "loss": 0.0053, - "step": 1731 - }, - { - "epoch": 1.3382892450279977, - "grad_norm": 0.032244615256786346, - "learning_rate": 9.169019619724654e-05, - "loss": 0.0049, - "step": 1732 - }, - { - "epoch": 1.3390615949024909, - "grad_norm": 0.01540327351540327, - "learning_rate": 9.167529789416671e-05, - "loss": 0.0051, - "step": 1733 - }, - { - "epoch": 1.339833944776984, - "grad_norm": 0.023344023153185844, - "learning_rate": 9.166038746041468e-05, - "loss": 0.0051, - "step": 1734 - }, - { - "epoch": 1.3406062946514772, - "grad_norm": 0.015337632037699223, - "learning_rate": 9.164546490033051e-05, - "loss": 0.0051, - "step": 1735 - }, - { - "epoch": 1.3413786445259703, - "grad_norm": 0.015382025390863419, - "learning_rate": 9.16305302182578e-05, - "loss": 0.0057, - "step": 1736 - }, - { - "epoch": 1.3421509944004635, - "grad_norm": 0.021464722231030464, - "learning_rate": 9.161558341854366e-05, - "loss": 0.0052, - "step": 1737 - }, - { - "epoch": 1.3429233442749566, - "grad_norm": 0.027189360931515694, - "learning_rate": 9.160062450553874e-05, - "loss": 0.0055, - "step": 1738 - }, - { - "epoch": 1.3436956941494498, - "grad_norm": 0.012121383100748062, - "learning_rate": 9.158565348359727e-05, - "loss": 0.0049, - "step": 1739 - }, - { - "epoch": 1.3444680440239427, - "grad_norm": 0.014514083042740822, - "learning_rate": 9.157067035707689e-05, - "loss": 0.0051, - "step": 1740 - }, - { - "epoch": 1.345240393898436, - "grad_norm": 0.01988913305103779, - "learning_rate": 9.155567513033884e-05, - "loss": 0.0048, - "step": 1741 - }, - { - "epoch": 1.346012743772929, - "grad_norm": 0.013310214504599571, - "learning_rate": 9.154066780774791e-05, - "loss": 0.0051, - "step": 1742 - }, - { - "epoch": 1.3467850936474224, - "grad_norm": 0.014470972120761871, - "learning_rate": 9.15256483936723e-05, - "loss": 0.0044, - "step": 1743 - }, - { - "epoch": 1.3475574435219153, - "grad_norm": 0.013322905637323856, - "learning_rate": 9.151061689248386e-05, - "loss": 0.004, - "step": 1744 - }, - { - "epoch": 1.3483297933964087, - "grad_norm": 0.013611961156129837, - "learning_rate": 9.149557330855787e-05, - "loss": 0.0048, - "step": 1745 - }, - { - "epoch": 1.3491021432709016, - "grad_norm": 0.026355788111686707, - "learning_rate": 9.148051764627315e-05, - "loss": 0.0047, - "step": 1746 - }, - { - "epoch": 1.349874493145395, - "grad_norm": 0.023001806810498238, - "learning_rate": 9.146544991001204e-05, - "loss": 0.0053, - "step": 1747 - }, - { - "epoch": 1.350646843019888, - "grad_norm": 0.016485080122947693, - "learning_rate": 9.145037010416043e-05, - "loss": 0.0045, - "step": 1748 - }, - { - "epoch": 1.3514191928943813, - "grad_norm": 0.024821486324071884, - "learning_rate": 9.143527823310762e-05, - "loss": 0.005, - "step": 1749 - }, - { - "epoch": 1.3521915427688742, - "grad_norm": 0.015309068374335766, - "learning_rate": 9.142017430124655e-05, - "loss": 0.0046, - "step": 1750 - }, - { - "epoch": 1.3529638926433676, - "grad_norm": 0.0249322522431612, - "learning_rate": 9.140505831297357e-05, - "loss": 0.005, - "step": 1751 - }, - { - "epoch": 1.3537362425178605, - "grad_norm": 0.016517311334609985, - "learning_rate": 9.138993027268861e-05, - "loss": 0.0045, - "step": 1752 - }, - { - "epoch": 1.3545085923923537, - "grad_norm": 0.01045146957039833, - "learning_rate": 9.137479018479506e-05, - "loss": 0.0047, - "step": 1753 - }, - { - "epoch": 1.3552809422668468, - "grad_norm": 0.01968018338084221, - "learning_rate": 9.135963805369983e-05, - "loss": 0.005, - "step": 1754 - }, - { - "epoch": 1.35605329214134, - "grad_norm": 0.023560628294944763, - "learning_rate": 9.134447388381335e-05, - "loss": 0.0044, - "step": 1755 - }, - { - "epoch": 1.3568256420158331, - "grad_norm": 0.017931949347257614, - "learning_rate": 9.132929767954951e-05, - "loss": 0.0048, - "step": 1756 - }, - { - "epoch": 1.3575979918903263, - "grad_norm": 0.011551735922694206, - "learning_rate": 9.13141094453258e-05, - "loss": 0.0046, - "step": 1757 - }, - { - "epoch": 1.3583703417648194, - "grad_norm": 0.015296884812414646, - "learning_rate": 9.129890918556309e-05, - "loss": 0.0049, - "step": 1758 - }, - { - "epoch": 1.3591426916393126, - "grad_norm": 0.011915626004338264, - "learning_rate": 9.128369690468586e-05, - "loss": 0.0044, - "step": 1759 - }, - { - "epoch": 1.3599150415138057, - "grad_norm": 0.014855817891657352, - "learning_rate": 9.126847260712198e-05, - "loss": 0.0047, - "step": 1760 - }, - { - "epoch": 1.3606873913882989, - "grad_norm": 0.012156839482486248, - "learning_rate": 9.125323629730291e-05, - "loss": 0.0049, - "step": 1761 - }, - { - "epoch": 1.361459741262792, - "grad_norm": 0.01043495163321495, - "learning_rate": 9.123798797966357e-05, - "loss": 0.0045, - "step": 1762 - }, - { - "epoch": 1.3622320911372852, - "grad_norm": 0.015351502224802971, - "learning_rate": 9.122272765864237e-05, - "loss": 0.0045, - "step": 1763 - }, - { - "epoch": 1.3630044410117783, - "grad_norm": 0.017676282674074173, - "learning_rate": 9.120745533868121e-05, - "loss": 0.0048, - "step": 1764 - }, - { - "epoch": 1.3637767908862715, - "grad_norm": 0.0142407501116395, - "learning_rate": 9.119217102422552e-05, - "loss": 0.0047, - "step": 1765 - }, - { - "epoch": 1.3645491407607646, - "grad_norm": 0.013388961553573608, - "learning_rate": 9.117687471972418e-05, - "loss": 0.0049, - "step": 1766 - }, - { - "epoch": 1.3653214906352578, - "grad_norm": 0.015762466937303543, - "learning_rate": 9.116156642962956e-05, - "loss": 0.005, - "step": 1767 - }, - { - "epoch": 1.366093840509751, - "grad_norm": 0.020082663744688034, - "learning_rate": 9.114624615839756e-05, - "loss": 0.0049, - "step": 1768 - }, - { - "epoch": 1.366866190384244, - "grad_norm": 0.011602682061493397, - "learning_rate": 9.113091391048753e-05, - "loss": 0.0044, - "step": 1769 - }, - { - "epoch": 1.3676385402587372, - "grad_norm": 0.011811558157205582, - "learning_rate": 9.111556969036232e-05, - "loss": 0.0049, - "step": 1770 - }, - { - "epoch": 1.3684108901332304, - "grad_norm": 0.010968919843435287, - "learning_rate": 9.110021350248825e-05, - "loss": 0.0048, - "step": 1771 - }, - { - "epoch": 1.3691832400077235, - "grad_norm": 0.01981322281062603, - "learning_rate": 9.108484535133514e-05, - "loss": 0.0054, - "step": 1772 - }, - { - "epoch": 1.3699555898822167, - "grad_norm": 0.012514528818428516, - "learning_rate": 9.10694652413763e-05, - "loss": 0.0048, - "step": 1773 - }, - { - "epoch": 1.3707279397567098, - "grad_norm": 0.01699264906346798, - "learning_rate": 9.105407317708849e-05, - "loss": 0.005, - "step": 1774 - }, - { - "epoch": 1.371500289631203, - "grad_norm": 0.009253366850316525, - "learning_rate": 9.103866916295198e-05, - "loss": 0.0043, - "step": 1775 - }, - { - "epoch": 1.3722726395056961, - "grad_norm": 0.010722543112933636, - "learning_rate": 9.102325320345052e-05, - "loss": 0.0044, - "step": 1776 - }, - { - "epoch": 1.3730449893801893, - "grad_norm": 0.0193585567176342, - "learning_rate": 9.100782530307128e-05, - "loss": 0.0047, - "step": 1777 - }, - { - "epoch": 1.3738173392546824, - "grad_norm": 0.013146034441888332, - "learning_rate": 9.099238546630498e-05, - "loss": 0.0046, - "step": 1778 - }, - { - "epoch": 1.3745896891291756, - "grad_norm": 0.013135528191924095, - "learning_rate": 9.097693369764579e-05, - "loss": 0.0048, - "step": 1779 - }, - { - "epoch": 1.3753620390036687, - "grad_norm": 0.02093779295682907, - "learning_rate": 9.096147000159132e-05, - "loss": 0.005, - "step": 1780 - }, - { - "epoch": 1.3761343888781619, - "grad_norm": 0.010460708290338516, - "learning_rate": 9.09459943826427e-05, - "loss": 0.0044, - "step": 1781 - }, - { - "epoch": 1.376906738752655, - "grad_norm": 0.021889355033636093, - "learning_rate": 9.093050684530451e-05, - "loss": 0.0048, - "step": 1782 - }, - { - "epoch": 1.3776790886271482, - "grad_norm": 0.017091799527406693, - "learning_rate": 9.091500739408478e-05, - "loss": 0.0043, - "step": 1783 - }, - { - "epoch": 1.3784514385016413, - "grad_norm": 0.021311407908797264, - "learning_rate": 9.089949603349505e-05, - "loss": 0.0049, - "step": 1784 - }, - { - "epoch": 1.3792237883761345, - "grad_norm": 0.012659488245844841, - "learning_rate": 9.088397276805028e-05, - "loss": 0.0044, - "step": 1785 - }, - { - "epoch": 1.3799961382506276, - "grad_norm": 0.01769060641527176, - "learning_rate": 9.086843760226891e-05, - "loss": 0.0048, - "step": 1786 - }, - { - "epoch": 1.3807684881251205, - "grad_norm": 0.010824068449437618, - "learning_rate": 9.085289054067289e-05, - "loss": 0.0048, - "step": 1787 - }, - { - "epoch": 1.381540837999614, - "grad_norm": 0.021509883925318718, - "learning_rate": 9.083733158778755e-05, - "loss": 0.005, - "step": 1788 - }, - { - "epoch": 1.3823131878741068, - "grad_norm": 0.026273906230926514, - "learning_rate": 9.082176074814177e-05, - "loss": 0.0048, - "step": 1789 - }, - { - "epoch": 1.3830855377486002, - "grad_norm": 0.025889763608574867, - "learning_rate": 9.080617802626781e-05, - "loss": 0.0042, - "step": 1790 - }, - { - "epoch": 1.3838578876230931, - "grad_norm": 0.011952931992709637, - "learning_rate": 9.079058342670143e-05, - "loss": 0.0043, - "step": 1791 - }, - { - "epoch": 1.3846302374975865, - "grad_norm": 0.043131519109010696, - "learning_rate": 9.077497695398185e-05, - "loss": 0.0048, - "step": 1792 - }, - { - "epoch": 1.3854025873720794, - "grad_norm": 0.01599467732012272, - "learning_rate": 9.075935861265174e-05, - "loss": 0.0045, - "step": 1793 - }, - { - "epoch": 1.3861749372465728, - "grad_norm": 0.01082384493201971, - "learning_rate": 9.074372840725721e-05, - "loss": 0.0043, - "step": 1794 - }, - { - "epoch": 1.3869472871210657, - "grad_norm": 0.028997058048844337, - "learning_rate": 9.072808634234784e-05, - "loss": 0.005, - "step": 1795 - }, - { - "epoch": 1.3877196369955591, - "grad_norm": 0.02237372286617756, - "learning_rate": 9.071243242247667e-05, - "loss": 0.0047, - "step": 1796 - }, - { - "epoch": 1.388491986870052, - "grad_norm": 0.016084033995866776, - "learning_rate": 9.069676665220015e-05, - "loss": 0.0049, - "step": 1797 - }, - { - "epoch": 1.3892643367445454, - "grad_norm": 0.037538450211286545, - "learning_rate": 9.068108903607821e-05, - "loss": 0.0047, - "step": 1798 - }, - { - "epoch": 1.3900366866190383, - "grad_norm": 0.014534000307321548, - "learning_rate": 9.066539957867425e-05, - "loss": 0.0046, - "step": 1799 - }, - { - "epoch": 1.3908090364935315, - "grad_norm": 0.01424864400178194, - "learning_rate": 9.064969828455509e-05, - "loss": 0.0046, - "step": 1800 - }, - { - "epoch": 1.3915813863680246, - "grad_norm": 0.021501099690794945, - "learning_rate": 9.063398515829097e-05, - "loss": 0.0046, - "step": 1801 - }, - { - "epoch": 1.3923537362425178, - "grad_norm": 0.01063248235732317, - "learning_rate": 9.061826020445564e-05, - "loss": 0.0043, - "step": 1802 - }, - { - "epoch": 1.393126086117011, - "grad_norm": 0.10095581412315369, - "learning_rate": 9.060252342762622e-05, - "loss": 0.0045, - "step": 1803 - }, - { - "epoch": 1.393898435991504, - "grad_norm": 0.017239512875676155, - "learning_rate": 9.058677483238332e-05, - "loss": 0.0046, - "step": 1804 - }, - { - "epoch": 1.3946707858659972, - "grad_norm": 0.017307721078395844, - "learning_rate": 9.057101442331097e-05, - "loss": 0.0052, - "step": 1805 - }, - { - "epoch": 1.3954431357404904, - "grad_norm": 0.014669974334537983, - "learning_rate": 9.055524220499665e-05, - "loss": 0.0051, - "step": 1806 - }, - { - "epoch": 1.3962154856149835, - "grad_norm": 0.10470427572727203, - "learning_rate": 9.053945818203126e-05, - "loss": 0.0059, - "step": 1807 - }, - { - "epoch": 1.3969878354894767, - "grad_norm": 0.01636355370283127, - "learning_rate": 9.052366235900918e-05, - "loss": 0.0045, - "step": 1808 - }, - { - "epoch": 1.3977601853639698, - "grad_norm": 0.016303371638059616, - "learning_rate": 9.050785474052814e-05, - "loss": 0.0052, - "step": 1809 - }, - { - "epoch": 1.398532535238463, - "grad_norm": 0.022861136123538017, - "learning_rate": 9.04920353311894e-05, - "loss": 0.005, - "step": 1810 - }, - { - "epoch": 1.3993048851129561, - "grad_norm": 0.02645104192197323, - "learning_rate": 9.047620413559759e-05, - "loss": 0.0051, - "step": 1811 - }, - { - "epoch": 1.4000772349874493, - "grad_norm": 0.038855042308568954, - "learning_rate": 9.046036115836081e-05, - "loss": 0.0058, - "step": 1812 - }, - { - "epoch": 1.4008495848619424, - "grad_norm": 0.018047073855996132, - "learning_rate": 9.044450640409053e-05, - "loss": 0.0054, - "step": 1813 - }, - { - "epoch": 1.4016219347364356, - "grad_norm": 0.034481655806303024, - "learning_rate": 9.042863987740171e-05, - "loss": 0.005, - "step": 1814 - }, - { - "epoch": 1.4023942846109287, - "grad_norm": 0.01587347313761711, - "learning_rate": 9.04127615829127e-05, - "loss": 0.0052, - "step": 1815 - }, - { - "epoch": 1.403166634485422, - "grad_norm": 0.024790631607174873, - "learning_rate": 9.03968715252453e-05, - "loss": 0.0057, - "step": 1816 - }, - { - "epoch": 1.403938984359915, - "grad_norm": 0.01651730015873909, - "learning_rate": 9.038096970902472e-05, - "loss": 0.0054, - "step": 1817 - }, - { - "epoch": 1.4047113342344082, - "grad_norm": 0.018351811915636063, - "learning_rate": 9.03650561388796e-05, - "loss": 0.0047, - "step": 1818 - }, - { - "epoch": 1.4054836841089013, - "grad_norm": 0.03518560901284218, - "learning_rate": 9.034913081944199e-05, - "loss": 0.0049, - "step": 1819 - }, - { - "epoch": 1.4062560339833945, - "grad_norm": 0.01802041381597519, - "learning_rate": 9.033319375534734e-05, - "loss": 0.005, - "step": 1820 - }, - { - "epoch": 1.4070283838578876, - "grad_norm": 0.015685800462961197, - "learning_rate": 9.031724495123458e-05, - "loss": 0.0053, - "step": 1821 - }, - { - "epoch": 1.4078007337323808, - "grad_norm": 0.019203344359993935, - "learning_rate": 9.030128441174601e-05, - "loss": 0.0047, - "step": 1822 - }, - { - "epoch": 1.408573083606874, - "grad_norm": 0.018631575629115105, - "learning_rate": 9.028531214152735e-05, - "loss": 0.0056, - "step": 1823 - }, - { - "epoch": 1.409345433481367, - "grad_norm": 0.020659970119595528, - "learning_rate": 9.026932814522776e-05, - "loss": 0.0054, - "step": 1824 - }, - { - "epoch": 1.4101177833558602, - "grad_norm": 0.025972822681069374, - "learning_rate": 9.025333242749978e-05, - "loss": 0.0053, - "step": 1825 - }, - { - "epoch": 1.4108901332303534, - "grad_norm": 0.015437978319823742, - "learning_rate": 9.023732499299937e-05, - "loss": 0.0051, - "step": 1826 - }, - { - "epoch": 1.4116624831048465, - "grad_norm": 0.016610266640782356, - "learning_rate": 9.022130584638593e-05, - "loss": 0.0045, - "step": 1827 - }, - { - "epoch": 1.4124348329793397, - "grad_norm": 0.011608053930103779, - "learning_rate": 9.020527499232223e-05, - "loss": 0.0049, - "step": 1828 - }, - { - "epoch": 1.4132071828538328, - "grad_norm": 0.02084183320403099, - "learning_rate": 9.018923243547449e-05, - "loss": 0.0046, - "step": 1829 - }, - { - "epoch": 1.413979532728326, - "grad_norm": 0.013906857930123806, - "learning_rate": 9.017317818051225e-05, - "loss": 0.0055, - "step": 1830 - }, - { - "epoch": 1.4147518826028191, - "grad_norm": 0.0152976606041193, - "learning_rate": 9.015711223210857e-05, - "loss": 0.0045, - "step": 1831 - }, - { - "epoch": 1.4155242324773123, - "grad_norm": 0.016641858965158463, - "learning_rate": 9.014103459493986e-05, - "loss": 0.0046, - "step": 1832 - }, - { - "epoch": 1.4162965823518054, - "grad_norm": 0.012137793004512787, - "learning_rate": 9.012494527368588e-05, - "loss": 0.0048, - "step": 1833 - }, - { - "epoch": 1.4170689322262986, - "grad_norm": 0.031864847987890244, - "learning_rate": 9.010884427302993e-05, - "loss": 0.0047, - "step": 1834 - }, - { - "epoch": 1.4178412821007917, - "grad_norm": 0.01978926546871662, - "learning_rate": 9.009273159765853e-05, - "loss": 0.005, - "step": 1835 - }, - { - "epoch": 1.4186136319752847, - "grad_norm": 0.011707760393619537, - "learning_rate": 9.007660725226175e-05, - "loss": 0.0052, - "step": 1836 - }, - { - "epoch": 1.419385981849778, - "grad_norm": 0.03232321888208389, - "learning_rate": 9.006047124153297e-05, - "loss": 0.0052, - "step": 1837 - }, - { - "epoch": 1.420158331724271, - "grad_norm": 0.01921793818473816, - "learning_rate": 9.004432357016901e-05, - "loss": 0.0051, - "step": 1838 - }, - { - "epoch": 1.4209306815987643, - "grad_norm": 0.031026741489768028, - "learning_rate": 9.002816424287004e-05, - "loss": 0.0045, - "step": 1839 - }, - { - "epoch": 1.4217030314732573, - "grad_norm": 0.03284458816051483, - "learning_rate": 9.001199326433969e-05, - "loss": 0.0045, - "step": 1840 - }, - { - "epoch": 1.4224753813477506, - "grad_norm": 0.02016862854361534, - "learning_rate": 8.99958106392849e-05, - "loss": 0.0051, - "step": 1841 - }, - { - "epoch": 1.4232477312222436, - "grad_norm": 0.030963387340307236, - "learning_rate": 8.997961637241608e-05, - "loss": 0.0043, - "step": 1842 - }, - { - "epoch": 1.424020081096737, - "grad_norm": 0.011597777716815472, - "learning_rate": 8.996341046844696e-05, - "loss": 0.0054, - "step": 1843 - }, - { - "epoch": 1.4247924309712299, - "grad_norm": 0.014895058237016201, - "learning_rate": 8.994719293209471e-05, - "loss": 0.0047, - "step": 1844 - }, - { - "epoch": 1.4255647808457232, - "grad_norm": 0.013554844073951244, - "learning_rate": 8.993096376807983e-05, - "loss": 0.0055, - "step": 1845 - }, - { - "epoch": 1.4263371307202162, - "grad_norm": 0.016599401831626892, - "learning_rate": 8.991472298112627e-05, - "loss": 0.0047, - "step": 1846 - }, - { - "epoch": 1.4271094805947093, - "grad_norm": 0.011927340179681778, - "learning_rate": 8.989847057596131e-05, - "loss": 0.0046, - "step": 1847 - }, - { - "epoch": 1.4278818304692025, - "grad_norm": 0.015175368636846542, - "learning_rate": 8.988220655731565e-05, - "loss": 0.0049, - "step": 1848 - }, - { - "epoch": 1.4286541803436956, - "grad_norm": 0.02604135498404503, - "learning_rate": 8.986593092992334e-05, - "loss": 0.0048, - "step": 1849 - }, - { - "epoch": 1.4294265302181888, - "grad_norm": 0.012300568632781506, - "learning_rate": 8.984964369852183e-05, - "loss": 0.0049, - "step": 1850 - }, - { - "epoch": 1.430198880092682, - "grad_norm": 0.020940154790878296, - "learning_rate": 8.983334486785192e-05, - "loss": 0.0046, - "step": 1851 - }, - { - "epoch": 1.430971229967175, - "grad_norm": 0.011929292231798172, - "learning_rate": 8.981703444265783e-05, - "loss": 0.005, - "step": 1852 - }, - { - "epoch": 1.4317435798416682, - "grad_norm": 0.01556863822042942, - "learning_rate": 8.980071242768713e-05, - "loss": 0.0045, - "step": 1853 - }, - { - "epoch": 1.4325159297161614, - "grad_norm": 0.017990630120038986, - "learning_rate": 8.978437882769074e-05, - "loss": 0.0048, - "step": 1854 - }, - { - "epoch": 1.4332882795906545, - "grad_norm": 0.019159870222210884, - "learning_rate": 8.9768033647423e-05, - "loss": 0.0044, - "step": 1855 - }, - { - "epoch": 1.4340606294651477, - "grad_norm": 0.022200316190719604, - "learning_rate": 8.975167689164159e-05, - "loss": 0.0058, - "step": 1856 - }, - { - "epoch": 1.4348329793396408, - "grad_norm": 0.02062351442873478, - "learning_rate": 8.973530856510757e-05, - "loss": 0.0045, - "step": 1857 - }, - { - "epoch": 1.435605329214134, - "grad_norm": 0.04572264105081558, - "learning_rate": 8.971892867258535e-05, - "loss": 0.0054, - "step": 1858 - }, - { - "epoch": 1.4363776790886271, - "grad_norm": 0.04120631515979767, - "learning_rate": 8.970253721884272e-05, - "loss": 0.005, - "step": 1859 - }, - { - "epoch": 1.4371500289631203, - "grad_norm": 0.042257945984601974, - "learning_rate": 8.968613420865087e-05, - "loss": 0.0058, - "step": 1860 - }, - { - "epoch": 1.4379223788376134, - "grad_norm": 0.013364771381020546, - "learning_rate": 8.966971964678429e-05, - "loss": 0.0047, - "step": 1861 - }, - { - "epoch": 1.4386947287121066, - "grad_norm": 0.02408026158809662, - "learning_rate": 8.965329353802087e-05, - "loss": 0.0045, - "step": 1862 - }, - { - "epoch": 1.4394670785865997, - "grad_norm": 0.04472861811518669, - "learning_rate": 8.963685588714185e-05, - "loss": 0.0051, - "step": 1863 - }, - { - "epoch": 1.4402394284610929, - "grad_norm": 0.011410464532673359, - "learning_rate": 8.962040669893184e-05, - "loss": 0.0051, - "step": 1864 - }, - { - "epoch": 1.441011778335586, - "grad_norm": 0.024275388568639755, - "learning_rate": 8.960394597817878e-05, - "loss": 0.0046, - "step": 1865 - }, - { - "epoch": 1.4417841282100792, - "grad_norm": 0.029177196323871613, - "learning_rate": 8.958747372967403e-05, - "loss": 0.0046, - "step": 1866 - }, - { - "epoch": 1.4425564780845723, - "grad_norm": 0.011070268228650093, - "learning_rate": 8.95709899582122e-05, - "loss": 0.0046, - "step": 1867 - }, - { - "epoch": 1.4433288279590655, - "grad_norm": 0.01813647709786892, - "learning_rate": 8.955449466859138e-05, - "loss": 0.0042, - "step": 1868 - }, - { - "epoch": 1.4441011778335586, - "grad_norm": 0.03172139450907707, - "learning_rate": 8.953798786561294e-05, - "loss": 0.0053, - "step": 1869 - }, - { - "epoch": 1.4448735277080518, - "grad_norm": 0.02178853377699852, - "learning_rate": 8.952146955408157e-05, - "loss": 0.0048, - "step": 1870 - }, - { - "epoch": 1.445645877582545, - "grad_norm": 0.013043739832937717, - "learning_rate": 8.95049397388054e-05, - "loss": 0.0051, - "step": 1871 - }, - { - "epoch": 1.446418227457038, - "grad_norm": 0.016747932881116867, - "learning_rate": 8.948839842459583e-05, - "loss": 0.0053, - "step": 1872 - }, - { - "epoch": 1.4471905773315312, - "grad_norm": 0.012382641434669495, - "learning_rate": 8.947184561626765e-05, - "loss": 0.0048, - "step": 1873 - }, - { - "epoch": 1.4479629272060244, - "grad_norm": 0.024634165689349174, - "learning_rate": 8.945528131863896e-05, - "loss": 0.0043, - "step": 1874 - }, - { - "epoch": 1.4487352770805175, - "grad_norm": 0.015179039910435677, - "learning_rate": 8.943870553653126e-05, - "loss": 0.0048, - "step": 1875 - }, - { - "epoch": 1.4495076269550107, - "grad_norm": 0.0111264418810606, - "learning_rate": 8.942211827476934e-05, - "loss": 0.005, - "step": 1876 - }, - { - "epoch": 1.4502799768295038, - "grad_norm": 0.012447085231542587, - "learning_rate": 8.940551953818136e-05, - "loss": 0.0048, - "step": 1877 - }, - { - "epoch": 1.451052326703997, - "grad_norm": 0.009181671775877476, - "learning_rate": 8.938890933159881e-05, - "loss": 0.0044, - "step": 1878 - }, - { - "epoch": 1.4518246765784901, - "grad_norm": 0.009875455871224403, - "learning_rate": 8.93722876598565e-05, - "loss": 0.0046, - "step": 1879 - }, - { - "epoch": 1.4525970264529833, - "grad_norm": 0.014886122196912766, - "learning_rate": 8.935565452779263e-05, - "loss": 0.0051, - "step": 1880 - }, - { - "epoch": 1.4533693763274764, - "grad_norm": 0.013239393942058086, - "learning_rate": 8.933900994024868e-05, - "loss": 0.0051, - "step": 1881 - }, - { - "epoch": 1.4541417262019696, - "grad_norm": 0.012474555522203445, - "learning_rate": 8.932235390206948e-05, - "loss": 0.0049, - "step": 1882 - }, - { - "epoch": 1.4549140760764625, - "grad_norm": 0.012886752374470234, - "learning_rate": 8.930568641810324e-05, - "loss": 0.0049, - "step": 1883 - }, - { - "epoch": 1.4556864259509559, - "grad_norm": 0.012491632252931595, - "learning_rate": 8.928900749320143e-05, - "loss": 0.0046, - "step": 1884 - }, - { - "epoch": 1.4564587758254488, - "grad_norm": 0.02662251889705658, - "learning_rate": 8.927231713221886e-05, - "loss": 0.0046, - "step": 1885 - }, - { - "epoch": 1.4572311256999422, - "grad_norm": 0.01857464201748371, - "learning_rate": 8.925561534001374e-05, - "loss": 0.0046, - "step": 1886 - }, - { - "epoch": 1.458003475574435, - "grad_norm": 0.02623864635825157, - "learning_rate": 8.923890212144755e-05, - "loss": 0.0049, - "step": 1887 - }, - { - "epoch": 1.4587758254489285, - "grad_norm": 0.023755142465233803, - "learning_rate": 8.922217748138508e-05, - "loss": 0.0046, - "step": 1888 - }, - { - "epoch": 1.4595481753234214, - "grad_norm": 0.019182506948709488, - "learning_rate": 8.920544142469447e-05, - "loss": 0.0042, - "step": 1889 - }, - { - "epoch": 1.4603205251979148, - "grad_norm": 0.014418594539165497, - "learning_rate": 8.918869395624719e-05, - "loss": 0.0045, - "step": 1890 - }, - { - "epoch": 1.4610928750724077, - "grad_norm": 0.029968436807394028, - "learning_rate": 8.917193508091803e-05, - "loss": 0.0048, - "step": 1891 - }, - { - "epoch": 1.461865224946901, - "grad_norm": 0.015648595988750458, - "learning_rate": 8.91551648035851e-05, - "loss": 0.0047, - "step": 1892 - }, - { - "epoch": 1.462637574821394, - "grad_norm": 0.023029394447803497, - "learning_rate": 8.91383831291298e-05, - "loss": 0.0046, - "step": 1893 - }, - { - "epoch": 1.4634099246958872, - "grad_norm": 0.016863130033016205, - "learning_rate": 8.912159006243688e-05, - "loss": 0.0047, - "step": 1894 - }, - { - "epoch": 1.4641822745703803, - "grad_norm": 0.01065768487751484, - "learning_rate": 8.91047856083944e-05, - "loss": 0.0043, - "step": 1895 - }, - { - "epoch": 1.4649546244448735, - "grad_norm": 0.012436062097549438, - "learning_rate": 8.908796977189371e-05, - "loss": 0.0054, - "step": 1896 - }, - { - "epoch": 1.4657269743193666, - "grad_norm": 0.008846023119986057, - "learning_rate": 8.907114255782953e-05, - "loss": 0.0043, - "step": 1897 - }, - { - "epoch": 1.4664993241938598, - "grad_norm": 0.017335759475827217, - "learning_rate": 8.905430397109981e-05, - "loss": 0.0039, - "step": 1898 - }, - { - "epoch": 1.467271674068353, - "grad_norm": 0.00891080778092146, - "learning_rate": 8.903745401660591e-05, - "loss": 0.0042, - "step": 1899 - }, - { - "epoch": 1.468044023942846, - "grad_norm": 0.01016610860824585, - "learning_rate": 8.90205926992524e-05, - "loss": 0.0043, - "step": 1900 - }, - { - "epoch": 1.4688163738173392, - "grad_norm": 0.021171605214476585, - "learning_rate": 8.900372002394723e-05, - "loss": 0.0046, - "step": 1901 - }, - { - "epoch": 1.4695887236918324, - "grad_norm": 0.013704544864594936, - "learning_rate": 8.898683599560162e-05, - "loss": 0.0046, - "step": 1902 - }, - { - "epoch": 1.4703610735663255, - "grad_norm": 0.010161432437598705, - "learning_rate": 8.896994061913009e-05, - "loss": 0.0043, - "step": 1903 - }, - { - "epoch": 1.4711334234408187, - "grad_norm": 0.018764277920126915, - "learning_rate": 8.89530338994505e-05, - "loss": 0.0047, - "step": 1904 - }, - { - "epoch": 1.4719057733153118, - "grad_norm": 0.014022842980921268, - "learning_rate": 8.893611584148395e-05, - "loss": 0.0047, - "step": 1905 - }, - { - "epoch": 1.472678123189805, - "grad_norm": 0.01479887031018734, - "learning_rate": 8.891918645015491e-05, - "loss": 0.0042, - "step": 1906 - }, - { - "epoch": 1.473450473064298, - "grad_norm": 0.014984914101660252, - "learning_rate": 8.89022457303911e-05, - "loss": 0.0039, - "step": 1907 - }, - { - "epoch": 1.4742228229387913, - "grad_norm": 0.014987524598836899, - "learning_rate": 8.888529368712357e-05, - "loss": 0.0046, - "step": 1908 - }, - { - "epoch": 1.4749951728132844, - "grad_norm": 0.018837671726942062, - "learning_rate": 8.886833032528665e-05, - "loss": 0.0042, - "step": 1909 - }, - { - "epoch": 1.4757675226877776, - "grad_norm": 0.016406618058681488, - "learning_rate": 8.885135564981794e-05, - "loss": 0.0049, - "step": 1910 - }, - { - "epoch": 1.4765398725622707, - "grad_norm": 0.011555914767086506, - "learning_rate": 8.883436966565836e-05, - "loss": 0.0048, - "step": 1911 - }, - { - "epoch": 1.4773122224367639, - "grad_norm": 0.023820480331778526, - "learning_rate": 8.881737237775216e-05, - "loss": 0.0046, - "step": 1912 - }, - { - "epoch": 1.478084572311257, - "grad_norm": 0.0240377988666296, - "learning_rate": 8.880036379104681e-05, - "loss": 0.0048, - "step": 1913 - }, - { - "epoch": 1.4788569221857502, - "grad_norm": 0.017000969499349594, - "learning_rate": 8.87833439104931e-05, - "loss": 0.0051, - "step": 1914 - }, - { - "epoch": 1.4796292720602433, - "grad_norm": 0.03072609193623066, - "learning_rate": 8.876631274104511e-05, - "loss": 0.0048, - "step": 1915 - }, - { - "epoch": 1.4804016219347365, - "grad_norm": 0.04257432371377945, - "learning_rate": 8.87492702876602e-05, - "loss": 0.0047, - "step": 1916 - }, - { - "epoch": 1.4811739718092296, - "grad_norm": 0.016059063374996185, - "learning_rate": 8.873221655529902e-05, - "loss": 0.0049, - "step": 1917 - }, - { - "epoch": 1.4819463216837228, - "grad_norm": 0.028160041198134422, - "learning_rate": 8.871515154892549e-05, - "loss": 0.0053, - "step": 1918 - }, - { - "epoch": 1.482718671558216, - "grad_norm": 0.022643111646175385, - "learning_rate": 8.869807527350683e-05, - "loss": 0.0046, - "step": 1919 - }, - { - "epoch": 1.483491021432709, - "grad_norm": 0.01096571795642376, - "learning_rate": 8.868098773401352e-05, - "loss": 0.0045, - "step": 1920 - }, - { - "epoch": 1.4842633713072022, - "grad_norm": 0.01421598345041275, - "learning_rate": 8.866388893541932e-05, - "loss": 0.0043, - "step": 1921 - }, - { - "epoch": 1.4850357211816954, - "grad_norm": 0.03223200514912605, - "learning_rate": 8.864677888270133e-05, - "loss": 0.0046, - "step": 1922 - }, - { - "epoch": 1.4858080710561885, - "grad_norm": 0.011875314638018608, - "learning_rate": 8.862965758083983e-05, - "loss": 0.0046, - "step": 1923 - }, - { - "epoch": 1.4865804209306817, - "grad_norm": 0.01744246669113636, - "learning_rate": 8.861252503481842e-05, - "loss": 0.0047, - "step": 1924 - }, - { - "epoch": 1.4873527708051748, - "grad_norm": 0.011263393796980381, - "learning_rate": 8.859538124962397e-05, - "loss": 0.0041, - "step": 1925 - }, - { - "epoch": 1.488125120679668, - "grad_norm": 0.016610687598586082, - "learning_rate": 8.857822623024663e-05, - "loss": 0.0044, - "step": 1926 - }, - { - "epoch": 1.488897470554161, - "grad_norm": 0.010531166568398476, - "learning_rate": 8.85610599816798e-05, - "loss": 0.0043, - "step": 1927 - }, - { - "epoch": 1.4896698204286543, - "grad_norm": 0.021132631227374077, - "learning_rate": 8.854388250892019e-05, - "loss": 0.0047, - "step": 1928 - }, - { - "epoch": 1.4904421703031474, - "grad_norm": 0.01411630492657423, - "learning_rate": 8.852669381696772e-05, - "loss": 0.0044, - "step": 1929 - }, - { - "epoch": 1.4912145201776403, - "grad_norm": 0.012702484615147114, - "learning_rate": 8.85094939108256e-05, - "loss": 0.0047, - "step": 1930 - }, - { - "epoch": 1.4919868700521337, - "grad_norm": 0.009157133288681507, - "learning_rate": 8.849228279550032e-05, - "loss": 0.0041, - "step": 1931 - }, - { - "epoch": 1.4927592199266266, - "grad_norm": 0.020086420699954033, - "learning_rate": 8.847506047600162e-05, - "loss": 0.0045, - "step": 1932 - }, - { - "epoch": 1.49353156980112, - "grad_norm": 0.009814996272325516, - "learning_rate": 8.845782695734248e-05, - "loss": 0.0039, - "step": 1933 - }, - { - "epoch": 1.494303919675613, - "grad_norm": 0.011432342231273651, - "learning_rate": 8.844058224453919e-05, - "loss": 0.0048, - "step": 1934 - }, - { - "epoch": 1.4950762695501063, - "grad_norm": 0.01218743808567524, - "learning_rate": 8.842332634261126e-05, - "loss": 0.0042, - "step": 1935 - }, - { - "epoch": 1.4958486194245992, - "grad_norm": 0.01667974330484867, - "learning_rate": 8.840605925658145e-05, - "loss": 0.0045, - "step": 1936 - }, - { - "epoch": 1.4966209692990926, - "grad_norm": 0.0116344029083848, - "learning_rate": 8.838878099147583e-05, - "loss": 0.0042, - "step": 1937 - }, - { - "epoch": 1.4973933191735855, - "grad_norm": 0.013338102027773857, - "learning_rate": 8.837149155232364e-05, - "loss": 0.0046, - "step": 1938 - }, - { - "epoch": 1.498165669048079, - "grad_norm": 0.01640329137444496, - "learning_rate": 8.835419094415745e-05, - "loss": 0.0044, - "step": 1939 - }, - { - "epoch": 1.4989380189225718, - "grad_norm": 0.01282061543315649, - "learning_rate": 8.833687917201301e-05, - "loss": 0.0051, - "step": 1940 - }, - { - "epoch": 1.4997103687970652, - "grad_norm": 0.01326051913201809, - "learning_rate": 8.831955624092941e-05, - "loss": 0.0049, - "step": 1941 - }, - { - "epoch": 1.5004827186715581, - "grad_norm": 0.014805569313466549, - "learning_rate": 8.83022221559489e-05, - "loss": 0.004, - "step": 1942 - }, - { - "epoch": 1.5012550685460515, - "grad_norm": 0.009898116812109947, - "learning_rate": 8.828487692211704e-05, - "loss": 0.0046, - "step": 1943 - }, - { - "epoch": 1.5020274184205444, - "grad_norm": 0.025644246488809586, - "learning_rate": 8.826752054448259e-05, - "loss": 0.0049, - "step": 1944 - }, - { - "epoch": 1.5027997682950378, - "grad_norm": 0.01422660518437624, - "learning_rate": 8.825015302809756e-05, - "loss": 0.0042, - "step": 1945 - }, - { - "epoch": 1.5035721181695307, - "grad_norm": 0.019137471914291382, - "learning_rate": 8.823277437801724e-05, - "loss": 0.0048, - "step": 1946 - }, - { - "epoch": 1.504344468044024, - "grad_norm": 0.027513476088643074, - "learning_rate": 8.821538459930013e-05, - "loss": 0.0044, - "step": 1947 - }, - { - "epoch": 1.505116817918517, - "grad_norm": 0.017542105168104172, - "learning_rate": 8.819798369700797e-05, - "loss": 0.0043, - "step": 1948 - }, - { - "epoch": 1.5058891677930102, - "grad_norm": 0.025185147300362587, - "learning_rate": 8.818057167620574e-05, - "loss": 0.0044, - "step": 1949 - }, - { - "epoch": 1.5066615176675033, - "grad_norm": 0.030095340684056282, - "learning_rate": 8.816314854196167e-05, - "loss": 0.0046, - "step": 1950 - }, - { - "epoch": 1.5074338675419965, - "grad_norm": 0.011635826900601387, - "learning_rate": 8.814571429934719e-05, - "loss": 0.0042, - "step": 1951 - }, - { - "epoch": 1.5082062174164896, - "grad_norm": 0.022228319197893143, - "learning_rate": 8.8128268953437e-05, - "loss": 0.0043, - "step": 1952 - }, - { - "epoch": 1.5089785672909828, - "grad_norm": 0.02450932003557682, - "learning_rate": 8.811081250930902e-05, - "loss": 0.0046, - "step": 1953 - }, - { - "epoch": 1.509750917165476, - "grad_norm": 0.01285399030894041, - "learning_rate": 8.80933449720444e-05, - "loss": 0.0047, - "step": 1954 - }, - { - "epoch": 1.510523267039969, - "grad_norm": 0.02027706429362297, - "learning_rate": 8.807586634672751e-05, - "loss": 0.0046, - "step": 1955 - }, - { - "epoch": 1.5112956169144622, - "grad_norm": 0.013969060964882374, - "learning_rate": 8.805837663844598e-05, - "loss": 0.0043, - "step": 1956 - }, - { - "epoch": 1.5120679667889554, - "grad_norm": 0.009368671104311943, - "learning_rate": 8.804087585229061e-05, - "loss": 0.0039, - "step": 1957 - }, - { - "epoch": 1.5128403166634485, - "grad_norm": 0.023244036361575127, - "learning_rate": 8.802336399335547e-05, - "loss": 0.0046, - "step": 1958 - }, - { - "epoch": 1.5136126665379417, - "grad_norm": 0.009645405225455761, - "learning_rate": 8.800584106673784e-05, - "loss": 0.0046, - "step": 1959 - }, - { - "epoch": 1.5143850164124348, - "grad_norm": 0.013144160620868206, - "learning_rate": 8.798830707753823e-05, - "loss": 0.0047, - "step": 1960 - }, - { - "epoch": 1.515157366286928, - "grad_norm": 0.012967154383659363, - "learning_rate": 8.797076203086033e-05, - "loss": 0.0048, - "step": 1961 - }, - { - "epoch": 1.5159297161614211, - "grad_norm": 0.01168031059205532, - "learning_rate": 8.795320593181112e-05, - "loss": 0.0041, - "step": 1962 - }, - { - "epoch": 1.5167020660359143, - "grad_norm": 0.012260856106877327, - "learning_rate": 8.793563878550072e-05, - "loss": 0.0049, - "step": 1963 - }, - { - "epoch": 1.5174744159104074, - "grad_norm": 0.013430275954306126, - "learning_rate": 8.791806059704251e-05, - "loss": 0.004, - "step": 1964 - }, - { - "epoch": 1.5182467657849006, - "grad_norm": 0.02253013476729393, - "learning_rate": 8.79004713715531e-05, - "loss": 0.0044, - "step": 1965 - }, - { - "epoch": 1.5190191156593937, - "grad_norm": 0.009793316014111042, - "learning_rate": 8.788287111415227e-05, - "loss": 0.0045, - "step": 1966 - }, - { - "epoch": 1.5197914655338869, - "grad_norm": 0.016231399029493332, - "learning_rate": 8.786525982996302e-05, - "loss": 0.0038, - "step": 1967 - }, - { - "epoch": 1.52056381540838, - "grad_norm": 0.020186608657240868, - "learning_rate": 8.784763752411159e-05, - "loss": 0.0046, - "step": 1968 - }, - { - "epoch": 1.5213361652828732, - "grad_norm": 0.011528832837939262, - "learning_rate": 8.783000420172738e-05, - "loss": 0.0048, - "step": 1969 - }, - { - "epoch": 1.5221085151573663, - "grad_norm": 0.012485725805163383, - "learning_rate": 8.781235986794305e-05, - "loss": 0.0048, - "step": 1970 - }, - { - "epoch": 1.5228808650318595, - "grad_norm": 0.027413304895162582, - "learning_rate": 8.779470452789445e-05, - "loss": 0.005, - "step": 1971 - }, - { - "epoch": 1.5236532149063526, - "grad_norm": 0.008898314088582993, - "learning_rate": 8.777703818672059e-05, - "loss": 0.0042, - "step": 1972 - }, - { - "epoch": 1.5244255647808456, - "grad_norm": 0.011833777651190758, - "learning_rate": 8.775936084956371e-05, - "loss": 0.0042, - "step": 1973 - }, - { - "epoch": 1.525197914655339, - "grad_norm": 0.016269506886601448, - "learning_rate": 8.77416725215693e-05, - "loss": 0.0047, - "step": 1974 - }, - { - "epoch": 1.5259702645298319, - "grad_norm": 0.013332594186067581, - "learning_rate": 8.772397320788597e-05, - "loss": 0.0042, - "step": 1975 - }, - { - "epoch": 1.5267426144043252, - "grad_norm": 0.011268539354205132, - "learning_rate": 8.770626291366557e-05, - "loss": 0.0042, - "step": 1976 - }, - { - "epoch": 1.5275149642788182, - "grad_norm": 0.013201587833464146, - "learning_rate": 8.768854164406314e-05, - "loss": 0.0045, - "step": 1977 - }, - { - "epoch": 1.5282873141533115, - "grad_norm": 0.022223128005862236, - "learning_rate": 8.767080940423692e-05, - "loss": 0.0046, - "step": 1978 - }, - { - "epoch": 1.5290596640278045, - "grad_norm": 0.010781112127006054, - "learning_rate": 8.765306619934833e-05, - "loss": 0.0048, - "step": 1979 - }, - { - "epoch": 1.5298320139022978, - "grad_norm": 0.02067198045551777, - "learning_rate": 8.763531203456199e-05, - "loss": 0.0048, - "step": 1980 - }, - { - "epoch": 1.5306043637767908, - "grad_norm": 0.025904567912220955, - "learning_rate": 8.76175469150457e-05, - "loss": 0.0049, - "step": 1981 - }, - { - "epoch": 1.5313767136512841, - "grad_norm": 0.023831602185964584, - "learning_rate": 8.759977084597047e-05, - "loss": 0.0048, - "step": 1982 - }, - { - "epoch": 1.532149063525777, - "grad_norm": 0.024097897112369537, - "learning_rate": 8.758198383251047e-05, - "loss": 0.0044, - "step": 1983 - }, - { - "epoch": 1.5329214134002704, - "grad_norm": 0.011627217754721642, - "learning_rate": 8.756418587984307e-05, - "loss": 0.0041, - "step": 1984 - }, - { - "epoch": 1.5336937632747634, - "grad_norm": 0.013759966939687729, - "learning_rate": 8.754637699314885e-05, - "loss": 0.0041, - "step": 1985 - }, - { - "epoch": 1.5344661131492567, - "grad_norm": 0.0151802534237504, - "learning_rate": 8.752855717761152e-05, - "loss": 0.0047, - "step": 1986 - }, - { - "epoch": 1.5352384630237497, - "grad_norm": 0.02687522955238819, - "learning_rate": 8.751072643841803e-05, - "loss": 0.0047, - "step": 1987 - }, - { - "epoch": 1.536010812898243, - "grad_norm": 0.01627619005739689, - "learning_rate": 8.749288478075842e-05, - "loss": 0.0043, - "step": 1988 - }, - { - "epoch": 1.536783162772736, - "grad_norm": 0.022365057840943336, - "learning_rate": 8.747503220982602e-05, - "loss": 0.0047, - "step": 1989 - }, - { - "epoch": 1.5375555126472293, - "grad_norm": 0.010350333526730537, - "learning_rate": 8.745716873081725e-05, - "loss": 0.004, - "step": 1990 - }, - { - "epoch": 1.5383278625217223, - "grad_norm": 0.013772976584732533, - "learning_rate": 8.743929434893176e-05, - "loss": 0.0046, - "step": 1991 - }, - { - "epoch": 1.5391002123962156, - "grad_norm": 0.014426304958760738, - "learning_rate": 8.742140906937233e-05, - "loss": 0.0042, - "step": 1992 - }, - { - "epoch": 1.5398725622707086, - "grad_norm": 0.011211644858121872, - "learning_rate": 8.740351289734495e-05, - "loss": 0.0046, - "step": 1993 - }, - { - "epoch": 1.540644912145202, - "grad_norm": 0.013804316520690918, - "learning_rate": 8.738560583805873e-05, - "loss": 0.0048, - "step": 1994 - }, - { - "epoch": 1.5414172620196949, - "grad_norm": 0.010096611455082893, - "learning_rate": 8.736768789672602e-05, - "loss": 0.0041, - "step": 1995 - }, - { - "epoch": 1.542189611894188, - "grad_norm": 0.019236311316490173, - "learning_rate": 8.73497590785623e-05, - "loss": 0.0047, - "step": 1996 - }, - { - "epoch": 1.5429619617686812, - "grad_norm": 0.019567430019378662, - "learning_rate": 8.73318193887862e-05, - "loss": 0.0042, - "step": 1997 - }, - { - "epoch": 1.5437343116431743, - "grad_norm": 0.008151182904839516, - "learning_rate": 8.731386883261952e-05, - "loss": 0.004, - "step": 1998 - }, - { - "epoch": 1.5445066615176675, - "grad_norm": 0.026208965107798576, - "learning_rate": 8.729590741528726e-05, - "loss": 0.0047, - "step": 1999 - }, - { - "epoch": 1.5452790113921606, - "grad_norm": 0.047962453216314316, - "learning_rate": 8.727793514201752e-05, - "loss": 0.0046, - "step": 2000 - }, - { - "epoch": 1.5460513612666538, - "grad_norm": 0.011168277822434902, - "learning_rate": 8.725995201804163e-05, - "loss": 0.0044, - "step": 2001 - }, - { - "epoch": 1.546823711141147, - "grad_norm": 0.013109843246638775, - "learning_rate": 8.724195804859403e-05, - "loss": 0.0044, - "step": 2002 - }, - { - "epoch": 1.54759606101564, - "grad_norm": 0.02118433639407158, - "learning_rate": 8.722395323891233e-05, - "loss": 0.0041, - "step": 2003 - }, - { - "epoch": 1.5483684108901332, - "grad_norm": 0.012441566213965416, - "learning_rate": 8.720593759423728e-05, - "loss": 0.004, - "step": 2004 - }, - { - "epoch": 1.5491407607646264, - "grad_norm": 0.01646626740694046, - "learning_rate": 8.718791111981282e-05, - "loss": 0.0046, - "step": 2005 - }, - { - "epoch": 1.5499131106391195, - "grad_norm": 0.020530641078948975, - "learning_rate": 8.716987382088602e-05, - "loss": 0.005, - "step": 2006 - }, - { - "epoch": 1.5506854605136127, - "grad_norm": 0.01678456924855709, - "learning_rate": 8.715182570270707e-05, - "loss": 0.0049, - "step": 2007 - }, - { - "epoch": 1.5514578103881058, - "grad_norm": 0.011109710671007633, - "learning_rate": 8.713376677052939e-05, - "loss": 0.0041, - "step": 2008 - }, - { - "epoch": 1.552230160262599, - "grad_norm": 0.016971318051218987, - "learning_rate": 8.711569702960947e-05, - "loss": 0.0043, - "step": 2009 - }, - { - "epoch": 1.5530025101370921, - "grad_norm": 0.027093220502138138, - "learning_rate": 8.709761648520697e-05, - "loss": 0.0041, - "step": 2010 - }, - { - "epoch": 1.5537748600115853, - "grad_norm": 0.011815927922725677, - "learning_rate": 8.707952514258472e-05, - "loss": 0.0041, - "step": 2011 - }, - { - "epoch": 1.5545472098860784, - "grad_norm": 0.04407166689634323, - "learning_rate": 8.706142300700865e-05, - "loss": 0.0048, - "step": 2012 - }, - { - "epoch": 1.5553195597605716, - "grad_norm": 0.013555064797401428, - "learning_rate": 8.704331008374788e-05, - "loss": 0.0052, - "step": 2013 - }, - { - "epoch": 1.5560919096350647, - "grad_norm": 0.02045373059809208, - "learning_rate": 8.702518637807462e-05, - "loss": 0.0043, - "step": 2014 - }, - { - "epoch": 1.5568642595095579, - "grad_norm": 0.02960846945643425, - "learning_rate": 8.700705189526425e-05, - "loss": 0.0045, - "step": 2015 - }, - { - "epoch": 1.557636609384051, - "grad_norm": 0.025553971529006958, - "learning_rate": 8.69889066405953e-05, - "loss": 0.0047, - "step": 2016 - }, - { - "epoch": 1.5584089592585442, - "grad_norm": 0.015526111237704754, - "learning_rate": 8.697075061934937e-05, - "loss": 0.0042, - "step": 2017 - }, - { - "epoch": 1.5591813091330373, - "grad_norm": 0.03279326483607292, - "learning_rate": 8.695258383681128e-05, - "loss": 0.0046, - "step": 2018 - }, - { - "epoch": 1.5599536590075305, - "grad_norm": 0.020039940252900124, - "learning_rate": 8.693440629826893e-05, - "loss": 0.005, - "step": 2019 - }, - { - "epoch": 1.5607260088820234, - "grad_norm": 0.010598140768706799, - "learning_rate": 8.691621800901337e-05, - "loss": 0.0043, - "step": 2020 - }, - { - "epoch": 1.5614983587565168, - "grad_norm": 0.029510769993066788, - "learning_rate": 8.689801897433876e-05, - "loss": 0.0048, - "step": 2021 - }, - { - "epoch": 1.5622707086310097, - "grad_norm": 0.023090941831469536, - "learning_rate": 8.68798091995424e-05, - "loss": 0.0051, - "step": 2022 - }, - { - "epoch": 1.563043058505503, - "grad_norm": 0.01705724187195301, - "learning_rate": 8.68615886899247e-05, - "loss": 0.0044, - "step": 2023 - }, - { - "epoch": 1.563815408379996, - "grad_norm": 0.0226137712597847, - "learning_rate": 8.684335745078925e-05, - "loss": 0.0042, - "step": 2024 - }, - { - "epoch": 1.5645877582544894, - "grad_norm": 0.017362765967845917, - "learning_rate": 8.682511548744267e-05, - "loss": 0.0048, - "step": 2025 - }, - { - "epoch": 1.5653601081289823, - "grad_norm": 0.018477879464626312, - "learning_rate": 8.680686280519481e-05, - "loss": 0.0046, - "step": 2026 - }, - { - "epoch": 1.5661324580034757, - "grad_norm": 0.011460873298346996, - "learning_rate": 8.678859940935856e-05, - "loss": 0.0045, - "step": 2027 - }, - { - "epoch": 1.5669048078779686, - "grad_norm": 0.013441020622849464, - "learning_rate": 8.677032530524994e-05, - "loss": 0.0041, - "step": 2028 - }, - { - "epoch": 1.567677157752462, - "grad_norm": 0.02192874252796173, - "learning_rate": 8.675204049818812e-05, - "loss": 0.0045, - "step": 2029 - }, - { - "epoch": 1.568449507626955, - "grad_norm": 0.009835068136453629, - "learning_rate": 8.673374499349536e-05, - "loss": 0.0036, - "step": 2030 - }, - { - "epoch": 1.5692218575014483, - "grad_norm": 0.01787945069372654, - "learning_rate": 8.671543879649703e-05, - "loss": 0.0046, - "step": 2031 - }, - { - "epoch": 1.5699942073759412, - "grad_norm": 0.012214172631502151, - "learning_rate": 8.669712191252165e-05, - "loss": 0.004, - "step": 2032 - }, - { - "epoch": 1.5707665572504346, - "grad_norm": 0.01092873141169548, - "learning_rate": 8.667879434690078e-05, - "loss": 0.0044, - "step": 2033 - }, - { - "epoch": 1.5715389071249275, - "grad_norm": 0.011023624800145626, - "learning_rate": 8.666045610496916e-05, - "loss": 0.0041, - "step": 2034 - }, - { - "epoch": 1.5723112569994209, - "grad_norm": 0.012606930918991566, - "learning_rate": 8.66421071920646e-05, - "loss": 0.0053, - "step": 2035 - }, - { - "epoch": 1.5730836068739138, - "grad_norm": 0.00925703439861536, - "learning_rate": 8.662374761352804e-05, - "loss": 0.0043, - "step": 2036 - }, - { - "epoch": 1.5738559567484072, - "grad_norm": 0.008895349688827991, - "learning_rate": 8.660537737470348e-05, - "loss": 0.0038, - "step": 2037 - }, - { - "epoch": 1.5746283066229, - "grad_norm": 0.009540137834846973, - "learning_rate": 8.658699648093809e-05, - "loss": 0.0044, - "step": 2038 - }, - { - "epoch": 1.5754006564973935, - "grad_norm": 0.01367781963199377, - "learning_rate": 8.656860493758207e-05, - "loss": 0.0054, - "step": 2039 - }, - { - "epoch": 1.5761730063718864, - "grad_norm": 0.008566140197217464, - "learning_rate": 8.655020274998877e-05, - "loss": 0.0041, - "step": 2040 - }, - { - "epoch": 1.5769453562463798, - "grad_norm": 0.012065630406141281, - "learning_rate": 8.653178992351462e-05, - "loss": 0.0045, - "step": 2041 - }, - { - "epoch": 1.5777177061208727, - "grad_norm": 0.0094382269307971, - "learning_rate": 8.651336646351915e-05, - "loss": 0.0044, - "step": 2042 - }, - { - "epoch": 1.5784900559953658, - "grad_norm": 0.01034244243055582, - "learning_rate": 8.649493237536499e-05, - "loss": 0.0043, - "step": 2043 - }, - { - "epoch": 1.579262405869859, - "grad_norm": 0.020144378766417503, - "learning_rate": 8.647648766441784e-05, - "loss": 0.0047, - "step": 2044 - }, - { - "epoch": 1.5800347557443521, - "grad_norm": 0.014348277822136879, - "learning_rate": 8.645803233604652e-05, - "loss": 0.0042, - "step": 2045 - }, - { - "epoch": 1.5808071056188453, - "grad_norm": 0.021733032539486885, - "learning_rate": 8.643956639562294e-05, - "loss": 0.0049, - "step": 2046 - }, - { - "epoch": 1.5815794554933384, - "grad_norm": 0.021847965195775032, - "learning_rate": 8.642108984852206e-05, - "loss": 0.0046, - "step": 2047 - }, - { - "epoch": 1.5823518053678316, - "grad_norm": 0.010277594439685345, - "learning_rate": 8.640260270012199e-05, - "loss": 0.0043, - "step": 2048 - }, - { - "epoch": 1.5831241552423247, - "grad_norm": 0.013328421860933304, - "learning_rate": 8.638410495580389e-05, - "loss": 0.0042, - "step": 2049 - }, - { - "epoch": 1.583896505116818, - "grad_norm": 0.015377648174762726, - "learning_rate": 8.636559662095199e-05, - "loss": 0.0043, - "step": 2050 - }, - { - "epoch": 1.584668854991311, - "grad_norm": 0.008601309731602669, - "learning_rate": 8.63470777009536e-05, - "loss": 0.0044, - "step": 2051 - }, - { - "epoch": 1.5854412048658042, - "grad_norm": 0.014834502711892128, - "learning_rate": 8.632854820119917e-05, - "loss": 0.0048, - "step": 2052 - }, - { - "epoch": 1.5862135547402973, - "grad_norm": 0.011667091399431229, - "learning_rate": 8.631000812708217e-05, - "loss": 0.0047, - "step": 2053 - }, - { - "epoch": 1.5869859046147905, - "grad_norm": 0.010898937471210957, - "learning_rate": 8.629145748399919e-05, - "loss": 0.0047, - "step": 2054 - }, - { - "epoch": 1.5877582544892836, - "grad_norm": 0.013858029618859291, - "learning_rate": 8.627289627734983e-05, - "loss": 0.0046, - "step": 2055 - }, - { - "epoch": 1.5885306043637768, - "grad_norm": 0.011080384254455566, - "learning_rate": 8.625432451253683e-05, - "loss": 0.0045, - "step": 2056 - }, - { - "epoch": 1.58930295423827, - "grad_norm": 0.020170332863926888, - "learning_rate": 8.6235742194966e-05, - "loss": 0.0042, - "step": 2057 - }, - { - "epoch": 1.590075304112763, - "grad_norm": 0.022068405523896217, - "learning_rate": 8.621714933004619e-05, - "loss": 0.005, - "step": 2058 - }, - { - "epoch": 1.5908476539872562, - "grad_norm": 0.01097305491566658, - "learning_rate": 8.619854592318932e-05, - "loss": 0.0037, - "step": 2059 - }, - { - "epoch": 1.5916200038617494, - "grad_norm": 0.030156239867210388, - "learning_rate": 8.617993197981043e-05, - "loss": 0.0044, - "step": 2060 - }, - { - "epoch": 1.5923923537362425, - "grad_norm": 0.02470502071082592, - "learning_rate": 8.616130750532753e-05, - "loss": 0.0051, - "step": 2061 - }, - { - "epoch": 1.5931647036107357, - "grad_norm": 0.014046196825802326, - "learning_rate": 8.614267250516182e-05, - "loss": 0.0043, - "step": 2062 - }, - { - "epoch": 1.5939370534852288, - "grad_norm": 0.030482210218906403, - "learning_rate": 8.612402698473745e-05, - "loss": 0.0044, - "step": 2063 - }, - { - "epoch": 1.594709403359722, - "grad_norm": 0.02512381598353386, - "learning_rate": 8.61053709494817e-05, - "loss": 0.0043, - "step": 2064 - }, - { - "epoch": 1.5954817532342151, - "grad_norm": 0.00860644318163395, - "learning_rate": 8.608670440482489e-05, - "loss": 0.0042, - "step": 2065 - }, - { - "epoch": 1.5962541031087083, - "grad_norm": 0.012833881191909313, - "learning_rate": 8.606802735620041e-05, - "loss": 0.0046, - "step": 2066 - }, - { - "epoch": 1.5970264529832012, - "grad_norm": 0.04173438251018524, - "learning_rate": 8.604933980904466e-05, - "loss": 0.0047, - "step": 2067 - }, - { - "epoch": 1.5977988028576946, - "grad_norm": 0.021180585026741028, - "learning_rate": 8.603064176879718e-05, - "loss": 0.0044, - "step": 2068 - }, - { - "epoch": 1.5985711527321875, - "grad_norm": 0.016953809186816216, - "learning_rate": 8.601193324090049e-05, - "loss": 0.0037, - "step": 2069 - }, - { - "epoch": 1.599343502606681, - "grad_norm": 0.011930052191019058, - "learning_rate": 8.59932142308002e-05, - "loss": 0.0044, - "step": 2070 - }, - { - "epoch": 1.6001158524811738, - "grad_norm": 0.019897272810339928, - "learning_rate": 8.597448474394496e-05, - "loss": 0.0045, - "step": 2071 - }, - { - "epoch": 1.6008882023556672, - "grad_norm": 0.012094732373952866, - "learning_rate": 8.595574478578647e-05, - "loss": 0.0046, - "step": 2072 - }, - { - "epoch": 1.6016605522301601, - "grad_norm": 0.012607376091182232, - "learning_rate": 8.593699436177949e-05, - "loss": 0.0037, - "step": 2073 - }, - { - "epoch": 1.6024329021046535, - "grad_norm": 0.025926416739821434, - "learning_rate": 8.59182334773818e-05, - "loss": 0.0045, - "step": 2074 - }, - { - "epoch": 1.6032052519791464, - "grad_norm": 0.010309277102351189, - "learning_rate": 8.589946213805422e-05, - "loss": 0.0043, - "step": 2075 - }, - { - "epoch": 1.6039776018536398, - "grad_norm": 0.011180024594068527, - "learning_rate": 8.588068034926069e-05, - "loss": 0.0042, - "step": 2076 - }, - { - "epoch": 1.6047499517281327, - "grad_norm": 0.015378049574792385, - "learning_rate": 8.586188811646809e-05, - "loss": 0.004, - "step": 2077 - }, - { - "epoch": 1.605522301602626, - "grad_norm": 0.018012790009379387, - "learning_rate": 8.584308544514639e-05, - "loss": 0.0041, - "step": 2078 - }, - { - "epoch": 1.606294651477119, - "grad_norm": 0.012710918672382832, - "learning_rate": 8.582427234076861e-05, - "loss": 0.0043, - "step": 2079 - }, - { - "epoch": 1.6070670013516124, - "grad_norm": 0.021130891516804695, - "learning_rate": 8.580544880881079e-05, - "loss": 0.0045, - "step": 2080 - }, - { - "epoch": 1.6078393512261053, - "grad_norm": 0.025349225848913193, - "learning_rate": 8.578661485475199e-05, - "loss": 0.0047, - "step": 2081 - }, - { - "epoch": 1.6086117011005987, - "grad_norm": 0.012351235374808311, - "learning_rate": 8.576777048407432e-05, - "loss": 0.0045, - "step": 2082 - }, - { - "epoch": 1.6093840509750916, - "grad_norm": 0.025645259767770767, - "learning_rate": 8.574891570226292e-05, - "loss": 0.0044, - "step": 2083 - }, - { - "epoch": 1.610156400849585, - "grad_norm": 0.016709905117750168, - "learning_rate": 8.573005051480598e-05, - "loss": 0.005, - "step": 2084 - }, - { - "epoch": 1.610928750724078, - "grad_norm": 0.013472169637680054, - "learning_rate": 8.57111749271947e-05, - "loss": 0.004, - "step": 2085 - }, - { - "epoch": 1.6117011005985713, - "grad_norm": 0.011256473138928413, - "learning_rate": 8.569228894492328e-05, - "loss": 0.0037, - "step": 2086 - }, - { - "epoch": 1.6124734504730642, - "grad_norm": 0.009445869363844395, - "learning_rate": 8.567339257348898e-05, - "loss": 0.0046, - "step": 2087 - }, - { - "epoch": 1.6132458003475576, - "grad_norm": 0.014279279857873917, - "learning_rate": 8.56544858183921e-05, - "loss": 0.0049, - "step": 2088 - }, - { - "epoch": 1.6140181502220505, - "grad_norm": 0.013222617097198963, - "learning_rate": 8.563556868513592e-05, - "loss": 0.0046, - "step": 2089 - }, - { - "epoch": 1.6147905000965437, - "grad_norm": 0.011042260564863682, - "learning_rate": 8.561664117922677e-05, - "loss": 0.0042, - "step": 2090 - }, - { - "epoch": 1.6155628499710368, - "grad_norm": 0.011346302926540375, - "learning_rate": 8.559770330617399e-05, - "loss": 0.0041, - "step": 2091 - }, - { - "epoch": 1.61633519984553, - "grad_norm": 0.012888088822364807, - "learning_rate": 8.557875507148991e-05, - "loss": 0.0042, - "step": 2092 - }, - { - "epoch": 1.6171075497200231, - "grad_norm": 0.010861898772418499, - "learning_rate": 8.555979648068994e-05, - "loss": 0.0044, - "step": 2093 - }, - { - "epoch": 1.6178798995945163, - "grad_norm": 0.008471175096929073, - "learning_rate": 8.554082753929245e-05, - "loss": 0.0047, - "step": 2094 - }, - { - "epoch": 1.6186522494690094, - "grad_norm": 0.018162831664085388, - "learning_rate": 8.552184825281885e-05, - "loss": 0.0047, - "step": 2095 - }, - { - "epoch": 1.6194245993435026, - "grad_norm": 0.010423211380839348, - "learning_rate": 8.550285862679355e-05, - "loss": 0.0047, - "step": 2096 - }, - { - "epoch": 1.6201969492179957, - "grad_norm": 0.009892021305859089, - "learning_rate": 8.548385866674397e-05, - "loss": 0.0047, - "step": 2097 - }, - { - "epoch": 1.6209692990924889, - "grad_norm": 0.01203847024589777, - "learning_rate": 8.546484837820053e-05, - "loss": 0.0043, - "step": 2098 - }, - { - "epoch": 1.621741648966982, - "grad_norm": 0.01343244407325983, - "learning_rate": 8.544582776669665e-05, - "loss": 0.0049, - "step": 2099 - }, - { - "epoch": 1.6225139988414752, - "grad_norm": 0.011868324130773544, - "learning_rate": 8.542679683776881e-05, - "loss": 0.0051, - "step": 2100 - }, - { - "epoch": 1.6232863487159683, - "grad_norm": 0.012159244157373905, - "learning_rate": 8.540775559695645e-05, - "loss": 0.0041, - "step": 2101 - }, - { - "epoch": 1.6240586985904615, - "grad_norm": 0.008571230806410313, - "learning_rate": 8.5388704049802e-05, - "loss": 0.0041, - "step": 2102 - }, - { - "epoch": 1.6248310484649546, - "grad_norm": 0.012841513380408287, - "learning_rate": 8.536964220185089e-05, - "loss": 0.0042, - "step": 2103 - }, - { - "epoch": 1.6256033983394478, - "grad_norm": 0.0088861845433712, - "learning_rate": 8.535057005865158e-05, - "loss": 0.0042, - "step": 2104 - }, - { - "epoch": 1.626375748213941, - "grad_norm": 0.014981401152908802, - "learning_rate": 8.533148762575552e-05, - "loss": 0.0043, - "step": 2105 - }, - { - "epoch": 1.627148098088434, - "grad_norm": 0.01102379895746708, - "learning_rate": 8.531239490871712e-05, - "loss": 0.0048, - "step": 2106 - }, - { - "epoch": 1.6279204479629272, - "grad_norm": 0.010146740823984146, - "learning_rate": 8.529329191309383e-05, - "loss": 0.0045, - "step": 2107 - }, - { - "epoch": 1.6286927978374204, - "grad_norm": 0.02327127754688263, - "learning_rate": 8.527417864444606e-05, - "loss": 0.005, - "step": 2108 - }, - { - "epoch": 1.6294651477119135, - "grad_norm": 0.010763900354504585, - "learning_rate": 8.525505510833724e-05, - "loss": 0.0042, - "step": 2109 - }, - { - "epoch": 1.6302374975864067, - "grad_norm": 0.013578513637185097, - "learning_rate": 8.523592131033376e-05, - "loss": 0.0042, - "step": 2110 - }, - { - "epoch": 1.6310098474608998, - "grad_norm": 0.022221015766263008, - "learning_rate": 8.521677725600497e-05, - "loss": 0.005, - "step": 2111 - }, - { - "epoch": 1.631782197335393, - "grad_norm": 0.018897736445069313, - "learning_rate": 8.519762295092329e-05, - "loss": 0.0048, - "step": 2112 - }, - { - "epoch": 1.6325545472098861, - "grad_norm": 0.011113962158560753, - "learning_rate": 8.517845840066406e-05, - "loss": 0.0039, - "step": 2113 - }, - { - "epoch": 1.633326897084379, - "grad_norm": 0.034275464713573456, - "learning_rate": 8.515928361080558e-05, - "loss": 0.0048, - "step": 2114 - }, - { - "epoch": 1.6340992469588724, - "grad_norm": 0.011763646267354488, - "learning_rate": 8.514009858692924e-05, - "loss": 0.0045, - "step": 2115 - }, - { - "epoch": 1.6348715968333654, - "grad_norm": 0.010240164585411549, - "learning_rate": 8.512090333461929e-05, - "loss": 0.0049, - "step": 2116 - }, - { - "epoch": 1.6356439467078587, - "grad_norm": 0.010845442302525043, - "learning_rate": 8.5101697859463e-05, - "loss": 0.0038, - "step": 2117 - }, - { - "epoch": 1.6364162965823517, - "grad_norm": 0.013729963451623917, - "learning_rate": 8.508248216705065e-05, - "loss": 0.005, - "step": 2118 - }, - { - "epoch": 1.637188646456845, - "grad_norm": 0.016924763098359108, - "learning_rate": 8.506325626297545e-05, - "loss": 0.0052, - "step": 2119 - }, - { - "epoch": 1.637960996331338, - "grad_norm": 0.010555649176239967, - "learning_rate": 8.504402015283358e-05, - "loss": 0.0041, - "step": 2120 - }, - { - "epoch": 1.6387333462058313, - "grad_norm": 0.015738019719719887, - "learning_rate": 8.502477384222423e-05, - "loss": 0.0042, - "step": 2121 - }, - { - "epoch": 1.6395056960803243, - "grad_norm": 0.017977280542254448, - "learning_rate": 8.50055173367495e-05, - "loss": 0.0045, - "step": 2122 - }, - { - "epoch": 1.6402780459548176, - "grad_norm": 0.02719496376812458, - "learning_rate": 8.498625064201455e-05, - "loss": 0.0046, - "step": 2123 - }, - { - "epoch": 1.6410503958293106, - "grad_norm": 0.023861657828092575, - "learning_rate": 8.496697376362742e-05, - "loss": 0.0045, - "step": 2124 - }, - { - "epoch": 1.641822745703804, - "grad_norm": 0.01022212952375412, - "learning_rate": 8.494768670719912e-05, - "loss": 0.0037, - "step": 2125 - }, - { - "epoch": 1.6425950955782969, - "grad_norm": 0.009150752797722816, - "learning_rate": 8.492838947834367e-05, - "loss": 0.0045, - "step": 2126 - }, - { - "epoch": 1.6433674454527902, - "grad_norm": 0.025724977254867554, - "learning_rate": 8.490908208267805e-05, - "loss": 0.0049, - "step": 2127 - }, - { - "epoch": 1.6441397953272832, - "grad_norm": 0.014376234263181686, - "learning_rate": 8.488976452582213e-05, - "loss": 0.0045, - "step": 2128 - }, - { - "epoch": 1.6449121452017765, - "grad_norm": 0.015311934985220432, - "learning_rate": 8.487043681339881e-05, - "loss": 0.0046, - "step": 2129 - }, - { - "epoch": 1.6456844950762695, - "grad_norm": 0.018781229853630066, - "learning_rate": 8.485109895103391e-05, - "loss": 0.0042, - "step": 2130 - }, - { - "epoch": 1.6464568449507628, - "grad_norm": 0.013387559913098812, - "learning_rate": 8.483175094435622e-05, - "loss": 0.0041, - "step": 2131 - }, - { - "epoch": 1.6472291948252558, - "grad_norm": 0.03222404792904854, - "learning_rate": 8.481239279899748e-05, - "loss": 0.0046, - "step": 2132 - }, - { - "epoch": 1.6480015446997491, - "grad_norm": 0.012264180928468704, - "learning_rate": 8.479302452059238e-05, - "loss": 0.0042, - "step": 2133 - }, - { - "epoch": 1.648773894574242, - "grad_norm": 0.014101590029895306, - "learning_rate": 8.477364611477857e-05, - "loss": 0.0042, - "step": 2134 - }, - { - "epoch": 1.6495462444487354, - "grad_norm": 0.02377992682158947, - "learning_rate": 8.475425758719659e-05, - "loss": 0.0049, - "step": 2135 - }, - { - "epoch": 1.6503185943232284, - "grad_norm": 0.022946573793888092, - "learning_rate": 8.473485894349002e-05, - "loss": 0.0048, - "step": 2136 - }, - { - "epoch": 1.6510909441977217, - "grad_norm": 0.010878360830247402, - "learning_rate": 8.471545018930531e-05, - "loss": 0.0041, - "step": 2137 - }, - { - "epoch": 1.6518632940722147, - "grad_norm": 0.011692160740494728, - "learning_rate": 8.46960313302919e-05, - "loss": 0.0041, - "step": 2138 - }, - { - "epoch": 1.6526356439467078, - "grad_norm": 0.011987571604549885, - "learning_rate": 8.467660237210211e-05, - "loss": 0.0045, - "step": 2139 - }, - { - "epoch": 1.653407993821201, - "grad_norm": 0.009922868572175503, - "learning_rate": 8.465716332039128e-05, - "loss": 0.004, - "step": 2140 - }, - { - "epoch": 1.654180343695694, - "grad_norm": 0.02513553760945797, - "learning_rate": 8.463771418081763e-05, - "loss": 0.0044, - "step": 2141 - }, - { - "epoch": 1.6549526935701873, - "grad_norm": 0.009684463031589985, - "learning_rate": 8.461825495904236e-05, - "loss": 0.0042, - "step": 2142 - }, - { - "epoch": 1.6557250434446804, - "grad_norm": 0.011489993892610073, - "learning_rate": 8.459878566072955e-05, - "loss": 0.0042, - "step": 2143 - }, - { - "epoch": 1.6564973933191736, - "grad_norm": 0.021958818659186363, - "learning_rate": 8.457930629154625e-05, - "loss": 0.0041, - "step": 2144 - }, - { - "epoch": 1.6572697431936667, - "grad_norm": 0.019374405965209007, - "learning_rate": 8.455981685716244e-05, - "loss": 0.0043, - "step": 2145 - }, - { - "epoch": 1.6580420930681599, - "grad_norm": 0.010465430095791817, - "learning_rate": 8.454031736325101e-05, - "loss": 0.0043, - "step": 2146 - }, - { - "epoch": 1.658814442942653, - "grad_norm": 0.01048517506569624, - "learning_rate": 8.452080781548781e-05, - "loss": 0.0041, - "step": 2147 - }, - { - "epoch": 1.6595867928171462, - "grad_norm": 0.026124538853764534, - "learning_rate": 8.450128821955159e-05, - "loss": 0.0044, - "step": 2148 - }, - { - "epoch": 1.6603591426916393, - "grad_norm": 0.012944714166224003, - "learning_rate": 8.448175858112402e-05, - "loss": 0.0042, - "step": 2149 - }, - { - "epoch": 1.6611314925661325, - "grad_norm": 0.011114759370684624, - "learning_rate": 8.446221890588972e-05, - "loss": 0.004, - "step": 2150 - }, - { - "epoch": 1.6619038424406256, - "grad_norm": 0.023538824170827866, - "learning_rate": 8.444266919953623e-05, - "loss": 0.0051, - "step": 2151 - }, - { - "epoch": 1.6626761923151188, - "grad_norm": 0.015576600097119808, - "learning_rate": 8.442310946775397e-05, - "loss": 0.005, - "step": 2152 - }, - { - "epoch": 1.663448542189612, - "grad_norm": 0.013887589797377586, - "learning_rate": 8.440353971623631e-05, - "loss": 0.0043, - "step": 2153 - }, - { - "epoch": 1.664220892064105, - "grad_norm": 0.03156410902738571, - "learning_rate": 8.438395995067956e-05, - "loss": 0.0045, - "step": 2154 - }, - { - "epoch": 1.6649932419385982, - "grad_norm": 0.02747179940342903, - "learning_rate": 8.436437017678287e-05, - "loss": 0.0045, - "step": 2155 - }, - { - "epoch": 1.6657655918130914, - "grad_norm": 0.00802780594676733, - "learning_rate": 8.43447704002484e-05, - "loss": 0.0037, - "step": 2156 - }, - { - "epoch": 1.6665379416875845, - "grad_norm": 0.026982294395565987, - "learning_rate": 8.432516062678113e-05, - "loss": 0.0046, - "step": 2157 - }, - { - "epoch": 1.6673102915620777, - "grad_norm": 0.020452868193387985, - "learning_rate": 8.430554086208902e-05, - "loss": 0.0044, - "step": 2158 - }, - { - "epoch": 1.6680826414365708, - "grad_norm": 0.008987967856228352, - "learning_rate": 8.428591111188289e-05, - "loss": 0.0043, - "step": 2159 - }, - { - "epoch": 1.668854991311064, - "grad_norm": 0.008565445430576801, - "learning_rate": 8.426627138187648e-05, - "loss": 0.0044, - "step": 2160 - }, - { - "epoch": 1.6696273411855571, - "grad_norm": 0.0161731019616127, - "learning_rate": 8.424662167778647e-05, - "loss": 0.0043, - "step": 2161 - }, - { - "epoch": 1.6703996910600503, - "grad_norm": 0.01388185191899538, - "learning_rate": 8.42269620053324e-05, - "loss": 0.0042, - "step": 2162 - }, - { - "epoch": 1.6711720409345432, - "grad_norm": 0.010824089869856834, - "learning_rate": 8.420729237023672e-05, - "loss": 0.0045, - "step": 2163 - }, - { - "epoch": 1.6719443908090366, - "grad_norm": 0.018120337277650833, - "learning_rate": 8.418761277822478e-05, - "loss": 0.0047, - "step": 2164 - }, - { - "epoch": 1.6727167406835295, - "grad_norm": 0.01861923560500145, - "learning_rate": 8.416792323502486e-05, - "loss": 0.0046, - "step": 2165 - }, - { - "epoch": 1.6734890905580229, - "grad_norm": 0.012371395714581013, - "learning_rate": 8.414822374636808e-05, - "loss": 0.0039, - "step": 2166 - }, - { - "epoch": 1.6742614404325158, - "grad_norm": 0.012984382919967175, - "learning_rate": 8.41285143179885e-05, - "loss": 0.0039, - "step": 2167 - }, - { - "epoch": 1.6750337903070092, - "grad_norm": 0.015273356810212135, - "learning_rate": 8.410879495562307e-05, - "loss": 0.0044, - "step": 2168 - }, - { - "epoch": 1.675806140181502, - "grad_norm": 0.014091392047703266, - "learning_rate": 8.40890656650116e-05, - "loss": 0.0043, - "step": 2169 - }, - { - "epoch": 1.6765784900559955, - "grad_norm": 0.01114340964704752, - "learning_rate": 8.40693264518968e-05, - "loss": 0.0042, - "step": 2170 - }, - { - "epoch": 1.6773508399304884, - "grad_norm": 0.011085857637226582, - "learning_rate": 8.404957732202431e-05, - "loss": 0.0045, - "step": 2171 - }, - { - "epoch": 1.6781231898049818, - "grad_norm": 0.02794049121439457, - "learning_rate": 8.402981828114261e-05, - "loss": 0.004, - "step": 2172 - }, - { - "epoch": 1.6788955396794747, - "grad_norm": 0.016085123643279076, - "learning_rate": 8.401004933500307e-05, - "loss": 0.0045, - "step": 2173 - }, - { - "epoch": 1.679667889553968, - "grad_norm": 0.010472382418811321, - "learning_rate": 8.399027048935997e-05, - "loss": 0.0042, - "step": 2174 - }, - { - "epoch": 1.680440239428461, - "grad_norm": 0.00927384290844202, - "learning_rate": 8.397048174997044e-05, - "loss": 0.0043, - "step": 2175 - }, - { - "epoch": 1.6812125893029544, - "grad_norm": 0.03752785176038742, - "learning_rate": 8.395068312259451e-05, - "loss": 0.0048, - "step": 2176 - }, - { - "epoch": 1.6819849391774473, - "grad_norm": 0.011899629607796669, - "learning_rate": 8.393087461299508e-05, - "loss": 0.0042, - "step": 2177 - }, - { - "epoch": 1.6827572890519407, - "grad_norm": 0.01649544946849346, - "learning_rate": 8.391105622693793e-05, - "loss": 0.0046, - "step": 2178 - }, - { - "epoch": 1.6835296389264336, - "grad_norm": 0.04666175693273544, - "learning_rate": 8.389122797019172e-05, - "loss": 0.005, - "step": 2179 - }, - { - "epoch": 1.684301988800927, - "grad_norm": 0.008677134290337563, - "learning_rate": 8.387138984852795e-05, - "loss": 0.0039, - "step": 2180 - }, - { - "epoch": 1.68507433867542, - "grad_norm": 0.02470334805548191, - "learning_rate": 8.385154186772106e-05, - "loss": 0.0046, - "step": 2181 - }, - { - "epoch": 1.6858466885499133, - "grad_norm": 0.01587885431945324, - "learning_rate": 8.383168403354827e-05, - "loss": 0.0041, - "step": 2182 - }, - { - "epoch": 1.6866190384244062, - "grad_norm": 0.02103058062493801, - "learning_rate": 8.381181635178976e-05, - "loss": 0.0045, - "step": 2183 - }, - { - "epoch": 1.6873913882988996, - "grad_norm": 0.00931734312325716, - "learning_rate": 8.379193882822851e-05, - "loss": 0.0045, - "step": 2184 - }, - { - "epoch": 1.6881637381733925, - "grad_norm": 0.023588426411151886, - "learning_rate": 8.377205146865038e-05, - "loss": 0.0049, - "step": 2185 - }, - { - "epoch": 1.6889360880478856, - "grad_norm": 0.021287525072693825, - "learning_rate": 8.37521542788441e-05, - "loss": 0.004, - "step": 2186 - }, - { - "epoch": 1.6897084379223788, - "grad_norm": 0.014442606829106808, - "learning_rate": 8.37322472646013e-05, - "loss": 0.0042, - "step": 2187 - }, - { - "epoch": 1.690480787796872, - "grad_norm": 0.013919384218752384, - "learning_rate": 8.371233043171637e-05, - "loss": 0.0051, - "step": 2188 - }, - { - "epoch": 1.691253137671365, - "grad_norm": 0.02148333191871643, - "learning_rate": 8.369240378598667e-05, - "loss": 0.0044, - "step": 2189 - }, - { - "epoch": 1.6920254875458582, - "grad_norm": 0.020780183374881744, - "learning_rate": 8.367246733321235e-05, - "loss": 0.0049, - "step": 2190 - }, - { - "epoch": 1.6927978374203514, - "grad_norm": 0.01798652857542038, - "learning_rate": 8.365252107919641e-05, - "loss": 0.0043, - "step": 2191 - }, - { - "epoch": 1.6935701872948445, - "grad_norm": 0.016136417165398598, - "learning_rate": 8.363256502974474e-05, - "loss": 0.0041, - "step": 2192 - }, - { - "epoch": 1.6943425371693377, - "grad_norm": 0.009567273780703545, - "learning_rate": 8.361259919066606e-05, - "loss": 0.004, - "step": 2193 - }, - { - "epoch": 1.6951148870438308, - "grad_norm": 0.009171852841973305, - "learning_rate": 8.359262356777194e-05, - "loss": 0.0044, - "step": 2194 - }, - { - "epoch": 1.695887236918324, - "grad_norm": 0.009534629993140697, - "learning_rate": 8.357263816687681e-05, - "loss": 0.0042, - "step": 2195 - }, - { - "epoch": 1.6966595867928171, - "grad_norm": 0.025950351729989052, - "learning_rate": 8.355264299379794e-05, - "loss": 0.0044, - "step": 2196 - }, - { - "epoch": 1.6974319366673103, - "grad_norm": 0.00965665653347969, - "learning_rate": 8.353263805435543e-05, - "loss": 0.0045, - "step": 2197 - }, - { - "epoch": 1.6982042865418034, - "grad_norm": 0.02293946035206318, - "learning_rate": 8.351262335437224e-05, - "loss": 0.0046, - "step": 2198 - }, - { - "epoch": 1.6989766364162966, - "grad_norm": 0.018668964505195618, - "learning_rate": 8.349259889967416e-05, - "loss": 0.0043, - "step": 2199 - }, - { - "epoch": 1.6997489862907897, - "grad_norm": 0.011089003644883633, - "learning_rate": 8.347256469608983e-05, - "loss": 0.0043, - "step": 2200 - }, - { - "epoch": 1.700521336165283, - "grad_norm": 0.010769315995275974, - "learning_rate": 8.345252074945071e-05, - "loss": 0.0044, - "step": 2201 - }, - { - "epoch": 1.701293686039776, - "grad_norm": 0.018320759758353233, - "learning_rate": 8.343246706559113e-05, - "loss": 0.0042, - "step": 2202 - }, - { - "epoch": 1.7020660359142692, - "grad_norm": 0.019361697137355804, - "learning_rate": 8.341240365034823e-05, - "loss": 0.0042, - "step": 2203 - }, - { - "epoch": 1.7028383857887623, - "grad_norm": 0.011455011554062366, - "learning_rate": 8.339233050956198e-05, - "loss": 0.0041, - "step": 2204 - }, - { - "epoch": 1.7036107356632555, - "grad_norm": 0.011829971335828304, - "learning_rate": 8.337224764907518e-05, - "loss": 0.0048, - "step": 2205 - }, - { - "epoch": 1.7043830855377486, - "grad_norm": 0.008956990204751492, - "learning_rate": 8.335215507473346e-05, - "loss": 0.004, - "step": 2206 - }, - { - "epoch": 1.7051554354122418, - "grad_norm": 0.009752951562404633, - "learning_rate": 8.333205279238531e-05, - "loss": 0.004, - "step": 2207 - }, - { - "epoch": 1.705927785286735, - "grad_norm": 0.010895447805523872, - "learning_rate": 8.3311940807882e-05, - "loss": 0.0037, - "step": 2208 - }, - { - "epoch": 1.706700135161228, - "grad_norm": 0.007751632947474718, - "learning_rate": 8.329181912707764e-05, - "loss": 0.0037, - "step": 2209 - }, - { - "epoch": 1.707472485035721, - "grad_norm": 0.012226882390677929, - "learning_rate": 8.327168775582916e-05, - "loss": 0.0045, - "step": 2210 - }, - { - "epoch": 1.7082448349102144, - "grad_norm": 0.01452571339905262, - "learning_rate": 8.325154669999634e-05, - "loss": 0.0041, - "step": 2211 - }, - { - "epoch": 1.7090171847847073, - "grad_norm": 0.013364356011152267, - "learning_rate": 8.323139596544174e-05, - "loss": 0.0041, - "step": 2212 - }, - { - "epoch": 1.7097895346592007, - "grad_norm": 0.02150997333228588, - "learning_rate": 8.321123555803074e-05, - "loss": 0.0043, - "step": 2213 - }, - { - "epoch": 1.7105618845336936, - "grad_norm": 0.015584706328809261, - "learning_rate": 8.319106548363156e-05, - "loss": 0.0038, - "step": 2214 - }, - { - "epoch": 1.711334234408187, - "grad_norm": 0.009450695477426052, - "learning_rate": 8.317088574811524e-05, - "loss": 0.004, - "step": 2215 - }, - { - "epoch": 1.71210658428268, - "grad_norm": 0.008295596577227116, - "learning_rate": 8.315069635735557e-05, - "loss": 0.0042, - "step": 2216 - }, - { - "epoch": 1.7128789341571733, - "grad_norm": 0.02395360730588436, - "learning_rate": 8.313049731722924e-05, - "loss": 0.004, - "step": 2217 - }, - { - "epoch": 1.7136512840316662, - "grad_norm": 0.029887588694691658, - "learning_rate": 8.311028863361566e-05, - "loss": 0.0043, - "step": 2218 - }, - { - "epoch": 1.7144236339061596, - "grad_norm": 0.028670670464634895, - "learning_rate": 8.309007031239712e-05, - "loss": 0.0042, - "step": 2219 - }, - { - "epoch": 1.7151959837806525, - "grad_norm": 0.020392004400491714, - "learning_rate": 8.306984235945868e-05, - "loss": 0.0036, - "step": 2220 - }, - { - "epoch": 1.715968333655146, - "grad_norm": 0.028491845354437828, - "learning_rate": 8.304960478068819e-05, - "loss": 0.0045, - "step": 2221 - }, - { - "epoch": 1.7167406835296388, - "grad_norm": 0.01415249053388834, - "learning_rate": 8.302935758197634e-05, - "loss": 0.0044, - "step": 2222 - }, - { - "epoch": 1.7175130334041322, - "grad_norm": 0.03231945261359215, - "learning_rate": 8.30091007692166e-05, - "loss": 0.0046, - "step": 2223 - }, - { - "epoch": 1.7182853832786251, - "grad_norm": 0.023468049243092537, - "learning_rate": 8.29888343483052e-05, - "loss": 0.0045, - "step": 2224 - }, - { - "epoch": 1.7190577331531185, - "grad_norm": 0.009676053188741207, - "learning_rate": 8.296855832514128e-05, - "loss": 0.0036, - "step": 2225 - }, - { - "epoch": 1.7198300830276114, - "grad_norm": 0.012138811871409416, - "learning_rate": 8.294827270562664e-05, - "loss": 0.0041, - "step": 2226 - }, - { - "epoch": 1.7206024329021048, - "grad_norm": 0.02946018986403942, - "learning_rate": 8.292797749566594e-05, - "loss": 0.0046, - "step": 2227 - }, - { - "epoch": 1.7213747827765977, - "grad_norm": 0.01382883358746767, - "learning_rate": 8.290767270116666e-05, - "loss": 0.0043, - "step": 2228 - }, - { - "epoch": 1.722147132651091, - "grad_norm": 0.011325203813612461, - "learning_rate": 8.2887358328039e-05, - "loss": 0.0043, - "step": 2229 - }, - { - "epoch": 1.722919482525584, - "grad_norm": 0.015367789193987846, - "learning_rate": 8.2867034382196e-05, - "loss": 0.004, - "step": 2230 - }, - { - "epoch": 1.7236918324000774, - "grad_norm": 0.019331874325871468, - "learning_rate": 8.284670086955346e-05, - "loss": 0.0043, - "step": 2231 - }, - { - "epoch": 1.7244641822745703, - "grad_norm": 0.009774122387170792, - "learning_rate": 8.282635779602998e-05, - "loss": 0.0044, - "step": 2232 - }, - { - "epoch": 1.7252365321490635, - "grad_norm": 0.00924541987478733, - "learning_rate": 8.280600516754694e-05, - "loss": 0.0036, - "step": 2233 - }, - { - "epoch": 1.7260088820235566, - "grad_norm": 0.011827325448393822, - "learning_rate": 8.278564299002849e-05, - "loss": 0.0043, - "step": 2234 - }, - { - "epoch": 1.7267812318980498, - "grad_norm": 0.016981936991214752, - "learning_rate": 8.276527126940157e-05, - "loss": 0.0048, - "step": 2235 - }, - { - "epoch": 1.727553581772543, - "grad_norm": 0.011866158805787563, - "learning_rate": 8.27448900115959e-05, - "loss": 0.0042, - "step": 2236 - }, - { - "epoch": 1.728325931647036, - "grad_norm": 0.011118290014564991, - "learning_rate": 8.272449922254398e-05, - "loss": 0.0047, - "step": 2237 - }, - { - "epoch": 1.7290982815215292, - "grad_norm": 0.016521891579031944, - "learning_rate": 8.270409890818104e-05, - "loss": 0.005, - "step": 2238 - }, - { - "epoch": 1.7298706313960224, - "grad_norm": 0.009404239244759083, - "learning_rate": 8.268368907444518e-05, - "loss": 0.0046, - "step": 2239 - }, - { - "epoch": 1.7306429812705155, - "grad_norm": 0.009855051524937153, - "learning_rate": 8.266326972727714e-05, - "loss": 0.0044, - "step": 2240 - }, - { - "epoch": 1.7314153311450087, - "grad_norm": 0.01203001756221056, - "learning_rate": 8.264284087262056e-05, - "loss": 0.0051, - "step": 2241 - }, - { - "epoch": 1.7321876810195018, - "grad_norm": 0.015957722440361977, - "learning_rate": 8.262240251642173e-05, - "loss": 0.004, - "step": 2242 - }, - { - "epoch": 1.732960030893995, - "grad_norm": 0.014566673897206783, - "learning_rate": 8.260195466462981e-05, - "loss": 0.0044, - "step": 2243 - }, - { - "epoch": 1.7337323807684881, - "grad_norm": 0.016216455027461052, - "learning_rate": 8.258149732319665e-05, - "loss": 0.0048, - "step": 2244 - }, - { - "epoch": 1.7345047306429813, - "grad_norm": 0.014252996072173119, - "learning_rate": 8.256103049807688e-05, - "loss": 0.0045, - "step": 2245 - }, - { - "epoch": 1.7352770805174744, - "grad_norm": 0.011566858738660812, - "learning_rate": 8.254055419522792e-05, - "loss": 0.0045, - "step": 2246 - }, - { - "epoch": 1.7360494303919676, - "grad_norm": 0.01703554019331932, - "learning_rate": 8.252006842060993e-05, - "loss": 0.0041, - "step": 2247 - }, - { - "epoch": 1.7368217802664607, - "grad_norm": 0.009798984974622726, - "learning_rate": 8.249957318018581e-05, - "loss": 0.0049, - "step": 2248 - }, - { - "epoch": 1.7375941301409539, - "grad_norm": 0.008998863399028778, - "learning_rate": 8.247906847992122e-05, - "loss": 0.0041, - "step": 2249 - }, - { - "epoch": 1.738366480015447, - "grad_norm": 0.009814360179007053, - "learning_rate": 8.24585543257846e-05, - "loss": 0.0046, - "step": 2250 - }, - { - "epoch": 1.7391388298899402, - "grad_norm": 0.016251133754849434, - "learning_rate": 8.243803072374711e-05, - "loss": 0.0047, - "step": 2251 - }, - { - "epoch": 1.7399111797644333, - "grad_norm": 0.007743713911622763, - "learning_rate": 8.24174976797827e-05, - "loss": 0.0038, - "step": 2252 - }, - { - "epoch": 1.7406835296389265, - "grad_norm": 0.00983304250985384, - "learning_rate": 8.239695519986802e-05, - "loss": 0.0042, - "step": 2253 - }, - { - "epoch": 1.7414558795134196, - "grad_norm": 0.01851450838148594, - "learning_rate": 8.237640328998249e-05, - "loss": 0.0048, - "step": 2254 - }, - { - "epoch": 1.7422282293879128, - "grad_norm": 0.011477231048047543, - "learning_rate": 8.235584195610829e-05, - "loss": 0.0041, - "step": 2255 - }, - { - "epoch": 1.743000579262406, - "grad_norm": 0.010229852981865406, - "learning_rate": 8.233527120423031e-05, - "loss": 0.0041, - "step": 2256 - }, - { - "epoch": 1.7437729291368989, - "grad_norm": 0.027905916795134544, - "learning_rate": 8.231469104033621e-05, - "loss": 0.0044, - "step": 2257 - }, - { - "epoch": 1.7445452790113922, - "grad_norm": 0.010942929424345493, - "learning_rate": 8.229410147041639e-05, - "loss": 0.0042, - "step": 2258 - }, - { - "epoch": 1.7453176288858852, - "grad_norm": 0.008921844884753227, - "learning_rate": 8.227350250046393e-05, - "loss": 0.0044, - "step": 2259 - }, - { - "epoch": 1.7460899787603785, - "grad_norm": 0.017951516434550285, - "learning_rate": 8.225289413647475e-05, - "loss": 0.004, - "step": 2260 - }, - { - "epoch": 1.7468623286348715, - "grad_norm": 0.01976814493536949, - "learning_rate": 8.22322763844474e-05, - "loss": 0.004, - "step": 2261 - }, - { - "epoch": 1.7476346785093648, - "grad_norm": 0.014263933524489403, - "learning_rate": 8.221164925038325e-05, - "loss": 0.005, - "step": 2262 - }, - { - "epoch": 1.7484070283838578, - "grad_norm": 0.020307086408138275, - "learning_rate": 8.219101274028634e-05, - "loss": 0.0042, - "step": 2263 - }, - { - "epoch": 1.7491793782583511, - "grad_norm": 0.021188564598560333, - "learning_rate": 8.217036686016344e-05, - "loss": 0.0042, - "step": 2264 - }, - { - "epoch": 1.749951728132844, - "grad_norm": 0.010408424772322178, - "learning_rate": 8.21497116160241e-05, - "loss": 0.0043, - "step": 2265 - }, - { - "epoch": 1.7507240780073374, - "grad_norm": 0.01547803170979023, - "learning_rate": 8.212904701388054e-05, - "loss": 0.0042, - "step": 2266 - }, - { - "epoch": 1.7514964278818304, - "grad_norm": 0.030950602144002914, - "learning_rate": 8.210837305974775e-05, - "loss": 0.0045, - "step": 2267 - }, - { - "epoch": 1.7522687777563237, - "grad_norm": 0.00840567797422409, - "learning_rate": 8.208768975964338e-05, - "loss": 0.0045, - "step": 2268 - }, - { - "epoch": 1.7530411276308167, - "grad_norm": 0.014739587903022766, - "learning_rate": 8.206699711958789e-05, - "loss": 0.0038, - "step": 2269 - }, - { - "epoch": 1.75381347750531, - "grad_norm": 0.027478396892547607, - "learning_rate": 8.204629514560437e-05, - "loss": 0.0043, - "step": 2270 - }, - { - "epoch": 1.754585827379803, - "grad_norm": 0.009460730478167534, - "learning_rate": 8.202558384371868e-05, - "loss": 0.0047, - "step": 2271 - }, - { - "epoch": 1.7553581772542963, - "grad_norm": 0.026636026799678802, - "learning_rate": 8.200486321995936e-05, - "loss": 0.005, - "step": 2272 - }, - { - "epoch": 1.7561305271287893, - "grad_norm": 0.0176105797290802, - "learning_rate": 8.19841332803577e-05, - "loss": 0.0048, - "step": 2273 - }, - { - "epoch": 1.7569028770032826, - "grad_norm": 0.01977243646979332, - "learning_rate": 8.196339403094768e-05, - "loss": 0.0043, - "step": 2274 - }, - { - "epoch": 1.7576752268777756, - "grad_norm": 0.00944372545927763, - "learning_rate": 8.194264547776603e-05, - "loss": 0.0047, - "step": 2275 - }, - { - "epoch": 1.758447576752269, - "grad_norm": 0.01759764365851879, - "learning_rate": 8.192188762685208e-05, - "loss": 0.0043, - "step": 2276 - }, - { - "epoch": 1.7592199266267619, - "grad_norm": 0.014346209354698658, - "learning_rate": 8.190112048424799e-05, - "loss": 0.0043, - "step": 2277 - }, - { - "epoch": 1.7599922765012552, - "grad_norm": 0.01344558596611023, - "learning_rate": 8.188034405599856e-05, - "loss": 0.0045, - "step": 2278 - }, - { - "epoch": 1.7607646263757482, - "grad_norm": 0.010901092551648617, - "learning_rate": 8.18595583481513e-05, - "loss": 0.0047, - "step": 2279 - }, - { - "epoch": 1.7615369762502413, - "grad_norm": 0.012345247901976109, - "learning_rate": 8.183876336675644e-05, - "loss": 0.0046, - "step": 2280 - }, - { - "epoch": 1.7623093261247345, - "grad_norm": 0.012082891538739204, - "learning_rate": 8.18179591178669e-05, - "loss": 0.0045, - "step": 2281 - }, - { - "epoch": 1.7630816759992276, - "grad_norm": 0.010318143293261528, - "learning_rate": 8.179714560753828e-05, - "loss": 0.0043, - "step": 2282 - }, - { - "epoch": 1.7638540258737208, - "grad_norm": 0.01975180394947529, - "learning_rate": 8.177632284182888e-05, - "loss": 0.0048, - "step": 2283 - }, - { - "epoch": 1.764626375748214, - "grad_norm": 0.010247784666717052, - "learning_rate": 8.175549082679973e-05, - "loss": 0.0046, - "step": 2284 - }, - { - "epoch": 1.765398725622707, - "grad_norm": 0.012388781644403934, - "learning_rate": 8.173464956851452e-05, - "loss": 0.0042, - "step": 2285 - }, - { - "epoch": 1.7661710754972002, - "grad_norm": 0.010144729167222977, - "learning_rate": 8.171379907303964e-05, - "loss": 0.004, - "step": 2286 - }, - { - "epoch": 1.7669434253716934, - "grad_norm": 0.019393224269151688, - "learning_rate": 8.169293934644412e-05, - "loss": 0.0041, - "step": 2287 - }, - { - "epoch": 1.7677157752461865, - "grad_norm": 0.010567243210971355, - "learning_rate": 8.16720703947998e-05, - "loss": 0.004, - "step": 2288 - }, - { - "epoch": 1.7684881251206797, - "grad_norm": 0.013309785164892673, - "learning_rate": 8.165119222418107e-05, - "loss": 0.0041, - "step": 2289 - }, - { - "epoch": 1.7692604749951728, - "grad_norm": 0.010740738362073898, - "learning_rate": 8.163030484066508e-05, - "loss": 0.0044, - "step": 2290 - }, - { - "epoch": 1.770032824869666, - "grad_norm": 0.012563997879624367, - "learning_rate": 8.160940825033165e-05, - "loss": 0.0042, - "step": 2291 - }, - { - "epoch": 1.770805174744159, - "grad_norm": 0.008908885531127453, - "learning_rate": 8.158850245926325e-05, - "loss": 0.0043, - "step": 2292 - }, - { - "epoch": 1.7715775246186523, - "grad_norm": 0.013040358200669289, - "learning_rate": 8.156758747354507e-05, - "loss": 0.0043, - "step": 2293 - }, - { - "epoch": 1.7723498744931454, - "grad_norm": 0.01532841194421053, - "learning_rate": 8.154666329926494e-05, - "loss": 0.0046, - "step": 2294 - }, - { - "epoch": 1.7731222243676386, - "grad_norm": 0.009438990615308285, - "learning_rate": 8.152572994251342e-05, - "loss": 0.0041, - "step": 2295 - }, - { - "epoch": 1.7738945742421317, - "grad_norm": 0.01765560731291771, - "learning_rate": 8.150478740938365e-05, - "loss": 0.0038, - "step": 2296 - }, - { - "epoch": 1.7746669241166249, - "grad_norm": 0.009543683379888535, - "learning_rate": 8.148383570597154e-05, - "loss": 0.0041, - "step": 2297 - }, - { - "epoch": 1.775439273991118, - "grad_norm": 0.010798865929245949, - "learning_rate": 8.14628748383756e-05, - "loss": 0.0044, - "step": 2298 - }, - { - "epoch": 1.7762116238656112, - "grad_norm": 0.00954483449459076, - "learning_rate": 8.144190481269702e-05, - "loss": 0.0044, - "step": 2299 - }, - { - "epoch": 1.7769839737401043, - "grad_norm": 0.007039578165858984, - "learning_rate": 8.142092563503972e-05, - "loss": 0.0041, - "step": 2300 - }, - { - "epoch": 1.7777563236145975, - "grad_norm": 0.022727273404598236, - "learning_rate": 8.139993731151017e-05, - "loss": 0.0039, - "step": 2301 - }, - { - "epoch": 1.7785286734890906, - "grad_norm": 0.013155174441635609, - "learning_rate": 8.137893984821761e-05, - "loss": 0.0041, - "step": 2302 - }, - { - "epoch": 1.7793010233635838, - "grad_norm": 0.014248731546103954, - "learning_rate": 8.135793325127388e-05, - "loss": 0.0043, - "step": 2303 - }, - { - "epoch": 1.7800733732380767, - "grad_norm": 0.027962619438767433, - "learning_rate": 8.133691752679347e-05, - "loss": 0.0046, - "step": 2304 - }, - { - "epoch": 1.78084572311257, - "grad_norm": 0.023610763251781464, - "learning_rate": 8.131589268089358e-05, - "loss": 0.0039, - "step": 2305 - }, - { - "epoch": 1.781618072987063, - "grad_norm": 0.012954623438417912, - "learning_rate": 8.129485871969402e-05, - "loss": 0.0041, - "step": 2306 - }, - { - "epoch": 1.7823904228615564, - "grad_norm": 0.03095209412276745, - "learning_rate": 8.127381564931726e-05, - "loss": 0.0047, - "step": 2307 - }, - { - "epoch": 1.7831627727360493, - "grad_norm": 0.020441990345716476, - "learning_rate": 8.125276347588847e-05, - "loss": 0.0041, - "step": 2308 - }, - { - "epoch": 1.7839351226105427, - "grad_norm": 0.014079474844038486, - "learning_rate": 8.123170220553537e-05, - "loss": 0.0042, - "step": 2309 - }, - { - "epoch": 1.7847074724850356, - "grad_norm": 0.03220905736088753, - "learning_rate": 8.121063184438845e-05, - "loss": 0.0043, - "step": 2310 - }, - { - "epoch": 1.785479822359529, - "grad_norm": 0.018410203978419304, - "learning_rate": 8.118955239858072e-05, - "loss": 0.0047, - "step": 2311 - }, - { - "epoch": 1.7862521722340219, - "grad_norm": 0.015284133143723011, - "learning_rate": 8.116846387424794e-05, - "loss": 0.0041, - "step": 2312 - }, - { - "epoch": 1.7870245221085153, - "grad_norm": 0.024698738008737564, - "learning_rate": 8.114736627752846e-05, - "loss": 0.004, - "step": 2313 - }, - { - "epoch": 1.7877968719830082, - "grad_norm": 0.016370631754398346, - "learning_rate": 8.112625961456325e-05, - "loss": 0.0048, - "step": 2314 - }, - { - "epoch": 1.7885692218575016, - "grad_norm": 0.017090700566768646, - "learning_rate": 8.1105143891496e-05, - "loss": 0.0042, - "step": 2315 - }, - { - "epoch": 1.7893415717319945, - "grad_norm": 0.019004611298441887, - "learning_rate": 8.108401911447297e-05, - "loss": 0.0044, - "step": 2316 - }, - { - "epoch": 1.7901139216064879, - "grad_norm": 0.026414448395371437, - "learning_rate": 8.106288528964306e-05, - "loss": 0.0049, - "step": 2317 - }, - { - "epoch": 1.7908862714809808, - "grad_norm": 0.01318669319152832, - "learning_rate": 8.104174242315781e-05, - "loss": 0.0042, - "step": 2318 - }, - { - "epoch": 1.7916586213554742, - "grad_norm": 0.023264247924089432, - "learning_rate": 8.102059052117141e-05, - "loss": 0.0045, - "step": 2319 - }, - { - "epoch": 1.792430971229967, - "grad_norm": 0.010670443065464497, - "learning_rate": 8.099942958984068e-05, - "loss": 0.004, - "step": 2320 - }, - { - "epoch": 1.7932033211044605, - "grad_norm": 0.0195294339209795, - "learning_rate": 8.097825963532504e-05, - "loss": 0.0043, - "step": 2321 - }, - { - "epoch": 1.7939756709789534, - "grad_norm": 0.009845489636063576, - "learning_rate": 8.095708066378653e-05, - "loss": 0.0043, - "step": 2322 - }, - { - "epoch": 1.7947480208534468, - "grad_norm": 0.018464913591742516, - "learning_rate": 8.09358926813899e-05, - "loss": 0.0039, - "step": 2323 - }, - { - "epoch": 1.7955203707279397, - "grad_norm": 0.02056654542684555, - "learning_rate": 8.091469569430238e-05, - "loss": 0.0042, - "step": 2324 - }, - { - "epoch": 1.796292720602433, - "grad_norm": 0.010467185638844967, - "learning_rate": 8.089348970869398e-05, - "loss": 0.0043, - "step": 2325 - }, - { - "epoch": 1.797065070476926, - "grad_norm": 0.01489762682467699, - "learning_rate": 8.087227473073719e-05, - "loss": 0.0042, - "step": 2326 - }, - { - "epoch": 1.7978374203514194, - "grad_norm": 0.017772536724805832, - "learning_rate": 8.085105076660722e-05, - "loss": 0.0045, - "step": 2327 - }, - { - "epoch": 1.7986097702259123, - "grad_norm": 0.011074879206717014, - "learning_rate": 8.082981782248182e-05, - "loss": 0.0044, - "step": 2328 - }, - { - "epoch": 1.7993821201004054, - "grad_norm": 0.0100552998483181, - "learning_rate": 8.080857590454138e-05, - "loss": 0.0043, - "step": 2329 - }, - { - "epoch": 1.8001544699748986, - "grad_norm": 0.019595801830291748, - "learning_rate": 8.078732501896896e-05, - "loss": 0.0043, - "step": 2330 - }, - { - "epoch": 1.8009268198493917, - "grad_norm": 0.01229262538254261, - "learning_rate": 8.076606517195013e-05, - "loss": 0.0039, - "step": 2331 - }, - { - "epoch": 1.8016991697238849, - "grad_norm": 0.009469173848628998, - "learning_rate": 8.074479636967314e-05, - "loss": 0.0038, - "step": 2332 - }, - { - "epoch": 1.802471519598378, - "grad_norm": 0.012940244749188423, - "learning_rate": 8.072351861832883e-05, - "loss": 0.0039, - "step": 2333 - }, - { - "epoch": 1.8032438694728712, - "grad_norm": 0.01948258839547634, - "learning_rate": 8.070223192411061e-05, - "loss": 0.0043, - "step": 2334 - }, - { - "epoch": 1.8040162193473643, - "grad_norm": 0.008973821066319942, - "learning_rate": 8.068093629321456e-05, - "loss": 0.004, - "step": 2335 - }, - { - "epoch": 1.8047885692218575, - "grad_norm": 0.009137239307165146, - "learning_rate": 8.065963173183929e-05, - "loss": 0.0038, - "step": 2336 - }, - { - "epoch": 1.8055609190963506, - "grad_norm": 0.020940445363521576, - "learning_rate": 8.063831824618606e-05, - "loss": 0.0045, - "step": 2337 - }, - { - "epoch": 1.8063332689708438, - "grad_norm": 0.01261752936989069, - "learning_rate": 8.061699584245872e-05, - "loss": 0.0047, - "step": 2338 - }, - { - "epoch": 1.807105618845337, - "grad_norm": 0.01585754007101059, - "learning_rate": 8.05956645268637e-05, - "loss": 0.0042, - "step": 2339 - }, - { - "epoch": 1.80787796871983, - "grad_norm": 0.013830906711518764, - "learning_rate": 8.057432430561e-05, - "loss": 0.0046, - "step": 2340 - }, - { - "epoch": 1.8086503185943232, - "grad_norm": 0.009410025551915169, - "learning_rate": 8.05529751849093e-05, - "loss": 0.0044, - "step": 2341 - }, - { - "epoch": 1.8094226684688164, - "grad_norm": 0.01510144118219614, - "learning_rate": 8.053161717097575e-05, - "loss": 0.0044, - "step": 2342 - }, - { - "epoch": 1.8101950183433095, - "grad_norm": 0.012338819913566113, - "learning_rate": 8.05102502700262e-05, - "loss": 0.0041, - "step": 2343 - }, - { - "epoch": 1.8109673682178027, - "grad_norm": 0.016597097739577293, - "learning_rate": 8.048887448828001e-05, - "loss": 0.0045, - "step": 2344 - }, - { - "epoch": 1.8117397180922958, - "grad_norm": 0.009099415503442287, - "learning_rate": 8.046748983195919e-05, - "loss": 0.0044, - "step": 2345 - }, - { - "epoch": 1.812512067966789, - "grad_norm": 0.014252396300435066, - "learning_rate": 8.044609630728826e-05, - "loss": 0.0047, - "step": 2346 - }, - { - "epoch": 1.8132844178412821, - "grad_norm": 0.009875001385807991, - "learning_rate": 8.042469392049436e-05, - "loss": 0.0038, - "step": 2347 - }, - { - "epoch": 1.8140567677157753, - "grad_norm": 0.009954247623682022, - "learning_rate": 8.040328267780724e-05, - "loss": 0.0042, - "step": 2348 - }, - { - "epoch": 1.8148291175902684, - "grad_norm": 0.008104944601655006, - "learning_rate": 8.038186258545916e-05, - "loss": 0.0037, - "step": 2349 - }, - { - "epoch": 1.8156014674647616, - "grad_norm": 0.020304758101701736, - "learning_rate": 8.0360433649685e-05, - "loss": 0.0045, - "step": 2350 - }, - { - "epoch": 1.8163738173392547, - "grad_norm": 0.013075760565698147, - "learning_rate": 8.033899587672222e-05, - "loss": 0.0049, - "step": 2351 - }, - { - "epoch": 1.8171461672137479, - "grad_norm": 0.011701141484081745, - "learning_rate": 8.031754927281084e-05, - "loss": 0.0041, - "step": 2352 - }, - { - "epoch": 1.8179185170882408, - "grad_norm": 0.008222193457186222, - "learning_rate": 8.029609384419341e-05, - "loss": 0.0039, - "step": 2353 - }, - { - "epoch": 1.8186908669627342, - "grad_norm": 0.010191203095018864, - "learning_rate": 8.027462959711512e-05, - "loss": 0.0043, - "step": 2354 - }, - { - "epoch": 1.8194632168372271, - "grad_norm": 0.009672521613538265, - "learning_rate": 8.02531565378237e-05, - "loss": 0.0039, - "step": 2355 - }, - { - "epoch": 1.8202355667117205, - "grad_norm": 0.017960375174880028, - "learning_rate": 8.023167467256942e-05, - "loss": 0.0043, - "step": 2356 - }, - { - "epoch": 1.8210079165862134, - "grad_norm": 0.01434109266847372, - "learning_rate": 8.021018400760514e-05, - "loss": 0.0043, - "step": 2357 - }, - { - "epoch": 1.8217802664607068, - "grad_norm": 0.012434864416718483, - "learning_rate": 8.018868454918627e-05, - "loss": 0.0038, - "step": 2358 - }, - { - "epoch": 1.8225526163351997, - "grad_norm": 0.024516962468624115, - "learning_rate": 8.016717630357076e-05, - "loss": 0.0043, - "step": 2359 - }, - { - "epoch": 1.823324966209693, - "grad_norm": 0.01232131663709879, - "learning_rate": 8.01456592770192e-05, - "loss": 0.0046, - "step": 2360 - }, - { - "epoch": 1.824097316084186, - "grad_norm": 0.013871725648641586, - "learning_rate": 8.012413347579462e-05, - "loss": 0.0041, - "step": 2361 - }, - { - "epoch": 1.8248696659586794, - "grad_norm": 0.024569762870669365, - "learning_rate": 8.010259890616267e-05, - "loss": 0.0045, - "step": 2362 - }, - { - "epoch": 1.8256420158331723, - "grad_norm": 0.01810004748404026, - "learning_rate": 8.008105557439159e-05, - "loss": 0.004, - "step": 2363 - }, - { - "epoch": 1.8264143657076657, - "grad_norm": 0.01343507133424282, - "learning_rate": 8.005950348675205e-05, - "loss": 0.004, - "step": 2364 - }, - { - "epoch": 1.8271867155821586, - "grad_norm": 0.010591656900942326, - "learning_rate": 8.003794264951741e-05, - "loss": 0.0043, - "step": 2365 - }, - { - "epoch": 1.827959065456652, - "grad_norm": 0.010226280428469181, - "learning_rate": 8.001637306896346e-05, - "loss": 0.004, - "step": 2366 - }, - { - "epoch": 1.828731415331145, - "grad_norm": 0.019120151177048683, - "learning_rate": 7.999479475136859e-05, - "loss": 0.0041, - "step": 2367 - }, - { - "epoch": 1.8295037652056383, - "grad_norm": 0.015317712910473347, - "learning_rate": 7.997320770301377e-05, - "loss": 0.0043, - "step": 2368 - }, - { - "epoch": 1.8302761150801312, - "grad_norm": 0.011360458098351955, - "learning_rate": 7.995161193018241e-05, - "loss": 0.0041, - "step": 2369 - }, - { - "epoch": 1.8310484649546246, - "grad_norm": 0.009333595633506775, - "learning_rate": 7.993000743916056e-05, - "loss": 0.0042, - "step": 2370 - }, - { - "epoch": 1.8318208148291175, - "grad_norm": 0.013634170405566692, - "learning_rate": 7.990839423623675e-05, - "loss": 0.0041, - "step": 2371 - }, - { - "epoch": 1.8325931647036109, - "grad_norm": 0.008756657131016254, - "learning_rate": 7.988677232770205e-05, - "loss": 0.0038, - "step": 2372 - }, - { - "epoch": 1.8333655145781038, - "grad_norm": 0.011406585574150085, - "learning_rate": 7.98651417198501e-05, - "loss": 0.0047, - "step": 2373 - }, - { - "epoch": 1.8341378644525972, - "grad_norm": 0.009512819349765778, - "learning_rate": 7.984350241897703e-05, - "loss": 0.0044, - "step": 2374 - }, - { - "epoch": 1.8349102143270901, - "grad_norm": 0.011950364336371422, - "learning_rate": 7.98218544313815e-05, - "loss": 0.0048, - "step": 2375 - }, - { - "epoch": 1.8356825642015833, - "grad_norm": 0.010683958418667316, - "learning_rate": 7.980019776336475e-05, - "loss": 0.0043, - "step": 2376 - }, - { - "epoch": 1.8364549140760764, - "grad_norm": 0.009281586855649948, - "learning_rate": 7.977853242123052e-05, - "loss": 0.0037, - "step": 2377 - }, - { - "epoch": 1.8372272639505696, - "grad_norm": 0.01649283990263939, - "learning_rate": 7.975685841128502e-05, - "loss": 0.0043, - "step": 2378 - }, - { - "epoch": 1.8379996138250627, - "grad_norm": 0.008996492251753807, - "learning_rate": 7.973517573983707e-05, - "loss": 0.004, - "step": 2379 - }, - { - "epoch": 1.8387719636995559, - "grad_norm": 0.013421095907688141, - "learning_rate": 7.971348441319796e-05, - "loss": 0.0041, - "step": 2380 - }, - { - "epoch": 1.839544313574049, - "grad_norm": 0.012858722358942032, - "learning_rate": 7.969178443768151e-05, - "loss": 0.0039, - "step": 2381 - }, - { - "epoch": 1.8403166634485422, - "grad_norm": 0.00814067106693983, - "learning_rate": 7.967007581960407e-05, - "loss": 0.0039, - "step": 2382 - }, - { - "epoch": 1.8410890133230353, - "grad_norm": 0.014434592798352242, - "learning_rate": 7.964835856528446e-05, - "loss": 0.0036, - "step": 2383 - }, - { - "epoch": 1.8418613631975285, - "grad_norm": 0.010793699882924557, - "learning_rate": 7.962663268104408e-05, - "loss": 0.004, - "step": 2384 - }, - { - "epoch": 1.8426337130720216, - "grad_norm": 0.013711008243262768, - "learning_rate": 7.960489817320682e-05, - "loss": 0.0048, - "step": 2385 - }, - { - "epoch": 1.8434060629465148, - "grad_norm": 0.011649169027805328, - "learning_rate": 7.958315504809903e-05, - "loss": 0.0041, - "step": 2386 - }, - { - "epoch": 1.844178412821008, - "grad_norm": 0.026133539155125618, - "learning_rate": 7.956140331204963e-05, - "loss": 0.0042, - "step": 2387 - }, - { - "epoch": 1.844950762695501, - "grad_norm": 0.009728859178721905, - "learning_rate": 7.953964297139004e-05, - "loss": 0.0041, - "step": 2388 - }, - { - "epoch": 1.8457231125699942, - "grad_norm": 0.0270326379686594, - "learning_rate": 7.951787403245414e-05, - "loss": 0.0054, - "step": 2389 - }, - { - "epoch": 1.8464954624444874, - "grad_norm": 0.018707750365138054, - "learning_rate": 7.949609650157836e-05, - "loss": 0.0046, - "step": 2390 - }, - { - "epoch": 1.8472678123189805, - "grad_norm": 0.014537750743329525, - "learning_rate": 7.947431038510162e-05, - "loss": 0.004, - "step": 2391 - }, - { - "epoch": 1.8480401621934737, - "grad_norm": 0.01314216386526823, - "learning_rate": 7.945251568936529e-05, - "loss": 0.0045, - "step": 2392 - }, - { - "epoch": 1.8488125120679668, - "grad_norm": 0.028566807508468628, - "learning_rate": 7.943071242071334e-05, - "loss": 0.0046, - "step": 2393 - }, - { - "epoch": 1.84958486194246, - "grad_norm": 0.01397473644465208, - "learning_rate": 7.940890058549214e-05, - "loss": 0.0043, - "step": 2394 - }, - { - "epoch": 1.8503572118169531, - "grad_norm": 0.035390984266996384, - "learning_rate": 7.93870801900506e-05, - "loss": 0.0044, - "step": 2395 - }, - { - "epoch": 1.8511295616914463, - "grad_norm": 0.02588469907641411, - "learning_rate": 7.936525124074008e-05, - "loss": 0.0047, - "step": 2396 - }, - { - "epoch": 1.8519019115659394, - "grad_norm": 0.013072910718619823, - "learning_rate": 7.93434137439145e-05, - "loss": 0.0042, - "step": 2397 - }, - { - "epoch": 1.8526742614404326, - "grad_norm": 0.024199657142162323, - "learning_rate": 7.93215677059302e-05, - "loss": 0.0044, - "step": 2398 - }, - { - "epoch": 1.8534466113149257, - "grad_norm": 0.03284648805856705, - "learning_rate": 7.929971313314604e-05, - "loss": 0.0045, - "step": 2399 - }, - { - "epoch": 1.8542189611894186, - "grad_norm": 0.020237479358911514, - "learning_rate": 7.927785003192338e-05, - "loss": 0.0041, - "step": 2400 - }, - { - "epoch": 1.854991311063912, - "grad_norm": 0.012655111029744148, - "learning_rate": 7.925597840862602e-05, - "loss": 0.0042, - "step": 2401 - }, - { - "epoch": 1.855763660938405, - "grad_norm": 0.02779683843255043, - "learning_rate": 7.923409826962025e-05, - "loss": 0.0045, - "step": 2402 - }, - { - "epoch": 1.8565360108128983, - "grad_norm": 0.017316104844212532, - "learning_rate": 7.921220962127487e-05, - "loss": 0.0044, - "step": 2403 - }, - { - "epoch": 1.8573083606873912, - "grad_norm": 0.010716917924582958, - "learning_rate": 7.919031246996114e-05, - "loss": 0.0046, - "step": 2404 - }, - { - "epoch": 1.8580807105618846, - "grad_norm": 0.00959822628647089, - "learning_rate": 7.916840682205278e-05, - "loss": 0.0043, - "step": 2405 - }, - { - "epoch": 1.8588530604363775, - "grad_norm": 0.012936657294631004, - "learning_rate": 7.914649268392598e-05, - "loss": 0.0044, - "step": 2406 - }, - { - "epoch": 1.859625410310871, - "grad_norm": 0.02065490558743477, - "learning_rate": 7.912457006195945e-05, - "loss": 0.0045, - "step": 2407 - }, - { - "epoch": 1.8603977601853638, - "grad_norm": 0.01272338256239891, - "learning_rate": 7.910263896253432e-05, - "loss": 0.0039, - "step": 2408 - }, - { - "epoch": 1.8611701100598572, - "grad_norm": 0.010941894724965096, - "learning_rate": 7.908069939203419e-05, - "loss": 0.0045, - "step": 2409 - }, - { - "epoch": 1.8619424599343501, - "grad_norm": 0.009238572791218758, - "learning_rate": 7.905875135684515e-05, - "loss": 0.0041, - "step": 2410 - }, - { - "epoch": 1.8627148098088435, - "grad_norm": 0.017378829419612885, - "learning_rate": 7.903679486335575e-05, - "loss": 0.0041, - "step": 2411 - }, - { - "epoch": 1.8634871596833364, - "grad_norm": 0.019908621907234192, - "learning_rate": 7.901482991795699e-05, - "loss": 0.0052, - "step": 2412 - }, - { - "epoch": 1.8642595095578298, - "grad_norm": 0.011136463843286037, - "learning_rate": 7.899285652704232e-05, - "loss": 0.0037, - "step": 2413 - }, - { - "epoch": 1.8650318594323227, - "grad_norm": 0.033886104822158813, - "learning_rate": 7.897087469700768e-05, - "loss": 0.0047, - "step": 2414 - }, - { - "epoch": 1.8658042093068161, - "grad_norm": 0.024965351447463036, - "learning_rate": 7.894888443425145e-05, - "loss": 0.0043, - "step": 2415 - }, - { - "epoch": 1.866576559181309, - "grad_norm": 0.012115641497075558, - "learning_rate": 7.892688574517447e-05, - "loss": 0.0043, - "step": 2416 - }, - { - "epoch": 1.8673489090558024, - "grad_norm": 0.013908233493566513, - "learning_rate": 7.890487863618e-05, - "loss": 0.0045, - "step": 2417 - }, - { - "epoch": 1.8681212589302953, - "grad_norm": 0.02759517915546894, - "learning_rate": 7.888286311367379e-05, - "loss": 0.0046, - "step": 2418 - }, - { - "epoch": 1.8688936088047887, - "grad_norm": 0.008342528715729713, - "learning_rate": 7.886083918406404e-05, - "loss": 0.0039, - "step": 2419 - }, - { - "epoch": 1.8696659586792816, - "grad_norm": 0.00872607808560133, - "learning_rate": 7.883880685376137e-05, - "loss": 0.0047, - "step": 2420 - }, - { - "epoch": 1.870438308553775, - "grad_norm": 0.013731162063777447, - "learning_rate": 7.881676612917888e-05, - "loss": 0.004, - "step": 2421 - }, - { - "epoch": 1.871210658428268, - "grad_norm": 0.022072460502386093, - "learning_rate": 7.879471701673204e-05, - "loss": 0.0043, - "step": 2422 - }, - { - "epoch": 1.871983008302761, - "grad_norm": 0.010056992992758751, - "learning_rate": 7.877265952283889e-05, - "loss": 0.0042, - "step": 2423 - }, - { - "epoch": 1.8727553581772542, - "grad_norm": 0.009204492904245853, - "learning_rate": 7.875059365391977e-05, - "loss": 0.0042, - "step": 2424 - }, - { - "epoch": 1.8735277080517474, - "grad_norm": 0.00854427833110094, - "learning_rate": 7.872851941639754e-05, - "loss": 0.0042, - "step": 2425 - }, - { - "epoch": 1.8743000579262405, - "grad_norm": 0.01865958608686924, - "learning_rate": 7.87064368166975e-05, - "loss": 0.0034, - "step": 2426 - }, - { - "epoch": 1.8750724078007337, - "grad_norm": 0.011449846439063549, - "learning_rate": 7.868434586124734e-05, - "loss": 0.0042, - "step": 2427 - }, - { - "epoch": 1.8758447576752268, - "grad_norm": 0.017687395215034485, - "learning_rate": 7.866224655647718e-05, - "loss": 0.0041, - "step": 2428 - }, - { - "epoch": 1.87661710754972, - "grad_norm": 0.013626324012875557, - "learning_rate": 7.864013890881963e-05, - "loss": 0.0045, - "step": 2429 - }, - { - "epoch": 1.8773894574242131, - "grad_norm": 0.010985249653458595, - "learning_rate": 7.86180229247097e-05, - "loss": 0.0042, - "step": 2430 - }, - { - "epoch": 1.8781618072987063, - "grad_norm": 0.009432705119252205, - "learning_rate": 7.859589861058479e-05, - "loss": 0.0042, - "step": 2431 - }, - { - "epoch": 1.8789341571731994, - "grad_norm": 0.009560228325426579, - "learning_rate": 7.857376597288476e-05, - "loss": 0.004, - "step": 2432 - }, - { - "epoch": 1.8797065070476926, - "grad_norm": 0.014136698096990585, - "learning_rate": 7.85516250180519e-05, - "loss": 0.0045, - "step": 2433 - }, - { - "epoch": 1.8804788569221857, - "grad_norm": 0.010503970086574554, - "learning_rate": 7.85294757525309e-05, - "loss": 0.0046, - "step": 2434 - }, - { - "epoch": 1.881251206796679, - "grad_norm": 0.017374323680996895, - "learning_rate": 7.850731818276885e-05, - "loss": 0.0052, - "step": 2435 - }, - { - "epoch": 1.882023556671172, - "grad_norm": 0.011501210741698742, - "learning_rate": 7.848515231521531e-05, - "loss": 0.0043, - "step": 2436 - }, - { - "epoch": 1.8827959065456652, - "grad_norm": 0.009206007234752178, - "learning_rate": 7.846297815632224e-05, - "loss": 0.004, - "step": 2437 - }, - { - "epoch": 1.8835682564201583, - "grad_norm": 0.02441769279539585, - "learning_rate": 7.844079571254397e-05, - "loss": 0.0043, - "step": 2438 - }, - { - "epoch": 1.8843406062946515, - "grad_norm": 0.008784429170191288, - "learning_rate": 7.841860499033731e-05, - "loss": 0.0041, - "step": 2439 - }, - { - "epoch": 1.8851129561691446, - "grad_norm": 0.018666021525859833, - "learning_rate": 7.83964059961614e-05, - "loss": 0.0041, - "step": 2440 - }, - { - "epoch": 1.8858853060436378, - "grad_norm": 0.012539715506136417, - "learning_rate": 7.837419873647787e-05, - "loss": 0.0041, - "step": 2441 - }, - { - "epoch": 1.886657655918131, - "grad_norm": 0.008437935262918472, - "learning_rate": 7.835198321775067e-05, - "loss": 0.0041, - "step": 2442 - }, - { - "epoch": 1.887430005792624, - "grad_norm": 0.00943806767463684, - "learning_rate": 7.832975944644626e-05, - "loss": 0.0046, - "step": 2443 - }, - { - "epoch": 1.8882023556671172, - "grad_norm": 0.009006207808852196, - "learning_rate": 7.830752742903341e-05, - "loss": 0.0038, - "step": 2444 - }, - { - "epoch": 1.8889747055416104, - "grad_norm": 0.01270466111600399, - "learning_rate": 7.828528717198331e-05, - "loss": 0.0044, - "step": 2445 - }, - { - "epoch": 1.8897470554161035, - "grad_norm": 0.008993150666356087, - "learning_rate": 7.826303868176961e-05, - "loss": 0.0038, - "step": 2446 - }, - { - "epoch": 1.8905194052905965, - "grad_norm": 0.009910772554576397, - "learning_rate": 7.824078196486823e-05, - "loss": 0.0044, - "step": 2447 - }, - { - "epoch": 1.8912917551650898, - "grad_norm": 0.018001548945903778, - "learning_rate": 7.821851702775765e-05, - "loss": 0.0045, - "step": 2448 - }, - { - "epoch": 1.8920641050395828, - "grad_norm": 0.008888039737939835, - "learning_rate": 7.81962438769186e-05, - "loss": 0.0042, - "step": 2449 - }, - { - "epoch": 1.8928364549140761, - "grad_norm": 0.011056415736675262, - "learning_rate": 7.817396251883426e-05, - "loss": 0.004, - "step": 2450 - }, - { - "epoch": 1.893608804788569, - "grad_norm": 0.01189905870705843, - "learning_rate": 7.815167295999021e-05, - "loss": 0.0038, - "step": 2451 - }, - { - "epoch": 1.8943811546630624, - "grad_norm": 0.012073645368218422, - "learning_rate": 7.81293752068744e-05, - "loss": 0.0042, - "step": 2452 - }, - { - "epoch": 1.8951535045375554, - "grad_norm": 0.02056579291820526, - "learning_rate": 7.810706926597715e-05, - "loss": 0.0045, - "step": 2453 - }, - { - "epoch": 1.8959258544120487, - "grad_norm": 0.013602050952613354, - "learning_rate": 7.808475514379121e-05, - "loss": 0.0047, - "step": 2454 - }, - { - "epoch": 1.8966982042865417, - "grad_norm": 0.020173611119389534, - "learning_rate": 7.806243284681166e-05, - "loss": 0.0041, - "step": 2455 - }, - { - "epoch": 1.897470554161035, - "grad_norm": 0.013334146700799465, - "learning_rate": 7.804010238153598e-05, - "loss": 0.0034, - "step": 2456 - }, - { - "epoch": 1.898242904035528, - "grad_norm": 0.009373542852699757, - "learning_rate": 7.801776375446406e-05, - "loss": 0.0043, - "step": 2457 - }, - { - "epoch": 1.8990152539100214, - "grad_norm": 0.013425251469016075, - "learning_rate": 7.799541697209809e-05, - "loss": 0.0039, - "step": 2458 - }, - { - "epoch": 1.8997876037845143, - "grad_norm": 0.013179768808186054, - "learning_rate": 7.79730620409427e-05, - "loss": 0.0044, - "step": 2459 - }, - { - "epoch": 1.9005599536590077, - "grad_norm": 0.020629245787858963, - "learning_rate": 7.795069896750487e-05, - "loss": 0.004, - "step": 2460 - }, - { - "epoch": 1.9013323035335006, - "grad_norm": 0.01077171042561531, - "learning_rate": 7.792832775829395e-05, - "loss": 0.0038, - "step": 2461 - }, - { - "epoch": 1.902104653407994, - "grad_norm": 0.016820227727293968, - "learning_rate": 7.790594841982166e-05, - "loss": 0.0047, - "step": 2462 - }, - { - "epoch": 1.9028770032824869, - "grad_norm": 0.01245331671088934, - "learning_rate": 7.788356095860208e-05, - "loss": 0.0048, - "step": 2463 - }, - { - "epoch": 1.9036493531569803, - "grad_norm": 0.010155857540667057, - "learning_rate": 7.786116538115166e-05, - "loss": 0.004, - "step": 2464 - }, - { - "epoch": 1.9044217030314732, - "grad_norm": 0.012097825296223164, - "learning_rate": 7.783876169398921e-05, - "loss": 0.0042, - "step": 2465 - }, - { - "epoch": 1.9051940529059666, - "grad_norm": 0.014586256816983223, - "learning_rate": 7.78163499036359e-05, - "loss": 0.0048, - "step": 2466 - }, - { - "epoch": 1.9059664027804595, - "grad_norm": 0.010004458017647266, - "learning_rate": 7.779393001661529e-05, - "loss": 0.0046, - "step": 2467 - }, - { - "epoch": 1.9067387526549529, - "grad_norm": 0.010745279490947723, - "learning_rate": 7.777150203945322e-05, - "loss": 0.0039, - "step": 2468 - }, - { - "epoch": 1.9075111025294458, - "grad_norm": 0.01012917049229145, - "learning_rate": 7.774906597867797e-05, - "loss": 0.0043, - "step": 2469 - }, - { - "epoch": 1.908283452403939, - "grad_norm": 0.01098201610147953, - "learning_rate": 7.772662184082011e-05, - "loss": 0.004, - "step": 2470 - }, - { - "epoch": 1.909055802278432, - "grad_norm": 0.01820765621960163, - "learning_rate": 7.770416963241261e-05, - "loss": 0.0049, - "step": 2471 - }, - { - "epoch": 1.9098281521529252, - "grad_norm": 0.016260072588920593, - "learning_rate": 7.768170935999074e-05, - "loss": 0.0038, - "step": 2472 - }, - { - "epoch": 1.9106005020274184, - "grad_norm": 0.010583130642771721, - "learning_rate": 7.765924103009216e-05, - "loss": 0.0039, - "step": 2473 - }, - { - "epoch": 1.9113728519019115, - "grad_norm": 0.02265016734600067, - "learning_rate": 7.763676464925685e-05, - "loss": 0.0043, - "step": 2474 - }, - { - "epoch": 1.9121452017764047, - "grad_norm": 0.02282298542559147, - "learning_rate": 7.761428022402715e-05, - "loss": 0.0046, - "step": 2475 - }, - { - "epoch": 1.9129175516508978, - "grad_norm": 0.010374841280281544, - "learning_rate": 7.759178776094772e-05, - "loss": 0.0042, - "step": 2476 - }, - { - "epoch": 1.913689901525391, - "grad_norm": 0.027158847078680992, - "learning_rate": 7.756928726656559e-05, - "loss": 0.0049, - "step": 2477 - }, - { - "epoch": 1.9144622513998841, - "grad_norm": 0.017630072310566902, - "learning_rate": 7.754677874743009e-05, - "loss": 0.0044, - "step": 2478 - }, - { - "epoch": 1.9152346012743773, - "grad_norm": 0.015178913250565529, - "learning_rate": 7.75242622100929e-05, - "loss": 0.0043, - "step": 2479 - }, - { - "epoch": 1.9160069511488704, - "grad_norm": 0.01737389713525772, - "learning_rate": 7.750173766110806e-05, - "loss": 0.0048, - "step": 2480 - }, - { - "epoch": 1.9167793010233636, - "grad_norm": 0.028451379388570786, - "learning_rate": 7.747920510703194e-05, - "loss": 0.0047, - "step": 2481 - }, - { - "epoch": 1.9175516508978567, - "grad_norm": 0.0099767055362463, - "learning_rate": 7.745666455442318e-05, - "loss": 0.0039, - "step": 2482 - }, - { - "epoch": 1.9183240007723499, - "grad_norm": 0.023450423032045364, - "learning_rate": 7.743411600984282e-05, - "loss": 0.0043, - "step": 2483 - }, - { - "epoch": 1.919096350646843, - "grad_norm": 0.01755410246551037, - "learning_rate": 7.741155947985419e-05, - "loss": 0.0038, - "step": 2484 - }, - { - "epoch": 1.9198687005213362, - "grad_norm": 0.012542766518890858, - "learning_rate": 7.738899497102291e-05, - "loss": 0.0041, - "step": 2485 - }, - { - "epoch": 1.9206410503958293, - "grad_norm": 0.008834821172058582, - "learning_rate": 7.736642248991705e-05, - "loss": 0.0043, - "step": 2486 - }, - { - "epoch": 1.9214134002703225, - "grad_norm": 0.013980619609355927, - "learning_rate": 7.734384204310685e-05, - "loss": 0.0043, - "step": 2487 - }, - { - "epoch": 1.9221857501448156, - "grad_norm": 0.016002390533685684, - "learning_rate": 7.732125363716494e-05, - "loss": 0.0049, - "step": 2488 - }, - { - "epoch": 1.9229581000193088, - "grad_norm": 0.008405406959354877, - "learning_rate": 7.729865727866626e-05, - "loss": 0.0043, - "step": 2489 - }, - { - "epoch": 1.923730449893802, - "grad_norm": 0.007769963704049587, - "learning_rate": 7.727605297418808e-05, - "loss": 0.0043, - "step": 2490 - }, - { - "epoch": 1.924502799768295, - "grad_norm": 0.013532442972064018, - "learning_rate": 7.725344073030995e-05, - "loss": 0.0045, - "step": 2491 - }, - { - "epoch": 1.9252751496427882, - "grad_norm": 0.01704113557934761, - "learning_rate": 7.723082055361375e-05, - "loss": 0.0041, - "step": 2492 - }, - { - "epoch": 1.9260474995172814, - "grad_norm": 0.01334497332572937, - "learning_rate": 7.720819245068368e-05, - "loss": 0.004, - "step": 2493 - }, - { - "epoch": 1.9268198493917743, - "grad_norm": 0.008304566144943237, - "learning_rate": 7.718555642810623e-05, - "loss": 0.0041, - "step": 2494 - }, - { - "epoch": 1.9275921992662677, - "grad_norm": 0.014789161272346973, - "learning_rate": 7.716291249247018e-05, - "loss": 0.0043, - "step": 2495 - }, - { - "epoch": 1.9283645491407606, - "grad_norm": 0.01813403330743313, - "learning_rate": 7.714026065036666e-05, - "loss": 0.0041, - "step": 2496 - }, - { - "epoch": 1.929136899015254, - "grad_norm": 0.0111536281183362, - "learning_rate": 7.711760090838905e-05, - "loss": 0.0045, - "step": 2497 - }, - { - "epoch": 1.929909248889747, - "grad_norm": 0.01396595872938633, - "learning_rate": 7.709493327313307e-05, - "loss": 0.0048, - "step": 2498 - }, - { - "epoch": 1.9306815987642403, - "grad_norm": 0.015925578773021698, - "learning_rate": 7.707225775119671e-05, - "loss": 0.0043, - "step": 2499 - }, - { - "epoch": 1.9314539486387332, - "grad_norm": 0.008697424083948135, - "learning_rate": 7.704957434918028e-05, - "loss": 0.0042, - "step": 2500 - }, - { - "epoch": 1.9322262985132266, - "grad_norm": 0.0090946638956666, - "learning_rate": 7.702688307368635e-05, - "loss": 0.0042, - "step": 2501 - }, - { - "epoch": 1.9329986483877195, - "grad_norm": 0.01600305549800396, - "learning_rate": 7.700418393131982e-05, - "loss": 0.0039, - "step": 2502 - }, - { - "epoch": 1.9337709982622129, - "grad_norm": 0.015603674575686455, - "learning_rate": 7.698147692868785e-05, - "loss": 0.0041, - "step": 2503 - }, - { - "epoch": 1.9345433481367058, - "grad_norm": 0.010995871387422085, - "learning_rate": 7.695876207239993e-05, - "loss": 0.0042, - "step": 2504 - }, - { - "epoch": 1.9353156980111992, - "grad_norm": 0.011392833665013313, - "learning_rate": 7.693603936906775e-05, - "loss": 0.004, - "step": 2505 - }, - { - "epoch": 1.936088047885692, - "grad_norm": 0.01025738287717104, - "learning_rate": 7.691330882530539e-05, - "loss": 0.0039, - "step": 2506 - }, - { - "epoch": 1.9368603977601855, - "grad_norm": 0.020514898002147675, - "learning_rate": 7.689057044772914e-05, - "loss": 0.004, - "step": 2507 - }, - { - "epoch": 1.9376327476346784, - "grad_norm": 0.011781363748013973, - "learning_rate": 7.686782424295757e-05, - "loss": 0.0042, - "step": 2508 - }, - { - "epoch": 1.9384050975091718, - "grad_norm": 0.026511946693062782, - "learning_rate": 7.68450702176116e-05, - "loss": 0.0049, - "step": 2509 - }, - { - "epoch": 1.9391774473836647, - "grad_norm": 0.010671273805201054, - "learning_rate": 7.682230837831437e-05, - "loss": 0.0044, - "step": 2510 - }, - { - "epoch": 1.939949797258158, - "grad_norm": 0.013548245653510094, - "learning_rate": 7.679953873169125e-05, - "loss": 0.0046, - "step": 2511 - }, - { - "epoch": 1.940722147132651, - "grad_norm": 0.016395317390561104, - "learning_rate": 7.677676128436999e-05, - "loss": 0.0041, - "step": 2512 - }, - { - "epoch": 1.9414944970071444, - "grad_norm": 0.011841686442494392, - "learning_rate": 7.675397604298053e-05, - "loss": 0.0043, - "step": 2513 - }, - { - "epoch": 1.9422668468816373, - "grad_norm": 0.009677517227828503, - "learning_rate": 7.67311830141551e-05, - "loss": 0.0043, - "step": 2514 - }, - { - "epoch": 1.9430391967561307, - "grad_norm": 0.025067778304219246, - "learning_rate": 7.670838220452821e-05, - "loss": 0.0049, - "step": 2515 - }, - { - "epoch": 1.9438115466306236, - "grad_norm": 0.008173921145498753, - "learning_rate": 7.668557362073663e-05, - "loss": 0.0042, - "step": 2516 - }, - { - "epoch": 1.944583896505117, - "grad_norm": 0.011280354112386703, - "learning_rate": 7.666275726941936e-05, - "loss": 0.004, - "step": 2517 - }, - { - "epoch": 1.94535624637961, - "grad_norm": 0.010628647170960903, - "learning_rate": 7.663993315721771e-05, - "loss": 0.0044, - "step": 2518 - }, - { - "epoch": 1.946128596254103, - "grad_norm": 0.009494693949818611, - "learning_rate": 7.661710129077523e-05, - "loss": 0.0042, - "step": 2519 - }, - { - "epoch": 1.9469009461285962, - "grad_norm": 0.009466594085097313, - "learning_rate": 7.659426167673772e-05, - "loss": 0.004, - "step": 2520 - }, - { - "epoch": 1.9476732960030894, - "grad_norm": 0.007926770485937595, - "learning_rate": 7.657141432175323e-05, - "loss": 0.004, - "step": 2521 - }, - { - "epoch": 1.9484456458775825, - "grad_norm": 0.013132256455719471, - "learning_rate": 7.654855923247208e-05, - "loss": 0.0041, - "step": 2522 - }, - { - "epoch": 1.9492179957520757, - "grad_norm": 0.01026675384491682, - "learning_rate": 7.652569641554687e-05, - "loss": 0.0037, - "step": 2523 - }, - { - "epoch": 1.9499903456265688, - "grad_norm": 0.009410426020622253, - "learning_rate": 7.650282587763236e-05, - "loss": 0.004, - "step": 2524 - }, - { - "epoch": 1.950762695501062, - "grad_norm": 0.010991397313773632, - "learning_rate": 7.64799476253856e-05, - "loss": 0.0048, - "step": 2525 - }, - { - "epoch": 1.951535045375555, - "grad_norm": 0.011427038349211216, - "learning_rate": 7.645706166546596e-05, - "loss": 0.0038, - "step": 2526 - }, - { - "epoch": 1.9523073952500483, - "grad_norm": 0.009793714620172977, - "learning_rate": 7.643416800453495e-05, - "loss": 0.0037, - "step": 2527 - }, - { - "epoch": 1.9530797451245414, - "grad_norm": 0.013073851354420185, - "learning_rate": 7.641126664925637e-05, - "loss": 0.0046, - "step": 2528 - }, - { - "epoch": 1.9538520949990346, - "grad_norm": 0.013127139769494534, - "learning_rate": 7.638835760629626e-05, - "loss": 0.0038, - "step": 2529 - }, - { - "epoch": 1.9546244448735277, - "grad_norm": 0.01772155798971653, - "learning_rate": 7.636544088232284e-05, - "loss": 0.0055, - "step": 2530 - }, - { - "epoch": 1.9553967947480209, - "grad_norm": 0.015408862382173538, - "learning_rate": 7.63425164840067e-05, - "loss": 0.0045, - "step": 2531 - }, - { - "epoch": 1.956169144622514, - "grad_norm": 0.012125047855079174, - "learning_rate": 7.63195844180205e-05, - "loss": 0.004, - "step": 2532 - }, - { - "epoch": 1.9569414944970072, - "grad_norm": 0.015412149019539356, - "learning_rate": 7.629664469103926e-05, - "loss": 0.0035, - "step": 2533 - }, - { - "epoch": 1.9577138443715003, - "grad_norm": 0.008662154898047447, - "learning_rate": 7.627369730974016e-05, - "loss": 0.0041, - "step": 2534 - }, - { - "epoch": 1.9584861942459935, - "grad_norm": 0.008716880343854427, - "learning_rate": 7.625074228080262e-05, - "loss": 0.0042, - "step": 2535 - }, - { - "epoch": 1.9592585441204866, - "grad_norm": 0.01857982575893402, - "learning_rate": 7.62277796109083e-05, - "loss": 0.0039, - "step": 2536 - }, - { - "epoch": 1.9600308939949798, - "grad_norm": 0.015294843353331089, - "learning_rate": 7.62048093067411e-05, - "loss": 0.0039, - "step": 2537 - }, - { - "epoch": 1.960803243869473, - "grad_norm": 0.009465827606618404, - "learning_rate": 7.618183137498709e-05, - "loss": 0.0041, - "step": 2538 - }, - { - "epoch": 1.961575593743966, - "grad_norm": 0.01294454000890255, - "learning_rate": 7.615884582233461e-05, - "loss": 0.004, - "step": 2539 - }, - { - "epoch": 1.9623479436184592, - "grad_norm": 0.008212809450924397, - "learning_rate": 7.613585265547418e-05, - "loss": 0.0038, - "step": 2540 - }, - { - "epoch": 1.9631202934929524, - "grad_norm": 0.0148194320499897, - "learning_rate": 7.611285188109859e-05, - "loss": 0.0041, - "step": 2541 - }, - { - "epoch": 1.9638926433674455, - "grad_norm": 0.010401822626590729, - "learning_rate": 7.608984350590278e-05, - "loss": 0.0043, - "step": 2542 - }, - { - "epoch": 1.9646649932419384, - "grad_norm": 0.014921027235686779, - "learning_rate": 7.606682753658394e-05, - "loss": 0.0046, - "step": 2543 - }, - { - "epoch": 1.9654373431164318, - "grad_norm": 0.010640044696629047, - "learning_rate": 7.604380397984146e-05, - "loss": 0.0046, - "step": 2544 - }, - { - "epoch": 1.9662096929909247, - "grad_norm": 0.010940579697489738, - "learning_rate": 7.602077284237693e-05, - "loss": 0.0044, - "step": 2545 - }, - { - "epoch": 1.9669820428654181, - "grad_norm": 0.007183860521763563, - "learning_rate": 7.599773413089419e-05, - "loss": 0.004, - "step": 2546 - }, - { - "epoch": 1.967754392739911, - "grad_norm": 0.010012193582952023, - "learning_rate": 7.597468785209924e-05, - "loss": 0.0042, - "step": 2547 - }, - { - "epoch": 1.9685267426144044, - "grad_norm": 0.009017789736390114, - "learning_rate": 7.595163401270028e-05, - "loss": 0.0038, - "step": 2548 - }, - { - "epoch": 1.9692990924888973, - "grad_norm": 0.009588037617504597, - "learning_rate": 7.592857261940774e-05, - "loss": 0.0043, - "step": 2549 - }, - { - "epoch": 1.9700714423633907, - "grad_norm": 0.009811597876250744, - "learning_rate": 7.590550367893421e-05, - "loss": 0.0044, - "step": 2550 - }, - { - "epoch": 1.9708437922378836, - "grad_norm": 0.015444842167198658, - "learning_rate": 7.588242719799452e-05, - "loss": 0.0039, - "step": 2551 - }, - { - "epoch": 1.971616142112377, - "grad_norm": 0.01210468914359808, - "learning_rate": 7.585934318330569e-05, - "loss": 0.0039, - "step": 2552 - }, - { - "epoch": 1.97238849198687, - "grad_norm": 0.01698988489806652, - "learning_rate": 7.583625164158689e-05, - "loss": 0.0047, - "step": 2553 - }, - { - "epoch": 1.9731608418613633, - "grad_norm": 0.01659640483558178, - "learning_rate": 7.581315257955954e-05, - "loss": 0.0044, - "step": 2554 - }, - { - "epoch": 1.9739331917358562, - "grad_norm": 0.00832913164049387, - "learning_rate": 7.57900460039472e-05, - "loss": 0.0045, - "step": 2555 - }, - { - "epoch": 1.9747055416103496, - "grad_norm": 0.014485886320471764, - "learning_rate": 7.576693192147564e-05, - "loss": 0.0041, - "step": 2556 - }, - { - "epoch": 1.9754778914848425, - "grad_norm": 0.010839599184691906, - "learning_rate": 7.57438103388728e-05, - "loss": 0.0047, - "step": 2557 - }, - { - "epoch": 1.976250241359336, - "grad_norm": 0.011815196834504604, - "learning_rate": 7.572068126286883e-05, - "loss": 0.0046, - "step": 2558 - }, - { - "epoch": 1.9770225912338288, - "grad_norm": 0.011010092683136463, - "learning_rate": 7.569754470019603e-05, - "loss": 0.0038, - "step": 2559 - }, - { - "epoch": 1.9777949411083222, - "grad_norm": 0.008888042531907558, - "learning_rate": 7.56744006575889e-05, - "loss": 0.004, - "step": 2560 - }, - { - "epoch": 1.9785672909828151, - "grad_norm": 0.02021145448088646, - "learning_rate": 7.565124914178415e-05, - "loss": 0.0043, - "step": 2561 - }, - { - "epoch": 1.9793396408573085, - "grad_norm": 0.0163202416151762, - "learning_rate": 7.562809015952054e-05, - "loss": 0.0042, - "step": 2562 - }, - { - "epoch": 1.9801119907318014, - "grad_norm": 0.01251328643411398, - "learning_rate": 7.560492371753918e-05, - "loss": 0.0047, - "step": 2563 - }, - { - "epoch": 1.9808843406062948, - "grad_norm": 0.008392451331019402, - "learning_rate": 7.558174982258321e-05, - "loss": 0.0035, - "step": 2564 - }, - { - "epoch": 1.9816566904807877, - "grad_norm": 0.012348789721727371, - "learning_rate": 7.555856848139801e-05, - "loss": 0.0047, - "step": 2565 - }, - { - "epoch": 1.982429040355281, - "grad_norm": 0.014394666068255901, - "learning_rate": 7.55353797007311e-05, - "loss": 0.0046, - "step": 2566 - }, - { - "epoch": 1.983201390229774, - "grad_norm": 0.008999601006507874, - "learning_rate": 7.551218348733217e-05, - "loss": 0.0039, - "step": 2567 - }, - { - "epoch": 1.9839737401042672, - "grad_norm": 0.023641925305128098, - "learning_rate": 7.54889798479531e-05, - "loss": 0.0037, - "step": 2568 - }, - { - "epoch": 1.9847460899787603, - "grad_norm": 0.009869282133877277, - "learning_rate": 7.546576878934788e-05, - "loss": 0.0046, - "step": 2569 - }, - { - "epoch": 1.9855184398532535, - "grad_norm": 0.01192814763635397, - "learning_rate": 7.544255031827268e-05, - "loss": 0.0041, - "step": 2570 - }, - { - "epoch": 1.9862907897277466, - "grad_norm": 0.02921842224895954, - "learning_rate": 7.541932444148589e-05, - "loss": 0.0045, - "step": 2571 - }, - { - "epoch": 1.9870631396022398, - "grad_norm": 0.009306002408266068, - "learning_rate": 7.539609116574795e-05, - "loss": 0.0044, - "step": 2572 - }, - { - "epoch": 1.987835489476733, - "grad_norm": 0.009245769120752811, - "learning_rate": 7.537285049782153e-05, - "loss": 0.0045, - "step": 2573 - }, - { - "epoch": 1.988607839351226, - "grad_norm": 0.018734237179160118, - "learning_rate": 7.534960244447141e-05, - "loss": 0.0042, - "step": 2574 - }, - { - "epoch": 1.9893801892257192, - "grad_norm": 0.020502427592873573, - "learning_rate": 7.532634701246454e-05, - "loss": 0.0044, - "step": 2575 - }, - { - "epoch": 1.9901525391002124, - "grad_norm": 0.014067620038986206, - "learning_rate": 7.530308420857004e-05, - "loss": 0.0043, - "step": 2576 - }, - { - "epoch": 1.9909248889747055, - "grad_norm": 0.04353853315114975, - "learning_rate": 7.527981403955913e-05, - "loss": 0.005, - "step": 2577 - }, - { - "epoch": 1.9916972388491987, - "grad_norm": 0.022540874779224396, - "learning_rate": 7.525653651220519e-05, - "loss": 0.0045, - "step": 2578 - }, - { - "epoch": 1.9924695887236918, - "grad_norm": 0.017821505665779114, - "learning_rate": 7.523325163328375e-05, - "loss": 0.0044, - "step": 2579 - }, - { - "epoch": 1.993241938598185, - "grad_norm": 0.026386121287941933, - "learning_rate": 7.520995940957248e-05, - "loss": 0.0044, - "step": 2580 - }, - { - "epoch": 1.9940142884726781, - "grad_norm": 0.03731666877865791, - "learning_rate": 7.518665984785119e-05, - "loss": 0.0047, - "step": 2581 - }, - { - "epoch": 1.9947866383471713, - "grad_norm": 0.016136379912495613, - "learning_rate": 7.516335295490178e-05, - "loss": 0.0047, - "step": 2582 - }, - { - "epoch": 1.9955589882216644, - "grad_norm": 0.00737913278862834, - "learning_rate": 7.514003873750836e-05, - "loss": 0.0037, - "step": 2583 - }, - { - "epoch": 1.9963313380961576, - "grad_norm": 0.027570966631174088, - "learning_rate": 7.511671720245715e-05, - "loss": 0.0037, - "step": 2584 - }, - { - "epoch": 1.9971036879706507, - "grad_norm": 0.03167342394590378, - "learning_rate": 7.509338835653643e-05, - "loss": 0.0036, - "step": 2585 - }, - { - "epoch": 1.997876037845144, - "grad_norm": 0.009573575109243393, - "learning_rate": 7.507005220653673e-05, - "loss": 0.0037, - "step": 2586 - }, - { - "epoch": 1.998648387719637, - "grad_norm": 0.014849287457764149, - "learning_rate": 7.504670875925058e-05, - "loss": 0.0046, - "step": 2587 - }, - { - "epoch": 1.9994207375941302, - "grad_norm": 0.02890627458691597, - "learning_rate": 7.502335802147273e-05, - "loss": 0.0042, - "step": 2588 - }, - { - "epoch": 2.000772349874493, - "grad_norm": 0.038108453154563904, - "learning_rate": 7.500000000000001e-05, - "loss": 0.0079, - "step": 2589 - }, - { - "epoch": 2.0015446997489863, - "grad_norm": 0.006743496749550104, - "learning_rate": 7.497663470163135e-05, - "loss": 0.0034, - "step": 2590 - }, - { - "epoch": 2.0023170496234792, - "grad_norm": 0.026532011106610298, - "learning_rate": 7.495326213316787e-05, - "loss": 0.0042, - "step": 2591 - }, - { - "epoch": 2.0030893994979726, - "grad_norm": 0.03554273024201393, - "learning_rate": 7.492988230141272e-05, - "loss": 0.005, - "step": 2592 - }, - { - "epoch": 2.0038617493724655, - "grad_norm": 0.013866342604160309, - "learning_rate": 7.490649521317121e-05, - "loss": 0.004, - "step": 2593 - }, - { - "epoch": 2.004634099246959, - "grad_norm": 0.015046735294163227, - "learning_rate": 7.488310087525079e-05, - "loss": 0.0038, - "step": 2594 - }, - { - "epoch": 2.005406449121452, - "grad_norm": 0.027027055621147156, - "learning_rate": 7.485969929446094e-05, - "loss": 0.0042, - "step": 2595 - }, - { - "epoch": 2.006178798995945, - "grad_norm": 0.01871171034872532, - "learning_rate": 7.483629047761333e-05, - "loss": 0.0038, - "step": 2596 - }, - { - "epoch": 2.006951148870438, - "grad_norm": 0.010589679703116417, - "learning_rate": 7.481287443152167e-05, - "loss": 0.0035, - "step": 2597 - }, - { - "epoch": 2.0077234987449315, - "grad_norm": 0.016800161451101303, - "learning_rate": 7.478945116300183e-05, - "loss": 0.0041, - "step": 2598 - }, - { - "epoch": 2.0084958486194244, - "grad_norm": 0.009656563401222229, - "learning_rate": 7.476602067887178e-05, - "loss": 0.004, - "step": 2599 - }, - { - "epoch": 2.009268198493918, - "grad_norm": 0.032102540135383606, - "learning_rate": 7.474258298595148e-05, - "loss": 0.004, - "step": 2600 - }, - { - "epoch": 2.0100405483684107, - "grad_norm": 0.012074053287506104, - "learning_rate": 7.471913809106316e-05, - "loss": 0.004, - "step": 2601 - }, - { - "epoch": 2.010812898242904, - "grad_norm": 0.016124553978443146, - "learning_rate": 7.469568600103103e-05, - "loss": 0.0044, - "step": 2602 - }, - { - "epoch": 2.011585248117397, - "grad_norm": 0.02067081443965435, - "learning_rate": 7.467222672268146e-05, - "loss": 0.0038, - "step": 2603 - }, - { - "epoch": 2.0123575979918904, - "grad_norm": 0.038172945380210876, - "learning_rate": 7.464876026284281e-05, - "loss": 0.0038, - "step": 2604 - }, - { - "epoch": 2.0131299478663833, - "grad_norm": 0.012773305177688599, - "learning_rate": 7.462528662834568e-05, - "loss": 0.0042, - "step": 2605 - }, - { - "epoch": 2.0139022977408767, - "grad_norm": 0.03118148073554039, - "learning_rate": 7.460180582602262e-05, - "loss": 0.0044, - "step": 2606 - }, - { - "epoch": 2.0146746476153696, - "grad_norm": 0.02907433547079563, - "learning_rate": 7.457831786270834e-05, - "loss": 0.0039, - "step": 2607 - }, - { - "epoch": 2.015446997489863, - "grad_norm": 0.008807958103716373, - "learning_rate": 7.455482274523963e-05, - "loss": 0.0038, - "step": 2608 - }, - { - "epoch": 2.016219347364356, - "grad_norm": 0.01534703653305769, - "learning_rate": 7.453132048045532e-05, - "loss": 0.0041, - "step": 2609 - }, - { - "epoch": 2.0169916972388493, - "grad_norm": 0.016446243971586227, - "learning_rate": 7.45078110751964e-05, - "loss": 0.0045, - "step": 2610 - }, - { - "epoch": 2.0177640471133422, - "grad_norm": 0.0146534014493227, - "learning_rate": 7.448429453630585e-05, - "loss": 0.0043, - "step": 2611 - }, - { - "epoch": 2.0185363969878356, - "grad_norm": 0.011785686947405338, - "learning_rate": 7.446077087062879e-05, - "loss": 0.0038, - "step": 2612 - }, - { - "epoch": 2.0193087468623285, - "grad_norm": 0.01915571838617325, - "learning_rate": 7.443724008501237e-05, - "loss": 0.0048, - "step": 2613 - }, - { - "epoch": 2.020081096736822, - "grad_norm": 0.015387355349957943, - "learning_rate": 7.441370218630585e-05, - "loss": 0.0039, - "step": 2614 - }, - { - "epoch": 2.020853446611315, - "grad_norm": 0.009541081264615059, - "learning_rate": 7.439015718136055e-05, - "loss": 0.0038, - "step": 2615 - }, - { - "epoch": 2.021625796485808, - "grad_norm": 0.014201180078089237, - "learning_rate": 7.436660507702982e-05, - "loss": 0.0037, - "step": 2616 - }, - { - "epoch": 2.022398146360301, - "grad_norm": 0.010206950828433037, - "learning_rate": 7.434304588016912e-05, - "loss": 0.0038, - "step": 2617 - }, - { - "epoch": 2.0231704962347945, - "grad_norm": 0.020223252475261688, - "learning_rate": 7.431947959763598e-05, - "loss": 0.0041, - "step": 2618 - }, - { - "epoch": 2.0239428461092874, - "grad_norm": 0.010396569967269897, - "learning_rate": 7.429590623628998e-05, - "loss": 0.0044, - "step": 2619 - }, - { - "epoch": 2.024715195983781, - "grad_norm": 0.009901518002152443, - "learning_rate": 7.427232580299272e-05, - "loss": 0.0039, - "step": 2620 - }, - { - "epoch": 2.0254875458582737, - "grad_norm": 0.032874803990125656, - "learning_rate": 7.424873830460793e-05, - "loss": 0.0041, - "step": 2621 - }, - { - "epoch": 2.026259895732767, - "grad_norm": 0.013373463414609432, - "learning_rate": 7.422514374800135e-05, - "loss": 0.0035, - "step": 2622 - }, - { - "epoch": 2.02703224560726, - "grad_norm": 0.011829612776637077, - "learning_rate": 7.420154214004078e-05, - "loss": 0.0036, - "step": 2623 - }, - { - "epoch": 2.0278045954817534, - "grad_norm": 0.02061588130891323, - "learning_rate": 7.417793348759608e-05, - "loss": 0.0039, - "step": 2624 - }, - { - "epoch": 2.0285769453562463, - "grad_norm": 0.017115185037255287, - "learning_rate": 7.415431779753915e-05, - "loss": 0.0036, - "step": 2625 - }, - { - "epoch": 2.0293492952307397, - "grad_norm": 0.009569639340043068, - "learning_rate": 7.413069507674396e-05, - "loss": 0.0041, - "step": 2626 - }, - { - "epoch": 2.0301216451052326, - "grad_norm": 0.019505271688103676, - "learning_rate": 7.410706533208652e-05, - "loss": 0.0035, - "step": 2627 - }, - { - "epoch": 2.030893994979726, - "grad_norm": 0.01109884213656187, - "learning_rate": 7.408342857044484e-05, - "loss": 0.0036, - "step": 2628 - }, - { - "epoch": 2.031666344854219, - "grad_norm": 0.012622921727597713, - "learning_rate": 7.405978479869907e-05, - "loss": 0.0041, - "step": 2629 - }, - { - "epoch": 2.0324386947287123, - "grad_norm": 0.012068958953022957, - "learning_rate": 7.403613402373126e-05, - "loss": 0.004, - "step": 2630 - }, - { - "epoch": 2.0332110446032052, - "grad_norm": 0.010394621640443802, - "learning_rate": 7.401247625242566e-05, - "loss": 0.0037, - "step": 2631 - }, - { - "epoch": 2.0339833944776986, - "grad_norm": 0.013484945520758629, - "learning_rate": 7.398881149166846e-05, - "loss": 0.004, - "step": 2632 - }, - { - "epoch": 2.0347557443521915, - "grad_norm": 0.014950394630432129, - "learning_rate": 7.396513974834784e-05, - "loss": 0.0041, - "step": 2633 - }, - { - "epoch": 2.035528094226685, - "grad_norm": 0.011799067258834839, - "learning_rate": 7.394146102935414e-05, - "loss": 0.0035, - "step": 2634 - }, - { - "epoch": 2.036300444101178, - "grad_norm": 0.019200555980205536, - "learning_rate": 7.391777534157963e-05, - "loss": 0.004, - "step": 2635 - }, - { - "epoch": 2.037072793975671, - "grad_norm": 0.0084306038916111, - "learning_rate": 7.389408269191864e-05, - "loss": 0.0038, - "step": 2636 - }, - { - "epoch": 2.037845143850164, - "grad_norm": 0.007933447137475014, - "learning_rate": 7.387038308726755e-05, - "loss": 0.0036, - "step": 2637 - }, - { - "epoch": 2.038617493724657, - "grad_norm": 0.013884244486689568, - "learning_rate": 7.384667653452472e-05, - "loss": 0.004, - "step": 2638 - }, - { - "epoch": 2.0393898435991504, - "grad_norm": 0.013365771621465683, - "learning_rate": 7.382296304059055e-05, - "loss": 0.0039, - "step": 2639 - }, - { - "epoch": 2.0401621934736434, - "grad_norm": 0.017005208879709244, - "learning_rate": 7.379924261236751e-05, - "loss": 0.0035, - "step": 2640 - }, - { - "epoch": 2.0409345433481367, - "grad_norm": 0.016478469595313072, - "learning_rate": 7.377551525675999e-05, - "loss": 0.0038, - "step": 2641 - }, - { - "epoch": 2.0417068932226297, - "grad_norm": 0.010146384127438068, - "learning_rate": 7.375178098067448e-05, - "loss": 0.004, - "step": 2642 - }, - { - "epoch": 2.042479243097123, - "grad_norm": 0.012967503629624844, - "learning_rate": 7.372803979101945e-05, - "loss": 0.0035, - "step": 2643 - }, - { - "epoch": 2.043251592971616, - "grad_norm": 0.02204027958214283, - "learning_rate": 7.370429169470536e-05, - "loss": 0.004, - "step": 2644 - }, - { - "epoch": 2.0440239428461093, - "grad_norm": 0.009851394221186638, - "learning_rate": 7.368053669864475e-05, - "loss": 0.0037, - "step": 2645 - }, - { - "epoch": 2.0447962927206023, - "grad_norm": 0.01107101235538721, - "learning_rate": 7.365677480975211e-05, - "loss": 0.0038, - "step": 2646 - }, - { - "epoch": 2.0455686425950956, - "grad_norm": 0.011317633092403412, - "learning_rate": 7.363300603494393e-05, - "loss": 0.0039, - "step": 2647 - }, - { - "epoch": 2.0463409924695886, - "grad_norm": 0.008429724723100662, - "learning_rate": 7.360923038113876e-05, - "loss": 0.0039, - "step": 2648 - }, - { - "epoch": 2.047113342344082, - "grad_norm": 0.009489733725786209, - "learning_rate": 7.358544785525708e-05, - "loss": 0.0038, - "step": 2649 - }, - { - "epoch": 2.047885692218575, - "grad_norm": 0.009628896601498127, - "learning_rate": 7.356165846422144e-05, - "loss": 0.0038, - "step": 2650 - }, - { - "epoch": 2.0486580420930682, - "grad_norm": 0.008734790608286858, - "learning_rate": 7.353786221495636e-05, - "loss": 0.004, - "step": 2651 - }, - { - "epoch": 2.049430391967561, - "grad_norm": 0.010296465829014778, - "learning_rate": 7.351405911438833e-05, - "loss": 0.0039, - "step": 2652 - }, - { - "epoch": 2.0502027418420545, - "grad_norm": 0.016222462058067322, - "learning_rate": 7.349024916944586e-05, - "loss": 0.0037, - "step": 2653 - }, - { - "epoch": 2.0509750917165475, - "grad_norm": 0.010780733078718185, - "learning_rate": 7.346643238705946e-05, - "loss": 0.0042, - "step": 2654 - }, - { - "epoch": 2.051747441591041, - "grad_norm": 0.023083990439772606, - "learning_rate": 7.344260877416161e-05, - "loss": 0.0039, - "step": 2655 - }, - { - "epoch": 2.0525197914655338, - "grad_norm": 0.018788602203130722, - "learning_rate": 7.341877833768682e-05, - "loss": 0.0041, - "step": 2656 - }, - { - "epoch": 2.053292141340027, - "grad_norm": 0.01248422171920538, - "learning_rate": 7.33949410845715e-05, - "loss": 0.0042, - "step": 2657 - }, - { - "epoch": 2.05406449121452, - "grad_norm": 0.01570403017103672, - "learning_rate": 7.337109702175413e-05, - "loss": 0.0042, - "step": 2658 - }, - { - "epoch": 2.0548368410890134, - "grad_norm": 0.02470230497419834, - "learning_rate": 7.334724615617517e-05, - "loss": 0.0037, - "step": 2659 - }, - { - "epoch": 2.0556091909635064, - "grad_norm": 0.022730203345417976, - "learning_rate": 7.332338849477696e-05, - "loss": 0.004, - "step": 2660 - }, - { - "epoch": 2.0563815408379997, - "grad_norm": 0.008514747023582458, - "learning_rate": 7.329952404450395e-05, - "loss": 0.0036, - "step": 2661 - }, - { - "epoch": 2.0571538907124927, - "grad_norm": 0.02491837926208973, - "learning_rate": 7.327565281230247e-05, - "loss": 0.0038, - "step": 2662 - }, - { - "epoch": 2.057926240586986, - "grad_norm": 0.03023313358426094, - "learning_rate": 7.325177480512087e-05, - "loss": 0.0044, - "step": 2663 - }, - { - "epoch": 2.058698590461479, - "grad_norm": 0.007855813950300217, - "learning_rate": 7.322789002990948e-05, - "loss": 0.0033, - "step": 2664 - }, - { - "epoch": 2.0594709403359723, - "grad_norm": 0.011223818175494671, - "learning_rate": 7.320399849362055e-05, - "loss": 0.0035, - "step": 2665 - }, - { - "epoch": 2.0602432902104653, - "grad_norm": 0.017690075561404228, - "learning_rate": 7.318010020320833e-05, - "loss": 0.0042, - "step": 2666 - }, - { - "epoch": 2.0610156400849586, - "grad_norm": 0.01451319083571434, - "learning_rate": 7.315619516562908e-05, - "loss": 0.0037, - "step": 2667 - }, - { - "epoch": 2.0617879899594516, - "grad_norm": 0.009698505513370037, - "learning_rate": 7.313228338784091e-05, - "loss": 0.0037, - "step": 2668 - }, - { - "epoch": 2.062560339833945, - "grad_norm": 0.00963345356285572, - "learning_rate": 7.310836487680402e-05, - "loss": 0.0038, - "step": 2669 - }, - { - "epoch": 2.063332689708438, - "grad_norm": 0.022508280351758003, - "learning_rate": 7.308443963948047e-05, - "loss": 0.004, - "step": 2670 - }, - { - "epoch": 2.0641050395829312, - "grad_norm": 0.021821528673171997, - "learning_rate": 7.306050768283434e-05, - "loss": 0.0036, - "step": 2671 - }, - { - "epoch": 2.064877389457424, - "grad_norm": 0.009050424210727215, - "learning_rate": 7.303656901383164e-05, - "loss": 0.0037, - "step": 2672 - }, - { - "epoch": 2.0656497393319175, - "grad_norm": 0.020669065415859222, - "learning_rate": 7.301262363944035e-05, - "loss": 0.0035, - "step": 2673 - }, - { - "epoch": 2.0664220892064105, - "grad_norm": 0.019859908148646355, - "learning_rate": 7.298867156663036e-05, - "loss": 0.0038, - "step": 2674 - }, - { - "epoch": 2.067194439080904, - "grad_norm": 0.009505180642008781, - "learning_rate": 7.296471280237356e-05, - "loss": 0.0036, - "step": 2675 - }, - { - "epoch": 2.0679667889553968, - "grad_norm": 0.012986565008759499, - "learning_rate": 7.294074735364378e-05, - "loss": 0.0043, - "step": 2676 - }, - { - "epoch": 2.06873913882989, - "grad_norm": 0.02292938530445099, - "learning_rate": 7.291677522741676e-05, - "loss": 0.0038, - "step": 2677 - }, - { - "epoch": 2.069511488704383, - "grad_norm": 0.01900501362979412, - "learning_rate": 7.289279643067021e-05, - "loss": 0.0034, - "step": 2678 - }, - { - "epoch": 2.0702838385788764, - "grad_norm": 0.00864870473742485, - "learning_rate": 7.286881097038378e-05, - "loss": 0.0035, - "step": 2679 - }, - { - "epoch": 2.0710561884533694, - "grad_norm": 0.02228534035384655, - "learning_rate": 7.284481885353906e-05, - "loss": 0.0041, - "step": 2680 - }, - { - "epoch": 2.0718285383278627, - "grad_norm": 0.014121807180345058, - "learning_rate": 7.282082008711959e-05, - "loss": 0.0038, - "step": 2681 - }, - { - "epoch": 2.0726008882023557, - "grad_norm": 0.011820808053016663, - "learning_rate": 7.279681467811082e-05, - "loss": 0.0038, - "step": 2682 - }, - { - "epoch": 2.073373238076849, - "grad_norm": 0.007909181527793407, - "learning_rate": 7.277280263350012e-05, - "loss": 0.0035, - "step": 2683 - }, - { - "epoch": 2.074145587951342, - "grad_norm": 0.014459396712481976, - "learning_rate": 7.274878396027685e-05, - "loss": 0.0038, - "step": 2684 - }, - { - "epoch": 2.074917937825835, - "grad_norm": 0.02249385230243206, - "learning_rate": 7.272475866543225e-05, - "loss": 0.0042, - "step": 2685 - }, - { - "epoch": 2.0756902877003283, - "grad_norm": 0.008177523501217365, - "learning_rate": 7.270072675595951e-05, - "loss": 0.0035, - "step": 2686 - }, - { - "epoch": 2.076462637574821, - "grad_norm": 0.010570460930466652, - "learning_rate": 7.267668823885373e-05, - "loss": 0.0038, - "step": 2687 - }, - { - "epoch": 2.0772349874493146, - "grad_norm": 0.01808362826704979, - "learning_rate": 7.265264312111194e-05, - "loss": 0.0037, - "step": 2688 - }, - { - "epoch": 2.0780073373238075, - "grad_norm": 0.008438099175691605, - "learning_rate": 7.26285914097331e-05, - "loss": 0.0039, - "step": 2689 - }, - { - "epoch": 2.078779687198301, - "grad_norm": 0.009861689060926437, - "learning_rate": 7.260453311171809e-05, - "loss": 0.0036, - "step": 2690 - }, - { - "epoch": 2.079552037072794, - "grad_norm": 0.009457158856093884, - "learning_rate": 7.258046823406968e-05, - "loss": 0.0036, - "step": 2691 - }, - { - "epoch": 2.080324386947287, - "grad_norm": 0.012208986096084118, - "learning_rate": 7.25563967837926e-05, - "loss": 0.0038, - "step": 2692 - }, - { - "epoch": 2.08109673682178, - "grad_norm": 0.010008537210524082, - "learning_rate": 7.253231876789343e-05, - "loss": 0.0034, - "step": 2693 - }, - { - "epoch": 2.0818690866962735, - "grad_norm": 0.01255972869694233, - "learning_rate": 7.250823419338073e-05, - "loss": 0.0036, - "step": 2694 - }, - { - "epoch": 2.0826414365707664, - "grad_norm": 0.010346543975174427, - "learning_rate": 7.248414306726492e-05, - "loss": 0.0041, - "step": 2695 - }, - { - "epoch": 2.0834137864452598, - "grad_norm": 0.022845614701509476, - "learning_rate": 7.246004539655836e-05, - "loss": 0.0043, - "step": 2696 - }, - { - "epoch": 2.0841861363197527, - "grad_norm": 0.016264963895082474, - "learning_rate": 7.24359411882753e-05, - "loss": 0.004, - "step": 2697 - }, - { - "epoch": 2.084958486194246, - "grad_norm": 0.01792088896036148, - "learning_rate": 7.241183044943187e-05, - "loss": 0.0034, - "step": 2698 - }, - { - "epoch": 2.085730836068739, - "grad_norm": 0.01296139508485794, - "learning_rate": 7.238771318704615e-05, - "loss": 0.0034, - "step": 2699 - }, - { - "epoch": 2.0865031859432324, - "grad_norm": 0.010842915624380112, - "learning_rate": 7.236358940813807e-05, - "loss": 0.0038, - "step": 2700 - }, - { - "epoch": 2.0872755358177253, - "grad_norm": 0.012911595404148102, - "learning_rate": 7.233945911972948e-05, - "loss": 0.0041, - "step": 2701 - }, - { - "epoch": 2.0880478856922187, - "grad_norm": 0.02002376690506935, - "learning_rate": 7.231532232884417e-05, - "loss": 0.0038, - "step": 2702 - }, - { - "epoch": 2.0888202355667116, - "grad_norm": 0.022102218121290207, - "learning_rate": 7.229117904250771e-05, - "loss": 0.004, - "step": 2703 - }, - { - "epoch": 2.089592585441205, - "grad_norm": 0.008304497227072716, - "learning_rate": 7.226702926774767e-05, - "loss": 0.0039, - "step": 2704 - }, - { - "epoch": 2.090364935315698, - "grad_norm": 0.024397362023591995, - "learning_rate": 7.224287301159345e-05, - "loss": 0.005, - "step": 2705 - }, - { - "epoch": 2.0911372851901913, - "grad_norm": 0.043411292135715485, - "learning_rate": 7.221871028107635e-05, - "loss": 0.0047, - "step": 2706 - }, - { - "epoch": 2.091909635064684, - "grad_norm": 0.010221214033663273, - "learning_rate": 7.219454108322957e-05, - "loss": 0.0041, - "step": 2707 - }, - { - "epoch": 2.0926819849391776, - "grad_norm": 0.020435620099306107, - "learning_rate": 7.217036542508817e-05, - "loss": 0.0042, - "step": 2708 - }, - { - "epoch": 2.0934543348136705, - "grad_norm": 0.0436616912484169, - "learning_rate": 7.21461833136891e-05, - "loss": 0.0041, - "step": 2709 - }, - { - "epoch": 2.094226684688164, - "grad_norm": 0.018153520300984383, - "learning_rate": 7.212199475607119e-05, - "loss": 0.0041, - "step": 2710 - }, - { - "epoch": 2.094999034562657, - "grad_norm": 0.025095542892813683, - "learning_rate": 7.209779975927515e-05, - "loss": 0.0041, - "step": 2711 - }, - { - "epoch": 2.09577138443715, - "grad_norm": 0.03797965496778488, - "learning_rate": 7.207359833034355e-05, - "loss": 0.0043, - "step": 2712 - }, - { - "epoch": 2.096543734311643, - "grad_norm": 0.018812689930200577, - "learning_rate": 7.204939047632085e-05, - "loss": 0.0038, - "step": 2713 - }, - { - "epoch": 2.0973160841861365, - "grad_norm": 0.00882665067911148, - "learning_rate": 7.202517620425335e-05, - "loss": 0.0039, - "step": 2714 - }, - { - "epoch": 2.0980884340606294, - "grad_norm": 0.021644921973347664, - "learning_rate": 7.200095552118927e-05, - "loss": 0.0041, - "step": 2715 - }, - { - "epoch": 2.0988607839351228, - "grad_norm": 0.03702374920248985, - "learning_rate": 7.197672843417865e-05, - "loss": 0.0041, - "step": 2716 - }, - { - "epoch": 2.0996331338096157, - "grad_norm": 0.024662388488650322, - "learning_rate": 7.195249495027343e-05, - "loss": 0.0041, - "step": 2717 - }, - { - "epoch": 2.100405483684109, - "grad_norm": 0.02720011956989765, - "learning_rate": 7.192825507652734e-05, - "loss": 0.0045, - "step": 2718 - }, - { - "epoch": 2.101177833558602, - "grad_norm": 0.027912747114896774, - "learning_rate": 7.190400881999607e-05, - "loss": 0.0038, - "step": 2719 - }, - { - "epoch": 2.1019501834330954, - "grad_norm": 0.03504815697669983, - "learning_rate": 7.18797561877371e-05, - "loss": 0.0039, - "step": 2720 - }, - { - "epoch": 2.1027225333075883, - "grad_norm": 0.010269366204738617, - "learning_rate": 7.18554971868098e-05, - "loss": 0.0035, - "step": 2721 - }, - { - "epoch": 2.1034948831820817, - "grad_norm": 0.02692263200879097, - "learning_rate": 7.183123182427536e-05, - "loss": 0.004, - "step": 2722 - }, - { - "epoch": 2.1042672330565746, - "grad_norm": 0.025207681581377983, - "learning_rate": 7.180696010719683e-05, - "loss": 0.0039, - "step": 2723 - }, - { - "epoch": 2.105039582931068, - "grad_norm": 0.015352983959019184, - "learning_rate": 7.178268204263919e-05, - "loss": 0.0042, - "step": 2724 - }, - { - "epoch": 2.105811932805561, - "grad_norm": 0.009109385311603546, - "learning_rate": 7.175839763766909e-05, - "loss": 0.004, - "step": 2725 - }, - { - "epoch": 2.1065842826800543, - "grad_norm": 0.012240353971719742, - "learning_rate": 7.173410689935521e-05, - "loss": 0.0039, - "step": 2726 - }, - { - "epoch": 2.107356632554547, - "grad_norm": 0.020937219262123108, - "learning_rate": 7.1709809834768e-05, - "loss": 0.0039, - "step": 2727 - }, - { - "epoch": 2.1081289824290406, - "grad_norm": 0.01876830868422985, - "learning_rate": 7.16855064509797e-05, - "loss": 0.0037, - "step": 2728 - }, - { - "epoch": 2.1089013323035335, - "grad_norm": 0.012717272154986858, - "learning_rate": 7.166119675506449e-05, - "loss": 0.0038, - "step": 2729 - }, - { - "epoch": 2.109673682178027, - "grad_norm": 0.007943187840282917, - "learning_rate": 7.163688075409828e-05, - "loss": 0.0037, - "step": 2730 - }, - { - "epoch": 2.11044603205252, - "grad_norm": 0.018614256754517555, - "learning_rate": 7.161255845515891e-05, - "loss": 0.0041, - "step": 2731 - }, - { - "epoch": 2.1112183819270127, - "grad_norm": 0.013146874494850636, - "learning_rate": 7.158822986532601e-05, - "loss": 0.0038, - "step": 2732 - }, - { - "epoch": 2.111990731801506, - "grad_norm": 0.011025556363165379, - "learning_rate": 7.156389499168102e-05, - "loss": 0.0031, - "step": 2733 - }, - { - "epoch": 2.112763081675999, - "grad_norm": 0.01522703468799591, - "learning_rate": 7.153955384130726e-05, - "loss": 0.0037, - "step": 2734 - }, - { - "epoch": 2.1135354315504924, - "grad_norm": 0.01450337190181017, - "learning_rate": 7.151520642128985e-05, - "loss": 0.004, - "step": 2735 - }, - { - "epoch": 2.1143077814249853, - "grad_norm": 0.015826785936951637, - "learning_rate": 7.149085273871572e-05, - "loss": 0.0038, - "step": 2736 - }, - { - "epoch": 2.1150801312994787, - "grad_norm": 0.009047970175743103, - "learning_rate": 7.146649280067365e-05, - "loss": 0.0041, - "step": 2737 - }, - { - "epoch": 2.1158524811739716, - "grad_norm": 0.008611311204731464, - "learning_rate": 7.144212661425422e-05, - "loss": 0.0036, - "step": 2738 - }, - { - "epoch": 2.116624831048465, - "grad_norm": 0.016116051003336906, - "learning_rate": 7.141775418654985e-05, - "loss": 0.0039, - "step": 2739 - }, - { - "epoch": 2.117397180922958, - "grad_norm": 0.024656126275658607, - "learning_rate": 7.139337552465475e-05, - "loss": 0.0049, - "step": 2740 - }, - { - "epoch": 2.1181695307974513, - "grad_norm": 0.015838859602808952, - "learning_rate": 7.136899063566498e-05, - "loss": 0.0039, - "step": 2741 - }, - { - "epoch": 2.1189418806719442, - "grad_norm": 0.025547225028276443, - "learning_rate": 7.134459952667837e-05, - "loss": 0.0041, - "step": 2742 - }, - { - "epoch": 2.1197142305464376, - "grad_norm": 0.030239900574088097, - "learning_rate": 7.132020220479459e-05, - "loss": 0.0038, - "step": 2743 - }, - { - "epoch": 2.1204865804209305, - "grad_norm": 0.011210362426936626, - "learning_rate": 7.129579867711511e-05, - "loss": 0.0043, - "step": 2744 - }, - { - "epoch": 2.121258930295424, - "grad_norm": 0.022186698392033577, - "learning_rate": 7.127138895074322e-05, - "loss": 0.0042, - "step": 2745 - }, - { - "epoch": 2.122031280169917, - "grad_norm": 0.02024666965007782, - "learning_rate": 7.124697303278399e-05, - "loss": 0.0038, - "step": 2746 - }, - { - "epoch": 2.12280363004441, - "grad_norm": 0.012369618751108646, - "learning_rate": 7.12225509303443e-05, - "loss": 0.0042, - "step": 2747 - }, - { - "epoch": 2.123575979918903, - "grad_norm": 0.011102922260761261, - "learning_rate": 7.119812265053286e-05, - "loss": 0.0037, - "step": 2748 - }, - { - "epoch": 2.1243483297933965, - "grad_norm": 0.014041735790669918, - "learning_rate": 7.11736882004601e-05, - "loss": 0.0038, - "step": 2749 - }, - { - "epoch": 2.1251206796678894, - "grad_norm": 0.010623575188219547, - "learning_rate": 7.114924758723833e-05, - "loss": 0.0036, - "step": 2750 - }, - { - "epoch": 2.125893029542383, - "grad_norm": 0.01225269865244627, - "learning_rate": 7.112480081798165e-05, - "loss": 0.0039, - "step": 2751 - }, - { - "epoch": 2.1266653794168757, - "grad_norm": 0.013135985471308231, - "learning_rate": 7.110034789980588e-05, - "loss": 0.0039, - "step": 2752 - }, - { - "epoch": 2.127437729291369, - "grad_norm": 0.00986009743064642, - "learning_rate": 7.107588883982868e-05, - "loss": 0.0034, - "step": 2753 - }, - { - "epoch": 2.128210079165862, - "grad_norm": 0.00858435407280922, - "learning_rate": 7.105142364516952e-05, - "loss": 0.0034, - "step": 2754 - }, - { - "epoch": 2.1289824290403554, - "grad_norm": 0.008631178177893162, - "learning_rate": 7.102695232294958e-05, - "loss": 0.0036, - "step": 2755 - }, - { - "epoch": 2.1297547789148483, - "grad_norm": 0.009610356763005257, - "learning_rate": 7.100247488029192e-05, - "loss": 0.0036, - "step": 2756 - }, - { - "epoch": 2.1305271287893417, - "grad_norm": 0.009580448269844055, - "learning_rate": 7.09779913243213e-05, - "loss": 0.0037, - "step": 2757 - }, - { - "epoch": 2.1312994786638346, - "grad_norm": 0.009013169445097446, - "learning_rate": 7.095350166216431e-05, - "loss": 0.0038, - "step": 2758 - }, - { - "epoch": 2.132071828538328, - "grad_norm": 0.015030079521238804, - "learning_rate": 7.092900590094928e-05, - "loss": 0.0039, - "step": 2759 - }, - { - "epoch": 2.132844178412821, - "grad_norm": 0.011109733954071999, - "learning_rate": 7.090450404780635e-05, - "loss": 0.0037, - "step": 2760 - }, - { - "epoch": 2.1336165282873143, - "grad_norm": 0.009983384981751442, - "learning_rate": 7.087999610986741e-05, - "loss": 0.0036, - "step": 2761 - }, - { - "epoch": 2.1343888781618072, - "grad_norm": 0.01634259894490242, - "learning_rate": 7.085548209426613e-05, - "loss": 0.0043, - "step": 2762 - }, - { - "epoch": 2.1351612280363006, - "grad_norm": 0.009563818573951721, - "learning_rate": 7.083096200813794e-05, - "loss": 0.0038, - "step": 2763 - }, - { - "epoch": 2.1359335779107935, - "grad_norm": 0.011839197017252445, - "learning_rate": 7.080643585862007e-05, - "loss": 0.0043, - "step": 2764 - }, - { - "epoch": 2.136705927785287, - "grad_norm": 0.01338752917945385, - "learning_rate": 7.078190365285147e-05, - "loss": 0.0037, - "step": 2765 - }, - { - "epoch": 2.13747827765978, - "grad_norm": 0.009806690737605095, - "learning_rate": 7.075736539797287e-05, - "loss": 0.0033, - "step": 2766 - }, - { - "epoch": 2.138250627534273, - "grad_norm": 0.01095563918352127, - "learning_rate": 7.073282110112676e-05, - "loss": 0.004, - "step": 2767 - }, - { - "epoch": 2.139022977408766, - "grad_norm": 0.016509409993886948, - "learning_rate": 7.07082707694574e-05, - "loss": 0.0035, - "step": 2768 - }, - { - "epoch": 2.1397953272832595, - "grad_norm": 0.00953350868076086, - "learning_rate": 7.06837144101108e-05, - "loss": 0.0044, - "step": 2769 - }, - { - "epoch": 2.1405676771577524, - "grad_norm": 0.00878667738288641, - "learning_rate": 7.065915203023472e-05, - "loss": 0.0038, - "step": 2770 - }, - { - "epoch": 2.141340027032246, - "grad_norm": 0.01307417917996645, - "learning_rate": 7.063458363697867e-05, - "loss": 0.0043, - "step": 2771 - }, - { - "epoch": 2.1421123769067387, - "grad_norm": 0.0096151577308774, - "learning_rate": 7.061000923749395e-05, - "loss": 0.0039, - "step": 2772 - }, - { - "epoch": 2.142884726781232, - "grad_norm": 0.0076342313550412655, - "learning_rate": 7.058542883893351e-05, - "loss": 0.0035, - "step": 2773 - }, - { - "epoch": 2.143657076655725, - "grad_norm": 0.012591948732733727, - "learning_rate": 7.056084244845216e-05, - "loss": 0.0035, - "step": 2774 - }, - { - "epoch": 2.1444294265302184, - "grad_norm": 0.008794068358838558, - "learning_rate": 7.05362500732064e-05, - "loss": 0.0038, - "step": 2775 - }, - { - "epoch": 2.1452017764047113, - "grad_norm": 0.010124838910996914, - "learning_rate": 7.051165172035444e-05, - "loss": 0.0041, - "step": 2776 - }, - { - "epoch": 2.1459741262792047, - "grad_norm": 0.012167713604867458, - "learning_rate": 7.048704739705632e-05, - "loss": 0.0042, - "step": 2777 - }, - { - "epoch": 2.1467464761536976, - "grad_norm": 0.01282426342368126, - "learning_rate": 7.046243711047372e-05, - "loss": 0.0036, - "step": 2778 - }, - { - "epoch": 2.1475188260281906, - "grad_norm": 0.014842244796454906, - "learning_rate": 7.043782086777011e-05, - "loss": 0.0038, - "step": 2779 - }, - { - "epoch": 2.148291175902684, - "grad_norm": 0.0100597208365798, - "learning_rate": 7.04131986761107e-05, - "loss": 0.0039, - "step": 2780 - }, - { - "epoch": 2.1490635257771773, - "grad_norm": 0.013920299708843231, - "learning_rate": 7.038857054266241e-05, - "loss": 0.0043, - "step": 2781 - }, - { - "epoch": 2.1498358756516702, - "grad_norm": 0.013597175478935242, - "learning_rate": 7.036393647459387e-05, - "loss": 0.0042, - "step": 2782 - }, - { - "epoch": 2.150608225526163, - "grad_norm": 0.009669664315879345, - "learning_rate": 7.03392964790755e-05, - "loss": 0.0037, - "step": 2783 - }, - { - "epoch": 2.1513805754006565, - "grad_norm": 0.013507052324712276, - "learning_rate": 7.031465056327936e-05, - "loss": 0.0034, - "step": 2784 - }, - { - "epoch": 2.1521529252751495, - "grad_norm": 0.00888124294579029, - "learning_rate": 7.028999873437931e-05, - "loss": 0.0037, - "step": 2785 - }, - { - "epoch": 2.152925275149643, - "grad_norm": 0.015375855378806591, - "learning_rate": 7.026534099955094e-05, - "loss": 0.0039, - "step": 2786 - }, - { - "epoch": 2.1536976250241358, - "grad_norm": 0.013118419796228409, - "learning_rate": 7.024067736597145e-05, - "loss": 0.0041, - "step": 2787 - }, - { - "epoch": 2.154469974898629, - "grad_norm": 0.017940763384103775, - "learning_rate": 7.021600784081985e-05, - "loss": 0.0041, - "step": 2788 - }, - { - "epoch": 2.155242324773122, - "grad_norm": 0.016645725816488266, - "learning_rate": 7.019133243127688e-05, - "loss": 0.0041, - "step": 2789 - }, - { - "epoch": 2.1560146746476154, - "grad_norm": 0.010853741317987442, - "learning_rate": 7.016665114452491e-05, - "loss": 0.0032, - "step": 2790 - }, - { - "epoch": 2.1567870245221084, - "grad_norm": 0.015913356095552444, - "learning_rate": 7.014196398774808e-05, - "loss": 0.0042, - "step": 2791 - }, - { - "epoch": 2.1575593743966017, - "grad_norm": 0.014743924140930176, - "learning_rate": 7.011727096813226e-05, - "loss": 0.0032, - "step": 2792 - }, - { - "epoch": 2.1583317242710947, - "grad_norm": 0.007792849093675613, - "learning_rate": 7.009257209286491e-05, - "loss": 0.004, - "step": 2793 - }, - { - "epoch": 2.159104074145588, - "grad_norm": 0.009073421359062195, - "learning_rate": 7.006786736913536e-05, - "loss": 0.0037, - "step": 2794 - }, - { - "epoch": 2.159876424020081, - "grad_norm": 0.011311429552733898, - "learning_rate": 7.00431568041345e-05, - "loss": 0.004, - "step": 2795 - }, - { - "epoch": 2.1606487738945743, - "grad_norm": 0.013861387968063354, - "learning_rate": 7.001844040505501e-05, - "loss": 0.0036, - "step": 2796 - }, - { - "epoch": 2.1614211237690673, - "grad_norm": 0.02032576873898506, - "learning_rate": 6.999371817909124e-05, - "loss": 0.0041, - "step": 2797 - }, - { - "epoch": 2.1621934736435606, - "grad_norm": 0.023006441071629524, - "learning_rate": 6.99689901334392e-05, - "loss": 0.0038, - "step": 2798 - }, - { - "epoch": 2.1629658235180536, - "grad_norm": 0.017457854002714157, - "learning_rate": 6.994425627529666e-05, - "loss": 0.0034, - "step": 2799 - }, - { - "epoch": 2.163738173392547, - "grad_norm": 0.012595701031386852, - "learning_rate": 6.991951661186305e-05, - "loss": 0.0045, - "step": 2800 - }, - { - "epoch": 2.16451052326704, - "grad_norm": 0.010976454243063927, - "learning_rate": 6.989477115033945e-05, - "loss": 0.0036, - "step": 2801 - }, - { - "epoch": 2.1652828731415332, - "grad_norm": 0.009047305211424828, - "learning_rate": 6.987001989792869e-05, - "loss": 0.0035, - "step": 2802 - }, - { - "epoch": 2.166055223016026, - "grad_norm": 0.02184477262198925, - "learning_rate": 6.984526286183528e-05, - "loss": 0.0035, - "step": 2803 - }, - { - "epoch": 2.1668275728905195, - "grad_norm": 0.013075319118797779, - "learning_rate": 6.982050004926537e-05, - "loss": 0.0035, - "step": 2804 - }, - { - "epoch": 2.1675999227650125, - "grad_norm": 0.010313881561160088, - "learning_rate": 6.979573146742682e-05, - "loss": 0.0039, - "step": 2805 - }, - { - "epoch": 2.168372272639506, - "grad_norm": 0.026523558422923088, - "learning_rate": 6.977095712352916e-05, - "loss": 0.0037, - "step": 2806 - }, - { - "epoch": 2.1691446225139988, - "grad_norm": 0.019071469083428383, - "learning_rate": 6.974617702478362e-05, - "loss": 0.0041, - "step": 2807 - }, - { - "epoch": 2.169916972388492, - "grad_norm": 0.010107327252626419, - "learning_rate": 6.972139117840307e-05, - "loss": 0.0038, - "step": 2808 - }, - { - "epoch": 2.170689322262985, - "grad_norm": 0.023329516872763634, - "learning_rate": 6.96965995916021e-05, - "loss": 0.0038, - "step": 2809 - }, - { - "epoch": 2.1714616721374784, - "grad_norm": 0.014047396369278431, - "learning_rate": 6.967180227159691e-05, - "loss": 0.0037, - "step": 2810 - }, - { - "epoch": 2.1722340220119714, - "grad_norm": 0.010431072674691677, - "learning_rate": 6.96469992256054e-05, - "loss": 0.0039, - "step": 2811 - }, - { - "epoch": 2.1730063718864647, - "grad_norm": 0.015764454379677773, - "learning_rate": 6.962219046084717e-05, - "loss": 0.0039, - "step": 2812 - }, - { - "epoch": 2.1737787217609577, - "grad_norm": 0.013384529389441013, - "learning_rate": 6.959737598454342e-05, - "loss": 0.0039, - "step": 2813 - }, - { - "epoch": 2.174551071635451, - "grad_norm": 0.01582462526857853, - "learning_rate": 6.957255580391707e-05, - "loss": 0.0036, - "step": 2814 - }, - { - "epoch": 2.175323421509944, - "grad_norm": 0.008840755559504032, - "learning_rate": 6.954772992619265e-05, - "loss": 0.0038, - "step": 2815 - }, - { - "epoch": 2.1760957713844373, - "grad_norm": 0.01822042465209961, - "learning_rate": 6.952289835859639e-05, - "loss": 0.0035, - "step": 2816 - }, - { - "epoch": 2.1768681212589303, - "grad_norm": 0.02567700669169426, - "learning_rate": 6.949806110835615e-05, - "loss": 0.0035, - "step": 2817 - }, - { - "epoch": 2.1776404711334236, - "grad_norm": 0.018477164208889008, - "learning_rate": 6.947321818270146e-05, - "loss": 0.0037, - "step": 2818 - }, - { - "epoch": 2.1784128210079166, - "grad_norm": 0.009455078281462193, - "learning_rate": 6.944836958886349e-05, - "loss": 0.0037, - "step": 2819 - }, - { - "epoch": 2.17918517088241, - "grad_norm": 0.029945973306894302, - "learning_rate": 6.942351533407507e-05, - "loss": 0.0041, - "step": 2820 - }, - { - "epoch": 2.179957520756903, - "grad_norm": 0.022179346531629562, - "learning_rate": 6.939865542557067e-05, - "loss": 0.0037, - "step": 2821 - }, - { - "epoch": 2.1807298706313962, - "grad_norm": 0.016879554837942123, - "learning_rate": 6.937378987058642e-05, - "loss": 0.0036, - "step": 2822 - }, - { - "epoch": 2.181502220505889, - "grad_norm": 0.015892690047621727, - "learning_rate": 6.934891867636004e-05, - "loss": 0.004, - "step": 2823 - }, - { - "epoch": 2.1822745703803825, - "grad_norm": 0.036032550036907196, - "learning_rate": 6.9324041850131e-05, - "loss": 0.004, - "step": 2824 - }, - { - "epoch": 2.1830469202548755, - "grad_norm": 0.01696144975721836, - "learning_rate": 6.92991593991403e-05, - "loss": 0.0036, - "step": 2825 - }, - { - "epoch": 2.1838192701293684, - "grad_norm": 0.007884988561272621, - "learning_rate": 6.927427133063061e-05, - "loss": 0.0038, - "step": 2826 - }, - { - "epoch": 2.1845916200038618, - "grad_norm": 0.026676487177610397, - "learning_rate": 6.924937765184629e-05, - "loss": 0.0038, - "step": 2827 - }, - { - "epoch": 2.185363969878355, - "grad_norm": 0.019141726195812225, - "learning_rate": 6.922447837003324e-05, - "loss": 0.004, - "step": 2828 - }, - { - "epoch": 2.186136319752848, - "grad_norm": 0.015733450651168823, - "learning_rate": 6.919957349243907e-05, - "loss": 0.0045, - "step": 2829 - }, - { - "epoch": 2.186908669627341, - "grad_norm": 0.012079773470759392, - "learning_rate": 6.9174663026313e-05, - "loss": 0.0032, - "step": 2830 - }, - { - "epoch": 2.1876810195018344, - "grad_norm": 0.013535288162529469, - "learning_rate": 6.914974697890581e-05, - "loss": 0.004, - "step": 2831 - }, - { - "epoch": 2.1884533693763273, - "grad_norm": 0.01917087472975254, - "learning_rate": 6.912482535747002e-05, - "loss": 0.0035, - "step": 2832 - }, - { - "epoch": 2.1892257192508207, - "grad_norm": 0.009686388075351715, - "learning_rate": 6.909989816925967e-05, - "loss": 0.0035, - "step": 2833 - }, - { - "epoch": 2.1899980691253136, - "grad_norm": 0.009863371029496193, - "learning_rate": 6.907496542153049e-05, - "loss": 0.0036, - "step": 2834 - }, - { - "epoch": 2.190770418999807, - "grad_norm": 0.010294755920767784, - "learning_rate": 6.90500271215398e-05, - "loss": 0.0039, - "step": 2835 - }, - { - "epoch": 2.1915427688743, - "grad_norm": 0.01450702641159296, - "learning_rate": 6.902508327654649e-05, - "loss": 0.0035, - "step": 2836 - }, - { - "epoch": 2.1923151187487933, - "grad_norm": 0.020422223955392838, - "learning_rate": 6.900013389381117e-05, - "loss": 0.0043, - "step": 2837 - }, - { - "epoch": 2.193087468623286, - "grad_norm": 0.014234035275876522, - "learning_rate": 6.897517898059597e-05, - "loss": 0.0037, - "step": 2838 - }, - { - "epoch": 2.1938598184977796, - "grad_norm": 0.021411612629890442, - "learning_rate": 6.895021854416467e-05, - "loss": 0.0037, - "step": 2839 - }, - { - "epoch": 2.1946321683722725, - "grad_norm": 0.021227024495601654, - "learning_rate": 6.892525259178265e-05, - "loss": 0.0041, - "step": 2840 - }, - { - "epoch": 2.195404518246766, - "grad_norm": 0.012483715079724789, - "learning_rate": 6.89002811307169e-05, - "loss": 0.0036, - "step": 2841 - }, - { - "epoch": 2.196176868121259, - "grad_norm": 0.011528292670845985, - "learning_rate": 6.8875304168236e-05, - "loss": 0.0035, - "step": 2842 - }, - { - "epoch": 2.196949217995752, - "grad_norm": 0.018989963456988335, - "learning_rate": 6.885032171161014e-05, - "loss": 0.0038, - "step": 2843 - }, - { - "epoch": 2.197721567870245, - "grad_norm": 0.018031930550932884, - "learning_rate": 6.882533376811112e-05, - "loss": 0.0041, - "step": 2844 - }, - { - "epoch": 2.1984939177447385, - "grad_norm": 0.026337895542383194, - "learning_rate": 6.880034034501232e-05, - "loss": 0.0046, - "step": 2845 - }, - { - "epoch": 2.1992662676192314, - "grad_norm": 0.01727277971804142, - "learning_rate": 6.877534144958873e-05, - "loss": 0.0032, - "step": 2846 - }, - { - "epoch": 2.2000386174937248, - "grad_norm": 0.012766748666763306, - "learning_rate": 6.875033708911692e-05, - "loss": 0.0032, - "step": 2847 - }, - { - "epoch": 2.2008109673682177, - "grad_norm": 0.02523941732943058, - "learning_rate": 6.872532727087502e-05, - "loss": 0.0034, - "step": 2848 - }, - { - "epoch": 2.201583317242711, - "grad_norm": 0.014299865812063217, - "learning_rate": 6.870031200214285e-05, - "loss": 0.0041, - "step": 2849 - }, - { - "epoch": 2.202355667117204, - "grad_norm": 0.011945880018174648, - "learning_rate": 6.86752912902017e-05, - "loss": 0.0032, - "step": 2850 - }, - { - "epoch": 2.2031280169916974, - "grad_norm": 0.023927869275212288, - "learning_rate": 6.865026514233452e-05, - "loss": 0.004, - "step": 2851 - }, - { - "epoch": 2.2039003668661903, - "grad_norm": 0.01110320258885622, - "learning_rate": 6.862523356582579e-05, - "loss": 0.0038, - "step": 2852 - }, - { - "epoch": 2.2046727167406837, - "grad_norm": 0.010775089263916016, - "learning_rate": 6.860019656796163e-05, - "loss": 0.0038, - "step": 2853 - }, - { - "epoch": 2.2054450666151766, - "grad_norm": 0.013647436164319515, - "learning_rate": 6.85751541560297e-05, - "loss": 0.0039, - "step": 2854 - }, - { - "epoch": 2.20621741648967, - "grad_norm": 0.014270083047449589, - "learning_rate": 6.855010633731923e-05, - "loss": 0.0038, - "step": 2855 - }, - { - "epoch": 2.206989766364163, - "grad_norm": 0.009960106573998928, - "learning_rate": 6.8525053119121e-05, - "loss": 0.0038, - "step": 2856 - }, - { - "epoch": 2.2077621162386563, - "grad_norm": 0.0098283551633358, - "learning_rate": 6.849999450872745e-05, - "loss": 0.0041, - "step": 2857 - }, - { - "epoch": 2.208534466113149, - "grad_norm": 0.01134185679256916, - "learning_rate": 6.847493051343252e-05, - "loss": 0.0045, - "step": 2858 - }, - { - "epoch": 2.2093068159876426, - "grad_norm": 0.012657348066568375, - "learning_rate": 6.844986114053173e-05, - "loss": 0.0037, - "step": 2859 - }, - { - "epoch": 2.2100791658621355, - "grad_norm": 0.01441334281116724, - "learning_rate": 6.842478639732219e-05, - "loss": 0.0039, - "step": 2860 - }, - { - "epoch": 2.210851515736629, - "grad_norm": 0.011827160604298115, - "learning_rate": 6.83997062911025e-05, - "loss": 0.0041, - "step": 2861 - }, - { - "epoch": 2.211623865611122, - "grad_norm": 0.015119560062885284, - "learning_rate": 6.837462082917295e-05, - "loss": 0.0042, - "step": 2862 - }, - { - "epoch": 2.212396215485615, - "grad_norm": 0.007931212894618511, - "learning_rate": 6.834953001883522e-05, - "loss": 0.004, - "step": 2863 - }, - { - "epoch": 2.213168565360108, - "grad_norm": 0.013704811222851276, - "learning_rate": 6.832443386739269e-05, - "loss": 0.0035, - "step": 2864 - }, - { - "epoch": 2.2139409152346015, - "grad_norm": 0.008806932717561722, - "learning_rate": 6.829933238215028e-05, - "loss": 0.0043, - "step": 2865 - }, - { - "epoch": 2.2147132651090944, - "grad_norm": 0.008302022702991962, - "learning_rate": 6.827422557041433e-05, - "loss": 0.0035, - "step": 2866 - }, - { - "epoch": 2.2154856149835878, - "grad_norm": 0.011451393365859985, - "learning_rate": 6.824911343949291e-05, - "loss": 0.0036, - "step": 2867 - }, - { - "epoch": 2.2162579648580807, - "grad_norm": 0.00943044200539589, - "learning_rate": 6.822399599669552e-05, - "loss": 0.0038, - "step": 2868 - }, - { - "epoch": 2.217030314732574, - "grad_norm": 0.008121364749968052, - "learning_rate": 6.819887324933325e-05, - "loss": 0.0036, - "step": 2869 - }, - { - "epoch": 2.217802664607067, - "grad_norm": 0.009307894855737686, - "learning_rate": 6.81737452047187e-05, - "loss": 0.0048, - "step": 2870 - }, - { - "epoch": 2.2185750144815604, - "grad_norm": 0.010115236043930054, - "learning_rate": 6.814861187016608e-05, - "loss": 0.0044, - "step": 2871 - }, - { - "epoch": 2.2193473643560533, - "grad_norm": 0.009662744589149952, - "learning_rate": 6.812347325299107e-05, - "loss": 0.004, - "step": 2872 - }, - { - "epoch": 2.220119714230546, - "grad_norm": 0.01051376387476921, - "learning_rate": 6.809832936051092e-05, - "loss": 0.0044, - "step": 2873 - }, - { - "epoch": 2.2208920641050396, - "grad_norm": 0.010458176024258137, - "learning_rate": 6.80731802000444e-05, - "loss": 0.0038, - "step": 2874 - }, - { - "epoch": 2.221664413979533, - "grad_norm": 0.007859159260988235, - "learning_rate": 6.804802577891182e-05, - "loss": 0.0044, - "step": 2875 - }, - { - "epoch": 2.222436763854026, - "grad_norm": 0.007987123914062977, - "learning_rate": 6.802286610443506e-05, - "loss": 0.0038, - "step": 2876 - }, - { - "epoch": 2.223209113728519, - "grad_norm": 0.008854716084897518, - "learning_rate": 6.799770118393746e-05, - "loss": 0.0037, - "step": 2877 - }, - { - "epoch": 2.223981463603012, - "grad_norm": 0.008117330260574818, - "learning_rate": 6.797253102474392e-05, - "loss": 0.0038, - "step": 2878 - }, - { - "epoch": 2.224753813477505, - "grad_norm": 0.010898245498538017, - "learning_rate": 6.794735563418087e-05, - "loss": 0.0035, - "step": 2879 - }, - { - "epoch": 2.2255261633519985, - "grad_norm": 0.009254688397049904, - "learning_rate": 6.792217501957626e-05, - "loss": 0.004, - "step": 2880 - }, - { - "epoch": 2.2262985132264914, - "grad_norm": 0.02000666782259941, - "learning_rate": 6.789698918825957e-05, - "loss": 0.0033, - "step": 2881 - }, - { - "epoch": 2.227070863100985, - "grad_norm": 0.009464719332754612, - "learning_rate": 6.787179814756177e-05, - "loss": 0.0033, - "step": 2882 - }, - { - "epoch": 2.2278432129754777, - "grad_norm": 0.010253376327455044, - "learning_rate": 6.784660190481535e-05, - "loss": 0.0045, - "step": 2883 - }, - { - "epoch": 2.228615562849971, - "grad_norm": 0.01094959769397974, - "learning_rate": 6.782140046735439e-05, - "loss": 0.0032, - "step": 2884 - }, - { - "epoch": 2.229387912724464, - "grad_norm": 0.0162676814943552, - "learning_rate": 6.779619384251435e-05, - "loss": 0.0037, - "step": 2885 - }, - { - "epoch": 2.2301602625989574, - "grad_norm": 0.00939108245074749, - "learning_rate": 6.77709820376323e-05, - "loss": 0.0035, - "step": 2886 - }, - { - "epoch": 2.2309326124734503, - "grad_norm": 0.010298709385097027, - "learning_rate": 6.774576506004678e-05, - "loss": 0.004, - "step": 2887 - }, - { - "epoch": 2.2317049623479437, - "grad_norm": 0.011534671299159527, - "learning_rate": 6.772054291709784e-05, - "loss": 0.0041, - "step": 2888 - }, - { - "epoch": 2.2324773122224366, - "grad_norm": 0.011011037975549698, - "learning_rate": 6.769531561612706e-05, - "loss": 0.0037, - "step": 2889 - }, - { - "epoch": 2.23324966209693, - "grad_norm": 0.010238991118967533, - "learning_rate": 6.767008316447747e-05, - "loss": 0.0035, - "step": 2890 - }, - { - "epoch": 2.234022011971423, - "grad_norm": 0.010306601412594318, - "learning_rate": 6.764484556949362e-05, - "loss": 0.0044, - "step": 2891 - }, - { - "epoch": 2.2347943618459163, - "grad_norm": 0.01326883677393198, - "learning_rate": 6.76196028385216e-05, - "loss": 0.0033, - "step": 2892 - }, - { - "epoch": 2.235566711720409, - "grad_norm": 0.00924383569508791, - "learning_rate": 6.759435497890894e-05, - "loss": 0.0037, - "step": 2893 - }, - { - "epoch": 2.2363390615949026, - "grad_norm": 0.01186363585293293, - "learning_rate": 6.756910199800468e-05, - "loss": 0.0041, - "step": 2894 - }, - { - "epoch": 2.2371114114693955, - "grad_norm": 0.007807865273207426, - "learning_rate": 6.754384390315936e-05, - "loss": 0.0036, - "step": 2895 - }, - { - "epoch": 2.237883761343889, - "grad_norm": 0.01037057489156723, - "learning_rate": 6.751858070172499e-05, - "loss": 0.0036, - "step": 2896 - }, - { - "epoch": 2.238656111218382, - "grad_norm": 0.011497294530272484, - "learning_rate": 6.749331240105507e-05, - "loss": 0.0037, - "step": 2897 - }, - { - "epoch": 2.239428461092875, - "grad_norm": 0.02164345420897007, - "learning_rate": 6.746803900850462e-05, - "loss": 0.0037, - "step": 2898 - }, - { - "epoch": 2.240200810967368, - "grad_norm": 0.008693995885550976, - "learning_rate": 6.74427605314301e-05, - "loss": 0.0038, - "step": 2899 - }, - { - "epoch": 2.2409731608418615, - "grad_norm": 0.021689802408218384, - "learning_rate": 6.741747697718946e-05, - "loss": 0.0042, - "step": 2900 - }, - { - "epoch": 2.2417455107163544, - "grad_norm": 0.008022323250770569, - "learning_rate": 6.739218835314213e-05, - "loss": 0.0041, - "step": 2901 - }, - { - "epoch": 2.242517860590848, - "grad_norm": 0.010573562234640121, - "learning_rate": 6.736689466664902e-05, - "loss": 0.0043, - "step": 2902 - }, - { - "epoch": 2.2432902104653407, - "grad_norm": 0.009405065327882767, - "learning_rate": 6.734159592507252e-05, - "loss": 0.0041, - "step": 2903 - }, - { - "epoch": 2.244062560339834, - "grad_norm": 0.009706784039735794, - "learning_rate": 6.731629213577647e-05, - "loss": 0.0042, - "step": 2904 - }, - { - "epoch": 2.244834910214327, - "grad_norm": 0.011170423589646816, - "learning_rate": 6.72909833061262e-05, - "loss": 0.0038, - "step": 2905 - }, - { - "epoch": 2.2456072600888204, - "grad_norm": 0.008561627008020878, - "learning_rate": 6.72656694434885e-05, - "loss": 0.0037, - "step": 2906 - }, - { - "epoch": 2.2463796099633133, - "grad_norm": 0.009911119937896729, - "learning_rate": 6.724035055523161e-05, - "loss": 0.0041, - "step": 2907 - }, - { - "epoch": 2.2471519598378067, - "grad_norm": 0.014809616841375828, - "learning_rate": 6.721502664872526e-05, - "loss": 0.0042, - "step": 2908 - }, - { - "epoch": 2.2479243097122996, - "grad_norm": 0.012895144522190094, - "learning_rate": 6.718969773134062e-05, - "loss": 0.0037, - "step": 2909 - }, - { - "epoch": 2.248696659586793, - "grad_norm": 0.009620807133615017, - "learning_rate": 6.716436381045032e-05, - "loss": 0.0039, - "step": 2910 - }, - { - "epoch": 2.249469009461286, - "grad_norm": 0.014791177585721016, - "learning_rate": 6.713902489342849e-05, - "loss": 0.0042, - "step": 2911 - }, - { - "epoch": 2.2502413593357793, - "grad_norm": 0.00880932342261076, - "learning_rate": 6.711368098765063e-05, - "loss": 0.0033, - "step": 2912 - }, - { - "epoch": 2.251013709210272, - "grad_norm": 0.01417181733995676, - "learning_rate": 6.708833210049374e-05, - "loss": 0.0037, - "step": 2913 - }, - { - "epoch": 2.2517860590847656, - "grad_norm": 0.010888157412409782, - "learning_rate": 6.706297823933631e-05, - "loss": 0.0037, - "step": 2914 - }, - { - "epoch": 2.2525584089592585, - "grad_norm": 0.01013087760657072, - "learning_rate": 6.70376194115582e-05, - "loss": 0.0041, - "step": 2915 - }, - { - "epoch": 2.2533307588337514, - "grad_norm": 0.021921338513493538, - "learning_rate": 6.701225562454077e-05, - "loss": 0.0037, - "step": 2916 - }, - { - "epoch": 2.254103108708245, - "grad_norm": 0.013176465407013893, - "learning_rate": 6.698688688566679e-05, - "loss": 0.0042, - "step": 2917 - }, - { - "epoch": 2.254875458582738, - "grad_norm": 0.012408842332661152, - "learning_rate": 6.69615132023205e-05, - "loss": 0.0041, - "step": 2918 - }, - { - "epoch": 2.255647808457231, - "grad_norm": 0.019068893045186996, - "learning_rate": 6.693613458188756e-05, - "loss": 0.0035, - "step": 2919 - }, - { - "epoch": 2.256420158331724, - "grad_norm": 0.008966639637947083, - "learning_rate": 6.691075103175506e-05, - "loss": 0.0037, - "step": 2920 - }, - { - "epoch": 2.2571925082062174, - "grad_norm": 0.010589229874312878, - "learning_rate": 6.688536255931157e-05, - "loss": 0.0037, - "step": 2921 - }, - { - "epoch": 2.257964858080711, - "grad_norm": 0.012341762892901897, - "learning_rate": 6.685996917194705e-05, - "loss": 0.0039, - "step": 2922 - }, - { - "epoch": 2.2587372079552037, - "grad_norm": 0.01653473637998104, - "learning_rate": 6.683457087705287e-05, - "loss": 0.0038, - "step": 2923 - }, - { - "epoch": 2.2595095578296966, - "grad_norm": 0.010807894170284271, - "learning_rate": 6.68091676820219e-05, - "loss": 0.0039, - "step": 2924 - }, - { - "epoch": 2.26028190770419, - "grad_norm": 0.013531602919101715, - "learning_rate": 6.67837595942484e-05, - "loss": 0.0036, - "step": 2925 - }, - { - "epoch": 2.2610542575786834, - "grad_norm": 0.008627147413790226, - "learning_rate": 6.675834662112801e-05, - "loss": 0.0034, - "step": 2926 - }, - { - "epoch": 2.2618266074531763, - "grad_norm": 0.009081604890525341, - "learning_rate": 6.673292877005786e-05, - "loss": 0.0035, - "step": 2927 - }, - { - "epoch": 2.2625989573276692, - "grad_norm": 0.009991762228310108, - "learning_rate": 6.670750604843646e-05, - "loss": 0.0035, - "step": 2928 - }, - { - "epoch": 2.2633713072021626, - "grad_norm": 0.008216849528253078, - "learning_rate": 6.668207846366377e-05, - "loss": 0.0032, - "step": 2929 - }, - { - "epoch": 2.2641436570766555, - "grad_norm": 0.0134681211784482, - "learning_rate": 6.665664602314112e-05, - "loss": 0.0038, - "step": 2930 - }, - { - "epoch": 2.264916006951149, - "grad_norm": 0.013249974697828293, - "learning_rate": 6.663120873427129e-05, - "loss": 0.004, - "step": 2931 - }, - { - "epoch": 2.265688356825642, - "grad_norm": 0.011910191737115383, - "learning_rate": 6.660576660445846e-05, - "loss": 0.0038, - "step": 2932 - }, - { - "epoch": 2.266460706700135, - "grad_norm": 0.01929398812353611, - "learning_rate": 6.658031964110822e-05, - "loss": 0.004, - "step": 2933 - }, - { - "epoch": 2.267233056574628, - "grad_norm": 0.011442175135016441, - "learning_rate": 6.655486785162758e-05, - "loss": 0.0043, - "step": 2934 - }, - { - "epoch": 2.2680054064491215, - "grad_norm": 0.012157517485320568, - "learning_rate": 6.652941124342492e-05, - "loss": 0.0044, - "step": 2935 - }, - { - "epoch": 2.2687777563236144, - "grad_norm": 0.01320195198059082, - "learning_rate": 6.650394982391004e-05, - "loss": 0.0038, - "step": 2936 - }, - { - "epoch": 2.269550106198108, - "grad_norm": 0.012781853787600994, - "learning_rate": 6.647848360049417e-05, - "loss": 0.0041, - "step": 2937 - }, - { - "epoch": 2.2703224560726007, - "grad_norm": 0.019531317055225372, - "learning_rate": 6.645301258058989e-05, - "loss": 0.0038, - "step": 2938 - }, - { - "epoch": 2.271094805947094, - "grad_norm": 0.010299470275640488, - "learning_rate": 6.642753677161121e-05, - "loss": 0.0034, - "step": 2939 - }, - { - "epoch": 2.271867155821587, - "grad_norm": 0.011393277905881405, - "learning_rate": 6.640205618097352e-05, - "loss": 0.0038, - "step": 2940 - }, - { - "epoch": 2.2726395056960804, - "grad_norm": 0.017499489709734917, - "learning_rate": 6.63765708160936e-05, - "loss": 0.0039, - "step": 2941 - }, - { - "epoch": 2.2734118555705733, - "grad_norm": 0.023332005366683006, - "learning_rate": 6.635108068438962e-05, - "loss": 0.0039, - "step": 2942 - }, - { - "epoch": 2.2741842054450667, - "grad_norm": 0.01747935451567173, - "learning_rate": 6.632558579328114e-05, - "loss": 0.0039, - "step": 2943 - }, - { - "epoch": 2.2749565553195596, - "grad_norm": 0.012404967099428177, - "learning_rate": 6.630008615018914e-05, - "loss": 0.0036, - "step": 2944 - }, - { - "epoch": 2.275728905194053, - "grad_norm": 0.027197325602173805, - "learning_rate": 6.627458176253591e-05, - "loss": 0.0037, - "step": 2945 - }, - { - "epoch": 2.276501255068546, - "grad_norm": 0.025847600772976875, - "learning_rate": 6.624907263774518e-05, - "loss": 0.0041, - "step": 2946 - }, - { - "epoch": 2.2772736049430393, - "grad_norm": 0.010057358071208, - "learning_rate": 6.622355878324203e-05, - "loss": 0.0034, - "step": 2947 - }, - { - "epoch": 2.2780459548175322, - "grad_norm": 0.022725243121385574, - "learning_rate": 6.619804020645292e-05, - "loss": 0.0037, - "step": 2948 - }, - { - "epoch": 2.2788183046920256, - "grad_norm": 0.02578037418425083, - "learning_rate": 6.617251691480572e-05, - "loss": 0.0037, - "step": 2949 - }, - { - "epoch": 2.2795906545665185, - "grad_norm": 0.021287426352500916, - "learning_rate": 6.614698891572962e-05, - "loss": 0.0042, - "step": 2950 - }, - { - "epoch": 2.280363004441012, - "grad_norm": 0.010197608731687069, - "learning_rate": 6.612145621665519e-05, - "loss": 0.0041, - "step": 2951 - }, - { - "epoch": 2.281135354315505, - "grad_norm": 0.020424285903573036, - "learning_rate": 6.609591882501444e-05, - "loss": 0.0036, - "step": 2952 - }, - { - "epoch": 2.281907704189998, - "grad_norm": 0.022551648318767548, - "learning_rate": 6.607037674824062e-05, - "loss": 0.0043, - "step": 2953 - }, - { - "epoch": 2.282680054064491, - "grad_norm": 0.016569821164011955, - "learning_rate": 6.604482999376845e-05, - "loss": 0.0042, - "step": 2954 - }, - { - "epoch": 2.2834524039389845, - "grad_norm": 0.018890995532274246, - "learning_rate": 6.601927856903398e-05, - "loss": 0.0042, - "step": 2955 - }, - { - "epoch": 2.2842247538134774, - "grad_norm": 0.01695701666176319, - "learning_rate": 6.599372248147458e-05, - "loss": 0.0037, - "step": 2956 - }, - { - "epoch": 2.284997103687971, - "grad_norm": 0.018755938857793808, - "learning_rate": 6.596816173852903e-05, - "loss": 0.0036, - "step": 2957 - }, - { - "epoch": 2.2857694535624637, - "grad_norm": 0.009337184019386768, - "learning_rate": 6.594259634763742e-05, - "loss": 0.0042, - "step": 2958 - }, - { - "epoch": 2.286541803436957, - "grad_norm": 0.008161675184965134, - "learning_rate": 6.591702631624126e-05, - "loss": 0.0041, - "step": 2959 - }, - { - "epoch": 2.28731415331145, - "grad_norm": 0.02394683100283146, - "learning_rate": 6.589145165178335e-05, - "loss": 0.0038, - "step": 2960 - }, - { - "epoch": 2.2880865031859434, - "grad_norm": 0.016816599294543266, - "learning_rate": 6.586587236170783e-05, - "loss": 0.0043, - "step": 2961 - }, - { - "epoch": 2.2888588530604363, - "grad_norm": 0.014665575698018074, - "learning_rate": 6.584028845346025e-05, - "loss": 0.0035, - "step": 2962 - }, - { - "epoch": 2.2896312029349293, - "grad_norm": 0.008588436990976334, - "learning_rate": 6.581469993448746e-05, - "loss": 0.0036, - "step": 2963 - }, - { - "epoch": 2.2904035528094226, - "grad_norm": 0.03165439888834953, - "learning_rate": 6.578910681223765e-05, - "loss": 0.0041, - "step": 2964 - }, - { - "epoch": 2.291175902683916, - "grad_norm": 0.024436477571725845, - "learning_rate": 6.576350909416034e-05, - "loss": 0.004, - "step": 2965 - }, - { - "epoch": 2.291948252558409, - "grad_norm": 0.023368055000901222, - "learning_rate": 6.573790678770646e-05, - "loss": 0.0041, - "step": 2966 - }, - { - "epoch": 2.292720602432902, - "grad_norm": 0.013346045278012753, - "learning_rate": 6.571229990032817e-05, - "loss": 0.0037, - "step": 2967 - }, - { - "epoch": 2.2934929523073952, - "grad_norm": 0.009437451139092445, - "learning_rate": 6.568668843947906e-05, - "loss": 0.0034, - "step": 2968 - }, - { - "epoch": 2.2942653021818886, - "grad_norm": 0.011323517188429832, - "learning_rate": 6.566107241261397e-05, - "loss": 0.0035, - "step": 2969 - }, - { - "epoch": 2.2950376520563815, - "grad_norm": 0.012972177937626839, - "learning_rate": 6.563545182718914e-05, - "loss": 0.0034, - "step": 2970 - }, - { - "epoch": 2.2958100019308745, - "grad_norm": 0.008187105879187584, - "learning_rate": 6.560982669066207e-05, - "loss": 0.0036, - "step": 2971 - }, - { - "epoch": 2.296582351805368, - "grad_norm": 0.017039962112903595, - "learning_rate": 6.558419701049163e-05, - "loss": 0.0039, - "step": 2972 - }, - { - "epoch": 2.297354701679861, - "grad_norm": 0.008243520744144917, - "learning_rate": 6.555856279413802e-05, - "loss": 0.0037, - "step": 2973 - }, - { - "epoch": 2.298127051554354, - "grad_norm": 0.012830189429223537, - "learning_rate": 6.553292404906271e-05, - "loss": 0.0035, - "step": 2974 - }, - { - "epoch": 2.298899401428847, - "grad_norm": 0.013081861659884453, - "learning_rate": 6.550728078272855e-05, - "loss": 0.0034, - "step": 2975 - }, - { - "epoch": 2.2996717513033405, - "grad_norm": 0.019632283598184586, - "learning_rate": 6.548163300259966e-05, - "loss": 0.0044, - "step": 2976 - }, - { - "epoch": 2.3004441011778334, - "grad_norm": 0.01079633366316557, - "learning_rate": 6.545598071614148e-05, - "loss": 0.004, - "step": 2977 - }, - { - "epoch": 2.3012164510523268, - "grad_norm": 0.012638948857784271, - "learning_rate": 6.543032393082077e-05, - "loss": 0.0034, - "step": 2978 - }, - { - "epoch": 2.3019888009268197, - "grad_norm": 0.016536030918359756, - "learning_rate": 6.540466265410563e-05, - "loss": 0.0035, - "step": 2979 - }, - { - "epoch": 2.302761150801313, - "grad_norm": 0.011115089990198612, - "learning_rate": 6.537899689346541e-05, - "loss": 0.0044, - "step": 2980 - }, - { - "epoch": 2.303533500675806, - "grad_norm": 0.008431201800704002, - "learning_rate": 6.53533266563708e-05, - "loss": 0.0038, - "step": 2981 - }, - { - "epoch": 2.3043058505502994, - "grad_norm": 0.010215497575700283, - "learning_rate": 6.532765195029379e-05, - "loss": 0.0033, - "step": 2982 - }, - { - "epoch": 2.3050782004247923, - "grad_norm": 0.019033120945096016, - "learning_rate": 6.530197278270765e-05, - "loss": 0.004, - "step": 2983 - }, - { - "epoch": 2.3058505502992857, - "grad_norm": 0.009608306922018528, - "learning_rate": 6.527628916108699e-05, - "loss": 0.0037, - "step": 2984 - }, - { - "epoch": 2.3066229001737786, - "grad_norm": 0.011376178823411465, - "learning_rate": 6.525060109290768e-05, - "loss": 0.0039, - "step": 2985 - }, - { - "epoch": 2.307395250048272, - "grad_norm": 0.012053635902702808, - "learning_rate": 6.522490858564689e-05, - "loss": 0.0039, - "step": 2986 - }, - { - "epoch": 2.308167599922765, - "grad_norm": 0.009816590696573257, - "learning_rate": 6.51992116467831e-05, - "loss": 0.0036, - "step": 2987 - }, - { - "epoch": 2.3089399497972583, - "grad_norm": 0.007648579776287079, - "learning_rate": 6.517351028379603e-05, - "loss": 0.0035, - "step": 2988 - }, - { - "epoch": 2.309712299671751, - "grad_norm": 0.010480094701051712, - "learning_rate": 6.51478045041668e-05, - "loss": 0.0038, - "step": 2989 - }, - { - "epoch": 2.3104846495462446, - "grad_norm": 0.0140578243881464, - "learning_rate": 6.51220943153777e-05, - "loss": 0.0042, - "step": 2990 - }, - { - "epoch": 2.3112569994207375, - "grad_norm": 0.011694060638546944, - "learning_rate": 6.509637972491231e-05, - "loss": 0.004, - "step": 2991 - }, - { - "epoch": 2.312029349295231, - "grad_norm": 0.011438708752393723, - "learning_rate": 6.507066074025557e-05, - "loss": 0.0036, - "step": 2992 - }, - { - "epoch": 2.312801699169724, - "grad_norm": 0.027124110609292984, - "learning_rate": 6.504493736889366e-05, - "loss": 0.0043, - "step": 2993 - }, - { - "epoch": 2.313574049044217, - "grad_norm": 0.01168591808527708, - "learning_rate": 6.5019209618314e-05, - "loss": 0.0041, - "step": 2994 - }, - { - "epoch": 2.31434639891871, - "grad_norm": 0.008750442415475845, - "learning_rate": 6.499347749600533e-05, - "loss": 0.0036, - "step": 2995 - }, - { - "epoch": 2.3151187487932035, - "grad_norm": 0.02237485721707344, - "learning_rate": 6.496774100945766e-05, - "loss": 0.004, - "step": 2996 - }, - { - "epoch": 2.3158910986676964, - "grad_norm": 0.012080367654561996, - "learning_rate": 6.494200016616225e-05, - "loss": 0.0035, - "step": 2997 - }, - { - "epoch": 2.3166634485421898, - "grad_norm": 0.01149061881005764, - "learning_rate": 6.491625497361164e-05, - "loss": 0.0038, - "step": 2998 - }, - { - "epoch": 2.3174357984166827, - "grad_norm": 0.01568903587758541, - "learning_rate": 6.489050543929964e-05, - "loss": 0.0039, - "step": 2999 - }, - { - "epoch": 2.318208148291176, - "grad_norm": 0.011648730374872684, - "learning_rate": 6.486475157072129e-05, - "loss": 0.0033, - "step": 3000 - }, - { - "epoch": 2.318980498165669, - "grad_norm": 0.013448680751025677, - "learning_rate": 6.483899337537295e-05, - "loss": 0.004, - "step": 3001 - }, - { - "epoch": 2.3197528480401624, - "grad_norm": 0.0091966912150383, - "learning_rate": 6.481323086075219e-05, - "loss": 0.0039, - "step": 3002 - }, - { - "epoch": 2.3205251979146553, - "grad_norm": 0.010801075026392937, - "learning_rate": 6.478746403435787e-05, - "loss": 0.0038, - "step": 3003 - }, - { - "epoch": 2.3212975477891487, - "grad_norm": 0.014122814871370792, - "learning_rate": 6.476169290369007e-05, - "loss": 0.0038, - "step": 3004 - }, - { - "epoch": 2.3220698976636416, - "grad_norm": 0.022588754072785378, - "learning_rate": 6.473591747625017e-05, - "loss": 0.0041, - "step": 3005 - }, - { - "epoch": 2.322842247538135, - "grad_norm": 0.008739450946450233, - "learning_rate": 6.471013775954076e-05, - "loss": 0.0043, - "step": 3006 - }, - { - "epoch": 2.323614597412628, - "grad_norm": 0.015635278075933456, - "learning_rate": 6.468435376106568e-05, - "loss": 0.0034, - "step": 3007 - }, - { - "epoch": 2.3243869472871213, - "grad_norm": 0.022441934794187546, - "learning_rate": 6.465856548833005e-05, - "loss": 0.0038, - "step": 3008 - }, - { - "epoch": 2.325159297161614, - "grad_norm": 0.010819089598953724, - "learning_rate": 6.46327729488402e-05, - "loss": 0.0041, - "step": 3009 - }, - { - "epoch": 2.325931647036107, - "grad_norm": 0.010484733618795872, - "learning_rate": 6.460697615010373e-05, - "loss": 0.004, - "step": 3010 - }, - { - "epoch": 2.3267039969106005, - "grad_norm": 0.017068954184651375, - "learning_rate": 6.458117509962944e-05, - "loss": 0.0042, - "step": 3011 - }, - { - "epoch": 2.327476346785094, - "grad_norm": 0.027246003970503807, - "learning_rate": 6.455536980492741e-05, - "loss": 0.0041, - "step": 3012 - }, - { - "epoch": 2.328248696659587, - "grad_norm": 0.00963776558637619, - "learning_rate": 6.452956027350893e-05, - "loss": 0.0037, - "step": 3013 - }, - { - "epoch": 2.3290210465340797, - "grad_norm": 0.01445906050503254, - "learning_rate": 6.450374651288656e-05, - "loss": 0.0033, - "step": 3014 - }, - { - "epoch": 2.329793396408573, - "grad_norm": 0.025476105511188507, - "learning_rate": 6.447792853057402e-05, - "loss": 0.0041, - "step": 3015 - }, - { - "epoch": 2.3305657462830665, - "grad_norm": 0.01671411097049713, - "learning_rate": 6.445210633408631e-05, - "loss": 0.0044, - "step": 3016 - }, - { - "epoch": 2.3313380961575594, - "grad_norm": 0.01059251930564642, - "learning_rate": 6.442627993093966e-05, - "loss": 0.0035, - "step": 3017 - }, - { - "epoch": 2.3321104460320523, - "grad_norm": 0.015922479331493378, - "learning_rate": 6.440044932865151e-05, - "loss": 0.0037, - "step": 3018 - }, - { - "epoch": 2.3328827959065457, - "grad_norm": 0.01611727476119995, - "learning_rate": 6.437461453474052e-05, - "loss": 0.0039, - "step": 3019 - }, - { - "epoch": 2.333655145781039, - "grad_norm": 0.015589396469295025, - "learning_rate": 6.434877555672657e-05, - "loss": 0.0037, - "step": 3020 - }, - { - "epoch": 2.334427495655532, - "grad_norm": 0.008948437869548798, - "learning_rate": 6.432293240213079e-05, - "loss": 0.0037, - "step": 3021 - }, - { - "epoch": 2.335199845530025, - "grad_norm": 0.013299311511218548, - "learning_rate": 6.429708507847544e-05, - "loss": 0.0038, - "step": 3022 - }, - { - "epoch": 2.3359721954045183, - "grad_norm": 0.013574497774243355, - "learning_rate": 6.427123359328413e-05, - "loss": 0.0039, - "step": 3023 - }, - { - "epoch": 2.336744545279011, - "grad_norm": 0.021488720551133156, - "learning_rate": 6.424537795408153e-05, - "loss": 0.004, - "step": 3024 - }, - { - "epoch": 2.3375168951535046, - "grad_norm": 0.012841997668147087, - "learning_rate": 6.421951816839364e-05, - "loss": 0.0041, - "step": 3025 - }, - { - "epoch": 2.3382892450279975, - "grad_norm": 0.018972106277942657, - "learning_rate": 6.41936542437476e-05, - "loss": 0.0041, - "step": 3026 - }, - { - "epoch": 2.339061594902491, - "grad_norm": 0.02803695946931839, - "learning_rate": 6.416778618767175e-05, - "loss": 0.0042, - "step": 3027 - }, - { - "epoch": 2.339833944776984, - "grad_norm": 0.020719368010759354, - "learning_rate": 6.414191400769571e-05, - "loss": 0.0043, - "step": 3028 - }, - { - "epoch": 2.340606294651477, - "grad_norm": 0.007930947467684746, - "learning_rate": 6.411603771135019e-05, - "loss": 0.0038, - "step": 3029 - }, - { - "epoch": 2.34137864452597, - "grad_norm": 0.03803905099630356, - "learning_rate": 6.409015730616719e-05, - "loss": 0.0043, - "step": 3030 - }, - { - "epoch": 2.3421509944004635, - "grad_norm": 0.023514466360211372, - "learning_rate": 6.406427279967987e-05, - "loss": 0.0035, - "step": 3031 - }, - { - "epoch": 2.3429233442749564, - "grad_norm": 0.008395480923354626, - "learning_rate": 6.403838419942256e-05, - "loss": 0.0039, - "step": 3032 - }, - { - "epoch": 2.34369569414945, - "grad_norm": 0.0166980791836977, - "learning_rate": 6.401249151293084e-05, - "loss": 0.004, - "step": 3033 - }, - { - "epoch": 2.3444680440239427, - "grad_norm": 0.029495570808649063, - "learning_rate": 6.398659474774142e-05, - "loss": 0.0048, - "step": 3034 - }, - { - "epoch": 2.345240393898436, - "grad_norm": 0.026838259771466255, - "learning_rate": 6.396069391139223e-05, - "loss": 0.0039, - "step": 3035 - }, - { - "epoch": 2.346012743772929, - "grad_norm": 0.010425945743918419, - "learning_rate": 6.393478901142237e-05, - "loss": 0.0041, - "step": 3036 - }, - { - "epoch": 2.3467850936474224, - "grad_norm": 0.026453660801053047, - "learning_rate": 6.390888005537216e-05, - "loss": 0.0038, - "step": 3037 - }, - { - "epoch": 2.3475574435219153, - "grad_norm": 0.035654086619615555, - "learning_rate": 6.388296705078303e-05, - "loss": 0.0043, - "step": 3038 - }, - { - "epoch": 2.3483297933964087, - "grad_norm": 0.023507380858063698, - "learning_rate": 6.385705000519766e-05, - "loss": 0.004, - "step": 3039 - }, - { - "epoch": 2.3491021432709016, - "grad_norm": 0.031890109181404114, - "learning_rate": 6.383112892615986e-05, - "loss": 0.0036, - "step": 3040 - }, - { - "epoch": 2.349874493145395, - "grad_norm": 0.03831114247441292, - "learning_rate": 6.380520382121463e-05, - "loss": 0.0042, - "step": 3041 - }, - { - "epoch": 2.350646843019888, - "grad_norm": 0.022333258762955666, - "learning_rate": 6.377927469790815e-05, - "loss": 0.0039, - "step": 3042 - }, - { - "epoch": 2.3514191928943813, - "grad_norm": 0.008276746608316898, - "learning_rate": 6.375334156378776e-05, - "loss": 0.0037, - "step": 3043 - }, - { - "epoch": 2.352191542768874, - "grad_norm": 0.0267958901822567, - "learning_rate": 6.372740442640196e-05, - "loss": 0.004, - "step": 3044 - }, - { - "epoch": 2.3529638926433676, - "grad_norm": 0.04083576798439026, - "learning_rate": 6.370146329330043e-05, - "loss": 0.0048, - "step": 3045 - }, - { - "epoch": 2.3537362425178605, - "grad_norm": 0.01393703743815422, - "learning_rate": 6.3675518172034e-05, - "loss": 0.0044, - "step": 3046 - }, - { - "epoch": 2.354508592392354, - "grad_norm": 0.019029032438993454, - "learning_rate": 6.364956907015469e-05, - "loss": 0.0039, - "step": 3047 - }, - { - "epoch": 2.355280942266847, - "grad_norm": 0.029930435121059418, - "learning_rate": 6.362361599521563e-05, - "loss": 0.0041, - "step": 3048 - }, - { - "epoch": 2.35605329214134, - "grad_norm": 0.020898763090372086, - "learning_rate": 6.359765895477114e-05, - "loss": 0.0039, - "step": 3049 - }, - { - "epoch": 2.356825642015833, - "grad_norm": 0.016825007274746895, - "learning_rate": 6.357169795637671e-05, - "loss": 0.0041, - "step": 3050 - }, - { - "epoch": 2.3575979918903265, - "grad_norm": 0.01854357123374939, - "learning_rate": 6.354573300758893e-05, - "loss": 0.0038, - "step": 3051 - }, - { - "epoch": 2.3583703417648194, - "grad_norm": 0.040367867797613144, - "learning_rate": 6.351976411596558e-05, - "loss": 0.0044, - "step": 3052 - }, - { - "epoch": 2.359142691639313, - "grad_norm": 0.021105622872710228, - "learning_rate": 6.349379128906559e-05, - "loss": 0.0047, - "step": 3053 - }, - { - "epoch": 2.3599150415138057, - "grad_norm": 0.022896962240338326, - "learning_rate": 6.346781453444898e-05, - "loss": 0.0034, - "step": 3054 - }, - { - "epoch": 2.360687391388299, - "grad_norm": 0.017875095829367638, - "learning_rate": 6.344183385967702e-05, - "loss": 0.0033, - "step": 3055 - }, - { - "epoch": 2.361459741262792, - "grad_norm": 0.04240044206380844, - "learning_rate": 6.341584927231198e-05, - "loss": 0.0041, - "step": 3056 - }, - { - "epoch": 2.3622320911372854, - "grad_norm": 0.024292636662721634, - "learning_rate": 6.338986077991742e-05, - "loss": 0.0042, - "step": 3057 - }, - { - "epoch": 2.3630044410117783, - "grad_norm": 0.011553112417459488, - "learning_rate": 6.336386839005792e-05, - "loss": 0.0038, - "step": 3058 - }, - { - "epoch": 2.3637767908862717, - "grad_norm": 0.01957235299050808, - "learning_rate": 6.333787211029924e-05, - "loss": 0.0044, - "step": 3059 - }, - { - "epoch": 2.3645491407607646, - "grad_norm": 0.03314711153507233, - "learning_rate": 6.331187194820827e-05, - "loss": 0.0041, - "step": 3060 - }, - { - "epoch": 2.3653214906352575, - "grad_norm": 0.027983160689473152, - "learning_rate": 6.328586791135304e-05, - "loss": 0.0043, - "step": 3061 - }, - { - "epoch": 2.366093840509751, - "grad_norm": 0.009940424002707005, - "learning_rate": 6.32598600073027e-05, - "loss": 0.0041, - "step": 3062 - }, - { - "epoch": 2.3668661903842443, - "grad_norm": 0.022160066291689873, - "learning_rate": 6.323384824362753e-05, - "loss": 0.0036, - "step": 3063 - }, - { - "epoch": 2.367638540258737, - "grad_norm": 0.0382474847137928, - "learning_rate": 6.320783262789887e-05, - "loss": 0.0041, - "step": 3064 - }, - { - "epoch": 2.36841089013323, - "grad_norm": 0.011643902398645878, - "learning_rate": 6.318181316768929e-05, - "loss": 0.0039, - "step": 3065 - }, - { - "epoch": 2.3691832400077235, - "grad_norm": 0.009308322332799435, - "learning_rate": 6.315578987057242e-05, - "loss": 0.004, - "step": 3066 - }, - { - "epoch": 2.369955589882217, - "grad_norm": 0.01695558987557888, - "learning_rate": 6.312976274412299e-05, - "loss": 0.0036, - "step": 3067 - }, - { - "epoch": 2.37072793975671, - "grad_norm": 0.017537007108330727, - "learning_rate": 6.310373179591688e-05, - "loss": 0.0035, - "step": 3068 - }, - { - "epoch": 2.3715002896312027, - "grad_norm": 0.02394968830049038, - "learning_rate": 6.307769703353109e-05, - "loss": 0.0041, - "step": 3069 - }, - { - "epoch": 2.372272639505696, - "grad_norm": 0.009026383981108665, - "learning_rate": 6.305165846454369e-05, - "loss": 0.0035, - "step": 3070 - }, - { - "epoch": 2.373044989380189, - "grad_norm": 0.017562277615070343, - "learning_rate": 6.302561609653387e-05, - "loss": 0.0036, - "step": 3071 - }, - { - "epoch": 2.3738173392546824, - "grad_norm": 0.018362585455179214, - "learning_rate": 6.299956993708196e-05, - "loss": 0.004, - "step": 3072 - }, - { - "epoch": 2.3745896891291753, - "grad_norm": 0.015956103801727295, - "learning_rate": 6.297351999376934e-05, - "loss": 0.0036, - "step": 3073 - }, - { - "epoch": 2.3753620390036687, - "grad_norm": 0.009392969310283661, - "learning_rate": 6.294746627417853e-05, - "loss": 0.0034, - "step": 3074 - }, - { - "epoch": 2.3761343888781616, - "grad_norm": 0.013169806450605392, - "learning_rate": 6.292140878589315e-05, - "loss": 0.0042, - "step": 3075 - }, - { - "epoch": 2.376906738752655, - "grad_norm": 0.023034386336803436, - "learning_rate": 6.289534753649788e-05, - "loss": 0.005, - "step": 3076 - }, - { - "epoch": 2.377679088627148, - "grad_norm": 0.011399516835808754, - "learning_rate": 6.286928253357856e-05, - "loss": 0.0039, - "step": 3077 - }, - { - "epoch": 2.3784514385016413, - "grad_norm": 0.010855674743652344, - "learning_rate": 6.284321378472204e-05, - "loss": 0.0034, - "step": 3078 - }, - { - "epoch": 2.3792237883761342, - "grad_norm": 0.007988915778696537, - "learning_rate": 6.281714129751632e-05, - "loss": 0.0034, - "step": 3079 - }, - { - "epoch": 2.3799961382506276, - "grad_norm": 0.014476333744823933, - "learning_rate": 6.279106507955049e-05, - "loss": 0.0038, - "step": 3080 - }, - { - "epoch": 2.3807684881251205, - "grad_norm": 0.011578804813325405, - "learning_rate": 6.276498513841466e-05, - "loss": 0.0033, - "step": 3081 - }, - { - "epoch": 2.381540837999614, - "grad_norm": 0.01192422304302454, - "learning_rate": 6.273890148170011e-05, - "loss": 0.0041, - "step": 3082 - }, - { - "epoch": 2.382313187874107, - "grad_norm": 0.017417842522263527, - "learning_rate": 6.271281411699916e-05, - "loss": 0.0041, - "step": 3083 - }, - { - "epoch": 2.3830855377486, - "grad_norm": 0.018631063401699066, - "learning_rate": 6.268672305190519e-05, - "loss": 0.004, - "step": 3084 - }, - { - "epoch": 2.383857887623093, - "grad_norm": 0.010208629071712494, - "learning_rate": 6.266062829401271e-05, - "loss": 0.0041, - "step": 3085 - }, - { - "epoch": 2.3846302374975865, - "grad_norm": 0.020590389147400856, - "learning_rate": 6.263452985091725e-05, - "loss": 0.0035, - "step": 3086 - }, - { - "epoch": 2.3854025873720794, - "grad_norm": 0.030716968700289726, - "learning_rate": 6.260842773021543e-05, - "loss": 0.0041, - "step": 3087 - }, - { - "epoch": 2.386174937246573, - "grad_norm": 0.014777534641325474, - "learning_rate": 6.258232193950497e-05, - "loss": 0.0032, - "step": 3088 - }, - { - "epoch": 2.3869472871210657, - "grad_norm": 0.008989118970930576, - "learning_rate": 6.255621248638461e-05, - "loss": 0.0039, - "step": 3089 - }, - { - "epoch": 2.387719636995559, - "grad_norm": 0.020442666485905647, - "learning_rate": 6.253009937845422e-05, - "loss": 0.0039, - "step": 3090 - }, - { - "epoch": 2.388491986870052, - "grad_norm": 0.02745845727622509, - "learning_rate": 6.250398262331463e-05, - "loss": 0.0038, - "step": 3091 - }, - { - "epoch": 2.3892643367445454, - "grad_norm": 0.010492071509361267, - "learning_rate": 6.247786222856786e-05, - "loss": 0.0044, - "step": 3092 - }, - { - "epoch": 2.3900366866190383, - "grad_norm": 0.008296754211187363, - "learning_rate": 6.245173820181691e-05, - "loss": 0.0039, - "step": 3093 - }, - { - "epoch": 2.3908090364935317, - "grad_norm": 0.01965721696615219, - "learning_rate": 6.242561055066581e-05, - "loss": 0.0037, - "step": 3094 - }, - { - "epoch": 2.3915813863680246, - "grad_norm": 0.023441633209586143, - "learning_rate": 6.239947928271974e-05, - "loss": 0.004, - "step": 3095 - }, - { - "epoch": 2.392353736242518, - "grad_norm": 0.010678118094801903, - "learning_rate": 6.237334440558487e-05, - "loss": 0.0041, - "step": 3096 - }, - { - "epoch": 2.393126086117011, - "grad_norm": 0.01498490385711193, - "learning_rate": 6.234720592686842e-05, - "loss": 0.0036, - "step": 3097 - }, - { - "epoch": 2.3938984359915043, - "grad_norm": 0.011694070883095264, - "learning_rate": 6.232106385417866e-05, - "loss": 0.0037, - "step": 3098 - }, - { - "epoch": 2.3946707858659972, - "grad_norm": 0.018943822011351585, - "learning_rate": 6.229491819512494e-05, - "loss": 0.0043, - "step": 3099 - }, - { - "epoch": 2.3954431357404906, - "grad_norm": 0.008188261650502682, - "learning_rate": 6.226876895731761e-05, - "loss": 0.0038, - "step": 3100 - }, - { - "epoch": 2.3962154856149835, - "grad_norm": 0.014174428768455982, - "learning_rate": 6.22426161483681e-05, - "loss": 0.0033, - "step": 3101 - }, - { - "epoch": 2.396987835489477, - "grad_norm": 0.021323859691619873, - "learning_rate": 6.221645977588885e-05, - "loss": 0.0039, - "step": 3102 - }, - { - "epoch": 2.39776018536397, - "grad_norm": 0.017304735258221626, - "learning_rate": 6.219029984749334e-05, - "loss": 0.0031, - "step": 3103 - }, - { - "epoch": 2.398532535238463, - "grad_norm": 0.011139573529362679, - "learning_rate": 6.216413637079611e-05, - "loss": 0.0037, - "step": 3104 - }, - { - "epoch": 2.399304885112956, - "grad_norm": 0.01064131036400795, - "learning_rate": 6.213796935341269e-05, - "loss": 0.0042, - "step": 3105 - }, - { - "epoch": 2.4000772349874495, - "grad_norm": 0.01577679067850113, - "learning_rate": 6.211179880295971e-05, - "loss": 0.0034, - "step": 3106 - }, - { - "epoch": 2.4008495848619424, - "grad_norm": 0.009980639442801476, - "learning_rate": 6.208562472705474e-05, - "loss": 0.0039, - "step": 3107 - }, - { - "epoch": 2.4016219347364354, - "grad_norm": 0.007531393319368362, - "learning_rate": 6.205944713331644e-05, - "loss": 0.0032, - "step": 3108 - }, - { - "epoch": 2.4023942846109287, - "grad_norm": 0.007788199465721846, - "learning_rate": 6.203326602936448e-05, - "loss": 0.0034, - "step": 3109 - }, - { - "epoch": 2.403166634485422, - "grad_norm": 0.01703229732811451, - "learning_rate": 6.200708142281954e-05, - "loss": 0.0042, - "step": 3110 - }, - { - "epoch": 2.403938984359915, - "grad_norm": 0.015591508708894253, - "learning_rate": 6.198089332130332e-05, - "loss": 0.004, - "step": 3111 - }, - { - "epoch": 2.404711334234408, - "grad_norm": 0.008327793329954147, - "learning_rate": 6.195470173243857e-05, - "loss": 0.0037, - "step": 3112 - }, - { - "epoch": 2.4054836841089013, - "grad_norm": 0.013708319514989853, - "learning_rate": 6.1928506663849e-05, - "loss": 0.0034, - "step": 3113 - }, - { - "epoch": 2.4062560339833947, - "grad_norm": 0.016529733315110207, - "learning_rate": 6.190230812315936e-05, - "loss": 0.004, - "step": 3114 - }, - { - "epoch": 2.4070283838578876, - "grad_norm": 0.011277355253696442, - "learning_rate": 6.187610611799544e-05, - "loss": 0.004, - "step": 3115 - }, - { - "epoch": 2.4078007337323806, - "grad_norm": 0.009526095353066921, - "learning_rate": 6.184990065598397e-05, - "loss": 0.0038, - "step": 3116 - }, - { - "epoch": 2.408573083606874, - "grad_norm": 0.008797888644039631, - "learning_rate": 6.182369174475278e-05, - "loss": 0.0035, - "step": 3117 - }, - { - "epoch": 2.409345433481367, - "grad_norm": 0.007940378971397877, - "learning_rate": 6.179747939193062e-05, - "loss": 0.0042, - "step": 3118 - }, - { - "epoch": 2.4101177833558602, - "grad_norm": 0.01933273859322071, - "learning_rate": 6.177126360514725e-05, - "loss": 0.004, - "step": 3119 - }, - { - "epoch": 2.410890133230353, - "grad_norm": 0.011832729913294315, - "learning_rate": 6.174504439203349e-05, - "loss": 0.004, - "step": 3120 - }, - { - "epoch": 2.4116624831048465, - "grad_norm": 0.009274031966924667, - "learning_rate": 6.17188217602211e-05, - "loss": 0.0038, - "step": 3121 - }, - { - "epoch": 2.4124348329793395, - "grad_norm": 0.021085087209939957, - "learning_rate": 6.169259571734286e-05, - "loss": 0.0039, - "step": 3122 - }, - { - "epoch": 2.413207182853833, - "grad_norm": 0.00877199973911047, - "learning_rate": 6.166636627103256e-05, - "loss": 0.0035, - "step": 3123 - }, - { - "epoch": 2.4139795327283258, - "grad_norm": 0.01439269445836544, - "learning_rate": 6.16401334289249e-05, - "loss": 0.0039, - "step": 3124 - }, - { - "epoch": 2.414751882602819, - "grad_norm": 0.009551884606480598, - "learning_rate": 6.16138971986557e-05, - "loss": 0.0043, - "step": 3125 - }, - { - "epoch": 2.415524232477312, - "grad_norm": 0.014275209046900272, - "learning_rate": 6.158765758786162e-05, - "loss": 0.0042, - "step": 3126 - }, - { - "epoch": 2.4162965823518054, - "grad_norm": 0.023969031870365143, - "learning_rate": 6.156141460418042e-05, - "loss": 0.0042, - "step": 3127 - }, - { - "epoch": 2.4170689322262984, - "grad_norm": 0.007481127046048641, - "learning_rate": 6.15351682552508e-05, - "loss": 0.0038, - "step": 3128 - }, - { - "epoch": 2.4178412821007917, - "grad_norm": 0.027742892503738403, - "learning_rate": 6.150891854871243e-05, - "loss": 0.0037, - "step": 3129 - }, - { - "epoch": 2.4186136319752847, - "grad_norm": 0.011066229082643986, - "learning_rate": 6.148266549220594e-05, - "loss": 0.004, - "step": 3130 - }, - { - "epoch": 2.419385981849778, - "grad_norm": 0.008486258797347546, - "learning_rate": 6.145640909337302e-05, - "loss": 0.0039, - "step": 3131 - }, - { - "epoch": 2.420158331724271, - "grad_norm": 0.013606767170131207, - "learning_rate": 6.14301493598562e-05, - "loss": 0.0044, - "step": 3132 - }, - { - "epoch": 2.4209306815987643, - "grad_norm": 0.014068580232560635, - "learning_rate": 6.140388629929912e-05, - "loss": 0.0036, - "step": 3133 - }, - { - "epoch": 2.4217030314732573, - "grad_norm": 0.007964743301272392, - "learning_rate": 6.137761991934628e-05, - "loss": 0.0036, - "step": 3134 - }, - { - "epoch": 2.4224753813477506, - "grad_norm": 0.010003727860748768, - "learning_rate": 6.13513502276432e-05, - "loss": 0.0038, - "step": 3135 - }, - { - "epoch": 2.4232477312222436, - "grad_norm": 0.0116347037255764, - "learning_rate": 6.132507723183635e-05, - "loss": 0.0038, - "step": 3136 - }, - { - "epoch": 2.424020081096737, - "grad_norm": 0.013268062844872475, - "learning_rate": 6.129880093957316e-05, - "loss": 0.0042, - "step": 3137 - }, - { - "epoch": 2.42479243097123, - "grad_norm": 0.009738652966916561, - "learning_rate": 6.127252135850206e-05, - "loss": 0.0037, - "step": 3138 - }, - { - "epoch": 2.4255647808457232, - "grad_norm": 0.009008978493511677, - "learning_rate": 6.124623849627236e-05, - "loss": 0.0031, - "step": 3139 - }, - { - "epoch": 2.426337130720216, - "grad_norm": 0.008294097147881985, - "learning_rate": 6.121995236053437e-05, - "loss": 0.0039, - "step": 3140 - }, - { - "epoch": 2.4271094805947095, - "grad_norm": 0.011128397658467293, - "learning_rate": 6.119366295893935e-05, - "loss": 0.0042, - "step": 3141 - }, - { - "epoch": 2.4278818304692025, - "grad_norm": 0.010329133830964565, - "learning_rate": 6.116737029913954e-05, - "loss": 0.0035, - "step": 3142 - }, - { - "epoch": 2.428654180343696, - "grad_norm": 0.011560085229575634, - "learning_rate": 6.114107438878807e-05, - "loss": 0.004, - "step": 3143 - }, - { - "epoch": 2.4294265302181888, - "grad_norm": 0.008150237612426281, - "learning_rate": 6.111477523553905e-05, - "loss": 0.0035, - "step": 3144 - }, - { - "epoch": 2.430198880092682, - "grad_norm": 0.008369138464331627, - "learning_rate": 6.108847284704752e-05, - "loss": 0.0036, - "step": 3145 - }, - { - "epoch": 2.430971229967175, - "grad_norm": 0.019015392288565636, - "learning_rate": 6.106216723096948e-05, - "loss": 0.004, - "step": 3146 - }, - { - "epoch": 2.4317435798416684, - "grad_norm": 0.013174445368349552, - "learning_rate": 6.103585839496187e-05, - "loss": 0.0031, - "step": 3147 - }, - { - "epoch": 2.4325159297161614, - "grad_norm": 0.009038873016834259, - "learning_rate": 6.1009546346682533e-05, - "loss": 0.0035, - "step": 3148 - }, - { - "epoch": 2.4332882795906547, - "grad_norm": 0.016160445287823677, - "learning_rate": 6.098323109379027e-05, - "loss": 0.0034, - "step": 3149 - }, - { - "epoch": 2.4340606294651477, - "grad_norm": 0.01445263996720314, - "learning_rate": 6.0956912643944843e-05, - "loss": 0.0035, - "step": 3150 - }, - { - "epoch": 2.434832979339641, - "grad_norm": 0.007858852855861187, - "learning_rate": 6.093059100480688e-05, - "loss": 0.0037, - "step": 3151 - }, - { - "epoch": 2.435605329214134, - "grad_norm": 0.008782031945884228, - "learning_rate": 6.090426618403802e-05, - "loss": 0.004, - "step": 3152 - }, - { - "epoch": 2.4363776790886273, - "grad_norm": 0.019404729828238487, - "learning_rate": 6.087793818930074e-05, - "loss": 0.0041, - "step": 3153 - }, - { - "epoch": 2.4371500289631203, - "grad_norm": 0.008168632164597511, - "learning_rate": 6.0851607028258494e-05, - "loss": 0.0037, - "step": 3154 - }, - { - "epoch": 2.437922378837613, - "grad_norm": 0.010241009294986725, - "learning_rate": 6.082527270857565e-05, - "loss": 0.0038, - "step": 3155 - }, - { - "epoch": 2.4386947287121066, - "grad_norm": 0.00815881323069334, - "learning_rate": 6.079893523791751e-05, - "loss": 0.003, - "step": 3156 - }, - { - "epoch": 2.4394670785866, - "grad_norm": 0.009282760322093964, - "learning_rate": 6.077259462395025e-05, - "loss": 0.0039, - "step": 3157 - }, - { - "epoch": 2.440239428461093, - "grad_norm": 0.009555073454976082, - "learning_rate": 6.074625087434101e-05, - "loss": 0.0038, - "step": 3158 - }, - { - "epoch": 2.441011778335586, - "grad_norm": 0.00916551798582077, - "learning_rate": 6.071990399675779e-05, - "loss": 0.0037, - "step": 3159 - }, - { - "epoch": 2.441784128210079, - "grad_norm": 0.009903534315526485, - "learning_rate": 6.069355399886955e-05, - "loss": 0.0038, - "step": 3160 - }, - { - "epoch": 2.4425564780845725, - "grad_norm": 0.01959330402314663, - "learning_rate": 6.066720088834612e-05, - "loss": 0.0037, - "step": 3161 - }, - { - "epoch": 2.4433288279590655, - "grad_norm": 0.011863019317388535, - "learning_rate": 6.064084467285828e-05, - "loss": 0.004, - "step": 3162 - }, - { - "epoch": 2.4441011778335584, - "grad_norm": 0.010247189551591873, - "learning_rate": 6.0614485360077656e-05, - "loss": 0.0039, - "step": 3163 - }, - { - "epoch": 2.4448735277080518, - "grad_norm": 0.012665782123804092, - "learning_rate": 6.058812295767684e-05, - "loss": 0.0036, - "step": 3164 - }, - { - "epoch": 2.4456458775825447, - "grad_norm": 0.01594417728483677, - "learning_rate": 6.0561757473329264e-05, - "loss": 0.0043, - "step": 3165 - }, - { - "epoch": 2.446418227457038, - "grad_norm": 0.008561563678085804, - "learning_rate": 6.05353889147093e-05, - "loss": 0.0034, - "step": 3166 - }, - { - "epoch": 2.447190577331531, - "grad_norm": 0.008343924768269062, - "learning_rate": 6.0509017289492184e-05, - "loss": 0.0034, - "step": 3167 - }, - { - "epoch": 2.4479629272060244, - "grad_norm": 0.012697969563305378, - "learning_rate": 6.0482642605354075e-05, - "loss": 0.0038, - "step": 3168 - }, - { - "epoch": 2.4487352770805173, - "grad_norm": 0.018456846475601196, - "learning_rate": 6.0456264869972e-05, - "loss": 0.0038, - "step": 3169 - }, - { - "epoch": 2.4495076269550107, - "grad_norm": 0.007918601855635643, - "learning_rate": 6.0429884091023867e-05, - "loss": 0.0038, - "step": 3170 - }, - { - "epoch": 2.4502799768295036, - "grad_norm": 0.014714458957314491, - "learning_rate": 6.0403500276188494e-05, - "loss": 0.0037, - "step": 3171 - }, - { - "epoch": 2.451052326703997, - "grad_norm": 0.018335428088903427, - "learning_rate": 6.0377113433145596e-05, - "loss": 0.0038, - "step": 3172 - }, - { - "epoch": 2.45182467657849, - "grad_norm": 0.016228122636675835, - "learning_rate": 6.03507235695757e-05, - "loss": 0.0038, - "step": 3173 - }, - { - "epoch": 2.4525970264529833, - "grad_norm": 0.008822061121463776, - "learning_rate": 6.032433069316028e-05, - "loss": 0.0031, - "step": 3174 - }, - { - "epoch": 2.453369376327476, - "grad_norm": 0.01898917555809021, - "learning_rate": 6.029793481158168e-05, - "loss": 0.0042, - "step": 3175 - }, - { - "epoch": 2.4541417262019696, - "grad_norm": 0.020056406036019325, - "learning_rate": 6.027153593252306e-05, - "loss": 0.004, - "step": 3176 - }, - { - "epoch": 2.4549140760764625, - "grad_norm": 0.011279793456196785, - "learning_rate": 6.024513406366855e-05, - "loss": 0.0035, - "step": 3177 - }, - { - "epoch": 2.455686425950956, - "grad_norm": 0.014008576981723309, - "learning_rate": 6.0218729212703064e-05, - "loss": 0.0033, - "step": 3178 - }, - { - "epoch": 2.456458775825449, - "grad_norm": 0.01047800574451685, - "learning_rate": 6.019232138731241e-05, - "loss": 0.004, - "step": 3179 - }, - { - "epoch": 2.457231125699942, - "grad_norm": 0.009189547039568424, - "learning_rate": 6.016591059518329e-05, - "loss": 0.0039, - "step": 3180 - }, - { - "epoch": 2.458003475574435, - "grad_norm": 0.008353308774530888, - "learning_rate": 6.013949684400323e-05, - "loss": 0.0036, - "step": 3181 - }, - { - "epoch": 2.4587758254489285, - "grad_norm": 0.007851125672459602, - "learning_rate": 6.0113080141460646e-05, - "loss": 0.0032, - "step": 3182 - }, - { - "epoch": 2.4595481753234214, - "grad_norm": 0.011206655763089657, - "learning_rate": 6.008666049524481e-05, - "loss": 0.0037, - "step": 3183 - }, - { - "epoch": 2.4603205251979148, - "grad_norm": 0.013563932850956917, - "learning_rate": 6.00602379130458e-05, - "loss": 0.0034, - "step": 3184 - }, - { - "epoch": 2.4610928750724077, - "grad_norm": 0.008387591689825058, - "learning_rate": 6.003381240255466e-05, - "loss": 0.0039, - "step": 3185 - }, - { - "epoch": 2.461865224946901, - "grad_norm": 0.009753430262207985, - "learning_rate": 6.000738397146315e-05, - "loss": 0.0036, - "step": 3186 - }, - { - "epoch": 2.462637574821394, - "grad_norm": 0.009347907267510891, - "learning_rate": 5.998095262746397e-05, - "loss": 0.0036, - "step": 3187 - }, - { - "epoch": 2.4634099246958874, - "grad_norm": 0.010195978917181492, - "learning_rate": 5.995451837825068e-05, - "loss": 0.0037, - "step": 3188 - }, - { - "epoch": 2.4641822745703803, - "grad_norm": 0.008878756314516068, - "learning_rate": 5.99280812315176e-05, - "loss": 0.0034, - "step": 3189 - }, - { - "epoch": 2.4649546244448737, - "grad_norm": 0.010437872260808945, - "learning_rate": 5.9901641194959976e-05, - "loss": 0.0038, - "step": 3190 - }, - { - "epoch": 2.4657269743193666, - "grad_norm": 0.012879699468612671, - "learning_rate": 5.987519827627387e-05, - "loss": 0.0039, - "step": 3191 - }, - { - "epoch": 2.46649932419386, - "grad_norm": 0.007131462451070547, - "learning_rate": 5.9848752483156135e-05, - "loss": 0.0035, - "step": 3192 - }, - { - "epoch": 2.467271674068353, - "grad_norm": 0.009347299113869667, - "learning_rate": 5.9822303823304545e-05, - "loss": 0.0036, - "step": 3193 - }, - { - "epoch": 2.4680440239428463, - "grad_norm": 0.009807485155761242, - "learning_rate": 5.979585230441764e-05, - "loss": 0.0034, - "step": 3194 - }, - { - "epoch": 2.468816373817339, - "grad_norm": 0.010532918386161327, - "learning_rate": 5.976939793419485e-05, - "loss": 0.0039, - "step": 3195 - }, - { - "epoch": 2.4695887236918326, - "grad_norm": 0.008854050189256668, - "learning_rate": 5.974294072033637e-05, - "loss": 0.0036, - "step": 3196 - }, - { - "epoch": 2.4703610735663255, - "grad_norm": 0.012353861704468727, - "learning_rate": 5.971648067054326e-05, - "loss": 0.0034, - "step": 3197 - }, - { - "epoch": 2.471133423440819, - "grad_norm": 0.01505427435040474, - "learning_rate": 5.969001779251742e-05, - "loss": 0.0035, - "step": 3198 - }, - { - "epoch": 2.471905773315312, - "grad_norm": 0.01005632895976305, - "learning_rate": 5.966355209396154e-05, - "loss": 0.0044, - "step": 3199 - }, - { - "epoch": 2.472678123189805, - "grad_norm": 0.018703538924455643, - "learning_rate": 5.9637083582579136e-05, - "loss": 0.0039, - "step": 3200 - }, - { - "epoch": 2.473450473064298, - "grad_norm": 0.008641044609248638, - "learning_rate": 5.9610612266074585e-05, - "loss": 0.0041, - "step": 3201 - }, - { - "epoch": 2.474222822938791, - "grad_norm": 0.013356814160943031, - "learning_rate": 5.9584138152153004e-05, - "loss": 0.0037, - "step": 3202 - }, - { - "epoch": 2.4749951728132844, - "grad_norm": 0.017683392390608788, - "learning_rate": 5.95576612485204e-05, - "loss": 0.0044, - "step": 3203 - }, - { - "epoch": 2.4757675226877778, - "grad_norm": 0.011670832522213459, - "learning_rate": 5.953118156288355e-05, - "loss": 0.004, - "step": 3204 - }, - { - "epoch": 2.4765398725622707, - "grad_norm": 0.008941936306655407, - "learning_rate": 5.9504699102950055e-05, - "loss": 0.0031, - "step": 3205 - }, - { - "epoch": 2.4773122224367636, - "grad_norm": 0.019912226125597954, - "learning_rate": 5.9478213876428316e-05, - "loss": 0.0035, - "step": 3206 - }, - { - "epoch": 2.478084572311257, - "grad_norm": 0.009334501810371876, - "learning_rate": 5.945172589102754e-05, - "loss": 0.0041, - "step": 3207 - }, - { - "epoch": 2.4788569221857504, - "grad_norm": 0.010612251237034798, - "learning_rate": 5.942523515445775e-05, - "loss": 0.0038, - "step": 3208 - }, - { - "epoch": 2.4796292720602433, - "grad_norm": 0.016206124797463417, - "learning_rate": 5.9398741674429726e-05, - "loss": 0.004, - "step": 3209 - }, - { - "epoch": 2.4804016219347362, - "grad_norm": 0.021283496171236038, - "learning_rate": 5.937224545865513e-05, - "loss": 0.0041, - "step": 3210 - }, - { - "epoch": 2.4811739718092296, - "grad_norm": 0.008566698990762234, - "learning_rate": 5.934574651484633e-05, - "loss": 0.0036, - "step": 3211 - }, - { - "epoch": 2.4819463216837225, - "grad_norm": 0.011161359958350658, - "learning_rate": 5.931924485071655e-05, - "loss": 0.0039, - "step": 3212 - }, - { - "epoch": 2.482718671558216, - "grad_norm": 0.015776904299855232, - "learning_rate": 5.929274047397977e-05, - "loss": 0.004, - "step": 3213 - }, - { - "epoch": 2.483491021432709, - "grad_norm": 0.008733643218874931, - "learning_rate": 5.926623339235078e-05, - "loss": 0.0036, - "step": 3214 - }, - { - "epoch": 2.484263371307202, - "grad_norm": 0.012375583872199059, - "learning_rate": 5.923972361354516e-05, - "loss": 0.0039, - "step": 3215 - }, - { - "epoch": 2.485035721181695, - "grad_norm": 0.00923305656760931, - "learning_rate": 5.921321114527926e-05, - "loss": 0.0038, - "step": 3216 - }, - { - "epoch": 2.4858080710561885, - "grad_norm": 0.009024463593959808, - "learning_rate": 5.918669599527019e-05, - "loss": 0.0039, - "step": 3217 - }, - { - "epoch": 2.4865804209306814, - "grad_norm": 0.014953624457120895, - "learning_rate": 5.9160178171235926e-05, - "loss": 0.0041, - "step": 3218 - }, - { - "epoch": 2.487352770805175, - "grad_norm": 0.011638659983873367, - "learning_rate": 5.9133657680895114e-05, - "loss": 0.0038, - "step": 3219 - }, - { - "epoch": 2.4881251206796677, - "grad_norm": 0.011053145863115788, - "learning_rate": 5.910713453196727e-05, - "loss": 0.0033, - "step": 3220 - }, - { - "epoch": 2.488897470554161, - "grad_norm": 0.01029958389699459, - "learning_rate": 5.9080608732172617e-05, - "loss": 0.0034, - "step": 3221 - }, - { - "epoch": 2.489669820428654, - "grad_norm": 0.008136557415127754, - "learning_rate": 5.905408028923216e-05, - "loss": 0.0038, - "step": 3222 - }, - { - "epoch": 2.4904421703031474, - "grad_norm": 0.008826378732919693, - "learning_rate": 5.9027549210867725e-05, - "loss": 0.0034, - "step": 3223 - }, - { - "epoch": 2.4912145201776403, - "grad_norm": 0.015251542441546917, - "learning_rate": 5.900101550480185e-05, - "loss": 0.0038, - "step": 3224 - }, - { - "epoch": 2.4919868700521337, - "grad_norm": 0.008893954567611217, - "learning_rate": 5.8974479178757845e-05, - "loss": 0.0033, - "step": 3225 - }, - { - "epoch": 2.4927592199266266, - "grad_norm": 0.010919424705207348, - "learning_rate": 5.894794024045982e-05, - "loss": 0.0036, - "step": 3226 - }, - { - "epoch": 2.49353156980112, - "grad_norm": 0.02282760478556156, - "learning_rate": 5.892139869763258e-05, - "loss": 0.004, - "step": 3227 - }, - { - "epoch": 2.494303919675613, - "grad_norm": 0.012096663005650043, - "learning_rate": 5.8894854558001756e-05, - "loss": 0.0033, - "step": 3228 - }, - { - "epoch": 2.4950762695501063, - "grad_norm": 0.022534744814038277, - "learning_rate": 5.88683078292937e-05, - "loss": 0.0042, - "step": 3229 - }, - { - "epoch": 2.4958486194245992, - "grad_norm": 0.01121017150580883, - "learning_rate": 5.884175851923552e-05, - "loss": 0.0038, - "step": 3230 - }, - { - "epoch": 2.4966209692990926, - "grad_norm": 0.013400291092693806, - "learning_rate": 5.881520663555509e-05, - "loss": 0.0037, - "step": 3231 - }, - { - "epoch": 2.4973933191735855, - "grad_norm": 0.008116140030324459, - "learning_rate": 5.878865218598101e-05, - "loss": 0.0034, - "step": 3232 - }, - { - "epoch": 2.498165669048079, - "grad_norm": 0.012089090421795845, - "learning_rate": 5.876209517824264e-05, - "loss": 0.0034, - "step": 3233 - }, - { - "epoch": 2.498938018922572, - "grad_norm": 0.014870635233819485, - "learning_rate": 5.873553562007008e-05, - "loss": 0.0037, - "step": 3234 - }, - { - "epoch": 2.499710368797065, - "grad_norm": 0.010396681725978851, - "learning_rate": 5.8708973519194174e-05, - "loss": 0.004, - "step": 3235 - }, - { - "epoch": 2.500482718671558, - "grad_norm": 0.009356563910841942, - "learning_rate": 5.868240888334653e-05, - "loss": 0.0037, - "step": 3236 - }, - { - "epoch": 2.5012550685460515, - "grad_norm": 0.014082294888794422, - "learning_rate": 5.8655841720259444e-05, - "loss": 0.0036, - "step": 3237 - }, - { - "epoch": 2.5020274184205444, - "grad_norm": 0.013190063647925854, - "learning_rate": 5.8629272037665984e-05, - "loss": 0.0036, - "step": 3238 - }, - { - "epoch": 2.502799768295038, - "grad_norm": 0.011822250671684742, - "learning_rate": 5.860269984329995e-05, - "loss": 0.0045, - "step": 3239 - }, - { - "epoch": 2.5035721181695307, - "grad_norm": 0.01531610544770956, - "learning_rate": 5.857612514489585e-05, - "loss": 0.0036, - "step": 3240 - }, - { - "epoch": 2.504344468044024, - "grad_norm": 0.011635066010057926, - "learning_rate": 5.8549547950188964e-05, - "loss": 0.0037, - "step": 3241 - }, - { - "epoch": 2.505116817918517, - "grad_norm": 0.007300242781639099, - "learning_rate": 5.852296826691525e-05, - "loss": 0.0031, - "step": 3242 - }, - { - "epoch": 2.5058891677930104, - "grad_norm": 0.009953463450074196, - "learning_rate": 5.849638610281141e-05, - "loss": 0.0041, - "step": 3243 - }, - { - "epoch": 2.5066615176675033, - "grad_norm": 0.009325490333139896, - "learning_rate": 5.846980146561486e-05, - "loss": 0.0041, - "step": 3244 - }, - { - "epoch": 2.5074338675419963, - "grad_norm": 0.010487103834748268, - "learning_rate": 5.8443214363063795e-05, - "loss": 0.0037, - "step": 3245 - }, - { - "epoch": 2.5082062174164896, - "grad_norm": 0.01220545545220375, - "learning_rate": 5.8416624802897026e-05, - "loss": 0.0038, - "step": 3246 - }, - { - "epoch": 2.508978567290983, - "grad_norm": 0.012766262516379356, - "learning_rate": 5.8390032792854134e-05, - "loss": 0.0042, - "step": 3247 - }, - { - "epoch": 2.509750917165476, - "grad_norm": 0.007656930014491081, - "learning_rate": 5.836343834067546e-05, - "loss": 0.0046, - "step": 3248 - }, - { - "epoch": 2.510523267039969, - "grad_norm": 0.010122626088559628, - "learning_rate": 5.8336841454101945e-05, - "loss": 0.0043, - "step": 3249 - }, - { - "epoch": 2.5112956169144622, - "grad_norm": 0.011598845943808556, - "learning_rate": 5.831024214087534e-05, - "loss": 0.0038, - "step": 3250 - }, - { - "epoch": 2.5120679667889556, - "grad_norm": 0.01554699894040823, - "learning_rate": 5.828364040873806e-05, - "loss": 0.0041, - "step": 3251 - }, - { - "epoch": 2.5128403166634485, - "grad_norm": 0.010791851207613945, - "learning_rate": 5.82570362654332e-05, - "loss": 0.0042, - "step": 3252 - }, - { - "epoch": 2.5136126665379415, - "grad_norm": 0.009610738605260849, - "learning_rate": 5.8230429718704606e-05, - "loss": 0.0037, - "step": 3253 - }, - { - "epoch": 2.514385016412435, - "grad_norm": 0.011307166889309883, - "learning_rate": 5.820382077629678e-05, - "loss": 0.0041, - "step": 3254 - }, - { - "epoch": 2.515157366286928, - "grad_norm": 0.012049585580825806, - "learning_rate": 5.817720944595497e-05, - "loss": 0.0039, - "step": 3255 - }, - { - "epoch": 2.515929716161421, - "grad_norm": 0.01084988471120596, - "learning_rate": 5.815059573542509e-05, - "loss": 0.0042, - "step": 3256 - }, - { - "epoch": 2.516702066035914, - "grad_norm": 0.0171855129301548, - "learning_rate": 5.812397965245372e-05, - "loss": 0.0032, - "step": 3257 - }, - { - "epoch": 2.5174744159104074, - "grad_norm": 0.014846454374492168, - "learning_rate": 5.809736120478817e-05, - "loss": 0.0042, - "step": 3258 - }, - { - "epoch": 2.518246765784901, - "grad_norm": 0.012940598651766777, - "learning_rate": 5.807074040017645e-05, - "loss": 0.0038, - "step": 3259 - }, - { - "epoch": 2.5190191156593937, - "grad_norm": 0.008697236888110638, - "learning_rate": 5.8044117246367205e-05, - "loss": 0.0035, - "step": 3260 - }, - { - "epoch": 2.5197914655338867, - "grad_norm": 0.023681191727519035, - "learning_rate": 5.80174917511098e-05, - "loss": 0.0039, - "step": 3261 - }, - { - "epoch": 2.52056381540838, - "grad_norm": 0.009607069194316864, - "learning_rate": 5.799086392215427e-05, - "loss": 0.0036, - "step": 3262 - }, - { - "epoch": 2.5213361652828734, - "grad_norm": 0.009420047514140606, - "learning_rate": 5.7964233767251354e-05, - "loss": 0.0036, - "step": 3263 - }, - { - "epoch": 2.5221085151573663, - "grad_norm": 0.0105622923001647, - "learning_rate": 5.793760129415241e-05, - "loss": 0.0041, - "step": 3264 - }, - { - "epoch": 2.5228808650318593, - "grad_norm": 0.012450242415070534, - "learning_rate": 5.791096651060954e-05, - "loss": 0.0038, - "step": 3265 - }, - { - "epoch": 2.5236532149063526, - "grad_norm": 0.008220894262194633, - "learning_rate": 5.788432942437547e-05, - "loss": 0.0036, - "step": 3266 - }, - { - "epoch": 2.5244255647808456, - "grad_norm": 0.009492254815995693, - "learning_rate": 5.785769004320362e-05, - "loss": 0.0035, - "step": 3267 - }, - { - "epoch": 2.525197914655339, - "grad_norm": 0.0072402735240757465, - "learning_rate": 5.7831048374848055e-05, - "loss": 0.0038, - "step": 3268 - }, - { - "epoch": 2.525970264529832, - "grad_norm": 0.01509221363812685, - "learning_rate": 5.780440442706354e-05, - "loss": 0.0034, - "step": 3269 - }, - { - "epoch": 2.5267426144043252, - "grad_norm": 0.008010455407202244, - "learning_rate": 5.777775820760547e-05, - "loss": 0.0031, - "step": 3270 - }, - { - "epoch": 2.527514964278818, - "grad_norm": 0.008865603245794773, - "learning_rate": 5.775110972422992e-05, - "loss": 0.004, - "step": 3271 - }, - { - "epoch": 2.5282873141533115, - "grad_norm": 0.011722778901457787, - "learning_rate": 5.772445898469363e-05, - "loss": 0.0042, - "step": 3272 - }, - { - "epoch": 2.5290596640278045, - "grad_norm": 0.014061705209314823, - "learning_rate": 5.769780599675397e-05, - "loss": 0.0037, - "step": 3273 - }, - { - "epoch": 2.529832013902298, - "grad_norm": 0.011701086536049843, - "learning_rate": 5.767115076816898e-05, - "loss": 0.0037, - "step": 3274 - }, - { - "epoch": 2.5306043637767908, - "grad_norm": 0.0137869818136096, - "learning_rate": 5.764449330669738e-05, - "loss": 0.0039, - "step": 3275 - }, - { - "epoch": 2.531376713651284, - "grad_norm": 0.006596204359084368, - "learning_rate": 5.7617833620098495e-05, - "loss": 0.0034, - "step": 3276 - }, - { - "epoch": 2.532149063525777, - "grad_norm": 0.008754332549870014, - "learning_rate": 5.7591171716132285e-05, - "loss": 0.0036, - "step": 3277 - }, - { - "epoch": 2.5329214134002704, - "grad_norm": 0.011859533376991749, - "learning_rate": 5.7564507602559445e-05, - "loss": 0.0035, - "step": 3278 - }, - { - "epoch": 2.5336937632747634, - "grad_norm": 0.007447488605976105, - "learning_rate": 5.753784128714122e-05, - "loss": 0.004, - "step": 3279 - }, - { - "epoch": 2.5344661131492567, - "grad_norm": 0.01583264209330082, - "learning_rate": 5.751117277763953e-05, - "loss": 0.004, - "step": 3280 - }, - { - "epoch": 2.5352384630237497, - "grad_norm": 0.007798062637448311, - "learning_rate": 5.748450208181694e-05, - "loss": 0.0036, - "step": 3281 - }, - { - "epoch": 2.536010812898243, - "grad_norm": 0.013155490159988403, - "learning_rate": 5.745782920743663e-05, - "loss": 0.0038, - "step": 3282 - }, - { - "epoch": 2.536783162772736, - "grad_norm": 0.008394292555749416, - "learning_rate": 5.743115416226247e-05, - "loss": 0.0034, - "step": 3283 - }, - { - "epoch": 2.5375555126472293, - "grad_norm": 0.00787175353616476, - "learning_rate": 5.7404476954058864e-05, - "loss": 0.0034, - "step": 3284 - }, - { - "epoch": 2.5383278625217223, - "grad_norm": 0.008502156473696232, - "learning_rate": 5.7377797590590954e-05, - "loss": 0.0035, - "step": 3285 - }, - { - "epoch": 2.5391002123962156, - "grad_norm": 0.0073899938724935055, - "learning_rate": 5.7351116079624435e-05, - "loss": 0.0036, - "step": 3286 - }, - { - "epoch": 2.5398725622707086, - "grad_norm": 0.007314924616366625, - "learning_rate": 5.7324432428925643e-05, - "loss": 0.0036, - "step": 3287 - }, - { - "epoch": 2.540644912145202, - "grad_norm": 0.009392407722771168, - "learning_rate": 5.729774664626155e-05, - "loss": 0.0037, - "step": 3288 - }, - { - "epoch": 2.541417262019695, - "grad_norm": 0.009202550165355206, - "learning_rate": 5.727105873939975e-05, - "loss": 0.0037, - "step": 3289 - }, - { - "epoch": 2.5421896118941882, - "grad_norm": 0.01186344213783741, - "learning_rate": 5.7244368716108457e-05, - "loss": 0.0035, - "step": 3290 - }, - { - "epoch": 2.542961961768681, - "grad_norm": 0.00778360478579998, - "learning_rate": 5.7217676584156476e-05, - "loss": 0.0039, - "step": 3291 - }, - { - "epoch": 2.543734311643174, - "grad_norm": 0.010938924737274647, - "learning_rate": 5.7190982351313216e-05, - "loss": 0.0037, - "step": 3292 - }, - { - "epoch": 2.5445066615176675, - "grad_norm": 0.00829948391765356, - "learning_rate": 5.716428602534878e-05, - "loss": 0.0038, - "step": 3293 - }, - { - "epoch": 2.545279011392161, - "grad_norm": 0.01430403534322977, - "learning_rate": 5.7137587614033785e-05, - "loss": 0.0035, - "step": 3294 - }, - { - "epoch": 2.5460513612666538, - "grad_norm": 0.024432053789496422, - "learning_rate": 5.71108871251395e-05, - "loss": 0.0039, - "step": 3295 - }, - { - "epoch": 2.5468237111411467, - "grad_norm": 0.009824837557971478, - "learning_rate": 5.7084184566437794e-05, - "loss": 0.0035, - "step": 3296 - }, - { - "epoch": 2.54759606101564, - "grad_norm": 0.018679151311516762, - "learning_rate": 5.705747994570114e-05, - "loss": 0.0041, - "step": 3297 - }, - { - "epoch": 2.5483684108901334, - "grad_norm": 0.018965978175401688, - "learning_rate": 5.70307732707026e-05, - "loss": 0.0038, - "step": 3298 - }, - { - "epoch": 2.5491407607646264, - "grad_norm": 0.01286038663238287, - "learning_rate": 5.700406454921585e-05, - "loss": 0.0038, - "step": 3299 - }, - { - "epoch": 2.5499131106391193, - "grad_norm": 0.011224661022424698, - "learning_rate": 5.6977353789015154e-05, - "loss": 0.0036, - "step": 3300 - }, - { - "epoch": 2.5506854605136127, - "grad_norm": 0.01046342495828867, - "learning_rate": 5.695064099787537e-05, - "loss": 0.0034, - "step": 3301 - }, - { - "epoch": 2.551457810388106, - "grad_norm": 0.008502374403178692, - "learning_rate": 5.692392618357193e-05, - "loss": 0.004, - "step": 3302 - }, - { - "epoch": 2.552230160262599, - "grad_norm": 0.01280928310006857, - "learning_rate": 5.6897209353880885e-05, - "loss": 0.0043, - "step": 3303 - }, - { - "epoch": 2.553002510137092, - "grad_norm": 0.01096299383789301, - "learning_rate": 5.687049051657885e-05, - "loss": 0.0039, - "step": 3304 - }, - { - "epoch": 2.5537748600115853, - "grad_norm": 0.007984945550560951, - "learning_rate": 5.684376967944306e-05, - "loss": 0.004, - "step": 3305 - }, - { - "epoch": 2.5545472098860786, - "grad_norm": 0.00990123488008976, - "learning_rate": 5.681704685025127e-05, - "loss": 0.0036, - "step": 3306 - }, - { - "epoch": 2.5553195597605716, - "grad_norm": 0.009490500204265118, - "learning_rate": 5.679032203678186e-05, - "loss": 0.0037, - "step": 3307 - }, - { - "epoch": 2.5560919096350645, - "grad_norm": 0.012009241618216038, - "learning_rate": 5.6763595246813786e-05, - "loss": 0.0038, - "step": 3308 - }, - { - "epoch": 2.556864259509558, - "grad_norm": 0.009753881022334099, - "learning_rate": 5.673686648812655e-05, - "loss": 0.0032, - "step": 3309 - }, - { - "epoch": 2.5576366093840512, - "grad_norm": 0.008679524064064026, - "learning_rate": 5.6710135768500294e-05, - "loss": 0.0038, - "step": 3310 - }, - { - "epoch": 2.558408959258544, - "grad_norm": 0.00896353181451559, - "learning_rate": 5.668340309571564e-05, - "loss": 0.0037, - "step": 3311 - }, - { - "epoch": 2.559181309133037, - "grad_norm": 0.010172102600336075, - "learning_rate": 5.665666847755383e-05, - "loss": 0.0038, - "step": 3312 - }, - { - "epoch": 2.5599536590075305, - "grad_norm": 0.022809471935033798, - "learning_rate": 5.6629931921796686e-05, - "loss": 0.0038, - "step": 3313 - }, - { - "epoch": 2.5607260088820234, - "grad_norm": 0.01311578880995512, - "learning_rate": 5.660319343622654e-05, - "loss": 0.0037, - "step": 3314 - }, - { - "epoch": 2.5614983587565168, - "grad_norm": 0.013067226856946945, - "learning_rate": 5.6576453028626354e-05, - "loss": 0.0039, - "step": 3315 - }, - { - "epoch": 2.5622707086310097, - "grad_norm": 0.01835964061319828, - "learning_rate": 5.654971070677961e-05, - "loss": 0.004, - "step": 3316 - }, - { - "epoch": 2.563043058505503, - "grad_norm": 0.034658029675483704, - "learning_rate": 5.652296647847032e-05, - "loss": 0.004, - "step": 3317 - }, - { - "epoch": 2.563815408379996, - "grad_norm": 0.009408431127667427, - "learning_rate": 5.649622035148312e-05, - "loss": 0.0042, - "step": 3318 - }, - { - "epoch": 2.5645877582544894, - "grad_norm": 0.026709269732236862, - "learning_rate": 5.6469472333603136e-05, - "loss": 0.0041, - "step": 3319 - }, - { - "epoch": 2.5653601081289823, - "grad_norm": 0.030673233792185783, - "learning_rate": 5.644272243261608e-05, - "loss": 0.004, - "step": 3320 - }, - { - "epoch": 2.5661324580034757, - "grad_norm": 0.011289527639746666, - "learning_rate": 5.6415970656308213e-05, - "loss": 0.0039, - "step": 3321 - }, - { - "epoch": 2.5669048078779686, - "grad_norm": 0.016593938693404198, - "learning_rate": 5.63892170124663e-05, - "loss": 0.0038, - "step": 3322 - }, - { - "epoch": 2.567677157752462, - "grad_norm": 0.024493860080838203, - "learning_rate": 5.6362461508877704e-05, - "loss": 0.0035, - "step": 3323 - }, - { - "epoch": 2.568449507626955, - "grad_norm": 0.011858409270644188, - "learning_rate": 5.6335704153330305e-05, - "loss": 0.0036, - "step": 3324 - }, - { - "epoch": 2.5692218575014483, - "grad_norm": 0.010189369320869446, - "learning_rate": 5.630894495361252e-05, - "loss": 0.0032, - "step": 3325 - }, - { - "epoch": 2.569994207375941, - "grad_norm": 0.009152532555162907, - "learning_rate": 5.62821839175133e-05, - "loss": 0.0032, - "step": 3326 - }, - { - "epoch": 2.5707665572504346, - "grad_norm": 0.009873981587588787, - "learning_rate": 5.6255421052822134e-05, - "loss": 0.0035, - "step": 3327 - }, - { - "epoch": 2.5715389071249275, - "grad_norm": 0.013396297581493855, - "learning_rate": 5.622865636732906e-05, - "loss": 0.0033, - "step": 3328 - }, - { - "epoch": 2.572311256999421, - "grad_norm": 0.011942288838326931, - "learning_rate": 5.620188986882461e-05, - "loss": 0.0037, - "step": 3329 - }, - { - "epoch": 2.573083606873914, - "grad_norm": 0.008207396604120731, - "learning_rate": 5.617512156509989e-05, - "loss": 0.0033, - "step": 3330 - }, - { - "epoch": 2.573855956748407, - "grad_norm": 0.011415604501962662, - "learning_rate": 5.614835146394648e-05, - "loss": 0.0038, - "step": 3331 - }, - { - "epoch": 2.5746283066229, - "grad_norm": 0.01788908615708351, - "learning_rate": 5.612157957315654e-05, - "loss": 0.0034, - "step": 3332 - }, - { - "epoch": 2.5754006564973935, - "grad_norm": 0.010203823447227478, - "learning_rate": 5.609480590052268e-05, - "loss": 0.0037, - "step": 3333 - }, - { - "epoch": 2.5761730063718864, - "grad_norm": 0.009772097691893578, - "learning_rate": 5.606803045383811e-05, - "loss": 0.0038, - "step": 3334 - }, - { - "epoch": 2.5769453562463798, - "grad_norm": 0.010885546915233135, - "learning_rate": 5.6041253240896495e-05, - "loss": 0.0038, - "step": 3335 - }, - { - "epoch": 2.5777177061208727, - "grad_norm": 0.017759766429662704, - "learning_rate": 5.6014474269492036e-05, - "loss": 0.0038, - "step": 3336 - }, - { - "epoch": 2.578490055995366, - "grad_norm": 0.013944578357040882, - "learning_rate": 5.598769354741945e-05, - "loss": 0.0038, - "step": 3337 - }, - { - "epoch": 2.579262405869859, - "grad_norm": 0.010490084066987038, - "learning_rate": 5.5960911082473956e-05, - "loss": 0.0043, - "step": 3338 - }, - { - "epoch": 2.580034755744352, - "grad_norm": 0.023333175107836723, - "learning_rate": 5.5934126882451266e-05, - "loss": 0.004, - "step": 3339 - }, - { - "epoch": 2.5808071056188453, - "grad_norm": 0.01074046641588211, - "learning_rate": 5.5907340955147645e-05, - "loss": 0.0037, - "step": 3340 - }, - { - "epoch": 2.5815794554933387, - "grad_norm": 0.030795851722359657, - "learning_rate": 5.588055330835981e-05, - "loss": 0.0036, - "step": 3341 - }, - { - "epoch": 2.5823518053678316, - "grad_norm": 0.009321062825620174, - "learning_rate": 5.5853763949884976e-05, - "loss": 0.0037, - "step": 3342 - }, - { - "epoch": 2.5831241552423245, - "grad_norm": 0.011110929772257805, - "learning_rate": 5.5826972887520935e-05, - "loss": 0.0041, - "step": 3343 - }, - { - "epoch": 2.583896505116818, - "grad_norm": 0.010194243863224983, - "learning_rate": 5.580018012906586e-05, - "loss": 0.0037, - "step": 3344 - }, - { - "epoch": 2.5846688549913113, - "grad_norm": 0.011587006971240044, - "learning_rate": 5.577338568231852e-05, - "loss": 0.0036, - "step": 3345 - }, - { - "epoch": 2.585441204865804, - "grad_norm": 0.00983353890478611, - "learning_rate": 5.574658955507811e-05, - "loss": 0.0038, - "step": 3346 - }, - { - "epoch": 2.586213554740297, - "grad_norm": 0.01402513962239027, - "learning_rate": 5.571979175514431e-05, - "loss": 0.0039, - "step": 3347 - }, - { - "epoch": 2.5869859046147905, - "grad_norm": 0.018349068239331245, - "learning_rate": 5.5692992290317366e-05, - "loss": 0.0043, - "step": 3348 - }, - { - "epoch": 2.587758254489284, - "grad_norm": 0.009055593982338905, - "learning_rate": 5.566619116839792e-05, - "loss": 0.0042, - "step": 3349 - }, - { - "epoch": 2.588530604363777, - "grad_norm": 0.01099521853029728, - "learning_rate": 5.5639388397187134e-05, - "loss": 0.0038, - "step": 3350 - }, - { - "epoch": 2.5893029542382697, - "grad_norm": 0.007961034774780273, - "learning_rate": 5.5612583984486666e-05, - "loss": 0.0037, - "step": 3351 - }, - { - "epoch": 2.590075304112763, - "grad_norm": 0.007924498990178108, - "learning_rate": 5.558577793809861e-05, - "loss": 0.004, - "step": 3352 - }, - { - "epoch": 2.5908476539872565, - "grad_norm": 0.010857324115931988, - "learning_rate": 5.555897026582555e-05, - "loss": 0.0037, - "step": 3353 - }, - { - "epoch": 2.5916200038617494, - "grad_norm": 0.011452377773821354, - "learning_rate": 5.553216097547058e-05, - "loss": 0.0042, - "step": 3354 - }, - { - "epoch": 2.5923923537362423, - "grad_norm": 0.010629558935761452, - "learning_rate": 5.550535007483724e-05, - "loss": 0.0039, - "step": 3355 - }, - { - "epoch": 2.5931647036107357, - "grad_norm": 0.011495045386254787, - "learning_rate": 5.547853757172951e-05, - "loss": 0.0035, - "step": 3356 - }, - { - "epoch": 2.593937053485229, - "grad_norm": 0.0070584677159786224, - "learning_rate": 5.545172347395186e-05, - "loss": 0.0034, - "step": 3357 - }, - { - "epoch": 2.594709403359722, - "grad_norm": 0.008315347135066986, - "learning_rate": 5.542490778930924e-05, - "loss": 0.0035, - "step": 3358 - }, - { - "epoch": 2.595481753234215, - "grad_norm": 0.016000311821699142, - "learning_rate": 5.539809052560706e-05, - "loss": 0.0039, - "step": 3359 - }, - { - "epoch": 2.5962541031087083, - "grad_norm": 0.016649696975946426, - "learning_rate": 5.537127169065116e-05, - "loss": 0.0036, - "step": 3360 - }, - { - "epoch": 2.5970264529832012, - "grad_norm": 0.011692811734974384, - "learning_rate": 5.534445129224786e-05, - "loss": 0.0038, - "step": 3361 - }, - { - "epoch": 2.5977988028576946, - "grad_norm": 0.008047428913414478, - "learning_rate": 5.531762933820391e-05, - "loss": 0.0037, - "step": 3362 - }, - { - "epoch": 2.5985711527321875, - "grad_norm": 0.009794252924621105, - "learning_rate": 5.529080583632656e-05, - "loss": 0.0036, - "step": 3363 - }, - { - "epoch": 2.599343502606681, - "grad_norm": 0.013193776831030846, - "learning_rate": 5.5263980794423484e-05, - "loss": 0.0032, - "step": 3364 - }, - { - "epoch": 2.600115852481174, - "grad_norm": 0.010043804533779621, - "learning_rate": 5.523715422030279e-05, - "loss": 0.0039, - "step": 3365 - }, - { - "epoch": 2.600888202355667, - "grad_norm": 0.013375181704759598, - "learning_rate": 5.5210326121773034e-05, - "loss": 0.004, - "step": 3366 - }, - { - "epoch": 2.60166055223016, - "grad_norm": 0.013488434255123138, - "learning_rate": 5.5183496506643264e-05, - "loss": 0.0035, - "step": 3367 - }, - { - "epoch": 2.6024329021046535, - "grad_norm": 0.01303744688630104, - "learning_rate": 5.51566653827229e-05, - "loss": 0.004, - "step": 3368 - }, - { - "epoch": 2.6032052519791464, - "grad_norm": 0.009145365096628666, - "learning_rate": 5.5129832757821834e-05, - "loss": 0.0037, - "step": 3369 - }, - { - "epoch": 2.60397760185364, - "grad_norm": 0.010411336086690426, - "learning_rate": 5.5102998639750424e-05, - "loss": 0.0039, - "step": 3370 - }, - { - "epoch": 2.6047499517281327, - "grad_norm": 0.008222127333283424, - "learning_rate": 5.507616303631941e-05, - "loss": 0.0036, - "step": 3371 - }, - { - "epoch": 2.605522301602626, - "grad_norm": 0.011100389994680882, - "learning_rate": 5.5049325955339984e-05, - "loss": 0.0039, - "step": 3372 - }, - { - "epoch": 2.606294651477119, - "grad_norm": 0.008225099183619022, - "learning_rate": 5.502248740462379e-05, - "loss": 0.0037, - "step": 3373 - }, - { - "epoch": 2.6070670013516124, - "grad_norm": 0.016636086627840996, - "learning_rate": 5.4995647391982875e-05, - "loss": 0.0042, - "step": 3374 - }, - { - "epoch": 2.6078393512261053, - "grad_norm": 0.007184876129031181, - "learning_rate": 5.49688059252297e-05, - "loss": 0.0037, - "step": 3375 - }, - { - "epoch": 2.6086117011005987, - "grad_norm": 0.008062731474637985, - "learning_rate": 5.49419630121772e-05, - "loss": 0.0036, - "step": 3376 - }, - { - "epoch": 2.6093840509750916, - "grad_norm": 0.012067383155226707, - "learning_rate": 5.491511866063865e-05, - "loss": 0.0038, - "step": 3377 - }, - { - "epoch": 2.610156400849585, - "grad_norm": 0.00804864801466465, - "learning_rate": 5.488827287842786e-05, - "loss": 0.0036, - "step": 3378 - }, - { - "epoch": 2.610928750724078, - "grad_norm": 0.009306215681135654, - "learning_rate": 5.486142567335894e-05, - "loss": 0.0038, - "step": 3379 - }, - { - "epoch": 2.6117011005985713, - "grad_norm": 0.012968027032911777, - "learning_rate": 5.483457705324646e-05, - "loss": 0.0037, - "step": 3380 - }, - { - "epoch": 2.6124734504730642, - "grad_norm": 0.009706765413284302, - "learning_rate": 5.480772702590544e-05, - "loss": 0.0039, - "step": 3381 - }, - { - "epoch": 2.6132458003475576, - "grad_norm": 0.009779158048331738, - "learning_rate": 5.478087559915123e-05, - "loss": 0.0039, - "step": 3382 - }, - { - "epoch": 2.6140181502220505, - "grad_norm": 0.008816637098789215, - "learning_rate": 5.4754022780799665e-05, - "loss": 0.0036, - "step": 3383 - }, - { - "epoch": 2.614790500096544, - "grad_norm": 0.010408301837742329, - "learning_rate": 5.4727168578666956e-05, - "loss": 0.0035, - "step": 3384 - }, - { - "epoch": 2.615562849971037, - "grad_norm": 0.00888033676892519, - "learning_rate": 5.470031300056968e-05, - "loss": 0.0035, - "step": 3385 - }, - { - "epoch": 2.6163351998455298, - "grad_norm": 0.00977409165352583, - "learning_rate": 5.4673456054324875e-05, - "loss": 0.0039, - "step": 3386 - }, - { - "epoch": 2.617107549720023, - "grad_norm": 0.01601114310324192, - "learning_rate": 5.4646597747749916e-05, - "loss": 0.0039, - "step": 3387 - }, - { - "epoch": 2.6178798995945165, - "grad_norm": 0.014116347767412663, - "learning_rate": 5.461973808866265e-05, - "loss": 0.0041, - "step": 3388 - }, - { - "epoch": 2.6186522494690094, - "grad_norm": 0.01782173663377762, - "learning_rate": 5.4592877084881254e-05, - "loss": 0.0044, - "step": 3389 - }, - { - "epoch": 2.6194245993435024, - "grad_norm": 0.00860170554369688, - "learning_rate": 5.45660147442243e-05, - "loss": 0.0035, - "step": 3390 - }, - { - "epoch": 2.6201969492179957, - "grad_norm": 0.017179779708385468, - "learning_rate": 5.4539151074510805e-05, - "loss": 0.0036, - "step": 3391 - }, - { - "epoch": 2.620969299092489, - "grad_norm": 0.010571127757430077, - "learning_rate": 5.45122860835601e-05, - "loss": 0.0039, - "step": 3392 - }, - { - "epoch": 2.621741648966982, - "grad_norm": 0.013925640843808651, - "learning_rate": 5.448541977919195e-05, - "loss": 0.0035, - "step": 3393 - }, - { - "epoch": 2.622513998841475, - "grad_norm": 0.030185796320438385, - "learning_rate": 5.4458552169226486e-05, - "loss": 0.0041, - "step": 3394 - }, - { - "epoch": 2.6232863487159683, - "grad_norm": 0.014250488951802254, - "learning_rate": 5.4431683261484224e-05, - "loss": 0.0038, - "step": 3395 - }, - { - "epoch": 2.6240586985904617, - "grad_norm": 0.009191480465233326, - "learning_rate": 5.440481306378604e-05, - "loss": 0.0039, - "step": 3396 - }, - { - "epoch": 2.6248310484649546, - "grad_norm": 0.028713863343000412, - "learning_rate": 5.4377941583953206e-05, - "loss": 0.0042, - "step": 3397 - }, - { - "epoch": 2.6256033983394476, - "grad_norm": 0.019584739580750465, - "learning_rate": 5.4351068829807375e-05, - "loss": 0.0037, - "step": 3398 - }, - { - "epoch": 2.626375748213941, - "grad_norm": 0.009171944111585617, - "learning_rate": 5.432419480917053e-05, - "loss": 0.0037, - "step": 3399 - }, - { - "epoch": 2.6271480980884343, - "grad_norm": 0.008588941767811775, - "learning_rate": 5.429731952986506e-05, - "loss": 0.0038, - "step": 3400 - }, - { - "epoch": 2.6279204479629272, - "grad_norm": 0.01289437897503376, - "learning_rate": 5.427044299971372e-05, - "loss": 0.004, - "step": 3401 - }, - { - "epoch": 2.62869279783742, - "grad_norm": 0.025774981826543808, - "learning_rate": 5.4243565226539613e-05, - "loss": 0.0044, - "step": 3402 - }, - { - "epoch": 2.6294651477119135, - "grad_norm": 0.011850577779114246, - "learning_rate": 5.4216686218166204e-05, - "loss": 0.004, - "step": 3403 - }, - { - "epoch": 2.630237497586407, - "grad_norm": 0.01651032827794552, - "learning_rate": 5.418980598241733e-05, - "loss": 0.0039, - "step": 3404 - }, - { - "epoch": 2.6310098474609, - "grad_norm": 0.01289384812116623, - "learning_rate": 5.416292452711716e-05, - "loss": 0.0035, - "step": 3405 - }, - { - "epoch": 2.6317821973353928, - "grad_norm": 0.018764130771160126, - "learning_rate": 5.413604186009027e-05, - "loss": 0.0035, - "step": 3406 - }, - { - "epoch": 2.632554547209886, - "grad_norm": 0.01137736439704895, - "learning_rate": 5.410915798916151e-05, - "loss": 0.004, - "step": 3407 - }, - { - "epoch": 2.633326897084379, - "grad_norm": 0.00818009115755558, - "learning_rate": 5.4082272922156176e-05, - "loss": 0.0038, - "step": 3408 - }, - { - "epoch": 2.6340992469588724, - "grad_norm": 0.017443781718611717, - "learning_rate": 5.405538666689982e-05, - "loss": 0.0038, - "step": 3409 - }, - { - "epoch": 2.6348715968333654, - "grad_norm": 0.008871911093592644, - "learning_rate": 5.402849923121839e-05, - "loss": 0.0037, - "step": 3410 - }, - { - "epoch": 2.6356439467078587, - "grad_norm": 0.00904800370335579, - "learning_rate": 5.400161062293819e-05, - "loss": 0.0036, - "step": 3411 - }, - { - "epoch": 2.6364162965823517, - "grad_norm": 0.00901111587882042, - "learning_rate": 5.3974720849885807e-05, - "loss": 0.0035, - "step": 3412 - }, - { - "epoch": 2.637188646456845, - "grad_norm": 0.012365750037133694, - "learning_rate": 5.394782991988826e-05, - "loss": 0.0042, - "step": 3413 - }, - { - "epoch": 2.637960996331338, - "grad_norm": 0.009294239804148674, - "learning_rate": 5.39209378407728e-05, - "loss": 0.0037, - "step": 3414 - }, - { - "epoch": 2.6387333462058313, - "grad_norm": 0.01763080060482025, - "learning_rate": 5.3894044620367056e-05, - "loss": 0.0037, - "step": 3415 - }, - { - "epoch": 2.6395056960803243, - "grad_norm": 0.03039529360830784, - "learning_rate": 5.386715026649906e-05, - "loss": 0.0034, - "step": 3416 - }, - { - "epoch": 2.6402780459548176, - "grad_norm": 0.009638811461627483, - "learning_rate": 5.384025478699702e-05, - "loss": 0.0039, - "step": 3417 - }, - { - "epoch": 2.6410503958293106, - "grad_norm": 0.016960902139544487, - "learning_rate": 5.381335818968962e-05, - "loss": 0.0034, - "step": 3418 - }, - { - "epoch": 2.641822745703804, - "grad_norm": 0.02310163341462612, - "learning_rate": 5.378646048240581e-05, - "loss": 0.0032, - "step": 3419 - }, - { - "epoch": 2.642595095578297, - "grad_norm": 0.020672015845775604, - "learning_rate": 5.3759561672974825e-05, - "loss": 0.004, - "step": 3420 - }, - { - "epoch": 2.6433674454527902, - "grad_norm": 0.010249595157802105, - "learning_rate": 5.373266176922629e-05, - "loss": 0.0038, - "step": 3421 - }, - { - "epoch": 2.644139795327283, - "grad_norm": 0.01078090351074934, - "learning_rate": 5.3705760778990114e-05, - "loss": 0.0032, - "step": 3422 - }, - { - "epoch": 2.6449121452017765, - "grad_norm": 0.009476094506680965, - "learning_rate": 5.36788587100965e-05, - "loss": 0.0035, - "step": 3423 - }, - { - "epoch": 2.6456844950762695, - "grad_norm": 0.008092152886092663, - "learning_rate": 5.365195557037602e-05, - "loss": 0.0034, - "step": 3424 - }, - { - "epoch": 2.646456844950763, - "grad_norm": 0.02135201171040535, - "learning_rate": 5.362505136765952e-05, - "loss": 0.004, - "step": 3425 - }, - { - "epoch": 2.6472291948252558, - "grad_norm": 0.00874929130077362, - "learning_rate": 5.359814610977816e-05, - "loss": 0.0037, - "step": 3426 - }, - { - "epoch": 2.648001544699749, - "grad_norm": 0.011419526301324368, - "learning_rate": 5.35712398045634e-05, - "loss": 0.0041, - "step": 3427 - }, - { - "epoch": 2.648773894574242, - "grad_norm": 0.01089134905487299, - "learning_rate": 5.3544332459847034e-05, - "loss": 0.0039, - "step": 3428 - }, - { - "epoch": 2.6495462444487354, - "grad_norm": 0.016542969271540642, - "learning_rate": 5.3517424083461134e-05, - "loss": 0.0033, - "step": 3429 - }, - { - "epoch": 2.6503185943232284, - "grad_norm": 0.011641754768788815, - "learning_rate": 5.349051468323807e-05, - "loss": 0.0037, - "step": 3430 - }, - { - "epoch": 2.6510909441977217, - "grad_norm": 0.00846849475055933, - "learning_rate": 5.346360426701051e-05, - "loss": 0.0037, - "step": 3431 - }, - { - "epoch": 2.6518632940722147, - "grad_norm": 0.013782123103737831, - "learning_rate": 5.343669284261147e-05, - "loss": 0.0035, - "step": 3432 - }, - { - "epoch": 2.6526356439467076, - "grad_norm": 0.014771669171750546, - "learning_rate": 5.340978041787417e-05, - "loss": 0.0044, - "step": 3433 - }, - { - "epoch": 2.653407993821201, - "grad_norm": 0.010918432846665382, - "learning_rate": 5.3382867000632174e-05, - "loss": 0.0034, - "step": 3434 - }, - { - "epoch": 2.6541803436956943, - "grad_norm": 0.008296050131320953, - "learning_rate": 5.335595259871934e-05, - "loss": 0.0041, - "step": 3435 - }, - { - "epoch": 2.6549526935701873, - "grad_norm": 0.013633402064442635, - "learning_rate": 5.33290372199698e-05, - "loss": 0.0037, - "step": 3436 - }, - { - "epoch": 2.65572504344468, - "grad_norm": 0.0165613554418087, - "learning_rate": 5.3302120872217955e-05, - "loss": 0.004, - "step": 3437 - }, - { - "epoch": 2.6564973933191736, - "grad_norm": 0.015982868149876595, - "learning_rate": 5.327520356329853e-05, - "loss": 0.0042, - "step": 3438 - }, - { - "epoch": 2.657269743193667, - "grad_norm": 0.015539965592324734, - "learning_rate": 5.3248285301046476e-05, - "loss": 0.0044, - "step": 3439 - }, - { - "epoch": 2.65804209306816, - "grad_norm": 0.016152702271938324, - "learning_rate": 5.3221366093297066e-05, - "loss": 0.0037, - "step": 3440 - }, - { - "epoch": 2.658814442942653, - "grad_norm": 0.014841979369521141, - "learning_rate": 5.3194445947885816e-05, - "loss": 0.0036, - "step": 3441 - }, - { - "epoch": 2.659586792817146, - "grad_norm": 0.010280294343829155, - "learning_rate": 5.316752487264853e-05, - "loss": 0.0044, - "step": 3442 - }, - { - "epoch": 2.6603591426916395, - "grad_norm": 0.008196298032999039, - "learning_rate": 5.314060287542132e-05, - "loss": 0.0032, - "step": 3443 - }, - { - "epoch": 2.6611314925661325, - "grad_norm": 0.01514330506324768, - "learning_rate": 5.311367996404049e-05, - "loss": 0.0039, - "step": 3444 - }, - { - "epoch": 2.6619038424406254, - "grad_norm": 0.00861449260264635, - "learning_rate": 5.308675614634264e-05, - "loss": 0.0036, - "step": 3445 - }, - { - "epoch": 2.6626761923151188, - "grad_norm": 0.010334886610507965, - "learning_rate": 5.305983143016469e-05, - "loss": 0.004, - "step": 3446 - }, - { - "epoch": 2.663448542189612, - "grad_norm": 0.01663089171051979, - "learning_rate": 5.303290582334372e-05, - "loss": 0.0038, - "step": 3447 - }, - { - "epoch": 2.664220892064105, - "grad_norm": 0.009869327768683434, - "learning_rate": 5.300597933371716e-05, - "loss": 0.0038, - "step": 3448 - }, - { - "epoch": 2.664993241938598, - "grad_norm": 0.011241073720157146, - "learning_rate": 5.297905196912266e-05, - "loss": 0.0039, - "step": 3449 - }, - { - "epoch": 2.6657655918130914, - "grad_norm": 0.02179298736155033, - "learning_rate": 5.29521237373981e-05, - "loss": 0.0035, - "step": 3450 - }, - { - "epoch": 2.6665379416875847, - "grad_norm": 0.01262400671839714, - "learning_rate": 5.292519464638166e-05, - "loss": 0.0033, - "step": 3451 - }, - { - "epoch": 2.6673102915620777, - "grad_norm": 0.008806941099464893, - "learning_rate": 5.289826470391174e-05, - "loss": 0.0042, - "step": 3452 - }, - { - "epoch": 2.6680826414365706, - "grad_norm": 0.01608050800859928, - "learning_rate": 5.287133391782699e-05, - "loss": 0.0041, - "step": 3453 - }, - { - "epoch": 2.668854991311064, - "grad_norm": 0.021533267572522163, - "learning_rate": 5.2844402295966346e-05, - "loss": 0.004, - "step": 3454 - }, - { - "epoch": 2.6696273411855573, - "grad_norm": 0.01042283233255148, - "learning_rate": 5.281746984616889e-05, - "loss": 0.0037, - "step": 3455 - }, - { - "epoch": 2.6703996910600503, - "grad_norm": 0.015076697804033756, - "learning_rate": 5.2790536576274055e-05, - "loss": 0.0041, - "step": 3456 - }, - { - "epoch": 2.671172040934543, - "grad_norm": 0.019963202998042107, - "learning_rate": 5.276360249412144e-05, - "loss": 0.004, - "step": 3457 - }, - { - "epoch": 2.6719443908090366, - "grad_norm": 0.0230875164270401, - "learning_rate": 5.2736667607550925e-05, - "loss": 0.0035, - "step": 3458 - }, - { - "epoch": 2.6727167406835295, - "grad_norm": 0.011236096732318401, - "learning_rate": 5.2709731924402596e-05, - "loss": 0.0037, - "step": 3459 - }, - { - "epoch": 2.673489090558023, - "grad_norm": 0.007921960204839706, - "learning_rate": 5.2682795452516784e-05, - "loss": 0.0034, - "step": 3460 - }, - { - "epoch": 2.674261440432516, - "grad_norm": 0.018090009689331055, - "learning_rate": 5.265585819973403e-05, - "loss": 0.0045, - "step": 3461 - }, - { - "epoch": 2.675033790307009, - "grad_norm": 0.019070502370595932, - "learning_rate": 5.2628920173895134e-05, - "loss": 0.0043, - "step": 3462 - }, - { - "epoch": 2.675806140181502, - "grad_norm": 0.015746183693408966, - "learning_rate": 5.26019813828411e-05, - "loss": 0.0035, - "step": 3463 - }, - { - "epoch": 2.6765784900559955, - "grad_norm": 0.010020890273153782, - "learning_rate": 5.257504183441316e-05, - "loss": 0.0031, - "step": 3464 - }, - { - "epoch": 2.6773508399304884, - "grad_norm": 0.027315745130181313, - "learning_rate": 5.254810153645277e-05, - "loss": 0.004, - "step": 3465 - }, - { - "epoch": 2.6781231898049818, - "grad_norm": 0.02392551489174366, - "learning_rate": 5.2521160496801566e-05, - "loss": 0.0037, - "step": 3466 - }, - { - "epoch": 2.6788955396794747, - "grad_norm": 0.009675184264779091, - "learning_rate": 5.2494218723301483e-05, - "loss": 0.0036, - "step": 3467 - }, - { - "epoch": 2.679667889553968, - "grad_norm": 0.021859876811504364, - "learning_rate": 5.2467276223794595e-05, - "loss": 0.0035, - "step": 3468 - }, - { - "epoch": 2.680440239428461, - "grad_norm": 0.01790783368051052, - "learning_rate": 5.244033300612321e-05, - "loss": 0.0039, - "step": 3469 - }, - { - "epoch": 2.6812125893029544, - "grad_norm": 0.012461425736546516, - "learning_rate": 5.241338907812986e-05, - "loss": 0.0032, - "step": 3470 - }, - { - "epoch": 2.6819849391774473, - "grad_norm": 0.00857500173151493, - "learning_rate": 5.2386444447657256e-05, - "loss": 0.0034, - "step": 3471 - }, - { - "epoch": 2.6827572890519407, - "grad_norm": 0.011089037172496319, - "learning_rate": 5.235949912254834e-05, - "loss": 0.0042, - "step": 3472 - }, - { - "epoch": 2.6835296389264336, - "grad_norm": 0.01732746884226799, - "learning_rate": 5.233255311064625e-05, - "loss": 0.0041, - "step": 3473 - }, - { - "epoch": 2.684301988800927, - "grad_norm": 0.026455463841557503, - "learning_rate": 5.2305606419794305e-05, - "loss": 0.004, - "step": 3474 - }, - { - "epoch": 2.68507433867542, - "grad_norm": 0.006931504234671593, - "learning_rate": 5.227865905783603e-05, - "loss": 0.0033, - "step": 3475 - }, - { - "epoch": 2.6858466885499133, - "grad_norm": 0.022621190175414085, - "learning_rate": 5.225171103261519e-05, - "loss": 0.0039, - "step": 3476 - }, - { - "epoch": 2.686619038424406, - "grad_norm": 0.015190225094556808, - "learning_rate": 5.2224762351975655e-05, - "loss": 0.0035, - "step": 3477 - }, - { - "epoch": 2.6873913882988996, - "grad_norm": 0.010020987130701542, - "learning_rate": 5.2197813023761564e-05, - "loss": 0.0042, - "step": 3478 - }, - { - "epoch": 2.6881637381733925, - "grad_norm": 0.009694269858300686, - "learning_rate": 5.217086305581722e-05, - "loss": 0.0038, - "step": 3479 - }, - { - "epoch": 2.6889360880478854, - "grad_norm": 0.014760488644242287, - "learning_rate": 5.2143912455987075e-05, - "loss": 0.0039, - "step": 3480 - }, - { - "epoch": 2.689708437922379, - "grad_norm": 0.008779791183769703, - "learning_rate": 5.211696123211585e-05, - "loss": 0.0037, - "step": 3481 - }, - { - "epoch": 2.690480787796872, - "grad_norm": 0.015196164138615131, - "learning_rate": 5.209000939204832e-05, - "loss": 0.0041, - "step": 3482 - }, - { - "epoch": 2.691253137671365, - "grad_norm": 0.011902782134711742, - "learning_rate": 5.206305694362959e-05, - "loss": 0.004, - "step": 3483 - }, - { - "epoch": 2.692025487545858, - "grad_norm": 0.008937469683587551, - "learning_rate": 5.2036103894704825e-05, - "loss": 0.0034, - "step": 3484 - }, - { - "epoch": 2.6927978374203514, - "grad_norm": 0.010444491170346737, - "learning_rate": 5.20091502531194e-05, - "loss": 0.0038, - "step": 3485 - }, - { - "epoch": 2.6935701872948448, - "grad_norm": 0.014897272922098637, - "learning_rate": 5.1982196026718896e-05, - "loss": 0.0035, - "step": 3486 - }, - { - "epoch": 2.6943425371693377, - "grad_norm": 0.011194158345460892, - "learning_rate": 5.195524122334903e-05, - "loss": 0.004, - "step": 3487 - }, - { - "epoch": 2.6951148870438306, - "grad_norm": 0.0157900582998991, - "learning_rate": 5.1928285850855676e-05, - "loss": 0.0043, - "step": 3488 - }, - { - "epoch": 2.695887236918324, - "grad_norm": 0.00894899107515812, - "learning_rate": 5.190132991708492e-05, - "loss": 0.0032, - "step": 3489 - }, - { - "epoch": 2.6966595867928174, - "grad_norm": 0.00943822506815195, - "learning_rate": 5.187437342988295e-05, - "loss": 0.0037, - "step": 3490 - }, - { - "epoch": 2.6974319366673103, - "grad_norm": 0.009946034289896488, - "learning_rate": 5.184741639709618e-05, - "loss": 0.0043, - "step": 3491 - }, - { - "epoch": 2.698204286541803, - "grad_norm": 0.010902882553637028, - "learning_rate": 5.1820458826571126e-05, - "loss": 0.0042, - "step": 3492 - }, - { - "epoch": 2.6989766364162966, - "grad_norm": 0.011112508364021778, - "learning_rate": 5.1793500726154506e-05, - "loss": 0.0038, - "step": 3493 - }, - { - "epoch": 2.69974898629079, - "grad_norm": 0.010520882904529572, - "learning_rate": 5.176654210369315e-05, - "loss": 0.0037, - "step": 3494 - }, - { - "epoch": 2.700521336165283, - "grad_norm": 0.01588641107082367, - "learning_rate": 5.173958296703408e-05, - "loss": 0.0042, - "step": 3495 - }, - { - "epoch": 2.701293686039776, - "grad_norm": 0.008596131578087807, - "learning_rate": 5.1712623324024444e-05, - "loss": 0.0034, - "step": 3496 - }, - { - "epoch": 2.702066035914269, - "grad_norm": 0.009026461280882359, - "learning_rate": 5.1685663182511535e-05, - "loss": 0.0037, - "step": 3497 - }, - { - "epoch": 2.7028383857887626, - "grad_norm": 0.01139860600233078, - "learning_rate": 5.165870255034281e-05, - "loss": 0.0039, - "step": 3498 - }, - { - "epoch": 2.7036107356632555, - "grad_norm": 0.010759508237242699, - "learning_rate": 5.1631741435365856e-05, - "loss": 0.0042, - "step": 3499 - }, - { - "epoch": 2.7043830855377484, - "grad_norm": 0.011585521511733532, - "learning_rate": 5.160477984542839e-05, - "loss": 0.0037, - "step": 3500 - }, - { - "epoch": 2.705155435412242, - "grad_norm": 0.008951091207563877, - "learning_rate": 5.157781778837829e-05, - "loss": 0.0038, - "step": 3501 - }, - { - "epoch": 2.705927785286735, - "grad_norm": 0.019819583743810654, - "learning_rate": 5.1550855272063545e-05, - "loss": 0.0041, - "step": 3502 - }, - { - "epoch": 2.706700135161228, - "grad_norm": 0.010994885116815567, - "learning_rate": 5.152389230433232e-05, - "loss": 0.0037, - "step": 3503 - }, - { - "epoch": 2.707472485035721, - "grad_norm": 0.010090935043990612, - "learning_rate": 5.149692889303287e-05, - "loss": 0.0036, - "step": 3504 - }, - { - "epoch": 2.7082448349102144, - "grad_norm": 0.008905458264052868, - "learning_rate": 5.146996504601357e-05, - "loss": 0.0037, - "step": 3505 - }, - { - "epoch": 2.7090171847847073, - "grad_norm": 0.008558421395719051, - "learning_rate": 5.1443000771122995e-05, - "loss": 0.0038, - "step": 3506 - }, - { - "epoch": 2.7097895346592007, - "grad_norm": 0.015720965340733528, - "learning_rate": 5.1416036076209725e-05, - "loss": 0.0036, - "step": 3507 - }, - { - "epoch": 2.7105618845336936, - "grad_norm": 0.0087439501658082, - "learning_rate": 5.138907096912261e-05, - "loss": 0.0037, - "step": 3508 - }, - { - "epoch": 2.711334234408187, - "grad_norm": 0.009726365096867085, - "learning_rate": 5.1362105457710477e-05, - "loss": 0.0035, - "step": 3509 - }, - { - "epoch": 2.71210658428268, - "grad_norm": 0.014834542758762836, - "learning_rate": 5.133513954982235e-05, - "loss": 0.0039, - "step": 3510 - }, - { - "epoch": 2.7128789341571733, - "grad_norm": 0.01073391456156969, - "learning_rate": 5.130817325330738e-05, - "loss": 0.0034, - "step": 3511 - }, - { - "epoch": 2.7136512840316662, - "grad_norm": 0.011810685507953167, - "learning_rate": 5.128120657601477e-05, - "loss": 0.004, - "step": 3512 - }, - { - "epoch": 2.7144236339061596, - "grad_norm": 0.009675235487520695, - "learning_rate": 5.125423952579389e-05, - "loss": 0.0032, - "step": 3513 - }, - { - "epoch": 2.7151959837806525, - "grad_norm": 0.009556422010064125, - "learning_rate": 5.122727211049421e-05, - "loss": 0.0036, - "step": 3514 - }, - { - "epoch": 2.715968333655146, - "grad_norm": 0.00851303432136774, - "learning_rate": 5.120030433796524e-05, - "loss": 0.0037, - "step": 3515 - }, - { - "epoch": 2.716740683529639, - "grad_norm": 0.01430381927639246, - "learning_rate": 5.11733362160567e-05, - "loss": 0.0037, - "step": 3516 - }, - { - "epoch": 2.717513033404132, - "grad_norm": 0.011229856871068478, - "learning_rate": 5.114636775261833e-05, - "loss": 0.0036, - "step": 3517 - }, - { - "epoch": 2.718285383278625, - "grad_norm": 0.01209938246756792, - "learning_rate": 5.111939895550001e-05, - "loss": 0.0039, - "step": 3518 - }, - { - "epoch": 2.7190577331531185, - "grad_norm": 0.009382943622767925, - "learning_rate": 5.109242983255171e-05, - "loss": 0.0039, - "step": 3519 - }, - { - "epoch": 2.7198300830276114, - "grad_norm": 0.021627336740493774, - "learning_rate": 5.106546039162348e-05, - "loss": 0.0043, - "step": 3520 - }, - { - "epoch": 2.720602432902105, - "grad_norm": 0.011568021960556507, - "learning_rate": 5.103849064056546e-05, - "loss": 0.0035, - "step": 3521 - }, - { - "epoch": 2.7213747827765977, - "grad_norm": 0.011675220914185047, - "learning_rate": 5.1011520587227924e-05, - "loss": 0.004, - "step": 3522 - }, - { - "epoch": 2.722147132651091, - "grad_norm": 0.008323492482304573, - "learning_rate": 5.0984550239461184e-05, - "loss": 0.0036, - "step": 3523 - }, - { - "epoch": 2.722919482525584, - "grad_norm": 0.009397026151418686, - "learning_rate": 5.095757960511566e-05, - "loss": 0.0033, - "step": 3524 - }, - { - "epoch": 2.7236918324000774, - "grad_norm": 0.007559982128441334, - "learning_rate": 5.093060869204185e-05, - "loss": 0.0033, - "step": 3525 - }, - { - "epoch": 2.7244641822745703, - "grad_norm": 0.009094711393117905, - "learning_rate": 5.090363750809033e-05, - "loss": 0.0036, - "step": 3526 - }, - { - "epoch": 2.7252365321490633, - "grad_norm": 0.009485473856329918, - "learning_rate": 5.0876666061111774e-05, - "loss": 0.003, - "step": 3527 - }, - { - "epoch": 2.7260088820235566, - "grad_norm": 0.012808065861463547, - "learning_rate": 5.084969435895691e-05, - "loss": 0.0035, - "step": 3528 - }, - { - "epoch": 2.72678123189805, - "grad_norm": 0.009124809876084328, - "learning_rate": 5.082272240947654e-05, - "loss": 0.0036, - "step": 3529 - }, - { - "epoch": 2.727553581772543, - "grad_norm": 0.010432286188006401, - "learning_rate": 5.079575022052157e-05, - "loss": 0.0036, - "step": 3530 - }, - { - "epoch": 2.728325931647036, - "grad_norm": 0.011838859878480434, - "learning_rate": 5.0768777799942934e-05, - "loss": 0.0042, - "step": 3531 - }, - { - "epoch": 2.7290982815215292, - "grad_norm": 0.017166869714856148, - "learning_rate": 5.0741805155591634e-05, - "loss": 0.0044, - "step": 3532 - }, - { - "epoch": 2.7298706313960226, - "grad_norm": 0.010602729395031929, - "learning_rate": 5.0714832295318815e-05, - "loss": 0.0038, - "step": 3533 - }, - { - "epoch": 2.7306429812705155, - "grad_norm": 0.012442387640476227, - "learning_rate": 5.068785922697558e-05, - "loss": 0.0037, - "step": 3534 - }, - { - "epoch": 2.7314153311450085, - "grad_norm": 0.008895857259631157, - "learning_rate": 5.066088595841313e-05, - "loss": 0.0035, - "step": 3535 - }, - { - "epoch": 2.732187681019502, - "grad_norm": 0.007561968639492989, - "learning_rate": 5.063391249748275e-05, - "loss": 0.0038, - "step": 3536 - }, - { - "epoch": 2.732960030893995, - "grad_norm": 0.011665033176541328, - "learning_rate": 5.0606938852035756e-05, - "loss": 0.0034, - "step": 3537 - }, - { - "epoch": 2.733732380768488, - "grad_norm": 0.013502943329513073, - "learning_rate": 5.057996502992355e-05, - "loss": 0.0034, - "step": 3538 - }, - { - "epoch": 2.734504730642981, - "grad_norm": 0.009370699524879456, - "learning_rate": 5.055299103899751e-05, - "loss": 0.0031, - "step": 3539 - }, - { - "epoch": 2.7352770805174744, - "grad_norm": 0.013993117958307266, - "learning_rate": 5.052601688710914e-05, - "loss": 0.0035, - "step": 3540 - }, - { - "epoch": 2.736049430391968, - "grad_norm": 0.013802869245409966, - "learning_rate": 5.049904258210999e-05, - "loss": 0.0039, - "step": 3541 - }, - { - "epoch": 2.7368217802664607, - "grad_norm": 0.0092271463945508, - "learning_rate": 5.047206813185158e-05, - "loss": 0.004, - "step": 3542 - }, - { - "epoch": 2.7375941301409537, - "grad_norm": 0.014459514059126377, - "learning_rate": 5.044509354418555e-05, - "loss": 0.0039, - "step": 3543 - }, - { - "epoch": 2.738366480015447, - "grad_norm": 0.014967890456318855, - "learning_rate": 5.041811882696356e-05, - "loss": 0.004, - "step": 3544 - }, - { - "epoch": 2.7391388298899404, - "grad_norm": 0.007593820337206125, - "learning_rate": 5.039114398803726e-05, - "loss": 0.0039, - "step": 3545 - }, - { - "epoch": 2.7399111797644333, - "grad_norm": 0.009915469214320183, - "learning_rate": 5.0364169035258414e-05, - "loss": 0.0037, - "step": 3546 - }, - { - "epoch": 2.7406835296389263, - "grad_norm": 0.019180258736014366, - "learning_rate": 5.033719397647875e-05, - "loss": 0.0039, - "step": 3547 - }, - { - "epoch": 2.7414558795134196, - "grad_norm": 0.01391974650323391, - "learning_rate": 5.031021881955007e-05, - "loss": 0.0037, - "step": 3548 - }, - { - "epoch": 2.742228229387913, - "grad_norm": 0.007851621136069298, - "learning_rate": 5.028324357232419e-05, - "loss": 0.0038, - "step": 3549 - }, - { - "epoch": 2.743000579262406, - "grad_norm": 0.016175515949726105, - "learning_rate": 5.025626824265294e-05, - "loss": 0.0036, - "step": 3550 - }, - { - "epoch": 2.743772929136899, - "grad_norm": 0.009228870272636414, - "learning_rate": 5.022929283838821e-05, - "loss": 0.0038, - "step": 3551 - }, - { - "epoch": 2.7445452790113922, - "grad_norm": 0.022975003346800804, - "learning_rate": 5.020231736738187e-05, - "loss": 0.004, - "step": 3552 - }, - { - "epoch": 2.745317628885885, - "grad_norm": 0.013293186202645302, - "learning_rate": 5.0175341837485835e-05, - "loss": 0.0039, - "step": 3553 - }, - { - "epoch": 2.7460899787603785, - "grad_norm": 0.020917177200317383, - "learning_rate": 5.014836625655201e-05, - "loss": 0.0035, - "step": 3554 - }, - { - "epoch": 2.7468623286348715, - "grad_norm": 0.027095790952444077, - "learning_rate": 5.0121390632432376e-05, - "loss": 0.0038, - "step": 3555 - }, - { - "epoch": 2.747634678509365, - "grad_norm": 0.027216967195272446, - "learning_rate": 5.0094414972978857e-05, - "loss": 0.0042, - "step": 3556 - }, - { - "epoch": 2.7484070283838578, - "grad_norm": 0.008903938345611095, - "learning_rate": 5.0067439286043414e-05, - "loss": 0.0038, - "step": 3557 - }, - { - "epoch": 2.749179378258351, - "grad_norm": 0.0110005559399724, - "learning_rate": 5.0040463579478036e-05, - "loss": 0.004, - "step": 3558 - }, - { - "epoch": 2.749951728132844, - "grad_norm": 0.026676757261157036, - "learning_rate": 5.001348786113468e-05, - "loss": 0.0038, - "step": 3559 - }, - { - "epoch": 2.7507240780073374, - "grad_norm": 0.0195772722363472, - "learning_rate": 4.998651213886533e-05, - "loss": 0.0041, - "step": 3560 - }, - { - "epoch": 2.7514964278818304, - "grad_norm": 0.015704307705163956, - "learning_rate": 4.995953642052197e-05, - "loss": 0.0036, - "step": 3561 - }, - { - "epoch": 2.7522687777563237, - "grad_norm": 0.009365121833980083, - "learning_rate": 4.993256071395659e-05, - "loss": 0.0041, - "step": 3562 - }, - { - "epoch": 2.7530411276308167, - "grad_norm": 0.013832955621182919, - "learning_rate": 4.990558502702115e-05, - "loss": 0.0032, - "step": 3563 - }, - { - "epoch": 2.75381347750531, - "grad_norm": 0.01772594451904297, - "learning_rate": 4.987860936756762e-05, - "loss": 0.0041, - "step": 3564 - }, - { - "epoch": 2.754585827379803, - "grad_norm": 0.01973775029182434, - "learning_rate": 4.985163374344799e-05, - "loss": 0.0039, - "step": 3565 - }, - { - "epoch": 2.7553581772542963, - "grad_norm": 0.006610201671719551, - "learning_rate": 4.9824658162514183e-05, - "loss": 0.0033, - "step": 3566 - }, - { - "epoch": 2.7561305271287893, - "grad_norm": 0.01677447371184826, - "learning_rate": 4.9797682632618134e-05, - "loss": 0.0037, - "step": 3567 - }, - { - "epoch": 2.7569028770032826, - "grad_norm": 0.019037803635001183, - "learning_rate": 4.9770707161611806e-05, - "loss": 0.0036, - "step": 3568 - }, - { - "epoch": 2.7576752268777756, - "grad_norm": 0.011565999127924442, - "learning_rate": 4.974373175734707e-05, - "loss": 0.0034, - "step": 3569 - }, - { - "epoch": 2.758447576752269, - "grad_norm": 0.019641969352960587, - "learning_rate": 4.9716756427675816e-05, - "loss": 0.0036, - "step": 3570 - }, - { - "epoch": 2.759219926626762, - "grad_norm": 0.017786724492907524, - "learning_rate": 4.9689781180449935e-05, - "loss": 0.0038, - "step": 3571 - }, - { - "epoch": 2.7599922765012552, - "grad_norm": 0.010379564017057419, - "learning_rate": 4.966280602352127e-05, - "loss": 0.004, - "step": 3572 - }, - { - "epoch": 2.760764626375748, - "grad_norm": 0.014459546655416489, - "learning_rate": 4.963583096474159e-05, - "loss": 0.0039, - "step": 3573 - }, - { - "epoch": 2.761536976250241, - "grad_norm": 0.009286866523325443, - "learning_rate": 4.960885601196274e-05, - "loss": 0.0038, - "step": 3574 - }, - { - "epoch": 2.7623093261247345, - "grad_norm": 0.008317566476762295, - "learning_rate": 4.958188117303646e-05, - "loss": 0.0037, - "step": 3575 - }, - { - "epoch": 2.763081675999228, - "grad_norm": 0.009271105751395226, - "learning_rate": 4.955490645581446e-05, - "loss": 0.0036, - "step": 3576 - }, - { - "epoch": 2.7638540258737208, - "grad_norm": 0.012317502871155739, - "learning_rate": 4.952793186814842e-05, - "loss": 0.0042, - "step": 3577 - }, - { - "epoch": 2.7646263757482137, - "grad_norm": 0.015943629667162895, - "learning_rate": 4.950095741789003e-05, - "loss": 0.0041, - "step": 3578 - }, - { - "epoch": 2.765398725622707, - "grad_norm": 0.011819128878414631, - "learning_rate": 4.9473983112890865e-05, - "loss": 0.0037, - "step": 3579 - }, - { - "epoch": 2.7661710754972004, - "grad_norm": 0.012603342533111572, - "learning_rate": 4.9447008961002495e-05, - "loss": 0.0038, - "step": 3580 - }, - { - "epoch": 2.7669434253716934, - "grad_norm": 0.014154767617583275, - "learning_rate": 4.942003497007647e-05, - "loss": 0.004, - "step": 3581 - }, - { - "epoch": 2.7677157752461863, - "grad_norm": 0.01127578690648079, - "learning_rate": 4.939306114796426e-05, - "loss": 0.0039, - "step": 3582 - }, - { - "epoch": 2.7684881251206797, - "grad_norm": 0.009542621672153473, - "learning_rate": 4.936608750251726e-05, - "loss": 0.0037, - "step": 3583 - }, - { - "epoch": 2.769260474995173, - "grad_norm": 0.008893107995390892, - "learning_rate": 4.933911404158688e-05, - "loss": 0.004, - "step": 3584 - }, - { - "epoch": 2.770032824869666, - "grad_norm": 0.01047214213758707, - "learning_rate": 4.931214077302445e-05, - "loss": 0.0034, - "step": 3585 - }, - { - "epoch": 2.770805174744159, - "grad_norm": 0.011454110965132713, - "learning_rate": 4.928516770468119e-05, - "loss": 0.0038, - "step": 3586 - }, - { - "epoch": 2.7715775246186523, - "grad_norm": 0.009302152320742607, - "learning_rate": 4.925819484440836e-05, - "loss": 0.0036, - "step": 3587 - }, - { - "epoch": 2.7723498744931456, - "grad_norm": 0.008845715783536434, - "learning_rate": 4.923122220005709e-05, - "loss": 0.0035, - "step": 3588 - }, - { - "epoch": 2.7731222243676386, - "grad_norm": 0.011453505605459213, - "learning_rate": 4.920424977947844e-05, - "loss": 0.0038, - "step": 3589 - }, - { - "epoch": 2.7738945742421315, - "grad_norm": 0.014364033006131649, - "learning_rate": 4.9177277590523464e-05, - "loss": 0.0038, - "step": 3590 - }, - { - "epoch": 2.774666924116625, - "grad_norm": 0.009715999476611614, - "learning_rate": 4.9150305641043096e-05, - "loss": 0.0037, - "step": 3591 - }, - { - "epoch": 2.7754392739911182, - "grad_norm": 0.011447625234723091, - "learning_rate": 4.912333393888824e-05, - "loss": 0.0039, - "step": 3592 - }, - { - "epoch": 2.776211623865611, - "grad_norm": 0.01904742605984211, - "learning_rate": 4.909636249190968e-05, - "loss": 0.003, - "step": 3593 - }, - { - "epoch": 2.776983973740104, - "grad_norm": 0.010695524513721466, - "learning_rate": 4.906939130795815e-05, - "loss": 0.0046, - "step": 3594 - }, - { - "epoch": 2.7777563236145975, - "grad_norm": 0.007318440359085798, - "learning_rate": 4.904242039488435e-05, - "loss": 0.0034, - "step": 3595 - }, - { - "epoch": 2.778528673489091, - "grad_norm": 0.007491338532418013, - "learning_rate": 4.901544976053882e-05, - "loss": 0.0037, - "step": 3596 - }, - { - "epoch": 2.7793010233635838, - "grad_norm": 0.012006079778075218, - "learning_rate": 4.8988479412772074e-05, - "loss": 0.0039, - "step": 3597 - }, - { - "epoch": 2.7800733732380767, - "grad_norm": 0.009866135194897652, - "learning_rate": 4.896150935943454e-05, - "loss": 0.0039, - "step": 3598 - }, - { - "epoch": 2.78084572311257, - "grad_norm": 0.012143891304731369, - "learning_rate": 4.893453960837653e-05, - "loss": 0.0039, - "step": 3599 - }, - { - "epoch": 2.781618072987063, - "grad_norm": 0.008424376137554646, - "learning_rate": 4.8907570167448294e-05, - "loss": 0.0037, - "step": 3600 - }, - { - "epoch": 2.7823904228615564, - "grad_norm": 0.012690001167356968, - "learning_rate": 4.8880601044499984e-05, - "loss": 0.0035, - "step": 3601 - }, - { - "epoch": 2.7831627727360493, - "grad_norm": 0.006906248163431883, - "learning_rate": 4.885363224738168e-05, - "loss": 0.003, - "step": 3602 - }, - { - "epoch": 2.7839351226105427, - "grad_norm": 0.009611250832676888, - "learning_rate": 4.8826663783943314e-05, - "loss": 0.0042, - "step": 3603 - }, - { - "epoch": 2.7847074724850356, - "grad_norm": 0.01498124934732914, - "learning_rate": 4.8799695662034764e-05, - "loss": 0.0039, - "step": 3604 - }, - { - "epoch": 2.785479822359529, - "grad_norm": 0.009965925477445126, - "learning_rate": 4.877272788950582e-05, - "loss": 0.0037, - "step": 3605 - }, - { - "epoch": 2.786252172234022, - "grad_norm": 0.009315697476267815, - "learning_rate": 4.874576047420612e-05, - "loss": 0.0043, - "step": 3606 - }, - { - "epoch": 2.7870245221085153, - "grad_norm": 0.008523601107299328, - "learning_rate": 4.8718793423985235e-05, - "loss": 0.0033, - "step": 3607 - }, - { - "epoch": 2.787796871983008, - "grad_norm": 0.010219255462288857, - "learning_rate": 4.869182674669264e-05, - "loss": 0.0039, - "step": 3608 - }, - { - "epoch": 2.7885692218575016, - "grad_norm": 0.011321432888507843, - "learning_rate": 4.866486045017766e-05, - "loss": 0.0035, - "step": 3609 - }, - { - "epoch": 2.7893415717319945, - "grad_norm": 0.009535894729197025, - "learning_rate": 4.8637894542289535e-05, - "loss": 0.0035, - "step": 3610 - }, - { - "epoch": 2.790113921606488, - "grad_norm": 0.011316893622279167, - "learning_rate": 4.8610929030877405e-05, - "loss": 0.0034, - "step": 3611 - }, - { - "epoch": 2.790886271480981, - "grad_norm": 0.009314599446952343, - "learning_rate": 4.858396392379028e-05, - "loss": 0.0038, - "step": 3612 - }, - { - "epoch": 2.791658621355474, - "grad_norm": 0.008782761171460152, - "learning_rate": 4.8556999228877023e-05, - "loss": 0.0032, - "step": 3613 - }, - { - "epoch": 2.792430971229967, - "grad_norm": 0.010352780111134052, - "learning_rate": 4.8530034953986426e-05, - "loss": 0.0038, - "step": 3614 - }, - { - "epoch": 2.7932033211044605, - "grad_norm": 0.013563552871346474, - "learning_rate": 4.850307110696715e-05, - "loss": 0.0041, - "step": 3615 - }, - { - "epoch": 2.7939756709789534, - "grad_norm": 0.01034247875213623, - "learning_rate": 4.8476107695667686e-05, - "loss": 0.0034, - "step": 3616 - }, - { - "epoch": 2.7947480208534468, - "grad_norm": 0.008694970980286598, - "learning_rate": 4.844914472793646e-05, - "loss": 0.0039, - "step": 3617 - }, - { - "epoch": 2.7955203707279397, - "grad_norm": 0.02558278851211071, - "learning_rate": 4.842218221162174e-05, - "loss": 0.0038, - "step": 3618 - }, - { - "epoch": 2.796292720602433, - "grad_norm": 0.013983160257339478, - "learning_rate": 4.839522015457162e-05, - "loss": 0.0045, - "step": 3619 - }, - { - "epoch": 2.797065070476926, - "grad_norm": 0.01667140983045101, - "learning_rate": 4.836825856463416e-05, - "loss": 0.0038, - "step": 3620 - }, - { - "epoch": 2.7978374203514194, - "grad_norm": 0.008836453780531883, - "learning_rate": 4.834129744965719e-05, - "loss": 0.0039, - "step": 3621 - }, - { - "epoch": 2.7986097702259123, - "grad_norm": 0.014575183391571045, - "learning_rate": 4.831433681748847e-05, - "loss": 0.0036, - "step": 3622 - }, - { - "epoch": 2.7993821201004057, - "grad_norm": 0.026069698855280876, - "learning_rate": 4.828737667597557e-05, - "loss": 0.0044, - "step": 3623 - }, - { - "epoch": 2.8001544699748986, - "grad_norm": 0.00918895099312067, - "learning_rate": 4.8260417032965915e-05, - "loss": 0.0037, - "step": 3624 - }, - { - "epoch": 2.8009268198493915, - "grad_norm": 0.019233256578445435, - "learning_rate": 4.8233457896306853e-05, - "loss": 0.0033, - "step": 3625 - }, - { - "epoch": 2.801699169723885, - "grad_norm": 0.017233747988939285, - "learning_rate": 4.8206499273845505e-05, - "loss": 0.0039, - "step": 3626 - }, - { - "epoch": 2.8024715195983783, - "grad_norm": 0.018169382587075233, - "learning_rate": 4.817954117342887e-05, - "loss": 0.004, - "step": 3627 - }, - { - "epoch": 2.803243869472871, - "grad_norm": 0.011694537475705147, - "learning_rate": 4.8152583602903846e-05, - "loss": 0.0037, - "step": 3628 - }, - { - "epoch": 2.804016219347364, - "grad_norm": 0.010528466664254665, - "learning_rate": 4.812562657011706e-05, - "loss": 0.0039, - "step": 3629 - }, - { - "epoch": 2.8047885692218575, - "grad_norm": 0.02013496682047844, - "learning_rate": 4.80986700829151e-05, - "loss": 0.0037, - "step": 3630 - }, - { - "epoch": 2.805560919096351, - "grad_norm": 0.023819101974368095, - "learning_rate": 4.807171414914432e-05, - "loss": 0.004, - "step": 3631 - }, - { - "epoch": 2.806333268970844, - "grad_norm": 0.04680509865283966, - "learning_rate": 4.804475877665099e-05, - "loss": 0.004, - "step": 3632 - }, - { - "epoch": 2.8071056188453367, - "grad_norm": 0.0160503126680851, - "learning_rate": 4.8017803973281115e-05, - "loss": 0.0035, - "step": 3633 - }, - { - "epoch": 2.80787796871983, - "grad_norm": 0.02833872102200985, - "learning_rate": 4.79908497468806e-05, - "loss": 0.005, - "step": 3634 - }, - { - "epoch": 2.8086503185943235, - "grad_norm": 0.011657784692943096, - "learning_rate": 4.79638961052952e-05, - "loss": 0.0031, - "step": 3635 - }, - { - "epoch": 2.8094226684688164, - "grad_norm": 0.010503173805773258, - "learning_rate": 4.793694305637043e-05, - "loss": 0.0036, - "step": 3636 - }, - { - "epoch": 2.8101950183433093, - "grad_norm": 0.019909678027033806, - "learning_rate": 4.790999060795168e-05, - "loss": 0.0036, - "step": 3637 - }, - { - "epoch": 2.8109673682178027, - "grad_norm": 0.01406138576567173, - "learning_rate": 4.788303876788418e-05, - "loss": 0.0038, - "step": 3638 - }, - { - "epoch": 2.811739718092296, - "grad_norm": 0.009920360520482063, - "learning_rate": 4.7856087544012936e-05, - "loss": 0.0038, - "step": 3639 - }, - { - "epoch": 2.812512067966789, - "grad_norm": 0.016837509348988533, - "learning_rate": 4.7829136944182786e-05, - "loss": 0.0034, - "step": 3640 - }, - { - "epoch": 2.813284417841282, - "grad_norm": 0.009123590774834156, - "learning_rate": 4.7802186976238434e-05, - "loss": 0.0036, - "step": 3641 - }, - { - "epoch": 2.8140567677157753, - "grad_norm": 0.00945031177252531, - "learning_rate": 4.777523764802435e-05, - "loss": 0.0035, - "step": 3642 - }, - { - "epoch": 2.8148291175902687, - "grad_norm": 0.0078117093071341515, - "learning_rate": 4.7748288967384815e-05, - "loss": 0.004, - "step": 3643 - }, - { - "epoch": 2.8156014674647616, - "grad_norm": 0.01575397700071335, - "learning_rate": 4.772134094216396e-05, - "loss": 0.0038, - "step": 3644 - }, - { - "epoch": 2.8163738173392545, - "grad_norm": 0.025902222841978073, - "learning_rate": 4.7694393580205706e-05, - "loss": 0.0042, - "step": 3645 - }, - { - "epoch": 2.817146167213748, - "grad_norm": 0.008085416629910469, - "learning_rate": 4.766744688935376e-05, - "loss": 0.004, - "step": 3646 - }, - { - "epoch": 2.817918517088241, - "grad_norm": 0.017281895503401756, - "learning_rate": 4.764050087745167e-05, - "loss": 0.0041, - "step": 3647 - }, - { - "epoch": 2.818690866962734, - "grad_norm": 0.009094124659895897, - "learning_rate": 4.7613555552342756e-05, - "loss": 0.0036, - "step": 3648 - }, - { - "epoch": 2.819463216837227, - "grad_norm": 0.008755745366215706, - "learning_rate": 4.758661092187015e-05, - "loss": 0.0034, - "step": 3649 - }, - { - "epoch": 2.8202355667117205, - "grad_norm": 0.01971414126455784, - "learning_rate": 4.75596669938768e-05, - "loss": 0.0035, - "step": 3650 - }, - { - "epoch": 2.8210079165862134, - "grad_norm": 0.020349010825157166, - "learning_rate": 4.7532723776205403e-05, - "loss": 0.0038, - "step": 3651 - }, - { - "epoch": 2.821780266460707, - "grad_norm": 0.01196723897010088, - "learning_rate": 4.750578127669852e-05, - "loss": 0.0046, - "step": 3652 - }, - { - "epoch": 2.8225526163351997, - "grad_norm": 0.0126083018258214, - "learning_rate": 4.747883950319844e-05, - "loss": 0.0036, - "step": 3653 - }, - { - "epoch": 2.823324966209693, - "grad_norm": 0.015116846188902855, - "learning_rate": 4.745189846354724e-05, - "loss": 0.0042, - "step": 3654 - }, - { - "epoch": 2.824097316084186, - "grad_norm": 0.009867295622825623, - "learning_rate": 4.742495816558686e-05, - "loss": 0.0038, - "step": 3655 - }, - { - "epoch": 2.8248696659586794, - "grad_norm": 0.015143278986215591, - "learning_rate": 4.739801861715891e-05, - "loss": 0.0037, - "step": 3656 - }, - { - "epoch": 2.8256420158331723, - "grad_norm": 0.008129570633172989, - "learning_rate": 4.737107982610488e-05, - "loss": 0.0034, - "step": 3657 - }, - { - "epoch": 2.8264143657076657, - "grad_norm": 0.012850960716605186, - "learning_rate": 4.7344141800265987e-05, - "loss": 0.0039, - "step": 3658 - }, - { - "epoch": 2.8271867155821586, - "grad_norm": 0.014351118355989456, - "learning_rate": 4.731720454748323e-05, - "loss": 0.0042, - "step": 3659 - }, - { - "epoch": 2.827959065456652, - "grad_norm": 0.009173419326543808, - "learning_rate": 4.729026807559741e-05, - "loss": 0.0039, - "step": 3660 - }, - { - "epoch": 2.828731415331145, - "grad_norm": 0.0093191834166646, - "learning_rate": 4.7263332392449094e-05, - "loss": 0.004, - "step": 3661 - }, - { - "epoch": 2.8295037652056383, - "grad_norm": 0.007909681648015976, - "learning_rate": 4.723639750587857e-05, - "loss": 0.0032, - "step": 3662 - }, - { - "epoch": 2.830276115080131, - "grad_norm": 0.012290013954043388, - "learning_rate": 4.720946342372596e-05, - "loss": 0.0035, - "step": 3663 - }, - { - "epoch": 2.8310484649546246, - "grad_norm": 0.01055870484560728, - "learning_rate": 4.718253015383111e-05, - "loss": 0.0035, - "step": 3664 - }, - { - "epoch": 2.8318208148291175, - "grad_norm": 0.009200993925333023, - "learning_rate": 4.715559770403368e-05, - "loss": 0.0037, - "step": 3665 - }, - { - "epoch": 2.832593164703611, - "grad_norm": 0.008415855467319489, - "learning_rate": 4.712866608217301e-05, - "loss": 0.0037, - "step": 3666 - }, - { - "epoch": 2.833365514578104, - "grad_norm": 0.016717437654733658, - "learning_rate": 4.710173529608825e-05, - "loss": 0.0037, - "step": 3667 - }, - { - "epoch": 2.834137864452597, - "grad_norm": 0.01344334241002798, - "learning_rate": 4.707480535361835e-05, - "loss": 0.0037, - "step": 3668 - }, - { - "epoch": 2.83491021432709, - "grad_norm": 0.008762449026107788, - "learning_rate": 4.7047876262601906e-05, - "loss": 0.0033, - "step": 3669 - }, - { - "epoch": 2.8356825642015835, - "grad_norm": 0.014935498125851154, - "learning_rate": 4.7020948030877346e-05, - "loss": 0.0033, - "step": 3670 - }, - { - "epoch": 2.8364549140760764, - "grad_norm": 0.011157029308378696, - "learning_rate": 4.699402066628285e-05, - "loss": 0.0037, - "step": 3671 - }, - { - "epoch": 2.8372272639505693, - "grad_norm": 0.008815648965537548, - "learning_rate": 4.696709417665629e-05, - "loss": 0.0035, - "step": 3672 - }, - { - "epoch": 2.8379996138250627, - "grad_norm": 0.012259745970368385, - "learning_rate": 4.6940168569835324e-05, - "loss": 0.0038, - "step": 3673 - }, - { - "epoch": 2.838771963699556, - "grad_norm": 0.013299521990120411, - "learning_rate": 4.6913243853657356e-05, - "loss": 0.0035, - "step": 3674 - }, - { - "epoch": 2.839544313574049, - "grad_norm": 0.012281878851354122, - "learning_rate": 4.688632003595954e-05, - "loss": 0.0041, - "step": 3675 - }, - { - "epoch": 2.840316663448542, - "grad_norm": 0.008334542624652386, - "learning_rate": 4.6859397124578684e-05, - "loss": 0.0035, - "step": 3676 - }, - { - "epoch": 2.8410890133230353, - "grad_norm": 0.009239261038601398, - "learning_rate": 4.683247512735146e-05, - "loss": 0.004, - "step": 3677 - }, - { - "epoch": 2.8418613631975287, - "grad_norm": 0.010223859921097755, - "learning_rate": 4.68055540521142e-05, - "loss": 0.0034, - "step": 3678 - }, - { - "epoch": 2.8426337130720216, - "grad_norm": 0.007821132428944111, - "learning_rate": 4.6778633906702945e-05, - "loss": 0.0032, - "step": 3679 - }, - { - "epoch": 2.8434060629465145, - "grad_norm": 0.009876555763185024, - "learning_rate": 4.6751714698953536e-05, - "loss": 0.0038, - "step": 3680 - }, - { - "epoch": 2.844178412821008, - "grad_norm": 0.010038082487881184, - "learning_rate": 4.6724796436701496e-05, - "loss": 0.0037, - "step": 3681 - }, - { - "epoch": 2.8449507626955013, - "grad_norm": 0.009820781648159027, - "learning_rate": 4.6697879127782064e-05, - "loss": 0.004, - "step": 3682 - }, - { - "epoch": 2.845723112569994, - "grad_norm": 0.008844954892992973, - "learning_rate": 4.667096278003021e-05, - "loss": 0.0034, - "step": 3683 - }, - { - "epoch": 2.846495462444487, - "grad_norm": 0.012067172676324844, - "learning_rate": 4.6644047401280664e-05, - "loss": 0.0041, - "step": 3684 - }, - { - "epoch": 2.8472678123189805, - "grad_norm": 0.009092634543776512, - "learning_rate": 4.6617132999367844e-05, - "loss": 0.0034, - "step": 3685 - }, - { - "epoch": 2.848040162193474, - "grad_norm": 0.009050995111465454, - "learning_rate": 4.659021958212585e-05, - "loss": 0.004, - "step": 3686 - }, - { - "epoch": 2.848812512067967, - "grad_norm": 0.02117002010345459, - "learning_rate": 4.656330715738855e-05, - "loss": 0.0036, - "step": 3687 - }, - { - "epoch": 2.8495848619424597, - "grad_norm": 0.010049187578260899, - "learning_rate": 4.65363957329895e-05, - "loss": 0.0037, - "step": 3688 - }, - { - "epoch": 2.850357211816953, - "grad_norm": 0.011103583499789238, - "learning_rate": 4.650948531676195e-05, - "loss": 0.0034, - "step": 3689 - }, - { - "epoch": 2.8511295616914465, - "grad_norm": 0.009087778627872467, - "learning_rate": 4.6482575916538885e-05, - "loss": 0.0033, - "step": 3690 - }, - { - "epoch": 2.8519019115659394, - "grad_norm": 0.011496257968246937, - "learning_rate": 4.6455667540152984e-05, - "loss": 0.0038, - "step": 3691 - }, - { - "epoch": 2.8526742614404323, - "grad_norm": 0.009799706749618053, - "learning_rate": 4.642876019543661e-05, - "loss": 0.0042, - "step": 3692 - }, - { - "epoch": 2.8534466113149257, - "grad_norm": 0.009193724021315575, - "learning_rate": 4.640185389022186e-05, - "loss": 0.0041, - "step": 3693 - }, - { - "epoch": 2.8542189611894186, - "grad_norm": 0.006750943139195442, - "learning_rate": 4.637494863234048e-05, - "loss": 0.0036, - "step": 3694 - }, - { - "epoch": 2.854991311063912, - "grad_norm": 0.020224595442414284, - "learning_rate": 4.6348044429623986e-05, - "loss": 0.0038, - "step": 3695 - }, - { - "epoch": 2.855763660938405, - "grad_norm": 0.007359965238720179, - "learning_rate": 4.632114128990351e-05, - "loss": 0.0033, - "step": 3696 - }, - { - "epoch": 2.8565360108128983, - "grad_norm": 0.009418971836566925, - "learning_rate": 4.629423922100989e-05, - "loss": 0.004, - "step": 3697 - }, - { - "epoch": 2.8573083606873912, - "grad_norm": 0.007746202405542135, - "learning_rate": 4.626733823077372e-05, - "loss": 0.0034, - "step": 3698 - }, - { - "epoch": 2.8580807105618846, - "grad_norm": 0.011419177986681461, - "learning_rate": 4.624043832702519e-05, - "loss": 0.0042, - "step": 3699 - }, - { - "epoch": 2.8588530604363775, - "grad_norm": 0.014163573272526264, - "learning_rate": 4.62135395175942e-05, - "loss": 0.0037, - "step": 3700 - }, - { - "epoch": 2.859625410310871, - "grad_norm": 0.010520074516534805, - "learning_rate": 4.618664181031039e-05, - "loss": 0.004, - "step": 3701 - }, - { - "epoch": 2.860397760185364, - "grad_norm": 0.01044239941984415, - "learning_rate": 4.615974521300299e-05, - "loss": 0.0041, - "step": 3702 - }, - { - "epoch": 2.861170110059857, - "grad_norm": 0.0111888712272048, - "learning_rate": 4.613284973350096e-05, - "loss": 0.0036, - "step": 3703 - }, - { - "epoch": 2.86194245993435, - "grad_norm": 0.010530322790145874, - "learning_rate": 4.6105955379632936e-05, - "loss": 0.0038, - "step": 3704 - }, - { - "epoch": 2.8627148098088435, - "grad_norm": 0.0078042857348918915, - "learning_rate": 4.6079062159227225e-05, - "loss": 0.0039, - "step": 3705 - }, - { - "epoch": 2.8634871596833364, - "grad_norm": 0.009296247735619545, - "learning_rate": 4.605217008011176e-05, - "loss": 0.0037, - "step": 3706 - }, - { - "epoch": 2.86425950955783, - "grad_norm": 0.012510127387940884, - "learning_rate": 4.6025279150114185e-05, - "loss": 0.0034, - "step": 3707 - }, - { - "epoch": 2.8650318594323227, - "grad_norm": 0.010169142857193947, - "learning_rate": 4.599838937706183e-05, - "loss": 0.0032, - "step": 3708 - }, - { - "epoch": 2.865804209306816, - "grad_norm": 0.007322363089770079, - "learning_rate": 4.597150076878163e-05, - "loss": 0.0032, - "step": 3709 - }, - { - "epoch": 2.866576559181309, - "grad_norm": 0.010280710645020008, - "learning_rate": 4.5944613333100195e-05, - "loss": 0.0038, - "step": 3710 - }, - { - "epoch": 2.8673489090558024, - "grad_norm": 0.008753606118261814, - "learning_rate": 4.5917727077843856e-05, - "loss": 0.0034, - "step": 3711 - }, - { - "epoch": 2.8681212589302953, - "grad_norm": 0.012354779988527298, - "learning_rate": 4.5890842010838504e-05, - "loss": 0.0034, - "step": 3712 - }, - { - "epoch": 2.8688936088047887, - "grad_norm": 0.009628398343920708, - "learning_rate": 4.586395813990974e-05, - "loss": 0.004, - "step": 3713 - }, - { - "epoch": 2.8696659586792816, - "grad_norm": 0.01673782244324684, - "learning_rate": 4.583707547288285e-05, - "loss": 0.0035, - "step": 3714 - }, - { - "epoch": 2.870438308553775, - "grad_norm": 0.018587319180369377, - "learning_rate": 4.58101940175827e-05, - "loss": 0.0042, - "step": 3715 - }, - { - "epoch": 2.871210658428268, - "grad_norm": 0.013515827246010303, - "learning_rate": 4.57833137818338e-05, - "loss": 0.0042, - "step": 3716 - }, - { - "epoch": 2.8719830083027613, - "grad_norm": 0.010271236300468445, - "learning_rate": 4.575643477346039e-05, - "loss": 0.0042, - "step": 3717 - }, - { - "epoch": 2.8727553581772542, - "grad_norm": 0.015272647142410278, - "learning_rate": 4.5729557000286296e-05, - "loss": 0.0037, - "step": 3718 - }, - { - "epoch": 2.873527708051747, - "grad_norm": 0.011950243264436722, - "learning_rate": 4.570268047013495e-05, - "loss": 0.0039, - "step": 3719 - }, - { - "epoch": 2.8743000579262405, - "grad_norm": 0.008711851201951504, - "learning_rate": 4.567580519082948e-05, - "loss": 0.0034, - "step": 3720 - }, - { - "epoch": 2.875072407800734, - "grad_norm": 0.011989584192633629, - "learning_rate": 4.564893117019266e-05, - "loss": 0.0032, - "step": 3721 - }, - { - "epoch": 2.875844757675227, - "grad_norm": 0.016780303791165352, - "learning_rate": 4.5622058416046805e-05, - "loss": 0.0039, - "step": 3722 - }, - { - "epoch": 2.8766171075497198, - "grad_norm": 0.008650211617350578, - "learning_rate": 4.559518693621397e-05, - "loss": 0.0033, - "step": 3723 - }, - { - "epoch": 2.877389457424213, - "grad_norm": 0.007928716950118542, - "learning_rate": 4.556831673851578e-05, - "loss": 0.0039, - "step": 3724 - }, - { - "epoch": 2.8781618072987065, - "grad_norm": 0.014184878207743168, - "learning_rate": 4.554144783077352e-05, - "loss": 0.0037, - "step": 3725 - }, - { - "epoch": 2.8789341571731994, - "grad_norm": 0.00899726152420044, - "learning_rate": 4.551458022080806e-05, - "loss": 0.0033, - "step": 3726 - }, - { - "epoch": 2.8797065070476924, - "grad_norm": 0.006811381317675114, - "learning_rate": 4.54877139164399e-05, - "loss": 0.0036, - "step": 3727 - }, - { - "epoch": 2.8804788569221857, - "grad_norm": 0.007462832145392895, - "learning_rate": 4.5460848925489206e-05, - "loss": 0.0035, - "step": 3728 - }, - { - "epoch": 2.881251206796679, - "grad_norm": 0.006794137414544821, - "learning_rate": 4.5433985255775705e-05, - "loss": 0.0033, - "step": 3729 - }, - { - "epoch": 2.882023556671172, - "grad_norm": 0.014314756728708744, - "learning_rate": 4.540712291511875e-05, - "loss": 0.0039, - "step": 3730 - }, - { - "epoch": 2.882795906545665, - "grad_norm": 0.00948580913245678, - "learning_rate": 4.538026191133736e-05, - "loss": 0.0037, - "step": 3731 - }, - { - "epoch": 2.8835682564201583, - "grad_norm": 0.00665636220946908, - "learning_rate": 4.535340225225009e-05, - "loss": 0.0036, - "step": 3732 - }, - { - "epoch": 2.8843406062946517, - "grad_norm": 0.010609416291117668, - "learning_rate": 4.5326543945675136e-05, - "loss": 0.0039, - "step": 3733 - }, - { - "epoch": 2.8851129561691446, - "grad_norm": 0.009739573113620281, - "learning_rate": 4.529968699943033e-05, - "loss": 0.0042, - "step": 3734 - }, - { - "epoch": 2.8858853060436376, - "grad_norm": 0.011525544337928295, - "learning_rate": 4.527283142133306e-05, - "loss": 0.0039, - "step": 3735 - }, - { - "epoch": 2.886657655918131, - "grad_norm": 0.009056229144334793, - "learning_rate": 4.524597721920034e-05, - "loss": 0.0038, - "step": 3736 - }, - { - "epoch": 2.8874300057926243, - "grad_norm": 0.010327644646167755, - "learning_rate": 4.521912440084877e-05, - "loss": 0.0032, - "step": 3737 - }, - { - "epoch": 2.8882023556671172, - "grad_norm": 0.00854728277772665, - "learning_rate": 4.519227297409458e-05, - "loss": 0.0034, - "step": 3738 - }, - { - "epoch": 2.88897470554161, - "grad_norm": 0.016212280839681625, - "learning_rate": 4.5165422946753546e-05, - "loss": 0.003, - "step": 3739 - }, - { - "epoch": 2.8897470554161035, - "grad_norm": 0.010443037375807762, - "learning_rate": 4.513857432664107e-05, - "loss": 0.0037, - "step": 3740 - }, - { - "epoch": 2.8905194052905965, - "grad_norm": 0.008367817848920822, - "learning_rate": 4.5111727121572156e-05, - "loss": 0.0036, - "step": 3741 - }, - { - "epoch": 2.89129175516509, - "grad_norm": 0.010264886543154716, - "learning_rate": 4.508488133936135e-05, - "loss": 0.0035, - "step": 3742 - }, - { - "epoch": 2.8920641050395828, - "grad_norm": 0.014999981969594955, - "learning_rate": 4.505803698782281e-05, - "loss": 0.0037, - "step": 3743 - }, - { - "epoch": 2.892836454914076, - "grad_norm": 0.009613145142793655, - "learning_rate": 4.50311940747703e-05, - "loss": 0.0037, - "step": 3744 - }, - { - "epoch": 2.893608804788569, - "grad_norm": 0.008537041023373604, - "learning_rate": 4.500435260801715e-05, - "loss": 0.0037, - "step": 3745 - }, - { - "epoch": 2.8943811546630624, - "grad_norm": 0.013094227761030197, - "learning_rate": 4.497751259537622e-05, - "loss": 0.0034, - "step": 3746 - }, - { - "epoch": 2.8951535045375554, - "grad_norm": 0.014482134021818638, - "learning_rate": 4.495067404466002e-05, - "loss": 0.0035, - "step": 3747 - }, - { - "epoch": 2.8959258544120487, - "grad_norm": 0.011909635737538338, - "learning_rate": 4.492383696368061e-05, - "loss": 0.0035, - "step": 3748 - }, - { - "epoch": 2.8966982042865417, - "grad_norm": 0.01069470215588808, - "learning_rate": 4.489700136024959e-05, - "loss": 0.0036, - "step": 3749 - }, - { - "epoch": 2.897470554161035, - "grad_norm": 0.008906307630240917, - "learning_rate": 4.487016724217817e-05, - "loss": 0.0034, - "step": 3750 - }, - { - "epoch": 2.898242904035528, - "grad_norm": 0.009430951438844204, - "learning_rate": 4.484333461727712e-05, - "loss": 0.0035, - "step": 3751 - }, - { - "epoch": 2.8990152539100214, - "grad_norm": 0.009873777627944946, - "learning_rate": 4.481650349335675e-05, - "loss": 0.0033, - "step": 3752 - }, - { - "epoch": 2.8997876037845143, - "grad_norm": 0.0071570733562111855, - "learning_rate": 4.478967387822697e-05, - "loss": 0.0036, - "step": 3753 - }, - { - "epoch": 2.9005599536590077, - "grad_norm": 0.010955625213682652, - "learning_rate": 4.476284577969722e-05, - "loss": 0.0037, - "step": 3754 - }, - { - "epoch": 2.9013323035335006, - "grad_norm": 0.008684576489031315, - "learning_rate": 4.473601920557653e-05, - "loss": 0.0035, - "step": 3755 - }, - { - "epoch": 2.902104653407994, - "grad_norm": 0.0068272012285888195, - "learning_rate": 4.470919416367344e-05, - "loss": 0.0035, - "step": 3756 - }, - { - "epoch": 2.902877003282487, - "grad_norm": 0.008168086409568787, - "learning_rate": 4.468237066179609e-05, - "loss": 0.0039, - "step": 3757 - }, - { - "epoch": 2.9036493531569803, - "grad_norm": 0.010575056076049805, - "learning_rate": 4.465554870775216e-05, - "loss": 0.0033, - "step": 3758 - }, - { - "epoch": 2.904421703031473, - "grad_norm": 0.008518901653587818, - "learning_rate": 4.462872830934886e-05, - "loss": 0.0036, - "step": 3759 - }, - { - "epoch": 2.9051940529059666, - "grad_norm": 0.010122411884367466, - "learning_rate": 4.460190947439294e-05, - "loss": 0.0036, - "step": 3760 - }, - { - "epoch": 2.9059664027804595, - "grad_norm": 0.014623790979385376, - "learning_rate": 4.457509221069077e-05, - "loss": 0.004, - "step": 3761 - }, - { - "epoch": 2.906738752654953, - "grad_norm": 0.008349210023880005, - "learning_rate": 4.454827652604815e-05, - "loss": 0.0035, - "step": 3762 - }, - { - "epoch": 2.907511102529446, - "grad_norm": 0.015230577439069748, - "learning_rate": 4.452146242827051e-05, - "loss": 0.003, - "step": 3763 - }, - { - "epoch": 2.908283452403939, - "grad_norm": 0.013238787651062012, - "learning_rate": 4.4494649925162765e-05, - "loss": 0.0036, - "step": 3764 - }, - { - "epoch": 2.909055802278432, - "grad_norm": 0.011168868280947208, - "learning_rate": 4.4467839024529425e-05, - "loss": 0.0034, - "step": 3765 - }, - { - "epoch": 2.909828152152925, - "grad_norm": 0.009999548085033894, - "learning_rate": 4.4441029734174456e-05, - "loss": 0.0035, - "step": 3766 - }, - { - "epoch": 2.9106005020274184, - "grad_norm": 0.024452844634652138, - "learning_rate": 4.44142220619014e-05, - "loss": 0.0037, - "step": 3767 - }, - { - "epoch": 2.9113728519019118, - "grad_norm": 0.018125806003808975, - "learning_rate": 4.438741601551335e-05, - "loss": 0.0039, - "step": 3768 - }, - { - "epoch": 2.9121452017764047, - "grad_norm": 0.009755297563970089, - "learning_rate": 4.436061160281287e-05, - "loss": 0.0036, - "step": 3769 - }, - { - "epoch": 2.9129175516508976, - "grad_norm": 0.008294413797557354, - "learning_rate": 4.433380883160208e-05, - "loss": 0.0037, - "step": 3770 - }, - { - "epoch": 2.913689901525391, - "grad_norm": 0.01363943051546812, - "learning_rate": 4.4307007709682645e-05, - "loss": 0.0037, - "step": 3771 - }, - { - "epoch": 2.9144622513998844, - "grad_norm": 0.010448621585965157, - "learning_rate": 4.4280208244855695e-05, - "loss": 0.0037, - "step": 3772 - }, - { - "epoch": 2.9152346012743773, - "grad_norm": 0.015610828064382076, - "learning_rate": 4.4253410444921904e-05, - "loss": 0.0035, - "step": 3773 - }, - { - "epoch": 2.91600695114887, - "grad_norm": 0.008082563057541847, - "learning_rate": 4.422661431768149e-05, - "loss": 0.0041, - "step": 3774 - }, - { - "epoch": 2.9167793010233636, - "grad_norm": 0.01200641319155693, - "learning_rate": 4.419981987093415e-05, - "loss": 0.0039, - "step": 3775 - }, - { - "epoch": 2.917551650897857, - "grad_norm": 0.01190141774713993, - "learning_rate": 4.4173027112479076e-05, - "loss": 0.0039, - "step": 3776 - }, - { - "epoch": 2.91832400077235, - "grad_norm": 0.01024235412478447, - "learning_rate": 4.414623605011502e-05, - "loss": 0.0038, - "step": 3777 - }, - { - "epoch": 2.919096350646843, - "grad_norm": 0.009192753583192825, - "learning_rate": 4.411944669164022e-05, - "loss": 0.0043, - "step": 3778 - }, - { - "epoch": 2.919868700521336, - "grad_norm": 0.011779602617025375, - "learning_rate": 4.4092659044852366e-05, - "loss": 0.0036, - "step": 3779 - }, - { - "epoch": 2.9206410503958296, - "grad_norm": 0.011895395815372467, - "learning_rate": 4.406587311754874e-05, - "loss": 0.0038, - "step": 3780 - }, - { - "epoch": 2.9214134002703225, - "grad_norm": 0.007252044975757599, - "learning_rate": 4.4039088917526075e-05, - "loss": 0.0036, - "step": 3781 - }, - { - "epoch": 2.9221857501448154, - "grad_norm": 0.009333361871540546, - "learning_rate": 4.401230645258056e-05, - "loss": 0.0039, - "step": 3782 - }, - { - "epoch": 2.922958100019309, - "grad_norm": 0.008951723575592041, - "learning_rate": 4.398552573050797e-05, - "loss": 0.0033, - "step": 3783 - }, - { - "epoch": 2.923730449893802, - "grad_norm": 0.009833808057010174, - "learning_rate": 4.39587467591035e-05, - "loss": 0.0034, - "step": 3784 - }, - { - "epoch": 2.924502799768295, - "grad_norm": 0.009983672760426998, - "learning_rate": 4.39319695461619e-05, - "loss": 0.0034, - "step": 3785 - }, - { - "epoch": 2.925275149642788, - "grad_norm": 0.007449030876159668, - "learning_rate": 4.390519409947732e-05, - "loss": 0.0038, - "step": 3786 - }, - { - "epoch": 2.9260474995172814, - "grad_norm": 0.015035904943943024, - "learning_rate": 4.387842042684346e-05, - "loss": 0.0039, - "step": 3787 - }, - { - "epoch": 2.9268198493917743, - "grad_norm": 0.022417740896344185, - "learning_rate": 4.385164853605354e-05, - "loss": 0.0035, - "step": 3788 - }, - { - "epoch": 2.9275921992662677, - "grad_norm": 0.011493900790810585, - "learning_rate": 4.382487843490012e-05, - "loss": 0.0036, - "step": 3789 - }, - { - "epoch": 2.9283645491407606, - "grad_norm": 0.016776161268353462, - "learning_rate": 4.3798110131175396e-05, - "loss": 0.0036, - "step": 3790 - }, - { - "epoch": 2.929136899015254, - "grad_norm": 0.021847503259778023, - "learning_rate": 4.377134363267097e-05, - "loss": 0.0038, - "step": 3791 - }, - { - "epoch": 2.929909248889747, - "grad_norm": 0.015478880144655704, - "learning_rate": 4.374457894717788e-05, - "loss": 0.0043, - "step": 3792 - }, - { - "epoch": 2.9306815987642403, - "grad_norm": 0.009824762120842934, - "learning_rate": 4.371781608248672e-05, - "loss": 0.0036, - "step": 3793 - }, - { - "epoch": 2.931453948638733, - "grad_norm": 0.011714638210833073, - "learning_rate": 4.3691055046387484e-05, - "loss": 0.0041, - "step": 3794 - }, - { - "epoch": 2.9322262985132266, - "grad_norm": 0.020869752392172813, - "learning_rate": 4.366429584666971e-05, - "loss": 0.0037, - "step": 3795 - }, - { - "epoch": 2.9329986483877195, - "grad_norm": 0.023330258205533028, - "learning_rate": 4.363753849112231e-05, - "loss": 0.0037, - "step": 3796 - }, - { - "epoch": 2.933770998262213, - "grad_norm": 0.011580456048250198, - "learning_rate": 4.361078298753371e-05, - "loss": 0.004, - "step": 3797 - }, - { - "epoch": 2.934543348136706, - "grad_norm": 0.017411785200238228, - "learning_rate": 4.3584029343691805e-05, - "loss": 0.0036, - "step": 3798 - }, - { - "epoch": 2.935315698011199, - "grad_norm": 0.01577589102089405, - "learning_rate": 4.355727756738393e-05, - "loss": 0.0041, - "step": 3799 - }, - { - "epoch": 2.936088047885692, - "grad_norm": 0.02870682254433632, - "learning_rate": 4.353052766639687e-05, - "loss": 0.0041, - "step": 3800 - }, - { - "epoch": 2.9368603977601855, - "grad_norm": 0.008758428506553173, - "learning_rate": 4.3503779648516896e-05, - "loss": 0.0042, - "step": 3801 - }, - { - "epoch": 2.9376327476346784, - "grad_norm": 0.018319813534617424, - "learning_rate": 4.3477033521529686e-05, - "loss": 0.0041, - "step": 3802 - }, - { - "epoch": 2.938405097509172, - "grad_norm": 0.012039601802825928, - "learning_rate": 4.34502892932204e-05, - "loss": 0.0032, - "step": 3803 - }, - { - "epoch": 2.9391774473836647, - "grad_norm": 0.021439632400870323, - "learning_rate": 4.342354697137364e-05, - "loss": 0.0037, - "step": 3804 - }, - { - "epoch": 2.939949797258158, - "grad_norm": 0.011071378365159035, - "learning_rate": 4.339680656377347e-05, - "loss": 0.0038, - "step": 3805 - }, - { - "epoch": 2.940722147132651, - "grad_norm": 0.011408895254135132, - "learning_rate": 4.3370068078203326e-05, - "loss": 0.0038, - "step": 3806 - }, - { - "epoch": 2.9414944970071444, - "grad_norm": 0.01955719292163849, - "learning_rate": 4.3343331522446175e-05, - "loss": 0.004, - "step": 3807 - }, - { - "epoch": 2.9422668468816373, - "grad_norm": 0.01151325274258852, - "learning_rate": 4.331659690428438e-05, - "loss": 0.0035, - "step": 3808 - }, - { - "epoch": 2.9430391967561307, - "grad_norm": 0.01454999204725027, - "learning_rate": 4.328986423149972e-05, - "loss": 0.0034, - "step": 3809 - }, - { - "epoch": 2.9438115466306236, - "grad_norm": 0.010412359610199928, - "learning_rate": 4.326313351187344e-05, - "loss": 0.0035, - "step": 3810 - }, - { - "epoch": 2.944583896505117, - "grad_norm": 0.008726815693080425, - "learning_rate": 4.323640475318623e-05, - "loss": 0.0035, - "step": 3811 - }, - { - "epoch": 2.94535624637961, - "grad_norm": 0.010784812271595001, - "learning_rate": 4.320967796321815e-05, - "loss": 0.0037, - "step": 3812 - }, - { - "epoch": 2.946128596254103, - "grad_norm": 0.008588474243879318, - "learning_rate": 4.3182953149748745e-05, - "loss": 0.0037, - "step": 3813 - }, - { - "epoch": 2.946900946128596, - "grad_norm": 0.013325064443051815, - "learning_rate": 4.315623032055694e-05, - "loss": 0.0037, - "step": 3814 - }, - { - "epoch": 2.9476732960030896, - "grad_norm": 0.007062042597681284, - "learning_rate": 4.3129509483421157e-05, - "loss": 0.0033, - "step": 3815 - }, - { - "epoch": 2.9484456458775825, - "grad_norm": 0.00899225752800703, - "learning_rate": 4.310279064611912e-05, - "loss": 0.0039, - "step": 3816 - }, - { - "epoch": 2.9492179957520754, - "grad_norm": 0.008381965570151806, - "learning_rate": 4.307607381642808e-05, - "loss": 0.0035, - "step": 3817 - }, - { - "epoch": 2.949990345626569, - "grad_norm": 0.009166887030005455, - "learning_rate": 4.304935900212466e-05, - "loss": 0.0036, - "step": 3818 - }, - { - "epoch": 2.950762695501062, - "grad_norm": 0.010656706057488918, - "learning_rate": 4.302264621098486e-05, - "loss": 0.0036, - "step": 3819 - }, - { - "epoch": 2.951535045375555, - "grad_norm": 0.007658713962882757, - "learning_rate": 4.299593545078416e-05, - "loss": 0.0039, - "step": 3820 - }, - { - "epoch": 2.952307395250048, - "grad_norm": 0.0119070615619421, - "learning_rate": 4.296922672929742e-05, - "loss": 0.0036, - "step": 3821 - }, - { - "epoch": 2.9530797451245414, - "grad_norm": 0.015731122344732285, - "learning_rate": 4.294252005429888e-05, - "loss": 0.0036, - "step": 3822 - }, - { - "epoch": 2.953852094999035, - "grad_norm": 0.008103919215500355, - "learning_rate": 4.2915815433562224e-05, - "loss": 0.0037, - "step": 3823 - }, - { - "epoch": 2.9546244448735277, - "grad_norm": 0.010937350802123547, - "learning_rate": 4.28891128748605e-05, - "loss": 0.0037, - "step": 3824 - }, - { - "epoch": 2.9553967947480206, - "grad_norm": 0.023103781044483185, - "learning_rate": 4.2862412385966233e-05, - "loss": 0.0037, - "step": 3825 - }, - { - "epoch": 2.956169144622514, - "grad_norm": 0.009802715852856636, - "learning_rate": 4.283571397465124e-05, - "loss": 0.0035, - "step": 3826 - }, - { - "epoch": 2.9569414944970074, - "grad_norm": 0.010085641406476498, - "learning_rate": 4.2809017648686775e-05, - "loss": 0.0032, - "step": 3827 - }, - { - "epoch": 2.9577138443715003, - "grad_norm": 0.026056107133626938, - "learning_rate": 4.278232341584355e-05, - "loss": 0.0038, - "step": 3828 - }, - { - "epoch": 2.9584861942459932, - "grad_norm": 0.015221530571579933, - "learning_rate": 4.2755631283891555e-05, - "loss": 0.0041, - "step": 3829 - }, - { - "epoch": 2.9592585441204866, - "grad_norm": 0.009898537769913673, - "learning_rate": 4.272894126060024e-05, - "loss": 0.004, - "step": 3830 - }, - { - "epoch": 2.96003089399498, - "grad_norm": 0.019119389355182648, - "learning_rate": 4.270225335373846e-05, - "loss": 0.0037, - "step": 3831 - }, - { - "epoch": 2.960803243869473, - "grad_norm": 0.01502426527440548, - "learning_rate": 4.267556757107437e-05, - "loss": 0.0042, - "step": 3832 - }, - { - "epoch": 2.961575593743966, - "grad_norm": 0.01574239507317543, - "learning_rate": 4.264888392037557e-05, - "loss": 0.0037, - "step": 3833 - }, - { - "epoch": 2.962347943618459, - "grad_norm": 0.008315333165228367, - "learning_rate": 4.262220240940905e-05, - "loss": 0.0034, - "step": 3834 - }, - { - "epoch": 2.9631202934929526, - "grad_norm": 0.022397365421056747, - "learning_rate": 4.259552304594114e-05, - "loss": 0.0037, - "step": 3835 - }, - { - "epoch": 2.9638926433674455, - "grad_norm": 0.011859522201120853, - "learning_rate": 4.256884583773754e-05, - "loss": 0.0039, - "step": 3836 - }, - { - "epoch": 2.9646649932419384, - "grad_norm": 0.010500779375433922, - "learning_rate": 4.254217079256337e-05, - "loss": 0.0037, - "step": 3837 - }, - { - "epoch": 2.965437343116432, - "grad_norm": 0.009014081209897995, - "learning_rate": 4.2515497918183086e-05, - "loss": 0.0043, - "step": 3838 - }, - { - "epoch": 2.9662096929909247, - "grad_norm": 0.007724730763584375, - "learning_rate": 4.2488827222360487e-05, - "loss": 0.0037, - "step": 3839 - }, - { - "epoch": 2.966982042865418, - "grad_norm": 0.010915543884038925, - "learning_rate": 4.246215871285879e-05, - "loss": 0.004, - "step": 3840 - }, - { - "epoch": 2.967754392739911, - "grad_norm": 0.021229470148682594, - "learning_rate": 4.243549239744057e-05, - "loss": 0.0037, - "step": 3841 - }, - { - "epoch": 2.9685267426144044, - "grad_norm": 0.011718028225004673, - "learning_rate": 4.2408828283867727e-05, - "loss": 0.004, - "step": 3842 - }, - { - "epoch": 2.9692990924888973, - "grad_norm": 0.007771989796310663, - "learning_rate": 4.238216637990152e-05, - "loss": 0.0035, - "step": 3843 - }, - { - "epoch": 2.9700714423633907, - "grad_norm": 0.017838140949606895, - "learning_rate": 4.2355506693302635e-05, - "loss": 0.0034, - "step": 3844 - }, - { - "epoch": 2.9708437922378836, - "grad_norm": 0.030527664348483086, - "learning_rate": 4.232884923183103e-05, - "loss": 0.0037, - "step": 3845 - }, - { - "epoch": 2.971616142112377, - "grad_norm": 0.009560693055391312, - "learning_rate": 4.230219400324604e-05, - "loss": 0.0034, - "step": 3846 - }, - { - "epoch": 2.97238849198687, - "grad_norm": 0.022195350378751755, - "learning_rate": 4.2275541015306384e-05, - "loss": 0.004, - "step": 3847 - }, - { - "epoch": 2.9731608418613633, - "grad_norm": 0.021256253123283386, - "learning_rate": 4.2248890275770096e-05, - "loss": 0.0041, - "step": 3848 - }, - { - "epoch": 2.9739331917358562, - "grad_norm": 0.007295470684766769, - "learning_rate": 4.222224179239455e-05, - "loss": 0.0033, - "step": 3849 - }, - { - "epoch": 2.9747055416103496, - "grad_norm": 0.009077059105038643, - "learning_rate": 4.219559557293647e-05, - "loss": 0.0036, - "step": 3850 - }, - { - "epoch": 2.9754778914848425, - "grad_norm": 0.009278069250285625, - "learning_rate": 4.216895162515197e-05, - "loss": 0.0039, - "step": 3851 - }, - { - "epoch": 2.976250241359336, - "grad_norm": 0.014763458631932735, - "learning_rate": 4.21423099567964e-05, - "loss": 0.0036, - "step": 3852 - }, - { - "epoch": 2.977022591233829, - "grad_norm": 0.017561612650752068, - "learning_rate": 4.211567057562454e-05, - "loss": 0.0041, - "step": 3853 - }, - { - "epoch": 2.977794941108322, - "grad_norm": 0.016626615077257156, - "learning_rate": 4.2089033489390483e-05, - "loss": 0.0038, - "step": 3854 - }, - { - "epoch": 2.978567290982815, - "grad_norm": 0.010037774220108986, - "learning_rate": 4.20623987058476e-05, - "loss": 0.004, - "step": 3855 - }, - { - "epoch": 2.9793396408573085, - "grad_norm": 0.008447550237178802, - "learning_rate": 4.2035766232748664e-05, - "loss": 0.0037, - "step": 3856 - }, - { - "epoch": 2.9801119907318014, - "grad_norm": 0.01665206253528595, - "learning_rate": 4.2009136077845725e-05, - "loss": 0.004, - "step": 3857 - }, - { - "epoch": 2.980884340606295, - "grad_norm": 0.009643926285207272, - "learning_rate": 4.198250824889021e-05, - "loss": 0.0036, - "step": 3858 - }, - { - "epoch": 2.9816566904807877, - "grad_norm": 0.0100821228697896, - "learning_rate": 4.1955882753632806e-05, - "loss": 0.0038, - "step": 3859 - }, - { - "epoch": 2.9824290403552807, - "grad_norm": 0.013579235412180424, - "learning_rate": 4.1929259599823556e-05, - "loss": 0.0039, - "step": 3860 - }, - { - "epoch": 2.983201390229774, - "grad_norm": 0.007406320888549089, - "learning_rate": 4.1902638795211836e-05, - "loss": 0.0032, - "step": 3861 - }, - { - "epoch": 2.9839737401042674, - "grad_norm": 0.014929025433957577, - "learning_rate": 4.18760203475463e-05, - "loss": 0.0038, - "step": 3862 - }, - { - "epoch": 2.9847460899787603, - "grad_norm": 0.010060267522931099, - "learning_rate": 4.184940426457492e-05, - "loss": 0.0037, - "step": 3863 - }, - { - "epoch": 2.9855184398532533, - "grad_norm": 0.00800973642617464, - "learning_rate": 4.182279055404504e-05, - "loss": 0.0034, - "step": 3864 - }, - { - "epoch": 2.9862907897277466, - "grad_norm": 0.012781900353729725, - "learning_rate": 4.1796179223703225e-05, - "loss": 0.0033, - "step": 3865 - }, - { - "epoch": 2.98706313960224, - "grad_norm": 0.007149236276745796, - "learning_rate": 4.17695702812954e-05, - "loss": 0.0034, - "step": 3866 - }, - { - "epoch": 2.987835489476733, - "grad_norm": 0.007939104922115803, - "learning_rate": 4.174296373456681e-05, - "loss": 0.0035, - "step": 3867 - }, - { - "epoch": 2.988607839351226, - "grad_norm": 0.008486042730510235, - "learning_rate": 4.1716359591261964e-05, - "loss": 0.0034, - "step": 3868 - }, - { - "epoch": 2.9893801892257192, - "grad_norm": 0.01805216819047928, - "learning_rate": 4.168975785912467e-05, - "loss": 0.0042, - "step": 3869 - }, - { - "epoch": 2.9901525391002126, - "grad_norm": 0.013067138381302357, - "learning_rate": 4.166315854589805e-05, - "loss": 0.0036, - "step": 3870 - }, - { - "epoch": 2.9909248889747055, - "grad_norm": 0.01489514485001564, - "learning_rate": 4.1636561659324565e-05, - "loss": 0.0039, - "step": 3871 - }, - { - "epoch": 2.9916972388491985, - "grad_norm": 0.015756510198116302, - "learning_rate": 4.160996720714587e-05, - "loss": 0.0041, - "step": 3872 - }, - { - "epoch": 2.992469588723692, - "grad_norm": 0.012170984409749508, - "learning_rate": 4.1583375197102985e-05, - "loss": 0.0038, - "step": 3873 - }, - { - "epoch": 2.993241938598185, - "grad_norm": 0.013005238026380539, - "learning_rate": 4.155678563693623e-05, - "loss": 0.0039, - "step": 3874 - }, - { - "epoch": 2.994014288472678, - "grad_norm": 0.013325825333595276, - "learning_rate": 4.153019853438515e-05, - "loss": 0.0038, - "step": 3875 - }, - { - "epoch": 2.994786638347171, - "grad_norm": 0.01133064366877079, - "learning_rate": 4.15036138971886e-05, - "loss": 0.0036, - "step": 3876 - }, - { - "epoch": 2.9955589882216644, - "grad_norm": 0.00813497044146061, - "learning_rate": 4.147703173308477e-05, - "loss": 0.0039, - "step": 3877 - }, - { - "epoch": 2.996331338096158, - "grad_norm": 0.011179421097040176, - "learning_rate": 4.145045204981106e-05, - "loss": 0.004, - "step": 3878 - }, - { - "epoch": 2.9971036879706507, - "grad_norm": 0.011623677797615528, - "learning_rate": 4.142387485510416e-05, - "loss": 0.0039, - "step": 3879 - }, - { - "epoch": 2.9978760378451437, - "grad_norm": 0.009855561889708042, - "learning_rate": 4.139730015670006e-05, - "loss": 0.0039, - "step": 3880 - }, - { - "epoch": 2.998648387719637, - "grad_norm": 0.006677902769297361, - "learning_rate": 4.137072796233404e-05, - "loss": 0.0034, - "step": 3881 - }, - { - "epoch": 2.9994207375941304, - "grad_norm": 0.008960848674178123, - "learning_rate": 4.1344158279740574e-05, - "loss": 0.0037, - "step": 3882 - }, - { - "epoch": 3.000772349874493, - "grad_norm": 0.04119117185473442, - "learning_rate": 4.131759111665349e-05, - "loss": 0.0073, - "step": 3883 - }, - { - "epoch": 3.0015446997489863, - "grad_norm": 0.0093689551576972, - "learning_rate": 4.1291026480805845e-05, - "loss": 0.0033, - "step": 3884 - }, - { - "epoch": 3.0023170496234792, - "grad_norm": 0.013623661361634731, - "learning_rate": 4.126446437992993e-05, - "loss": 0.0033, - "step": 3885 - }, - { - "epoch": 3.0030893994979726, - "grad_norm": 0.013883264735341072, - "learning_rate": 4.1237904821757374e-05, - "loss": 0.0034, - "step": 3886 - }, - { - "epoch": 3.0038617493724655, - "grad_norm": 0.010895649902522564, - "learning_rate": 4.121134781401899e-05, - "loss": 0.0034, - "step": 3887 - }, - { - "epoch": 3.004634099246959, - "grad_norm": 0.010138518176972866, - "learning_rate": 4.118479336444492e-05, - "loss": 0.0034, - "step": 3888 - }, - { - "epoch": 3.005406449121452, - "grad_norm": 0.010731861926615238, - "learning_rate": 4.1158241480764483e-05, - "loss": 0.0034, - "step": 3889 - }, - { - "epoch": 3.006178798995945, - "grad_norm": 0.009778124280273914, - "learning_rate": 4.113169217070629e-05, - "loss": 0.0038, - "step": 3890 - }, - { - "epoch": 3.006951148870438, - "grad_norm": 0.011736907996237278, - "learning_rate": 4.110514544199825e-05, - "loss": 0.0035, - "step": 3891 - }, - { - "epoch": 3.0077234987449315, - "grad_norm": 0.01224558986723423, - "learning_rate": 4.107860130236743e-05, - "loss": 0.0034, - "step": 3892 - }, - { - "epoch": 3.0084958486194244, - "grad_norm": 0.009522565640509129, - "learning_rate": 4.105205975954019e-05, - "loss": 0.0033, - "step": 3893 - }, - { - "epoch": 3.009268198493918, - "grad_norm": 0.01398923434317112, - "learning_rate": 4.102552082124217e-05, - "loss": 0.0031, - "step": 3894 - }, - { - "epoch": 3.0100405483684107, - "grad_norm": 0.014189448207616806, - "learning_rate": 4.099898449519817e-05, - "loss": 0.0037, - "step": 3895 - }, - { - "epoch": 3.010812898242904, - "grad_norm": 0.011617259122431278, - "learning_rate": 4.097245078913229e-05, - "loss": 0.0037, - "step": 3896 - }, - { - "epoch": 3.011585248117397, - "grad_norm": 0.00984710082411766, - "learning_rate": 4.094591971076783e-05, - "loss": 0.003, - "step": 3897 - }, - { - "epoch": 3.0123575979918904, - "grad_norm": 0.012181985192000866, - "learning_rate": 4.09193912678274e-05, - "loss": 0.0029, - "step": 3898 - }, - { - "epoch": 3.0131299478663833, - "grad_norm": 0.010762302204966545, - "learning_rate": 4.089286546803275e-05, - "loss": 0.0035, - "step": 3899 - }, - { - "epoch": 3.0139022977408767, - "grad_norm": 0.00949389673769474, - "learning_rate": 4.0866342319104884e-05, - "loss": 0.0033, - "step": 3900 - }, - { - "epoch": 3.0146746476153696, - "grad_norm": 0.014336561784148216, - "learning_rate": 4.083982182876409e-05, - "loss": 0.0033, - "step": 3901 - }, - { - "epoch": 3.015446997489863, - "grad_norm": 0.010391092859208584, - "learning_rate": 4.081330400472982e-05, - "loss": 0.0031, - "step": 3902 - }, - { - "epoch": 3.016219347364356, - "grad_norm": 0.010914616286754608, - "learning_rate": 4.078678885472076e-05, - "loss": 0.0037, - "step": 3903 - }, - { - "epoch": 3.0169916972388493, - "grad_norm": 0.00893909391015768, - "learning_rate": 4.0760276386454856e-05, - "loss": 0.0032, - "step": 3904 - }, - { - "epoch": 3.0177640471133422, - "grad_norm": 0.008087982423603535, - "learning_rate": 4.073376660764924e-05, - "loss": 0.0032, - "step": 3905 - }, - { - "epoch": 3.0185363969878356, - "grad_norm": 0.008801883086562157, - "learning_rate": 4.0707259526020244e-05, - "loss": 0.0033, - "step": 3906 - }, - { - "epoch": 3.0193087468623285, - "grad_norm": 0.007589471992105246, - "learning_rate": 4.068075514928347e-05, - "loss": 0.003, - "step": 3907 - }, - { - "epoch": 3.020081096736822, - "grad_norm": 0.009592815302312374, - "learning_rate": 4.065425348515369e-05, - "loss": 0.0036, - "step": 3908 - }, - { - "epoch": 3.020853446611315, - "grad_norm": 0.014320937916636467, - "learning_rate": 4.062775454134489e-05, - "loss": 0.003, - "step": 3909 - }, - { - "epoch": 3.021625796485808, - "grad_norm": 0.009289335459470749, - "learning_rate": 4.060125832557028e-05, - "loss": 0.0031, - "step": 3910 - }, - { - "epoch": 3.022398146360301, - "grad_norm": 0.00905862171202898, - "learning_rate": 4.0574764845542276e-05, - "loss": 0.0034, - "step": 3911 - }, - { - "epoch": 3.0231704962347945, - "grad_norm": 0.006691919639706612, - "learning_rate": 4.0548274108972464e-05, - "loss": 0.0031, - "step": 3912 - }, - { - "epoch": 3.0239428461092874, - "grad_norm": 0.014596517197787762, - "learning_rate": 4.052178612357169e-05, - "loss": 0.0031, - "step": 3913 - }, - { - "epoch": 3.024715195983781, - "grad_norm": 0.01569315977394581, - "learning_rate": 4.0495300897049957e-05, - "loss": 0.003, - "step": 3914 - }, - { - "epoch": 3.0254875458582737, - "grad_norm": 0.009716901928186417, - "learning_rate": 4.046881843711645e-05, - "loss": 0.0033, - "step": 3915 - }, - { - "epoch": 3.026259895732767, - "grad_norm": 0.00683940015733242, - "learning_rate": 4.04423387514796e-05, - "loss": 0.003, - "step": 3916 - }, - { - "epoch": 3.02703224560726, - "grad_norm": 0.009737824089825153, - "learning_rate": 4.0415861847846994e-05, - "loss": 0.0032, - "step": 3917 - }, - { - "epoch": 3.0278045954817534, - "grad_norm": 0.008718915283679962, - "learning_rate": 4.0389387733925434e-05, - "loss": 0.003, - "step": 3918 - }, - { - "epoch": 3.0285769453562463, - "grad_norm": 0.010790186002850533, - "learning_rate": 4.036291641742087e-05, - "loss": 0.0031, - "step": 3919 - }, - { - "epoch": 3.0293492952307397, - "grad_norm": 0.02050062268972397, - "learning_rate": 4.0336447906038466e-05, - "loss": 0.0032, - "step": 3920 - }, - { - "epoch": 3.0301216451052326, - "grad_norm": 0.010391199961304665, - "learning_rate": 4.030998220748261e-05, - "loss": 0.0029, - "step": 3921 - }, - { - "epoch": 3.030893994979726, - "grad_norm": 0.009953645057976246, - "learning_rate": 4.028351932945675e-05, - "loss": 0.003, - "step": 3922 - }, - { - "epoch": 3.031666344854219, - "grad_norm": 0.017743706703186035, - "learning_rate": 4.025705927966365e-05, - "loss": 0.0034, - "step": 3923 - }, - { - "epoch": 3.0324386947287123, - "grad_norm": 0.01259995810687542, - "learning_rate": 4.0230602065805176e-05, - "loss": 0.0033, - "step": 3924 - }, - { - "epoch": 3.0332110446032052, - "grad_norm": 0.007997802458703518, - "learning_rate": 4.0204147695582364e-05, - "loss": 0.003, - "step": 3925 - }, - { - "epoch": 3.0339833944776986, - "grad_norm": 0.009885910898447037, - "learning_rate": 4.0177696176695466e-05, - "loss": 0.0032, - "step": 3926 - }, - { - "epoch": 3.0347557443521915, - "grad_norm": 0.015047162771224976, - "learning_rate": 4.015124751684386e-05, - "loss": 0.0035, - "step": 3927 - }, - { - "epoch": 3.035528094226685, - "grad_norm": 0.010488315485417843, - "learning_rate": 4.0124801723726155e-05, - "loss": 0.0029, - "step": 3928 - }, - { - "epoch": 3.036300444101178, - "grad_norm": 0.011665630154311657, - "learning_rate": 4.0098358805040036e-05, - "loss": 0.003, - "step": 3929 - }, - { - "epoch": 3.037072793975671, - "grad_norm": 0.010001543909311295, - "learning_rate": 4.0071918768482406e-05, - "loss": 0.0035, - "step": 3930 - }, - { - "epoch": 3.037845143850164, - "grad_norm": 0.0158031415194273, - "learning_rate": 4.0045481621749345e-05, - "loss": 0.0031, - "step": 3931 - }, - { - "epoch": 3.038617493724657, - "grad_norm": 0.008115153759717941, - "learning_rate": 4.001904737253604e-05, - "loss": 0.003, - "step": 3932 - }, - { - "epoch": 3.0393898435991504, - "grad_norm": 0.010430651716887951, - "learning_rate": 3.999261602853686e-05, - "loss": 0.0031, - "step": 3933 - }, - { - "epoch": 3.0401621934736434, - "grad_norm": 0.008416597731411457, - "learning_rate": 3.9966187597445373e-05, - "loss": 0.003, - "step": 3934 - }, - { - "epoch": 3.0409345433481367, - "grad_norm": 0.007000258192420006, - "learning_rate": 3.993976208695421e-05, - "loss": 0.0035, - "step": 3935 - }, - { - "epoch": 3.0417068932226297, - "grad_norm": 0.016911238431930542, - "learning_rate": 3.99133395047552e-05, - "loss": 0.0028, - "step": 3936 - }, - { - "epoch": 3.042479243097123, - "grad_norm": 0.007231170777231455, - "learning_rate": 3.988691985853936e-05, - "loss": 0.0034, - "step": 3937 - }, - { - "epoch": 3.043251592971616, - "grad_norm": 0.009564080275595188, - "learning_rate": 3.986050315599678e-05, - "loss": 0.003, - "step": 3938 - }, - { - "epoch": 3.0440239428461093, - "grad_norm": 0.010936416685581207, - "learning_rate": 3.983408940481672e-05, - "loss": 0.0033, - "step": 3939 - }, - { - "epoch": 3.0447962927206023, - "grad_norm": 0.012983507476747036, - "learning_rate": 3.9807678612687596e-05, - "loss": 0.0037, - "step": 3940 - }, - { - "epoch": 3.0455686425950956, - "grad_norm": 0.009794589132070541, - "learning_rate": 3.9781270787296954e-05, - "loss": 0.0034, - "step": 3941 - }, - { - "epoch": 3.0463409924695886, - "grad_norm": 0.007537867408245802, - "learning_rate": 3.9754865936331455e-05, - "loss": 0.0032, - "step": 3942 - }, - { - "epoch": 3.047113342344082, - "grad_norm": 0.010640479624271393, - "learning_rate": 3.972846406747694e-05, - "loss": 0.0035, - "step": 3943 - }, - { - "epoch": 3.047885692218575, - "grad_norm": 0.02730044722557068, - "learning_rate": 3.9702065188418344e-05, - "loss": 0.0032, - "step": 3944 - }, - { - "epoch": 3.0486580420930682, - "grad_norm": 0.014430705457925797, - "learning_rate": 3.9675669306839724e-05, - "loss": 0.0029, - "step": 3945 - }, - { - "epoch": 3.049430391967561, - "grad_norm": 0.0166519396007061, - "learning_rate": 3.9649276430424306e-05, - "loss": 0.0036, - "step": 3946 - }, - { - "epoch": 3.0502027418420545, - "grad_norm": 0.01753905788064003, - "learning_rate": 3.962288656685441e-05, - "loss": 0.0034, - "step": 3947 - }, - { - "epoch": 3.0509750917165475, - "grad_norm": 0.027564140036702156, - "learning_rate": 3.959649972381152e-05, - "loss": 0.0032, - "step": 3948 - }, - { - "epoch": 3.051747441591041, - "grad_norm": 0.008522110059857368, - "learning_rate": 3.957011590897614e-05, - "loss": 0.0033, - "step": 3949 - }, - { - "epoch": 3.0525197914655338, - "grad_norm": 0.013643836602568626, - "learning_rate": 3.9543735130028015e-05, - "loss": 0.0034, - "step": 3950 - }, - { - "epoch": 3.053292141340027, - "grad_norm": 0.009889103472232819, - "learning_rate": 3.951735739464594e-05, - "loss": 0.0031, - "step": 3951 - }, - { - "epoch": 3.05406449121452, - "grad_norm": 0.023804178461432457, - "learning_rate": 3.949098271050782e-05, - "loss": 0.0034, - "step": 3952 - }, - { - "epoch": 3.0548368410890134, - "grad_norm": 0.02104552462697029, - "learning_rate": 3.9464611085290714e-05, - "loss": 0.004, - "step": 3953 - }, - { - "epoch": 3.0556091909635064, - "grad_norm": 0.0075318883173167706, - "learning_rate": 3.9438242526670754e-05, - "loss": 0.0028, - "step": 3954 - }, - { - "epoch": 3.0563815408379997, - "grad_norm": 0.023547343909740448, - "learning_rate": 3.941187704232318e-05, - "loss": 0.0033, - "step": 3955 - }, - { - "epoch": 3.0571538907124927, - "grad_norm": 0.030767524614930153, - "learning_rate": 3.938551463992235e-05, - "loss": 0.0035, - "step": 3956 - }, - { - "epoch": 3.057926240586986, - "grad_norm": 0.01460769958794117, - "learning_rate": 3.935915532714173e-05, - "loss": 0.0037, - "step": 3957 - }, - { - "epoch": 3.058698590461479, - "grad_norm": 0.008200748823583126, - "learning_rate": 3.933279911165389e-05, - "loss": 0.003, - "step": 3958 - }, - { - "epoch": 3.0594709403359723, - "grad_norm": 0.011066111735999584, - "learning_rate": 3.930644600113047e-05, - "loss": 0.0033, - "step": 3959 - }, - { - "epoch": 3.0602432902104653, - "grad_norm": 0.015494465827941895, - "learning_rate": 3.928009600324222e-05, - "loss": 0.0033, - "step": 3960 - }, - { - "epoch": 3.0610156400849586, - "grad_norm": 0.015704691410064697, - "learning_rate": 3.9253749125659005e-05, - "loss": 0.0034, - "step": 3961 - }, - { - "epoch": 3.0617879899594516, - "grad_norm": 0.011681852862238884, - "learning_rate": 3.9227405376049754e-05, - "loss": 0.003, - "step": 3962 - }, - { - "epoch": 3.062560339833945, - "grad_norm": 0.011715327389538288, - "learning_rate": 3.920106476208248e-05, - "loss": 0.0032, - "step": 3963 - }, - { - "epoch": 3.063332689708438, - "grad_norm": 0.008908935822546482, - "learning_rate": 3.917472729142435e-05, - "loss": 0.0027, - "step": 3964 - }, - { - "epoch": 3.0641050395829312, - "grad_norm": 0.0069875093176960945, - "learning_rate": 3.914839297174152e-05, - "loss": 0.003, - "step": 3965 - }, - { - "epoch": 3.064877389457424, - "grad_norm": 0.017726223915815353, - "learning_rate": 3.912206181069927e-05, - "loss": 0.0029, - "step": 3966 - }, - { - "epoch": 3.0656497393319175, - "grad_norm": 0.01484143827110529, - "learning_rate": 3.9095733815961986e-05, - "loss": 0.0032, - "step": 3967 - }, - { - "epoch": 3.0664220892064105, - "grad_norm": 0.013785186223685741, - "learning_rate": 3.906940899519312e-05, - "loss": 0.0031, - "step": 3968 - }, - { - "epoch": 3.067194439080904, - "grad_norm": 0.010157103650271893, - "learning_rate": 3.904308735605516e-05, - "loss": 0.0035, - "step": 3969 - }, - { - "epoch": 3.0679667889553968, - "grad_norm": 0.011927854269742966, - "learning_rate": 3.901676890620973e-05, - "loss": 0.0033, - "step": 3970 - }, - { - "epoch": 3.06873913882989, - "grad_norm": 0.02505023591220379, - "learning_rate": 3.8990453653317485e-05, - "loss": 0.0033, - "step": 3971 - }, - { - "epoch": 3.069511488704383, - "grad_norm": 0.0361270047724247, - "learning_rate": 3.896414160503814e-05, - "loss": 0.0034, - "step": 3972 - }, - { - "epoch": 3.0702838385788764, - "grad_norm": 0.010872379876673222, - "learning_rate": 3.8937832769030516e-05, - "loss": 0.0032, - "step": 3973 - }, - { - "epoch": 3.0710561884533694, - "grad_norm": 0.015281944535672665, - "learning_rate": 3.8911527152952496e-05, - "loss": 0.0036, - "step": 3974 - }, - { - "epoch": 3.0718285383278627, - "grad_norm": 0.007319580763578415, - "learning_rate": 3.888522476446097e-05, - "loss": 0.0033, - "step": 3975 - }, - { - "epoch": 3.0726008882023557, - "grad_norm": 0.022427208721637726, - "learning_rate": 3.885892561121194e-05, - "loss": 0.0035, - "step": 3976 - }, - { - "epoch": 3.073373238076849, - "grad_norm": 0.019485650584101677, - "learning_rate": 3.883262970086045e-05, - "loss": 0.0035, - "step": 3977 - }, - { - "epoch": 3.074145587951342, - "grad_norm": 0.009180407039821148, - "learning_rate": 3.880633704106066e-05, - "loss": 0.0031, - "step": 3978 - }, - { - "epoch": 3.074917937825835, - "grad_norm": 0.010681082494556904, - "learning_rate": 3.878004763946564e-05, - "loss": 0.0033, - "step": 3979 - }, - { - "epoch": 3.0756902877003283, - "grad_norm": 0.02105492167174816, - "learning_rate": 3.8753761503727656e-05, - "loss": 0.0036, - "step": 3980 - }, - { - "epoch": 3.076462637574821, - "grad_norm": 0.025038886815309525, - "learning_rate": 3.872747864149797e-05, - "loss": 0.0032, - "step": 3981 - }, - { - "epoch": 3.0772349874493146, - "grad_norm": 0.009126921184360981, - "learning_rate": 3.870119906042684e-05, - "loss": 0.0032, - "step": 3982 - }, - { - "epoch": 3.0780073373238075, - "grad_norm": 0.015841223299503326, - "learning_rate": 3.867492276816366e-05, - "loss": 0.0035, - "step": 3983 - }, - { - "epoch": 3.078779687198301, - "grad_norm": 0.02008405700325966, - "learning_rate": 3.8648649772356824e-05, - "loss": 0.0033, - "step": 3984 - }, - { - "epoch": 3.079552037072794, - "grad_norm": 0.013557328842580318, - "learning_rate": 3.862238008065374e-05, - "loss": 0.0034, - "step": 3985 - }, - { - "epoch": 3.080324386947287, - "grad_norm": 0.012391114607453346, - "learning_rate": 3.85961137007009e-05, - "loss": 0.0032, - "step": 3986 - }, - { - "epoch": 3.08109673682178, - "grad_norm": 0.011869030073285103, - "learning_rate": 3.8569850640143796e-05, - "loss": 0.004, - "step": 3987 - }, - { - "epoch": 3.0818690866962735, - "grad_norm": 0.01839050091803074, - "learning_rate": 3.8543590906627e-05, - "loss": 0.0033, - "step": 3988 - }, - { - "epoch": 3.0826414365707664, - "grad_norm": 0.022180140018463135, - "learning_rate": 3.851733450779406e-05, - "loss": 0.0033, - "step": 3989 - }, - { - "epoch": 3.0834137864452598, - "grad_norm": 0.010339627042412758, - "learning_rate": 3.8491081451287577e-05, - "loss": 0.0033, - "step": 3990 - }, - { - "epoch": 3.0841861363197527, - "grad_norm": 0.015834074467420578, - "learning_rate": 3.846483174474921e-05, - "loss": 0.0032, - "step": 3991 - }, - { - "epoch": 3.084958486194246, - "grad_norm": 0.01692846417427063, - "learning_rate": 3.843858539581959e-05, - "loss": 0.0035, - "step": 3992 - }, - { - "epoch": 3.085730836068739, - "grad_norm": 0.011141604743897915, - "learning_rate": 3.841234241213838e-05, - "loss": 0.0034, - "step": 3993 - }, - { - "epoch": 3.0865031859432324, - "grad_norm": 0.010867138393223286, - "learning_rate": 3.838610280134432e-05, - "loss": 0.0032, - "step": 3994 - }, - { - "epoch": 3.0872755358177253, - "grad_norm": 0.010458271950483322, - "learning_rate": 3.835986657107511e-05, - "loss": 0.003, - "step": 3995 - }, - { - "epoch": 3.0880478856922187, - "grad_norm": 0.010842096991837025, - "learning_rate": 3.8333633728967456e-05, - "loss": 0.0033, - "step": 3996 - }, - { - "epoch": 3.0888202355667116, - "grad_norm": 0.01667606830596924, - "learning_rate": 3.8307404282657134e-05, - "loss": 0.0037, - "step": 3997 - }, - { - "epoch": 3.089592585441205, - "grad_norm": 0.011396470479667187, - "learning_rate": 3.8281178239778915e-05, - "loss": 0.0033, - "step": 3998 - }, - { - "epoch": 3.090364935315698, - "grad_norm": 0.0088053522631526, - "learning_rate": 3.825495560796651e-05, - "loss": 0.0032, - "step": 3999 - }, - { - "epoch": 3.0911372851901913, - "grad_norm": 0.012660624459385872, - "learning_rate": 3.822873639485276e-05, - "loss": 0.0029, - "step": 4000 - }, - { - "epoch": 3.091909635064684, - "grad_norm": 0.010186905972659588, - "learning_rate": 3.820252060806941e-05, - "loss": 0.0032, - "step": 4001 - }, - { - "epoch": 3.0926819849391776, - "grad_norm": 0.01091783307492733, - "learning_rate": 3.817630825524723e-05, - "loss": 0.0034, - "step": 4002 - }, - { - "epoch": 3.0934543348136705, - "grad_norm": 0.008735897950828075, - "learning_rate": 3.8150099344016024e-05, - "loss": 0.003, - "step": 4003 - }, - { - "epoch": 3.094226684688164, - "grad_norm": 0.011613654904067516, - "learning_rate": 3.812389388200458e-05, - "loss": 0.0036, - "step": 4004 - }, - { - "epoch": 3.094999034562657, - "grad_norm": 0.013497716747224331, - "learning_rate": 3.8097691876840655e-05, - "loss": 0.0034, - "step": 4005 - }, - { - "epoch": 3.09577138443715, - "grad_norm": 0.00860733911395073, - "learning_rate": 3.807149333615101e-05, - "loss": 0.0029, - "step": 4006 - }, - { - "epoch": 3.096543734311643, - "grad_norm": 0.010404443368315697, - "learning_rate": 3.804529826756144e-05, - "loss": 0.003, - "step": 4007 - }, - { - "epoch": 3.0973160841861365, - "grad_norm": 0.010187176056206226, - "learning_rate": 3.8019106678696695e-05, - "loss": 0.0033, - "step": 4008 - }, - { - "epoch": 3.0980884340606294, - "grad_norm": 0.015230610966682434, - "learning_rate": 3.799291857718047e-05, - "loss": 0.0033, - "step": 4009 - }, - { - "epoch": 3.0988607839351228, - "grad_norm": 0.007961034774780273, - "learning_rate": 3.7966733970635526e-05, - "loss": 0.0032, - "step": 4010 - }, - { - "epoch": 3.0996331338096157, - "grad_norm": 0.014632215723395348, - "learning_rate": 3.794055286668358e-05, - "loss": 0.0031, - "step": 4011 - }, - { - "epoch": 3.100405483684109, - "grad_norm": 0.01650458574295044, - "learning_rate": 3.791437527294527e-05, - "loss": 0.0037, - "step": 4012 - }, - { - "epoch": 3.101177833558602, - "grad_norm": 0.01053455751389265, - "learning_rate": 3.7888201197040304e-05, - "loss": 0.0028, - "step": 4013 - }, - { - "epoch": 3.1019501834330954, - "grad_norm": 0.009459201246500015, - "learning_rate": 3.786203064658732e-05, - "loss": 0.0035, - "step": 4014 - }, - { - "epoch": 3.1027225333075883, - "grad_norm": 0.012510925531387329, - "learning_rate": 3.7835863629203904e-05, - "loss": 0.0031, - "step": 4015 - }, - { - "epoch": 3.1034948831820817, - "grad_norm": 0.013898443430662155, - "learning_rate": 3.780970015250667e-05, - "loss": 0.0038, - "step": 4016 - }, - { - "epoch": 3.1042672330565746, - "grad_norm": 0.017648473381996155, - "learning_rate": 3.778354022411115e-05, - "loss": 0.0029, - "step": 4017 - }, - { - "epoch": 3.105039582931068, - "grad_norm": 0.013399518094956875, - "learning_rate": 3.775738385163191e-05, - "loss": 0.0035, - "step": 4018 - }, - { - "epoch": 3.105811932805561, - "grad_norm": 0.010458532720804214, - "learning_rate": 3.773123104268239e-05, - "loss": 0.0031, - "step": 4019 - }, - { - "epoch": 3.1065842826800543, - "grad_norm": 0.01588922180235386, - "learning_rate": 3.770508180487506e-05, - "loss": 0.0032, - "step": 4020 - }, - { - "epoch": 3.107356632554547, - "grad_norm": 0.008511990308761597, - "learning_rate": 3.7678936145821344e-05, - "loss": 0.0035, - "step": 4021 - }, - { - "epoch": 3.1081289824290406, - "grad_norm": 0.01115653570741415, - "learning_rate": 3.7652794073131595e-05, - "loss": 0.0031, - "step": 4022 - }, - { - "epoch": 3.1089013323035335, - "grad_norm": 0.015124349854886532, - "learning_rate": 3.762665559441513e-05, - "loss": 0.0031, - "step": 4023 - }, - { - "epoch": 3.109673682178027, - "grad_norm": 0.012387467548251152, - "learning_rate": 3.760052071728026e-05, - "loss": 0.0037, - "step": 4024 - }, - { - "epoch": 3.11044603205252, - "grad_norm": 0.010778908617794514, - "learning_rate": 3.757438944933419e-05, - "loss": 0.0031, - "step": 4025 - }, - { - "epoch": 3.1112183819270127, - "grad_norm": 0.022256221622228622, - "learning_rate": 3.75482617981831e-05, - "loss": 0.0032, - "step": 4026 - }, - { - "epoch": 3.111990731801506, - "grad_norm": 0.0103048300370574, - "learning_rate": 3.752213777143214e-05, - "loss": 0.0034, - "step": 4027 - }, - { - "epoch": 3.112763081675999, - "grad_norm": 0.009700235910713673, - "learning_rate": 3.749601737668538e-05, - "loss": 0.0029, - "step": 4028 - }, - { - "epoch": 3.1135354315504924, - "grad_norm": 0.015991652384400368, - "learning_rate": 3.7469900621545796e-05, - "loss": 0.0036, - "step": 4029 - }, - { - "epoch": 3.1143077814249853, - "grad_norm": 0.013409771025180817, - "learning_rate": 3.744378751361539e-05, - "loss": 0.0035, - "step": 4030 - }, - { - "epoch": 3.1150801312994787, - "grad_norm": 0.009320034645497799, - "learning_rate": 3.7417678060495045e-05, - "loss": 0.0031, - "step": 4031 - }, - { - "epoch": 3.1158524811739716, - "grad_norm": 0.009379249997437, - "learning_rate": 3.739157226978458e-05, - "loss": 0.0029, - "step": 4032 - }, - { - "epoch": 3.116624831048465, - "grad_norm": 0.00768303731456399, - "learning_rate": 3.736547014908276e-05, - "loss": 0.0032, - "step": 4033 - }, - { - "epoch": 3.117397180922958, - "grad_norm": 0.007516008801758289, - "learning_rate": 3.73393717059873e-05, - "loss": 0.0032, - "step": 4034 - }, - { - "epoch": 3.1181695307974513, - "grad_norm": 0.008907015435397625, - "learning_rate": 3.731327694809481e-05, - "loss": 0.0032, - "step": 4035 - }, - { - "epoch": 3.1189418806719442, - "grad_norm": 0.011298132129013538, - "learning_rate": 3.728718588300084e-05, - "loss": 0.003, - "step": 4036 - }, - { - "epoch": 3.1197142305464376, - "grad_norm": 0.008996719494462013, - "learning_rate": 3.72610985182999e-05, - "loss": 0.0032, - "step": 4037 - }, - { - "epoch": 3.1204865804209305, - "grad_norm": 0.009170308709144592, - "learning_rate": 3.7235014861585356e-05, - "loss": 0.0031, - "step": 4038 - }, - { - "epoch": 3.121258930295424, - "grad_norm": 0.012968270108103752, - "learning_rate": 3.720893492044953e-05, - "loss": 0.0031, - "step": 4039 - }, - { - "epoch": 3.122031280169917, - "grad_norm": 0.008478841744363308, - "learning_rate": 3.718285870248368e-05, - "loss": 0.0034, - "step": 4040 - }, - { - "epoch": 3.12280363004441, - "grad_norm": 0.00782487541437149, - "learning_rate": 3.715678621527798e-05, - "loss": 0.0027, - "step": 4041 - }, - { - "epoch": 3.123575979918903, - "grad_norm": 0.010733025148510933, - "learning_rate": 3.713071746642145e-05, - "loss": 0.0033, - "step": 4042 - }, - { - "epoch": 3.1243483297933965, - "grad_norm": 0.010959485545754433, - "learning_rate": 3.710465246350212e-05, - "loss": 0.0029, - "step": 4043 - }, - { - "epoch": 3.1251206796678894, - "grad_norm": 0.009859909303486347, - "learning_rate": 3.707859121410687e-05, - "loss": 0.0032, - "step": 4044 - }, - { - "epoch": 3.125893029542383, - "grad_norm": 0.01753888465464115, - "learning_rate": 3.7052533725821484e-05, - "loss": 0.0029, - "step": 4045 - }, - { - "epoch": 3.1266653794168757, - "grad_norm": 0.010642552748322487, - "learning_rate": 3.702648000623067e-05, - "loss": 0.0033, - "step": 4046 - }, - { - "epoch": 3.127437729291369, - "grad_norm": 0.012091566808521748, - "learning_rate": 3.700043006291807e-05, - "loss": 0.0029, - "step": 4047 - }, - { - "epoch": 3.128210079165862, - "grad_norm": 0.008897331543266773, - "learning_rate": 3.6974383903466134e-05, - "loss": 0.0034, - "step": 4048 - }, - { - "epoch": 3.1289824290403554, - "grad_norm": 0.009017202071845531, - "learning_rate": 3.694834153545632e-05, - "loss": 0.0034, - "step": 4049 - }, - { - "epoch": 3.1297547789148483, - "grad_norm": 0.011252693831920624, - "learning_rate": 3.692230296646891e-05, - "loss": 0.0037, - "step": 4050 - }, - { - "epoch": 3.1305271287893417, - "grad_norm": 0.010351852513849735, - "learning_rate": 3.689626820408312e-05, - "loss": 0.0033, - "step": 4051 - }, - { - "epoch": 3.1312994786638346, - "grad_norm": 0.010088905692100525, - "learning_rate": 3.6870237255877025e-05, - "loss": 0.0028, - "step": 4052 - }, - { - "epoch": 3.132071828538328, - "grad_norm": 0.008366243913769722, - "learning_rate": 3.684421012942759e-05, - "loss": 0.003, - "step": 4053 - }, - { - "epoch": 3.132844178412821, - "grad_norm": 0.01793886534869671, - "learning_rate": 3.6818186832310716e-05, - "loss": 0.0033, - "step": 4054 - }, - { - "epoch": 3.1336165282873143, - "grad_norm": 0.011342871934175491, - "learning_rate": 3.6792167372101146e-05, - "loss": 0.0031, - "step": 4055 - }, - { - "epoch": 3.1343888781618072, - "grad_norm": 0.008785545825958252, - "learning_rate": 3.676615175637249e-05, - "loss": 0.0029, - "step": 4056 - }, - { - "epoch": 3.1351612280363006, - "grad_norm": 0.010612391866743565, - "learning_rate": 3.6740139992697316e-05, - "loss": 0.003, - "step": 4057 - }, - { - "epoch": 3.1359335779107935, - "grad_norm": 0.016039269044995308, - "learning_rate": 3.671413208864696e-05, - "loss": 0.0035, - "step": 4058 - }, - { - "epoch": 3.136705927785287, - "grad_norm": 0.01777689903974533, - "learning_rate": 3.668812805179173e-05, - "loss": 0.003, - "step": 4059 - }, - { - "epoch": 3.13747827765978, - "grad_norm": 0.010719449259340763, - "learning_rate": 3.666212788970076e-05, - "loss": 0.0031, - "step": 4060 - }, - { - "epoch": 3.138250627534273, - "grad_norm": 0.015978340059518814, - "learning_rate": 3.6636131609942094e-05, - "loss": 0.0035, - "step": 4061 - }, - { - "epoch": 3.139022977408766, - "grad_norm": 0.010096929967403412, - "learning_rate": 3.6610139220082596e-05, - "loss": 0.0036, - "step": 4062 - }, - { - "epoch": 3.1397953272832595, - "grad_norm": 0.02071520686149597, - "learning_rate": 3.6584150727688015e-05, - "loss": 0.0037, - "step": 4063 - }, - { - "epoch": 3.1405676771577524, - "grad_norm": 0.009962460026144981, - "learning_rate": 3.655816614032301e-05, - "loss": 0.0031, - "step": 4064 - }, - { - "epoch": 3.141340027032246, - "grad_norm": 0.009225048124790192, - "learning_rate": 3.653218546555103e-05, - "loss": 0.0036, - "step": 4065 - }, - { - "epoch": 3.1421123769067387, - "grad_norm": 0.009466026909649372, - "learning_rate": 3.6506208710934426e-05, - "loss": 0.0032, - "step": 4066 - }, - { - "epoch": 3.142884726781232, - "grad_norm": 0.012480610981583595, - "learning_rate": 3.648023588403443e-05, - "loss": 0.0036, - "step": 4067 - }, - { - "epoch": 3.143657076655725, - "grad_norm": 0.018842605873942375, - "learning_rate": 3.6454266992411084e-05, - "loss": 0.0032, - "step": 4068 - }, - { - "epoch": 3.1444294265302184, - "grad_norm": 0.013484551571309566, - "learning_rate": 3.64283020436233e-05, - "loss": 0.003, - "step": 4069 - }, - { - "epoch": 3.1452017764047113, - "grad_norm": 0.008349167183041573, - "learning_rate": 3.6402341045228857e-05, - "loss": 0.0031, - "step": 4070 - }, - { - "epoch": 3.1459741262792047, - "grad_norm": 0.007275492884218693, - "learning_rate": 3.6376384004784383e-05, - "loss": 0.0031, - "step": 4071 - }, - { - "epoch": 3.1467464761536976, - "grad_norm": 0.012611397542059422, - "learning_rate": 3.635043092984531e-05, - "loss": 0.0031, - "step": 4072 - }, - { - "epoch": 3.1475188260281906, - "grad_norm": 0.011832364834845066, - "learning_rate": 3.6324481827966e-05, - "loss": 0.003, - "step": 4073 - }, - { - "epoch": 3.148291175902684, - "grad_norm": 0.017449690029025078, - "learning_rate": 3.6298536706699595e-05, - "loss": 0.003, - "step": 4074 - }, - { - "epoch": 3.1490635257771773, - "grad_norm": 0.014712398871779442, - "learning_rate": 3.627259557359805e-05, - "loss": 0.0029, - "step": 4075 - }, - { - "epoch": 3.1498358756516702, - "grad_norm": 0.018742509186267853, - "learning_rate": 3.6246658436212256e-05, - "loss": 0.003, - "step": 4076 - }, - { - "epoch": 3.150608225526163, - "grad_norm": 0.012672246433794498, - "learning_rate": 3.622072530209187e-05, - "loss": 0.0033, - "step": 4077 - }, - { - "epoch": 3.1513805754006565, - "grad_norm": 0.01303433533757925, - "learning_rate": 3.6194796178785376e-05, - "loss": 0.0035, - "step": 4078 - }, - { - "epoch": 3.1521529252751495, - "grad_norm": 0.014445452019572258, - "learning_rate": 3.616887107384015e-05, - "loss": 0.0029, - "step": 4079 - }, - { - "epoch": 3.152925275149643, - "grad_norm": 0.01622670516371727, - "learning_rate": 3.614294999480234e-05, - "loss": 0.0035, - "step": 4080 - }, - { - "epoch": 3.1536976250241358, - "grad_norm": 0.01022647600620985, - "learning_rate": 3.611703294921698e-05, - "loss": 0.0033, - "step": 4081 - }, - { - "epoch": 3.154469974898629, - "grad_norm": 0.008269752375781536, - "learning_rate": 3.609111994462785e-05, - "loss": 0.0034, - "step": 4082 - }, - { - "epoch": 3.155242324773122, - "grad_norm": 0.014312672428786755, - "learning_rate": 3.606521098857762e-05, - "loss": 0.0032, - "step": 4083 - }, - { - "epoch": 3.1560146746476154, - "grad_norm": 0.01480252668261528, - "learning_rate": 3.603930608860778e-05, - "loss": 0.0032, - "step": 4084 - }, - { - "epoch": 3.1567870245221084, - "grad_norm": 0.014581169001758099, - "learning_rate": 3.6013405252258585e-05, - "loss": 0.0035, - "step": 4085 - }, - { - "epoch": 3.1575593743966017, - "grad_norm": 0.008861055597662926, - "learning_rate": 3.598750848706917e-05, - "loss": 0.0029, - "step": 4086 - }, - { - "epoch": 3.1583317242710947, - "grad_norm": 0.009202736429870129, - "learning_rate": 3.596161580057745e-05, - "loss": 0.0032, - "step": 4087 - }, - { - "epoch": 3.159104074145588, - "grad_norm": 0.013064440339803696, - "learning_rate": 3.5935727200320144e-05, - "loss": 0.0039, - "step": 4088 - }, - { - "epoch": 3.159876424020081, - "grad_norm": 0.011419693939387798, - "learning_rate": 3.5909842693832817e-05, - "loss": 0.0039, - "step": 4089 - }, - { - "epoch": 3.1606487738945743, - "grad_norm": 0.015743782743811607, - "learning_rate": 3.588396228864981e-05, - "loss": 0.0035, - "step": 4090 - }, - { - "epoch": 3.1614211237690673, - "grad_norm": 0.009340745396912098, - "learning_rate": 3.5858085992304316e-05, - "loss": 0.0034, - "step": 4091 - }, - { - "epoch": 3.1621934736435606, - "grad_norm": 0.012455140240490437, - "learning_rate": 3.583221381232826e-05, - "loss": 0.0031, - "step": 4092 - }, - { - "epoch": 3.1629658235180536, - "grad_norm": 0.008808081969618797, - "learning_rate": 3.5806345756252414e-05, - "loss": 0.0032, - "step": 4093 - }, - { - "epoch": 3.163738173392547, - "grad_norm": 0.016623718664050102, - "learning_rate": 3.578048183160638e-05, - "loss": 0.0036, - "step": 4094 - }, - { - "epoch": 3.16451052326704, - "grad_norm": 0.009980283677577972, - "learning_rate": 3.575462204591848e-05, - "loss": 0.0031, - "step": 4095 - }, - { - "epoch": 3.1652828731415332, - "grad_norm": 0.00870469119399786, - "learning_rate": 3.5728766406715876e-05, - "loss": 0.0033, - "step": 4096 - }, - { - "epoch": 3.166055223016026, - "grad_norm": 0.009748588316142559, - "learning_rate": 3.570291492152455e-05, - "loss": 0.0037, - "step": 4097 - }, - { - "epoch": 3.1668275728905195, - "grad_norm": 0.009119248017668724, - "learning_rate": 3.567706759786923e-05, - "loss": 0.0033, - "step": 4098 - }, - { - "epoch": 3.1675999227650125, - "grad_norm": 0.009126069024205208, - "learning_rate": 3.565122444327342e-05, - "loss": 0.0036, - "step": 4099 - }, - { - "epoch": 3.168372272639506, - "grad_norm": 0.01185399480164051, - "learning_rate": 3.562538546525949e-05, - "loss": 0.0033, - "step": 4100 - }, - { - "epoch": 3.1691446225139988, - "grad_norm": 0.007178458850830793, - "learning_rate": 3.55995506713485e-05, - "loss": 0.0035, - "step": 4101 - }, - { - "epoch": 3.169916972388492, - "grad_norm": 0.009440583176910877, - "learning_rate": 3.557372006906035e-05, - "loss": 0.0033, - "step": 4102 - }, - { - "epoch": 3.170689322262985, - "grad_norm": 0.010119931772351265, - "learning_rate": 3.55478936659137e-05, - "loss": 0.0034, - "step": 4103 - }, - { - "epoch": 3.1714616721374784, - "grad_norm": 0.00834848452359438, - "learning_rate": 3.552207146942601e-05, - "loss": 0.0029, - "step": 4104 - }, - { - "epoch": 3.1722340220119714, - "grad_norm": 0.012394800782203674, - "learning_rate": 3.5496253487113455e-05, - "loss": 0.0034, - "step": 4105 - }, - { - "epoch": 3.1730063718864647, - "grad_norm": 0.011674412526190281, - "learning_rate": 3.547043972649107e-05, - "loss": 0.0037, - "step": 4106 - }, - { - "epoch": 3.1737787217609577, - "grad_norm": 0.011983788572251797, - "learning_rate": 3.544463019507261e-05, - "loss": 0.0036, - "step": 4107 - }, - { - "epoch": 3.174551071635451, - "grad_norm": 0.017204612493515015, - "learning_rate": 3.541882490037057e-05, - "loss": 0.0033, - "step": 4108 - }, - { - "epoch": 3.175323421509944, - "grad_norm": 0.009163384325802326, - "learning_rate": 3.539302384989629e-05, - "loss": 0.0033, - "step": 4109 - }, - { - "epoch": 3.1760957713844373, - "grad_norm": 0.007902873679995537, - "learning_rate": 3.53672270511598e-05, - "loss": 0.0031, - "step": 4110 - }, - { - "epoch": 3.1768681212589303, - "grad_norm": 0.008102684281766415, - "learning_rate": 3.534143451166997e-05, - "loss": 0.003, - "step": 4111 - }, - { - "epoch": 3.1776404711334236, - "grad_norm": 0.010337292216718197, - "learning_rate": 3.531564623893433e-05, - "loss": 0.0037, - "step": 4112 - }, - { - "epoch": 3.1784128210079166, - "grad_norm": 0.008929918520152569, - "learning_rate": 3.5289862240459255e-05, - "loss": 0.0034, - "step": 4113 - }, - { - "epoch": 3.17918517088241, - "grad_norm": 0.009258911944925785, - "learning_rate": 3.526408252374985e-05, - "loss": 0.0033, - "step": 4114 - }, - { - "epoch": 3.179957520756903, - "grad_norm": 0.01704230345785618, - "learning_rate": 3.523830709630993e-05, - "loss": 0.003, - "step": 4115 - }, - { - "epoch": 3.1807298706313962, - "grad_norm": 0.00898750964552164, - "learning_rate": 3.521253596564214e-05, - "loss": 0.0033, - "step": 4116 - }, - { - "epoch": 3.181502220505889, - "grad_norm": 0.007785670459270477, - "learning_rate": 3.518676913924783e-05, - "loss": 0.0034, - "step": 4117 - }, - { - "epoch": 3.1822745703803825, - "grad_norm": 0.013011530973017216, - "learning_rate": 3.516100662462706e-05, - "loss": 0.003, - "step": 4118 - }, - { - "epoch": 3.1830469202548755, - "grad_norm": 0.009192707017064095, - "learning_rate": 3.5135248429278724e-05, - "loss": 0.0033, - "step": 4119 - }, - { - "epoch": 3.1838192701293684, - "grad_norm": 0.012670222669839859, - "learning_rate": 3.510949456070037e-05, - "loss": 0.0031, - "step": 4120 - }, - { - "epoch": 3.1845916200038618, - "grad_norm": 0.009181042201817036, - "learning_rate": 3.508374502638837e-05, - "loss": 0.0032, - "step": 4121 - }, - { - "epoch": 3.185363969878355, - "grad_norm": 0.01356884092092514, - "learning_rate": 3.505799983383776e-05, - "loss": 0.003, - "step": 4122 - }, - { - "epoch": 3.186136319752848, - "grad_norm": 0.01130085252225399, - "learning_rate": 3.5032258990542336e-05, - "loss": 0.0034, - "step": 4123 - }, - { - "epoch": 3.186908669627341, - "grad_norm": 0.010613438673317432, - "learning_rate": 3.500652250399468e-05, - "loss": 0.0028, - "step": 4124 - }, - { - "epoch": 3.1876810195018344, - "grad_norm": 0.01142602413892746, - "learning_rate": 3.498079038168601e-05, - "loss": 0.0029, - "step": 4125 - }, - { - "epoch": 3.1884533693763273, - "grad_norm": 0.008843295276165009, - "learning_rate": 3.495506263110635e-05, - "loss": 0.003, - "step": 4126 - }, - { - "epoch": 3.1892257192508207, - "grad_norm": 0.01249662134796381, - "learning_rate": 3.492933925974444e-05, - "loss": 0.0029, - "step": 4127 - }, - { - "epoch": 3.1899980691253136, - "grad_norm": 0.007697202730923891, - "learning_rate": 3.490362027508771e-05, - "loss": 0.0028, - "step": 4128 - }, - { - "epoch": 3.190770418999807, - "grad_norm": 0.01017635315656662, - "learning_rate": 3.487790568462232e-05, - "loss": 0.0028, - "step": 4129 - }, - { - "epoch": 3.1915427688743, - "grad_norm": 0.011190581135451794, - "learning_rate": 3.4852195495833204e-05, - "loss": 0.0036, - "step": 4130 - }, - { - "epoch": 3.1923151187487933, - "grad_norm": 0.012526176869869232, - "learning_rate": 3.482648971620397e-05, - "loss": 0.0031, - "step": 4131 - }, - { - "epoch": 3.193087468623286, - "grad_norm": 0.008728506974875927, - "learning_rate": 3.4800788353216914e-05, - "loss": 0.0033, - "step": 4132 - }, - { - "epoch": 3.1938598184977796, - "grad_norm": 0.01112066674977541, - "learning_rate": 3.477509141435312e-05, - "loss": 0.0032, - "step": 4133 - }, - { - "epoch": 3.1946321683722725, - "grad_norm": 0.010328928008675575, - "learning_rate": 3.4749398907092344e-05, - "loss": 0.0033, - "step": 4134 - }, - { - "epoch": 3.195404518246766, - "grad_norm": 0.009153681807219982, - "learning_rate": 3.472371083891301e-05, - "loss": 0.0032, - "step": 4135 - }, - { - "epoch": 3.196176868121259, - "grad_norm": 0.009757447056472301, - "learning_rate": 3.4698027217292356e-05, - "loss": 0.0031, - "step": 4136 - }, - { - "epoch": 3.196949217995752, - "grad_norm": 0.011577093042433262, - "learning_rate": 3.467234804970624e-05, - "loss": 0.004, - "step": 4137 - }, - { - "epoch": 3.197721567870245, - "grad_norm": 0.011520899832248688, - "learning_rate": 3.464667334362922e-05, - "loss": 0.0036, - "step": 4138 - }, - { - "epoch": 3.1984939177447385, - "grad_norm": 0.007746752351522446, - "learning_rate": 3.46210031065346e-05, - "loss": 0.0034, - "step": 4139 - }, - { - "epoch": 3.1992662676192314, - "grad_norm": 0.01598169095814228, - "learning_rate": 3.459533734589438e-05, - "loss": 0.0038, - "step": 4140 - }, - { - "epoch": 3.2000386174937248, - "grad_norm": 0.010562562383711338, - "learning_rate": 3.4569676069179244e-05, - "loss": 0.0035, - "step": 4141 - }, - { - "epoch": 3.2008109673682177, - "grad_norm": 0.007787355221807957, - "learning_rate": 3.4544019283858534e-05, - "loss": 0.0029, - "step": 4142 - }, - { - "epoch": 3.201583317242711, - "grad_norm": 0.01091032288968563, - "learning_rate": 3.4518366997400355e-05, - "loss": 0.0037, - "step": 4143 - }, - { - "epoch": 3.202355667117204, - "grad_norm": 0.006965094245970249, - "learning_rate": 3.4492719217271474e-05, - "loss": 0.0032, - "step": 4144 - }, - { - "epoch": 3.2031280169916974, - "grad_norm": 0.015202803537249565, - "learning_rate": 3.446707595093729e-05, - "loss": 0.0029, - "step": 4145 - }, - { - "epoch": 3.2039003668661903, - "grad_norm": 0.012899445369839668, - "learning_rate": 3.444143720586199e-05, - "loss": 0.0033, - "step": 4146 - }, - { - "epoch": 3.2046727167406837, - "grad_norm": 0.008736785501241684, - "learning_rate": 3.4415802989508384e-05, - "loss": 0.0029, - "step": 4147 - }, - { - "epoch": 3.2054450666151766, - "grad_norm": 0.008177345618605614, - "learning_rate": 3.4390173309337944e-05, - "loss": 0.0028, - "step": 4148 - }, - { - "epoch": 3.20621741648967, - "grad_norm": 0.015837090089917183, - "learning_rate": 3.436454817281088e-05, - "loss": 0.0032, - "step": 4149 - }, - { - "epoch": 3.206989766364163, - "grad_norm": 0.009351897053420544, - "learning_rate": 3.433892758738603e-05, - "loss": 0.0033, - "step": 4150 - }, - { - "epoch": 3.2077621162386563, - "grad_norm": 0.010843559168279171, - "learning_rate": 3.4313311560520955e-05, - "loss": 0.0027, - "step": 4151 - }, - { - "epoch": 3.208534466113149, - "grad_norm": 0.017377877607941628, - "learning_rate": 3.428770009967183e-05, - "loss": 0.0031, - "step": 4152 - }, - { - "epoch": 3.2093068159876426, - "grad_norm": 0.008192894980311394, - "learning_rate": 3.426209321229355e-05, - "loss": 0.0033, - "step": 4153 - }, - { - "epoch": 3.2100791658621355, - "grad_norm": 0.014300121925771236, - "learning_rate": 3.4236490905839656e-05, - "loss": 0.0029, - "step": 4154 - }, - { - "epoch": 3.210851515736629, - "grad_norm": 0.008843852207064629, - "learning_rate": 3.421089318776237e-05, - "loss": 0.0035, - "step": 4155 - }, - { - "epoch": 3.211623865611122, - "grad_norm": 0.010597603395581245, - "learning_rate": 3.418530006551255e-05, - "loss": 0.003, - "step": 4156 - }, - { - "epoch": 3.212396215485615, - "grad_norm": 0.016319219022989273, - "learning_rate": 3.415971154653976e-05, - "loss": 0.0034, - "step": 4157 - }, - { - "epoch": 3.213168565360108, - "grad_norm": 0.010809916071593761, - "learning_rate": 3.413412763829218e-05, - "loss": 0.0031, - "step": 4158 - }, - { - "epoch": 3.2139409152346015, - "grad_norm": 0.010344364680349827, - "learning_rate": 3.410854834821666e-05, - "loss": 0.003, - "step": 4159 - }, - { - "epoch": 3.2147132651090944, - "grad_norm": 0.013101106509566307, - "learning_rate": 3.408297368375874e-05, - "loss": 0.0035, - "step": 4160 - }, - { - "epoch": 3.2154856149835878, - "grad_norm": 0.015558012761175632, - "learning_rate": 3.405740365236258e-05, - "loss": 0.0032, - "step": 4161 - }, - { - "epoch": 3.2162579648580807, - "grad_norm": 0.011329291388392448, - "learning_rate": 3.4031838261470986e-05, - "loss": 0.0031, - "step": 4162 - }, - { - "epoch": 3.217030314732574, - "grad_norm": 0.008331413380801678, - "learning_rate": 3.400627751852543e-05, - "loss": 0.0027, - "step": 4163 - }, - { - "epoch": 3.217802664607067, - "grad_norm": 0.009807384572923183, - "learning_rate": 3.398072143096604e-05, - "loss": 0.0028, - "step": 4164 - }, - { - "epoch": 3.2185750144815604, - "grad_norm": 0.008590004406869411, - "learning_rate": 3.3955170006231555e-05, - "loss": 0.0032, - "step": 4165 - }, - { - "epoch": 3.2193473643560533, - "grad_norm": 0.009014099836349487, - "learning_rate": 3.392962325175938e-05, - "loss": 0.0032, - "step": 4166 - }, - { - "epoch": 3.220119714230546, - "grad_norm": 0.01811886765062809, - "learning_rate": 3.390408117498558e-05, - "loss": 0.0034, - "step": 4167 - }, - { - "epoch": 3.2208920641050396, - "grad_norm": 0.010788844898343086, - "learning_rate": 3.3878543783344806e-05, - "loss": 0.003, - "step": 4168 - }, - { - "epoch": 3.221664413979533, - "grad_norm": 0.013252652250230312, - "learning_rate": 3.385301108427039e-05, - "loss": 0.0031, - "step": 4169 - }, - { - "epoch": 3.222436763854026, - "grad_norm": 0.014306911267340183, - "learning_rate": 3.382748308519429e-05, - "loss": 0.0031, - "step": 4170 - }, - { - "epoch": 3.223209113728519, - "grad_norm": 0.01070572528988123, - "learning_rate": 3.38019597935471e-05, - "loss": 0.0029, - "step": 4171 - }, - { - "epoch": 3.223981463603012, - "grad_norm": 0.008928976953029633, - "learning_rate": 3.3776441216757984e-05, - "loss": 0.0034, - "step": 4172 - }, - { - "epoch": 3.224753813477505, - "grad_norm": 0.01367044635117054, - "learning_rate": 3.375092736225484e-05, - "loss": 0.0034, - "step": 4173 - }, - { - "epoch": 3.2255261633519985, - "grad_norm": 0.007461641449481249, - "learning_rate": 3.372541823746411e-05, - "loss": 0.0031, - "step": 4174 - }, - { - "epoch": 3.2262985132264914, - "grad_norm": 0.008332383818924427, - "learning_rate": 3.369991384981087e-05, - "loss": 0.0033, - "step": 4175 - }, - { - "epoch": 3.227070863100985, - "grad_norm": 0.02094440907239914, - "learning_rate": 3.3674414206718854e-05, - "loss": 0.0033, - "step": 4176 - }, - { - "epoch": 3.2278432129754777, - "grad_norm": 0.013396904803812504, - "learning_rate": 3.36489193156104e-05, - "loss": 0.0035, - "step": 4177 - }, - { - "epoch": 3.228615562849971, - "grad_norm": 0.00786169059574604, - "learning_rate": 3.362342918390641e-05, - "loss": 0.0029, - "step": 4178 - }, - { - "epoch": 3.229387912724464, - "grad_norm": 0.011862647719681263, - "learning_rate": 3.359794381902649e-05, - "loss": 0.0029, - "step": 4179 - }, - { - "epoch": 3.2301602625989574, - "grad_norm": 0.010191472247242928, - "learning_rate": 3.357246322838878e-05, - "loss": 0.0031, - "step": 4180 - }, - { - "epoch": 3.2309326124734503, - "grad_norm": 0.010995290242135525, - "learning_rate": 3.354698741941011e-05, - "loss": 0.0029, - "step": 4181 - }, - { - "epoch": 3.2317049623479437, - "grad_norm": 0.00857754610478878, - "learning_rate": 3.352151639950584e-05, - "loss": 0.0032, - "step": 4182 - }, - { - "epoch": 3.2324773122224366, - "grad_norm": 0.011650115251541138, - "learning_rate": 3.3496050176089946e-05, - "loss": 0.0031, - "step": 4183 - }, - { - "epoch": 3.23324966209693, - "grad_norm": 0.009595906361937523, - "learning_rate": 3.347058875657509e-05, - "loss": 0.003, - "step": 4184 - }, - { - "epoch": 3.234022011971423, - "grad_norm": 0.014852471649646759, - "learning_rate": 3.344513214837243e-05, - "loss": 0.0033, - "step": 4185 - }, - { - "epoch": 3.2347943618459163, - "grad_norm": 0.009725824929773808, - "learning_rate": 3.341968035889177e-05, - "loss": 0.0035, - "step": 4186 - }, - { - "epoch": 3.235566711720409, - "grad_norm": 0.01160483993589878, - "learning_rate": 3.339423339554155e-05, - "loss": 0.0032, - "step": 4187 - }, - { - "epoch": 3.2363390615949026, - "grad_norm": 0.010148034431040287, - "learning_rate": 3.336879126572872e-05, - "loss": 0.0028, - "step": 4188 - }, - { - "epoch": 3.2371114114693955, - "grad_norm": 0.012596935965120792, - "learning_rate": 3.334335397685889e-05, - "loss": 0.0039, - "step": 4189 - }, - { - "epoch": 3.237883761343889, - "grad_norm": 0.011795789934694767, - "learning_rate": 3.331792153633624e-05, - "loss": 0.0033, - "step": 4190 - }, - { - "epoch": 3.238656111218382, - "grad_norm": 0.009832086972892284, - "learning_rate": 3.329249395156355e-05, - "loss": 0.0034, - "step": 4191 - }, - { - "epoch": 3.239428461092875, - "grad_norm": 0.010105843655765057, - "learning_rate": 3.3267071229942157e-05, - "loss": 0.003, - "step": 4192 - }, - { - "epoch": 3.240200810967368, - "grad_norm": 0.010233977809548378, - "learning_rate": 3.3241653378872e-05, - "loss": 0.0034, - "step": 4193 - }, - { - "epoch": 3.2409731608418615, - "grad_norm": 0.011225282214581966, - "learning_rate": 3.321624040575162e-05, - "loss": 0.0034, - "step": 4194 - }, - { - "epoch": 3.2417455107163544, - "grad_norm": 0.008810597471892834, - "learning_rate": 3.31908323179781e-05, - "loss": 0.0031, - "step": 4195 - }, - { - "epoch": 3.242517860590848, - "grad_norm": 0.008012303151190281, - "learning_rate": 3.316542912294712e-05, - "loss": 0.003, - "step": 4196 - }, - { - "epoch": 3.2432902104653407, - "grad_norm": 0.0090979328379035, - "learning_rate": 3.314003082805297e-05, - "loss": 0.0036, - "step": 4197 - }, - { - "epoch": 3.244062560339834, - "grad_norm": 0.010045964270830154, - "learning_rate": 3.3114637440688445e-05, - "loss": 0.0031, - "step": 4198 - }, - { - "epoch": 3.244834910214327, - "grad_norm": 0.012850887142121792, - "learning_rate": 3.308924896824494e-05, - "loss": 0.0037, - "step": 4199 - }, - { - "epoch": 3.2456072600888204, - "grad_norm": 0.01774417981505394, - "learning_rate": 3.3063865418112456e-05, - "loss": 0.0032, - "step": 4200 - }, - { - "epoch": 3.2463796099633133, - "grad_norm": 0.009460978209972382, - "learning_rate": 3.303848679767952e-05, - "loss": 0.003, - "step": 4201 - }, - { - "epoch": 3.2471519598378067, - "grad_norm": 0.01683228649199009, - "learning_rate": 3.301311311433322e-05, - "loss": 0.0037, - "step": 4202 - }, - { - "epoch": 3.2479243097122996, - "grad_norm": 0.010335755534470081, - "learning_rate": 3.298774437545924e-05, - "loss": 0.0032, - "step": 4203 - }, - { - "epoch": 3.248696659586793, - "grad_norm": 0.018849408254027367, - "learning_rate": 3.296238058844182e-05, - "loss": 0.0033, - "step": 4204 - }, - { - "epoch": 3.249469009461286, - "grad_norm": 0.008381340652704239, - "learning_rate": 3.29370217606637e-05, - "loss": 0.0031, - "step": 4205 - }, - { - "epoch": 3.2502413593357793, - "grad_norm": 0.0117068225517869, - "learning_rate": 3.291166789950626e-05, - "loss": 0.0032, - "step": 4206 - }, - { - "epoch": 3.251013709210272, - "grad_norm": 0.01788029633462429, - "learning_rate": 3.28863190123494e-05, - "loss": 0.0031, - "step": 4207 - }, - { - "epoch": 3.2517860590847656, - "grad_norm": 0.017351066693663597, - "learning_rate": 3.2860975106571525e-05, - "loss": 0.0032, - "step": 4208 - }, - { - "epoch": 3.2525584089592585, - "grad_norm": 0.01061096228659153, - "learning_rate": 3.2835636189549676e-05, - "loss": 0.0029, - "step": 4209 - }, - { - "epoch": 3.2533307588337514, - "grad_norm": 0.02109973505139351, - "learning_rate": 3.2810302268659375e-05, - "loss": 0.0029, - "step": 4210 - }, - { - "epoch": 3.254103108708245, - "grad_norm": 0.014287742786109447, - "learning_rate": 3.278497335127475e-05, - "loss": 0.0032, - "step": 4211 - }, - { - "epoch": 3.254875458582738, - "grad_norm": 0.014870606362819672, - "learning_rate": 3.2759649444768406e-05, - "loss": 0.0034, - "step": 4212 - }, - { - "epoch": 3.255647808457231, - "grad_norm": 0.007436896208673716, - "learning_rate": 3.273433055651151e-05, - "loss": 0.0031, - "step": 4213 - }, - { - "epoch": 3.256420158331724, - "grad_norm": 0.01197846606373787, - "learning_rate": 3.2709016693873803e-05, - "loss": 0.0036, - "step": 4214 - }, - { - "epoch": 3.2571925082062174, - "grad_norm": 0.012271657586097717, - "learning_rate": 3.2683707864223534e-05, - "loss": 0.0039, - "step": 4215 - }, - { - "epoch": 3.257964858080711, - "grad_norm": 0.020635711029171944, - "learning_rate": 3.265840407492748e-05, - "loss": 0.0031, - "step": 4216 - }, - { - "epoch": 3.2587372079552037, - "grad_norm": 0.007999719120562077, - "learning_rate": 3.2633105333351e-05, - "loss": 0.0032, - "step": 4217 - }, - { - "epoch": 3.2595095578296966, - "grad_norm": 0.010632477700710297, - "learning_rate": 3.260781164685788e-05, - "loss": 0.0033, - "step": 4218 - }, - { - "epoch": 3.26028190770419, - "grad_norm": 0.012916402891278267, - "learning_rate": 3.2582523022810555e-05, - "loss": 0.003, - "step": 4219 - }, - { - "epoch": 3.2610542575786834, - "grad_norm": 0.010575252585113049, - "learning_rate": 3.255723946856992e-05, - "loss": 0.003, - "step": 4220 - }, - { - "epoch": 3.2618266074531763, - "grad_norm": 0.010021179914474487, - "learning_rate": 3.253196099149539e-05, - "loss": 0.0032, - "step": 4221 - }, - { - "epoch": 3.2625989573276692, - "grad_norm": 0.006727861240506172, - "learning_rate": 3.2506687598944935e-05, - "loss": 0.003, - "step": 4222 - }, - { - "epoch": 3.2633713072021626, - "grad_norm": 0.0075446320697665215, - "learning_rate": 3.248141929827502e-05, - "loss": 0.0031, - "step": 4223 - }, - { - "epoch": 3.2641436570766555, - "grad_norm": 0.010262253694236279, - "learning_rate": 3.2456156096840656e-05, - "loss": 0.0031, - "step": 4224 - }, - { - "epoch": 3.264916006951149, - "grad_norm": 0.009761231951415539, - "learning_rate": 3.2430898001995335e-05, - "loss": 0.003, - "step": 4225 - }, - { - "epoch": 3.265688356825642, - "grad_norm": 0.008487430401146412, - "learning_rate": 3.2405645021091056e-05, - "loss": 0.0031, - "step": 4226 - }, - { - "epoch": 3.266460706700135, - "grad_norm": 0.011206555180251598, - "learning_rate": 3.238039716147841e-05, - "loss": 0.0033, - "step": 4227 - }, - { - "epoch": 3.267233056574628, - "grad_norm": 0.008502183482050896, - "learning_rate": 3.2355154430506385e-05, - "loss": 0.003, - "step": 4228 - }, - { - "epoch": 3.2680054064491215, - "grad_norm": 0.008844464085996151, - "learning_rate": 3.232991683552254e-05, - "loss": 0.0034, - "step": 4229 - }, - { - "epoch": 3.2687777563236144, - "grad_norm": 0.0106724938377738, - "learning_rate": 3.2304684383872966e-05, - "loss": 0.0036, - "step": 4230 - }, - { - "epoch": 3.269550106198108, - "grad_norm": 0.00910135731101036, - "learning_rate": 3.2279457082902175e-05, - "loss": 0.0035, - "step": 4231 - }, - { - "epoch": 3.2703224560726007, - "grad_norm": 0.01070884894579649, - "learning_rate": 3.2254234939953234e-05, - "loss": 0.003, - "step": 4232 - }, - { - "epoch": 3.271094805947094, - "grad_norm": 0.013538029044866562, - "learning_rate": 3.222901796236771e-05, - "loss": 0.0029, - "step": 4233 - }, - { - "epoch": 3.271867155821587, - "grad_norm": 0.009791580028831959, - "learning_rate": 3.220380615748567e-05, - "loss": 0.0032, - "step": 4234 - }, - { - "epoch": 3.2726395056960804, - "grad_norm": 0.012030337005853653, - "learning_rate": 3.2178599532645625e-05, - "loss": 0.0028, - "step": 4235 - }, - { - "epoch": 3.2734118555705733, - "grad_norm": 0.010877392254769802, - "learning_rate": 3.215339809518464e-05, - "loss": 0.0037, - "step": 4236 - }, - { - "epoch": 3.2741842054450667, - "grad_norm": 0.00963950902223587, - "learning_rate": 3.2128201852438255e-05, - "loss": 0.0029, - "step": 4237 - }, - { - "epoch": 3.2749565553195596, - "grad_norm": 0.009462360292673111, - "learning_rate": 3.210301081174044e-05, - "loss": 0.0029, - "step": 4238 - }, - { - "epoch": 3.275728905194053, - "grad_norm": 0.0111409155651927, - "learning_rate": 3.207782498042374e-05, - "loss": 0.0029, - "step": 4239 - }, - { - "epoch": 3.276501255068546, - "grad_norm": 0.009481188841164112, - "learning_rate": 3.205264436581915e-05, - "loss": 0.0037, - "step": 4240 - }, - { - "epoch": 3.2772736049430393, - "grad_norm": 0.008347220718860626, - "learning_rate": 3.2027468975256094e-05, - "loss": 0.0028, - "step": 4241 - }, - { - "epoch": 3.2780459548175322, - "grad_norm": 0.01125909760594368, - "learning_rate": 3.200229881606256e-05, - "loss": 0.0032, - "step": 4242 - }, - { - "epoch": 3.2788183046920256, - "grad_norm": 0.011121939867734909, - "learning_rate": 3.197713389556494e-05, - "loss": 0.0029, - "step": 4243 - }, - { - "epoch": 3.2795906545665185, - "grad_norm": 0.007758612744510174, - "learning_rate": 3.195197422108819e-05, - "loss": 0.0029, - "step": 4244 - }, - { - "epoch": 3.280363004441012, - "grad_norm": 0.009592241607606411, - "learning_rate": 3.192681979995561e-05, - "loss": 0.0027, - "step": 4245 - }, - { - "epoch": 3.281135354315505, - "grad_norm": 0.010534889996051788, - "learning_rate": 3.190167063948909e-05, - "loss": 0.0036, - "step": 4246 - }, - { - "epoch": 3.281907704189998, - "grad_norm": 0.009834305383265018, - "learning_rate": 3.187652674700895e-05, - "loss": 0.003, - "step": 4247 - }, - { - "epoch": 3.282680054064491, - "grad_norm": 0.008079975843429565, - "learning_rate": 3.185138812983393e-05, - "loss": 0.0029, - "step": 4248 - }, - { - "epoch": 3.2834524039389845, - "grad_norm": 0.008383155800402164, - "learning_rate": 3.18262547952813e-05, - "loss": 0.003, - "step": 4249 - }, - { - "epoch": 3.2842247538134774, - "grad_norm": 0.008969396352767944, - "learning_rate": 3.1801126750666775e-05, - "loss": 0.0031, - "step": 4250 - }, - { - "epoch": 3.284997103687971, - "grad_norm": 0.008961372077465057, - "learning_rate": 3.177600400330449e-05, - "loss": 0.0028, - "step": 4251 - }, - { - "epoch": 3.2857694535624637, - "grad_norm": 0.009899982251226902, - "learning_rate": 3.17508865605071e-05, - "loss": 0.0035, - "step": 4252 - }, - { - "epoch": 3.286541803436957, - "grad_norm": 0.01790609210729599, - "learning_rate": 3.1725774429585664e-05, - "loss": 0.0029, - "step": 4253 - }, - { - "epoch": 3.28731415331145, - "grad_norm": 0.008014030754566193, - "learning_rate": 3.1700667617849755e-05, - "loss": 0.0034, - "step": 4254 - }, - { - "epoch": 3.2880865031859434, - "grad_norm": 0.00846839789301157, - "learning_rate": 3.167556613260732e-05, - "loss": 0.003, - "step": 4255 - }, - { - "epoch": 3.2888588530604363, - "grad_norm": 0.00876067578792572, - "learning_rate": 3.165046998116479e-05, - "loss": 0.0029, - "step": 4256 - }, - { - "epoch": 3.2896312029349293, - "grad_norm": 0.010461987927556038, - "learning_rate": 3.1625379170827084e-05, - "loss": 0.0035, - "step": 4257 - }, - { - "epoch": 3.2904035528094226, - "grad_norm": 0.008245227858424187, - "learning_rate": 3.1600293708897504e-05, - "loss": 0.0034, - "step": 4258 - }, - { - "epoch": 3.291175902683916, - "grad_norm": 0.01120483223348856, - "learning_rate": 3.1575213602677824e-05, - "loss": 0.0033, - "step": 4259 - }, - { - "epoch": 3.291948252558409, - "grad_norm": 0.00878420751541853, - "learning_rate": 3.155013885946827e-05, - "loss": 0.0034, - "step": 4260 - }, - { - "epoch": 3.292720602432902, - "grad_norm": 0.012798376381397247, - "learning_rate": 3.152506948656749e-05, - "loss": 0.0031, - "step": 4261 - }, - { - "epoch": 3.2934929523073952, - "grad_norm": 0.01274291705340147, - "learning_rate": 3.150000549127255e-05, - "loss": 0.0031, - "step": 4262 - }, - { - "epoch": 3.2942653021818886, - "grad_norm": 0.01563984714448452, - "learning_rate": 3.1474946880879e-05, - "loss": 0.0036, - "step": 4263 - }, - { - "epoch": 3.2950376520563815, - "grad_norm": 0.014731112867593765, - "learning_rate": 3.1449893662680806e-05, - "loss": 0.0031, - "step": 4264 - }, - { - "epoch": 3.2958100019308745, - "grad_norm": 0.01007825881242752, - "learning_rate": 3.1424845843970314e-05, - "loss": 0.0034, - "step": 4265 - }, - { - "epoch": 3.296582351805368, - "grad_norm": 0.0087623605504632, - "learning_rate": 3.139980343203837e-05, - "loss": 0.0025, - "step": 4266 - }, - { - "epoch": 3.297354701679861, - "grad_norm": 0.017609968781471252, - "learning_rate": 3.137476643417422e-05, - "loss": 0.0032, - "step": 4267 - }, - { - "epoch": 3.298127051554354, - "grad_norm": 0.007911840453743935, - "learning_rate": 3.134973485766549e-05, - "loss": 0.0029, - "step": 4268 - }, - { - "epoch": 3.298899401428847, - "grad_norm": 0.012556039728224277, - "learning_rate": 3.13247087097983e-05, - "loss": 0.003, - "step": 4269 - }, - { - "epoch": 3.2996717513033405, - "grad_norm": 0.007746555842459202, - "learning_rate": 3.129968799785717e-05, - "loss": 0.0026, - "step": 4270 - }, - { - "epoch": 3.3004441011778334, - "grad_norm": 0.009839855134487152, - "learning_rate": 3.127467272912499e-05, - "loss": 0.0031, - "step": 4271 - }, - { - "epoch": 3.3012164510523268, - "grad_norm": 0.015597268007695675, - "learning_rate": 3.12496629108831e-05, - "loss": 0.0036, - "step": 4272 - }, - { - "epoch": 3.3019888009268197, - "grad_norm": 0.010683227330446243, - "learning_rate": 3.122465855041128e-05, - "loss": 0.003, - "step": 4273 - }, - { - "epoch": 3.302761150801313, - "grad_norm": 0.019207023084163666, - "learning_rate": 3.119965965498769e-05, - "loss": 0.0033, - "step": 4274 - }, - { - "epoch": 3.303533500675806, - "grad_norm": 0.013781500048935413, - "learning_rate": 3.117466623188888e-05, - "loss": 0.0032, - "step": 4275 - }, - { - "epoch": 3.3043058505502994, - "grad_norm": 0.00863671489059925, - "learning_rate": 3.114967828838987e-05, - "loss": 0.0031, - "step": 4276 - }, - { - "epoch": 3.3050782004247923, - "grad_norm": 0.009823620319366455, - "learning_rate": 3.112469583176402e-05, - "loss": 0.0036, - "step": 4277 - }, - { - "epoch": 3.3058505502992857, - "grad_norm": 0.010731927119195461, - "learning_rate": 3.109971886928311e-05, - "loss": 0.0033, - "step": 4278 - }, - { - "epoch": 3.3066229001737786, - "grad_norm": 0.017347775399684906, - "learning_rate": 3.107474740821736e-05, - "loss": 0.0031, - "step": 4279 - }, - { - "epoch": 3.307395250048272, - "grad_norm": 0.009366197511553764, - "learning_rate": 3.104978145583535e-05, - "loss": 0.0033, - "step": 4280 - }, - { - "epoch": 3.308167599922765, - "grad_norm": 0.014173255302011967, - "learning_rate": 3.1024821019404047e-05, - "loss": 0.0033, - "step": 4281 - }, - { - "epoch": 3.3089399497972583, - "grad_norm": 0.01867513731122017, - "learning_rate": 3.099986610618885e-05, - "loss": 0.0035, - "step": 4282 - }, - { - "epoch": 3.309712299671751, - "grad_norm": 0.01063159853219986, - "learning_rate": 3.097491672345351e-05, - "loss": 0.0029, - "step": 4283 - }, - { - "epoch": 3.3104846495462446, - "grad_norm": 0.010607903823256493, - "learning_rate": 3.094997287846023e-05, - "loss": 0.003, - "step": 4284 - }, - { - "epoch": 3.3112569994207375, - "grad_norm": 0.007606419734656811, - "learning_rate": 3.092503457846952e-05, - "loss": 0.0032, - "step": 4285 - }, - { - "epoch": 3.312029349295231, - "grad_norm": 0.015880458056926727, - "learning_rate": 3.0900101830740325e-05, - "loss": 0.0033, - "step": 4286 - }, - { - "epoch": 3.312801699169724, - "grad_norm": 0.008047875016927719, - "learning_rate": 3.087517464252999e-05, - "loss": 0.0034, - "step": 4287 - }, - { - "epoch": 3.313574049044217, - "grad_norm": 0.00905267708003521, - "learning_rate": 3.08502530210942e-05, - "loss": 0.0033, - "step": 4288 - }, - { - "epoch": 3.31434639891871, - "grad_norm": 0.009174262173473835, - "learning_rate": 3.0825336973687015e-05, - "loss": 0.0031, - "step": 4289 - }, - { - "epoch": 3.3151187487932035, - "grad_norm": 0.010227401740849018, - "learning_rate": 3.0800426507560934e-05, - "loss": 0.0032, - "step": 4290 - }, - { - "epoch": 3.3158910986676964, - "grad_norm": 0.009878808632493019, - "learning_rate": 3.077552162996677e-05, - "loss": 0.0033, - "step": 4291 - }, - { - "epoch": 3.3166634485421898, - "grad_norm": 0.009305208921432495, - "learning_rate": 3.0750622348153726e-05, - "loss": 0.0031, - "step": 4292 - }, - { - "epoch": 3.3174357984166827, - "grad_norm": 0.00841295626014471, - "learning_rate": 3.072572866936939e-05, - "loss": 0.0036, - "step": 4293 - }, - { - "epoch": 3.318208148291176, - "grad_norm": 0.008652658201754093, - "learning_rate": 3.0700840600859727e-05, - "loss": 0.0033, - "step": 4294 - }, - { - "epoch": 3.318980498165669, - "grad_norm": 0.01883789338171482, - "learning_rate": 3.067595814986901e-05, - "loss": 0.003, - "step": 4295 - }, - { - "epoch": 3.3197528480401624, - "grad_norm": 0.010518788360059261, - "learning_rate": 3.0651081323639956e-05, - "loss": 0.0037, - "step": 4296 - }, - { - "epoch": 3.3205251979146553, - "grad_norm": 0.010141740553081036, - "learning_rate": 3.0626210129413606e-05, - "loss": 0.0032, - "step": 4297 - }, - { - "epoch": 3.3212975477891487, - "grad_norm": 0.010843572206795216, - "learning_rate": 3.060134457442935e-05, - "loss": 0.003, - "step": 4298 - }, - { - "epoch": 3.3220698976636416, - "grad_norm": 0.009291105903685093, - "learning_rate": 3.057648466592494e-05, - "loss": 0.0028, - "step": 4299 - }, - { - "epoch": 3.322842247538135, - "grad_norm": 0.010753841139376163, - "learning_rate": 3.055163041113653e-05, - "loss": 0.0032, - "step": 4300 - }, - { - "epoch": 3.323614597412628, - "grad_norm": 0.01016042847186327, - "learning_rate": 3.052678181729856e-05, - "loss": 0.003, - "step": 4301 - }, - { - "epoch": 3.3243869472871213, - "grad_norm": 0.010187407955527306, - "learning_rate": 3.0501938891643856e-05, - "loss": 0.0035, - "step": 4302 - }, - { - "epoch": 3.325159297161614, - "grad_norm": 0.009946313686668873, - "learning_rate": 3.047710164140362e-05, - "loss": 0.0034, - "step": 4303 - }, - { - "epoch": 3.325931647036107, - "grad_norm": 0.009504447691142559, - "learning_rate": 3.0452270073807366e-05, - "loss": 0.0028, - "step": 4304 - }, - { - "epoch": 3.3267039969106005, - "grad_norm": 0.010526880621910095, - "learning_rate": 3.0427444196082943e-05, - "loss": 0.0028, - "step": 4305 - }, - { - "epoch": 3.327476346785094, - "grad_norm": 0.010826265439391136, - "learning_rate": 3.040262401545658e-05, - "loss": 0.0032, - "step": 4306 - }, - { - "epoch": 3.328248696659587, - "grad_norm": 0.0090884268283844, - "learning_rate": 3.0377809539152846e-05, - "loss": 0.0034, - "step": 4307 - }, - { - "epoch": 3.3290210465340797, - "grad_norm": 0.008787116967141628, - "learning_rate": 3.0353000774394602e-05, - "loss": 0.0032, - "step": 4308 - }, - { - "epoch": 3.329793396408573, - "grad_norm": 0.020502891391515732, - "learning_rate": 3.0328197728403107e-05, - "loss": 0.0038, - "step": 4309 - }, - { - "epoch": 3.3305657462830665, - "grad_norm": 0.011007401160895824, - "learning_rate": 3.030340040839793e-05, - "loss": 0.0034, - "step": 4310 - }, - { - "epoch": 3.3313380961575594, - "grad_norm": 0.008363964967429638, - "learning_rate": 3.027860882159693e-05, - "loss": 0.0032, - "step": 4311 - }, - { - "epoch": 3.3321104460320523, - "grad_norm": 0.01503025647252798, - "learning_rate": 3.0253822975216397e-05, - "loss": 0.0034, - "step": 4312 - }, - { - "epoch": 3.3328827959065457, - "grad_norm": 0.014644940383732319, - "learning_rate": 3.0229042876470835e-05, - "loss": 0.0039, - "step": 4313 - }, - { - "epoch": 3.333655145781039, - "grad_norm": 0.012132090516388416, - "learning_rate": 3.02042685325732e-05, - "loss": 0.0031, - "step": 4314 - }, - { - "epoch": 3.334427495655532, - "grad_norm": 0.012613854371011257, - "learning_rate": 3.0179499950734647e-05, - "loss": 0.0036, - "step": 4315 - }, - { - "epoch": 3.335199845530025, - "grad_norm": 0.008916930295526981, - "learning_rate": 3.0154737138164723e-05, - "loss": 0.0035, - "step": 4316 - }, - { - "epoch": 3.3359721954045183, - "grad_norm": 0.02021050825715065, - "learning_rate": 3.012998010207131e-05, - "loss": 0.0032, - "step": 4317 - }, - { - "epoch": 3.336744545279011, - "grad_norm": 0.014060622081160545, - "learning_rate": 3.010522884966056e-05, - "loss": 0.003, - "step": 4318 - }, - { - "epoch": 3.3375168951535046, - "grad_norm": 0.015764638781547546, - "learning_rate": 3.008048338813696e-05, - "loss": 0.0034, - "step": 4319 - }, - { - "epoch": 3.3382892450279975, - "grad_norm": 0.010799824260175228, - "learning_rate": 3.0055743724703343e-05, - "loss": 0.0036, - "step": 4320 - }, - { - "epoch": 3.339061594902491, - "grad_norm": 0.018081430345773697, - "learning_rate": 3.00310098665608e-05, - "loss": 0.0036, - "step": 4321 - }, - { - "epoch": 3.339833944776984, - "grad_norm": 0.014368054457008839, - "learning_rate": 3.0006281820908766e-05, - "loss": 0.0032, - "step": 4322 - }, - { - "epoch": 3.340606294651477, - "grad_norm": 0.016936352476477623, - "learning_rate": 2.9981559594944987e-05, - "loss": 0.0034, - "step": 4323 - }, - { - "epoch": 3.34137864452597, - "grad_norm": 0.012375048361718655, - "learning_rate": 2.9956843195865515e-05, - "loss": 0.0027, - "step": 4324 - }, - { - "epoch": 3.3421509944004635, - "grad_norm": 0.013704188168048859, - "learning_rate": 2.9932132630864662e-05, - "loss": 0.003, - "step": 4325 - }, - { - "epoch": 3.3429233442749564, - "grad_norm": 0.012674598023295403, - "learning_rate": 2.990742790713509e-05, - "loss": 0.0028, - "step": 4326 - }, - { - "epoch": 3.34369569414945, - "grad_norm": 0.016746561974287033, - "learning_rate": 2.9882729031867772e-05, - "loss": 0.0034, - "step": 4327 - }, - { - "epoch": 3.3444680440239427, - "grad_norm": 0.012405228801071644, - "learning_rate": 2.9858036012251928e-05, - "loss": 0.0028, - "step": 4328 - }, - { - "epoch": 3.345240393898436, - "grad_norm": 0.011292995885014534, - "learning_rate": 2.9833348855475097e-05, - "loss": 0.0032, - "step": 4329 - }, - { - "epoch": 3.346012743772929, - "grad_norm": 0.009587439708411694, - "learning_rate": 2.9808667568723136e-05, - "loss": 0.0029, - "step": 4330 - }, - { - "epoch": 3.3467850936474224, - "grad_norm": 0.008292783051729202, - "learning_rate": 2.978399215918015e-05, - "loss": 0.0029, - "step": 4331 - }, - { - "epoch": 3.3475574435219153, - "grad_norm": 0.015449166297912598, - "learning_rate": 2.9759322634028557e-05, - "loss": 0.0033, - "step": 4332 - }, - { - "epoch": 3.3483297933964087, - "grad_norm": 0.007792491000145674, - "learning_rate": 2.9734659000449073e-05, - "loss": 0.0031, - "step": 4333 - }, - { - "epoch": 3.3491021432709016, - "grad_norm": 0.008889835327863693, - "learning_rate": 2.971000126562069e-05, - "loss": 0.003, - "step": 4334 - }, - { - "epoch": 3.349874493145395, - "grad_norm": 0.007819905877113342, - "learning_rate": 2.9685349436720646e-05, - "loss": 0.0032, - "step": 4335 - }, - { - "epoch": 3.350646843019888, - "grad_norm": 0.008440805599093437, - "learning_rate": 2.9660703520924528e-05, - "loss": 0.0029, - "step": 4336 - }, - { - "epoch": 3.3514191928943813, - "grad_norm": 0.012632417492568493, - "learning_rate": 2.9636063525406156e-05, - "loss": 0.0035, - "step": 4337 - }, - { - "epoch": 3.352191542768874, - "grad_norm": 0.010322070680558681, - "learning_rate": 2.9611429457337613e-05, - "loss": 0.0034, - "step": 4338 - }, - { - "epoch": 3.3529638926433676, - "grad_norm": 0.009829297661781311, - "learning_rate": 2.9586801323889314e-05, - "loss": 0.0034, - "step": 4339 - }, - { - "epoch": 3.3537362425178605, - "grad_norm": 0.010246274061501026, - "learning_rate": 2.9562179132229906e-05, - "loss": 0.0037, - "step": 4340 - }, - { - "epoch": 3.354508592392354, - "grad_norm": 0.010691756382584572, - "learning_rate": 2.9537562889526292e-05, - "loss": 0.0035, - "step": 4341 - }, - { - "epoch": 3.355280942266847, - "grad_norm": 0.012063859961926937, - "learning_rate": 2.95129526029437e-05, - "loss": 0.003, - "step": 4342 - }, - { - "epoch": 3.35605329214134, - "grad_norm": 0.009755225852131844, - "learning_rate": 2.948834827964555e-05, - "loss": 0.0035, - "step": 4343 - }, - { - "epoch": 3.356825642015833, - "grad_norm": 0.010563291609287262, - "learning_rate": 2.9463749926793616e-05, - "loss": 0.0032, - "step": 4344 - }, - { - "epoch": 3.3575979918903265, - "grad_norm": 0.009643464349210262, - "learning_rate": 2.9439157551547848e-05, - "loss": 0.0035, - "step": 4345 - }, - { - "epoch": 3.3583703417648194, - "grad_norm": 0.007082138676196337, - "learning_rate": 2.9414571161066485e-05, - "loss": 0.0027, - "step": 4346 - }, - { - "epoch": 3.359142691639313, - "grad_norm": 0.014375030994415283, - "learning_rate": 2.938999076250607e-05, - "loss": 0.0032, - "step": 4347 - }, - { - "epoch": 3.3599150415138057, - "grad_norm": 0.00861175823956728, - "learning_rate": 2.9365416363021325e-05, - "loss": 0.0027, - "step": 4348 - }, - { - "epoch": 3.360687391388299, - "grad_norm": 0.011097020469605923, - "learning_rate": 2.9340847969765278e-05, - "loss": 0.0032, - "step": 4349 - }, - { - "epoch": 3.361459741262792, - "grad_norm": 0.01772846095263958, - "learning_rate": 2.931628558988922e-05, - "loss": 0.0032, - "step": 4350 - }, - { - "epoch": 3.3622320911372854, - "grad_norm": 0.011551863513886929, - "learning_rate": 2.9291729230542607e-05, - "loss": 0.0032, - "step": 4351 - }, - { - "epoch": 3.3630044410117783, - "grad_norm": 0.0144035080447793, - "learning_rate": 2.926717889887326e-05, - "loss": 0.0033, - "step": 4352 - }, - { - "epoch": 3.3637767908862717, - "grad_norm": 0.01008586585521698, - "learning_rate": 2.9242634602027142e-05, - "loss": 0.0029, - "step": 4353 - }, - { - "epoch": 3.3645491407607646, - "grad_norm": 0.013119881972670555, - "learning_rate": 2.921809634714855e-05, - "loss": 0.0032, - "step": 4354 - }, - { - "epoch": 3.3653214906352575, - "grad_norm": 0.019614320248365402, - "learning_rate": 2.9193564141379942e-05, - "loss": 0.003, - "step": 4355 - }, - { - "epoch": 3.366093840509751, - "grad_norm": 0.016630122438073158, - "learning_rate": 2.9169037991862052e-05, - "loss": 0.0032, - "step": 4356 - }, - { - "epoch": 3.3668661903842443, - "grad_norm": 0.013291397131979465, - "learning_rate": 2.914451790573389e-05, - "loss": 0.0031, - "step": 4357 - }, - { - "epoch": 3.367638540258737, - "grad_norm": 0.010741799138486385, - "learning_rate": 2.9120003890132596e-05, - "loss": 0.0028, - "step": 4358 - }, - { - "epoch": 3.36841089013323, - "grad_norm": 0.008894594386219978, - "learning_rate": 2.9095495952193652e-05, - "loss": 0.0037, - "step": 4359 - }, - { - "epoch": 3.3691832400077235, - "grad_norm": 0.020004941150546074, - "learning_rate": 2.9070994099050742e-05, - "loss": 0.0035, - "step": 4360 - }, - { - "epoch": 3.369955589882217, - "grad_norm": 0.022412752732634544, - "learning_rate": 2.9046498337835698e-05, - "loss": 0.0038, - "step": 4361 - }, - { - "epoch": 3.37072793975671, - "grad_norm": 0.013329553417861462, - "learning_rate": 2.9022008675678703e-05, - "loss": 0.0031, - "step": 4362 - }, - { - "epoch": 3.3715002896312027, - "grad_norm": 0.009267443791031837, - "learning_rate": 2.8997525119708086e-05, - "loss": 0.0031, - "step": 4363 - }, - { - "epoch": 3.372272639505696, - "grad_norm": 0.01385226659476757, - "learning_rate": 2.897304767705042e-05, - "loss": 0.0037, - "step": 4364 - }, - { - "epoch": 3.373044989380189, - "grad_norm": 0.015692459419369698, - "learning_rate": 2.894857635483049e-05, - "loss": 0.0031, - "step": 4365 - }, - { - "epoch": 3.3738173392546824, - "grad_norm": 0.02431158907711506, - "learning_rate": 2.8924111160171318e-05, - "loss": 0.0032, - "step": 4366 - }, - { - "epoch": 3.3745896891291753, - "grad_norm": 0.014064965769648552, - "learning_rate": 2.889965210019413e-05, - "loss": 0.0032, - "step": 4367 - }, - { - "epoch": 3.3753620390036687, - "grad_norm": 0.008142712526023388, - "learning_rate": 2.8875199182018363e-05, - "loss": 0.0028, - "step": 4368 - }, - { - "epoch": 3.3761343888781616, - "grad_norm": 0.02171209640800953, - "learning_rate": 2.8850752412761666e-05, - "loss": 0.0033, - "step": 4369 - }, - { - "epoch": 3.376906738752655, - "grad_norm": 0.01805691048502922, - "learning_rate": 2.8826311799539907e-05, - "loss": 0.003, - "step": 4370 - }, - { - "epoch": 3.377679088627148, - "grad_norm": 0.016302617266774178, - "learning_rate": 2.8801877349467166e-05, - "loss": 0.0029, - "step": 4371 - }, - { - "epoch": 3.3784514385016413, - "grad_norm": 0.011869368143379688, - "learning_rate": 2.8777449069655704e-05, - "loss": 0.0031, - "step": 4372 - }, - { - "epoch": 3.3792237883761342, - "grad_norm": 0.011161305010318756, - "learning_rate": 2.8753026967216025e-05, - "loss": 0.0033, - "step": 4373 - }, - { - "epoch": 3.3799961382506276, - "grad_norm": 0.013350460678339005, - "learning_rate": 2.8728611049256794e-05, - "loss": 0.0033, - "step": 4374 - }, - { - "epoch": 3.3807684881251205, - "grad_norm": 0.01783025823533535, - "learning_rate": 2.8704201322884895e-05, - "loss": 0.0035, - "step": 4375 - }, - { - "epoch": 3.381540837999614, - "grad_norm": 0.017704160884022713, - "learning_rate": 2.867979779520542e-05, - "loss": 0.0034, - "step": 4376 - }, - { - "epoch": 3.382313187874107, - "grad_norm": 0.015627730637788773, - "learning_rate": 2.865540047332165e-05, - "loss": 0.0029, - "step": 4377 - }, - { - "epoch": 3.3830855377486, - "grad_norm": 0.009404877200722694, - "learning_rate": 2.8631009364335036e-05, - "loss": 0.0028, - "step": 4378 - }, - { - "epoch": 3.383857887623093, - "grad_norm": 0.011657997965812683, - "learning_rate": 2.8606624475345255e-05, - "loss": 0.0034, - "step": 4379 - }, - { - "epoch": 3.3846302374975865, - "grad_norm": 0.029644042253494263, - "learning_rate": 2.858224581345016e-05, - "loss": 0.0035, - "step": 4380 - }, - { - "epoch": 3.3854025873720794, - "grad_norm": 0.012362591922283173, - "learning_rate": 2.8557873385745793e-05, - "loss": 0.0032, - "step": 4381 - }, - { - "epoch": 3.386174937246573, - "grad_norm": 0.011876262724399567, - "learning_rate": 2.8533507199326358e-05, - "loss": 0.0033, - "step": 4382 - }, - { - "epoch": 3.3869472871210657, - "grad_norm": 0.015863923355937004, - "learning_rate": 2.8509147261284287e-05, - "loss": 0.003, - "step": 4383 - }, - { - "epoch": 3.387719636995559, - "grad_norm": 0.011141281574964523, - "learning_rate": 2.848479357871016e-05, - "loss": 0.0034, - "step": 4384 - }, - { - "epoch": 3.388491986870052, - "grad_norm": 0.012105172500014305, - "learning_rate": 2.8460446158692743e-05, - "loss": 0.0031, - "step": 4385 - }, - { - "epoch": 3.3892643367445454, - "grad_norm": 0.01008552499115467, - "learning_rate": 2.8436105008318993e-05, - "loss": 0.0036, - "step": 4386 - }, - { - "epoch": 3.3900366866190383, - "grad_norm": 0.010281000286340714, - "learning_rate": 2.841177013467401e-05, - "loss": 0.0036, - "step": 4387 - }, - { - "epoch": 3.3908090364935317, - "grad_norm": 0.013542928732931614, - "learning_rate": 2.8387441544841103e-05, - "loss": 0.0035, - "step": 4388 - }, - { - "epoch": 3.3915813863680246, - "grad_norm": 0.01576226018369198, - "learning_rate": 2.8363119245901737e-05, - "loss": 0.0035, - "step": 4389 - }, - { - "epoch": 3.392353736242518, - "grad_norm": 0.016558324918150902, - "learning_rate": 2.8338803244935537e-05, - "loss": 0.0034, - "step": 4390 - }, - { - "epoch": 3.393126086117011, - "grad_norm": 0.009921234101057053, - "learning_rate": 2.831449354902031e-05, - "loss": 0.0034, - "step": 4391 - }, - { - "epoch": 3.3938984359915043, - "grad_norm": 0.011850431561470032, - "learning_rate": 2.8290190165232023e-05, - "loss": 0.0033, - "step": 4392 - }, - { - "epoch": 3.3946707858659972, - "grad_norm": 0.014793440699577332, - "learning_rate": 2.8265893100644774e-05, - "loss": 0.0032, - "step": 4393 - }, - { - "epoch": 3.3954431357404906, - "grad_norm": 0.02236754447221756, - "learning_rate": 2.824160236233092e-05, - "loss": 0.0039, - "step": 4394 - }, - { - "epoch": 3.3962154856149835, - "grad_norm": 0.008079919032752514, - "learning_rate": 2.8217317957360843e-05, - "loss": 0.003, - "step": 4395 - }, - { - "epoch": 3.396987835489477, - "grad_norm": 0.015229464508593082, - "learning_rate": 2.819303989280315e-05, - "loss": 0.0033, - "step": 4396 - }, - { - "epoch": 3.39776018536397, - "grad_norm": 0.008884802460670471, - "learning_rate": 2.8168768175724662e-05, - "loss": 0.0032, - "step": 4397 - }, - { - "epoch": 3.398532535238463, - "grad_norm": 0.01749601401388645, - "learning_rate": 2.814450281319022e-05, - "loss": 0.0035, - "step": 4398 - }, - { - "epoch": 3.399304885112956, - "grad_norm": 0.021450838074088097, - "learning_rate": 2.8120243812262892e-05, - "loss": 0.0037, - "step": 4399 - }, - { - "epoch": 3.4000772349874495, - "grad_norm": 0.006622872315347195, - "learning_rate": 2.809599118000395e-05, - "loss": 0.0032, - "step": 4400 - }, - { - "epoch": 3.4008495848619424, - "grad_norm": 0.024547424167394638, - "learning_rate": 2.8071744923472675e-05, - "loss": 0.0035, - "step": 4401 - }, - { - "epoch": 3.4016219347364354, - "grad_norm": 0.020672762766480446, - "learning_rate": 2.8047505049726586e-05, - "loss": 0.0034, - "step": 4402 - }, - { - "epoch": 3.4023942846109287, - "grad_norm": 0.007940291427075863, - "learning_rate": 2.8023271565821347e-05, - "loss": 0.0028, - "step": 4403 - }, - { - "epoch": 3.403166634485422, - "grad_norm": 0.00886029377579689, - "learning_rate": 2.799904447881074e-05, - "loss": 0.003, - "step": 4404 - }, - { - "epoch": 3.403938984359915, - "grad_norm": 0.011499281041324139, - "learning_rate": 2.7974823795746642e-05, - "loss": 0.0031, - "step": 4405 - }, - { - "epoch": 3.404711334234408, - "grad_norm": 0.030732352286577225, - "learning_rate": 2.795060952367916e-05, - "loss": 0.0029, - "step": 4406 - }, - { - "epoch": 3.4054836841089013, - "grad_norm": 0.011285774409770966, - "learning_rate": 2.7926401669656476e-05, - "loss": 0.0037, - "step": 4407 - }, - { - "epoch": 3.4062560339833947, - "grad_norm": 0.010116951540112495, - "learning_rate": 2.7902200240724875e-05, - "loss": 0.0034, - "step": 4408 - }, - { - "epoch": 3.4070283838578876, - "grad_norm": 0.014413068071007729, - "learning_rate": 2.7878005243928813e-05, - "loss": 0.0031, - "step": 4409 - }, - { - "epoch": 3.4078007337323806, - "grad_norm": 0.009910531342029572, - "learning_rate": 2.7853816686310924e-05, - "loss": 0.0033, - "step": 4410 - }, - { - "epoch": 3.408573083606874, - "grad_norm": 0.007654004730284214, - "learning_rate": 2.7829634574911854e-05, - "loss": 0.003, - "step": 4411 - }, - { - "epoch": 3.409345433481367, - "grad_norm": 0.009115166030824184, - "learning_rate": 2.7805458916770432e-05, - "loss": 0.0035, - "step": 4412 - }, - { - "epoch": 3.4101177833558602, - "grad_norm": 0.010139040648937225, - "learning_rate": 2.7781289718923674e-05, - "loss": 0.0033, - "step": 4413 - }, - { - "epoch": 3.410890133230353, - "grad_norm": 0.009890168905258179, - "learning_rate": 2.7757126988406578e-05, - "loss": 0.003, - "step": 4414 - }, - { - "epoch": 3.4116624831048465, - "grad_norm": 0.00818744394928217, - "learning_rate": 2.7732970732252337e-05, - "loss": 0.003, - "step": 4415 - }, - { - "epoch": 3.4124348329793395, - "grad_norm": 0.008600006811320782, - "learning_rate": 2.7708820957492287e-05, - "loss": 0.0033, - "step": 4416 - }, - { - "epoch": 3.413207182853833, - "grad_norm": 0.007462901528924704, - "learning_rate": 2.7684677671155856e-05, - "loss": 0.0029, - "step": 4417 - }, - { - "epoch": 3.4139795327283258, - "grad_norm": 0.010175359435379505, - "learning_rate": 2.7660540880270507e-05, - "loss": 0.0033, - "step": 4418 - }, - { - "epoch": 3.414751882602819, - "grad_norm": 0.01211362425237894, - "learning_rate": 2.7636410591861927e-05, - "loss": 0.0033, - "step": 4419 - }, - { - "epoch": 3.415524232477312, - "grad_norm": 0.007548519875854254, - "learning_rate": 2.7612286812953876e-05, - "loss": 0.0027, - "step": 4420 - }, - { - "epoch": 3.4162965823518054, - "grad_norm": 0.011262309737503529, - "learning_rate": 2.7588169550568132e-05, - "loss": 0.003, - "step": 4421 - }, - { - "epoch": 3.4170689322262984, - "grad_norm": 0.011649170890450478, - "learning_rate": 2.756405881172471e-05, - "loss": 0.003, - "step": 4422 - }, - { - "epoch": 3.4178412821007917, - "grad_norm": 0.011346418410539627, - "learning_rate": 2.753995460344166e-05, - "loss": 0.0029, - "step": 4423 - }, - { - "epoch": 3.4186136319752847, - "grad_norm": 0.014188813045620918, - "learning_rate": 2.751585693273508e-05, - "loss": 0.0039, - "step": 4424 - }, - { - "epoch": 3.419385981849778, - "grad_norm": 0.00839192047715187, - "learning_rate": 2.7491765806619275e-05, - "loss": 0.0034, - "step": 4425 - }, - { - "epoch": 3.420158331724271, - "grad_norm": 0.00843577366322279, - "learning_rate": 2.7467681232106573e-05, - "loss": 0.003, - "step": 4426 - }, - { - "epoch": 3.4209306815987643, - "grad_norm": 0.009509515017271042, - "learning_rate": 2.7443603216207414e-05, - "loss": 0.0029, - "step": 4427 - }, - { - "epoch": 3.4217030314732573, - "grad_norm": 0.008844722993671894, - "learning_rate": 2.7419531765930324e-05, - "loss": 0.0031, - "step": 4428 - }, - { - "epoch": 3.4224753813477506, - "grad_norm": 0.010160795412957668, - "learning_rate": 2.7395466888281916e-05, - "loss": 0.003, - "step": 4429 - }, - { - "epoch": 3.4232477312222436, - "grad_norm": 0.01939941942691803, - "learning_rate": 2.7371408590266902e-05, - "loss": 0.0033, - "step": 4430 - }, - { - "epoch": 3.424020081096737, - "grad_norm": 0.01017013005912304, - "learning_rate": 2.7347356878888066e-05, - "loss": 0.0034, - "step": 4431 - }, - { - "epoch": 3.42479243097123, - "grad_norm": 0.007695165928453207, - "learning_rate": 2.732331176114628e-05, - "loss": 0.0034, - "step": 4432 - }, - { - "epoch": 3.4255647808457232, - "grad_norm": 0.007821792736649513, - "learning_rate": 2.7299273244040523e-05, - "loss": 0.0031, - "step": 4433 - }, - { - "epoch": 3.426337130720216, - "grad_norm": 0.015497798100113869, - "learning_rate": 2.727524133456776e-05, - "loss": 0.0032, - "step": 4434 - }, - { - "epoch": 3.4271094805947095, - "grad_norm": 0.01589919440448284, - "learning_rate": 2.725121603972316e-05, - "loss": 0.0028, - "step": 4435 - }, - { - "epoch": 3.4278818304692025, - "grad_norm": 0.012722725979983807, - "learning_rate": 2.722719736649989e-05, - "loss": 0.0037, - "step": 4436 - }, - { - "epoch": 3.428654180343696, - "grad_norm": 0.010282904841005802, - "learning_rate": 2.7203185321889203e-05, - "loss": 0.0034, - "step": 4437 - }, - { - "epoch": 3.4294265302181888, - "grad_norm": 0.010863185860216618, - "learning_rate": 2.7179179912880416e-05, - "loss": 0.0033, - "step": 4438 - }, - { - "epoch": 3.430198880092682, - "grad_norm": 0.013260203413665295, - "learning_rate": 2.715518114646094e-05, - "loss": 0.003, - "step": 4439 - }, - { - "epoch": 3.430971229967175, - "grad_norm": 0.018179895356297493, - "learning_rate": 2.713118902961622e-05, - "loss": 0.0031, - "step": 4440 - }, - { - "epoch": 3.4317435798416684, - "grad_norm": 0.021851636469364166, - "learning_rate": 2.71072035693298e-05, - "loss": 0.0032, - "step": 4441 - }, - { - "epoch": 3.4325159297161614, - "grad_norm": 0.01149999350309372, - "learning_rate": 2.7083224772583255e-05, - "loss": 0.0027, - "step": 4442 - }, - { - "epoch": 3.4332882795906547, - "grad_norm": 0.009091292507946491, - "learning_rate": 2.705925264635623e-05, - "loss": 0.0031, - "step": 4443 - }, - { - "epoch": 3.4340606294651477, - "grad_norm": 0.01148899830877781, - "learning_rate": 2.703528719762644e-05, - "loss": 0.0032, - "step": 4444 - }, - { - "epoch": 3.434832979339641, - "grad_norm": 0.01642238348722458, - "learning_rate": 2.701132843336965e-05, - "loss": 0.0035, - "step": 4445 - }, - { - "epoch": 3.435605329214134, - "grad_norm": 0.011737337335944176, - "learning_rate": 2.698737636055967e-05, - "loss": 0.003, - "step": 4446 - }, - { - "epoch": 3.4363776790886273, - "grad_norm": 0.013013274408876896, - "learning_rate": 2.696343098616837e-05, - "loss": 0.003, - "step": 4447 - }, - { - "epoch": 3.4371500289631203, - "grad_norm": 0.0073637585155665874, - "learning_rate": 2.693949231716567e-05, - "loss": 0.0032, - "step": 4448 - }, - { - "epoch": 3.437922378837613, - "grad_norm": 0.01137218065559864, - "learning_rate": 2.6915560360519544e-05, - "loss": 0.0031, - "step": 4449 - }, - { - "epoch": 3.4386947287121066, - "grad_norm": 0.00971105694770813, - "learning_rate": 2.6891635123196003e-05, - "loss": 0.0034, - "step": 4450 - }, - { - "epoch": 3.4394670785866, - "grad_norm": 0.013804982416331768, - "learning_rate": 2.68677166121591e-05, - "loss": 0.0036, - "step": 4451 - }, - { - "epoch": 3.440239428461093, - "grad_norm": 0.00971175916492939, - "learning_rate": 2.6843804834370946e-05, - "loss": 0.0034, - "step": 4452 - }, - { - "epoch": 3.441011778335586, - "grad_norm": 0.010609552264213562, - "learning_rate": 2.681989979679168e-05, - "loss": 0.0031, - "step": 4453 - }, - { - "epoch": 3.441784128210079, - "grad_norm": 0.009030761197209358, - "learning_rate": 2.679600150637947e-05, - "loss": 0.0033, - "step": 4454 - }, - { - "epoch": 3.4425564780845725, - "grad_norm": 0.013067315332591534, - "learning_rate": 2.6772109970090543e-05, - "loss": 0.0033, - "step": 4455 - }, - { - "epoch": 3.4433288279590655, - "grad_norm": 0.01771661825478077, - "learning_rate": 2.6748225194879117e-05, - "loss": 0.0033, - "step": 4456 - }, - { - "epoch": 3.4441011778335584, - "grad_norm": 0.008268121629953384, - "learning_rate": 2.6724347187697547e-05, - "loss": 0.0029, - "step": 4457 - }, - { - "epoch": 3.4448735277080518, - "grad_norm": 0.009157293476164341, - "learning_rate": 2.6700475955496073e-05, - "loss": 0.003, - "step": 4458 - }, - { - "epoch": 3.4456458775825447, - "grad_norm": 0.016499847173690796, - "learning_rate": 2.667661150522304e-05, - "loss": 0.0031, - "step": 4459 - }, - { - "epoch": 3.446418227457038, - "grad_norm": 0.010825825855135918, - "learning_rate": 2.6652753843824862e-05, - "loss": 0.003, - "step": 4460 - }, - { - "epoch": 3.447190577331531, - "grad_norm": 0.008508339524269104, - "learning_rate": 2.662890297824588e-05, - "loss": 0.0035, - "step": 4461 - }, - { - "epoch": 3.4479629272060244, - "grad_norm": 0.009811155498027802, - "learning_rate": 2.6605058915428515e-05, - "loss": 0.003, - "step": 4462 - }, - { - "epoch": 3.4487352770805173, - "grad_norm": 0.009760680608451366, - "learning_rate": 2.658122166231321e-05, - "loss": 0.003, - "step": 4463 - }, - { - "epoch": 3.4495076269550107, - "grad_norm": 0.012883470393717289, - "learning_rate": 2.65573912258384e-05, - "loss": 0.0033, - "step": 4464 - }, - { - "epoch": 3.4502799768295036, - "grad_norm": 0.009158185683190823, - "learning_rate": 2.6533567612940558e-05, - "loss": 0.003, - "step": 4465 - }, - { - "epoch": 3.451052326703997, - "grad_norm": 0.0091365035623312, - "learning_rate": 2.6509750830554143e-05, - "loss": 0.003, - "step": 4466 - }, - { - "epoch": 3.45182467657849, - "grad_norm": 0.015592905692756176, - "learning_rate": 2.6485940885611692e-05, - "loss": 0.0033, - "step": 4467 - }, - { - "epoch": 3.4525970264529833, - "grad_norm": 0.011113499291241169, - "learning_rate": 2.646213778504366e-05, - "loss": 0.0032, - "step": 4468 - }, - { - "epoch": 3.453369376327476, - "grad_norm": 0.012405202724039555, - "learning_rate": 2.6438341535778554e-05, - "loss": 0.0035, - "step": 4469 - }, - { - "epoch": 3.4541417262019696, - "grad_norm": 0.009333050809800625, - "learning_rate": 2.6414552144742933e-05, - "loss": 0.0029, - "step": 4470 - }, - { - "epoch": 3.4549140760764625, - "grad_norm": 0.010121024213731289, - "learning_rate": 2.6390769618861265e-05, - "loss": 0.0033, - "step": 4471 - }, - { - "epoch": 3.455686425950956, - "grad_norm": 0.009522978216409683, - "learning_rate": 2.6366993965056063e-05, - "loss": 0.003, - "step": 4472 - }, - { - "epoch": 3.456458775825449, - "grad_norm": 0.007180164568126202, - "learning_rate": 2.634322519024791e-05, - "loss": 0.0028, - "step": 4473 - }, - { - "epoch": 3.457231125699942, - "grad_norm": 0.01016936358064413, - "learning_rate": 2.6319463301355264e-05, - "loss": 0.0032, - "step": 4474 - }, - { - "epoch": 3.458003475574435, - "grad_norm": 0.007608390878885984, - "learning_rate": 2.629570830529463e-05, - "loss": 0.003, - "step": 4475 - }, - { - "epoch": 3.4587758254489285, - "grad_norm": 0.008880065754055977, - "learning_rate": 2.627196020898056e-05, - "loss": 0.0031, - "step": 4476 - }, - { - "epoch": 3.4595481753234214, - "grad_norm": 0.011755947023630142, - "learning_rate": 2.6248219019325544e-05, - "loss": 0.0027, - "step": 4477 - }, - { - "epoch": 3.4603205251979148, - "grad_norm": 0.010038244538009167, - "learning_rate": 2.622448474324001e-05, - "loss": 0.0031, - "step": 4478 - }, - { - "epoch": 3.4610928750724077, - "grad_norm": 0.01359093002974987, - "learning_rate": 2.6200757387632496e-05, - "loss": 0.0029, - "step": 4479 - }, - { - "epoch": 3.461865224946901, - "grad_norm": 0.008809817023575306, - "learning_rate": 2.617703695940946e-05, - "loss": 0.0027, - "step": 4480 - }, - { - "epoch": 3.462637574821394, - "grad_norm": 0.010269363410770893, - "learning_rate": 2.615332346547529e-05, - "loss": 0.0032, - "step": 4481 - }, - { - "epoch": 3.4634099246958874, - "grad_norm": 0.00966144073754549, - "learning_rate": 2.6129616912732457e-05, - "loss": 0.0032, - "step": 4482 - }, - { - "epoch": 3.4641822745703803, - "grad_norm": 0.014523585326969624, - "learning_rate": 2.6105917308081384e-05, - "loss": 0.003, - "step": 4483 - }, - { - "epoch": 3.4649546244448737, - "grad_norm": 0.014877774752676487, - "learning_rate": 2.6082224658420385e-05, - "loss": 0.0029, - "step": 4484 - }, - { - "epoch": 3.4657269743193666, - "grad_norm": 0.009811308234930038, - "learning_rate": 2.6058538970645875e-05, - "loss": 0.003, - "step": 4485 - }, - { - "epoch": 3.46649932419386, - "grad_norm": 0.016033053398132324, - "learning_rate": 2.603486025165216e-05, - "loss": 0.0031, - "step": 4486 - }, - { - "epoch": 3.467271674068353, - "grad_norm": 0.011928362771868706, - "learning_rate": 2.601118850833158e-05, - "loss": 0.0027, - "step": 4487 - }, - { - "epoch": 3.4680440239428463, - "grad_norm": 0.007985375821590424, - "learning_rate": 2.598752374757434e-05, - "loss": 0.003, - "step": 4488 - }, - { - "epoch": 3.468816373817339, - "grad_norm": 0.011764492839574814, - "learning_rate": 2.596386597626873e-05, - "loss": 0.0031, - "step": 4489 - }, - { - "epoch": 3.4695887236918326, - "grad_norm": 0.016873568296432495, - "learning_rate": 2.5940215201300965e-05, - "loss": 0.0027, - "step": 4490 - }, - { - "epoch": 3.4703610735663255, - "grad_norm": 0.012676803395152092, - "learning_rate": 2.5916571429555157e-05, - "loss": 0.0029, - "step": 4491 - }, - { - "epoch": 3.471133423440819, - "grad_norm": 0.009303895756602287, - "learning_rate": 2.58929346679135e-05, - "loss": 0.0034, - "step": 4492 - }, - { - "epoch": 3.471905773315312, - "grad_norm": 0.0083675691857934, - "learning_rate": 2.5869304923256067e-05, - "loss": 0.003, - "step": 4493 - }, - { - "epoch": 3.472678123189805, - "grad_norm": 0.021918458864092827, - "learning_rate": 2.5845682202460857e-05, - "loss": 0.0037, - "step": 4494 - }, - { - "epoch": 3.473450473064298, - "grad_norm": 0.013355713337659836, - "learning_rate": 2.582206651240393e-05, - "loss": 0.0031, - "step": 4495 - }, - { - "epoch": 3.474222822938791, - "grad_norm": 0.008953472599387169, - "learning_rate": 2.579845785995923e-05, - "loss": 0.0027, - "step": 4496 - }, - { - "epoch": 3.4749951728132844, - "grad_norm": 0.007987641729414463, - "learning_rate": 2.5774856251998657e-05, - "loss": 0.0034, - "step": 4497 - }, - { - "epoch": 3.4757675226877778, - "grad_norm": 0.010496587492525578, - "learning_rate": 2.575126169539207e-05, - "loss": 0.0031, - "step": 4498 - }, - { - "epoch": 3.4765398725622707, - "grad_norm": 0.010220379568636417, - "learning_rate": 2.572767419700728e-05, - "loss": 0.0033, - "step": 4499 - }, - { - "epoch": 3.4773122224367636, - "grad_norm": 0.013059530407190323, - "learning_rate": 2.570409376371003e-05, - "loss": 0.0032, - "step": 4500 - }, - { - "epoch": 3.478084572311257, - "grad_norm": 0.009420751594007015, - "learning_rate": 2.5680520402364015e-05, - "loss": 0.0033, - "step": 4501 - }, - { - "epoch": 3.4788569221857504, - "grad_norm": 0.010078287683427334, - "learning_rate": 2.5656954119830885e-05, - "loss": 0.0031, - "step": 4502 - }, - { - "epoch": 3.4796292720602433, - "grad_norm": 0.00803342368453741, - "learning_rate": 2.5633394922970194e-05, - "loss": 0.0029, - "step": 4503 - }, - { - "epoch": 3.4804016219347362, - "grad_norm": 0.00801512785255909, - "learning_rate": 2.5609842818639473e-05, - "loss": 0.0031, - "step": 4504 - }, - { - "epoch": 3.4811739718092296, - "grad_norm": 0.009992556646466255, - "learning_rate": 2.558629781369416e-05, - "loss": 0.0036, - "step": 4505 - }, - { - "epoch": 3.4819463216837225, - "grad_norm": 0.010577374137938023, - "learning_rate": 2.5562759914987632e-05, - "loss": 0.0033, - "step": 4506 - }, - { - "epoch": 3.482718671558216, - "grad_norm": 0.008530070073902607, - "learning_rate": 2.5539229129371222e-05, - "loss": 0.0028, - "step": 4507 - }, - { - "epoch": 3.483491021432709, - "grad_norm": 0.009210624732077122, - "learning_rate": 2.551570546369415e-05, - "loss": 0.0032, - "step": 4508 - }, - { - "epoch": 3.484263371307202, - "grad_norm": 0.012428054586052895, - "learning_rate": 2.5492188924803606e-05, - "loss": 0.0028, - "step": 4509 - }, - { - "epoch": 3.485035721181695, - "grad_norm": 0.009001705795526505, - "learning_rate": 2.5468679519544682e-05, - "loss": 0.0033, - "step": 4510 - }, - { - "epoch": 3.4858080710561885, - "grad_norm": 0.012225321494042873, - "learning_rate": 2.5445177254760387e-05, - "loss": 0.0036, - "step": 4511 - }, - { - "epoch": 3.4865804209306814, - "grad_norm": 0.00832716841250658, - "learning_rate": 2.542168213729167e-05, - "loss": 0.0031, - "step": 4512 - }, - { - "epoch": 3.487352770805175, - "grad_norm": 0.01162449549883604, - "learning_rate": 2.5398194173977397e-05, - "loss": 0.0034, - "step": 4513 - }, - { - "epoch": 3.4881251206796677, - "grad_norm": 0.010331190191209316, - "learning_rate": 2.5374713371654335e-05, - "loss": 0.0029, - "step": 4514 - }, - { - "epoch": 3.488897470554161, - "grad_norm": 0.01203492097556591, - "learning_rate": 2.5351239737157184e-05, - "loss": 0.0031, - "step": 4515 - }, - { - "epoch": 3.489669820428654, - "grad_norm": 0.013049033470451832, - "learning_rate": 2.5327773277318558e-05, - "loss": 0.0033, - "step": 4516 - }, - { - "epoch": 3.4904421703031474, - "grad_norm": 0.00960687268525362, - "learning_rate": 2.5304313998968964e-05, - "loss": 0.0032, - "step": 4517 - }, - { - "epoch": 3.4912145201776403, - "grad_norm": 0.009507626295089722, - "learning_rate": 2.5280861908936843e-05, - "loss": 0.0029, - "step": 4518 - }, - { - "epoch": 3.4919868700521337, - "grad_norm": 0.011058974079787731, - "learning_rate": 2.5257417014048522e-05, - "loss": 0.0031, - "step": 4519 - }, - { - "epoch": 3.4927592199266266, - "grad_norm": 0.016918856650590897, - "learning_rate": 2.5233979321128254e-05, - "loss": 0.0035, - "step": 4520 - }, - { - "epoch": 3.49353156980112, - "grad_norm": 0.009435974061489105, - "learning_rate": 2.521054883699817e-05, - "loss": 0.0032, - "step": 4521 - }, - { - "epoch": 3.494303919675613, - "grad_norm": 0.00975219439715147, - "learning_rate": 2.5187125568478332e-05, - "loss": 0.003, - "step": 4522 - }, - { - "epoch": 3.4950762695501063, - "grad_norm": 0.013011807575821877, - "learning_rate": 2.5163709522386685e-05, - "loss": 0.0035, - "step": 4523 - }, - { - "epoch": 3.4958486194245992, - "grad_norm": 0.010547825135290623, - "learning_rate": 2.5140300705539065e-05, - "loss": 0.0028, - "step": 4524 - }, - { - "epoch": 3.4966209692990926, - "grad_norm": 0.00943104736506939, - "learning_rate": 2.5116899124749228e-05, - "loss": 0.0031, - "step": 4525 - }, - { - "epoch": 3.4973933191735855, - "grad_norm": 0.00984232034534216, - "learning_rate": 2.509350478682877e-05, - "loss": 0.0029, - "step": 4526 - }, - { - "epoch": 3.498165669048079, - "grad_norm": 0.007884285412728786, - "learning_rate": 2.507011769858729e-05, - "loss": 0.0033, - "step": 4527 - }, - { - "epoch": 3.498938018922572, - "grad_norm": 0.011110519990324974, - "learning_rate": 2.504673786683215e-05, - "loss": 0.0029, - "step": 4528 - }, - { - "epoch": 3.499710368797065, - "grad_norm": 0.012727515771985054, - "learning_rate": 2.502336529836864e-05, - "loss": 0.0032, - "step": 4529 - }, - { - "epoch": 3.500482718671558, - "grad_norm": 0.011709541082382202, - "learning_rate": 2.500000000000001e-05, - "loss": 0.003, - "step": 4530 - }, - { - "epoch": 3.5012550685460515, - "grad_norm": 0.006993581540882587, - "learning_rate": 2.4976641978527287e-05, - "loss": 0.0029, - "step": 4531 - }, - { - "epoch": 3.5020274184205444, - "grad_norm": 0.008528415113687515, - "learning_rate": 2.4953291240749416e-05, - "loss": 0.0033, - "step": 4532 - }, - { - "epoch": 3.502799768295038, - "grad_norm": 0.010851817205548286, - "learning_rate": 2.49299477934633e-05, - "loss": 0.0034, - "step": 4533 - }, - { - "epoch": 3.5035721181695307, - "grad_norm": 0.015704330056905746, - "learning_rate": 2.4906611643463577e-05, - "loss": 0.0036, - "step": 4534 - }, - { - "epoch": 3.504344468044024, - "grad_norm": 0.02083994261920452, - "learning_rate": 2.4883282797542862e-05, - "loss": 0.0036, - "step": 4535 - }, - { - "epoch": 3.505116817918517, - "grad_norm": 0.010550920851528645, - "learning_rate": 2.4859961262491633e-05, - "loss": 0.0032, - "step": 4536 - }, - { - "epoch": 3.5058891677930104, - "grad_norm": 0.00966651551425457, - "learning_rate": 2.4836647045098244e-05, - "loss": 0.0031, - "step": 4537 - }, - { - "epoch": 3.5066615176675033, - "grad_norm": 0.012379195541143417, - "learning_rate": 2.481334015214883e-05, - "loss": 0.0033, - "step": 4538 - }, - { - "epoch": 3.5074338675419963, - "grad_norm": 0.01680929958820343, - "learning_rate": 2.4790040590427522e-05, - "loss": 0.0035, - "step": 4539 - }, - { - "epoch": 3.5082062174164896, - "grad_norm": 0.011752322316169739, - "learning_rate": 2.476674836671627e-05, - "loss": 0.0031, - "step": 4540 - }, - { - "epoch": 3.508978567290983, - "grad_norm": 0.014056556858122349, - "learning_rate": 2.4743463487794826e-05, - "loss": 0.0032, - "step": 4541 - }, - { - "epoch": 3.509750917165476, - "grad_norm": 0.008536022156476974, - "learning_rate": 2.4720185960440867e-05, - "loss": 0.0034, - "step": 4542 - }, - { - "epoch": 3.510523267039969, - "grad_norm": 0.009675630368292332, - "learning_rate": 2.469691579142997e-05, - "loss": 0.0037, - "step": 4543 - }, - { - "epoch": 3.5112956169144622, - "grad_norm": 0.015018140897154808, - "learning_rate": 2.467365298753547e-05, - "loss": 0.0033, - "step": 4544 - }, - { - "epoch": 3.5120679667889556, - "grad_norm": 0.015164218842983246, - "learning_rate": 2.4650397555528594e-05, - "loss": 0.0032, - "step": 4545 - }, - { - "epoch": 3.5128403166634485, - "grad_norm": 0.00981762446463108, - "learning_rate": 2.4627149502178476e-05, - "loss": 0.0028, - "step": 4546 - }, - { - "epoch": 3.5136126665379415, - "grad_norm": 0.010019616223871708, - "learning_rate": 2.460390883425207e-05, - "loss": 0.0025, - "step": 4547 - }, - { - "epoch": 3.514385016412435, - "grad_norm": 0.011683452874422073, - "learning_rate": 2.4580675558514116e-05, - "loss": 0.0029, - "step": 4548 - }, - { - "epoch": 3.515157366286928, - "grad_norm": 0.012275191023945808, - "learning_rate": 2.4557449681727312e-05, - "loss": 0.0034, - "step": 4549 - }, - { - "epoch": 3.515929716161421, - "grad_norm": 0.009555431082844734, - "learning_rate": 2.4534231210652152e-05, - "loss": 0.0029, - "step": 4550 - }, - { - "epoch": 3.516702066035914, - "grad_norm": 0.01259004045277834, - "learning_rate": 2.4511020152046914e-05, - "loss": 0.0034, - "step": 4551 - }, - { - "epoch": 3.5174744159104074, - "grad_norm": 0.008695184253156185, - "learning_rate": 2.4487816512667833e-05, - "loss": 0.0035, - "step": 4552 - }, - { - "epoch": 3.518246765784901, - "grad_norm": 0.010859877802431583, - "learning_rate": 2.4464620299268925e-05, - "loss": 0.0033, - "step": 4553 - }, - { - "epoch": 3.5190191156593937, - "grad_norm": 0.011442749761044979, - "learning_rate": 2.444143151860199e-05, - "loss": 0.0031, - "step": 4554 - }, - { - "epoch": 3.5197914655338867, - "grad_norm": 0.009345135651528835, - "learning_rate": 2.441825017741679e-05, - "loss": 0.0029, - "step": 4555 - }, - { - "epoch": 3.52056381540838, - "grad_norm": 0.009865041822195053, - "learning_rate": 2.4395076282460823e-05, - "loss": 0.0032, - "step": 4556 - }, - { - "epoch": 3.5213361652828734, - "grad_norm": 0.008451285772025585, - "learning_rate": 2.437190984047945e-05, - "loss": 0.0036, - "step": 4557 - }, - { - "epoch": 3.5221085151573663, - "grad_norm": 0.023029906675219536, - "learning_rate": 2.434875085821587e-05, - "loss": 0.0032, - "step": 4558 - }, - { - "epoch": 3.5228808650318593, - "grad_norm": 0.013170885853469372, - "learning_rate": 2.4325599342411094e-05, - "loss": 0.003, - "step": 4559 - }, - { - "epoch": 3.5236532149063526, - "grad_norm": 0.008758633397519588, - "learning_rate": 2.430245529980397e-05, - "loss": 0.0032, - "step": 4560 - }, - { - "epoch": 3.5244255647808456, - "grad_norm": 0.011905158869922161, - "learning_rate": 2.427931873713118e-05, - "loss": 0.0033, - "step": 4561 - }, - { - "epoch": 3.525197914655339, - "grad_norm": 0.013737745583057404, - "learning_rate": 2.425618966112721e-05, - "loss": 0.0033, - "step": 4562 - }, - { - "epoch": 3.525970264529832, - "grad_norm": 0.013534064404666424, - "learning_rate": 2.4233068078524375e-05, - "loss": 0.0035, - "step": 4563 - }, - { - "epoch": 3.5267426144043252, - "grad_norm": 0.008490565232932568, - "learning_rate": 2.4209953996052807e-05, - "loss": 0.0032, - "step": 4564 - }, - { - "epoch": 3.527514964278818, - "grad_norm": 0.008811703883111477, - "learning_rate": 2.4186847420440462e-05, - "loss": 0.003, - "step": 4565 - }, - { - "epoch": 3.5282873141533115, - "grad_norm": 0.01871240697801113, - "learning_rate": 2.4163748358413106e-05, - "loss": 0.0034, - "step": 4566 - }, - { - "epoch": 3.5290596640278045, - "grad_norm": 0.012067742645740509, - "learning_rate": 2.4140656816694318e-05, - "loss": 0.003, - "step": 4567 - }, - { - "epoch": 3.529832013902298, - "grad_norm": 0.012779579497873783, - "learning_rate": 2.4117572802005483e-05, - "loss": 0.0031, - "step": 4568 - }, - { - "epoch": 3.5306043637767908, - "grad_norm": 0.008804393000900745, - "learning_rate": 2.4094496321065803e-05, - "loss": 0.0031, - "step": 4569 - }, - { - "epoch": 3.531376713651284, - "grad_norm": 0.011759286746382713, - "learning_rate": 2.407142738059228e-05, - "loss": 0.0029, - "step": 4570 - }, - { - "epoch": 3.532149063525777, - "grad_norm": 0.017655447125434875, - "learning_rate": 2.4048365987299732e-05, - "loss": 0.0034, - "step": 4571 - }, - { - "epoch": 3.5329214134002704, - "grad_norm": 0.009172452613711357, - "learning_rate": 2.402531214790077e-05, - "loss": 0.0028, - "step": 4572 - }, - { - "epoch": 3.5336937632747634, - "grad_norm": 0.0073136230930686, - "learning_rate": 2.4002265869105813e-05, - "loss": 0.0027, - "step": 4573 - }, - { - "epoch": 3.5344661131492567, - "grad_norm": 0.009583295322954655, - "learning_rate": 2.397922715762307e-05, - "loss": 0.0028, - "step": 4574 - }, - { - "epoch": 3.5352384630237497, - "grad_norm": 0.009433571249246597, - "learning_rate": 2.395619602015855e-05, - "loss": 0.003, - "step": 4575 - }, - { - "epoch": 3.536010812898243, - "grad_norm": 0.010750960558652878, - "learning_rate": 2.393317246341607e-05, - "loss": 0.003, - "step": 4576 - }, - { - "epoch": 3.536783162772736, - "grad_norm": 0.01642046682536602, - "learning_rate": 2.391015649409723e-05, - "loss": 0.0029, - "step": 4577 - }, - { - "epoch": 3.5375555126472293, - "grad_norm": 0.012440281920135021, - "learning_rate": 2.388714811890142e-05, - "loss": 0.0033, - "step": 4578 - }, - { - "epoch": 3.5383278625217223, - "grad_norm": 0.011066598817706108, - "learning_rate": 2.3864147344525823e-05, - "loss": 0.003, - "step": 4579 - }, - { - "epoch": 3.5391002123962156, - "grad_norm": 0.010555957444012165, - "learning_rate": 2.3841154177665403e-05, - "loss": 0.0038, - "step": 4580 - }, - { - "epoch": 3.5398725622707086, - "grad_norm": 0.011121445335447788, - "learning_rate": 2.381816862501292e-05, - "loss": 0.0034, - "step": 4581 - }, - { - "epoch": 3.540644912145202, - "grad_norm": 0.009262949228286743, - "learning_rate": 2.3795190693258918e-05, - "loss": 0.0029, - "step": 4582 - }, - { - "epoch": 3.541417262019695, - "grad_norm": 0.013648148626089096, - "learning_rate": 2.377222038909171e-05, - "loss": 0.0034, - "step": 4583 - }, - { - "epoch": 3.5421896118941882, - "grad_norm": 0.01162190455943346, - "learning_rate": 2.37492577191974e-05, - "loss": 0.0034, - "step": 4584 - }, - { - "epoch": 3.542961961768681, - "grad_norm": 0.008952927775681019, - "learning_rate": 2.3726302690259867e-05, - "loss": 0.0034, - "step": 4585 - }, - { - "epoch": 3.543734311643174, - "grad_norm": 0.011494430713355541, - "learning_rate": 2.370335530896074e-05, - "loss": 0.0035, - "step": 4586 - }, - { - "epoch": 3.5445066615176675, - "grad_norm": 0.007824796251952648, - "learning_rate": 2.3680415581979508e-05, - "loss": 0.0032, - "step": 4587 - }, - { - "epoch": 3.545279011392161, - "grad_norm": 0.008753915317356586, - "learning_rate": 2.3657483515993324e-05, - "loss": 0.0033, - "step": 4588 - }, - { - "epoch": 3.5460513612666538, - "grad_norm": 0.008178495801985264, - "learning_rate": 2.3634559117677142e-05, - "loss": 0.003, - "step": 4589 - }, - { - "epoch": 3.5468237111411467, - "grad_norm": 0.010274248197674751, - "learning_rate": 2.361164239370377e-05, - "loss": 0.0032, - "step": 4590 - }, - { - "epoch": 3.54759606101564, - "grad_norm": 0.009580101817846298, - "learning_rate": 2.358873335074365e-05, - "loss": 0.0032, - "step": 4591 - }, - { - "epoch": 3.5483684108901334, - "grad_norm": 0.00803334079682827, - "learning_rate": 2.3565831995465045e-05, - "loss": 0.0032, - "step": 4592 - }, - { - "epoch": 3.5491407607646264, - "grad_norm": 0.01077238842844963, - "learning_rate": 2.3542938334534055e-05, - "loss": 0.0031, - "step": 4593 - }, - { - "epoch": 3.5499131106391193, - "grad_norm": 0.012328112497925758, - "learning_rate": 2.3520052374614403e-05, - "loss": 0.0038, - "step": 4594 - }, - { - "epoch": 3.5506854605136127, - "grad_norm": 0.010183589532971382, - "learning_rate": 2.349717412236767e-05, - "loss": 0.0037, - "step": 4595 - }, - { - "epoch": 3.551457810388106, - "grad_norm": 0.011016666889190674, - "learning_rate": 2.3474303584453155e-05, - "loss": 0.0024, - "step": 4596 - }, - { - "epoch": 3.552230160262599, - "grad_norm": 0.008774489164352417, - "learning_rate": 2.345144076752792e-05, - "loss": 0.0036, - "step": 4597 - }, - { - "epoch": 3.553002510137092, - "grad_norm": 0.013731655664741993, - "learning_rate": 2.342858567824678e-05, - "loss": 0.0033, - "step": 4598 - }, - { - "epoch": 3.5537748600115853, - "grad_norm": 0.00853305496275425, - "learning_rate": 2.3405738323262277e-05, - "loss": 0.003, - "step": 4599 - }, - { - "epoch": 3.5545472098860786, - "grad_norm": 0.0098152756690979, - "learning_rate": 2.3382898709224787e-05, - "loss": 0.0029, - "step": 4600 - }, - { - "epoch": 3.5553195597605716, - "grad_norm": 0.008680333383381367, - "learning_rate": 2.3360066842782306e-05, - "loss": 0.0033, - "step": 4601 - }, - { - "epoch": 3.5560919096350645, - "grad_norm": 0.009033185429871082, - "learning_rate": 2.3337242730580644e-05, - "loss": 0.0033, - "step": 4602 - }, - { - "epoch": 3.556864259509558, - "grad_norm": 0.013320722617208958, - "learning_rate": 2.33144263792634e-05, - "loss": 0.0034, - "step": 4603 - }, - { - "epoch": 3.5576366093840512, - "grad_norm": 0.0077407159842550755, - "learning_rate": 2.329161779547181e-05, - "loss": 0.003, - "step": 4604 - }, - { - "epoch": 3.558408959258544, - "grad_norm": 0.009022112935781479, - "learning_rate": 2.32688169858449e-05, - "loss": 0.0029, - "step": 4605 - }, - { - "epoch": 3.559181309133037, - "grad_norm": 0.008708495646715164, - "learning_rate": 2.324602395701949e-05, - "loss": 0.0033, - "step": 4606 - }, - { - "epoch": 3.5599536590075305, - "grad_norm": 0.01210398692637682, - "learning_rate": 2.3223238715630025e-05, - "loss": 0.0027, - "step": 4607 - }, - { - "epoch": 3.5607260088820234, - "grad_norm": 0.008263209834694862, - "learning_rate": 2.3200461268308744e-05, - "loss": 0.003, - "step": 4608 - }, - { - "epoch": 3.5614983587565168, - "grad_norm": 0.009071472100913525, - "learning_rate": 2.3177691621685638e-05, - "loss": 0.0035, - "step": 4609 - }, - { - "epoch": 3.5622707086310097, - "grad_norm": 0.007416809909045696, - "learning_rate": 2.3154929782388406e-05, - "loss": 0.0026, - "step": 4610 - }, - { - "epoch": 3.563043058505503, - "grad_norm": 0.010057068429887295, - "learning_rate": 2.313217575704242e-05, - "loss": 0.0032, - "step": 4611 - }, - { - "epoch": 3.563815408379996, - "grad_norm": 0.010340395383536816, - "learning_rate": 2.3109429552270874e-05, - "loss": 0.0034, - "step": 4612 - }, - { - "epoch": 3.5645877582544894, - "grad_norm": 0.012999104335904121, - "learning_rate": 2.3086691174694636e-05, - "loss": 0.0033, - "step": 4613 - }, - { - "epoch": 3.5653601081289823, - "grad_norm": 0.010097958147525787, - "learning_rate": 2.3063960630932252e-05, - "loss": 0.0033, - "step": 4614 - }, - { - "epoch": 3.5661324580034757, - "grad_norm": 0.01187471579760313, - "learning_rate": 2.3041237927600085e-05, - "loss": 0.0034, - "step": 4615 - }, - { - "epoch": 3.5669048078779686, - "grad_norm": 0.009638137184083462, - "learning_rate": 2.3018523071312164e-05, - "loss": 0.0028, - "step": 4616 - }, - { - "epoch": 3.567677157752462, - "grad_norm": 0.011620833538472652, - "learning_rate": 2.299581606868018e-05, - "loss": 0.0033, - "step": 4617 - }, - { - "epoch": 3.568449507626955, - "grad_norm": 0.008225577883422375, - "learning_rate": 2.2973116926313655e-05, - "loss": 0.0026, - "step": 4618 - }, - { - "epoch": 3.5692218575014483, - "grad_norm": 0.010084412060678005, - "learning_rate": 2.2950425650819728e-05, - "loss": 0.0037, - "step": 4619 - }, - { - "epoch": 3.569994207375941, - "grad_norm": 0.010329210199415684, - "learning_rate": 2.2927742248803313e-05, - "loss": 0.003, - "step": 4620 - }, - { - "epoch": 3.5707665572504346, - "grad_norm": 0.009796016849577427, - "learning_rate": 2.2905066726866937e-05, - "loss": 0.003, - "step": 4621 - }, - { - "epoch": 3.5715389071249275, - "grad_norm": 0.010266847908496857, - "learning_rate": 2.2882399091610957e-05, - "loss": 0.0029, - "step": 4622 - }, - { - "epoch": 3.572311256999421, - "grad_norm": 0.008089693263173103, - "learning_rate": 2.2859739349633368e-05, - "loss": 0.0031, - "step": 4623 - }, - { - "epoch": 3.573083606873914, - "grad_norm": 0.009946372359991074, - "learning_rate": 2.2837087507529826e-05, - "loss": 0.003, - "step": 4624 - }, - { - "epoch": 3.573855956748407, - "grad_norm": 0.009956549853086472, - "learning_rate": 2.2814443571893783e-05, - "loss": 0.0031, - "step": 4625 - }, - { - "epoch": 3.5746283066229, - "grad_norm": 0.007649792358279228, - "learning_rate": 2.279180754931634e-05, - "loss": 0.003, - "step": 4626 - }, - { - "epoch": 3.5754006564973935, - "grad_norm": 0.009878559038043022, - "learning_rate": 2.2769179446386253e-05, - "loss": 0.0035, - "step": 4627 - }, - { - "epoch": 3.5761730063718864, - "grad_norm": 0.009770630858838558, - "learning_rate": 2.274655926969006e-05, - "loss": 0.0031, - "step": 4628 - }, - { - "epoch": 3.5769453562463798, - "grad_norm": 0.009213805198669434, - "learning_rate": 2.2723947025811933e-05, - "loss": 0.003, - "step": 4629 - }, - { - "epoch": 3.5777177061208727, - "grad_norm": 0.013433797284960747, - "learning_rate": 2.2701342721333746e-05, - "loss": 0.0032, - "step": 4630 - }, - { - "epoch": 3.578490055995366, - "grad_norm": 0.01271948404610157, - "learning_rate": 2.2678746362835073e-05, - "loss": 0.0028, - "step": 4631 - }, - { - "epoch": 3.579262405869859, - "grad_norm": 0.009294467978179455, - "learning_rate": 2.2656157956893165e-05, - "loss": 0.0028, - "step": 4632 - }, - { - "epoch": 3.580034755744352, - "grad_norm": 0.007768784649670124, - "learning_rate": 2.2633577510082953e-05, - "loss": 0.0026, - "step": 4633 - }, - { - "epoch": 3.5808071056188453, - "grad_norm": 0.010570479556918144, - "learning_rate": 2.261100502897708e-05, - "loss": 0.0029, - "step": 4634 - }, - { - "epoch": 3.5815794554933387, - "grad_norm": 0.010714959353208542, - "learning_rate": 2.2588440520145824e-05, - "loss": 0.003, - "step": 4635 - }, - { - "epoch": 3.5823518053678316, - "grad_norm": 0.008927794173359871, - "learning_rate": 2.256588399015719e-05, - "loss": 0.0034, - "step": 4636 - }, - { - "epoch": 3.5831241552423245, - "grad_norm": 0.011611155234277248, - "learning_rate": 2.2543335445576824e-05, - "loss": 0.0034, - "step": 4637 - }, - { - "epoch": 3.583896505116818, - "grad_norm": 0.016657888889312744, - "learning_rate": 2.2520794892968072e-05, - "loss": 0.0035, - "step": 4638 - }, - { - "epoch": 3.5846688549913113, - "grad_norm": 0.009623616002500057, - "learning_rate": 2.249826233889194e-05, - "loss": 0.0033, - "step": 4639 - }, - { - "epoch": 3.585441204865804, - "grad_norm": 0.009875661693513393, - "learning_rate": 2.2475737789907107e-05, - "loss": 0.0031, - "step": 4640 - }, - { - "epoch": 3.586213554740297, - "grad_norm": 0.010192835703492165, - "learning_rate": 2.2453221252569932e-05, - "loss": 0.0027, - "step": 4641 - }, - { - "epoch": 3.5869859046147905, - "grad_norm": 0.00782682653516531, - "learning_rate": 2.2430712733434433e-05, - "loss": 0.0032, - "step": 4642 - }, - { - "epoch": 3.587758254489284, - "grad_norm": 0.009048606269061565, - "learning_rate": 2.2408212239052294e-05, - "loss": 0.0033, - "step": 4643 - }, - { - "epoch": 3.588530604363777, - "grad_norm": 0.011508859694004059, - "learning_rate": 2.2385719775972863e-05, - "loss": 0.0031, - "step": 4644 - }, - { - "epoch": 3.5893029542382697, - "grad_norm": 0.007973534055054188, - "learning_rate": 2.2363235350743162e-05, - "loss": 0.0027, - "step": 4645 - }, - { - "epoch": 3.590075304112763, - "grad_norm": 0.008861503563821316, - "learning_rate": 2.234075896990785e-05, - "loss": 0.0034, - "step": 4646 - }, - { - "epoch": 3.5908476539872565, - "grad_norm": 0.014525181613862514, - "learning_rate": 2.2318290640009264e-05, - "loss": 0.0035, - "step": 4647 - }, - { - "epoch": 3.5916200038617494, - "grad_norm": 0.0103535745292902, - "learning_rate": 2.22958303675874e-05, - "loss": 0.0031, - "step": 4648 - }, - { - "epoch": 3.5923923537362423, - "grad_norm": 0.006877128500491381, - "learning_rate": 2.2273378159179892e-05, - "loss": 0.0029, - "step": 4649 - }, - { - "epoch": 3.5931647036107357, - "grad_norm": 0.01847553625702858, - "learning_rate": 2.225093402132204e-05, - "loss": 0.0035, - "step": 4650 - }, - { - "epoch": 3.593937053485229, - "grad_norm": 0.010287228040397167, - "learning_rate": 2.2228497960546778e-05, - "loss": 0.0031, - "step": 4651 - }, - { - "epoch": 3.594709403359722, - "grad_norm": 0.01384812779724598, - "learning_rate": 2.220606998338472e-05, - "loss": 0.003, - "step": 4652 - }, - { - "epoch": 3.595481753234215, - "grad_norm": 0.008724525570869446, - "learning_rate": 2.2183650096364095e-05, - "loss": 0.0031, - "step": 4653 - }, - { - "epoch": 3.5962541031087083, - "grad_norm": 0.010740946978330612, - "learning_rate": 2.21612383060108e-05, - "loss": 0.0037, - "step": 4654 - }, - { - "epoch": 3.5970264529832012, - "grad_norm": 0.012365031987428665, - "learning_rate": 2.213883461884835e-05, - "loss": 0.0031, - "step": 4655 - }, - { - "epoch": 3.5977988028576946, - "grad_norm": 0.01322922669351101, - "learning_rate": 2.2116439041397936e-05, - "loss": 0.0033, - "step": 4656 - }, - { - "epoch": 3.5985711527321875, - "grad_norm": 0.012405367568135262, - "learning_rate": 2.209405158017836e-05, - "loss": 0.0034, - "step": 4657 - }, - { - "epoch": 3.599343502606681, - "grad_norm": 0.010477261617779732, - "learning_rate": 2.2071672241706067e-05, - "loss": 0.0031, - "step": 4658 - }, - { - "epoch": 3.600115852481174, - "grad_norm": 0.009934378787875175, - "learning_rate": 2.204930103249513e-05, - "loss": 0.0032, - "step": 4659 - }, - { - "epoch": 3.600888202355667, - "grad_norm": 0.013846274465322495, - "learning_rate": 2.202693795905732e-05, - "loss": 0.0032, - "step": 4660 - }, - { - "epoch": 3.60166055223016, - "grad_norm": 0.011807041242718697, - "learning_rate": 2.2004583027901932e-05, - "loss": 0.0028, - "step": 4661 - }, - { - "epoch": 3.6024329021046535, - "grad_norm": 0.009982018731534481, - "learning_rate": 2.198223624553595e-05, - "loss": 0.0031, - "step": 4662 - }, - { - "epoch": 3.6032052519791464, - "grad_norm": 0.00852767750620842, - "learning_rate": 2.1959897618464025e-05, - "loss": 0.0024, - "step": 4663 - }, - { - "epoch": 3.60397760185364, - "grad_norm": 0.016557959839701653, - "learning_rate": 2.1937567153188353e-05, - "loss": 0.003, - "step": 4664 - }, - { - "epoch": 3.6047499517281327, - "grad_norm": 0.00804685615003109, - "learning_rate": 2.1915244856208788e-05, - "loss": 0.0029, - "step": 4665 - }, - { - "epoch": 3.605522301602626, - "grad_norm": 0.007616143673658371, - "learning_rate": 2.189293073402286e-05, - "loss": 0.003, - "step": 4666 - }, - { - "epoch": 3.606294651477119, - "grad_norm": 0.010534284636378288, - "learning_rate": 2.187062479312562e-05, - "loss": 0.0034, - "step": 4667 - }, - { - "epoch": 3.6070670013516124, - "grad_norm": 0.01100563257932663, - "learning_rate": 2.184832704000979e-05, - "loss": 0.0033, - "step": 4668 - }, - { - "epoch": 3.6078393512261053, - "grad_norm": 0.010521691292524338, - "learning_rate": 2.182603748116574e-05, - "loss": 0.0031, - "step": 4669 - }, - { - "epoch": 3.6086117011005987, - "grad_norm": 0.008225660771131516, - "learning_rate": 2.1803756123081425e-05, - "loss": 0.0029, - "step": 4670 - }, - { - "epoch": 3.6093840509750916, - "grad_norm": 0.00809970311820507, - "learning_rate": 2.1781482972242352e-05, - "loss": 0.0031, - "step": 4671 - }, - { - "epoch": 3.610156400849585, - "grad_norm": 0.008778408169746399, - "learning_rate": 2.1759218035131758e-05, - "loss": 0.0028, - "step": 4672 - }, - { - "epoch": 3.610928750724078, - "grad_norm": 0.010100818239152431, - "learning_rate": 2.173696131823042e-05, - "loss": 0.0028, - "step": 4673 - }, - { - "epoch": 3.6117011005985713, - "grad_norm": 0.007120962720364332, - "learning_rate": 2.17147128280167e-05, - "loss": 0.0031, - "step": 4674 - }, - { - "epoch": 3.6124734504730642, - "grad_norm": 0.009938807226717472, - "learning_rate": 2.1692472570966593e-05, - "loss": 0.0026, - "step": 4675 - }, - { - "epoch": 3.6132458003475576, - "grad_norm": 0.009664575569331646, - "learning_rate": 2.1670240553553755e-05, - "loss": 0.0033, - "step": 4676 - }, - { - "epoch": 3.6140181502220505, - "grad_norm": 0.010297637432813644, - "learning_rate": 2.1648016782249337e-05, - "loss": 0.0034, - "step": 4677 - }, - { - "epoch": 3.614790500096544, - "grad_norm": 0.008731442503631115, - "learning_rate": 2.1625801263522143e-05, - "loss": 0.003, - "step": 4678 - }, - { - "epoch": 3.615562849971037, - "grad_norm": 0.013439536094665527, - "learning_rate": 2.1603594003838602e-05, - "loss": 0.0033, - "step": 4679 - }, - { - "epoch": 3.6163351998455298, - "grad_norm": 0.007052401080727577, - "learning_rate": 2.158139500966272e-05, - "loss": 0.0021, - "step": 4680 - }, - { - "epoch": 3.617107549720023, - "grad_norm": 0.0082322983071208, - "learning_rate": 2.155920428745603e-05, - "loss": 0.0025, - "step": 4681 - }, - { - "epoch": 3.6178798995945165, - "grad_norm": 0.01100889965891838, - "learning_rate": 2.153702184367777e-05, - "loss": 0.0034, - "step": 4682 - }, - { - "epoch": 3.6186522494690094, - "grad_norm": 0.022772731259465218, - "learning_rate": 2.1514847684784706e-05, - "loss": 0.0032, - "step": 4683 - }, - { - "epoch": 3.6194245993435024, - "grad_norm": 0.0094189727678895, - "learning_rate": 2.1492681817231153e-05, - "loss": 0.0027, - "step": 4684 - }, - { - "epoch": 3.6201969492179957, - "grad_norm": 0.008233190514147282, - "learning_rate": 2.1470524247469115e-05, - "loss": 0.0032, - "step": 4685 - }, - { - "epoch": 3.620969299092489, - "grad_norm": 0.011170231737196445, - "learning_rate": 2.1448374981948123e-05, - "loss": 0.003, - "step": 4686 - }, - { - "epoch": 3.621741648966982, - "grad_norm": 0.010109986178576946, - "learning_rate": 2.1426234027115237e-05, - "loss": 0.0031, - "step": 4687 - }, - { - "epoch": 3.622513998841475, - "grad_norm": 0.010346760973334312, - "learning_rate": 2.140410138941521e-05, - "loss": 0.0033, - "step": 4688 - }, - { - "epoch": 3.6232863487159683, - "grad_norm": 0.00939234159886837, - "learning_rate": 2.13819770752903e-05, - "loss": 0.0032, - "step": 4689 - }, - { - "epoch": 3.6240586985904617, - "grad_norm": 0.009421358816325665, - "learning_rate": 2.135986109118036e-05, - "loss": 0.0028, - "step": 4690 - }, - { - "epoch": 3.6248310484649546, - "grad_norm": 0.007759148720651865, - "learning_rate": 2.1337753443522818e-05, - "loss": 0.0033, - "step": 4691 - }, - { - "epoch": 3.6256033983394476, - "grad_norm": 0.009183508343994617, - "learning_rate": 2.1315654138752678e-05, - "loss": 0.0031, - "step": 4692 - }, - { - "epoch": 3.626375748213941, - "grad_norm": 0.016507580876350403, - "learning_rate": 2.129356318330251e-05, - "loss": 0.003, - "step": 4693 - }, - { - "epoch": 3.6271480980884343, - "grad_norm": 0.00906956847757101, - "learning_rate": 2.127148058360246e-05, - "loss": 0.0034, - "step": 4694 - }, - { - "epoch": 3.6279204479629272, - "grad_norm": 0.008150842040777206, - "learning_rate": 2.124940634608024e-05, - "loss": 0.0037, - "step": 4695 - }, - { - "epoch": 3.62869279783742, - "grad_norm": 0.008811009116470814, - "learning_rate": 2.1227340477161122e-05, - "loss": 0.003, - "step": 4696 - }, - { - "epoch": 3.6294651477119135, - "grad_norm": 0.012078574858605862, - "learning_rate": 2.1205282983267954e-05, - "loss": 0.0034, - "step": 4697 - }, - { - "epoch": 3.630237497586407, - "grad_norm": 0.01125948503613472, - "learning_rate": 2.118323387082114e-05, - "loss": 0.0032, - "step": 4698 - }, - { - "epoch": 3.6310098474609, - "grad_norm": 0.017745889723300934, - "learning_rate": 2.1161193146238633e-05, - "loss": 0.0032, - "step": 4699 - }, - { - "epoch": 3.6317821973353928, - "grad_norm": 0.010856762528419495, - "learning_rate": 2.1139160815935965e-05, - "loss": 0.0034, - "step": 4700 - }, - { - "epoch": 3.632554547209886, - "grad_norm": 0.008668830618262291, - "learning_rate": 2.1117136886326216e-05, - "loss": 0.0029, - "step": 4701 - }, - { - "epoch": 3.633326897084379, - "grad_norm": 0.019786952063441277, - "learning_rate": 2.1095121363820015e-05, - "loss": 0.0035, - "step": 4702 - }, - { - "epoch": 3.6340992469588724, - "grad_norm": 0.015364222228527069, - "learning_rate": 2.1073114254825547e-05, - "loss": 0.0029, - "step": 4703 - }, - { - "epoch": 3.6348715968333654, - "grad_norm": 0.007902778685092926, - "learning_rate": 2.105111556574856e-05, - "loss": 0.0027, - "step": 4704 - }, - { - "epoch": 3.6356439467078587, - "grad_norm": 0.0083285728469491, - "learning_rate": 2.1029125302992325e-05, - "loss": 0.0033, - "step": 4705 - }, - { - "epoch": 3.6364162965823517, - "grad_norm": 0.01187330111861229, - "learning_rate": 2.100714347295769e-05, - "loss": 0.0032, - "step": 4706 - }, - { - "epoch": 3.637188646456845, - "grad_norm": 0.010464648716151714, - "learning_rate": 2.0985170082043027e-05, - "loss": 0.0032, - "step": 4707 - }, - { - "epoch": 3.637960996331338, - "grad_norm": 0.010964437387883663, - "learning_rate": 2.096320513664426e-05, - "loss": 0.0028, - "step": 4708 - }, - { - "epoch": 3.6387333462058313, - "grad_norm": 0.008737288415431976, - "learning_rate": 2.0941248643154858e-05, - "loss": 0.0031, - "step": 4709 - }, - { - "epoch": 3.6395056960803243, - "grad_norm": 0.012885338626801968, - "learning_rate": 2.0919300607965824e-05, - "loss": 0.0034, - "step": 4710 - }, - { - "epoch": 3.6402780459548176, - "grad_norm": 0.00732586532831192, - "learning_rate": 2.0897361037465695e-05, - "loss": 0.0028, - "step": 4711 - }, - { - "epoch": 3.6410503958293106, - "grad_norm": 0.012714467942714691, - "learning_rate": 2.087542993804056e-05, - "loss": 0.003, - "step": 4712 - }, - { - "epoch": 3.641822745703804, - "grad_norm": 0.009296673350036144, - "learning_rate": 2.085350731607403e-05, - "loss": 0.0031, - "step": 4713 - }, - { - "epoch": 3.642595095578297, - "grad_norm": 0.009091252461075783, - "learning_rate": 2.083159317794724e-05, - "loss": 0.0034, - "step": 4714 - }, - { - "epoch": 3.6433674454527902, - "grad_norm": 0.010541771538555622, - "learning_rate": 2.0809687530038872e-05, - "loss": 0.0031, - "step": 4715 - }, - { - "epoch": 3.644139795327283, - "grad_norm": 0.014411487616598606, - "learning_rate": 2.0787790378725135e-05, - "loss": 0.0031, - "step": 4716 - }, - { - "epoch": 3.6449121452017765, - "grad_norm": 0.009617066010832787, - "learning_rate": 2.0765901730379756e-05, - "loss": 0.0031, - "step": 4717 - }, - { - "epoch": 3.6456844950762695, - "grad_norm": 0.00811022613197565, - "learning_rate": 2.0744021591373996e-05, - "loss": 0.0028, - "step": 4718 - }, - { - "epoch": 3.646456844950763, - "grad_norm": 0.01548534631729126, - "learning_rate": 2.0722149968076615e-05, - "loss": 0.0032, - "step": 4719 - }, - { - "epoch": 3.6472291948252558, - "grad_norm": 0.008539475500583649, - "learning_rate": 2.0700286866853963e-05, - "loss": 0.0029, - "step": 4720 - }, - { - "epoch": 3.648001544699749, - "grad_norm": 0.009630308486521244, - "learning_rate": 2.0678432294069815e-05, - "loss": 0.0032, - "step": 4721 - }, - { - "epoch": 3.648773894574242, - "grad_norm": 0.01124610099941492, - "learning_rate": 2.0656586256085504e-05, - "loss": 0.0034, - "step": 4722 - }, - { - "epoch": 3.6495462444487354, - "grad_norm": 0.012255601584911346, - "learning_rate": 2.0634748759259936e-05, - "loss": 0.0032, - "step": 4723 - }, - { - "epoch": 3.6503185943232284, - "grad_norm": 0.015464738011360168, - "learning_rate": 2.0612919809949427e-05, - "loss": 0.0035, - "step": 4724 - }, - { - "epoch": 3.6510909441977217, - "grad_norm": 0.009021823294460773, - "learning_rate": 2.059109941450786e-05, - "loss": 0.0032, - "step": 4725 - }, - { - "epoch": 3.6518632940722147, - "grad_norm": 0.012433023191988468, - "learning_rate": 2.0569287579286674e-05, - "loss": 0.0036, - "step": 4726 - }, - { - "epoch": 3.6526356439467076, - "grad_norm": 0.008983463048934937, - "learning_rate": 2.054748431063472e-05, - "loss": 0.003, - "step": 4727 - }, - { - "epoch": 3.653407993821201, - "grad_norm": 0.008886398747563362, - "learning_rate": 2.0525689614898407e-05, - "loss": 0.0031, - "step": 4728 - }, - { - "epoch": 3.6541803436956943, - "grad_norm": 0.01756923831999302, - "learning_rate": 2.050390349842164e-05, - "loss": 0.0029, - "step": 4729 - }, - { - "epoch": 3.6549526935701873, - "grad_norm": 0.010268906131386757, - "learning_rate": 2.0482125967545878e-05, - "loss": 0.0027, - "step": 4730 - }, - { - "epoch": 3.65572504344468, - "grad_norm": 0.007442606147378683, - "learning_rate": 2.046035702860998e-05, - "loss": 0.003, - "step": 4731 - }, - { - "epoch": 3.6564973933191736, - "grad_norm": 0.017092349007725716, - "learning_rate": 2.043859668795036e-05, - "loss": 0.0035, - "step": 4732 - }, - { - "epoch": 3.657269743193667, - "grad_norm": 0.007818550802767277, - "learning_rate": 2.0416844951900987e-05, - "loss": 0.003, - "step": 4733 - }, - { - "epoch": 3.65804209306816, - "grad_norm": 0.010054933838546276, - "learning_rate": 2.0395101826793207e-05, - "loss": 0.0031, - "step": 4734 - }, - { - "epoch": 3.658814442942653, - "grad_norm": 0.010219238698482513, - "learning_rate": 2.037336731895591e-05, - "loss": 0.0034, - "step": 4735 - }, - { - "epoch": 3.659586792817146, - "grad_norm": 0.009758195839822292, - "learning_rate": 2.0351641434715553e-05, - "loss": 0.0029, - "step": 4736 - }, - { - "epoch": 3.6603591426916395, - "grad_norm": 0.011908262968063354, - "learning_rate": 2.0329924180395953e-05, - "loss": 0.003, - "step": 4737 - }, - { - "epoch": 3.6611314925661325, - "grad_norm": 0.008491923101246357, - "learning_rate": 2.030821556231849e-05, - "loss": 0.0033, - "step": 4738 - }, - { - "epoch": 3.6619038424406254, - "grad_norm": 0.01156783476471901, - "learning_rate": 2.0286515586802034e-05, - "loss": 0.0027, - "step": 4739 - }, - { - "epoch": 3.6626761923151188, - "grad_norm": 0.011989074759185314, - "learning_rate": 2.0264824260162946e-05, - "loss": 0.0032, - "step": 4740 - }, - { - "epoch": 3.663448542189612, - "grad_norm": 0.01123703084886074, - "learning_rate": 2.0243141588714974e-05, - "loss": 0.0033, - "step": 4741 - }, - { - "epoch": 3.664220892064105, - "grad_norm": 0.008847612887620926, - "learning_rate": 2.0221467578769487e-05, - "loss": 0.0031, - "step": 4742 - }, - { - "epoch": 3.664993241938598, - "grad_norm": 0.008390386588871479, - "learning_rate": 2.0199802236635257e-05, - "loss": 0.0028, - "step": 4743 - }, - { - "epoch": 3.6657655918130914, - "grad_norm": 0.010448389686644077, - "learning_rate": 2.0178145568618495e-05, - "loss": 0.0032, - "step": 4744 - }, - { - "epoch": 3.6665379416875847, - "grad_norm": 0.011151538230478764, - "learning_rate": 2.0156497581022983e-05, - "loss": 0.003, - "step": 4745 - }, - { - "epoch": 3.6673102915620777, - "grad_norm": 0.011992346495389938, - "learning_rate": 2.0134858280149928e-05, - "loss": 0.0035, - "step": 4746 - }, - { - "epoch": 3.6680826414365706, - "grad_norm": 0.010773863643407822, - "learning_rate": 2.011322767229795e-05, - "loss": 0.0031, - "step": 4747 - }, - { - "epoch": 3.668854991311064, - "grad_norm": 0.014904526993632317, - "learning_rate": 2.0091605763763255e-05, - "loss": 0.0029, - "step": 4748 - }, - { - "epoch": 3.6696273411855573, - "grad_norm": 0.011230251751840115, - "learning_rate": 2.006999256083944e-05, - "loss": 0.0034, - "step": 4749 - }, - { - "epoch": 3.6703996910600503, - "grad_norm": 0.009951038286089897, - "learning_rate": 2.0048388069817585e-05, - "loss": 0.003, - "step": 4750 - }, - { - "epoch": 3.671172040934543, - "grad_norm": 0.014170085079967976, - "learning_rate": 2.0026792296986242e-05, - "loss": 0.0032, - "step": 4751 - }, - { - "epoch": 3.6719443908090366, - "grad_norm": 0.0095702288672328, - "learning_rate": 2.0005205248631404e-05, - "loss": 0.0031, - "step": 4752 - }, - { - "epoch": 3.6727167406835295, - "grad_norm": 0.014286153018474579, - "learning_rate": 1.998362693103657e-05, - "loss": 0.0036, - "step": 4753 - }, - { - "epoch": 3.673489090558023, - "grad_norm": 0.01710379868745804, - "learning_rate": 1.9962057350482605e-05, - "loss": 0.0029, - "step": 4754 - }, - { - "epoch": 3.674261440432516, - "grad_norm": 0.010805051773786545, - "learning_rate": 1.994049651324795e-05, - "loss": 0.0029, - "step": 4755 - }, - { - "epoch": 3.675033790307009, - "grad_norm": 0.008329571224749088, - "learning_rate": 1.9918944425608442e-05, - "loss": 0.0028, - "step": 4756 - }, - { - "epoch": 3.675806140181502, - "grad_norm": 0.008502728305757046, - "learning_rate": 1.9897401093837325e-05, - "loss": 0.0032, - "step": 4757 - }, - { - "epoch": 3.6765784900559955, - "grad_norm": 0.013527288101613522, - "learning_rate": 1.9875866524205395e-05, - "loss": 0.0033, - "step": 4758 - }, - { - "epoch": 3.6773508399304884, - "grad_norm": 0.012341136112809181, - "learning_rate": 1.985434072298081e-05, - "loss": 0.0031, - "step": 4759 - }, - { - "epoch": 3.6781231898049818, - "grad_norm": 0.011203516274690628, - "learning_rate": 1.9832823696429236e-05, - "loss": 0.003, - "step": 4760 - }, - { - "epoch": 3.6788955396794747, - "grad_norm": 0.013294699601829052, - "learning_rate": 1.981131545081375e-05, - "loss": 0.0031, - "step": 4761 - }, - { - "epoch": 3.679667889553968, - "grad_norm": 0.008355311118066311, - "learning_rate": 1.9789815992394873e-05, - "loss": 0.003, - "step": 4762 - }, - { - "epoch": 3.680440239428461, - "grad_norm": 0.013429731130599976, - "learning_rate": 1.9768325327430588e-05, - "loss": 0.0033, - "step": 4763 - }, - { - "epoch": 3.6812125893029544, - "grad_norm": 0.015098560601472855, - "learning_rate": 1.9746843462176307e-05, - "loss": 0.003, - "step": 4764 - }, - { - "epoch": 3.6819849391774473, - "grad_norm": 0.010638771578669548, - "learning_rate": 1.972537040288488e-05, - "loss": 0.0027, - "step": 4765 - }, - { - "epoch": 3.6827572890519407, - "grad_norm": 0.010106007568538189, - "learning_rate": 1.9703906155806594e-05, - "loss": 0.0035, - "step": 4766 - }, - { - "epoch": 3.6835296389264336, - "grad_norm": 0.009524368681013584, - "learning_rate": 1.968245072718918e-05, - "loss": 0.0034, - "step": 4767 - }, - { - "epoch": 3.684301988800927, - "grad_norm": 0.014750136993825436, - "learning_rate": 1.9661004123277783e-05, - "loss": 0.0032, - "step": 4768 - }, - { - "epoch": 3.68507433867542, - "grad_norm": 0.01715848408639431, - "learning_rate": 1.9639566350315003e-05, - "loss": 0.003, - "step": 4769 - }, - { - "epoch": 3.6858466885499133, - "grad_norm": 0.009163351729512215, - "learning_rate": 1.961813741454085e-05, - "loss": 0.0031, - "step": 4770 - }, - { - "epoch": 3.686619038424406, - "grad_norm": 0.008157082833349705, - "learning_rate": 1.959671732219277e-05, - "loss": 0.0032, - "step": 4771 - }, - { - "epoch": 3.6873913882988996, - "grad_norm": 0.008473200723528862, - "learning_rate": 1.9575306079505638e-05, - "loss": 0.0036, - "step": 4772 - }, - { - "epoch": 3.6881637381733925, - "grad_norm": 0.011803386732935905, - "learning_rate": 1.955390369271175e-05, - "loss": 0.0033, - "step": 4773 - }, - { - "epoch": 3.6889360880478854, - "grad_norm": 0.015068240463733673, - "learning_rate": 1.9532510168040824e-05, - "loss": 0.0033, - "step": 4774 - }, - { - "epoch": 3.689708437922379, - "grad_norm": 0.008709238842129707, - "learning_rate": 1.9511125511719992e-05, - "loss": 0.003, - "step": 4775 - }, - { - "epoch": 3.690480787796872, - "grad_norm": 0.00890977494418621, - "learning_rate": 1.9489749729973812e-05, - "loss": 0.0032, - "step": 4776 - }, - { - "epoch": 3.691253137671365, - "grad_norm": 0.006033789366483688, - "learning_rate": 1.9468382829024263e-05, - "loss": 0.0024, - "step": 4777 - }, - { - "epoch": 3.692025487545858, - "grad_norm": 0.012680809944868088, - "learning_rate": 1.9447024815090726e-05, - "loss": 0.0031, - "step": 4778 - }, - { - "epoch": 3.6927978374203514, - "grad_norm": 0.009670529514551163, - "learning_rate": 1.9425675694389995e-05, - "loss": 0.0032, - "step": 4779 - }, - { - "epoch": 3.6935701872948448, - "grad_norm": 0.014543136581778526, - "learning_rate": 1.9404335473136327e-05, - "loss": 0.0027, - "step": 4780 - }, - { - "epoch": 3.6943425371693377, - "grad_norm": 0.008619261905550957, - "learning_rate": 1.9383004157541296e-05, - "loss": 0.003, - "step": 4781 - }, - { - "epoch": 3.6951148870438306, - "grad_norm": 0.014257128350436687, - "learning_rate": 1.936168175381395e-05, - "loss": 0.003, - "step": 4782 - }, - { - "epoch": 3.695887236918324, - "grad_norm": 0.009102889336645603, - "learning_rate": 1.9340368268160725e-05, - "loss": 0.0029, - "step": 4783 - }, - { - "epoch": 3.6966595867928174, - "grad_norm": 0.00875469483435154, - "learning_rate": 1.9319063706785462e-05, - "loss": 0.0033, - "step": 4784 - }, - { - "epoch": 3.6974319366673103, - "grad_norm": 0.009385279379785061, - "learning_rate": 1.92977680758894e-05, - "loss": 0.0031, - "step": 4785 - }, - { - "epoch": 3.698204286541803, - "grad_norm": 0.01086689718067646, - "learning_rate": 1.927648138167119e-05, - "loss": 0.0029, - "step": 4786 - }, - { - "epoch": 3.6989766364162966, - "grad_norm": 0.01208088081330061, - "learning_rate": 1.9255203630326872e-05, - "loss": 0.0028, - "step": 4787 - }, - { - "epoch": 3.69974898629079, - "grad_norm": 0.008676470257341862, - "learning_rate": 1.9233934828049884e-05, - "loss": 0.0031, - "step": 4788 - }, - { - "epoch": 3.700521336165283, - "grad_norm": 0.008346854709088802, - "learning_rate": 1.9212674981031054e-05, - "loss": 0.003, - "step": 4789 - }, - { - "epoch": 3.701293686039776, - "grad_norm": 0.00872017815709114, - "learning_rate": 1.919142409545862e-05, - "loss": 0.0029, - "step": 4790 - }, - { - "epoch": 3.702066035914269, - "grad_norm": 0.00791302789002657, - "learning_rate": 1.9170182177518203e-05, - "loss": 0.003, - "step": 4791 - }, - { - "epoch": 3.7028383857887626, - "grad_norm": 0.010449966415762901, - "learning_rate": 1.9148949233392783e-05, - "loss": 0.0028, - "step": 4792 - }, - { - "epoch": 3.7036107356632555, - "grad_norm": 0.012326085940003395, - "learning_rate": 1.9127725269262814e-05, - "loss": 0.0033, - "step": 4793 - }, - { - "epoch": 3.7043830855377484, - "grad_norm": 0.01011696644127369, - "learning_rate": 1.9106510291306033e-05, - "loss": 0.003, - "step": 4794 - }, - { - "epoch": 3.705155435412242, - "grad_norm": 0.009063688106834888, - "learning_rate": 1.90853043056976e-05, - "loss": 0.003, - "step": 4795 - }, - { - "epoch": 3.705927785286735, - "grad_norm": 0.010787200182676315, - "learning_rate": 1.906410731861012e-05, - "loss": 0.003, - "step": 4796 - }, - { - "epoch": 3.706700135161228, - "grad_norm": 0.009205954149365425, - "learning_rate": 1.904291933621347e-05, - "loss": 0.0034, - "step": 4797 - }, - { - "epoch": 3.707472485035721, - "grad_norm": 0.013273040764033794, - "learning_rate": 1.9021740364674966e-05, - "loss": 0.003, - "step": 4798 - }, - { - "epoch": 3.7082448349102144, - "grad_norm": 0.02008615992963314, - "learning_rate": 1.900057041015934e-05, - "loss": 0.0037, - "step": 4799 - }, - { - "epoch": 3.7090171847847073, - "grad_norm": 0.006965349894016981, - "learning_rate": 1.8979409478828604e-05, - "loss": 0.003, - "step": 4800 - }, - { - "epoch": 3.7097895346592007, - "grad_norm": 0.011672982014715672, - "learning_rate": 1.8958257576842194e-05, - "loss": 0.0032, - "step": 4801 - }, - { - "epoch": 3.7105618845336936, - "grad_norm": 0.010026531293988228, - "learning_rate": 1.893711471035695e-05, - "loss": 0.0033, - "step": 4802 - }, - { - "epoch": 3.711334234408187, - "grad_norm": 0.007827379740774632, - "learning_rate": 1.8915980885527053e-05, - "loss": 0.0032, - "step": 4803 - }, - { - "epoch": 3.71210658428268, - "grad_norm": 0.012139009311795235, - "learning_rate": 1.8894856108503993e-05, - "loss": 0.0033, - "step": 4804 - }, - { - "epoch": 3.7128789341571733, - "grad_norm": 0.008282513357698917, - "learning_rate": 1.8873740385436743e-05, - "loss": 0.0029, - "step": 4805 - }, - { - "epoch": 3.7136512840316662, - "grad_norm": 0.00895603746175766, - "learning_rate": 1.885263372247157e-05, - "loss": 0.0028, - "step": 4806 - }, - { - "epoch": 3.7144236339061596, - "grad_norm": 0.008988351561129093, - "learning_rate": 1.8831536125752086e-05, - "loss": 0.0031, - "step": 4807 - }, - { - "epoch": 3.7151959837806525, - "grad_norm": 0.007388737518340349, - "learning_rate": 1.8810447601419285e-05, - "loss": 0.0027, - "step": 4808 - }, - { - "epoch": 3.715968333655146, - "grad_norm": 0.010159352794289589, - "learning_rate": 1.878936815561158e-05, - "loss": 0.0033, - "step": 4809 - }, - { - "epoch": 3.716740683529639, - "grad_norm": 0.011480720713734627, - "learning_rate": 1.876829779446464e-05, - "loss": 0.0028, - "step": 4810 - }, - { - "epoch": 3.717513033404132, - "grad_norm": 0.01044251024723053, - "learning_rate": 1.8747236524111534e-05, - "loss": 0.0029, - "step": 4811 - }, - { - "epoch": 3.718285383278625, - "grad_norm": 0.010147932916879654, - "learning_rate": 1.872618435068273e-05, - "loss": 0.0035, - "step": 4812 - }, - { - "epoch": 3.7190577331531185, - "grad_norm": 0.008173985406756401, - "learning_rate": 1.8705141280305998e-05, - "loss": 0.0029, - "step": 4813 - }, - { - "epoch": 3.7198300830276114, - "grad_norm": 0.014352631755173206, - "learning_rate": 1.8684107319106424e-05, - "loss": 0.0031, - "step": 4814 - }, - { - "epoch": 3.720602432902105, - "grad_norm": 0.013318825513124466, - "learning_rate": 1.8663082473206535e-05, - "loss": 0.0029, - "step": 4815 - }, - { - "epoch": 3.7213747827765977, - "grad_norm": 0.014817917719483376, - "learning_rate": 1.864206674872615e-05, - "loss": 0.0031, - "step": 4816 - }, - { - "epoch": 3.722147132651091, - "grad_norm": 0.010158638469874859, - "learning_rate": 1.8621060151782393e-05, - "loss": 0.0033, - "step": 4817 - }, - { - "epoch": 3.722919482525584, - "grad_norm": 0.014668983407318592, - "learning_rate": 1.8600062688489827e-05, - "loss": 0.0038, - "step": 4818 - }, - { - "epoch": 3.7236918324000774, - "grad_norm": 0.009824811480939388, - "learning_rate": 1.857907436496031e-05, - "loss": 0.0028, - "step": 4819 - }, - { - "epoch": 3.7244641822745703, - "grad_norm": 0.008027785457670689, - "learning_rate": 1.8558095187302977e-05, - "loss": 0.003, - "step": 4820 - }, - { - "epoch": 3.7252365321490633, - "grad_norm": 0.012118781916797161, - "learning_rate": 1.8537125161624414e-05, - "loss": 0.0034, - "step": 4821 - }, - { - "epoch": 3.7260088820235566, - "grad_norm": 0.008490202017128468, - "learning_rate": 1.8516164294028472e-05, - "loss": 0.0028, - "step": 4822 - }, - { - "epoch": 3.72678123189805, - "grad_norm": 0.008115014061331749, - "learning_rate": 1.8495212590616357e-05, - "loss": 0.0026, - "step": 4823 - }, - { - "epoch": 3.727553581772543, - "grad_norm": 0.011491495184600353, - "learning_rate": 1.8474270057486593e-05, - "loss": 0.0032, - "step": 4824 - }, - { - "epoch": 3.728325931647036, - "grad_norm": 0.010381430387496948, - "learning_rate": 1.8453336700735057e-05, - "loss": 0.0025, - "step": 4825 - }, - { - "epoch": 3.7290982815215292, - "grad_norm": 0.008572252467274666, - "learning_rate": 1.843241252645494e-05, - "loss": 0.0032, - "step": 4826 - }, - { - "epoch": 3.7298706313960226, - "grad_norm": 0.011093181557953358, - "learning_rate": 1.8411497540736757e-05, - "loss": 0.0032, - "step": 4827 - }, - { - "epoch": 3.7306429812705155, - "grad_norm": 0.01273476891219616, - "learning_rate": 1.8390591749668362e-05, - "loss": 0.003, - "step": 4828 - }, - { - "epoch": 3.7314153311450085, - "grad_norm": 0.009482769295573235, - "learning_rate": 1.8369695159334925e-05, - "loss": 0.003, - "step": 4829 - }, - { - "epoch": 3.732187681019502, - "grad_norm": 0.008260451257228851, - "learning_rate": 1.8348807775818932e-05, - "loss": 0.0031, - "step": 4830 - }, - { - "epoch": 3.732960030893995, - "grad_norm": 0.008538060821592808, - "learning_rate": 1.8327929605200206e-05, - "loss": 0.0028, - "step": 4831 - }, - { - "epoch": 3.733732380768488, - "grad_norm": 0.00773623026907444, - "learning_rate": 1.8307060653555874e-05, - "loss": 0.0029, - "step": 4832 - }, - { - "epoch": 3.734504730642981, - "grad_norm": 0.010263267904520035, - "learning_rate": 1.828620092696038e-05, - "loss": 0.0037, - "step": 4833 - }, - { - "epoch": 3.7352770805174744, - "grad_norm": 0.01147653441876173, - "learning_rate": 1.8265350431485488e-05, - "loss": 0.0029, - "step": 4834 - }, - { - "epoch": 3.736049430391968, - "grad_norm": 0.011068695224821568, - "learning_rate": 1.8244509173200276e-05, - "loss": 0.0031, - "step": 4835 - }, - { - "epoch": 3.7368217802664607, - "grad_norm": 0.009672621265053749, - "learning_rate": 1.8223677158171128e-05, - "loss": 0.0031, - "step": 4836 - }, - { - "epoch": 3.7375941301409537, - "grad_norm": 0.008995790034532547, - "learning_rate": 1.8202854392461738e-05, - "loss": 0.003, - "step": 4837 - }, - { - "epoch": 3.738366480015447, - "grad_norm": 0.00977084506303072, - "learning_rate": 1.8182040882133118e-05, - "loss": 0.0032, - "step": 4838 - }, - { - "epoch": 3.7391388298899404, - "grad_norm": 0.016638806089758873, - "learning_rate": 1.8161236633243566e-05, - "loss": 0.0031, - "step": 4839 - }, - { - "epoch": 3.7399111797644333, - "grad_norm": 0.008934970945119858, - "learning_rate": 1.814044165184871e-05, - "loss": 0.0031, - "step": 4840 - }, - { - "epoch": 3.7406835296389263, - "grad_norm": 0.008013790473341942, - "learning_rate": 1.8119655944001458e-05, - "loss": 0.0031, - "step": 4841 - }, - { - "epoch": 3.7414558795134196, - "grad_norm": 0.008421190083026886, - "learning_rate": 1.8098879515752025e-05, - "loss": 0.0028, - "step": 4842 - }, - { - "epoch": 3.742228229387913, - "grad_norm": 0.009693178348243237, - "learning_rate": 1.807811237314794e-05, - "loss": 0.0028, - "step": 4843 - }, - { - "epoch": 3.743000579262406, - "grad_norm": 0.016526630148291588, - "learning_rate": 1.8057354522233998e-05, - "loss": 0.0027, - "step": 4844 - }, - { - "epoch": 3.743772929136899, - "grad_norm": 0.008722502738237381, - "learning_rate": 1.8036605969052322e-05, - "loss": 0.0031, - "step": 4845 - }, - { - "epoch": 3.7445452790113922, - "grad_norm": 0.012371312826871872, - "learning_rate": 1.801586671964231e-05, - "loss": 0.0031, - "step": 4846 - }, - { - "epoch": 3.745317628885885, - "grad_norm": 0.009250137954950333, - "learning_rate": 1.7995136780040656e-05, - "loss": 0.0032, - "step": 4847 - }, - { - "epoch": 3.7460899787603785, - "grad_norm": 0.008596301078796387, - "learning_rate": 1.7974416156281342e-05, - "loss": 0.0037, - "step": 4848 - }, - { - "epoch": 3.7468623286348715, - "grad_norm": 0.014486918225884438, - "learning_rate": 1.7953704854395647e-05, - "loss": 0.0032, - "step": 4849 - }, - { - "epoch": 3.747634678509365, - "grad_norm": 0.015768401324748993, - "learning_rate": 1.7933002880412125e-05, - "loss": 0.0035, - "step": 4850 - }, - { - "epoch": 3.7484070283838578, - "grad_norm": 0.015920784324407578, - "learning_rate": 1.791231024035663e-05, - "loss": 0.0033, - "step": 4851 - }, - { - "epoch": 3.749179378258351, - "grad_norm": 0.008338225074112415, - "learning_rate": 1.7891626940252255e-05, - "loss": 0.003, - "step": 4852 - }, - { - "epoch": 3.749951728132844, - "grad_norm": 0.007774571422487497, - "learning_rate": 1.787095298611947e-05, - "loss": 0.0028, - "step": 4853 - }, - { - "epoch": 3.7507240780073374, - "grad_norm": 0.008573639206588268, - "learning_rate": 1.7850288383975923e-05, - "loss": 0.0029, - "step": 4854 - }, - { - "epoch": 3.7514964278818304, - "grad_norm": 0.014735306613147259, - "learning_rate": 1.782963313983656e-05, - "loss": 0.0033, - "step": 4855 - }, - { - "epoch": 3.7522687777563237, - "grad_norm": 0.010564186610281467, - "learning_rate": 1.7808987259713688e-05, - "loss": 0.003, - "step": 4856 - }, - { - "epoch": 3.7530411276308167, - "grad_norm": 0.008973659947514534, - "learning_rate": 1.7788350749616773e-05, - "loss": 0.0028, - "step": 4857 - }, - { - "epoch": 3.75381347750531, - "grad_norm": 0.017529862001538277, - "learning_rate": 1.7767723615552594e-05, - "loss": 0.0028, - "step": 4858 - }, - { - "epoch": 3.754585827379803, - "grad_norm": 0.009524044580757618, - "learning_rate": 1.774710586352527e-05, - "loss": 0.0032, - "step": 4859 - }, - { - "epoch": 3.7553581772542963, - "grad_norm": 0.015119528397917747, - "learning_rate": 1.7726497499536084e-05, - "loss": 0.0027, - "step": 4860 - }, - { - "epoch": 3.7561305271287893, - "grad_norm": 0.015329336747527122, - "learning_rate": 1.770589852958362e-05, - "loss": 0.0035, - "step": 4861 - }, - { - "epoch": 3.7569028770032826, - "grad_norm": 0.013769777491688728, - "learning_rate": 1.768530895966379e-05, - "loss": 0.0034, - "step": 4862 - }, - { - "epoch": 3.7576752268777756, - "grad_norm": 0.009363507851958275, - "learning_rate": 1.7664728795769704e-05, - "loss": 0.0031, - "step": 4863 - }, - { - "epoch": 3.758447576752269, - "grad_norm": 0.008630231022834778, - "learning_rate": 1.7644158043891727e-05, - "loss": 0.0029, - "step": 4864 - }, - { - "epoch": 3.759219926626762, - "grad_norm": 0.010387644171714783, - "learning_rate": 1.7623596710017505e-05, - "loss": 0.0032, - "step": 4865 - }, - { - "epoch": 3.7599922765012552, - "grad_norm": 0.014387257397174835, - "learning_rate": 1.7603044800131995e-05, - "loss": 0.0037, - "step": 4866 - }, - { - "epoch": 3.760764626375748, - "grad_norm": 0.009594297036528587, - "learning_rate": 1.7582502320217315e-05, - "loss": 0.0032, - "step": 4867 - }, - { - "epoch": 3.761536976250241, - "grad_norm": 0.008206100203096867, - "learning_rate": 1.756196927625288e-05, - "loss": 0.0028, - "step": 4868 - }, - { - "epoch": 3.7623093261247345, - "grad_norm": 0.008791467174887657, - "learning_rate": 1.7541445674215418e-05, - "loss": 0.0026, - "step": 4869 - }, - { - "epoch": 3.763081675999228, - "grad_norm": 0.010609994642436504, - "learning_rate": 1.7520931520078797e-05, - "loss": 0.0034, - "step": 4870 - }, - { - "epoch": 3.7638540258737208, - "grad_norm": 0.009034925140440464, - "learning_rate": 1.7500426819814198e-05, - "loss": 0.0032, - "step": 4871 - }, - { - "epoch": 3.7646263757482137, - "grad_norm": 0.010067600756883621, - "learning_rate": 1.747993157939007e-05, - "loss": 0.0032, - "step": 4872 - }, - { - "epoch": 3.765398725622707, - "grad_norm": 0.009893916547298431, - "learning_rate": 1.745944580477209e-05, - "loss": 0.003, - "step": 4873 - }, - { - "epoch": 3.7661710754972004, - "grad_norm": 0.008948578499257565, - "learning_rate": 1.7438969501923114e-05, - "loss": 0.0029, - "step": 4874 - }, - { - "epoch": 3.7669434253716934, - "grad_norm": 0.009708845056593418, - "learning_rate": 1.7418502676803362e-05, - "loss": 0.003, - "step": 4875 - }, - { - "epoch": 3.7677157752461863, - "grad_norm": 0.009008213877677917, - "learning_rate": 1.7398045335370215e-05, - "loss": 0.003, - "step": 4876 - }, - { - "epoch": 3.7684881251206797, - "grad_norm": 0.0089599983766675, - "learning_rate": 1.737759748357827e-05, - "loss": 0.0029, - "step": 4877 - }, - { - "epoch": 3.769260474995173, - "grad_norm": 0.007111499086022377, - "learning_rate": 1.735715912737946e-05, - "loss": 0.0025, - "step": 4878 - }, - { - "epoch": 3.770032824869666, - "grad_norm": 0.012843016535043716, - "learning_rate": 1.733673027272288e-05, - "loss": 0.0031, - "step": 4879 - }, - { - "epoch": 3.770805174744159, - "grad_norm": 0.00939163751900196, - "learning_rate": 1.7316310925554836e-05, - "loss": 0.0026, - "step": 4880 - }, - { - "epoch": 3.7715775246186523, - "grad_norm": 0.010984611697494984, - "learning_rate": 1.7295901091818955e-05, - "loss": 0.0032, - "step": 4881 - }, - { - "epoch": 3.7723498744931456, - "grad_norm": 0.012250718660652637, - "learning_rate": 1.7275500777456032e-05, - "loss": 0.0029, - "step": 4882 - }, - { - "epoch": 3.7731222243676386, - "grad_norm": 0.008747139945626259, - "learning_rate": 1.72551099884041e-05, - "loss": 0.0028, - "step": 4883 - }, - { - "epoch": 3.7738945742421315, - "grad_norm": 0.008452476002275944, - "learning_rate": 1.7234728730598432e-05, - "loss": 0.0027, - "step": 4884 - }, - { - "epoch": 3.774666924116625, - "grad_norm": 0.010798325762152672, - "learning_rate": 1.7214357009971517e-05, - "loss": 0.0028, - "step": 4885 - }, - { - "epoch": 3.7754392739911182, - "grad_norm": 0.0076159583404660225, - "learning_rate": 1.719399483245307e-05, - "loss": 0.0028, - "step": 4886 - }, - { - "epoch": 3.776211623865611, - "grad_norm": 0.009635258466005325, - "learning_rate": 1.7173642203970026e-05, - "loss": 0.0028, - "step": 4887 - }, - { - "epoch": 3.776983973740104, - "grad_norm": 0.010172554291784763, - "learning_rate": 1.7153299130446545e-05, - "loss": 0.0032, - "step": 4888 - }, - { - "epoch": 3.7777563236145975, - "grad_norm": 0.009352128021419048, - "learning_rate": 1.7132965617804027e-05, - "loss": 0.0029, - "step": 4889 - }, - { - "epoch": 3.778528673489091, - "grad_norm": 0.08108187466859818, - "learning_rate": 1.7112641671961007e-05, - "loss": 0.0034, - "step": 4890 - }, - { - "epoch": 3.7793010233635838, - "grad_norm": 0.009638349525630474, - "learning_rate": 1.7092327298833345e-05, - "loss": 0.0031, - "step": 4891 - }, - { - "epoch": 3.7800733732380767, - "grad_norm": 0.011018278077244759, - "learning_rate": 1.7072022504334058e-05, - "loss": 0.0033, - "step": 4892 - }, - { - "epoch": 3.78084572311257, - "grad_norm": 0.007833022624254227, - "learning_rate": 1.7051727294373367e-05, - "loss": 0.003, - "step": 4893 - }, - { - "epoch": 3.781618072987063, - "grad_norm": 0.008461980149149895, - "learning_rate": 1.7031441674858728e-05, - "loss": 0.0028, - "step": 4894 - }, - { - "epoch": 3.7823904228615564, - "grad_norm": 0.010157615877687931, - "learning_rate": 1.7011165651694795e-05, - "loss": 0.003, - "step": 4895 - }, - { - "epoch": 3.7831627727360493, - "grad_norm": 0.007885764352977276, - "learning_rate": 1.6990899230783418e-05, - "loss": 0.0027, - "step": 4896 - }, - { - "epoch": 3.7839351226105427, - "grad_norm": 0.011371986009180546, - "learning_rate": 1.697064241802367e-05, - "loss": 0.0032, - "step": 4897 - }, - { - "epoch": 3.7847074724850356, - "grad_norm": 0.010999456979334354, - "learning_rate": 1.6950395219311822e-05, - "loss": 0.0029, - "step": 4898 - }, - { - "epoch": 3.785479822359529, - "grad_norm": 0.012473049573600292, - "learning_rate": 1.693015764054134e-05, - "loss": 0.0031, - "step": 4899 - }, - { - "epoch": 3.786252172234022, - "grad_norm": 0.01124456524848938, - "learning_rate": 1.690992968760289e-05, - "loss": 0.0036, - "step": 4900 - }, - { - "epoch": 3.7870245221085153, - "grad_norm": 0.012125209905207157, - "learning_rate": 1.6889711366384347e-05, - "loss": 0.003, - "step": 4901 - }, - { - "epoch": 3.787796871983008, - "grad_norm": 0.01850360631942749, - "learning_rate": 1.6869502682770776e-05, - "loss": 0.003, - "step": 4902 - }, - { - "epoch": 3.7885692218575016, - "grad_norm": 0.013725490309298038, - "learning_rate": 1.684930364264444e-05, - "loss": 0.0033, - "step": 4903 - }, - { - "epoch": 3.7893415717319945, - "grad_norm": 0.009691640734672546, - "learning_rate": 1.6829114251884776e-05, - "loss": 0.0027, - "step": 4904 - }, - { - "epoch": 3.790113921606488, - "grad_norm": 0.010694482363760471, - "learning_rate": 1.6808934516368446e-05, - "loss": 0.0031, - "step": 4905 - }, - { - "epoch": 3.790886271480981, - "grad_norm": 0.013220801018178463, - "learning_rate": 1.678876444196927e-05, - "loss": 0.0035, - "step": 4906 - }, - { - "epoch": 3.791658621355474, - "grad_norm": 0.008447596803307533, - "learning_rate": 1.676860403455828e-05, - "loss": 0.0029, - "step": 4907 - }, - { - "epoch": 3.792430971229967, - "grad_norm": 0.012366946786642075, - "learning_rate": 1.674845330000367e-05, - "loss": 0.003, - "step": 4908 - }, - { - "epoch": 3.7932033211044605, - "grad_norm": 0.012804819270968437, - "learning_rate": 1.6728312244170847e-05, - "loss": 0.003, - "step": 4909 - }, - { - "epoch": 3.7939756709789534, - "grad_norm": 0.010311468504369259, - "learning_rate": 1.6708180872922373e-05, - "loss": 0.0029, - "step": 4910 - }, - { - "epoch": 3.7947480208534468, - "grad_norm": 0.00922460202127695, - "learning_rate": 1.6688059192118018e-05, - "loss": 0.0032, - "step": 4911 - }, - { - "epoch": 3.7955203707279397, - "grad_norm": 0.011730996891856194, - "learning_rate": 1.6667947207614682e-05, - "loss": 0.0034, - "step": 4912 - }, - { - "epoch": 3.796292720602433, - "grad_norm": 0.008386842906475067, - "learning_rate": 1.6647844925266544e-05, - "loss": 0.0031, - "step": 4913 - }, - { - "epoch": 3.797065070476926, - "grad_norm": 0.009484478272497654, - "learning_rate": 1.662775235092483e-05, - "loss": 0.0029, - "step": 4914 - }, - { - "epoch": 3.7978374203514194, - "grad_norm": 0.009336920455098152, - "learning_rate": 1.6607669490438015e-05, - "loss": 0.0029, - "step": 4915 - }, - { - "epoch": 3.7986097702259123, - "grad_norm": 0.008269627578556538, - "learning_rate": 1.6587596349651774e-05, - "loss": 0.0027, - "step": 4916 - }, - { - "epoch": 3.7993821201004057, - "grad_norm": 0.008038083091378212, - "learning_rate": 1.6567532934408876e-05, - "loss": 0.003, - "step": 4917 - }, - { - "epoch": 3.8001544699748986, - "grad_norm": 0.013694526627659798, - "learning_rate": 1.6547479250549296e-05, - "loss": 0.0028, - "step": 4918 - }, - { - "epoch": 3.8009268198493915, - "grad_norm": 0.009768110699951649, - "learning_rate": 1.652743530391019e-05, - "loss": 0.003, - "step": 4919 - }, - { - "epoch": 3.801699169723885, - "grad_norm": 0.00901896320283413, - "learning_rate": 1.650740110032586e-05, - "loss": 0.0033, - "step": 4920 - }, - { - "epoch": 3.8024715195983783, - "grad_norm": 0.011361799202859402, - "learning_rate": 1.648737664562778e-05, - "loss": 0.0028, - "step": 4921 - }, - { - "epoch": 3.803243869472871, - "grad_norm": 0.009843561798334122, - "learning_rate": 1.646736194564457e-05, - "loss": 0.0026, - "step": 4922 - }, - { - "epoch": 3.804016219347364, - "grad_norm": 0.010585188865661621, - "learning_rate": 1.6447357006202074e-05, - "loss": 0.0037, - "step": 4923 - }, - { - "epoch": 3.8047885692218575, - "grad_norm": 0.008782930672168732, - "learning_rate": 1.64273618331232e-05, - "loss": 0.0033, - "step": 4924 - }, - { - "epoch": 3.805560919096351, - "grad_norm": 0.012191285379230976, - "learning_rate": 1.6407376432228054e-05, - "loss": 0.0034, - "step": 4925 - }, - { - "epoch": 3.806333268970844, - "grad_norm": 0.01644955947995186, - "learning_rate": 1.638740080933396e-05, - "loss": 0.0031, - "step": 4926 - }, - { - "epoch": 3.8071056188453367, - "grad_norm": 0.012493155896663666, - "learning_rate": 1.636743497025528e-05, - "loss": 0.0031, - "step": 4927 - }, - { - "epoch": 3.80787796871983, - "grad_norm": 0.012926139868795872, - "learning_rate": 1.6347478920803593e-05, - "loss": 0.0029, - "step": 4928 - }, - { - "epoch": 3.8086503185943235, - "grad_norm": 0.011629858054220676, - "learning_rate": 1.632753266678767e-05, - "loss": 0.0028, - "step": 4929 - }, - { - "epoch": 3.8094226684688164, - "grad_norm": 0.010302181355655193, - "learning_rate": 1.6307596214013342e-05, - "loss": 0.003, - "step": 4930 - }, - { - "epoch": 3.8101950183433093, - "grad_norm": 0.011941662058234215, - "learning_rate": 1.628766956828362e-05, - "loss": 0.003, - "step": 4931 - }, - { - "epoch": 3.8109673682178027, - "grad_norm": 0.013709473423659801, - "learning_rate": 1.6267752735398707e-05, - "loss": 0.0031, - "step": 4932 - }, - { - "epoch": 3.811739718092296, - "grad_norm": 0.009955592453479767, - "learning_rate": 1.6247845721155903e-05, - "loss": 0.003, - "step": 4933 - }, - { - "epoch": 3.812512067966789, - "grad_norm": 0.00988897867500782, - "learning_rate": 1.6227948531349625e-05, - "loss": 0.0029, - "step": 4934 - }, - { - "epoch": 3.813284417841282, - "grad_norm": 0.00757649214938283, - "learning_rate": 1.62080611717715e-05, - "loss": 0.0034, - "step": 4935 - }, - { - "epoch": 3.8140567677157753, - "grad_norm": 0.010603350587189198, - "learning_rate": 1.6188183648210258e-05, - "loss": 0.0032, - "step": 4936 - }, - { - "epoch": 3.8148291175902687, - "grad_norm": 0.010256056673824787, - "learning_rate": 1.6168315966451725e-05, - "loss": 0.0027, - "step": 4937 - }, - { - "epoch": 3.8156014674647616, - "grad_norm": 0.015140022151172161, - "learning_rate": 1.6148458132278948e-05, - "loss": 0.0034, - "step": 4938 - }, - { - "epoch": 3.8163738173392545, - "grad_norm": 0.011739297769963741, - "learning_rate": 1.6128610151472063e-05, - "loss": 0.003, - "step": 4939 - }, - { - "epoch": 3.817146167213748, - "grad_norm": 0.01055013108998537, - "learning_rate": 1.610877202980829e-05, - "loss": 0.0028, - "step": 4940 - }, - { - "epoch": 3.817918517088241, - "grad_norm": 0.008143901824951172, - "learning_rate": 1.608894377306207e-05, - "loss": 0.0028, - "step": 4941 - }, - { - "epoch": 3.818690866962734, - "grad_norm": 0.018344135954976082, - "learning_rate": 1.606912538700492e-05, - "loss": 0.0033, - "step": 4942 - }, - { - "epoch": 3.819463216837227, - "grad_norm": 0.012720978818833828, - "learning_rate": 1.604931687740551e-05, - "loss": 0.0032, - "step": 4943 - }, - { - "epoch": 3.8202355667117205, - "grad_norm": 0.012700404971837997, - "learning_rate": 1.6029518250029563e-05, - "loss": 0.003, - "step": 4944 - }, - { - "epoch": 3.8210079165862134, - "grad_norm": 0.01139888260513544, - "learning_rate": 1.6009729510640032e-05, - "loss": 0.0029, - "step": 4945 - }, - { - "epoch": 3.821780266460707, - "grad_norm": 0.010537398979067802, - "learning_rate": 1.5989950664996945e-05, - "loss": 0.0027, - "step": 4946 - }, - { - "epoch": 3.8225526163351997, - "grad_norm": 0.008864232338964939, - "learning_rate": 1.597018171885739e-05, - "loss": 0.0026, - "step": 4947 - }, - { - "epoch": 3.823324966209693, - "grad_norm": 0.010028931312263012, - "learning_rate": 1.595042267797569e-05, - "loss": 0.0033, - "step": 4948 - }, - { - "epoch": 3.824097316084186, - "grad_norm": 0.010445699095726013, - "learning_rate": 1.5930673548103215e-05, - "loss": 0.0033, - "step": 4949 - }, - { - "epoch": 3.8248696659586794, - "grad_norm": 0.008232830092310905, - "learning_rate": 1.5910934334988415e-05, - "loss": 0.0028, - "step": 4950 - }, - { - "epoch": 3.8256420158331723, - "grad_norm": 0.009088825434446335, - "learning_rate": 1.589120504437694e-05, - "loss": 0.0031, - "step": 4951 - }, - { - "epoch": 3.8264143657076657, - "grad_norm": 0.009279578924179077, - "learning_rate": 1.58714856820115e-05, - "loss": 0.0027, - "step": 4952 - }, - { - "epoch": 3.8271867155821586, - "grad_norm": 0.01180476974695921, - "learning_rate": 1.585177625363192e-05, - "loss": 0.0033, - "step": 4953 - }, - { - "epoch": 3.827959065456652, - "grad_norm": 0.009612584486603737, - "learning_rate": 1.583207676497515e-05, - "loss": 0.0033, - "step": 4954 - }, - { - "epoch": 3.828731415331145, - "grad_norm": 0.011616314761340618, - "learning_rate": 1.581238722177522e-05, - "loss": 0.0029, - "step": 4955 - }, - { - "epoch": 3.8295037652056383, - "grad_norm": 0.00945865735411644, - "learning_rate": 1.579270762976329e-05, - "loss": 0.0032, - "step": 4956 - }, - { - "epoch": 3.830276115080131, - "grad_norm": 0.01082943007349968, - "learning_rate": 1.577303799466761e-05, - "loss": 0.0033, - "step": 4957 - }, - { - "epoch": 3.8310484649546246, - "grad_norm": 0.009206010028719902, - "learning_rate": 1.5753378322213536e-05, - "loss": 0.0031, - "step": 4958 - }, - { - "epoch": 3.8318208148291175, - "grad_norm": 0.006946722976863384, - "learning_rate": 1.573372861812352e-05, - "loss": 0.0029, - "step": 4959 - }, - { - "epoch": 3.832593164703611, - "grad_norm": 0.010826830752193928, - "learning_rate": 1.5714088888117125e-05, - "loss": 0.003, - "step": 4960 - }, - { - "epoch": 3.833365514578104, - "grad_norm": 0.00945520494133234, - "learning_rate": 1.5694459137910996e-05, - "loss": 0.0026, - "step": 4961 - }, - { - "epoch": 3.834137864452597, - "grad_norm": 0.006998469587415457, - "learning_rate": 1.567483937321888e-05, - "loss": 0.0025, - "step": 4962 - }, - { - "epoch": 3.83491021432709, - "grad_norm": 0.008991682901978493, - "learning_rate": 1.5655229599751614e-05, - "loss": 0.0028, - "step": 4963 - }, - { - "epoch": 3.8356825642015835, - "grad_norm": 0.008646678179502487, - "learning_rate": 1.563562982321713e-05, - "loss": 0.0029, - "step": 4964 - }, - { - "epoch": 3.8364549140760764, - "grad_norm": 0.008845383301377296, - "learning_rate": 1.561604004932046e-05, - "loss": 0.003, - "step": 4965 - }, - { - "epoch": 3.8372272639505693, - "grad_norm": 0.01046202052384615, - "learning_rate": 1.559646028376369e-05, - "loss": 0.003, - "step": 4966 - }, - { - "epoch": 3.8379996138250627, - "grad_norm": 0.009699558839201927, - "learning_rate": 1.557689053224604e-05, - "loss": 0.0026, - "step": 4967 - }, - { - "epoch": 3.838771963699556, - "grad_norm": 0.009032976813614368, - "learning_rate": 1.555733080046378e-05, - "loss": 0.0029, - "step": 4968 - }, - { - "epoch": 3.839544313574049, - "grad_norm": 0.012526086531579494, - "learning_rate": 1.553778109411028e-05, - "loss": 0.003, - "step": 4969 - }, - { - "epoch": 3.840316663448542, - "grad_norm": 0.013793490827083588, - "learning_rate": 1.5518241418875985e-05, - "loss": 0.0033, - "step": 4970 - }, - { - "epoch": 3.8410890133230353, - "grad_norm": 0.008525853976607323, - "learning_rate": 1.549871178044842e-05, - "loss": 0.0033, - "step": 4971 - }, - { - "epoch": 3.8418613631975287, - "grad_norm": 0.009102314710617065, - "learning_rate": 1.54791921845122e-05, - "loss": 0.0024, - "step": 4972 - }, - { - "epoch": 3.8426337130720216, - "grad_norm": 0.012971078976988792, - "learning_rate": 1.5459682636748996e-05, - "loss": 0.0031, - "step": 4973 - }, - { - "epoch": 3.8434060629465145, - "grad_norm": 0.0119175398722291, - "learning_rate": 1.5440183142837573e-05, - "loss": 0.0033, - "step": 4974 - }, - { - "epoch": 3.844178412821008, - "grad_norm": 0.013856014236807823, - "learning_rate": 1.542069370845376e-05, - "loss": 0.0032, - "step": 4975 - }, - { - "epoch": 3.8449507626955013, - "grad_norm": 0.008841684088110924, - "learning_rate": 1.5401214339270464e-05, - "loss": 0.0033, - "step": 4976 - }, - { - "epoch": 3.845723112569994, - "grad_norm": 0.008778228424489498, - "learning_rate": 1.538174504095765e-05, - "loss": 0.0028, - "step": 4977 - }, - { - "epoch": 3.846495462444487, - "grad_norm": 0.012533146888017654, - "learning_rate": 1.536228581918237e-05, - "loss": 0.0028, - "step": 4978 - }, - { - "epoch": 3.8472678123189805, - "grad_norm": 0.009665487334132195, - "learning_rate": 1.534283667960873e-05, - "loss": 0.0028, - "step": 4979 - }, - { - "epoch": 3.848040162193474, - "grad_norm": 0.009867614135146141, - "learning_rate": 1.5323397627897905e-05, - "loss": 0.0031, - "step": 4980 - }, - { - "epoch": 3.848812512067967, - "grad_norm": 0.012394512072205544, - "learning_rate": 1.5303968669708128e-05, - "loss": 0.0032, - "step": 4981 - }, - { - "epoch": 3.8495848619424597, - "grad_norm": 0.007355043664574623, - "learning_rate": 1.5284549810694705e-05, - "loss": 0.0026, - "step": 4982 - }, - { - "epoch": 3.850357211816953, - "grad_norm": 0.008631830103695393, - "learning_rate": 1.5265141056509996e-05, - "loss": 0.0029, - "step": 4983 - }, - { - "epoch": 3.8511295616914465, - "grad_norm": 0.008126567117869854, - "learning_rate": 1.5245742412803421e-05, - "loss": 0.0028, - "step": 4984 - }, - { - "epoch": 3.8519019115659394, - "grad_norm": 0.01578478515148163, - "learning_rate": 1.5226353885221433e-05, - "loss": 0.0032, - "step": 4985 - }, - { - "epoch": 3.8526742614404323, - "grad_norm": 0.009282187558710575, - "learning_rate": 1.5206975479407626e-05, - "loss": 0.0029, - "step": 4986 - }, - { - "epoch": 3.8534466113149257, - "grad_norm": 0.008422215469181538, - "learning_rate": 1.5187607201002524e-05, - "loss": 0.0032, - "step": 4987 - }, - { - "epoch": 3.8542189611894186, - "grad_norm": 0.008351623080670834, - "learning_rate": 1.5168249055643768e-05, - "loss": 0.0032, - "step": 4988 - }, - { - "epoch": 3.854991311063912, - "grad_norm": 0.010155430994927883, - "learning_rate": 1.5148901048966102e-05, - "loss": 0.003, - "step": 4989 - }, - { - "epoch": 3.855763660938405, - "grad_norm": 0.007980355992913246, - "learning_rate": 1.512956318660121e-05, - "loss": 0.0031, - "step": 4990 - }, - { - "epoch": 3.8565360108128983, - "grad_norm": 0.011200337670743465, - "learning_rate": 1.5110235474177876e-05, - "loss": 0.0031, - "step": 4991 - }, - { - "epoch": 3.8573083606873912, - "grad_norm": 0.01221932377666235, - "learning_rate": 1.5090917917321974e-05, - "loss": 0.0034, - "step": 4992 - }, - { - "epoch": 3.8580807105618846, - "grad_norm": 0.009794541634619236, - "learning_rate": 1.5071610521656337e-05, - "loss": 0.0028, - "step": 4993 - }, - { - "epoch": 3.8588530604363775, - "grad_norm": 0.012350419536232948, - "learning_rate": 1.505231329280088e-05, - "loss": 0.0029, - "step": 4994 - }, - { - "epoch": 3.859625410310871, - "grad_norm": 0.007574809715151787, - "learning_rate": 1.5033026236372588e-05, - "loss": 0.0031, - "step": 4995 - }, - { - "epoch": 3.860397760185364, - "grad_norm": 0.008688594214618206, - "learning_rate": 1.5013749357985462e-05, - "loss": 0.003, - "step": 4996 - }, - { - "epoch": 3.861170110059857, - "grad_norm": 0.0109042227268219, - "learning_rate": 1.4994482663250504e-05, - "loss": 0.0027, - "step": 4997 - }, - { - "epoch": 3.86194245993435, - "grad_norm": 0.010012637823820114, - "learning_rate": 1.497522615777578e-05, - "loss": 0.0033, - "step": 4998 - }, - { - "epoch": 3.8627148098088435, - "grad_norm": 0.00960414670407772, - "learning_rate": 1.4955979847166434e-05, - "loss": 0.0027, - "step": 4999 - }, - { - "epoch": 3.8634871596833364, - "grad_norm": 0.007731981575489044, - "learning_rate": 1.4936743737024572e-05, - "loss": 0.0026, - "step": 5000 - }, - { - "epoch": 3.86425950955783, - "grad_norm": 0.010577595792710781, - "learning_rate": 1.4917517832949346e-05, - "loss": 0.003, - "step": 5001 - }, - { - "epoch": 3.8650318594323227, - "grad_norm": 0.009672565385699272, - "learning_rate": 1.4898302140537008e-05, - "loss": 0.0026, - "step": 5002 - }, - { - "epoch": 3.865804209306816, - "grad_norm": 0.009176766499876976, - "learning_rate": 1.4879096665380727e-05, - "loss": 0.003, - "step": 5003 - }, - { - "epoch": 3.866576559181309, - "grad_norm": 0.018057817593216896, - "learning_rate": 1.485990141307076e-05, - "loss": 0.0029, - "step": 5004 - }, - { - "epoch": 3.8673489090558024, - "grad_norm": 0.019623538479208946, - "learning_rate": 1.484071638919441e-05, - "loss": 0.0029, - "step": 5005 - }, - { - "epoch": 3.8681212589302953, - "grad_norm": 0.009910893626511097, - "learning_rate": 1.4821541599335976e-05, - "loss": 0.0024, - "step": 5006 - }, - { - "epoch": 3.8688936088047887, - "grad_norm": 0.00982673466205597, - "learning_rate": 1.4802377049076721e-05, - "loss": 0.003, - "step": 5007 - }, - { - "epoch": 3.8696659586792816, - "grad_norm": 0.015128890983760357, - "learning_rate": 1.4783222743995034e-05, - "loss": 0.0031, - "step": 5008 - }, - { - "epoch": 3.870438308553775, - "grad_norm": 0.008337433449923992, - "learning_rate": 1.4764078689666272e-05, - "loss": 0.0029, - "step": 5009 - }, - { - "epoch": 3.871210658428268, - "grad_norm": 0.014315987937152386, - "learning_rate": 1.474494489166276e-05, - "loss": 0.0034, - "step": 5010 - }, - { - "epoch": 3.8719830083027613, - "grad_norm": 0.014011272229254246, - "learning_rate": 1.4725821355553931e-05, - "loss": 0.0028, - "step": 5011 - }, - { - "epoch": 3.8727553581772542, - "grad_norm": 0.010705845430493355, - "learning_rate": 1.4706708086906185e-05, - "loss": 0.0026, - "step": 5012 - }, - { - "epoch": 3.873527708051747, - "grad_norm": 0.008886613883078098, - "learning_rate": 1.468760509128288e-05, - "loss": 0.0031, - "step": 5013 - }, - { - "epoch": 3.8743000579262405, - "grad_norm": 0.016048626974225044, - "learning_rate": 1.4668512374244492e-05, - "loss": 0.0031, - "step": 5014 - }, - { - "epoch": 3.875072407800734, - "grad_norm": 0.016248665750026703, - "learning_rate": 1.4649429941348424e-05, - "loss": 0.0031, - "step": 5015 - }, - { - "epoch": 3.875844757675227, - "grad_norm": 0.009499805979430676, - "learning_rate": 1.463035779814912e-05, - "loss": 0.0032, - "step": 5016 - }, - { - "epoch": 3.8766171075497198, - "grad_norm": 0.02029765024781227, - "learning_rate": 1.4611295950198018e-05, - "loss": 0.0035, - "step": 5017 - }, - { - "epoch": 3.877389457424213, - "grad_norm": 0.008975028991699219, - "learning_rate": 1.459224440304356e-05, - "loss": 0.0031, - "step": 5018 - }, - { - "epoch": 3.8781618072987065, - "grad_norm": 0.010319342836737633, - "learning_rate": 1.4573203162231187e-05, - "loss": 0.0034, - "step": 5019 - }, - { - "epoch": 3.8789341571731994, - "grad_norm": 0.008209176361560822, - "learning_rate": 1.4554172233303349e-05, - "loss": 0.003, - "step": 5020 - }, - { - "epoch": 3.8797065070476924, - "grad_norm": 0.0124753937125206, - "learning_rate": 1.4535151621799487e-05, - "loss": 0.0031, - "step": 5021 - }, - { - "epoch": 3.8804788569221857, - "grad_norm": 0.01752236858010292, - "learning_rate": 1.4516141333256062e-05, - "loss": 0.0028, - "step": 5022 - }, - { - "epoch": 3.881251206796679, - "grad_norm": 0.016599994152784348, - "learning_rate": 1.4497141373206458e-05, - "loss": 0.0029, - "step": 5023 - }, - { - "epoch": 3.882023556671172, - "grad_norm": 0.008728167973458767, - "learning_rate": 1.4478151747181152e-05, - "loss": 0.0031, - "step": 5024 - }, - { - "epoch": 3.882795906545665, - "grad_norm": 0.009795735590159893, - "learning_rate": 1.4459172460707555e-05, - "loss": 0.003, - "step": 5025 - }, - { - "epoch": 3.8835682564201583, - "grad_norm": 0.008861212991178036, - "learning_rate": 1.444020351931007e-05, - "loss": 0.0031, - "step": 5026 - }, - { - "epoch": 3.8843406062946517, - "grad_norm": 0.017795542255043983, - "learning_rate": 1.4421244928510097e-05, - "loss": 0.0032, - "step": 5027 - }, - { - "epoch": 3.8851129561691446, - "grad_norm": 0.01518005970865488, - "learning_rate": 1.4402296693826034e-05, - "loss": 0.0029, - "step": 5028 - }, - { - "epoch": 3.8858853060436376, - "grad_norm": 0.014253837987780571, - "learning_rate": 1.4383358820773246e-05, - "loss": 0.0027, - "step": 5029 - }, - { - "epoch": 3.886657655918131, - "grad_norm": 0.008239194750785828, - "learning_rate": 1.436443131486409e-05, - "loss": 0.0027, - "step": 5030 - }, - { - "epoch": 3.8874300057926243, - "grad_norm": 0.01097941491752863, - "learning_rate": 1.4345514181607916e-05, - "loss": 0.0032, - "step": 5031 - }, - { - "epoch": 3.8882023556671172, - "grad_norm": 0.010556716471910477, - "learning_rate": 1.432660742651103e-05, - "loss": 0.003, - "step": 5032 - }, - { - "epoch": 3.88897470554161, - "grad_norm": 0.011672616004943848, - "learning_rate": 1.4307711055076739e-05, - "loss": 0.003, - "step": 5033 - }, - { - "epoch": 3.8897470554161035, - "grad_norm": 0.016163257881999016, - "learning_rate": 1.4288825072805322e-05, - "loss": 0.0029, - "step": 5034 - }, - { - "epoch": 3.8905194052905965, - "grad_norm": 0.012452706694602966, - "learning_rate": 1.426994948519403e-05, - "loss": 0.0028, - "step": 5035 - }, - { - "epoch": 3.89129175516509, - "grad_norm": 0.009630842134356499, - "learning_rate": 1.4251084297737088e-05, - "loss": 0.003, - "step": 5036 - }, - { - "epoch": 3.8920641050395828, - "grad_norm": 0.010833237320184708, - "learning_rate": 1.4232229515925693e-05, - "loss": 0.0032, - "step": 5037 - }, - { - "epoch": 3.892836454914076, - "grad_norm": 0.009995395317673683, - "learning_rate": 1.4213385145248032e-05, - "loss": 0.003, - "step": 5038 - }, - { - "epoch": 3.893608804788569, - "grad_norm": 0.01041726116091013, - "learning_rate": 1.4194551191189226e-05, - "loss": 0.0027, - "step": 5039 - }, - { - "epoch": 3.8943811546630624, - "grad_norm": 0.010740031488239765, - "learning_rate": 1.4175727659231397e-05, - "loss": 0.0026, - "step": 5040 - }, - { - "epoch": 3.8951535045375554, - "grad_norm": 0.008188420906662941, - "learning_rate": 1.4156914554853618e-05, - "loss": 0.0027, - "step": 5041 - }, - { - "epoch": 3.8959258544120487, - "grad_norm": 0.01434328407049179, - "learning_rate": 1.4138111883531923e-05, - "loss": 0.003, - "step": 5042 - }, - { - "epoch": 3.8966982042865417, - "grad_norm": 0.007261700462549925, - "learning_rate": 1.4119319650739327e-05, - "loss": 0.0031, - "step": 5043 - }, - { - "epoch": 3.897470554161035, - "grad_norm": 0.013153079897165298, - "learning_rate": 1.4100537861945783e-05, - "loss": 0.0031, - "step": 5044 - }, - { - "epoch": 3.898242904035528, - "grad_norm": 0.008110249415040016, - "learning_rate": 1.4081766522618207e-05, - "loss": 0.0027, - "step": 5045 - }, - { - "epoch": 3.8990152539100214, - "grad_norm": 0.007599998731166124, - "learning_rate": 1.4063005638220528e-05, - "loss": 0.0031, - "step": 5046 - }, - { - "epoch": 3.8997876037845143, - "grad_norm": 0.009179320186376572, - "learning_rate": 1.4044255214213537e-05, - "loss": 0.0028, - "step": 5047 - }, - { - "epoch": 3.9005599536590077, - "grad_norm": 0.008315013721585274, - "learning_rate": 1.4025515256055034e-05, - "loss": 0.003, - "step": 5048 - }, - { - "epoch": 3.9013323035335006, - "grad_norm": 0.011417336761951447, - "learning_rate": 1.400678576919981e-05, - "loss": 0.0034, - "step": 5049 - }, - { - "epoch": 3.902104653407994, - "grad_norm": 0.011364213190972805, - "learning_rate": 1.3988066759099516e-05, - "loss": 0.0031, - "step": 5050 - }, - { - "epoch": 3.902877003282487, - "grad_norm": 0.00905610527843237, - "learning_rate": 1.3969358231202827e-05, - "loss": 0.0032, - "step": 5051 - }, - { - "epoch": 3.9036493531569803, - "grad_norm": 0.009831871837377548, - "learning_rate": 1.3950660190955345e-05, - "loss": 0.0028, - "step": 5052 - }, - { - "epoch": 3.904421703031473, - "grad_norm": 0.008532781153917313, - "learning_rate": 1.3931972643799612e-05, - "loss": 0.0031, - "step": 5053 - }, - { - "epoch": 3.9051940529059666, - "grad_norm": 0.009511567652225494, - "learning_rate": 1.391329559517512e-05, - "loss": 0.0028, - "step": 5054 - }, - { - "epoch": 3.9059664027804595, - "grad_norm": 0.01154781598597765, - "learning_rate": 1.3894629050518294e-05, - "loss": 0.003, - "step": 5055 - }, - { - "epoch": 3.906738752654953, - "grad_norm": 0.011973300948739052, - "learning_rate": 1.3875973015262561e-05, - "loss": 0.0031, - "step": 5056 - }, - { - "epoch": 3.907511102529446, - "grad_norm": 0.009173572063446045, - "learning_rate": 1.3857327494838195e-05, - "loss": 0.0032, - "step": 5057 - }, - { - "epoch": 3.908283452403939, - "grad_norm": 0.010813619941473007, - "learning_rate": 1.3838692494672462e-05, - "loss": 0.0032, - "step": 5058 - }, - { - "epoch": 3.909055802278432, - "grad_norm": 0.011844763532280922, - "learning_rate": 1.3820068020189592e-05, - "loss": 0.003, - "step": 5059 - }, - { - "epoch": 3.909828152152925, - "grad_norm": 0.010590963065624237, - "learning_rate": 1.3801454076810688e-05, - "loss": 0.0029, - "step": 5060 - }, - { - "epoch": 3.9106005020274184, - "grad_norm": 0.015272004529833794, - "learning_rate": 1.3782850669953811e-05, - "loss": 0.0031, - "step": 5061 - }, - { - "epoch": 3.9113728519019118, - "grad_norm": 0.008821163326501846, - "learning_rate": 1.3764257805034015e-05, - "loss": 0.003, - "step": 5062 - }, - { - "epoch": 3.9121452017764047, - "grad_norm": 0.008121635764837265, - "learning_rate": 1.3745675487463183e-05, - "loss": 0.003, - "step": 5063 - }, - { - "epoch": 3.9129175516508976, - "grad_norm": 0.00942297838628292, - "learning_rate": 1.3727103722650175e-05, - "loss": 0.003, - "step": 5064 - }, - { - "epoch": 3.913689901525391, - "grad_norm": 0.008933554403483868, - "learning_rate": 1.3708542516000827e-05, - "loss": 0.0034, - "step": 5065 - }, - { - "epoch": 3.9144622513998844, - "grad_norm": 0.011289010755717754, - "learning_rate": 1.3689991872917845e-05, - "loss": 0.0033, - "step": 5066 - }, - { - "epoch": 3.9152346012743773, - "grad_norm": 0.013694976456463337, - "learning_rate": 1.3671451798800833e-05, - "loss": 0.0034, - "step": 5067 - }, - { - "epoch": 3.91600695114887, - "grad_norm": 0.010910279117524624, - "learning_rate": 1.36529222990464e-05, - "loss": 0.0031, - "step": 5068 - }, - { - "epoch": 3.9167793010233636, - "grad_norm": 0.011041252873837948, - "learning_rate": 1.3634403379048038e-05, - "loss": 0.0031, - "step": 5069 - }, - { - "epoch": 3.917551650897857, - "grad_norm": 0.009513885714113712, - "learning_rate": 1.361589504419612e-05, - "loss": 0.0035, - "step": 5070 - }, - { - "epoch": 3.91832400077235, - "grad_norm": 0.010692187584936619, - "learning_rate": 1.3597397299878006e-05, - "loss": 0.0032, - "step": 5071 - }, - { - "epoch": 3.919096350646843, - "grad_norm": 0.009557679295539856, - "learning_rate": 1.3578910151477947e-05, - "loss": 0.0027, - "step": 5072 - }, - { - "epoch": 3.919868700521336, - "grad_norm": 0.009597002528607845, - "learning_rate": 1.3560433604377064e-05, - "loss": 0.0034, - "step": 5073 - }, - { - "epoch": 3.9206410503958296, - "grad_norm": 0.009098115377128124, - "learning_rate": 1.354196766395348e-05, - "loss": 0.0032, - "step": 5074 - }, - { - "epoch": 3.9214134002703225, - "grad_norm": 0.012747055850923061, - "learning_rate": 1.3523512335582166e-05, - "loss": 0.0028, - "step": 5075 - }, - { - "epoch": 3.9221857501448154, - "grad_norm": 0.008872752077877522, - "learning_rate": 1.3505067624635032e-05, - "loss": 0.0035, - "step": 5076 - }, - { - "epoch": 3.922958100019309, - "grad_norm": 0.007852994836866856, - "learning_rate": 1.3486633536480852e-05, - "loss": 0.003, - "step": 5077 - }, - { - "epoch": 3.923730449893802, - "grad_norm": 0.00892792921513319, - "learning_rate": 1.3468210076485383e-05, - "loss": 0.0032, - "step": 5078 - }, - { - "epoch": 3.924502799768295, - "grad_norm": 0.011076129972934723, - "learning_rate": 1.3449797250011247e-05, - "loss": 0.0036, - "step": 5079 - }, - { - "epoch": 3.925275149642788, - "grad_norm": 0.008503065444529057, - "learning_rate": 1.3431395062417934e-05, - "loss": 0.0029, - "step": 5080 - }, - { - "epoch": 3.9260474995172814, - "grad_norm": 0.013432437554001808, - "learning_rate": 1.3413003519061917e-05, - "loss": 0.0031, - "step": 5081 - }, - { - "epoch": 3.9268198493917743, - "grad_norm": 0.008086584508419037, - "learning_rate": 1.3394622625296533e-05, - "loss": 0.0028, - "step": 5082 - }, - { - "epoch": 3.9275921992662677, - "grad_norm": 0.009846203029155731, - "learning_rate": 1.3376252386471965e-05, - "loss": 0.003, - "step": 5083 - }, - { - "epoch": 3.9283645491407606, - "grad_norm": 0.0073504154570400715, - "learning_rate": 1.3357892807935397e-05, - "loss": 0.003, - "step": 5084 - }, - { - "epoch": 3.929136899015254, - "grad_norm": 0.009216235019266605, - "learning_rate": 1.3339543895030843e-05, - "loss": 0.0028, - "step": 5085 - }, - { - "epoch": 3.929909248889747, - "grad_norm": 0.009160725399851799, - "learning_rate": 1.3321205653099222e-05, - "loss": 0.0032, - "step": 5086 - }, - { - "epoch": 3.9306815987642403, - "grad_norm": 0.009034167975187302, - "learning_rate": 1.330287808747836e-05, - "loss": 0.003, - "step": 5087 - }, - { - "epoch": 3.931453948638733, - "grad_norm": 0.01226490642875433, - "learning_rate": 1.3284561203502965e-05, - "loss": 0.0028, - "step": 5088 - }, - { - "epoch": 3.9322262985132266, - "grad_norm": 0.008918531239032745, - "learning_rate": 1.3266255006504646e-05, - "loss": 0.0032, - "step": 5089 - }, - { - "epoch": 3.9329986483877195, - "grad_norm": 0.010417668148875237, - "learning_rate": 1.3247959501811885e-05, - "loss": 0.0031, - "step": 5090 - }, - { - "epoch": 3.933770998262213, - "grad_norm": 0.010061853565275669, - "learning_rate": 1.3229674694750066e-05, - "loss": 0.0034, - "step": 5091 - }, - { - "epoch": 3.934543348136706, - "grad_norm": 0.008060588501393795, - "learning_rate": 1.3211400590641448e-05, - "loss": 0.0029, - "step": 5092 - }, - { - "epoch": 3.935315698011199, - "grad_norm": 0.007736505940556526, - "learning_rate": 1.3193137194805195e-05, - "loss": 0.0029, - "step": 5093 - }, - { - "epoch": 3.936088047885692, - "grad_norm": 0.012635236606001854, - "learning_rate": 1.3174884512557329e-05, - "loss": 0.0029, - "step": 5094 - }, - { - "epoch": 3.9368603977601855, - "grad_norm": 0.00976151879876852, - "learning_rate": 1.3156642549210768e-05, - "loss": 0.0028, - "step": 5095 - }, - { - "epoch": 3.9376327476346784, - "grad_norm": 0.013500361703336239, - "learning_rate": 1.313841131007531e-05, - "loss": 0.0029, - "step": 5096 - }, - { - "epoch": 3.938405097509172, - "grad_norm": 0.012697826139628887, - "learning_rate": 1.3120190800457622e-05, - "loss": 0.0031, - "step": 5097 - }, - { - "epoch": 3.9391774473836647, - "grad_norm": 0.01049091387540102, - "learning_rate": 1.3101981025661258e-05, - "loss": 0.0032, - "step": 5098 - }, - { - "epoch": 3.939949797258158, - "grad_norm": 0.009147342294454575, - "learning_rate": 1.3083781990986644e-05, - "loss": 0.0024, - "step": 5099 - }, - { - "epoch": 3.940722147132651, - "grad_norm": 0.012354012578725815, - "learning_rate": 1.3065593701731077e-05, - "loss": 0.0028, - "step": 5100 - }, - { - "epoch": 3.9414944970071444, - "grad_norm": 0.012569538317620754, - "learning_rate": 1.3047416163188724e-05, - "loss": 0.0025, - "step": 5101 - }, - { - "epoch": 3.9422668468816373, - "grad_norm": 0.008328719064593315, - "learning_rate": 1.3029249380650638e-05, - "loss": 0.0028, - "step": 5102 - }, - { - "epoch": 3.9430391967561307, - "grad_norm": 0.009669464081525803, - "learning_rate": 1.3011093359404725e-05, - "loss": 0.0029, - "step": 5103 - }, - { - "epoch": 3.9438115466306236, - "grad_norm": 0.013122282922267914, - "learning_rate": 1.2992948104735763e-05, - "loss": 0.003, - "step": 5104 - }, - { - "epoch": 3.944583896505117, - "grad_norm": 0.00925926398485899, - "learning_rate": 1.2974813621925397e-05, - "loss": 0.0026, - "step": 5105 - }, - { - "epoch": 3.94535624637961, - "grad_norm": 0.010746442712843418, - "learning_rate": 1.295668991625214e-05, - "loss": 0.0029, - "step": 5106 - }, - { - "epoch": 3.946128596254103, - "grad_norm": 0.010360931977629662, - "learning_rate": 1.2938576992991364e-05, - "loss": 0.0028, - "step": 5107 - }, - { - "epoch": 3.946900946128596, - "grad_norm": 0.0118419723585248, - "learning_rate": 1.29204748574153e-05, - "loss": 0.0032, - "step": 5108 - }, - { - "epoch": 3.9476732960030896, - "grad_norm": 0.007602566387504339, - "learning_rate": 1.2902383514793043e-05, - "loss": 0.0029, - "step": 5109 - }, - { - "epoch": 3.9484456458775825, - "grad_norm": 0.01111143920570612, - "learning_rate": 1.288430297039055e-05, - "loss": 0.0035, - "step": 5110 - }, - { - "epoch": 3.9492179957520754, - "grad_norm": 0.010026851668953896, - "learning_rate": 1.286623322947062e-05, - "loss": 0.0035, - "step": 5111 - }, - { - "epoch": 3.949990345626569, - "grad_norm": 0.008931624703109264, - "learning_rate": 1.2848174297292936e-05, - "loss": 0.003, - "step": 5112 - }, - { - "epoch": 3.950762695501062, - "grad_norm": 0.013498594984412193, - "learning_rate": 1.2830126179114e-05, - "loss": 0.0031, - "step": 5113 - }, - { - "epoch": 3.951535045375555, - "grad_norm": 0.008173106238245964, - "learning_rate": 1.281208888018719e-05, - "loss": 0.0028, - "step": 5114 - }, - { - "epoch": 3.952307395250048, - "grad_norm": 0.008225626312196255, - "learning_rate": 1.2794062405762713e-05, - "loss": 0.0026, - "step": 5115 - }, - { - "epoch": 3.9530797451245414, - "grad_norm": 0.00897209718823433, - "learning_rate": 1.2776046761087684e-05, - "loss": 0.0027, - "step": 5116 - }, - { - "epoch": 3.953852094999035, - "grad_norm": 0.009104445576667786, - "learning_rate": 1.275804195140598e-05, - "loss": 0.0028, - "step": 5117 - }, - { - "epoch": 3.9546244448735277, - "grad_norm": 0.008700024336576462, - "learning_rate": 1.2740047981958364e-05, - "loss": 0.0029, - "step": 5118 - }, - { - "epoch": 3.9553967947480206, - "grad_norm": 0.007199831306934357, - "learning_rate": 1.2722064857982486e-05, - "loss": 0.0025, - "step": 5119 - }, - { - "epoch": 3.956169144622514, - "grad_norm": 0.013268791139125824, - "learning_rate": 1.2704092584712762e-05, - "loss": 0.0029, - "step": 5120 - }, - { - "epoch": 3.9569414944970074, - "grad_norm": 0.009150993078947067, - "learning_rate": 1.2686131167380477e-05, - "loss": 0.003, - "step": 5121 - }, - { - "epoch": 3.9577138443715003, - "grad_norm": 0.013867981731891632, - "learning_rate": 1.266818061121382e-05, - "loss": 0.0027, - "step": 5122 - }, - { - "epoch": 3.9584861942459932, - "grad_norm": 0.008802138268947601, - "learning_rate": 1.2650240921437716e-05, - "loss": 0.0031, - "step": 5123 - }, - { - "epoch": 3.9592585441204866, - "grad_norm": 0.008668835274875164, - "learning_rate": 1.2632312103273974e-05, - "loss": 0.0026, - "step": 5124 - }, - { - "epoch": 3.96003089399498, - "grad_norm": 0.014852354303002357, - "learning_rate": 1.2614394161941267e-05, - "loss": 0.0029, - "step": 5125 - }, - { - "epoch": 3.960803243869473, - "grad_norm": 0.018547803163528442, - "learning_rate": 1.259648710265508e-05, - "loss": 0.0036, - "step": 5126 - }, - { - "epoch": 3.961575593743966, - "grad_norm": 0.008425499312579632, - "learning_rate": 1.2578590930627677e-05, - "loss": 0.003, - "step": 5127 - }, - { - "epoch": 3.962347943618459, - "grad_norm": 0.007943299598991871, - "learning_rate": 1.256070565106825e-05, - "loss": 0.003, - "step": 5128 - }, - { - "epoch": 3.9631202934929526, - "grad_norm": 0.00888790562748909, - "learning_rate": 1.2542831269182764e-05, - "loss": 0.003, - "step": 5129 - }, - { - "epoch": 3.9638926433674455, - "grad_norm": 0.009093926288187504, - "learning_rate": 1.2524967790174003e-05, - "loss": 0.0028, - "step": 5130 - }, - { - "epoch": 3.9646649932419384, - "grad_norm": 0.016038067638874054, - "learning_rate": 1.2507115219241577e-05, - "loss": 0.003, - "step": 5131 - }, - { - "epoch": 3.965437343116432, - "grad_norm": 0.012677005492150784, - "learning_rate": 1.2489273561581999e-05, - "loss": 0.0031, - "step": 5132 - }, - { - "epoch": 3.9662096929909247, - "grad_norm": 0.018096787855029106, - "learning_rate": 1.2471442822388485e-05, - "loss": 0.0034, - "step": 5133 - }, - { - "epoch": 3.966982042865418, - "grad_norm": 0.01353723555803299, - "learning_rate": 1.2453623006851145e-05, - "loss": 0.003, - "step": 5134 - }, - { - "epoch": 3.967754392739911, - "grad_norm": 0.009085814468562603, - "learning_rate": 1.243581412015692e-05, - "loss": 0.0034, - "step": 5135 - }, - { - "epoch": 3.9685267426144044, - "grad_norm": 0.009171192534267902, - "learning_rate": 1.241801616748955e-05, - "loss": 0.0034, - "step": 5136 - }, - { - "epoch": 3.9692990924888973, - "grad_norm": 0.008186507038772106, - "learning_rate": 1.2400229154029541e-05, - "loss": 0.0026, - "step": 5137 - }, - { - "epoch": 3.9700714423633907, - "grad_norm": 0.010795575566589832, - "learning_rate": 1.238245308495431e-05, - "loss": 0.0028, - "step": 5138 - }, - { - "epoch": 3.9708437922378836, - "grad_norm": 0.010350009426474571, - "learning_rate": 1.2364687965438033e-05, - "loss": 0.0026, - "step": 5139 - }, - { - "epoch": 3.971616142112377, - "grad_norm": 0.008702244609594345, - "learning_rate": 1.2346933800651678e-05, - "loss": 0.0028, - "step": 5140 - }, - { - "epoch": 3.97238849198687, - "grad_norm": 0.010398447513580322, - "learning_rate": 1.2329190595763085e-05, - "loss": 0.0029, - "step": 5141 - }, - { - "epoch": 3.9731608418613633, - "grad_norm": 0.01263085100799799, - "learning_rate": 1.2311458355936872e-05, - "loss": 0.0032, - "step": 5142 - }, - { - "epoch": 3.9739331917358562, - "grad_norm": 0.015385749749839306, - "learning_rate": 1.2293737086334433e-05, - "loss": 0.0032, - "step": 5143 - }, - { - "epoch": 3.9747055416103496, - "grad_norm": 0.012905635870993137, - "learning_rate": 1.2276026792114037e-05, - "loss": 0.0032, - "step": 5144 - }, - { - "epoch": 3.9754778914848425, - "grad_norm": 0.009418494068086147, - "learning_rate": 1.2258327478430704e-05, - "loss": 0.003, - "step": 5145 - }, - { - "epoch": 3.976250241359336, - "grad_norm": 0.01325889304280281, - "learning_rate": 1.224063915043629e-05, - "loss": 0.0032, - "step": 5146 - }, - { - "epoch": 3.977022591233829, - "grad_norm": 0.00991750042885542, - "learning_rate": 1.2222961813279426e-05, - "loss": 0.0028, - "step": 5147 - }, - { - "epoch": 3.977794941108322, - "grad_norm": 0.009794146753847599, - "learning_rate": 1.220529547210556e-05, - "loss": 0.0029, - "step": 5148 - }, - { - "epoch": 3.978567290982815, - "grad_norm": 0.011819394305348396, - "learning_rate": 1.2187640132056949e-05, - "loss": 0.0027, - "step": 5149 - }, - { - "epoch": 3.9793396408573085, - "grad_norm": 0.010317685082554817, - "learning_rate": 1.2169995798272622e-05, - "loss": 0.0034, - "step": 5150 - }, - { - "epoch": 3.9801119907318014, - "grad_norm": 0.00700108427554369, - "learning_rate": 1.2152362475888424e-05, - "loss": 0.0028, - "step": 5151 - }, - { - "epoch": 3.980884340606295, - "grad_norm": 0.00885722879320383, - "learning_rate": 1.213474017003699e-05, - "loss": 0.0031, - "step": 5152 - }, - { - "epoch": 3.9816566904807877, - "grad_norm": 0.007584693841636181, - "learning_rate": 1.2117128885847745e-05, - "loss": 0.0029, - "step": 5153 - }, - { - "epoch": 3.9824290403552807, - "grad_norm": 0.010611428879201412, - "learning_rate": 1.2099528628446905e-05, - "loss": 0.0033, - "step": 5154 - }, - { - "epoch": 3.983201390229774, - "grad_norm": 0.00826264638453722, - "learning_rate": 1.2081939402957487e-05, - "loss": 0.0029, - "step": 5155 - }, - { - "epoch": 3.9839737401042674, - "grad_norm": 0.008506223559379578, - "learning_rate": 1.2064361214499292e-05, - "loss": 0.0027, - "step": 5156 - }, - { - "epoch": 3.9847460899787603, - "grad_norm": 0.011393384076654911, - "learning_rate": 1.2046794068188893e-05, - "loss": 0.003, - "step": 5157 - }, - { - "epoch": 3.9855184398532533, - "grad_norm": 0.006410202011466026, - "learning_rate": 1.2029237969139673e-05, - "loss": 0.0028, - "step": 5158 - }, - { - "epoch": 3.9862907897277466, - "grad_norm": 0.008192823268473148, - "learning_rate": 1.2011692922461782e-05, - "loss": 0.0028, - "step": 5159 - }, - { - "epoch": 3.98706313960224, - "grad_norm": 0.009260269813239574, - "learning_rate": 1.1994158933262161e-05, - "loss": 0.0031, - "step": 5160 - }, - { - "epoch": 3.987835489476733, - "grad_norm": 0.00856191385537386, - "learning_rate": 1.1976636006644531e-05, - "loss": 0.0031, - "step": 5161 - }, - { - "epoch": 3.988607839351226, - "grad_norm": 0.008363443426787853, - "learning_rate": 1.19591241477094e-05, - "loss": 0.0029, - "step": 5162 - }, - { - "epoch": 3.9893801892257192, - "grad_norm": 0.01415453851222992, - "learning_rate": 1.1941623361554034e-05, - "loss": 0.003, - "step": 5163 - }, - { - "epoch": 3.9901525391002126, - "grad_norm": 0.011211784556508064, - "learning_rate": 1.192413365327249e-05, - "loss": 0.0035, - "step": 5164 - }, - { - "epoch": 3.9909248889747055, - "grad_norm": 0.008803755044937134, - "learning_rate": 1.190665502795561e-05, - "loss": 0.0027, - "step": 5165 - }, - { - "epoch": 3.9916972388491985, - "grad_norm": 0.010068275965750217, - "learning_rate": 1.188918749069099e-05, - "loss": 0.0032, - "step": 5166 - }, - { - "epoch": 3.992469588723692, - "grad_norm": 0.011156435124576092, - "learning_rate": 1.1871731046563017e-05, - "loss": 0.003, - "step": 5167 - }, - { - "epoch": 3.993241938598185, - "grad_norm": 0.015244616195559502, - "learning_rate": 1.1854285700652828e-05, - "loss": 0.0033, - "step": 5168 - }, - { - "epoch": 3.994014288472678, - "grad_norm": 0.007877008058130741, - "learning_rate": 1.1836851458038351e-05, - "loss": 0.0028, - "step": 5169 - }, - { - "epoch": 3.994786638347171, - "grad_norm": 0.012196575291454792, - "learning_rate": 1.1819428323794274e-05, - "loss": 0.0035, - "step": 5170 - }, - { - "epoch": 3.9955589882216644, - "grad_norm": 0.00742301857098937, - "learning_rate": 1.1802016302992042e-05, - "loss": 0.0033, - "step": 5171 - }, - { - "epoch": 3.996331338096158, - "grad_norm": 0.008889246731996536, - "learning_rate": 1.1784615400699878e-05, - "loss": 0.0029, - "step": 5172 - }, - { - "epoch": 3.9971036879706507, - "grad_norm": 0.009742755442857742, - "learning_rate": 1.1767225621982764e-05, - "loss": 0.0033, - "step": 5173 - }, - { - "epoch": 3.9978760378451437, - "grad_norm": 0.00847399327903986, - "learning_rate": 1.1749846971902446e-05, - "loss": 0.0033, - "step": 5174 - }, - { - "epoch": 3.998648387719637, - "grad_norm": 0.010470501147210598, - "learning_rate": 1.1732479455517425e-05, - "loss": 0.0029, - "step": 5175 - }, - { - "epoch": 3.9994207375941304, - "grad_norm": 0.008755206130445004, - "learning_rate": 1.1715123077882972e-05, - "loss": 0.0029, - "step": 5176 - } - ], - "logging_steps": 1, - "max_steps": 6470, - "num_input_tokens_seen": 0, - "num_train_epochs": 5, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 5.604065328444211e+19, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}