{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100.0, "global_step": 22434, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001337255950788981, "grad_norm": 6.774209976196289, "learning_rate": 2.967359050445104e-08, "loss": 1.4655, "step": 1 }, { "epoch": 0.0002674511901577962, "grad_norm": 11.749982833862305, "learning_rate": 5.934718100890208e-08, "loss": 1.4375, "step": 2 }, { "epoch": 0.0004011767852366943, "grad_norm": 7.676640033721924, "learning_rate": 8.902077151335312e-08, "loss": 1.2687, "step": 3 }, { "epoch": 0.0005349023803155924, "grad_norm": 3.7812612056732178, "learning_rate": 1.1869436201780416e-07, "loss": 1.3207, "step": 4 }, { "epoch": 0.0006686279753944905, "grad_norm": 4.283384799957275, "learning_rate": 1.4836795252225522e-07, "loss": 1.2081, "step": 5 }, { "epoch": 0.0008023535704733886, "grad_norm": 4.2286176681518555, "learning_rate": 1.7804154302670624e-07, "loss": 1.2178, "step": 6 }, { "epoch": 0.0009360791655522868, "grad_norm": 3.7192041873931885, "learning_rate": 2.0771513353115727e-07, "loss": 1.1508, "step": 7 }, { "epoch": 0.0010698047606311847, "grad_norm": 3.5845236778259277, "learning_rate": 2.3738872403560833e-07, "loss": 1.2143, "step": 8 }, { "epoch": 0.0012035303557100829, "grad_norm": 3.5265390872955322, "learning_rate": 2.6706231454005935e-07, "loss": 1.1649, "step": 9 }, { "epoch": 0.001337255950788981, "grad_norm": 3.386852979660034, "learning_rate": 2.9673590504451043e-07, "loss": 1.124, "step": 10 }, { "epoch": 0.001470981545867879, "grad_norm": 3.518141031265259, "learning_rate": 3.2640949554896146e-07, "loss": 1.1468, "step": 11 }, { "epoch": 0.0016047071409467772, "grad_norm": 4.059430122375488, "learning_rate": 3.560830860534125e-07, "loss": 1.2089, "step": 12 }, { "epoch": 0.0017384327360256753, "grad_norm": 3.551931142807007, "learning_rate": 3.857566765578635e-07, "loss": 1.2732, "step": 13 }, { "epoch": 0.0018721583311045735, "grad_norm": 3.1022541522979736, "learning_rate": 4.1543026706231454e-07, "loss": 1.1854, "step": 14 }, { "epoch": 0.0020058839261834715, "grad_norm": 3.359593391418457, "learning_rate": 4.451038575667656e-07, "loss": 1.1648, "step": 15 }, { "epoch": 0.0021396095212623694, "grad_norm": 3.3408291339874268, "learning_rate": 4.7477744807121665e-07, "loss": 1.0957, "step": 16 }, { "epoch": 0.002273335116341268, "grad_norm": 3.615788698196411, "learning_rate": 5.044510385756677e-07, "loss": 1.2142, "step": 17 }, { "epoch": 0.0024070607114201658, "grad_norm": 3.673964738845825, "learning_rate": 5.341246290801187e-07, "loss": 1.1817, "step": 18 }, { "epoch": 0.0025407863064990637, "grad_norm": 4.2981719970703125, "learning_rate": 5.637982195845697e-07, "loss": 1.1683, "step": 19 }, { "epoch": 0.002674511901577962, "grad_norm": 3.8127074241638184, "learning_rate": 5.934718100890209e-07, "loss": 1.1264, "step": 20 }, { "epoch": 0.00280823749665686, "grad_norm": 3.329232692718506, "learning_rate": 6.231454005934719e-07, "loss": 1.1128, "step": 21 }, { "epoch": 0.002941963091735758, "grad_norm": 3.3310046195983887, "learning_rate": 6.528189910979229e-07, "loss": 1.01, "step": 22 }, { "epoch": 0.0030756886868146564, "grad_norm": 3.648263454437256, "learning_rate": 6.82492581602374e-07, "loss": 1.2336, "step": 23 }, { "epoch": 0.0032094142818935543, "grad_norm": 3.3319995403289795, "learning_rate": 7.12166172106825e-07, "loss": 1.109, "step": 24 }, { "epoch": 0.0033431398769724527, "grad_norm": 3.712541341781616, "learning_rate": 7.41839762611276e-07, "loss": 1.1112, "step": 25 }, { "epoch": 0.0034768654720513507, "grad_norm": 3.5079431533813477, "learning_rate": 7.71513353115727e-07, "loss": 1.1406, "step": 26 }, { "epoch": 0.0036105910671302486, "grad_norm": 3.2847282886505127, "learning_rate": 8.011869436201782e-07, "loss": 1.0433, "step": 27 }, { "epoch": 0.003744316662209147, "grad_norm": 3.288848876953125, "learning_rate": 8.308605341246291e-07, "loss": 1.0544, "step": 28 }, { "epoch": 0.003878042257288045, "grad_norm": 3.266932249069214, "learning_rate": 8.605341246290802e-07, "loss": 1.1127, "step": 29 }, { "epoch": 0.004011767852366943, "grad_norm": 3.22568416595459, "learning_rate": 8.902077151335312e-07, "loss": 1.057, "step": 30 }, { "epoch": 0.004145493447445841, "grad_norm": 3.4046053886413574, "learning_rate": 9.198813056379823e-07, "loss": 1.1928, "step": 31 }, { "epoch": 0.004279219042524739, "grad_norm": 3.596522569656372, "learning_rate": 9.495548961424333e-07, "loss": 1.137, "step": 32 }, { "epoch": 0.004412944637603638, "grad_norm": 3.5023550987243652, "learning_rate": 9.792284866468842e-07, "loss": 1.0956, "step": 33 }, { "epoch": 0.004546670232682536, "grad_norm": 3.349449396133423, "learning_rate": 1.0089020771513354e-06, "loss": 1.1355, "step": 34 }, { "epoch": 0.004680395827761434, "grad_norm": 3.386521577835083, "learning_rate": 1.0385756676557865e-06, "loss": 1.1712, "step": 35 }, { "epoch": 0.0048141214228403315, "grad_norm": 3.3925702571868896, "learning_rate": 1.0682492581602374e-06, "loss": 1.1696, "step": 36 }, { "epoch": 0.0049478470179192295, "grad_norm": 3.2904958724975586, "learning_rate": 1.0979228486646885e-06, "loss": 1.0973, "step": 37 }, { "epoch": 0.005081572612998127, "grad_norm": 4.166140079498291, "learning_rate": 1.1275964391691395e-06, "loss": 1.0486, "step": 38 }, { "epoch": 0.005215298208077026, "grad_norm": 3.4132537841796875, "learning_rate": 1.1572700296735906e-06, "loss": 1.0377, "step": 39 }, { "epoch": 0.005349023803155924, "grad_norm": 3.469123363494873, "learning_rate": 1.1869436201780417e-06, "loss": 1.1972, "step": 40 }, { "epoch": 0.005482749398234822, "grad_norm": 3.132411003112793, "learning_rate": 1.2166172106824927e-06, "loss": 1.1215, "step": 41 }, { "epoch": 0.00561647499331372, "grad_norm": 3.9650537967681885, "learning_rate": 1.2462908011869438e-06, "loss": 1.1805, "step": 42 }, { "epoch": 0.005750200588392618, "grad_norm": 3.179628610610962, "learning_rate": 1.2759643916913947e-06, "loss": 1.1203, "step": 43 }, { "epoch": 0.005883926183471516, "grad_norm": 3.265901803970337, "learning_rate": 1.3056379821958458e-06, "loss": 1.1642, "step": 44 }, { "epoch": 0.006017651778550415, "grad_norm": 3.1950249671936035, "learning_rate": 1.335311572700297e-06, "loss": 1.1927, "step": 45 }, { "epoch": 0.006151377373629313, "grad_norm": 3.372641086578369, "learning_rate": 1.364985163204748e-06, "loss": 1.1282, "step": 46 }, { "epoch": 0.006285102968708211, "grad_norm": 3.790271043777466, "learning_rate": 1.394658753709199e-06, "loss": 1.1221, "step": 47 }, { "epoch": 0.006418828563787109, "grad_norm": 3.1019206047058105, "learning_rate": 1.42433234421365e-06, "loss": 1.1608, "step": 48 }, { "epoch": 0.006552554158866007, "grad_norm": 3.0705389976501465, "learning_rate": 1.4540059347181009e-06, "loss": 1.2259, "step": 49 }, { "epoch": 0.0066862797539449055, "grad_norm": 3.6250131130218506, "learning_rate": 1.483679525222552e-06, "loss": 1.1472, "step": 50 }, { "epoch": 0.006820005349023803, "grad_norm": 3.295196056365967, "learning_rate": 1.5133531157270031e-06, "loss": 1.0226, "step": 51 }, { "epoch": 0.006953730944102701, "grad_norm": 3.25294828414917, "learning_rate": 1.543026706231454e-06, "loss": 1.1366, "step": 52 }, { "epoch": 0.007087456539181599, "grad_norm": 4.034140586853027, "learning_rate": 1.572700296735905e-06, "loss": 1.0519, "step": 53 }, { "epoch": 0.007221182134260497, "grad_norm": 3.0847480297088623, "learning_rate": 1.6023738872403563e-06, "loss": 1.111, "step": 54 }, { "epoch": 0.007354907729339395, "grad_norm": 3.3600568771362305, "learning_rate": 1.6320474777448073e-06, "loss": 1.0352, "step": 55 }, { "epoch": 0.007488633324418294, "grad_norm": 3.2436251640319824, "learning_rate": 1.6617210682492582e-06, "loss": 1.0888, "step": 56 }, { "epoch": 0.007622358919497192, "grad_norm": 3.2049760818481445, "learning_rate": 1.6913946587537095e-06, "loss": 0.9075, "step": 57 }, { "epoch": 0.00775608451457609, "grad_norm": 3.4373586177825928, "learning_rate": 1.7210682492581604e-06, "loss": 1.1023, "step": 58 }, { "epoch": 0.007889810109654989, "grad_norm": 2.9051406383514404, "learning_rate": 1.7507418397626114e-06, "loss": 1.05, "step": 59 }, { "epoch": 0.008023535704733886, "grad_norm": 3.336681604385376, "learning_rate": 1.7804154302670625e-06, "loss": 1.1113, "step": 60 }, { "epoch": 0.008157261299812785, "grad_norm": 3.5226316452026367, "learning_rate": 1.8100890207715136e-06, "loss": 1.1932, "step": 61 }, { "epoch": 0.008290986894891682, "grad_norm": 3.498497247695923, "learning_rate": 1.8397626112759646e-06, "loss": 0.93, "step": 62 }, { "epoch": 0.00842471248997058, "grad_norm": 3.246614694595337, "learning_rate": 1.8694362017804155e-06, "loss": 1.0801, "step": 63 }, { "epoch": 0.008558438085049478, "grad_norm": 3.0907139778137207, "learning_rate": 1.8991097922848666e-06, "loss": 1.046, "step": 64 }, { "epoch": 0.008692163680128377, "grad_norm": 3.011974811553955, "learning_rate": 1.9287833827893175e-06, "loss": 0.966, "step": 65 }, { "epoch": 0.008825889275207275, "grad_norm": 3.368985176086426, "learning_rate": 1.9584569732937684e-06, "loss": 1.1601, "step": 66 }, { "epoch": 0.008959614870286172, "grad_norm": 3.3994250297546387, "learning_rate": 1.98813056379822e-06, "loss": 0.979, "step": 67 }, { "epoch": 0.009093340465365071, "grad_norm": 3.004976272583008, "learning_rate": 2.0178041543026707e-06, "loss": 1.0298, "step": 68 }, { "epoch": 0.009227066060443968, "grad_norm": 3.456462860107422, "learning_rate": 2.0474777448071216e-06, "loss": 1.0068, "step": 69 }, { "epoch": 0.009360791655522867, "grad_norm": 3.3445851802825928, "learning_rate": 2.077151335311573e-06, "loss": 1.1225, "step": 70 }, { "epoch": 0.009494517250601766, "grad_norm": 3.390965461730957, "learning_rate": 2.106824925816024e-06, "loss": 1.0308, "step": 71 }, { "epoch": 0.009628242845680663, "grad_norm": 2.8472771644592285, "learning_rate": 2.136498516320475e-06, "loss": 1.0652, "step": 72 }, { "epoch": 0.009761968440759562, "grad_norm": 3.320106029510498, "learning_rate": 2.166172106824926e-06, "loss": 0.9226, "step": 73 }, { "epoch": 0.009895694035838459, "grad_norm": 3.0391008853912354, "learning_rate": 2.195845697329377e-06, "loss": 1.0265, "step": 74 }, { "epoch": 0.010029419630917358, "grad_norm": 3.1674184799194336, "learning_rate": 2.225519287833828e-06, "loss": 1.0753, "step": 75 }, { "epoch": 0.010163145225996255, "grad_norm": 3.367748737335205, "learning_rate": 2.255192878338279e-06, "loss": 1.1706, "step": 76 }, { "epoch": 0.010296870821075154, "grad_norm": 3.194279432296753, "learning_rate": 2.2848664688427303e-06, "loss": 1.0591, "step": 77 }, { "epoch": 0.010430596416154053, "grad_norm": 3.1541213989257812, "learning_rate": 2.314540059347181e-06, "loss": 1.0715, "step": 78 }, { "epoch": 0.01056432201123295, "grad_norm": 3.121962308883667, "learning_rate": 2.344213649851632e-06, "loss": 1.0299, "step": 79 }, { "epoch": 0.010698047606311848, "grad_norm": 2.9535934925079346, "learning_rate": 2.3738872403560835e-06, "loss": 1.1894, "step": 80 }, { "epoch": 0.010831773201390745, "grad_norm": 3.1300742626190186, "learning_rate": 2.4035608308605344e-06, "loss": 1.0436, "step": 81 }, { "epoch": 0.010965498796469644, "grad_norm": 2.9919726848602295, "learning_rate": 2.4332344213649853e-06, "loss": 1.0633, "step": 82 }, { "epoch": 0.011099224391548543, "grad_norm": 2.8531136512756348, "learning_rate": 2.4629080118694367e-06, "loss": 1.1246, "step": 83 }, { "epoch": 0.01123294998662744, "grad_norm": 3.0183017253875732, "learning_rate": 2.4925816023738876e-06, "loss": 1.0557, "step": 84 }, { "epoch": 0.011366675581706339, "grad_norm": 3.064335823059082, "learning_rate": 2.5222551928783385e-06, "loss": 1.1713, "step": 85 }, { "epoch": 0.011500401176785236, "grad_norm": 3.0422163009643555, "learning_rate": 2.5519287833827894e-06, "loss": 1.2682, "step": 86 }, { "epoch": 0.011634126771864135, "grad_norm": 3.1033763885498047, "learning_rate": 2.5816023738872403e-06, "loss": 0.9758, "step": 87 }, { "epoch": 0.011767852366943032, "grad_norm": 3.202234983444214, "learning_rate": 2.6112759643916917e-06, "loss": 1.0183, "step": 88 }, { "epoch": 0.01190157796202193, "grad_norm": 2.9911811351776123, "learning_rate": 2.6409495548961426e-06, "loss": 1.0509, "step": 89 }, { "epoch": 0.01203530355710083, "grad_norm": 3.1936240196228027, "learning_rate": 2.670623145400594e-06, "loss": 1.0976, "step": 90 }, { "epoch": 0.012169029152179727, "grad_norm": 3.0125584602355957, "learning_rate": 2.700296735905045e-06, "loss": 1.0595, "step": 91 }, { "epoch": 0.012302754747258626, "grad_norm": 2.8042609691619873, "learning_rate": 2.729970326409496e-06, "loss": 1.0585, "step": 92 }, { "epoch": 0.012436480342337523, "grad_norm": 2.827275514602661, "learning_rate": 2.7596439169139467e-06, "loss": 1.0734, "step": 93 }, { "epoch": 0.012570205937416421, "grad_norm": 3.0967447757720947, "learning_rate": 2.789317507418398e-06, "loss": 0.9853, "step": 94 }, { "epoch": 0.01270393153249532, "grad_norm": 3.454965114593506, "learning_rate": 2.818991097922849e-06, "loss": 1.231, "step": 95 }, { "epoch": 0.012837657127574217, "grad_norm": 2.7370834350585938, "learning_rate": 2.8486646884273e-06, "loss": 0.9767, "step": 96 }, { "epoch": 0.012971382722653116, "grad_norm": 2.853695869445801, "learning_rate": 2.878338278931751e-06, "loss": 1.0175, "step": 97 }, { "epoch": 0.013105108317732013, "grad_norm": 3.0259323120117188, "learning_rate": 2.9080118694362018e-06, "loss": 1.0749, "step": 98 }, { "epoch": 0.013238833912810912, "grad_norm": 3.0562210083007812, "learning_rate": 2.937685459940653e-06, "loss": 1.0606, "step": 99 }, { "epoch": 0.013372559507889811, "grad_norm": 3.0582709312438965, "learning_rate": 2.967359050445104e-06, "loss": 1.0584, "step": 100 }, { "epoch": 0.013506285102968708, "grad_norm": 3.053069591522217, "learning_rate": 2.9970326409495554e-06, "loss": 1.1094, "step": 101 }, { "epoch": 0.013640010698047607, "grad_norm": 3.1664628982543945, "learning_rate": 3.0267062314540063e-06, "loss": 1.1372, "step": 102 }, { "epoch": 0.013773736293126504, "grad_norm": 2.9064559936523438, "learning_rate": 3.056379821958457e-06, "loss": 1.1974, "step": 103 }, { "epoch": 0.013907461888205403, "grad_norm": 2.9556403160095215, "learning_rate": 3.086053412462908e-06, "loss": 0.9398, "step": 104 }, { "epoch": 0.0140411874832843, "grad_norm": 2.860813617706299, "learning_rate": 3.115727002967359e-06, "loss": 1.0348, "step": 105 }, { "epoch": 0.014174913078363199, "grad_norm": 3.4265801906585693, "learning_rate": 3.14540059347181e-06, "loss": 1.0478, "step": 106 }, { "epoch": 0.014308638673442097, "grad_norm": 3.0446763038635254, "learning_rate": 3.1750741839762617e-06, "loss": 1.1204, "step": 107 }, { "epoch": 0.014442364268520995, "grad_norm": 2.9353718757629395, "learning_rate": 3.2047477744807127e-06, "loss": 1.0519, "step": 108 }, { "epoch": 0.014576089863599893, "grad_norm": 2.9855213165283203, "learning_rate": 3.2344213649851636e-06, "loss": 1.1898, "step": 109 }, { "epoch": 0.01470981545867879, "grad_norm": 2.753192186355591, "learning_rate": 3.2640949554896145e-06, "loss": 1.0611, "step": 110 }, { "epoch": 0.01484354105375769, "grad_norm": 3.6730103492736816, "learning_rate": 3.2937685459940654e-06, "loss": 1.25, "step": 111 }, { "epoch": 0.014977266648836588, "grad_norm": 2.8640997409820557, "learning_rate": 3.3234421364985163e-06, "loss": 1.0562, "step": 112 }, { "epoch": 0.015110992243915485, "grad_norm": 2.8067142963409424, "learning_rate": 3.3531157270029673e-06, "loss": 1.0079, "step": 113 }, { "epoch": 0.015244717838994384, "grad_norm": 2.7243802547454834, "learning_rate": 3.382789317507419e-06, "loss": 1.0879, "step": 114 }, { "epoch": 0.015378443434073281, "grad_norm": 2.5139846801757812, "learning_rate": 3.41246290801187e-06, "loss": 1.0199, "step": 115 }, { "epoch": 0.01551216902915218, "grad_norm": 2.7910163402557373, "learning_rate": 3.442136498516321e-06, "loss": 1.0227, "step": 116 }, { "epoch": 0.015645894624231077, "grad_norm": 2.6432650089263916, "learning_rate": 3.471810089020772e-06, "loss": 1.0555, "step": 117 }, { "epoch": 0.015779620219309978, "grad_norm": 2.885413408279419, "learning_rate": 3.5014836795252227e-06, "loss": 1.0901, "step": 118 }, { "epoch": 0.015913345814388875, "grad_norm": 2.700320243835449, "learning_rate": 3.5311572700296736e-06, "loss": 0.9736, "step": 119 }, { "epoch": 0.016047071409467772, "grad_norm": 2.968327283859253, "learning_rate": 3.560830860534125e-06, "loss": 1.0017, "step": 120 }, { "epoch": 0.01618079700454667, "grad_norm": 2.9698989391326904, "learning_rate": 3.5905044510385763e-06, "loss": 1.02, "step": 121 }, { "epoch": 0.01631452259962557, "grad_norm": 2.7675836086273193, "learning_rate": 3.6201780415430273e-06, "loss": 1.0432, "step": 122 }, { "epoch": 0.016448248194704466, "grad_norm": 3.183265209197998, "learning_rate": 3.649851632047478e-06, "loss": 1.2116, "step": 123 }, { "epoch": 0.016581973789783364, "grad_norm": 2.7153022289276123, "learning_rate": 3.679525222551929e-06, "loss": 1.0938, "step": 124 }, { "epoch": 0.016715699384862264, "grad_norm": 2.718348264694214, "learning_rate": 3.70919881305638e-06, "loss": 0.9795, "step": 125 }, { "epoch": 0.01684942497994116, "grad_norm": 2.8047678470611572, "learning_rate": 3.738872403560831e-06, "loss": 1.1341, "step": 126 }, { "epoch": 0.01698315057502006, "grad_norm": 3.0613105297088623, "learning_rate": 3.7685459940652823e-06, "loss": 1.038, "step": 127 }, { "epoch": 0.017116876170098955, "grad_norm": 2.5319697856903076, "learning_rate": 3.7982195845697332e-06, "loss": 0.8916, "step": 128 }, { "epoch": 0.017250601765177856, "grad_norm": 2.836512565612793, "learning_rate": 3.8278931750741846e-06, "loss": 1.1994, "step": 129 }, { "epoch": 0.017384327360256753, "grad_norm": 2.65423846244812, "learning_rate": 3.857566765578635e-06, "loss": 0.9341, "step": 130 }, { "epoch": 0.01751805295533565, "grad_norm": 2.5542070865631104, "learning_rate": 3.887240356083086e-06, "loss": 1.0036, "step": 131 }, { "epoch": 0.01765177855041455, "grad_norm": 2.9286203384399414, "learning_rate": 3.916913946587537e-06, "loss": 1.0573, "step": 132 }, { "epoch": 0.017785504145493448, "grad_norm": 2.5774710178375244, "learning_rate": 3.946587537091989e-06, "loss": 1.0594, "step": 133 }, { "epoch": 0.017919229740572345, "grad_norm": 2.819199562072754, "learning_rate": 3.97626112759644e-06, "loss": 1.0838, "step": 134 }, { "epoch": 0.018052955335651242, "grad_norm": 2.887545108795166, "learning_rate": 4.005934718100891e-06, "loss": 0.9737, "step": 135 }, { "epoch": 0.018186680930730142, "grad_norm": 2.6029891967773438, "learning_rate": 4.0356083086053414e-06, "loss": 0.9512, "step": 136 }, { "epoch": 0.01832040652580904, "grad_norm": 2.665370464324951, "learning_rate": 4.065281899109793e-06, "loss": 1.0299, "step": 137 }, { "epoch": 0.018454132120887937, "grad_norm": 2.8395845890045166, "learning_rate": 4.094955489614243e-06, "loss": 1.0842, "step": 138 }, { "epoch": 0.018587857715966837, "grad_norm": 2.8957955837249756, "learning_rate": 4.124629080118695e-06, "loss": 1.0248, "step": 139 }, { "epoch": 0.018721583311045734, "grad_norm": 2.6816883087158203, "learning_rate": 4.154302670623146e-06, "loss": 1.0519, "step": 140 }, { "epoch": 0.01885530890612463, "grad_norm": 2.8751018047332764, "learning_rate": 4.183976261127597e-06, "loss": 1.0726, "step": 141 }, { "epoch": 0.018989034501203532, "grad_norm": 2.589390516281128, "learning_rate": 4.213649851632048e-06, "loss": 0.9836, "step": 142 }, { "epoch": 0.01912276009628243, "grad_norm": 2.5117685794830322, "learning_rate": 4.243323442136499e-06, "loss": 1.0749, "step": 143 }, { "epoch": 0.019256485691361326, "grad_norm": 3.071716547012329, "learning_rate": 4.27299703264095e-06, "loss": 0.9973, "step": 144 }, { "epoch": 0.019390211286440223, "grad_norm": 2.778595209121704, "learning_rate": 4.302670623145401e-06, "loss": 1.0078, "step": 145 }, { "epoch": 0.019523936881519124, "grad_norm": 2.641528367996216, "learning_rate": 4.332344213649852e-06, "loss": 0.9591, "step": 146 }, { "epoch": 0.01965766247659802, "grad_norm": 2.8284873962402344, "learning_rate": 4.362017804154303e-06, "loss": 1.0953, "step": 147 }, { "epoch": 0.019791388071676918, "grad_norm": 2.419851303100586, "learning_rate": 4.391691394658754e-06, "loss": 1.0144, "step": 148 }, { "epoch": 0.01992511366675582, "grad_norm": 2.3353230953216553, "learning_rate": 4.4213649851632055e-06, "loss": 0.9886, "step": 149 }, { "epoch": 0.020058839261834716, "grad_norm": 2.36822509765625, "learning_rate": 4.451038575667656e-06, "loss": 1.002, "step": 150 }, { "epoch": 0.020192564856913613, "grad_norm": 2.6083362102508545, "learning_rate": 4.480712166172107e-06, "loss": 1.0712, "step": 151 }, { "epoch": 0.02032629045199251, "grad_norm": 2.6626477241516113, "learning_rate": 4.510385756676558e-06, "loss": 1.028, "step": 152 }, { "epoch": 0.02046001604707141, "grad_norm": 2.5567781925201416, "learning_rate": 4.540059347181009e-06, "loss": 1.0123, "step": 153 }, { "epoch": 0.020593741642150307, "grad_norm": 2.7441117763519287, "learning_rate": 4.5697329376854606e-06, "loss": 1.0435, "step": 154 }, { "epoch": 0.020727467237229204, "grad_norm": 2.5715830326080322, "learning_rate": 4.599406528189911e-06, "loss": 1.0412, "step": 155 }, { "epoch": 0.020861192832308105, "grad_norm": 2.8103010654449463, "learning_rate": 4.629080118694362e-06, "loss": 1.0561, "step": 156 }, { "epoch": 0.020994918427387002, "grad_norm": 2.6659176349639893, "learning_rate": 4.658753709198813e-06, "loss": 1.0716, "step": 157 }, { "epoch": 0.0211286440224659, "grad_norm": 2.795372247695923, "learning_rate": 4.688427299703264e-06, "loss": 0.9751, "step": 158 }, { "epoch": 0.0212623696175448, "grad_norm": 2.645467758178711, "learning_rate": 4.718100890207716e-06, "loss": 1.0348, "step": 159 }, { "epoch": 0.021396095212623697, "grad_norm": 2.497130870819092, "learning_rate": 4.747774480712167e-06, "loss": 0.9633, "step": 160 }, { "epoch": 0.021529820807702594, "grad_norm": 2.539609432220459, "learning_rate": 4.7774480712166174e-06, "loss": 1.07, "step": 161 }, { "epoch": 0.02166354640278149, "grad_norm": 2.526731252670288, "learning_rate": 4.807121661721069e-06, "loss": 1.1071, "step": 162 }, { "epoch": 0.02179727199786039, "grad_norm": 2.5451855659484863, "learning_rate": 4.836795252225519e-06, "loss": 1.1635, "step": 163 }, { "epoch": 0.02193099759293929, "grad_norm": 2.4113969802856445, "learning_rate": 4.866468842729971e-06, "loss": 1.1039, "step": 164 }, { "epoch": 0.022064723188018186, "grad_norm": 2.5407888889312744, "learning_rate": 4.896142433234421e-06, "loss": 1.018, "step": 165 }, { "epoch": 0.022198448783097086, "grad_norm": 2.674030065536499, "learning_rate": 4.925816023738873e-06, "loss": 1.053, "step": 166 }, { "epoch": 0.022332174378175983, "grad_norm": 2.4454448223114014, "learning_rate": 4.955489614243324e-06, "loss": 1.0399, "step": 167 }, { "epoch": 0.02246589997325488, "grad_norm": 2.7065093517303467, "learning_rate": 4.985163204747775e-06, "loss": 1.1457, "step": 168 }, { "epoch": 0.022599625568333778, "grad_norm": 2.1272506713867188, "learning_rate": 5.014836795252226e-06, "loss": 0.9201, "step": 169 }, { "epoch": 0.022733351163412678, "grad_norm": 2.385131597518921, "learning_rate": 5.044510385756677e-06, "loss": 0.9461, "step": 170 }, { "epoch": 0.022867076758491575, "grad_norm": 2.516397476196289, "learning_rate": 5.0741839762611275e-06, "loss": 0.9965, "step": 171 }, { "epoch": 0.023000802353570472, "grad_norm": 2.3747527599334717, "learning_rate": 5.103857566765579e-06, "loss": 1.1509, "step": 172 }, { "epoch": 0.023134527948649373, "grad_norm": 2.4126148223876953, "learning_rate": 5.133531157270029e-06, "loss": 0.9182, "step": 173 }, { "epoch": 0.02326825354372827, "grad_norm": 2.4889256954193115, "learning_rate": 5.163204747774481e-06, "loss": 1.2252, "step": 174 }, { "epoch": 0.023401979138807167, "grad_norm": 2.55551815032959, "learning_rate": 5.192878338278933e-06, "loss": 1.0766, "step": 175 }, { "epoch": 0.023535704733886064, "grad_norm": 2.6064417362213135, "learning_rate": 5.222551928783383e-06, "loss": 1.0438, "step": 176 }, { "epoch": 0.023669430328964965, "grad_norm": 2.5930440425872803, "learning_rate": 5.252225519287835e-06, "loss": 1.1387, "step": 177 }, { "epoch": 0.02380315592404386, "grad_norm": 2.2208077907562256, "learning_rate": 5.281899109792285e-06, "loss": 0.9841, "step": 178 }, { "epoch": 0.02393688151912276, "grad_norm": 2.4395084381103516, "learning_rate": 5.3115727002967366e-06, "loss": 1.049, "step": 179 }, { "epoch": 0.02407060711420166, "grad_norm": 2.4199535846710205, "learning_rate": 5.341246290801188e-06, "loss": 0.9561, "step": 180 }, { "epoch": 0.024204332709280556, "grad_norm": 2.315253734588623, "learning_rate": 5.370919881305638e-06, "loss": 1.0869, "step": 181 }, { "epoch": 0.024338058304359454, "grad_norm": 2.877007007598877, "learning_rate": 5.40059347181009e-06, "loss": 1.0394, "step": 182 }, { "epoch": 0.024471783899438354, "grad_norm": 2.3678910732269287, "learning_rate": 5.43026706231454e-06, "loss": 1.03, "step": 183 }, { "epoch": 0.02460550949451725, "grad_norm": 2.417448043823242, "learning_rate": 5.459940652818992e-06, "loss": 1.0384, "step": 184 }, { "epoch": 0.024739235089596148, "grad_norm": 2.5037484169006348, "learning_rate": 5.489614243323442e-06, "loss": 0.9478, "step": 185 }, { "epoch": 0.024872960684675045, "grad_norm": 2.1415839195251465, "learning_rate": 5.5192878338278934e-06, "loss": 0.9144, "step": 186 }, { "epoch": 0.025006686279753946, "grad_norm": 2.7575345039367676, "learning_rate": 5.548961424332344e-06, "loss": 1.0323, "step": 187 }, { "epoch": 0.025140411874832843, "grad_norm": 2.61792254447937, "learning_rate": 5.578635014836796e-06, "loss": 1.22, "step": 188 }, { "epoch": 0.02527413746991174, "grad_norm": 2.489809513092041, "learning_rate": 5.6083086053412475e-06, "loss": 1.0158, "step": 189 }, { "epoch": 0.02540786306499064, "grad_norm": 2.5097711086273193, "learning_rate": 5.637982195845698e-06, "loss": 1.1184, "step": 190 }, { "epoch": 0.025541588660069538, "grad_norm": 2.3829033374786377, "learning_rate": 5.667655786350149e-06, "loss": 1.0209, "step": 191 }, { "epoch": 0.025675314255148435, "grad_norm": 2.4585251808166504, "learning_rate": 5.6973293768546e-06, "loss": 0.9991, "step": 192 }, { "epoch": 0.025809039850227332, "grad_norm": 2.514333724975586, "learning_rate": 5.727002967359051e-06, "loss": 1.056, "step": 193 }, { "epoch": 0.025942765445306232, "grad_norm": 2.4111649990081787, "learning_rate": 5.756676557863502e-06, "loss": 1.0811, "step": 194 }, { "epoch": 0.02607649104038513, "grad_norm": 2.2878808975219727, "learning_rate": 5.786350148367953e-06, "loss": 0.9202, "step": 195 }, { "epoch": 0.026210216635464027, "grad_norm": 2.39331316947937, "learning_rate": 5.8160237388724035e-06, "loss": 0.9813, "step": 196 }, { "epoch": 0.026343942230542927, "grad_norm": 2.3745856285095215, "learning_rate": 5.845697329376855e-06, "loss": 1.1793, "step": 197 }, { "epoch": 0.026477667825621824, "grad_norm": 2.3182179927825928, "learning_rate": 5.875370919881306e-06, "loss": 1.138, "step": 198 }, { "epoch": 0.02661139342070072, "grad_norm": 2.509194850921631, "learning_rate": 5.905044510385757e-06, "loss": 1.0151, "step": 199 }, { "epoch": 0.026745119015779622, "grad_norm": 2.4581921100616455, "learning_rate": 5.934718100890208e-06, "loss": 0.9608, "step": 200 }, { "epoch": 0.02687884461085852, "grad_norm": 2.364325523376465, "learning_rate": 5.964391691394659e-06, "loss": 1.0874, "step": 201 }, { "epoch": 0.027012570205937416, "grad_norm": 2.4899895191192627, "learning_rate": 5.994065281899111e-06, "loss": 0.969, "step": 202 }, { "epoch": 0.027146295801016313, "grad_norm": 2.5167624950408936, "learning_rate": 6.023738872403562e-06, "loss": 1.0383, "step": 203 }, { "epoch": 0.027280021396095214, "grad_norm": 2.2799112796783447, "learning_rate": 6.0534124629080126e-06, "loss": 1.0219, "step": 204 }, { "epoch": 0.02741374699117411, "grad_norm": 2.4147493839263916, "learning_rate": 6.083086053412464e-06, "loss": 1.1141, "step": 205 }, { "epoch": 0.027547472586253008, "grad_norm": 2.7571961879730225, "learning_rate": 6.112759643916914e-06, "loss": 1.048, "step": 206 }, { "epoch": 0.02768119818133191, "grad_norm": 2.746429920196533, "learning_rate": 6.142433234421366e-06, "loss": 1.0362, "step": 207 }, { "epoch": 0.027814923776410806, "grad_norm": 2.5809566974639893, "learning_rate": 6.172106824925816e-06, "loss": 1.1742, "step": 208 }, { "epoch": 0.027948649371489703, "grad_norm": 2.5375313758850098, "learning_rate": 6.201780415430268e-06, "loss": 0.9105, "step": 209 }, { "epoch": 0.0280823749665686, "grad_norm": 2.2991561889648438, "learning_rate": 6.231454005934718e-06, "loss": 1.1034, "step": 210 }, { "epoch": 0.0282161005616475, "grad_norm": 2.2301433086395264, "learning_rate": 6.2611275964391694e-06, "loss": 1.1859, "step": 211 }, { "epoch": 0.028349826156726397, "grad_norm": 2.5075368881225586, "learning_rate": 6.29080118694362e-06, "loss": 1.0989, "step": 212 }, { "epoch": 0.028483551751805294, "grad_norm": 2.48530912399292, "learning_rate": 6.320474777448071e-06, "loss": 1.1044, "step": 213 }, { "epoch": 0.028617277346884195, "grad_norm": 2.4393742084503174, "learning_rate": 6.3501483679525235e-06, "loss": 1.0558, "step": 214 }, { "epoch": 0.028751002941963092, "grad_norm": 2.353741407394409, "learning_rate": 6.379821958456974e-06, "loss": 1.0659, "step": 215 }, { "epoch": 0.02888472853704199, "grad_norm": 2.4226839542388916, "learning_rate": 6.409495548961425e-06, "loss": 1.0435, "step": 216 }, { "epoch": 0.029018454132120886, "grad_norm": 2.4108943939208984, "learning_rate": 6.439169139465876e-06, "loss": 1.1524, "step": 217 }, { "epoch": 0.029152179727199787, "grad_norm": 2.1391913890838623, "learning_rate": 6.468842729970327e-06, "loss": 1.0214, "step": 218 }, { "epoch": 0.029285905322278684, "grad_norm": 2.3625972270965576, "learning_rate": 6.4985163204747785e-06, "loss": 1.1061, "step": 219 }, { "epoch": 0.02941963091735758, "grad_norm": 2.3423166275024414, "learning_rate": 6.528189910979229e-06, "loss": 1.0218, "step": 220 }, { "epoch": 0.02955335651243648, "grad_norm": 2.3345789909362793, "learning_rate": 6.55786350148368e-06, "loss": 0.951, "step": 221 }, { "epoch": 0.02968708210751538, "grad_norm": 2.1468496322631836, "learning_rate": 6.587537091988131e-06, "loss": 1.0768, "step": 222 }, { "epoch": 0.029820807702594276, "grad_norm": 2.5111019611358643, "learning_rate": 6.617210682492582e-06, "loss": 1.0467, "step": 223 }, { "epoch": 0.029954533297673176, "grad_norm": 2.2556746006011963, "learning_rate": 6.646884272997033e-06, "loss": 0.9626, "step": 224 }, { "epoch": 0.030088258892752073, "grad_norm": 2.3173787593841553, "learning_rate": 6.676557863501484e-06, "loss": 1.0564, "step": 225 }, { "epoch": 0.03022198448783097, "grad_norm": 2.258831262588501, "learning_rate": 6.7062314540059345e-06, "loss": 1.0219, "step": 226 }, { "epoch": 0.030355710082909867, "grad_norm": 2.3957011699676514, "learning_rate": 6.735905044510387e-06, "loss": 1.1111, "step": 227 }, { "epoch": 0.030489435677988768, "grad_norm": 2.3867766857147217, "learning_rate": 6.765578635014838e-06, "loss": 1.1592, "step": 228 }, { "epoch": 0.030623161273067665, "grad_norm": 2.1736695766448975, "learning_rate": 6.795252225519289e-06, "loss": 1.0399, "step": 229 }, { "epoch": 0.030756886868146562, "grad_norm": 1.9856196641921997, "learning_rate": 6.82492581602374e-06, "loss": 1.0211, "step": 230 }, { "epoch": 0.030890612463225463, "grad_norm": 2.1734778881073, "learning_rate": 6.85459940652819e-06, "loss": 1.0445, "step": 231 }, { "epoch": 0.03102433805830436, "grad_norm": 2.480058193206787, "learning_rate": 6.884272997032642e-06, "loss": 1.1444, "step": 232 }, { "epoch": 0.031158063653383257, "grad_norm": 2.1672844886779785, "learning_rate": 6.913946587537092e-06, "loss": 1.1014, "step": 233 }, { "epoch": 0.031291789248462154, "grad_norm": 2.188930034637451, "learning_rate": 6.943620178041544e-06, "loss": 0.965, "step": 234 }, { "epoch": 0.03142551484354105, "grad_norm": 2.2984232902526855, "learning_rate": 6.973293768545994e-06, "loss": 0.9799, "step": 235 }, { "epoch": 0.031559240438619955, "grad_norm": 2.391805648803711, "learning_rate": 7.0029673590504455e-06, "loss": 1.0379, "step": 236 }, { "epoch": 0.03169296603369885, "grad_norm": 2.4706690311431885, "learning_rate": 7.032640949554897e-06, "loss": 1.0702, "step": 237 }, { "epoch": 0.03182669162877775, "grad_norm": 2.1806297302246094, "learning_rate": 7.062314540059347e-06, "loss": 1.073, "step": 238 }, { "epoch": 0.031960417223856646, "grad_norm": 2.272017002105713, "learning_rate": 7.091988130563799e-06, "loss": 1.161, "step": 239 }, { "epoch": 0.032094142818935543, "grad_norm": 2.0684709548950195, "learning_rate": 7.12166172106825e-06, "loss": 1.1225, "step": 240 }, { "epoch": 0.03222786841401444, "grad_norm": 2.365962028503418, "learning_rate": 7.151335311572701e-06, "loss": 1.1505, "step": 241 }, { "epoch": 0.03236159400909334, "grad_norm": 2.3922576904296875, "learning_rate": 7.181008902077153e-06, "loss": 1.0955, "step": 242 }, { "epoch": 0.03249531960417224, "grad_norm": 2.075997829437256, "learning_rate": 7.210682492581603e-06, "loss": 1.149, "step": 243 }, { "epoch": 0.03262904519925114, "grad_norm": 2.3058278560638428, "learning_rate": 7.2403560830860545e-06, "loss": 1.1282, "step": 244 }, { "epoch": 0.032762770794330036, "grad_norm": 2.3840856552124023, "learning_rate": 7.270029673590505e-06, "loss": 1.0625, "step": 245 }, { "epoch": 0.03289649638940893, "grad_norm": 2.3734896183013916, "learning_rate": 7.299703264094956e-06, "loss": 1.1067, "step": 246 }, { "epoch": 0.03303022198448783, "grad_norm": 2.1043589115142822, "learning_rate": 7.329376854599407e-06, "loss": 1.0802, "step": 247 }, { "epoch": 0.03316394757956673, "grad_norm": 2.259168863296509, "learning_rate": 7.359050445103858e-06, "loss": 1.0337, "step": 248 }, { "epoch": 0.033297673174645624, "grad_norm": 2.263909339904785, "learning_rate": 7.388724035608309e-06, "loss": 1.0034, "step": 249 }, { "epoch": 0.03343139876972453, "grad_norm": 2.2374789714813232, "learning_rate": 7.41839762611276e-06, "loss": 1.0067, "step": 250 }, { "epoch": 0.033565124364803425, "grad_norm": 2.250603437423706, "learning_rate": 7.4480712166172105e-06, "loss": 0.9734, "step": 251 }, { "epoch": 0.03369884995988232, "grad_norm": 2.1564013957977295, "learning_rate": 7.477744807121662e-06, "loss": 1.0802, "step": 252 }, { "epoch": 0.03383257555496122, "grad_norm": 2.18611478805542, "learning_rate": 7.507418397626114e-06, "loss": 0.9864, "step": 253 }, { "epoch": 0.03396630115004012, "grad_norm": 2.1928086280822754, "learning_rate": 7.537091988130565e-06, "loss": 1.0776, "step": 254 }, { "epoch": 0.034100026745119014, "grad_norm": 2.232571840286255, "learning_rate": 7.566765578635016e-06, "loss": 1.0328, "step": 255 }, { "epoch": 0.03423375234019791, "grad_norm": 1.9836505651474, "learning_rate": 7.5964391691394664e-06, "loss": 0.9873, "step": 256 }, { "epoch": 0.034367477935276815, "grad_norm": 2.2321407794952393, "learning_rate": 7.626112759643918e-06, "loss": 0.896, "step": 257 }, { "epoch": 0.03450120353035571, "grad_norm": 2.369633674621582, "learning_rate": 7.655786350148369e-06, "loss": 1.1372, "step": 258 }, { "epoch": 0.03463492912543461, "grad_norm": 2.2932803630828857, "learning_rate": 7.68545994065282e-06, "loss": 1.1733, "step": 259 }, { "epoch": 0.034768654720513506, "grad_norm": 2.1567983627319336, "learning_rate": 7.71513353115727e-06, "loss": 1.0751, "step": 260 }, { "epoch": 0.0349023803155924, "grad_norm": 2.3114936351776123, "learning_rate": 7.744807121661722e-06, "loss": 1.0723, "step": 261 }, { "epoch": 0.0350361059106713, "grad_norm": 2.1507253646850586, "learning_rate": 7.774480712166173e-06, "loss": 1.0334, "step": 262 }, { "epoch": 0.0351698315057502, "grad_norm": 2.288597583770752, "learning_rate": 7.804154302670623e-06, "loss": 1.0296, "step": 263 }, { "epoch": 0.0353035571008291, "grad_norm": 2.122720241546631, "learning_rate": 7.833827893175074e-06, "loss": 1.0489, "step": 264 }, { "epoch": 0.035437282695908, "grad_norm": 2.343425989151001, "learning_rate": 7.863501483679526e-06, "loss": 1.1158, "step": 265 }, { "epoch": 0.035571008290986895, "grad_norm": 2.2356183528900146, "learning_rate": 7.893175074183978e-06, "loss": 1.041, "step": 266 }, { "epoch": 0.03570473388606579, "grad_norm": 2.0943832397460938, "learning_rate": 7.922848664688429e-06, "loss": 0.9385, "step": 267 }, { "epoch": 0.03583845948114469, "grad_norm": 2.2915427684783936, "learning_rate": 7.95252225519288e-06, "loss": 1.0868, "step": 268 }, { "epoch": 0.03597218507622359, "grad_norm": 1.8615281581878662, "learning_rate": 7.98219584569733e-06, "loss": 1.0241, "step": 269 }, { "epoch": 0.036105910671302484, "grad_norm": 2.223588228225708, "learning_rate": 8.011869436201782e-06, "loss": 1.1705, "step": 270 }, { "epoch": 0.03623963626638139, "grad_norm": 2.137033224105835, "learning_rate": 8.041543026706232e-06, "loss": 1.0364, "step": 271 }, { "epoch": 0.036373361861460285, "grad_norm": 2.3878941535949707, "learning_rate": 8.071216617210683e-06, "loss": 1.0049, "step": 272 }, { "epoch": 0.03650708745653918, "grad_norm": 2.1150004863739014, "learning_rate": 8.100890207715133e-06, "loss": 0.9852, "step": 273 }, { "epoch": 0.03664081305161808, "grad_norm": 2.1072487831115723, "learning_rate": 8.130563798219586e-06, "loss": 0.9824, "step": 274 }, { "epoch": 0.036774538646696976, "grad_norm": 2.4331510066986084, "learning_rate": 8.160237388724036e-06, "loss": 1.0798, "step": 275 }, { "epoch": 0.03690826424177587, "grad_norm": 2.257194995880127, "learning_rate": 8.189910979228487e-06, "loss": 0.9388, "step": 276 }, { "epoch": 0.03704198983685478, "grad_norm": 2.241044521331787, "learning_rate": 8.219584569732939e-06, "loss": 1.0501, "step": 277 }, { "epoch": 0.037175715431933674, "grad_norm": 2.255011558532715, "learning_rate": 8.24925816023739e-06, "loss": 1.127, "step": 278 }, { "epoch": 0.03730944102701257, "grad_norm": 2.3377556800842285, "learning_rate": 8.278931750741841e-06, "loss": 1.0611, "step": 279 }, { "epoch": 0.03744316662209147, "grad_norm": 2.2558035850524902, "learning_rate": 8.308605341246292e-06, "loss": 1.0199, "step": 280 }, { "epoch": 0.037576892217170366, "grad_norm": 2.139692544937134, "learning_rate": 8.338278931750742e-06, "loss": 1.0403, "step": 281 }, { "epoch": 0.03771061781224926, "grad_norm": 2.0205817222595215, "learning_rate": 8.367952522255195e-06, "loss": 1.0884, "step": 282 }, { "epoch": 0.03784434340732816, "grad_norm": 1.9978325366973877, "learning_rate": 8.397626112759645e-06, "loss": 1.0452, "step": 283 }, { "epoch": 0.037978069002407064, "grad_norm": 2.1449902057647705, "learning_rate": 8.427299703264096e-06, "loss": 1.0558, "step": 284 }, { "epoch": 0.03811179459748596, "grad_norm": 2.0404136180877686, "learning_rate": 8.456973293768546e-06, "loss": 0.9134, "step": 285 }, { "epoch": 0.03824552019256486, "grad_norm": 2.1561310291290283, "learning_rate": 8.486646884272998e-06, "loss": 1.0414, "step": 286 }, { "epoch": 0.038379245787643755, "grad_norm": 2.523919105529785, "learning_rate": 8.516320474777449e-06, "loss": 1.0492, "step": 287 }, { "epoch": 0.03851297138272265, "grad_norm": 2.0474090576171875, "learning_rate": 8.5459940652819e-06, "loss": 1.138, "step": 288 }, { "epoch": 0.03864669697780155, "grad_norm": 2.138185501098633, "learning_rate": 8.57566765578635e-06, "loss": 1.0607, "step": 289 }, { "epoch": 0.038780422572880446, "grad_norm": 1.9647016525268555, "learning_rate": 8.605341246290802e-06, "loss": 1.0818, "step": 290 }, { "epoch": 0.03891414816795935, "grad_norm": 2.0999245643615723, "learning_rate": 8.635014836795252e-06, "loss": 1.0305, "step": 291 }, { "epoch": 0.03904787376303825, "grad_norm": 2.158047914505005, "learning_rate": 8.664688427299705e-06, "loss": 0.9508, "step": 292 }, { "epoch": 0.039181599358117145, "grad_norm": 2.105544090270996, "learning_rate": 8.694362017804155e-06, "loss": 1.1117, "step": 293 }, { "epoch": 0.03931532495319604, "grad_norm": 2.112946033477783, "learning_rate": 8.724035608308606e-06, "loss": 1.0008, "step": 294 }, { "epoch": 0.03944905054827494, "grad_norm": 1.918140172958374, "learning_rate": 8.753709198813058e-06, "loss": 1.0394, "step": 295 }, { "epoch": 0.039582776143353836, "grad_norm": 2.1342079639434814, "learning_rate": 8.783382789317508e-06, "loss": 1.0523, "step": 296 }, { "epoch": 0.03971650173843273, "grad_norm": 1.9990925788879395, "learning_rate": 8.813056379821959e-06, "loss": 1.0935, "step": 297 }, { "epoch": 0.03985022733351164, "grad_norm": 2.1554577350616455, "learning_rate": 8.842729970326411e-06, "loss": 1.0306, "step": 298 }, { "epoch": 0.039983952928590534, "grad_norm": 2.0101583003997803, "learning_rate": 8.872403560830862e-06, "loss": 0.9633, "step": 299 }, { "epoch": 0.04011767852366943, "grad_norm": 2.2585713863372803, "learning_rate": 8.902077151335312e-06, "loss": 1.0711, "step": 300 }, { "epoch": 0.04025140411874833, "grad_norm": 2.0878374576568604, "learning_rate": 8.931750741839763e-06, "loss": 1.063, "step": 301 }, { "epoch": 0.040385129713827225, "grad_norm": 2.1737592220306396, "learning_rate": 8.961424332344215e-06, "loss": 1.0927, "step": 302 }, { "epoch": 0.04051885530890612, "grad_norm": 2.075831174850464, "learning_rate": 8.991097922848665e-06, "loss": 1.0185, "step": 303 }, { "epoch": 0.04065258090398502, "grad_norm": 2.2921831607818604, "learning_rate": 9.020771513353116e-06, "loss": 1.1527, "step": 304 }, { "epoch": 0.040786306499063923, "grad_norm": 1.9909112453460693, "learning_rate": 9.050445103857568e-06, "loss": 1.0634, "step": 305 }, { "epoch": 0.04092003209414282, "grad_norm": 2.2253475189208984, "learning_rate": 9.080118694362018e-06, "loss": 1.0065, "step": 306 }, { "epoch": 0.04105375768922172, "grad_norm": 1.9585331678390503, "learning_rate": 9.10979228486647e-06, "loss": 0.9817, "step": 307 }, { "epoch": 0.041187483284300615, "grad_norm": 1.9729301929473877, "learning_rate": 9.139465875370921e-06, "loss": 1.0654, "step": 308 }, { "epoch": 0.04132120887937951, "grad_norm": 1.9680832624435425, "learning_rate": 9.169139465875372e-06, "loss": 0.9948, "step": 309 }, { "epoch": 0.04145493447445841, "grad_norm": 2.1761891841888428, "learning_rate": 9.198813056379822e-06, "loss": 1.1241, "step": 310 }, { "epoch": 0.041588660069537306, "grad_norm": 2.004584312438965, "learning_rate": 9.228486646884274e-06, "loss": 1.0741, "step": 311 }, { "epoch": 0.04172238566461621, "grad_norm": 2.07663631439209, "learning_rate": 9.258160237388725e-06, "loss": 1.0239, "step": 312 }, { "epoch": 0.04185611125969511, "grad_norm": 1.9531216621398926, "learning_rate": 9.287833827893175e-06, "loss": 1.0311, "step": 313 }, { "epoch": 0.041989836854774004, "grad_norm": 2.1650898456573486, "learning_rate": 9.317507418397626e-06, "loss": 1.1843, "step": 314 }, { "epoch": 0.0421235624498529, "grad_norm": 1.9372197389602661, "learning_rate": 9.347181008902078e-06, "loss": 1.0604, "step": 315 }, { "epoch": 0.0422572880449318, "grad_norm": 2.1684212684631348, "learning_rate": 9.376854599406528e-06, "loss": 1.0199, "step": 316 }, { "epoch": 0.042391013640010695, "grad_norm": 1.7992501258850098, "learning_rate": 9.406528189910979e-06, "loss": 1.0042, "step": 317 }, { "epoch": 0.0425247392350896, "grad_norm": 2.1617825031280518, "learning_rate": 9.436201780415431e-06, "loss": 1.1622, "step": 318 }, { "epoch": 0.0426584648301685, "grad_norm": 1.8574516773223877, "learning_rate": 9.465875370919882e-06, "loss": 0.9786, "step": 319 }, { "epoch": 0.042792190425247394, "grad_norm": 2.1602120399475098, "learning_rate": 9.495548961424334e-06, "loss": 1.1207, "step": 320 }, { "epoch": 0.04292591602032629, "grad_norm": 2.036407470703125, "learning_rate": 9.525222551928784e-06, "loss": 1.2117, "step": 321 }, { "epoch": 0.04305964161540519, "grad_norm": 2.4887197017669678, "learning_rate": 9.554896142433235e-06, "loss": 1.0696, "step": 322 }, { "epoch": 0.043193367210484085, "grad_norm": 2.084690570831299, "learning_rate": 9.584569732937687e-06, "loss": 1.0508, "step": 323 }, { "epoch": 0.04332709280556298, "grad_norm": 2.001461982727051, "learning_rate": 9.614243323442138e-06, "loss": 1.1141, "step": 324 }, { "epoch": 0.043460818400641886, "grad_norm": 1.9245610237121582, "learning_rate": 9.643916913946588e-06, "loss": 1.0397, "step": 325 }, { "epoch": 0.04359454399572078, "grad_norm": 1.9328910112380981, "learning_rate": 9.673590504451039e-06, "loss": 1.0727, "step": 326 }, { "epoch": 0.04372826959079968, "grad_norm": 2.128110885620117, "learning_rate": 9.70326409495549e-06, "loss": 1.1606, "step": 327 }, { "epoch": 0.04386199518587858, "grad_norm": 2.0188353061676025, "learning_rate": 9.732937685459941e-06, "loss": 1.1571, "step": 328 }, { "epoch": 0.043995720780957474, "grad_norm": 2.025883913040161, "learning_rate": 9.762611275964392e-06, "loss": 1.0686, "step": 329 }, { "epoch": 0.04412944637603637, "grad_norm": 1.994724154472351, "learning_rate": 9.792284866468842e-06, "loss": 1.0488, "step": 330 }, { "epoch": 0.04426317197111527, "grad_norm": 1.8666287660598755, "learning_rate": 9.821958456973294e-06, "loss": 1.0374, "step": 331 }, { "epoch": 0.04439689756619417, "grad_norm": 2.102116584777832, "learning_rate": 9.851632047477747e-06, "loss": 1.0825, "step": 332 }, { "epoch": 0.04453062316127307, "grad_norm": 2.097844362258911, "learning_rate": 9.881305637982197e-06, "loss": 1.0835, "step": 333 }, { "epoch": 0.04466434875635197, "grad_norm": 2.0745432376861572, "learning_rate": 9.910979228486648e-06, "loss": 1.1736, "step": 334 }, { "epoch": 0.044798074351430864, "grad_norm": 1.9465725421905518, "learning_rate": 9.940652818991098e-06, "loss": 0.9932, "step": 335 }, { "epoch": 0.04493179994650976, "grad_norm": 1.9583405256271362, "learning_rate": 9.97032640949555e-06, "loss": 1.0855, "step": 336 }, { "epoch": 0.04506552554158866, "grad_norm": 1.9207028150558472, "learning_rate": 1e-05, "loss": 1.0726, "step": 337 }, { "epoch": 0.045199251136667555, "grad_norm": 2.0334630012512207, "learning_rate": 1.0029673590504451e-05, "loss": 1.1527, "step": 338 }, { "epoch": 0.04533297673174646, "grad_norm": 2.3026490211486816, "learning_rate": 1.0059347181008904e-05, "loss": 1.1648, "step": 339 }, { "epoch": 0.045466702326825356, "grad_norm": 2.1276729106903076, "learning_rate": 1.0089020771513354e-05, "loss": 1.0329, "step": 340 }, { "epoch": 0.04560042792190425, "grad_norm": 1.9764131307601929, "learning_rate": 1.0118694362017805e-05, "loss": 0.971, "step": 341 }, { "epoch": 0.04573415351698315, "grad_norm": 1.8805902004241943, "learning_rate": 1.0148367952522255e-05, "loss": 0.9807, "step": 342 }, { "epoch": 0.04586787911206205, "grad_norm": 1.9787400960922241, "learning_rate": 1.0178041543026707e-05, "loss": 1.0638, "step": 343 }, { "epoch": 0.046001604707140945, "grad_norm": 1.9420288801193237, "learning_rate": 1.0207715133531158e-05, "loss": 1.0202, "step": 344 }, { "epoch": 0.04613533030221984, "grad_norm": 2.027693033218384, "learning_rate": 1.0237388724035608e-05, "loss": 1.0184, "step": 345 }, { "epoch": 0.046269055897298746, "grad_norm": 2.3655102252960205, "learning_rate": 1.0267062314540059e-05, "loss": 1.2002, "step": 346 }, { "epoch": 0.04640278149237764, "grad_norm": 2.07200288772583, "learning_rate": 1.0296735905044511e-05, "loss": 1.0476, "step": 347 }, { "epoch": 0.04653650708745654, "grad_norm": 1.911447525024414, "learning_rate": 1.0326409495548961e-05, "loss": 1.1383, "step": 348 }, { "epoch": 0.04667023268253544, "grad_norm": 1.9544726610183716, "learning_rate": 1.0356083086053412e-05, "loss": 1.076, "step": 349 }, { "epoch": 0.046803958277614334, "grad_norm": 1.9751782417297363, "learning_rate": 1.0385756676557866e-05, "loss": 1.1232, "step": 350 }, { "epoch": 0.04693768387269323, "grad_norm": 2.0050604343414307, "learning_rate": 1.0415430267062316e-05, "loss": 1.1003, "step": 351 }, { "epoch": 0.04707140946777213, "grad_norm": 1.8618202209472656, "learning_rate": 1.0445103857566767e-05, "loss": 1.072, "step": 352 }, { "epoch": 0.04720513506285103, "grad_norm": 2.006801128387451, "learning_rate": 1.0474777448071219e-05, "loss": 1.0502, "step": 353 }, { "epoch": 0.04733886065792993, "grad_norm": 2.0382750034332275, "learning_rate": 1.050445103857567e-05, "loss": 1.0036, "step": 354 }, { "epoch": 0.047472586253008826, "grad_norm": 1.9037744998931885, "learning_rate": 1.053412462908012e-05, "loss": 1.0963, "step": 355 }, { "epoch": 0.04760631184808772, "grad_norm": 2.0088462829589844, "learning_rate": 1.056379821958457e-05, "loss": 1.0292, "step": 356 }, { "epoch": 0.04774003744316662, "grad_norm": 1.9464218616485596, "learning_rate": 1.0593471810089023e-05, "loss": 1.0355, "step": 357 }, { "epoch": 0.04787376303824552, "grad_norm": 2.139714479446411, "learning_rate": 1.0623145400593473e-05, "loss": 1.1815, "step": 358 }, { "epoch": 0.04800748863332442, "grad_norm": 2.0638954639434814, "learning_rate": 1.0652818991097924e-05, "loss": 1.067, "step": 359 }, { "epoch": 0.04814121422840332, "grad_norm": 2.1416609287261963, "learning_rate": 1.0682492581602376e-05, "loss": 1.0271, "step": 360 }, { "epoch": 0.048274939823482216, "grad_norm": 2.108983278274536, "learning_rate": 1.0712166172106826e-05, "loss": 1.1038, "step": 361 }, { "epoch": 0.04840866541856111, "grad_norm": 1.8399920463562012, "learning_rate": 1.0741839762611277e-05, "loss": 0.9519, "step": 362 }, { "epoch": 0.04854239101364001, "grad_norm": 1.9900883436203003, "learning_rate": 1.0771513353115727e-05, "loss": 1.0897, "step": 363 }, { "epoch": 0.04867611660871891, "grad_norm": 2.1006016731262207, "learning_rate": 1.080118694362018e-05, "loss": 1.1365, "step": 364 }, { "epoch": 0.048809842203797804, "grad_norm": 1.9143853187561035, "learning_rate": 1.083086053412463e-05, "loss": 1.0637, "step": 365 }, { "epoch": 0.04894356779887671, "grad_norm": 1.8166767358779907, "learning_rate": 1.086053412462908e-05, "loss": 1.0316, "step": 366 }, { "epoch": 0.049077293393955605, "grad_norm": 1.8061285018920898, "learning_rate": 1.0890207715133531e-05, "loss": 1.0147, "step": 367 }, { "epoch": 0.0492110189890345, "grad_norm": 2.083958625793457, "learning_rate": 1.0919881305637983e-05, "loss": 1.0835, "step": 368 }, { "epoch": 0.0493447445841134, "grad_norm": 2.1298437118530273, "learning_rate": 1.0949554896142434e-05, "loss": 1.1745, "step": 369 }, { "epoch": 0.049478470179192297, "grad_norm": 1.948065996170044, "learning_rate": 1.0979228486646884e-05, "loss": 1.1759, "step": 370 }, { "epoch": 0.049612195774271194, "grad_norm": 2.003896951675415, "learning_rate": 1.1008902077151335e-05, "loss": 0.9757, "step": 371 }, { "epoch": 0.04974592136935009, "grad_norm": 1.7542465925216675, "learning_rate": 1.1038575667655787e-05, "loss": 1.1357, "step": 372 }, { "epoch": 0.049879646964428995, "grad_norm": 1.9024899005889893, "learning_rate": 1.1068249258160237e-05, "loss": 1.1814, "step": 373 }, { "epoch": 0.05001337255950789, "grad_norm": 2.0373587608337402, "learning_rate": 1.1097922848664688e-05, "loss": 1.1393, "step": 374 }, { "epoch": 0.05014709815458679, "grad_norm": 2.022723913192749, "learning_rate": 1.112759643916914e-05, "loss": 1.1965, "step": 375 }, { "epoch": 0.050280823749665686, "grad_norm": 1.9113446474075317, "learning_rate": 1.1157270029673592e-05, "loss": 1.0913, "step": 376 }, { "epoch": 0.05041454934474458, "grad_norm": 1.7692008018493652, "learning_rate": 1.1186943620178043e-05, "loss": 0.9946, "step": 377 }, { "epoch": 0.05054827493982348, "grad_norm": 1.7674192190170288, "learning_rate": 1.1216617210682495e-05, "loss": 0.9828, "step": 378 }, { "epoch": 0.05068200053490238, "grad_norm": 1.8106404542922974, "learning_rate": 1.1246290801186945e-05, "loss": 1.0222, "step": 379 }, { "epoch": 0.05081572612998128, "grad_norm": 2.112492561340332, "learning_rate": 1.1275964391691396e-05, "loss": 1.2431, "step": 380 }, { "epoch": 0.05094945172506018, "grad_norm": 1.894589900970459, "learning_rate": 1.1305637982195846e-05, "loss": 1.1478, "step": 381 }, { "epoch": 0.051083177320139075, "grad_norm": 1.9375091791152954, "learning_rate": 1.1335311572700299e-05, "loss": 1.1071, "step": 382 }, { "epoch": 0.05121690291521797, "grad_norm": 1.700008749961853, "learning_rate": 1.1364985163204749e-05, "loss": 1.0168, "step": 383 }, { "epoch": 0.05135062851029687, "grad_norm": 1.5963480472564697, "learning_rate": 1.13946587537092e-05, "loss": 1.0312, "step": 384 }, { "epoch": 0.05148435410537577, "grad_norm": 2.1378207206726074, "learning_rate": 1.1424332344213652e-05, "loss": 1.1316, "step": 385 }, { "epoch": 0.051618079700454664, "grad_norm": 2.056802749633789, "learning_rate": 1.1454005934718102e-05, "loss": 1.141, "step": 386 }, { "epoch": 0.05175180529553357, "grad_norm": 1.8602255582809448, "learning_rate": 1.1483679525222553e-05, "loss": 1.0131, "step": 387 }, { "epoch": 0.051885530890612465, "grad_norm": 1.9550607204437256, "learning_rate": 1.1513353115727003e-05, "loss": 1.08, "step": 388 }, { "epoch": 0.05201925648569136, "grad_norm": 1.8199832439422607, "learning_rate": 1.1543026706231456e-05, "loss": 1.0667, "step": 389 }, { "epoch": 0.05215298208077026, "grad_norm": 2.0087828636169434, "learning_rate": 1.1572700296735906e-05, "loss": 1.1358, "step": 390 }, { "epoch": 0.052286707675849156, "grad_norm": 2.1731982231140137, "learning_rate": 1.1602373887240357e-05, "loss": 1.0322, "step": 391 }, { "epoch": 0.05242043327092805, "grad_norm": 2.1105966567993164, "learning_rate": 1.1632047477744807e-05, "loss": 1.1673, "step": 392 }, { "epoch": 0.05255415886600695, "grad_norm": 2.0049850940704346, "learning_rate": 1.166172106824926e-05, "loss": 0.9899, "step": 393 }, { "epoch": 0.052687884461085854, "grad_norm": 1.8965520858764648, "learning_rate": 1.169139465875371e-05, "loss": 1.0414, "step": 394 }, { "epoch": 0.05282161005616475, "grad_norm": 1.9036133289337158, "learning_rate": 1.172106824925816e-05, "loss": 0.9957, "step": 395 }, { "epoch": 0.05295533565124365, "grad_norm": 2.0364062786102295, "learning_rate": 1.1750741839762612e-05, "loss": 1.1938, "step": 396 }, { "epoch": 0.053089061246322546, "grad_norm": 1.9242273569107056, "learning_rate": 1.1780415430267063e-05, "loss": 1.0591, "step": 397 }, { "epoch": 0.05322278684140144, "grad_norm": 1.8653147220611572, "learning_rate": 1.1810089020771513e-05, "loss": 1.0832, "step": 398 }, { "epoch": 0.05335651243648034, "grad_norm": 1.8537421226501465, "learning_rate": 1.1839762611275964e-05, "loss": 1.0572, "step": 399 }, { "epoch": 0.053490238031559244, "grad_norm": 1.8426142930984497, "learning_rate": 1.1869436201780416e-05, "loss": 0.9869, "step": 400 }, { "epoch": 0.05362396362663814, "grad_norm": 1.8369946479797363, "learning_rate": 1.1899109792284867e-05, "loss": 1.0127, "step": 401 }, { "epoch": 0.05375768922171704, "grad_norm": 2.159726858139038, "learning_rate": 1.1928783382789319e-05, "loss": 1.1654, "step": 402 }, { "epoch": 0.053891414816795935, "grad_norm": 1.802620530128479, "learning_rate": 1.1958456973293771e-05, "loss": 1.148, "step": 403 }, { "epoch": 0.05402514041187483, "grad_norm": 1.9155060052871704, "learning_rate": 1.1988130563798221e-05, "loss": 1.2595, "step": 404 }, { "epoch": 0.05415886600695373, "grad_norm": 1.933816909790039, "learning_rate": 1.2017804154302672e-05, "loss": 1.0604, "step": 405 }, { "epoch": 0.054292591602032626, "grad_norm": 1.8533731698989868, "learning_rate": 1.2047477744807124e-05, "loss": 1.0313, "step": 406 }, { "epoch": 0.05442631719711153, "grad_norm": 2.145768165588379, "learning_rate": 1.2077151335311575e-05, "loss": 1.0669, "step": 407 }, { "epoch": 0.05456004279219043, "grad_norm": 1.8734737634658813, "learning_rate": 1.2106824925816025e-05, "loss": 1.0893, "step": 408 }, { "epoch": 0.054693768387269324, "grad_norm": 1.908294916152954, "learning_rate": 1.2136498516320476e-05, "loss": 1.1095, "step": 409 }, { "epoch": 0.05482749398234822, "grad_norm": 2.044063091278076, "learning_rate": 1.2166172106824928e-05, "loss": 0.9215, "step": 410 }, { "epoch": 0.05496121957742712, "grad_norm": 1.8715368509292603, "learning_rate": 1.2195845697329378e-05, "loss": 1.1203, "step": 411 }, { "epoch": 0.055094945172506016, "grad_norm": 1.980888843536377, "learning_rate": 1.2225519287833829e-05, "loss": 1.0646, "step": 412 }, { "epoch": 0.05522867076758491, "grad_norm": 2.0843894481658936, "learning_rate": 1.225519287833828e-05, "loss": 1.0394, "step": 413 }, { "epoch": 0.05536239636266382, "grad_norm": 1.9318420886993408, "learning_rate": 1.2284866468842732e-05, "loss": 1.0952, "step": 414 }, { "epoch": 0.055496121957742714, "grad_norm": 1.9481059312820435, "learning_rate": 1.2314540059347182e-05, "loss": 1.0212, "step": 415 }, { "epoch": 0.05562984755282161, "grad_norm": 2.1550583839416504, "learning_rate": 1.2344213649851633e-05, "loss": 1.1486, "step": 416 }, { "epoch": 0.05576357314790051, "grad_norm": 1.8132883310317993, "learning_rate": 1.2373887240356085e-05, "loss": 1.0486, "step": 417 }, { "epoch": 0.055897298742979405, "grad_norm": 2.0425143241882324, "learning_rate": 1.2403560830860535e-05, "loss": 1.083, "step": 418 }, { "epoch": 0.0560310243380583, "grad_norm": 1.7689743041992188, "learning_rate": 1.2433234421364986e-05, "loss": 1.0957, "step": 419 }, { "epoch": 0.0561647499331372, "grad_norm": 1.8951979875564575, "learning_rate": 1.2462908011869436e-05, "loss": 1.0642, "step": 420 }, { "epoch": 0.0562984755282161, "grad_norm": 1.8346600532531738, "learning_rate": 1.2492581602373888e-05, "loss": 1.0299, "step": 421 }, { "epoch": 0.056432201123295, "grad_norm": 2.0847954750061035, "learning_rate": 1.2522255192878339e-05, "loss": 1.1467, "step": 422 }, { "epoch": 0.0565659267183739, "grad_norm": 1.8992348909378052, "learning_rate": 1.255192878338279e-05, "loss": 1.0402, "step": 423 }, { "epoch": 0.056699652313452795, "grad_norm": 2.243069887161255, "learning_rate": 1.258160237388724e-05, "loss": 1.0912, "step": 424 }, { "epoch": 0.05683337790853169, "grad_norm": 1.9771530628204346, "learning_rate": 1.2611275964391692e-05, "loss": 1.0508, "step": 425 }, { "epoch": 0.05696710350361059, "grad_norm": 2.022507667541504, "learning_rate": 1.2640949554896143e-05, "loss": 1.1726, "step": 426 }, { "epoch": 0.057100829098689486, "grad_norm": 2.016322374343872, "learning_rate": 1.2670623145400593e-05, "loss": 1.1948, "step": 427 }, { "epoch": 0.05723455469376839, "grad_norm": 1.767403244972229, "learning_rate": 1.2700296735905047e-05, "loss": 1.13, "step": 428 }, { "epoch": 0.05736828028884729, "grad_norm": 2.187791109085083, "learning_rate": 1.2729970326409497e-05, "loss": 0.934, "step": 429 }, { "epoch": 0.057502005883926184, "grad_norm": 1.9319829940795898, "learning_rate": 1.2759643916913948e-05, "loss": 1.1322, "step": 430 }, { "epoch": 0.05763573147900508, "grad_norm": 2.2257282733917236, "learning_rate": 1.27893175074184e-05, "loss": 1.1496, "step": 431 }, { "epoch": 0.05776945707408398, "grad_norm": 1.8813470602035522, "learning_rate": 1.281899109792285e-05, "loss": 1.1021, "step": 432 }, { "epoch": 0.057903182669162875, "grad_norm": 1.9404021501541138, "learning_rate": 1.2848664688427301e-05, "loss": 1.1544, "step": 433 }, { "epoch": 0.05803690826424177, "grad_norm": 1.8423067331314087, "learning_rate": 1.2878338278931752e-05, "loss": 1.042, "step": 434 }, { "epoch": 0.058170633859320676, "grad_norm": 1.9482635259628296, "learning_rate": 1.2908011869436204e-05, "loss": 1.1394, "step": 435 }, { "epoch": 0.058304359454399574, "grad_norm": 1.925511121749878, "learning_rate": 1.2937685459940654e-05, "loss": 1.1292, "step": 436 }, { "epoch": 0.05843808504947847, "grad_norm": 1.9983603954315186, "learning_rate": 1.2967359050445105e-05, "loss": 1.0285, "step": 437 }, { "epoch": 0.05857181064455737, "grad_norm": 1.7735382318496704, "learning_rate": 1.2997032640949557e-05, "loss": 1.0779, "step": 438 }, { "epoch": 0.058705536239636265, "grad_norm": 1.6410995721817017, "learning_rate": 1.3026706231454008e-05, "loss": 1.0667, "step": 439 }, { "epoch": 0.05883926183471516, "grad_norm": 1.8130930662155151, "learning_rate": 1.3056379821958458e-05, "loss": 1.007, "step": 440 }, { "epoch": 0.058972987429794066, "grad_norm": 2.0643889904022217, "learning_rate": 1.3086053412462909e-05, "loss": 1.0519, "step": 441 }, { "epoch": 0.05910671302487296, "grad_norm": 1.7492270469665527, "learning_rate": 1.311572700296736e-05, "loss": 1.0643, "step": 442 }, { "epoch": 0.05924043861995186, "grad_norm": 1.8628084659576416, "learning_rate": 1.3145400593471811e-05, "loss": 1.0352, "step": 443 }, { "epoch": 0.05937416421503076, "grad_norm": 1.7055039405822754, "learning_rate": 1.3175074183976262e-05, "loss": 1.0474, "step": 444 }, { "epoch": 0.059507889810109654, "grad_norm": 1.7572062015533447, "learning_rate": 1.3204747774480712e-05, "loss": 1.0187, "step": 445 }, { "epoch": 0.05964161540518855, "grad_norm": 1.697801113128662, "learning_rate": 1.3234421364985164e-05, "loss": 1.0867, "step": 446 }, { "epoch": 0.05977534100026745, "grad_norm": 1.7225940227508545, "learning_rate": 1.3264094955489615e-05, "loss": 1.0676, "step": 447 }, { "epoch": 0.05990906659534635, "grad_norm": 1.7278627157211304, "learning_rate": 1.3293768545994065e-05, "loss": 0.9921, "step": 448 }, { "epoch": 0.06004279219042525, "grad_norm": 1.7823641300201416, "learning_rate": 1.3323442136498516e-05, "loss": 1.0143, "step": 449 }, { "epoch": 0.06017651778550415, "grad_norm": 1.6696406602859497, "learning_rate": 1.3353115727002968e-05, "loss": 1.0612, "step": 450 }, { "epoch": 0.060310243380583044, "grad_norm": 1.6459541320800781, "learning_rate": 1.3382789317507419e-05, "loss": 1.1657, "step": 451 }, { "epoch": 0.06044396897566194, "grad_norm": 2.0640554428100586, "learning_rate": 1.3412462908011869e-05, "loss": 1.1484, "step": 452 }, { "epoch": 0.06057769457074084, "grad_norm": 1.789831519126892, "learning_rate": 1.3442136498516321e-05, "loss": 1.0746, "step": 453 }, { "epoch": 0.060711420165819735, "grad_norm": 1.8117239475250244, "learning_rate": 1.3471810089020773e-05, "loss": 1.1049, "step": 454 }, { "epoch": 0.06084514576089864, "grad_norm": 1.7101154327392578, "learning_rate": 1.3501483679525224e-05, "loss": 1.127, "step": 455 }, { "epoch": 0.060978871355977536, "grad_norm": 1.7359715700149536, "learning_rate": 1.3531157270029676e-05, "loss": 1.0046, "step": 456 }, { "epoch": 0.06111259695105643, "grad_norm": 1.6229071617126465, "learning_rate": 1.3560830860534127e-05, "loss": 1.0604, "step": 457 }, { "epoch": 0.06124632254613533, "grad_norm": 1.6400669813156128, "learning_rate": 1.3590504451038577e-05, "loss": 0.965, "step": 458 }, { "epoch": 0.06138004814121423, "grad_norm": 1.9311940670013428, "learning_rate": 1.3620178041543028e-05, "loss": 1.0498, "step": 459 }, { "epoch": 0.061513773736293124, "grad_norm": 1.9464285373687744, "learning_rate": 1.364985163204748e-05, "loss": 1.1636, "step": 460 }, { "epoch": 0.06164749933137202, "grad_norm": 1.7950935363769531, "learning_rate": 1.367952522255193e-05, "loss": 1.0089, "step": 461 }, { "epoch": 0.061781224926450926, "grad_norm": 1.6973742246627808, "learning_rate": 1.370919881305638e-05, "loss": 0.9718, "step": 462 }, { "epoch": 0.06191495052152982, "grad_norm": 1.785804033279419, "learning_rate": 1.3738872403560833e-05, "loss": 1.2508, "step": 463 }, { "epoch": 0.06204867611660872, "grad_norm": 1.7714574337005615, "learning_rate": 1.3768545994065284e-05, "loss": 1.0941, "step": 464 }, { "epoch": 0.06218240171168762, "grad_norm": 1.916955590248108, "learning_rate": 1.3798219584569734e-05, "loss": 1.0871, "step": 465 }, { "epoch": 0.062316127306766514, "grad_norm": 2.0026700496673584, "learning_rate": 1.3827893175074185e-05, "loss": 1.0646, "step": 466 }, { "epoch": 0.06244985290184541, "grad_norm": 1.810957431793213, "learning_rate": 1.3857566765578637e-05, "loss": 0.9906, "step": 467 }, { "epoch": 0.06258357849692431, "grad_norm": 1.6772944927215576, "learning_rate": 1.3887240356083087e-05, "loss": 1.0513, "step": 468 }, { "epoch": 0.06271730409200321, "grad_norm": 1.9211347103118896, "learning_rate": 1.3916913946587538e-05, "loss": 1.1893, "step": 469 }, { "epoch": 0.0628510296870821, "grad_norm": 1.9009735584259033, "learning_rate": 1.3946587537091988e-05, "loss": 1.1878, "step": 470 }, { "epoch": 0.062984755282161, "grad_norm": 1.9403935670852661, "learning_rate": 1.397626112759644e-05, "loss": 1.2746, "step": 471 }, { "epoch": 0.06311848087723991, "grad_norm": 1.7685816287994385, "learning_rate": 1.4005934718100891e-05, "loss": 1.0635, "step": 472 }, { "epoch": 0.0632522064723188, "grad_norm": 1.9982801675796509, "learning_rate": 1.4035608308605341e-05, "loss": 0.9823, "step": 473 }, { "epoch": 0.0633859320673977, "grad_norm": 1.8481606245040894, "learning_rate": 1.4065281899109794e-05, "loss": 1.0665, "step": 474 }, { "epoch": 0.0635196576624766, "grad_norm": 1.8732539415359497, "learning_rate": 1.4094955489614244e-05, "loss": 1.1218, "step": 475 }, { "epoch": 0.0636533832575555, "grad_norm": 1.8248281478881836, "learning_rate": 1.4124629080118695e-05, "loss": 1.0921, "step": 476 }, { "epoch": 0.06378710885263439, "grad_norm": 1.7324649095535278, "learning_rate": 1.4154302670623145e-05, "loss": 1.107, "step": 477 }, { "epoch": 0.06392083444771329, "grad_norm": 1.9146908521652222, "learning_rate": 1.4183976261127597e-05, "loss": 1.123, "step": 478 }, { "epoch": 0.0640545600427922, "grad_norm": 1.6852599382400513, "learning_rate": 1.4213649851632048e-05, "loss": 1.1087, "step": 479 }, { "epoch": 0.06418828563787109, "grad_norm": 1.7641593217849731, "learning_rate": 1.42433234421365e-05, "loss": 1.04, "step": 480 }, { "epoch": 0.06432201123294999, "grad_norm": 1.8022555112838745, "learning_rate": 1.4272997032640952e-05, "loss": 1.1522, "step": 481 }, { "epoch": 0.06445573682802888, "grad_norm": 1.7718092203140259, "learning_rate": 1.4302670623145403e-05, "loss": 1.1176, "step": 482 }, { "epoch": 0.06458946242310779, "grad_norm": 1.91260826587677, "learning_rate": 1.4332344213649853e-05, "loss": 1.1029, "step": 483 }, { "epoch": 0.06472318801818668, "grad_norm": 1.8111521005630493, "learning_rate": 1.4362017804154305e-05, "loss": 1.0738, "step": 484 }, { "epoch": 0.06485691361326558, "grad_norm": 1.8115615844726562, "learning_rate": 1.4391691394658756e-05, "loss": 1.0991, "step": 485 }, { "epoch": 0.06499063920834448, "grad_norm": 1.7675265073776245, "learning_rate": 1.4421364985163206e-05, "loss": 1.0766, "step": 486 }, { "epoch": 0.06512436480342337, "grad_norm": 1.7739450931549072, "learning_rate": 1.4451038575667657e-05, "loss": 1.0198, "step": 487 }, { "epoch": 0.06525809039850228, "grad_norm": 1.7324966192245483, "learning_rate": 1.4480712166172109e-05, "loss": 1.0157, "step": 488 }, { "epoch": 0.06539181599358117, "grad_norm": 1.876874566078186, "learning_rate": 1.451038575667656e-05, "loss": 0.9863, "step": 489 }, { "epoch": 0.06552554158866007, "grad_norm": 1.9182907342910767, "learning_rate": 1.454005934718101e-05, "loss": 1.2746, "step": 490 }, { "epoch": 0.06565926718373896, "grad_norm": 1.8630485534667969, "learning_rate": 1.456973293768546e-05, "loss": 1.2688, "step": 491 }, { "epoch": 0.06579299277881787, "grad_norm": 1.8571923971176147, "learning_rate": 1.4599406528189913e-05, "loss": 1.016, "step": 492 }, { "epoch": 0.06592671837389677, "grad_norm": 1.9601504802703857, "learning_rate": 1.4629080118694363e-05, "loss": 1.0808, "step": 493 }, { "epoch": 0.06606044396897566, "grad_norm": 2.0460402965545654, "learning_rate": 1.4658753709198814e-05, "loss": 1.181, "step": 494 }, { "epoch": 0.06619416956405456, "grad_norm": 1.81797194480896, "learning_rate": 1.4688427299703266e-05, "loss": 1.0934, "step": 495 }, { "epoch": 0.06632789515913345, "grad_norm": 1.9067633152008057, "learning_rate": 1.4718100890207716e-05, "loss": 1.0242, "step": 496 }, { "epoch": 0.06646162075421236, "grad_norm": 1.9588450193405151, "learning_rate": 1.4747774480712167e-05, "loss": 0.9817, "step": 497 }, { "epoch": 0.06659534634929125, "grad_norm": 1.8363555669784546, "learning_rate": 1.4777448071216617e-05, "loss": 1.1149, "step": 498 }, { "epoch": 0.06672907194437015, "grad_norm": 1.9650105237960815, "learning_rate": 1.480712166172107e-05, "loss": 1.0482, "step": 499 }, { "epoch": 0.06686279753944906, "grad_norm": 1.9216324090957642, "learning_rate": 1.483679525222552e-05, "loss": 1.0384, "step": 500 }, { "epoch": 0.06699652313452795, "grad_norm": 1.835261583328247, "learning_rate": 1.486646884272997e-05, "loss": 1.0416, "step": 501 }, { "epoch": 0.06713024872960685, "grad_norm": 1.9500707387924194, "learning_rate": 1.4896142433234421e-05, "loss": 1.1085, "step": 502 }, { "epoch": 0.06726397432468574, "grad_norm": 1.7828469276428223, "learning_rate": 1.4925816023738873e-05, "loss": 1.1649, "step": 503 }, { "epoch": 0.06739769991976464, "grad_norm": 1.9030284881591797, "learning_rate": 1.4955489614243324e-05, "loss": 1.1232, "step": 504 }, { "epoch": 0.06753142551484353, "grad_norm": 1.6937415599822998, "learning_rate": 1.4985163204747774e-05, "loss": 1.0196, "step": 505 }, { "epoch": 0.06766515110992244, "grad_norm": 1.98890221118927, "learning_rate": 1.5014836795252228e-05, "loss": 1.0206, "step": 506 }, { "epoch": 0.06779887670500134, "grad_norm": 1.7247308492660522, "learning_rate": 1.5044510385756679e-05, "loss": 1.0526, "step": 507 }, { "epoch": 0.06793260230008023, "grad_norm": 1.7958847284317017, "learning_rate": 1.507418397626113e-05, "loss": 1.1249, "step": 508 }, { "epoch": 0.06806632789515914, "grad_norm": 1.601080298423767, "learning_rate": 1.5103857566765581e-05, "loss": 1.0878, "step": 509 }, { "epoch": 0.06820005349023803, "grad_norm": 1.6205987930297852, "learning_rate": 1.5133531157270032e-05, "loss": 1.0103, "step": 510 }, { "epoch": 0.06833377908531693, "grad_norm": 1.8104535341262817, "learning_rate": 1.5163204747774482e-05, "loss": 1.0343, "step": 511 }, { "epoch": 0.06846750468039582, "grad_norm": 1.7196216583251953, "learning_rate": 1.5192878338278933e-05, "loss": 1.0476, "step": 512 }, { "epoch": 0.06860123027547473, "grad_norm": 1.7211581468582153, "learning_rate": 1.5222551928783385e-05, "loss": 1.2031, "step": 513 }, { "epoch": 0.06873495587055363, "grad_norm": 1.576137661933899, "learning_rate": 1.5252225519287836e-05, "loss": 0.9521, "step": 514 }, { "epoch": 0.06886868146563252, "grad_norm": 1.800772786140442, "learning_rate": 1.5281899109792286e-05, "loss": 1.0912, "step": 515 }, { "epoch": 0.06900240706071142, "grad_norm": 1.9550946950912476, "learning_rate": 1.5311572700296738e-05, "loss": 1.0402, "step": 516 }, { "epoch": 0.06913613265579031, "grad_norm": 1.831992506980896, "learning_rate": 1.5341246290801187e-05, "loss": 1.0727, "step": 517 }, { "epoch": 0.06926985825086922, "grad_norm": 1.7334562540054321, "learning_rate": 1.537091988130564e-05, "loss": 1.0647, "step": 518 }, { "epoch": 0.06940358384594811, "grad_norm": 2.966041088104248, "learning_rate": 1.540059347181009e-05, "loss": 1.0686, "step": 519 }, { "epoch": 0.06953730944102701, "grad_norm": 1.5652555227279663, "learning_rate": 1.543026706231454e-05, "loss": 0.9349, "step": 520 }, { "epoch": 0.06967103503610592, "grad_norm": 1.8496533632278442, "learning_rate": 1.5459940652818992e-05, "loss": 1.0904, "step": 521 }, { "epoch": 0.0698047606311848, "grad_norm": 1.8235584497451782, "learning_rate": 1.5489614243323445e-05, "loss": 1.1043, "step": 522 }, { "epoch": 0.06993848622626371, "grad_norm": 2.2706425189971924, "learning_rate": 1.5519287833827893e-05, "loss": 1.2216, "step": 523 }, { "epoch": 0.0700722118213426, "grad_norm": 1.6049624681472778, "learning_rate": 1.5548961424332346e-05, "loss": 1.1498, "step": 524 }, { "epoch": 0.0702059374164215, "grad_norm": 1.6770635843276978, "learning_rate": 1.5578635014836794e-05, "loss": 1.0565, "step": 525 }, { "epoch": 0.0703396630115004, "grad_norm": 1.87369966506958, "learning_rate": 1.5608308605341247e-05, "loss": 1.1357, "step": 526 }, { "epoch": 0.0704733886065793, "grad_norm": 1.8151572942733765, "learning_rate": 1.56379821958457e-05, "loss": 1.0783, "step": 527 }, { "epoch": 0.0706071142016582, "grad_norm": 1.780057668685913, "learning_rate": 1.5667655786350148e-05, "loss": 1.1617, "step": 528 }, { "epoch": 0.07074083979673709, "grad_norm": 1.8757497072219849, "learning_rate": 1.56973293768546e-05, "loss": 1.1539, "step": 529 }, { "epoch": 0.070874565391816, "grad_norm": 1.6305298805236816, "learning_rate": 1.5727002967359052e-05, "loss": 1.1423, "step": 530 }, { "epoch": 0.07100829098689489, "grad_norm": 1.9086893796920776, "learning_rate": 1.57566765578635e-05, "loss": 1.1351, "step": 531 }, { "epoch": 0.07114201658197379, "grad_norm": 1.8791098594665527, "learning_rate": 1.5786350148367956e-05, "loss": 1.141, "step": 532 }, { "epoch": 0.07127574217705268, "grad_norm": 1.7010337114334106, "learning_rate": 1.5816023738872405e-05, "loss": 1.0457, "step": 533 }, { "epoch": 0.07140946777213159, "grad_norm": 1.7061164379119873, "learning_rate": 1.5845697329376857e-05, "loss": 1.1403, "step": 534 }, { "epoch": 0.07154319336721049, "grad_norm": 1.6749473810195923, "learning_rate": 1.5875370919881306e-05, "loss": 1.1078, "step": 535 }, { "epoch": 0.07167691896228938, "grad_norm": 1.814115285873413, "learning_rate": 1.590504451038576e-05, "loss": 0.9814, "step": 536 }, { "epoch": 0.07181064455736828, "grad_norm": 2.100039005279541, "learning_rate": 1.593471810089021e-05, "loss": 1.0659, "step": 537 }, { "epoch": 0.07194437015244717, "grad_norm": 1.8403300046920776, "learning_rate": 1.596439169139466e-05, "loss": 1.0329, "step": 538 }, { "epoch": 0.07207809574752608, "grad_norm": 1.7543425559997559, "learning_rate": 1.599406528189911e-05, "loss": 1.0606, "step": 539 }, { "epoch": 0.07221182134260497, "grad_norm": 1.798280954360962, "learning_rate": 1.6023738872403564e-05, "loss": 1.1828, "step": 540 }, { "epoch": 0.07234554693768387, "grad_norm": 1.6168230772018433, "learning_rate": 1.6053412462908013e-05, "loss": 0.9692, "step": 541 }, { "epoch": 0.07247927253276278, "grad_norm": 1.9506645202636719, "learning_rate": 1.6083086053412465e-05, "loss": 1.1364, "step": 542 }, { "epoch": 0.07261299812784167, "grad_norm": 1.5897406339645386, "learning_rate": 1.6112759643916917e-05, "loss": 0.9695, "step": 543 }, { "epoch": 0.07274672372292057, "grad_norm": 1.6204168796539307, "learning_rate": 1.6142433234421366e-05, "loss": 0.9741, "step": 544 }, { "epoch": 0.07288044931799946, "grad_norm": 1.7683537006378174, "learning_rate": 1.6172106824925818e-05, "loss": 1.0128, "step": 545 }, { "epoch": 0.07301417491307836, "grad_norm": 1.842466115951538, "learning_rate": 1.6201780415430267e-05, "loss": 1.1184, "step": 546 }, { "epoch": 0.07314790050815727, "grad_norm": 1.9419041872024536, "learning_rate": 1.623145400593472e-05, "loss": 1.2488, "step": 547 }, { "epoch": 0.07328162610323616, "grad_norm": 1.639898419380188, "learning_rate": 1.626112759643917e-05, "loss": 1.0694, "step": 548 }, { "epoch": 0.07341535169831506, "grad_norm": 1.6949163675308228, "learning_rate": 1.629080118694362e-05, "loss": 1.1051, "step": 549 }, { "epoch": 0.07354907729339395, "grad_norm": 1.7163790464401245, "learning_rate": 1.6320474777448072e-05, "loss": 1.1847, "step": 550 }, { "epoch": 0.07368280288847286, "grad_norm": 1.5699164867401123, "learning_rate": 1.6350148367952524e-05, "loss": 1.0933, "step": 551 }, { "epoch": 0.07381652848355175, "grad_norm": 1.6123524904251099, "learning_rate": 1.6379821958456973e-05, "loss": 1.0344, "step": 552 }, { "epoch": 0.07395025407863065, "grad_norm": 1.5783841609954834, "learning_rate": 1.6409495548961425e-05, "loss": 1.0606, "step": 553 }, { "epoch": 0.07408397967370955, "grad_norm": 1.6662625074386597, "learning_rate": 1.6439169139465877e-05, "loss": 1.0209, "step": 554 }, { "epoch": 0.07421770526878844, "grad_norm": 1.726669192314148, "learning_rate": 1.6468842729970326e-05, "loss": 1.1533, "step": 555 }, { "epoch": 0.07435143086386735, "grad_norm": 1.3600177764892578, "learning_rate": 1.649851632047478e-05, "loss": 1.0358, "step": 556 }, { "epoch": 0.07448515645894624, "grad_norm": 1.5984845161437988, "learning_rate": 1.6528189910979227e-05, "loss": 1.1866, "step": 557 }, { "epoch": 0.07461888205402514, "grad_norm": 1.819583773612976, "learning_rate": 1.6557863501483683e-05, "loss": 1.2103, "step": 558 }, { "epoch": 0.07475260764910403, "grad_norm": 1.7507035732269287, "learning_rate": 1.658753709198813e-05, "loss": 1.0579, "step": 559 }, { "epoch": 0.07488633324418294, "grad_norm": 1.8777177333831787, "learning_rate": 1.6617210682492584e-05, "loss": 1.2186, "step": 560 }, { "epoch": 0.07502005883926184, "grad_norm": 1.718030333518982, "learning_rate": 1.6646884272997036e-05, "loss": 1.1097, "step": 561 }, { "epoch": 0.07515378443434073, "grad_norm": 1.8223965167999268, "learning_rate": 1.6676557863501485e-05, "loss": 1.0753, "step": 562 }, { "epoch": 0.07528751002941964, "grad_norm": 1.5852609872817993, "learning_rate": 1.6706231454005937e-05, "loss": 1.1194, "step": 563 }, { "epoch": 0.07542123562449853, "grad_norm": 1.6189275979995728, "learning_rate": 1.673590504451039e-05, "loss": 1.1002, "step": 564 }, { "epoch": 0.07555496121957743, "grad_norm": 1.6777567863464355, "learning_rate": 1.6765578635014838e-05, "loss": 1.1022, "step": 565 }, { "epoch": 0.07568868681465632, "grad_norm": 1.5834295749664307, "learning_rate": 1.679525222551929e-05, "loss": 1.1079, "step": 566 }, { "epoch": 0.07582241240973522, "grad_norm": 1.673142671585083, "learning_rate": 1.682492581602374e-05, "loss": 1.1104, "step": 567 }, { "epoch": 0.07595613800481413, "grad_norm": 1.8786754608154297, "learning_rate": 1.685459940652819e-05, "loss": 1.1739, "step": 568 }, { "epoch": 0.07608986359989302, "grad_norm": 1.5208408832550049, "learning_rate": 1.6884272997032643e-05, "loss": 1.1456, "step": 569 }, { "epoch": 0.07622358919497192, "grad_norm": 1.6646441221237183, "learning_rate": 1.6913946587537092e-05, "loss": 1.0463, "step": 570 }, { "epoch": 0.07635731479005081, "grad_norm": 1.5181422233581543, "learning_rate": 1.6943620178041544e-05, "loss": 1.0637, "step": 571 }, { "epoch": 0.07649104038512972, "grad_norm": 1.8138445615768433, "learning_rate": 1.6973293768545997e-05, "loss": 0.9685, "step": 572 }, { "epoch": 0.0766247659802086, "grad_norm": 1.8401823043823242, "learning_rate": 1.7002967359050445e-05, "loss": 1.1918, "step": 573 }, { "epoch": 0.07675849157528751, "grad_norm": 1.8635797500610352, "learning_rate": 1.7032640949554898e-05, "loss": 1.2097, "step": 574 }, { "epoch": 0.07689221717036641, "grad_norm": 1.6062102317810059, "learning_rate": 1.706231454005935e-05, "loss": 1.1907, "step": 575 }, { "epoch": 0.0770259427654453, "grad_norm": 1.7362016439437866, "learning_rate": 1.70919881305638e-05, "loss": 1.0936, "step": 576 }, { "epoch": 0.07715966836052421, "grad_norm": 1.7279845476150513, "learning_rate": 1.712166172106825e-05, "loss": 1.1769, "step": 577 }, { "epoch": 0.0772933939556031, "grad_norm": 1.4477804899215698, "learning_rate": 1.71513353115727e-05, "loss": 0.9708, "step": 578 }, { "epoch": 0.077427119550682, "grad_norm": 1.7206075191497803, "learning_rate": 1.7181008902077152e-05, "loss": 0.971, "step": 579 }, { "epoch": 0.07756084514576089, "grad_norm": 1.6770507097244263, "learning_rate": 1.7210682492581604e-05, "loss": 1.1513, "step": 580 }, { "epoch": 0.0776945707408398, "grad_norm": 1.783970832824707, "learning_rate": 1.7240356083086053e-05, "loss": 1.0156, "step": 581 }, { "epoch": 0.0778282963359187, "grad_norm": 1.6209423542022705, "learning_rate": 1.7270029673590505e-05, "loss": 1.0152, "step": 582 }, { "epoch": 0.07796202193099759, "grad_norm": 1.7813389301300049, "learning_rate": 1.7299703264094957e-05, "loss": 1.1236, "step": 583 }, { "epoch": 0.0780957475260765, "grad_norm": 1.427749514579773, "learning_rate": 1.732937685459941e-05, "loss": 1.0396, "step": 584 }, { "epoch": 0.07822947312115539, "grad_norm": 1.670377492904663, "learning_rate": 1.735905044510386e-05, "loss": 1.0865, "step": 585 }, { "epoch": 0.07836319871623429, "grad_norm": 1.6798478364944458, "learning_rate": 1.738872403560831e-05, "loss": 1.1004, "step": 586 }, { "epoch": 0.07849692431131318, "grad_norm": 1.6190632581710815, "learning_rate": 1.7418397626112763e-05, "loss": 1.1169, "step": 587 }, { "epoch": 0.07863064990639208, "grad_norm": 1.64007568359375, "learning_rate": 1.744807121661721e-05, "loss": 1.0352, "step": 588 }, { "epoch": 0.07876437550147099, "grad_norm": 1.5555752515792847, "learning_rate": 1.7477744807121664e-05, "loss": 1.0817, "step": 589 }, { "epoch": 0.07889810109654988, "grad_norm": 1.62855064868927, "learning_rate": 1.7507418397626116e-05, "loss": 1.1072, "step": 590 }, { "epoch": 0.07903182669162878, "grad_norm": 1.67997407913208, "learning_rate": 1.7537091988130565e-05, "loss": 1.0737, "step": 591 }, { "epoch": 0.07916555228670767, "grad_norm": 1.6347873210906982, "learning_rate": 1.7566765578635017e-05, "loss": 1.1202, "step": 592 }, { "epoch": 0.07929927788178658, "grad_norm": 1.4767524003982544, "learning_rate": 1.759643916913947e-05, "loss": 0.925, "step": 593 }, { "epoch": 0.07943300347686547, "grad_norm": 1.7255375385284424, "learning_rate": 1.7626112759643918e-05, "loss": 1.1712, "step": 594 }, { "epoch": 0.07956672907194437, "grad_norm": 1.564583420753479, "learning_rate": 1.765578635014837e-05, "loss": 1.0353, "step": 595 }, { "epoch": 0.07970045466702327, "grad_norm": 1.6714822053909302, "learning_rate": 1.7685459940652822e-05, "loss": 1.057, "step": 596 }, { "epoch": 0.07983418026210216, "grad_norm": 1.7795729637145996, "learning_rate": 1.771513353115727e-05, "loss": 1.0538, "step": 597 }, { "epoch": 0.07996790585718107, "grad_norm": 1.663192629814148, "learning_rate": 1.7744807121661723e-05, "loss": 1.0549, "step": 598 }, { "epoch": 0.08010163145225996, "grad_norm": 1.563331127166748, "learning_rate": 1.7774480712166172e-05, "loss": 1.0973, "step": 599 }, { "epoch": 0.08023535704733886, "grad_norm": 1.6699390411376953, "learning_rate": 1.7804154302670624e-05, "loss": 1.0552, "step": 600 }, { "epoch": 0.08036908264241775, "grad_norm": 1.5246310234069824, "learning_rate": 1.7833827893175076e-05, "loss": 0.9989, "step": 601 }, { "epoch": 0.08050280823749666, "grad_norm": 1.7135409116744995, "learning_rate": 1.7863501483679525e-05, "loss": 1.0441, "step": 602 }, { "epoch": 0.08063653383257556, "grad_norm": 1.7785507440567017, "learning_rate": 1.7893175074183977e-05, "loss": 1.0397, "step": 603 }, { "epoch": 0.08077025942765445, "grad_norm": 1.7183959484100342, "learning_rate": 1.792284866468843e-05, "loss": 1.0164, "step": 604 }, { "epoch": 0.08090398502273335, "grad_norm": 1.6679848432540894, "learning_rate": 1.7952522255192878e-05, "loss": 1.164, "step": 605 }, { "epoch": 0.08103771061781224, "grad_norm": 1.684942603111267, "learning_rate": 1.798219584569733e-05, "loss": 1.1223, "step": 606 }, { "epoch": 0.08117143621289115, "grad_norm": 1.6504472494125366, "learning_rate": 1.801186943620178e-05, "loss": 1.0877, "step": 607 }, { "epoch": 0.08130516180797004, "grad_norm": 1.613499402999878, "learning_rate": 1.804154302670623e-05, "loss": 1.2023, "step": 608 }, { "epoch": 0.08143888740304894, "grad_norm": 1.6004951000213623, "learning_rate": 1.8071216617210684e-05, "loss": 1.1015, "step": 609 }, { "epoch": 0.08157261299812785, "grad_norm": 1.4661237001419067, "learning_rate": 1.8100890207715136e-05, "loss": 1.0876, "step": 610 }, { "epoch": 0.08170633859320674, "grad_norm": 1.6976242065429688, "learning_rate": 1.8130563798219588e-05, "loss": 1.0283, "step": 611 }, { "epoch": 0.08184006418828564, "grad_norm": 1.6769866943359375, "learning_rate": 1.8160237388724037e-05, "loss": 1.1553, "step": 612 }, { "epoch": 0.08197378978336453, "grad_norm": 1.6379057168960571, "learning_rate": 1.818991097922849e-05, "loss": 1.2497, "step": 613 }, { "epoch": 0.08210751537844344, "grad_norm": 1.6722640991210938, "learning_rate": 1.821958456973294e-05, "loss": 1.1207, "step": 614 }, { "epoch": 0.08224124097352233, "grad_norm": 1.6503626108169556, "learning_rate": 1.824925816023739e-05, "loss": 1.2204, "step": 615 }, { "epoch": 0.08237496656860123, "grad_norm": 1.6336792707443237, "learning_rate": 1.8278931750741842e-05, "loss": 1.0833, "step": 616 }, { "epoch": 0.08250869216368013, "grad_norm": 1.490787386894226, "learning_rate": 1.830860534124629e-05, "loss": 1.0959, "step": 617 }, { "epoch": 0.08264241775875902, "grad_norm": 1.6635373830795288, "learning_rate": 1.8338278931750743e-05, "loss": 1.1261, "step": 618 }, { "epoch": 0.08277614335383793, "grad_norm": 1.6656502485275269, "learning_rate": 1.8367952522255195e-05, "loss": 1.096, "step": 619 }, { "epoch": 0.08290986894891682, "grad_norm": 1.7153195142745972, "learning_rate": 1.8397626112759644e-05, "loss": 1.0446, "step": 620 }, { "epoch": 0.08304359454399572, "grad_norm": 1.662718415260315, "learning_rate": 1.8427299703264096e-05, "loss": 0.9879, "step": 621 }, { "epoch": 0.08317732013907461, "grad_norm": 1.5541090965270996, "learning_rate": 1.845697329376855e-05, "loss": 1.0667, "step": 622 }, { "epoch": 0.08331104573415352, "grad_norm": 1.8365212678909302, "learning_rate": 1.8486646884272997e-05, "loss": 1.0569, "step": 623 }, { "epoch": 0.08344477132923242, "grad_norm": 1.487030267715454, "learning_rate": 1.851632047477745e-05, "loss": 0.973, "step": 624 }, { "epoch": 0.08357849692431131, "grad_norm": 1.6239988803863525, "learning_rate": 1.8545994065281902e-05, "loss": 1.1092, "step": 625 }, { "epoch": 0.08371222251939021, "grad_norm": 1.47816801071167, "learning_rate": 1.857566765578635e-05, "loss": 0.9719, "step": 626 }, { "epoch": 0.0838459481144691, "grad_norm": 1.5233548879623413, "learning_rate": 1.8605341246290803e-05, "loss": 1.002, "step": 627 }, { "epoch": 0.08397967370954801, "grad_norm": 1.4014291763305664, "learning_rate": 1.863501483679525e-05, "loss": 1.0238, "step": 628 }, { "epoch": 0.08411339930462691, "grad_norm": 1.6621612310409546, "learning_rate": 1.8664688427299704e-05, "loss": 1.0618, "step": 629 }, { "epoch": 0.0842471248997058, "grad_norm": 1.8631902933120728, "learning_rate": 1.8694362017804156e-05, "loss": 1.1086, "step": 630 }, { "epoch": 0.0843808504947847, "grad_norm": 1.8764920234680176, "learning_rate": 1.8724035608308605e-05, "loss": 1.132, "step": 631 }, { "epoch": 0.0845145760898636, "grad_norm": 1.4854700565338135, "learning_rate": 1.8753709198813057e-05, "loss": 1.0871, "step": 632 }, { "epoch": 0.0846483016849425, "grad_norm": 1.701225996017456, "learning_rate": 1.878338278931751e-05, "loss": 1.1424, "step": 633 }, { "epoch": 0.08478202728002139, "grad_norm": 1.5029900074005127, "learning_rate": 1.8813056379821958e-05, "loss": 1.1177, "step": 634 }, { "epoch": 0.0849157528751003, "grad_norm": 1.762671709060669, "learning_rate": 1.884272997032641e-05, "loss": 1.2176, "step": 635 }, { "epoch": 0.0850494784701792, "grad_norm": 1.703949213027954, "learning_rate": 1.8872403560830862e-05, "loss": 1.0548, "step": 636 }, { "epoch": 0.08518320406525809, "grad_norm": 1.539078950881958, "learning_rate": 1.8902077151335315e-05, "loss": 1.1042, "step": 637 }, { "epoch": 0.085316929660337, "grad_norm": 1.5903327465057373, "learning_rate": 1.8931750741839763e-05, "loss": 1.1119, "step": 638 }, { "epoch": 0.08545065525541588, "grad_norm": 1.4694254398345947, "learning_rate": 1.8961424332344216e-05, "loss": 1.0844, "step": 639 }, { "epoch": 0.08558438085049479, "grad_norm": 1.6101256608963013, "learning_rate": 1.8991097922848668e-05, "loss": 1.1285, "step": 640 }, { "epoch": 0.08571810644557368, "grad_norm": 1.630458950996399, "learning_rate": 1.9020771513353117e-05, "loss": 1.0546, "step": 641 }, { "epoch": 0.08585183204065258, "grad_norm": 1.5213385820388794, "learning_rate": 1.905044510385757e-05, "loss": 1.1741, "step": 642 }, { "epoch": 0.08598555763573149, "grad_norm": 1.5422383546829224, "learning_rate": 1.908011869436202e-05, "loss": 1.0516, "step": 643 }, { "epoch": 0.08611928323081038, "grad_norm": 1.7990802526474, "learning_rate": 1.910979228486647e-05, "loss": 1.0978, "step": 644 }, { "epoch": 0.08625300882588928, "grad_norm": 1.5937308073043823, "learning_rate": 1.9139465875370922e-05, "loss": 1.1569, "step": 645 }, { "epoch": 0.08638673442096817, "grad_norm": 1.50184965133667, "learning_rate": 1.9169139465875374e-05, "loss": 1.1133, "step": 646 }, { "epoch": 0.08652046001604707, "grad_norm": 1.4961223602294922, "learning_rate": 1.9198813056379823e-05, "loss": 1.1017, "step": 647 }, { "epoch": 0.08665418561112596, "grad_norm": 1.6108611822128296, "learning_rate": 1.9228486646884275e-05, "loss": 1.1272, "step": 648 }, { "epoch": 0.08678791120620487, "grad_norm": 1.680821418762207, "learning_rate": 1.9258160237388724e-05, "loss": 1.0988, "step": 649 }, { "epoch": 0.08692163680128377, "grad_norm": 1.5543137788772583, "learning_rate": 1.9287833827893176e-05, "loss": 1.1021, "step": 650 }, { "epoch": 0.08705536239636266, "grad_norm": 1.5005958080291748, "learning_rate": 1.931750741839763e-05, "loss": 1.0576, "step": 651 }, { "epoch": 0.08718908799144157, "grad_norm": 1.5387135744094849, "learning_rate": 1.9347181008902077e-05, "loss": 1.0737, "step": 652 }, { "epoch": 0.08732281358652046, "grad_norm": 1.5775527954101562, "learning_rate": 1.937685459940653e-05, "loss": 1.081, "step": 653 }, { "epoch": 0.08745653918159936, "grad_norm": 1.4933686256408691, "learning_rate": 1.940652818991098e-05, "loss": 1.0057, "step": 654 }, { "epoch": 0.08759026477667825, "grad_norm": 1.4059333801269531, "learning_rate": 1.943620178041543e-05, "loss": 1.0166, "step": 655 }, { "epoch": 0.08772399037175715, "grad_norm": 1.7391966581344604, "learning_rate": 1.9465875370919883e-05, "loss": 1.1102, "step": 656 }, { "epoch": 0.08785771596683606, "grad_norm": 1.7350902557373047, "learning_rate": 1.9495548961424335e-05, "loss": 1.1912, "step": 657 }, { "epoch": 0.08799144156191495, "grad_norm": 1.5435680150985718, "learning_rate": 1.9525222551928784e-05, "loss": 1.0492, "step": 658 }, { "epoch": 0.08812516715699385, "grad_norm": 1.4920226335525513, "learning_rate": 1.9554896142433236e-05, "loss": 1.0184, "step": 659 }, { "epoch": 0.08825889275207274, "grad_norm": 1.5866831541061401, "learning_rate": 1.9584569732937684e-05, "loss": 1.0989, "step": 660 }, { "epoch": 0.08839261834715165, "grad_norm": 1.4927951097488403, "learning_rate": 1.9614243323442137e-05, "loss": 1.0298, "step": 661 }, { "epoch": 0.08852634394223054, "grad_norm": 1.7569540739059448, "learning_rate": 1.964391691394659e-05, "loss": 1.2346, "step": 662 }, { "epoch": 0.08866006953730944, "grad_norm": 1.4895068407058716, "learning_rate": 1.967359050445104e-05, "loss": 1.1893, "step": 663 }, { "epoch": 0.08879379513238835, "grad_norm": 1.5855401754379272, "learning_rate": 1.9703264094955493e-05, "loss": 1.19, "step": 664 }, { "epoch": 0.08892752072746724, "grad_norm": 1.5302454233169556, "learning_rate": 1.9732937685459942e-05, "loss": 1.1528, "step": 665 }, { "epoch": 0.08906124632254614, "grad_norm": 1.4474472999572754, "learning_rate": 1.9762611275964394e-05, "loss": 1.0872, "step": 666 }, { "epoch": 0.08919497191762503, "grad_norm": 1.627776026725769, "learning_rate": 1.9792284866468846e-05, "loss": 1.091, "step": 667 }, { "epoch": 0.08932869751270393, "grad_norm": 1.5421853065490723, "learning_rate": 1.9821958456973295e-05, "loss": 1.113, "step": 668 }, { "epoch": 0.08946242310778282, "grad_norm": 1.5004169940948486, "learning_rate": 1.9851632047477747e-05, "loss": 1.2059, "step": 669 }, { "epoch": 0.08959614870286173, "grad_norm": 1.4837934970855713, "learning_rate": 1.9881305637982196e-05, "loss": 1.1013, "step": 670 }, { "epoch": 0.08972987429794063, "grad_norm": 1.455596923828125, "learning_rate": 1.991097922848665e-05, "loss": 0.9714, "step": 671 }, { "epoch": 0.08986359989301952, "grad_norm": 1.584262490272522, "learning_rate": 1.99406528189911e-05, "loss": 1.0091, "step": 672 }, { "epoch": 0.08999732548809843, "grad_norm": 1.732277750968933, "learning_rate": 1.997032640949555e-05, "loss": 1.0931, "step": 673 }, { "epoch": 0.09013105108317732, "grad_norm": 1.5485100746154785, "learning_rate": 2e-05, "loss": 1.1427, "step": 674 }, { "epoch": 0.09026477667825622, "grad_norm": 1.5276463031768799, "learning_rate": 1.9999999895779787e-05, "loss": 1.0966, "step": 675 }, { "epoch": 0.09039850227333511, "grad_norm": 1.573179006576538, "learning_rate": 1.9999999583119143e-05, "loss": 1.1424, "step": 676 }, { "epoch": 0.09053222786841401, "grad_norm": 1.5417463779449463, "learning_rate": 1.9999999062018074e-05, "loss": 1.0743, "step": 677 }, { "epoch": 0.09066595346349292, "grad_norm": 1.3638125658035278, "learning_rate": 1.99999983324766e-05, "loss": 0.9034, "step": 678 }, { "epoch": 0.09079967905857181, "grad_norm": 1.5211032629013062, "learning_rate": 1.9999997394494723e-05, "loss": 1.0682, "step": 679 }, { "epoch": 0.09093340465365071, "grad_norm": 1.5782675743103027, "learning_rate": 1.999999624807247e-05, "loss": 1.0939, "step": 680 }, { "epoch": 0.0910671302487296, "grad_norm": 1.6293474435806274, "learning_rate": 1.999999489320987e-05, "loss": 1.1032, "step": 681 }, { "epoch": 0.0912008558438085, "grad_norm": 1.817825198173523, "learning_rate": 1.9999993329906938e-05, "loss": 1.178, "step": 682 }, { "epoch": 0.0913345814388874, "grad_norm": 1.5964808464050293, "learning_rate": 1.9999991558163718e-05, "loss": 1.1079, "step": 683 }, { "epoch": 0.0914683070339663, "grad_norm": 1.506198763847351, "learning_rate": 1.9999989577980245e-05, "loss": 1.1913, "step": 684 }, { "epoch": 0.0916020326290452, "grad_norm": 1.6369527578353882, "learning_rate": 1.9999987389356552e-05, "loss": 1.1608, "step": 685 }, { "epoch": 0.0917357582241241, "grad_norm": 1.560995101928711, "learning_rate": 1.9999984992292692e-05, "loss": 1.1004, "step": 686 }, { "epoch": 0.091869483819203, "grad_norm": 1.5909690856933594, "learning_rate": 1.9999982386788717e-05, "loss": 1.1373, "step": 687 }, { "epoch": 0.09200320941428189, "grad_norm": 1.6165322065353394, "learning_rate": 1.999997957284468e-05, "loss": 1.0808, "step": 688 }, { "epoch": 0.09213693500936079, "grad_norm": 1.7125903367996216, "learning_rate": 1.9999976550460633e-05, "loss": 1.1867, "step": 689 }, { "epoch": 0.09227066060443968, "grad_norm": 1.5367757081985474, "learning_rate": 1.999997331963665e-05, "loss": 1.0247, "step": 690 }, { "epoch": 0.09240438619951859, "grad_norm": 1.5418226718902588, "learning_rate": 1.9999969880372784e-05, "loss": 1.0632, "step": 691 }, { "epoch": 0.09253811179459749, "grad_norm": 1.5153613090515137, "learning_rate": 1.999996623266912e-05, "loss": 1.1241, "step": 692 }, { "epoch": 0.09267183738967638, "grad_norm": 1.5809426307678223, "learning_rate": 1.9999962376525726e-05, "loss": 1.0537, "step": 693 }, { "epoch": 0.09280556298475529, "grad_norm": 1.8835455179214478, "learning_rate": 1.9999958311942685e-05, "loss": 1.1884, "step": 694 }, { "epoch": 0.09293928857983418, "grad_norm": 1.4791765213012695, "learning_rate": 1.9999954038920086e-05, "loss": 1.1232, "step": 695 }, { "epoch": 0.09307301417491308, "grad_norm": 1.4661179780960083, "learning_rate": 1.999994955745801e-05, "loss": 1.0736, "step": 696 }, { "epoch": 0.09320673976999197, "grad_norm": 1.7484447956085205, "learning_rate": 1.9999944867556554e-05, "loss": 1.1007, "step": 697 }, { "epoch": 0.09334046536507087, "grad_norm": 1.4332221746444702, "learning_rate": 1.999993996921582e-05, "loss": 0.9437, "step": 698 }, { "epoch": 0.09347419096014978, "grad_norm": 1.5316094160079956, "learning_rate": 1.9999934862435904e-05, "loss": 1.1683, "step": 699 }, { "epoch": 0.09360791655522867, "grad_norm": 1.5216705799102783, "learning_rate": 1.9999929547216915e-05, "loss": 1.1666, "step": 700 }, { "epoch": 0.09374164215030757, "grad_norm": 1.6007503271102905, "learning_rate": 1.999992402355896e-05, "loss": 1.2513, "step": 701 }, { "epoch": 0.09387536774538646, "grad_norm": 1.5861904621124268, "learning_rate": 1.9999918291462164e-05, "loss": 1.0141, "step": 702 }, { "epoch": 0.09400909334046537, "grad_norm": 1.5060030221939087, "learning_rate": 1.999991235092664e-05, "loss": 1.1213, "step": 703 }, { "epoch": 0.09414281893554426, "grad_norm": 1.2562562227249146, "learning_rate": 1.9999906201952507e-05, "loss": 1.0161, "step": 704 }, { "epoch": 0.09427654453062316, "grad_norm": 1.572417974472046, "learning_rate": 1.9999899844539898e-05, "loss": 1.1206, "step": 705 }, { "epoch": 0.09441027012570206, "grad_norm": 1.4760481119155884, "learning_rate": 1.999989327868895e-05, "loss": 1.1317, "step": 706 }, { "epoch": 0.09454399572078095, "grad_norm": 1.621798038482666, "learning_rate": 1.9999886504399792e-05, "loss": 1.129, "step": 707 }, { "epoch": 0.09467772131585986, "grad_norm": 1.5630792379379272, "learning_rate": 1.9999879521672573e-05, "loss": 1.1616, "step": 708 }, { "epoch": 0.09481144691093875, "grad_norm": 1.4571009874343872, "learning_rate": 1.999987233050743e-05, "loss": 1.0897, "step": 709 }, { "epoch": 0.09494517250601765, "grad_norm": 1.4069567918777466, "learning_rate": 1.9999864930904516e-05, "loss": 1.0446, "step": 710 }, { "epoch": 0.09507889810109656, "grad_norm": 1.5291472673416138, "learning_rate": 1.999985732286399e-05, "loss": 1.0165, "step": 711 }, { "epoch": 0.09521262369617545, "grad_norm": 1.5607110261917114, "learning_rate": 1.9999849506386005e-05, "loss": 1.1873, "step": 712 }, { "epoch": 0.09534634929125435, "grad_norm": 1.518492341041565, "learning_rate": 1.9999841481470725e-05, "loss": 1.1127, "step": 713 }, { "epoch": 0.09548007488633324, "grad_norm": 1.6907273530960083, "learning_rate": 1.999983324811832e-05, "loss": 1.057, "step": 714 }, { "epoch": 0.09561380048141215, "grad_norm": 1.5902533531188965, "learning_rate": 1.999982480632896e-05, "loss": 1.1992, "step": 715 }, { "epoch": 0.09574752607649104, "grad_norm": 1.5743988752365112, "learning_rate": 1.999981615610282e-05, "loss": 1.039, "step": 716 }, { "epoch": 0.09588125167156994, "grad_norm": 1.491440773010254, "learning_rate": 1.999980729744008e-05, "loss": 1.009, "step": 717 }, { "epoch": 0.09601497726664884, "grad_norm": 1.4290639162063599, "learning_rate": 1.999979823034093e-05, "loss": 1.1107, "step": 718 }, { "epoch": 0.09614870286172773, "grad_norm": 1.5728894472122192, "learning_rate": 1.999978895480555e-05, "loss": 1.0969, "step": 719 }, { "epoch": 0.09628242845680664, "grad_norm": 1.4674407243728638, "learning_rate": 1.9999779470834137e-05, "loss": 1.0699, "step": 720 }, { "epoch": 0.09641615405188553, "grad_norm": 1.4822677373886108, "learning_rate": 1.9999769778426893e-05, "loss": 1.0629, "step": 721 }, { "epoch": 0.09654987964696443, "grad_norm": 1.4400196075439453, "learning_rate": 1.9999759877584015e-05, "loss": 0.9606, "step": 722 }, { "epoch": 0.09668360524204332, "grad_norm": 1.5773786306381226, "learning_rate": 1.9999749768305712e-05, "loss": 1.0582, "step": 723 }, { "epoch": 0.09681733083712223, "grad_norm": 1.5911723375320435, "learning_rate": 1.999973945059219e-05, "loss": 1.202, "step": 724 }, { "epoch": 0.09695105643220113, "grad_norm": 1.544969916343689, "learning_rate": 1.9999728924443675e-05, "loss": 1.2431, "step": 725 }, { "epoch": 0.09708478202728002, "grad_norm": 1.4910603761672974, "learning_rate": 1.9999718189860372e-05, "loss": 1.0222, "step": 726 }, { "epoch": 0.09721850762235892, "grad_norm": 1.419446587562561, "learning_rate": 1.9999707246842518e-05, "loss": 1.1206, "step": 727 }, { "epoch": 0.09735223321743781, "grad_norm": 1.524016261100769, "learning_rate": 1.9999696095390333e-05, "loss": 1.1079, "step": 728 }, { "epoch": 0.09748595881251672, "grad_norm": 1.6166530847549438, "learning_rate": 1.9999684735504052e-05, "loss": 1.1168, "step": 729 }, { "epoch": 0.09761968440759561, "grad_norm": 1.539605736732483, "learning_rate": 1.999967316718391e-05, "loss": 1.0489, "step": 730 }, { "epoch": 0.09775341000267451, "grad_norm": 1.3934693336486816, "learning_rate": 1.999966139043015e-05, "loss": 1.097, "step": 731 }, { "epoch": 0.09788713559775342, "grad_norm": 1.4110767841339111, "learning_rate": 1.9999649405243017e-05, "loss": 1.0853, "step": 732 }, { "epoch": 0.0980208611928323, "grad_norm": 1.618430733680725, "learning_rate": 1.999963721162276e-05, "loss": 1.0618, "step": 733 }, { "epoch": 0.09815458678791121, "grad_norm": 1.4578605890274048, "learning_rate": 1.9999624809569635e-05, "loss": 1.1054, "step": 734 }, { "epoch": 0.0982883123829901, "grad_norm": 1.5790408849716187, "learning_rate": 1.99996121990839e-05, "loss": 1.1486, "step": 735 }, { "epoch": 0.098422037978069, "grad_norm": 1.446220874786377, "learning_rate": 1.9999599380165817e-05, "loss": 1.0227, "step": 736 }, { "epoch": 0.0985557635731479, "grad_norm": 1.6072250604629517, "learning_rate": 1.9999586352815652e-05, "loss": 1.1664, "step": 737 }, { "epoch": 0.0986894891682268, "grad_norm": 1.4835716485977173, "learning_rate": 1.999957311703368e-05, "loss": 1.2007, "step": 738 }, { "epoch": 0.0988232147633057, "grad_norm": 1.5029864311218262, "learning_rate": 1.9999559672820173e-05, "loss": 1.2446, "step": 739 }, { "epoch": 0.09895694035838459, "grad_norm": 1.5523008108139038, "learning_rate": 1.9999546020175416e-05, "loss": 1.1736, "step": 740 }, { "epoch": 0.0990906659534635, "grad_norm": 1.36164128780365, "learning_rate": 1.9999532159099687e-05, "loss": 1.0314, "step": 741 }, { "epoch": 0.09922439154854239, "grad_norm": 1.4838365316390991, "learning_rate": 1.999951808959328e-05, "loss": 0.9192, "step": 742 }, { "epoch": 0.09935811714362129, "grad_norm": 1.3893494606018066, "learning_rate": 1.999950381165649e-05, "loss": 0.9618, "step": 743 }, { "epoch": 0.09949184273870018, "grad_norm": 1.5209376811981201, "learning_rate": 1.9999489325289607e-05, "loss": 1.169, "step": 744 }, { "epoch": 0.09962556833377909, "grad_norm": 1.4612441062927246, "learning_rate": 1.999947463049294e-05, "loss": 1.143, "step": 745 }, { "epoch": 0.09975929392885799, "grad_norm": 1.526669979095459, "learning_rate": 1.9999459727266793e-05, "loss": 1.0212, "step": 746 }, { "epoch": 0.09989301952393688, "grad_norm": 1.577208161354065, "learning_rate": 1.9999444615611475e-05, "loss": 1.0225, "step": 747 }, { "epoch": 0.10002674511901578, "grad_norm": 1.5396769046783447, "learning_rate": 1.9999429295527305e-05, "loss": 1.1321, "step": 748 }, { "epoch": 0.10016047071409467, "grad_norm": 1.604958415031433, "learning_rate": 1.9999413767014598e-05, "loss": 1.0807, "step": 749 }, { "epoch": 0.10029419630917358, "grad_norm": 1.6191061735153198, "learning_rate": 1.999939803007368e-05, "loss": 1.1931, "step": 750 }, { "epoch": 0.10042792190425247, "grad_norm": 1.5236402750015259, "learning_rate": 1.9999382084704875e-05, "loss": 1.1599, "step": 751 }, { "epoch": 0.10056164749933137, "grad_norm": 1.6902754306793213, "learning_rate": 1.9999365930908523e-05, "loss": 1.2153, "step": 752 }, { "epoch": 0.10069537309441028, "grad_norm": 1.643006443977356, "learning_rate": 1.9999349568684955e-05, "loss": 1.1013, "step": 753 }, { "epoch": 0.10082909868948917, "grad_norm": 1.402974009513855, "learning_rate": 1.9999332998034515e-05, "loss": 0.9351, "step": 754 }, { "epoch": 0.10096282428456807, "grad_norm": 1.4250565767288208, "learning_rate": 1.9999316218957543e-05, "loss": 1.0319, "step": 755 }, { "epoch": 0.10109654987964696, "grad_norm": 1.546347975730896, "learning_rate": 1.9999299231454396e-05, "loss": 1.1464, "step": 756 }, { "epoch": 0.10123027547472586, "grad_norm": 1.5152294635772705, "learning_rate": 1.9999282035525423e-05, "loss": 1.1636, "step": 757 }, { "epoch": 0.10136400106980475, "grad_norm": 1.4894416332244873, "learning_rate": 1.9999264631170987e-05, "loss": 1.2642, "step": 758 }, { "epoch": 0.10149772666488366, "grad_norm": 1.3535887002944946, "learning_rate": 1.999924701839145e-05, "loss": 1.0178, "step": 759 }, { "epoch": 0.10163145225996256, "grad_norm": 1.382920503616333, "learning_rate": 1.9999229197187172e-05, "loss": 1.09, "step": 760 }, { "epoch": 0.10176517785504145, "grad_norm": 1.5154727697372437, "learning_rate": 1.999921116755853e-05, "loss": 1.0927, "step": 761 }, { "epoch": 0.10189890345012036, "grad_norm": 1.5097575187683105, "learning_rate": 1.99991929295059e-05, "loss": 0.9447, "step": 762 }, { "epoch": 0.10203262904519925, "grad_norm": 1.6267471313476562, "learning_rate": 1.9999174483029665e-05, "loss": 0.9786, "step": 763 }, { "epoch": 0.10216635464027815, "grad_norm": 1.4890960454940796, "learning_rate": 1.99991558281302e-05, "loss": 1.1095, "step": 764 }, { "epoch": 0.10230008023535704, "grad_norm": 1.5385849475860596, "learning_rate": 1.9999136964807903e-05, "loss": 1.0284, "step": 765 }, { "epoch": 0.10243380583043595, "grad_norm": 1.5322134494781494, "learning_rate": 1.9999117893063163e-05, "loss": 1.0762, "step": 766 }, { "epoch": 0.10256753142551485, "grad_norm": 1.4597574472427368, "learning_rate": 1.9999098612896382e-05, "loss": 1.0041, "step": 767 }, { "epoch": 0.10270125702059374, "grad_norm": 1.4435315132141113, "learning_rate": 1.999907912430796e-05, "loss": 1.0628, "step": 768 }, { "epoch": 0.10283498261567264, "grad_norm": 1.3706847429275513, "learning_rate": 1.9999059427298294e-05, "loss": 1.0832, "step": 769 }, { "epoch": 0.10296870821075153, "grad_norm": 1.4939472675323486, "learning_rate": 1.999903952186781e-05, "loss": 1.0332, "step": 770 }, { "epoch": 0.10310243380583044, "grad_norm": 1.5139802694320679, "learning_rate": 1.9999019408016907e-05, "loss": 1.1147, "step": 771 }, { "epoch": 0.10323615940090933, "grad_norm": 1.4168496131896973, "learning_rate": 1.999899908574602e-05, "loss": 1.0616, "step": 772 }, { "epoch": 0.10336988499598823, "grad_norm": 1.3104252815246582, "learning_rate": 1.999897855505556e-05, "loss": 0.9378, "step": 773 }, { "epoch": 0.10350361059106714, "grad_norm": 1.4521965980529785, "learning_rate": 1.9998957815945962e-05, "loss": 1.0704, "step": 774 }, { "epoch": 0.10363733618614603, "grad_norm": 1.3755027055740356, "learning_rate": 1.999893686841766e-05, "loss": 1.0257, "step": 775 }, { "epoch": 0.10377106178122493, "grad_norm": 1.381283164024353, "learning_rate": 1.9998915712471084e-05, "loss": 1.0789, "step": 776 }, { "epoch": 0.10390478737630382, "grad_norm": 1.4885414838790894, "learning_rate": 1.9998894348106678e-05, "loss": 1.1055, "step": 777 }, { "epoch": 0.10403851297138272, "grad_norm": 1.4820473194122314, "learning_rate": 1.9998872775324886e-05, "loss": 1.1266, "step": 778 }, { "epoch": 0.10417223856646161, "grad_norm": 1.5443062782287598, "learning_rate": 1.9998850994126157e-05, "loss": 0.9858, "step": 779 }, { "epoch": 0.10430596416154052, "grad_norm": 1.5163604021072388, "learning_rate": 1.999882900451095e-05, "loss": 1.1787, "step": 780 }, { "epoch": 0.10443968975661942, "grad_norm": 1.6354924440383911, "learning_rate": 1.999880680647972e-05, "loss": 1.1238, "step": 781 }, { "epoch": 0.10457341535169831, "grad_norm": 1.6247735023498535, "learning_rate": 1.9998784400032928e-05, "loss": 1.1128, "step": 782 }, { "epoch": 0.10470714094677722, "grad_norm": 1.55426025390625, "learning_rate": 1.9998761785171047e-05, "loss": 1.1489, "step": 783 }, { "epoch": 0.1048408665418561, "grad_norm": 1.4675824642181396, "learning_rate": 1.9998738961894538e-05, "loss": 1.03, "step": 784 }, { "epoch": 0.10497459213693501, "grad_norm": 1.5163553953170776, "learning_rate": 1.999871593020389e-05, "loss": 1.1458, "step": 785 }, { "epoch": 0.1051083177320139, "grad_norm": 1.535949468612671, "learning_rate": 1.9998692690099572e-05, "loss": 1.1773, "step": 786 }, { "epoch": 0.1052420433270928, "grad_norm": 1.4176838397979736, "learning_rate": 1.9998669241582074e-05, "loss": 1.1097, "step": 787 }, { "epoch": 0.10537576892217171, "grad_norm": 1.441300868988037, "learning_rate": 1.9998645584651883e-05, "loss": 1.1189, "step": 788 }, { "epoch": 0.1055094945172506, "grad_norm": 1.4307371377944946, "learning_rate": 1.9998621719309496e-05, "loss": 1.0646, "step": 789 }, { "epoch": 0.1056432201123295, "grad_norm": 1.4143755435943604, "learning_rate": 1.99985976455554e-05, "loss": 1.061, "step": 790 }, { "epoch": 0.10577694570740839, "grad_norm": 1.4653947353363037, "learning_rate": 1.999857336339011e-05, "loss": 1.1117, "step": 791 }, { "epoch": 0.1059106713024873, "grad_norm": 1.4102991819381714, "learning_rate": 1.999854887281412e-05, "loss": 1.1248, "step": 792 }, { "epoch": 0.1060443968975662, "grad_norm": 1.5647644996643066, "learning_rate": 1.999852417382795e-05, "loss": 1.2091, "step": 793 }, { "epoch": 0.10617812249264509, "grad_norm": 1.3958559036254883, "learning_rate": 1.999849926643211e-05, "loss": 1.1335, "step": 794 }, { "epoch": 0.106311848087724, "grad_norm": 1.5958595275878906, "learning_rate": 1.9998474150627124e-05, "loss": 1.2062, "step": 795 }, { "epoch": 0.10644557368280289, "grad_norm": 1.40238618850708, "learning_rate": 1.9998448826413505e-05, "loss": 1.1176, "step": 796 }, { "epoch": 0.10657929927788179, "grad_norm": 1.4229419231414795, "learning_rate": 1.9998423293791793e-05, "loss": 1.0412, "step": 797 }, { "epoch": 0.10671302487296068, "grad_norm": 1.4735041856765747, "learning_rate": 1.999839755276251e-05, "loss": 1.187, "step": 798 }, { "epoch": 0.10684675046803958, "grad_norm": 1.4256937503814697, "learning_rate": 1.9998371603326202e-05, "loss": 1.0194, "step": 799 }, { "epoch": 0.10698047606311849, "grad_norm": 1.4449872970581055, "learning_rate": 1.9998345445483403e-05, "loss": 1.0154, "step": 800 }, { "epoch": 0.10711420165819738, "grad_norm": 1.5340802669525146, "learning_rate": 1.9998319079234664e-05, "loss": 1.0999, "step": 801 }, { "epoch": 0.10724792725327628, "grad_norm": 1.4936314821243286, "learning_rate": 1.9998292504580528e-05, "loss": 1.019, "step": 802 }, { "epoch": 0.10738165284835517, "grad_norm": 1.337742805480957, "learning_rate": 1.9998265721521552e-05, "loss": 0.9783, "step": 803 }, { "epoch": 0.10751537844343408, "grad_norm": 1.3495757579803467, "learning_rate": 1.99982387300583e-05, "loss": 1.0636, "step": 804 }, { "epoch": 0.10764910403851297, "grad_norm": 1.4058362245559692, "learning_rate": 1.999821153019132e-05, "loss": 1.0722, "step": 805 }, { "epoch": 0.10778282963359187, "grad_norm": 1.4431757926940918, "learning_rate": 1.999818412192119e-05, "loss": 1.1085, "step": 806 }, { "epoch": 0.10791655522867077, "grad_norm": 1.6855138540267944, "learning_rate": 1.9998156505248483e-05, "loss": 1.1215, "step": 807 }, { "epoch": 0.10805028082374966, "grad_norm": 1.6916824579238892, "learning_rate": 1.999812868017377e-05, "loss": 1.1433, "step": 808 }, { "epoch": 0.10818400641882857, "grad_norm": 1.5187382698059082, "learning_rate": 1.999810064669763e-05, "loss": 1.1883, "step": 809 }, { "epoch": 0.10831773201390746, "grad_norm": 1.3482873439788818, "learning_rate": 1.9998072404820648e-05, "loss": 1.1116, "step": 810 }, { "epoch": 0.10845145760898636, "grad_norm": 1.4767537117004395, "learning_rate": 1.999804395454342e-05, "loss": 1.0974, "step": 811 }, { "epoch": 0.10858518320406525, "grad_norm": 1.5022600889205933, "learning_rate": 1.9998015295866526e-05, "loss": 1.07, "step": 812 }, { "epoch": 0.10871890879914416, "grad_norm": 1.4016488790512085, "learning_rate": 1.9997986428790574e-05, "loss": 1.1137, "step": 813 }, { "epoch": 0.10885263439422306, "grad_norm": 1.425912618637085, "learning_rate": 1.999795735331616e-05, "loss": 1.0245, "step": 814 }, { "epoch": 0.10898635998930195, "grad_norm": 1.4399226903915405, "learning_rate": 1.9997928069443895e-05, "loss": 1.0498, "step": 815 }, { "epoch": 0.10912008558438085, "grad_norm": 1.421446681022644, "learning_rate": 1.9997898577174384e-05, "loss": 1.0426, "step": 816 }, { "epoch": 0.10925381117945974, "grad_norm": 1.983519434928894, "learning_rate": 1.9997868876508243e-05, "loss": 1.0427, "step": 817 }, { "epoch": 0.10938753677453865, "grad_norm": 1.4454611539840698, "learning_rate": 1.999783896744609e-05, "loss": 1.1261, "step": 818 }, { "epoch": 0.10952126236961754, "grad_norm": 1.501387119293213, "learning_rate": 1.9997808849988556e-05, "loss": 1.1553, "step": 819 }, { "epoch": 0.10965498796469644, "grad_norm": 1.4224389791488647, "learning_rate": 1.9997778524136263e-05, "loss": 1.0225, "step": 820 }, { "epoch": 0.10978871355977535, "grad_norm": 1.3407557010650635, "learning_rate": 1.9997747989889843e-05, "loss": 1.1544, "step": 821 }, { "epoch": 0.10992243915485424, "grad_norm": 1.5004007816314697, "learning_rate": 1.999771724724993e-05, "loss": 1.0672, "step": 822 }, { "epoch": 0.11005616474993314, "grad_norm": 1.4925388097763062, "learning_rate": 1.999768629621717e-05, "loss": 1.2413, "step": 823 }, { "epoch": 0.11018989034501203, "grad_norm": 1.3069093227386475, "learning_rate": 1.9997655136792206e-05, "loss": 0.9717, "step": 824 }, { "epoch": 0.11032361594009094, "grad_norm": 1.2697566747665405, "learning_rate": 1.9997623768975686e-05, "loss": 0.9992, "step": 825 }, { "epoch": 0.11045734153516983, "grad_norm": 1.4102569818496704, "learning_rate": 1.9997592192768268e-05, "loss": 1.118, "step": 826 }, { "epoch": 0.11059106713024873, "grad_norm": 1.3591125011444092, "learning_rate": 1.9997560408170605e-05, "loss": 1.0641, "step": 827 }, { "epoch": 0.11072479272532763, "grad_norm": 1.351444959640503, "learning_rate": 1.9997528415183363e-05, "loss": 1.0296, "step": 828 }, { "epoch": 0.11085851832040652, "grad_norm": 1.449341893196106, "learning_rate": 1.9997496213807208e-05, "loss": 1.2782, "step": 829 }, { "epoch": 0.11099224391548543, "grad_norm": 1.418331265449524, "learning_rate": 1.9997463804042808e-05, "loss": 1.0154, "step": 830 }, { "epoch": 0.11112596951056432, "grad_norm": 1.5046807527542114, "learning_rate": 1.9997431185890844e-05, "loss": 1.0964, "step": 831 }, { "epoch": 0.11125969510564322, "grad_norm": 1.3268418312072754, "learning_rate": 1.9997398359351994e-05, "loss": 1.0001, "step": 832 }, { "epoch": 0.11139342070072211, "grad_norm": 1.4519188404083252, "learning_rate": 1.999736532442694e-05, "loss": 1.0742, "step": 833 }, { "epoch": 0.11152714629580102, "grad_norm": 1.3644951581954956, "learning_rate": 1.9997332081116374e-05, "loss": 1.1354, "step": 834 }, { "epoch": 0.11166087189087992, "grad_norm": 1.4315654039382935, "learning_rate": 1.9997298629420988e-05, "loss": 0.9769, "step": 835 }, { "epoch": 0.11179459748595881, "grad_norm": 1.4563404321670532, "learning_rate": 1.9997264969341476e-05, "loss": 1.1633, "step": 836 }, { "epoch": 0.11192832308103771, "grad_norm": 1.4645648002624512, "learning_rate": 1.999723110087854e-05, "loss": 1.1675, "step": 837 }, { "epoch": 0.1120620486761166, "grad_norm": 1.403998851776123, "learning_rate": 1.9997197024032894e-05, "loss": 1.1952, "step": 838 }, { "epoch": 0.11219577427119551, "grad_norm": 1.3912056684494019, "learning_rate": 1.999716273880524e-05, "loss": 1.0715, "step": 839 }, { "epoch": 0.1123294998662744, "grad_norm": 1.5652885437011719, "learning_rate": 1.9997128245196294e-05, "loss": 1.2214, "step": 840 }, { "epoch": 0.1124632254613533, "grad_norm": 1.3047504425048828, "learning_rate": 1.9997093543206775e-05, "loss": 1.0069, "step": 841 }, { "epoch": 0.1125969510564322, "grad_norm": 1.445512294769287, "learning_rate": 1.9997058632837407e-05, "loss": 1.1069, "step": 842 }, { "epoch": 0.1127306766515111, "grad_norm": 1.4987990856170654, "learning_rate": 1.999702351408892e-05, "loss": 1.085, "step": 843 }, { "epoch": 0.11286440224659, "grad_norm": 1.6998556852340698, "learning_rate": 1.9996988186962044e-05, "loss": 1.0865, "step": 844 }, { "epoch": 0.11299812784166889, "grad_norm": 1.4945526123046875, "learning_rate": 1.9996952651457513e-05, "loss": 1.13, "step": 845 }, { "epoch": 0.1131318534367478, "grad_norm": 1.3498841524124146, "learning_rate": 1.9996916907576073e-05, "loss": 1.1502, "step": 846 }, { "epoch": 0.11326557903182669, "grad_norm": 1.3900498151779175, "learning_rate": 1.9996880955318466e-05, "loss": 1.0802, "step": 847 }, { "epoch": 0.11339930462690559, "grad_norm": 1.42745041847229, "learning_rate": 1.999684479468544e-05, "loss": 1.0422, "step": 848 }, { "epoch": 0.1135330302219845, "grad_norm": 1.417966604232788, "learning_rate": 1.999680842567775e-05, "loss": 1.1077, "step": 849 }, { "epoch": 0.11366675581706338, "grad_norm": 1.7110809087753296, "learning_rate": 1.9996771848296153e-05, "loss": 1.1402, "step": 850 }, { "epoch": 0.11380048141214229, "grad_norm": 1.3492650985717773, "learning_rate": 1.9996735062541413e-05, "loss": 1.0081, "step": 851 }, { "epoch": 0.11393420700722118, "grad_norm": 1.4812312126159668, "learning_rate": 1.99966980684143e-05, "loss": 1.1033, "step": 852 }, { "epoch": 0.11406793260230008, "grad_norm": 1.4075268507003784, "learning_rate": 1.999666086591558e-05, "loss": 1.0745, "step": 853 }, { "epoch": 0.11420165819737897, "grad_norm": 1.440185308456421, "learning_rate": 1.999662345504603e-05, "loss": 1.0596, "step": 854 }, { "epoch": 0.11433538379245788, "grad_norm": 1.3383756875991821, "learning_rate": 1.9996585835806427e-05, "loss": 1.0198, "step": 855 }, { "epoch": 0.11446910938753678, "grad_norm": 1.2926931381225586, "learning_rate": 1.999654800819756e-05, "loss": 1.1465, "step": 856 }, { "epoch": 0.11460283498261567, "grad_norm": 1.482844591140747, "learning_rate": 1.9996509972220218e-05, "loss": 1.1325, "step": 857 }, { "epoch": 0.11473656057769457, "grad_norm": 1.3716925382614136, "learning_rate": 1.9996471727875186e-05, "loss": 1.0085, "step": 858 }, { "epoch": 0.11487028617277346, "grad_norm": 1.5064419507980347, "learning_rate": 1.999643327516327e-05, "loss": 1.1449, "step": 859 }, { "epoch": 0.11500401176785237, "grad_norm": 1.3949836492538452, "learning_rate": 1.9996394614085267e-05, "loss": 1.0187, "step": 860 }, { "epoch": 0.11513773736293126, "grad_norm": 1.4905332326889038, "learning_rate": 1.9996355744641986e-05, "loss": 1.109, "step": 861 }, { "epoch": 0.11527146295801016, "grad_norm": 1.2240432500839233, "learning_rate": 1.9996316666834234e-05, "loss": 0.9338, "step": 862 }, { "epoch": 0.11540518855308907, "grad_norm": 1.4052257537841797, "learning_rate": 1.9996277380662824e-05, "loss": 1.1734, "step": 863 }, { "epoch": 0.11553891414816796, "grad_norm": 1.77515709400177, "learning_rate": 1.999623788612858e-05, "loss": 1.1584, "step": 864 }, { "epoch": 0.11567263974324686, "grad_norm": 1.2767348289489746, "learning_rate": 1.999619818323232e-05, "loss": 1.0446, "step": 865 }, { "epoch": 0.11580636533832575, "grad_norm": 1.3639624118804932, "learning_rate": 1.9996158271974875e-05, "loss": 1.1402, "step": 866 }, { "epoch": 0.11594009093340465, "grad_norm": 1.4850925207138062, "learning_rate": 1.999611815235708e-05, "loss": 1.0932, "step": 867 }, { "epoch": 0.11607381652848354, "grad_norm": 1.4100223779678345, "learning_rate": 1.999607782437976e-05, "loss": 1.0688, "step": 868 }, { "epoch": 0.11620754212356245, "grad_norm": 1.4685066938400269, "learning_rate": 1.999603728804377e-05, "loss": 1.1739, "step": 869 }, { "epoch": 0.11634126771864135, "grad_norm": 1.3807005882263184, "learning_rate": 1.9995996543349944e-05, "loss": 0.9948, "step": 870 }, { "epoch": 0.11647499331372024, "grad_norm": 1.6990382671356201, "learning_rate": 1.9995955590299134e-05, "loss": 1.0686, "step": 871 }, { "epoch": 0.11660871890879915, "grad_norm": 1.3167297840118408, "learning_rate": 1.9995914428892196e-05, "loss": 0.9903, "step": 872 }, { "epoch": 0.11674244450387804, "grad_norm": 1.4475982189178467, "learning_rate": 1.999587305912999e-05, "loss": 1.0136, "step": 873 }, { "epoch": 0.11687617009895694, "grad_norm": 1.3942874670028687, "learning_rate": 1.9995831481013376e-05, "loss": 1.1011, "step": 874 }, { "epoch": 0.11700989569403585, "grad_norm": 1.3756638765335083, "learning_rate": 1.9995789694543214e-05, "loss": 1.0536, "step": 875 }, { "epoch": 0.11714362128911474, "grad_norm": 1.4249614477157593, "learning_rate": 1.9995747699720383e-05, "loss": 0.9772, "step": 876 }, { "epoch": 0.11727734688419364, "grad_norm": 1.4275487661361694, "learning_rate": 1.9995705496545756e-05, "loss": 1.0343, "step": 877 }, { "epoch": 0.11741107247927253, "grad_norm": 1.460584044456482, "learning_rate": 1.9995663085020215e-05, "loss": 1.0276, "step": 878 }, { "epoch": 0.11754479807435143, "grad_norm": 1.469758152961731, "learning_rate": 1.999562046514464e-05, "loss": 1.0732, "step": 879 }, { "epoch": 0.11767852366943032, "grad_norm": 1.502478837966919, "learning_rate": 1.9995577636919922e-05, "loss": 1.0324, "step": 880 }, { "epoch": 0.11781224926450923, "grad_norm": 1.3687031269073486, "learning_rate": 1.999553460034695e-05, "loss": 1.0553, "step": 881 }, { "epoch": 0.11794597485958813, "grad_norm": 1.5284054279327393, "learning_rate": 1.9995491355426626e-05, "loss": 0.9838, "step": 882 }, { "epoch": 0.11807970045466702, "grad_norm": 1.4152560234069824, "learning_rate": 1.999544790215985e-05, "loss": 1.0605, "step": 883 }, { "epoch": 0.11821342604974593, "grad_norm": 1.4761606454849243, "learning_rate": 1.9995404240547527e-05, "loss": 1.0713, "step": 884 }, { "epoch": 0.11834715164482482, "grad_norm": 1.4720832109451294, "learning_rate": 1.9995360370590568e-05, "loss": 1.0868, "step": 885 }, { "epoch": 0.11848087723990372, "grad_norm": 1.4426681995391846, "learning_rate": 1.9995316292289883e-05, "loss": 1.17, "step": 886 }, { "epoch": 0.11861460283498261, "grad_norm": 1.3690184354782104, "learning_rate": 1.9995272005646398e-05, "loss": 0.9314, "step": 887 }, { "epoch": 0.11874832843006151, "grad_norm": 1.597335696220398, "learning_rate": 1.999522751066103e-05, "loss": 1.0875, "step": 888 }, { "epoch": 0.11888205402514042, "grad_norm": 1.3291088342666626, "learning_rate": 1.999518280733471e-05, "loss": 1.0475, "step": 889 }, { "epoch": 0.11901577962021931, "grad_norm": 1.4983829259872437, "learning_rate": 1.999513789566837e-05, "loss": 1.1089, "step": 890 }, { "epoch": 0.11914950521529821, "grad_norm": 1.419007658958435, "learning_rate": 1.9995092775662943e-05, "loss": 1.1114, "step": 891 }, { "epoch": 0.1192832308103771, "grad_norm": 1.4708096981048584, "learning_rate": 1.9995047447319373e-05, "loss": 1.1261, "step": 892 }, { "epoch": 0.119416956405456, "grad_norm": 1.4634017944335938, "learning_rate": 1.99950019106386e-05, "loss": 1.0147, "step": 893 }, { "epoch": 0.1195506820005349, "grad_norm": 1.4309417009353638, "learning_rate": 1.999495616562158e-05, "loss": 1.0773, "step": 894 }, { "epoch": 0.1196844075956138, "grad_norm": 1.559757947921753, "learning_rate": 1.999491021226926e-05, "loss": 1.0804, "step": 895 }, { "epoch": 0.1198181331906927, "grad_norm": 1.3439881801605225, "learning_rate": 1.9994864050582604e-05, "loss": 0.9283, "step": 896 }, { "epoch": 0.1199518587857716, "grad_norm": 1.4707006216049194, "learning_rate": 1.9994817680562567e-05, "loss": 1.1662, "step": 897 }, { "epoch": 0.1200855843808505, "grad_norm": 1.4414838552474976, "learning_rate": 1.9994771102210122e-05, "loss": 1.1078, "step": 898 }, { "epoch": 0.12021930997592939, "grad_norm": 1.4473570585250854, "learning_rate": 1.9994724315526237e-05, "loss": 1.1936, "step": 899 }, { "epoch": 0.1203530355710083, "grad_norm": 1.4292080402374268, "learning_rate": 1.9994677320511887e-05, "loss": 0.9882, "step": 900 }, { "epoch": 0.12048676116608718, "grad_norm": 1.4753037691116333, "learning_rate": 1.9994630117168054e-05, "loss": 1.0497, "step": 901 }, { "epoch": 0.12062048676116609, "grad_norm": 1.4018559455871582, "learning_rate": 1.9994582705495718e-05, "loss": 1.0766, "step": 902 }, { "epoch": 0.12075421235624499, "grad_norm": 1.4247944355010986, "learning_rate": 1.999453508549587e-05, "loss": 1.0656, "step": 903 }, { "epoch": 0.12088793795132388, "grad_norm": 1.4296343326568604, "learning_rate": 1.99944872571695e-05, "loss": 1.0928, "step": 904 }, { "epoch": 0.12102166354640279, "grad_norm": 1.4088895320892334, "learning_rate": 1.999443922051761e-05, "loss": 1.1054, "step": 905 }, { "epoch": 0.12115538914148168, "grad_norm": 1.5685827732086182, "learning_rate": 1.9994390975541197e-05, "loss": 0.9558, "step": 906 }, { "epoch": 0.12128911473656058, "grad_norm": 1.3884845972061157, "learning_rate": 1.9994342522241265e-05, "loss": 0.9925, "step": 907 }, { "epoch": 0.12142284033163947, "grad_norm": 1.2820035219192505, "learning_rate": 1.999429386061883e-05, "loss": 1.1495, "step": 908 }, { "epoch": 0.12155656592671837, "grad_norm": 1.4173537492752075, "learning_rate": 1.99942449906749e-05, "loss": 1.0623, "step": 909 }, { "epoch": 0.12169029152179728, "grad_norm": 1.3517982959747314, "learning_rate": 1.99941959124105e-05, "loss": 1.0155, "step": 910 }, { "epoch": 0.12182401711687617, "grad_norm": 1.5390291213989258, "learning_rate": 1.999414662582665e-05, "loss": 1.2597, "step": 911 }, { "epoch": 0.12195774271195507, "grad_norm": 1.467222809791565, "learning_rate": 1.9994097130924376e-05, "loss": 1.1345, "step": 912 }, { "epoch": 0.12209146830703396, "grad_norm": 1.4707090854644775, "learning_rate": 1.9994047427704707e-05, "loss": 1.203, "step": 913 }, { "epoch": 0.12222519390211287, "grad_norm": 1.3751864433288574, "learning_rate": 1.999399751616869e-05, "loss": 1.0646, "step": 914 }, { "epoch": 0.12235891949719176, "grad_norm": 1.509385347366333, "learning_rate": 1.999394739631735e-05, "loss": 1.0634, "step": 915 }, { "epoch": 0.12249264509227066, "grad_norm": 1.5277599096298218, "learning_rate": 1.9993897068151743e-05, "loss": 1.1014, "step": 916 }, { "epoch": 0.12262637068734956, "grad_norm": 1.3040642738342285, "learning_rate": 1.9993846531672915e-05, "loss": 1.0341, "step": 917 }, { "epoch": 0.12276009628242845, "grad_norm": 1.4020966291427612, "learning_rate": 1.999379578688192e-05, "loss": 1.1373, "step": 918 }, { "epoch": 0.12289382187750736, "grad_norm": 1.3358014822006226, "learning_rate": 1.9993744833779814e-05, "loss": 1.0387, "step": 919 }, { "epoch": 0.12302754747258625, "grad_norm": 1.3639771938323975, "learning_rate": 1.9993693672367658e-05, "loss": 1.0509, "step": 920 }, { "epoch": 0.12316127306766515, "grad_norm": 1.5706214904785156, "learning_rate": 1.9993642302646525e-05, "loss": 1.1363, "step": 921 }, { "epoch": 0.12329499866274404, "grad_norm": 1.5038011074066162, "learning_rate": 1.9993590724617476e-05, "loss": 0.9393, "step": 922 }, { "epoch": 0.12342872425782295, "grad_norm": 1.3753327131271362, "learning_rate": 1.9993538938281592e-05, "loss": 1.1533, "step": 923 }, { "epoch": 0.12356244985290185, "grad_norm": 1.3972834348678589, "learning_rate": 1.999348694363995e-05, "loss": 0.9897, "step": 924 }, { "epoch": 0.12369617544798074, "grad_norm": 1.3822537660598755, "learning_rate": 1.9993434740693643e-05, "loss": 1.0254, "step": 925 }, { "epoch": 0.12382990104305965, "grad_norm": 1.359426736831665, "learning_rate": 1.9993382329443746e-05, "loss": 1.0146, "step": 926 }, { "epoch": 0.12396362663813854, "grad_norm": 1.6056737899780273, "learning_rate": 1.9993329709891357e-05, "loss": 1.1209, "step": 927 }, { "epoch": 0.12409735223321744, "grad_norm": 1.4256788492202759, "learning_rate": 1.9993276882037575e-05, "loss": 1.1071, "step": 928 }, { "epoch": 0.12423107782829633, "grad_norm": 1.339333176612854, "learning_rate": 1.9993223845883496e-05, "loss": 1.0284, "step": 929 }, { "epoch": 0.12436480342337523, "grad_norm": 1.5683083534240723, "learning_rate": 1.9993170601430233e-05, "loss": 1.2294, "step": 930 }, { "epoch": 0.12449852901845414, "grad_norm": 1.4918873310089111, "learning_rate": 1.9993117148678887e-05, "loss": 1.077, "step": 931 }, { "epoch": 0.12463225461353303, "grad_norm": 1.4829609394073486, "learning_rate": 1.9993063487630575e-05, "loss": 1.1867, "step": 932 }, { "epoch": 0.12476598020861193, "grad_norm": 1.411744475364685, "learning_rate": 1.9993009618286422e-05, "loss": 1.1852, "step": 933 }, { "epoch": 0.12489970580369082, "grad_norm": 1.5613346099853516, "learning_rate": 1.9992955540647544e-05, "loss": 1.1824, "step": 934 }, { "epoch": 0.1250334313987697, "grad_norm": 1.627138614654541, "learning_rate": 1.9992901254715068e-05, "loss": 1.1637, "step": 935 }, { "epoch": 0.12516715699384862, "grad_norm": 1.5002950429916382, "learning_rate": 1.999284676049013e-05, "loss": 1.017, "step": 936 }, { "epoch": 0.12530088258892752, "grad_norm": 1.4754056930541992, "learning_rate": 1.999279205797386e-05, "loss": 1.1418, "step": 937 }, { "epoch": 0.12543460818400642, "grad_norm": 1.3784698247909546, "learning_rate": 1.99927371471674e-05, "loss": 1.0408, "step": 938 }, { "epoch": 0.12556833377908533, "grad_norm": 1.530531883239746, "learning_rate": 1.9992682028071905e-05, "loss": 1.0358, "step": 939 }, { "epoch": 0.1257020593741642, "grad_norm": 1.526116132736206, "learning_rate": 1.999262670068851e-05, "loss": 1.1353, "step": 940 }, { "epoch": 0.1258357849692431, "grad_norm": 1.275760293006897, "learning_rate": 1.9992571165018372e-05, "loss": 0.9365, "step": 941 }, { "epoch": 0.125969510564322, "grad_norm": 1.588582992553711, "learning_rate": 1.999251542106265e-05, "loss": 1.1299, "step": 942 }, { "epoch": 0.12610323615940092, "grad_norm": 1.5529191493988037, "learning_rate": 1.9992459468822507e-05, "loss": 1.0605, "step": 943 }, { "epoch": 0.12623696175447982, "grad_norm": 1.2800893783569336, "learning_rate": 1.9992403308299112e-05, "loss": 0.9714, "step": 944 }, { "epoch": 0.1263706873495587, "grad_norm": 1.4953656196594238, "learning_rate": 1.9992346939493625e-05, "loss": 1.1956, "step": 945 }, { "epoch": 0.1265044129446376, "grad_norm": 1.4463863372802734, "learning_rate": 1.9992290362407232e-05, "loss": 1.1637, "step": 946 }, { "epoch": 0.1266381385397165, "grad_norm": 1.3953571319580078, "learning_rate": 1.9992233577041106e-05, "loss": 1.0695, "step": 947 }, { "epoch": 0.1267718641347954, "grad_norm": 1.3301591873168945, "learning_rate": 1.9992176583396432e-05, "loss": 1.1116, "step": 948 }, { "epoch": 0.12690558972987429, "grad_norm": 1.381116271018982, "learning_rate": 1.9992119381474403e-05, "loss": 1.0297, "step": 949 }, { "epoch": 0.1270393153249532, "grad_norm": 1.5181941986083984, "learning_rate": 1.9992061971276202e-05, "loss": 1.0779, "step": 950 }, { "epoch": 0.1271730409200321, "grad_norm": 1.382551670074463, "learning_rate": 1.999200435280303e-05, "loss": 1.0969, "step": 951 }, { "epoch": 0.127306766515111, "grad_norm": 1.369179368019104, "learning_rate": 1.9991946526056093e-05, "loss": 1.0565, "step": 952 }, { "epoch": 0.1274404921101899, "grad_norm": 1.6462204456329346, "learning_rate": 1.9991888491036588e-05, "loss": 1.1654, "step": 953 }, { "epoch": 0.12757421770526878, "grad_norm": 1.305835247039795, "learning_rate": 1.9991830247745732e-05, "loss": 1.0273, "step": 954 }, { "epoch": 0.12770794330034768, "grad_norm": 1.3549668788909912, "learning_rate": 1.9991771796184734e-05, "loss": 1.0239, "step": 955 }, { "epoch": 0.12784166889542659, "grad_norm": 1.2129355669021606, "learning_rate": 1.9991713136354814e-05, "loss": 0.9812, "step": 956 }, { "epoch": 0.1279753944905055, "grad_norm": 1.4371880292892456, "learning_rate": 1.9991654268257196e-05, "loss": 1.1405, "step": 957 }, { "epoch": 0.1281091200855844, "grad_norm": 1.271898865699768, "learning_rate": 1.99915951918931e-05, "loss": 1.0416, "step": 958 }, { "epoch": 0.12824284568066327, "grad_norm": 1.3246506452560425, "learning_rate": 1.9991535907263772e-05, "loss": 1.0278, "step": 959 }, { "epoch": 0.12837657127574217, "grad_norm": 1.3879908323287964, "learning_rate": 1.9991476414370433e-05, "loss": 1.0976, "step": 960 }, { "epoch": 0.12851029687082108, "grad_norm": 1.2828664779663086, "learning_rate": 1.9991416713214332e-05, "loss": 1.0794, "step": 961 }, { "epoch": 0.12864402246589998, "grad_norm": 1.380135416984558, "learning_rate": 1.999135680379671e-05, "loss": 1.0182, "step": 962 }, { "epoch": 0.12877774806097886, "grad_norm": 1.238260269165039, "learning_rate": 1.9991296686118814e-05, "loss": 0.9956, "step": 963 }, { "epoch": 0.12891147365605776, "grad_norm": 1.3175809383392334, "learning_rate": 1.9991236360181897e-05, "loss": 0.9859, "step": 964 }, { "epoch": 0.12904519925113667, "grad_norm": 1.467980980873108, "learning_rate": 1.9991175825987222e-05, "loss": 1.1674, "step": 965 }, { "epoch": 0.12917892484621557, "grad_norm": 1.305045247077942, "learning_rate": 1.999111508353605e-05, "loss": 0.9751, "step": 966 }, { "epoch": 0.12931265044129447, "grad_norm": 1.5114868879318237, "learning_rate": 1.999105413282964e-05, "loss": 1.1466, "step": 967 }, { "epoch": 0.12944637603637335, "grad_norm": 1.3352289199829102, "learning_rate": 1.999099297386927e-05, "loss": 1.1406, "step": 968 }, { "epoch": 0.12958010163145225, "grad_norm": 1.418023943901062, "learning_rate": 1.9990931606656208e-05, "loss": 1.0463, "step": 969 }, { "epoch": 0.12971382722653116, "grad_norm": 1.431505560874939, "learning_rate": 1.999087003119174e-05, "loss": 1.0351, "step": 970 }, { "epoch": 0.12984755282161006, "grad_norm": 1.3251508474349976, "learning_rate": 1.9990808247477146e-05, "loss": 1.0285, "step": 971 }, { "epoch": 0.12998127841668897, "grad_norm": 1.2821189165115356, "learning_rate": 1.9990746255513717e-05, "loss": 1.1128, "step": 972 }, { "epoch": 0.13011500401176784, "grad_norm": 1.2176204919815063, "learning_rate": 1.9990684055302738e-05, "loss": 0.9462, "step": 973 }, { "epoch": 0.13024872960684675, "grad_norm": 1.3465044498443604, "learning_rate": 1.999062164684551e-05, "loss": 0.9825, "step": 974 }, { "epoch": 0.13038245520192565, "grad_norm": 1.4201785326004028, "learning_rate": 1.9990559030143337e-05, "loss": 0.9998, "step": 975 }, { "epoch": 0.13051618079700456, "grad_norm": 1.3675949573516846, "learning_rate": 1.999049620519752e-05, "loss": 1.047, "step": 976 }, { "epoch": 0.13064990639208343, "grad_norm": 1.5603874921798706, "learning_rate": 1.9990433172009367e-05, "loss": 1.0981, "step": 977 }, { "epoch": 0.13078363198716234, "grad_norm": 1.2853633165359497, "learning_rate": 1.9990369930580197e-05, "loss": 0.9648, "step": 978 }, { "epoch": 0.13091735758224124, "grad_norm": 1.3333139419555664, "learning_rate": 1.9990306480911325e-05, "loss": 1.177, "step": 979 }, { "epoch": 0.13105108317732014, "grad_norm": 1.3269702196121216, "learning_rate": 1.9990242823004075e-05, "loss": 1.112, "step": 980 }, { "epoch": 0.13118480877239905, "grad_norm": 1.4549920558929443, "learning_rate": 1.9990178956859768e-05, "loss": 0.9504, "step": 981 }, { "epoch": 0.13131853436747792, "grad_norm": 1.2439687252044678, "learning_rate": 1.9990114882479747e-05, "loss": 1.1438, "step": 982 }, { "epoch": 0.13145225996255683, "grad_norm": 1.4831445217132568, "learning_rate": 1.9990050599865335e-05, "loss": 1.1619, "step": 983 }, { "epoch": 0.13158598555763573, "grad_norm": 1.2838259935379028, "learning_rate": 1.9989986109017882e-05, "loss": 0.9752, "step": 984 }, { "epoch": 0.13171971115271464, "grad_norm": 1.3213512897491455, "learning_rate": 1.9989921409938726e-05, "loss": 0.9792, "step": 985 }, { "epoch": 0.13185343674779354, "grad_norm": 1.482013463973999, "learning_rate": 1.9989856502629218e-05, "loss": 1.0321, "step": 986 }, { "epoch": 0.13198716234287242, "grad_norm": 1.3989663124084473, "learning_rate": 1.9989791387090708e-05, "loss": 1.0959, "step": 987 }, { "epoch": 0.13212088793795132, "grad_norm": 1.4580987691879272, "learning_rate": 1.998972606332456e-05, "loss": 1.1436, "step": 988 }, { "epoch": 0.13225461353303022, "grad_norm": 1.4221844673156738, "learning_rate": 1.998966053133213e-05, "loss": 1.0731, "step": 989 }, { "epoch": 0.13238833912810913, "grad_norm": 1.3538439273834229, "learning_rate": 1.9989594791114783e-05, "loss": 1.0743, "step": 990 }, { "epoch": 0.132522064723188, "grad_norm": 1.453150749206543, "learning_rate": 1.9989528842673894e-05, "loss": 1.0672, "step": 991 }, { "epoch": 0.1326557903182669, "grad_norm": 1.3704817295074463, "learning_rate": 1.9989462686010834e-05, "loss": 0.997, "step": 992 }, { "epoch": 0.1327895159133458, "grad_norm": 1.4313379526138306, "learning_rate": 1.9989396321126983e-05, "loss": 0.9996, "step": 993 }, { "epoch": 0.13292324150842472, "grad_norm": 1.2943131923675537, "learning_rate": 1.9989329748023728e-05, "loss": 1.0498, "step": 994 }, { "epoch": 0.13305696710350362, "grad_norm": 1.4326971769332886, "learning_rate": 1.998926296670245e-05, "loss": 1.1362, "step": 995 }, { "epoch": 0.1331906926985825, "grad_norm": 1.435070514678955, "learning_rate": 1.998919597716454e-05, "loss": 1.1701, "step": 996 }, { "epoch": 0.1333244182936614, "grad_norm": 1.2473304271697998, "learning_rate": 1.9989128779411405e-05, "loss": 0.9806, "step": 997 }, { "epoch": 0.1334581438887403, "grad_norm": 1.3258861303329468, "learning_rate": 1.9989061373444437e-05, "loss": 0.9983, "step": 998 }, { "epoch": 0.1335918694838192, "grad_norm": 1.3803800344467163, "learning_rate": 1.9988993759265045e-05, "loss": 1.1152, "step": 999 }, { "epoch": 0.1337255950788981, "grad_norm": 1.3569376468658447, "learning_rate": 1.9988925936874635e-05, "loss": 1.0499, "step": 1000 }, { "epoch": 0.133859320673977, "grad_norm": 1.484125018119812, "learning_rate": 1.9988857906274618e-05, "loss": 1.2031, "step": 1001 }, { "epoch": 0.1339930462690559, "grad_norm": 1.3320391178131104, "learning_rate": 1.9988789667466425e-05, "loss": 0.9403, "step": 1002 }, { "epoch": 0.1341267718641348, "grad_norm": 1.4751819372177124, "learning_rate": 1.9988721220451463e-05, "loss": 1.164, "step": 1003 }, { "epoch": 0.1342604974592137, "grad_norm": 1.3565412759780884, "learning_rate": 1.9988652565231167e-05, "loss": 1.0498, "step": 1004 }, { "epoch": 0.1343942230542926, "grad_norm": 1.3807967901229858, "learning_rate": 1.9988583701806967e-05, "loss": 1.2248, "step": 1005 }, { "epoch": 0.13452794864937148, "grad_norm": 1.4893882274627686, "learning_rate": 1.9988514630180297e-05, "loss": 1.1945, "step": 1006 }, { "epoch": 0.13466167424445039, "grad_norm": 1.4328222274780273, "learning_rate": 1.9988445350352596e-05, "loss": 1.0893, "step": 1007 }, { "epoch": 0.1347953998395293, "grad_norm": 1.3250160217285156, "learning_rate": 1.9988375862325312e-05, "loss": 1.0296, "step": 1008 }, { "epoch": 0.1349291254346082, "grad_norm": 1.4690183401107788, "learning_rate": 1.998830616609989e-05, "loss": 1.1331, "step": 1009 }, { "epoch": 0.13506285102968707, "grad_norm": 1.3639572858810425, "learning_rate": 1.9988236261677786e-05, "loss": 1.1144, "step": 1010 }, { "epoch": 0.13519657662476597, "grad_norm": 1.315972089767456, "learning_rate": 1.998816614906045e-05, "loss": 1.0833, "step": 1011 }, { "epoch": 0.13533030221984488, "grad_norm": 1.3997548818588257, "learning_rate": 1.9988095828249354e-05, "loss": 1.0266, "step": 1012 }, { "epoch": 0.13546402781492378, "grad_norm": 1.5421127080917358, "learning_rate": 1.9988025299245953e-05, "loss": 1.0106, "step": 1013 }, { "epoch": 0.13559775341000269, "grad_norm": 1.5093224048614502, "learning_rate": 1.9987954562051724e-05, "loss": 1.0308, "step": 1014 }, { "epoch": 0.13573147900508156, "grad_norm": 1.3418471813201904, "learning_rate": 1.9987883616668143e-05, "loss": 1.0386, "step": 1015 }, { "epoch": 0.13586520460016047, "grad_norm": 1.2908902168273926, "learning_rate": 1.998781246309668e-05, "loss": 1.0741, "step": 1016 }, { "epoch": 0.13599893019523937, "grad_norm": 1.3005545139312744, "learning_rate": 1.9987741101338826e-05, "loss": 1.0705, "step": 1017 }, { "epoch": 0.13613265579031827, "grad_norm": 1.3663361072540283, "learning_rate": 1.9987669531396067e-05, "loss": 1.1085, "step": 1018 }, { "epoch": 0.13626638138539718, "grad_norm": 1.3333197832107544, "learning_rate": 1.9987597753269893e-05, "loss": 0.9394, "step": 1019 }, { "epoch": 0.13640010698047605, "grad_norm": 1.391821265220642, "learning_rate": 1.99875257669618e-05, "loss": 0.9739, "step": 1020 }, { "epoch": 0.13653383257555496, "grad_norm": 1.2928553819656372, "learning_rate": 1.998745357247329e-05, "loss": 1.0477, "step": 1021 }, { "epoch": 0.13666755817063386, "grad_norm": 1.2332611083984375, "learning_rate": 1.9987381169805866e-05, "loss": 1.0143, "step": 1022 }, { "epoch": 0.13680128376571277, "grad_norm": 1.407537579536438, "learning_rate": 1.998730855896104e-05, "loss": 1.1233, "step": 1023 }, { "epoch": 0.13693500936079164, "grad_norm": 1.3894444704055786, "learning_rate": 1.9987235739940325e-05, "loss": 1.0829, "step": 1024 }, { "epoch": 0.13706873495587055, "grad_norm": 1.3258349895477295, "learning_rate": 1.9987162712745235e-05, "loss": 1.0362, "step": 1025 }, { "epoch": 0.13720246055094945, "grad_norm": 1.297355055809021, "learning_rate": 1.9987089477377293e-05, "loss": 0.9773, "step": 1026 }, { "epoch": 0.13733618614602836, "grad_norm": 1.4536924362182617, "learning_rate": 1.9987016033838035e-05, "loss": 1.2054, "step": 1027 }, { "epoch": 0.13746991174110726, "grad_norm": 1.4943294525146484, "learning_rate": 1.998694238212898e-05, "loss": 1.1033, "step": 1028 }, { "epoch": 0.13760363733618614, "grad_norm": 1.3273707628250122, "learning_rate": 1.9986868522251664e-05, "loss": 1.0629, "step": 1029 }, { "epoch": 0.13773736293126504, "grad_norm": 1.287666916847229, "learning_rate": 1.9986794454207635e-05, "loss": 1.0503, "step": 1030 }, { "epoch": 0.13787108852634394, "grad_norm": 1.4131563901901245, "learning_rate": 1.9986720177998432e-05, "loss": 1.0698, "step": 1031 }, { "epoch": 0.13800481412142285, "grad_norm": 1.3133740425109863, "learning_rate": 1.9986645693625603e-05, "loss": 1.0225, "step": 1032 }, { "epoch": 0.13813853971650175, "grad_norm": 1.4140040874481201, "learning_rate": 1.9986571001090697e-05, "loss": 1.0661, "step": 1033 }, { "epoch": 0.13827226531158063, "grad_norm": 1.4234790802001953, "learning_rate": 1.9986496100395276e-05, "loss": 1.1509, "step": 1034 }, { "epoch": 0.13840599090665953, "grad_norm": 1.3856779336929321, "learning_rate": 1.9986420991540902e-05, "loss": 1.1703, "step": 1035 }, { "epoch": 0.13853971650173844, "grad_norm": 1.304639220237732, "learning_rate": 1.9986345674529136e-05, "loss": 1.1007, "step": 1036 }, { "epoch": 0.13867344209681734, "grad_norm": 1.395960807800293, "learning_rate": 1.998627014936155e-05, "loss": 1.0153, "step": 1037 }, { "epoch": 0.13880716769189622, "grad_norm": 1.4186303615570068, "learning_rate": 1.9986194416039723e-05, "loss": 1.1149, "step": 1038 }, { "epoch": 0.13894089328697512, "grad_norm": 1.2542798519134521, "learning_rate": 1.9986118474565222e-05, "loss": 0.9447, "step": 1039 }, { "epoch": 0.13907461888205402, "grad_norm": 1.484601378440857, "learning_rate": 1.9986042324939646e-05, "loss": 1.0876, "step": 1040 }, { "epoch": 0.13920834447713293, "grad_norm": 1.4490349292755127, "learning_rate": 1.9985965967164566e-05, "loss": 1.0187, "step": 1041 }, { "epoch": 0.13934207007221183, "grad_norm": 1.4552421569824219, "learning_rate": 1.9985889401241585e-05, "loss": 1.1728, "step": 1042 }, { "epoch": 0.1394757956672907, "grad_norm": 1.4438717365264893, "learning_rate": 1.9985812627172292e-05, "loss": 1.0943, "step": 1043 }, { "epoch": 0.1396095212623696, "grad_norm": 1.280500054359436, "learning_rate": 1.9985735644958292e-05, "loss": 0.958, "step": 1044 }, { "epoch": 0.13974324685744852, "grad_norm": 1.303842306137085, "learning_rate": 1.9985658454601186e-05, "loss": 1.0291, "step": 1045 }, { "epoch": 0.13987697245252742, "grad_norm": 1.1613770723342896, "learning_rate": 1.9985581056102585e-05, "loss": 1.1044, "step": 1046 }, { "epoch": 0.14001069804760632, "grad_norm": 1.3475439548492432, "learning_rate": 1.9985503449464107e-05, "loss": 1.0193, "step": 1047 }, { "epoch": 0.1401444236426852, "grad_norm": 1.4030957221984863, "learning_rate": 1.998542563468736e-05, "loss": 1.1651, "step": 1048 }, { "epoch": 0.1402781492377641, "grad_norm": 1.300018072128296, "learning_rate": 1.998534761177397e-05, "loss": 1.0676, "step": 1049 }, { "epoch": 0.140411874832843, "grad_norm": 1.3763556480407715, "learning_rate": 1.9985269380725567e-05, "loss": 1.1298, "step": 1050 }, { "epoch": 0.1405456004279219, "grad_norm": 1.5105760097503662, "learning_rate": 1.9985190941543778e-05, "loss": 1.1083, "step": 1051 }, { "epoch": 0.1406793260230008, "grad_norm": 1.4885454177856445, "learning_rate": 1.9985112294230236e-05, "loss": 1.0657, "step": 1052 }, { "epoch": 0.1408130516180797, "grad_norm": 1.3515266180038452, "learning_rate": 1.9985033438786587e-05, "loss": 1.0837, "step": 1053 }, { "epoch": 0.1409467772131586, "grad_norm": 1.3231233358383179, "learning_rate": 1.9984954375214464e-05, "loss": 0.976, "step": 1054 }, { "epoch": 0.1410805028082375, "grad_norm": 1.3824316263198853, "learning_rate": 1.9984875103515528e-05, "loss": 1.1269, "step": 1055 }, { "epoch": 0.1412142284033164, "grad_norm": 1.3620485067367554, "learning_rate": 1.998479562369142e-05, "loss": 1.0368, "step": 1056 }, { "epoch": 0.14134795399839528, "grad_norm": 1.3291696310043335, "learning_rate": 1.9984715935743805e-05, "loss": 1.0564, "step": 1057 }, { "epoch": 0.14148167959347419, "grad_norm": 1.2659496068954468, "learning_rate": 1.9984636039674342e-05, "loss": 1.0264, "step": 1058 }, { "epoch": 0.1416154051885531, "grad_norm": 1.281972885131836, "learning_rate": 1.9984555935484693e-05, "loss": 1.0962, "step": 1059 }, { "epoch": 0.141749130783632, "grad_norm": 1.2970126867294312, "learning_rate": 1.998447562317653e-05, "loss": 1.1622, "step": 1060 }, { "epoch": 0.1418828563787109, "grad_norm": 1.3969472646713257, "learning_rate": 1.9984395102751525e-05, "loss": 1.0963, "step": 1061 }, { "epoch": 0.14201658197378977, "grad_norm": 1.4189127683639526, "learning_rate": 1.998431437421136e-05, "loss": 1.0411, "step": 1062 }, { "epoch": 0.14215030756886868, "grad_norm": 1.4788099527359009, "learning_rate": 1.9984233437557716e-05, "loss": 0.8507, "step": 1063 }, { "epoch": 0.14228403316394758, "grad_norm": 1.6212916374206543, "learning_rate": 1.998415229279228e-05, "loss": 1.1119, "step": 1064 }, { "epoch": 0.14241775875902649, "grad_norm": 1.4980357885360718, "learning_rate": 1.9984070939916742e-05, "loss": 1.1114, "step": 1065 }, { "epoch": 0.14255148435410536, "grad_norm": 1.5158213376998901, "learning_rate": 1.99839893789328e-05, "loss": 1.0305, "step": 1066 }, { "epoch": 0.14268520994918427, "grad_norm": 1.5770738124847412, "learning_rate": 1.9983907609842153e-05, "loss": 0.9907, "step": 1067 }, { "epoch": 0.14281893554426317, "grad_norm": 1.3171989917755127, "learning_rate": 1.9983825632646504e-05, "loss": 0.9904, "step": 1068 }, { "epoch": 0.14295266113934207, "grad_norm": 1.4031672477722168, "learning_rate": 1.9983743447347567e-05, "loss": 1.0867, "step": 1069 }, { "epoch": 0.14308638673442098, "grad_norm": 1.6678587198257446, "learning_rate": 1.9983661053947047e-05, "loss": 1.1615, "step": 1070 }, { "epoch": 0.14322011232949985, "grad_norm": 1.491369605064392, "learning_rate": 1.9983578452446666e-05, "loss": 0.9742, "step": 1071 }, { "epoch": 0.14335383792457876, "grad_norm": 1.4158729314804077, "learning_rate": 1.9983495642848146e-05, "loss": 1.0652, "step": 1072 }, { "epoch": 0.14348756351965766, "grad_norm": 1.4791038036346436, "learning_rate": 1.9983412625153214e-05, "loss": 1.1056, "step": 1073 }, { "epoch": 0.14362128911473657, "grad_norm": 1.2821612358093262, "learning_rate": 1.99833293993636e-05, "loss": 1.0259, "step": 1074 }, { "epoch": 0.14375501470981547, "grad_norm": 1.2491177320480347, "learning_rate": 1.9983245965481034e-05, "loss": 0.9281, "step": 1075 }, { "epoch": 0.14388874030489435, "grad_norm": 1.4034216403961182, "learning_rate": 1.9983162323507258e-05, "loss": 1.1196, "step": 1076 }, { "epoch": 0.14402246589997325, "grad_norm": 1.4936261177062988, "learning_rate": 1.998307847344402e-05, "loss": 1.0654, "step": 1077 }, { "epoch": 0.14415619149505216, "grad_norm": 1.4542263746261597, "learning_rate": 1.9982994415293063e-05, "loss": 1.1758, "step": 1078 }, { "epoch": 0.14428991709013106, "grad_norm": 1.3498492240905762, "learning_rate": 1.9982910149056137e-05, "loss": 1.1029, "step": 1079 }, { "epoch": 0.14442364268520994, "grad_norm": 1.3693197965621948, "learning_rate": 1.9982825674735007e-05, "loss": 0.9717, "step": 1080 }, { "epoch": 0.14455736828028884, "grad_norm": 1.3028573989868164, "learning_rate": 1.9982740992331428e-05, "loss": 1.0772, "step": 1081 }, { "epoch": 0.14469109387536774, "grad_norm": 1.353473424911499, "learning_rate": 1.998265610184716e-05, "loss": 1.1543, "step": 1082 }, { "epoch": 0.14482481947044665, "grad_norm": 1.3129605054855347, "learning_rate": 1.9982571003283982e-05, "loss": 1.094, "step": 1083 }, { "epoch": 0.14495854506552555, "grad_norm": 1.363317847251892, "learning_rate": 1.9982485696643663e-05, "loss": 1.0833, "step": 1084 }, { "epoch": 0.14509227066060443, "grad_norm": 1.2662261724472046, "learning_rate": 1.998240018192798e-05, "loss": 1.1172, "step": 1085 }, { "epoch": 0.14522599625568333, "grad_norm": 1.474464774131775, "learning_rate": 1.9982314459138717e-05, "loss": 1.0084, "step": 1086 }, { "epoch": 0.14535972185076224, "grad_norm": 1.3057090044021606, "learning_rate": 1.9982228528277664e-05, "loss": 1.0635, "step": 1087 }, { "epoch": 0.14549344744584114, "grad_norm": 1.4982093572616577, "learning_rate": 1.998214238934661e-05, "loss": 0.984, "step": 1088 }, { "epoch": 0.14562717304092004, "grad_norm": 1.3765114545822144, "learning_rate": 1.9982056042347347e-05, "loss": 1.1145, "step": 1089 }, { "epoch": 0.14576089863599892, "grad_norm": 1.411778211593628, "learning_rate": 1.9981969487281678e-05, "loss": 1.038, "step": 1090 }, { "epoch": 0.14589462423107782, "grad_norm": 1.1712851524353027, "learning_rate": 1.9981882724151408e-05, "loss": 0.8742, "step": 1091 }, { "epoch": 0.14602834982615673, "grad_norm": 1.5205491781234741, "learning_rate": 1.9981795752958346e-05, "loss": 1.0199, "step": 1092 }, { "epoch": 0.14616207542123563, "grad_norm": 1.2660516500473022, "learning_rate": 1.99817085737043e-05, "loss": 1.0349, "step": 1093 }, { "epoch": 0.14629580101631454, "grad_norm": 1.3479373455047607, "learning_rate": 1.998162118639109e-05, "loss": 1.0081, "step": 1094 }, { "epoch": 0.1464295266113934, "grad_norm": 1.3804166316986084, "learning_rate": 1.9981533591020538e-05, "loss": 1.0759, "step": 1095 }, { "epoch": 0.14656325220647232, "grad_norm": 1.3884199857711792, "learning_rate": 1.998144578759447e-05, "loss": 1.0308, "step": 1096 }, { "epoch": 0.14669697780155122, "grad_norm": 1.306915521621704, "learning_rate": 1.9981357776114718e-05, "loss": 0.9887, "step": 1097 }, { "epoch": 0.14683070339663012, "grad_norm": 1.3915882110595703, "learning_rate": 1.9981269556583113e-05, "loss": 1.1231, "step": 1098 }, { "epoch": 0.146964428991709, "grad_norm": 1.2555911540985107, "learning_rate": 1.998118112900149e-05, "loss": 1.1144, "step": 1099 }, { "epoch": 0.1470981545867879, "grad_norm": 1.285923719406128, "learning_rate": 1.9981092493371707e-05, "loss": 1.0721, "step": 1100 }, { "epoch": 0.1472318801818668, "grad_norm": 1.3924224376678467, "learning_rate": 1.9981003649695598e-05, "loss": 1.067, "step": 1101 }, { "epoch": 0.1473656057769457, "grad_norm": 1.332041621208191, "learning_rate": 1.9980914597975014e-05, "loss": 0.8776, "step": 1102 }, { "epoch": 0.14749933137202462, "grad_norm": 1.3708878755569458, "learning_rate": 1.998082533821182e-05, "loss": 1.0888, "step": 1103 }, { "epoch": 0.1476330569671035, "grad_norm": 1.2529127597808838, "learning_rate": 1.998073587040787e-05, "loss": 1.0638, "step": 1104 }, { "epoch": 0.1477667825621824, "grad_norm": 1.4064927101135254, "learning_rate": 1.9980646194565036e-05, "loss": 0.961, "step": 1105 }, { "epoch": 0.1479005081572613, "grad_norm": 1.3865231275558472, "learning_rate": 1.998055631068518e-05, "loss": 1.0957, "step": 1106 }, { "epoch": 0.1480342337523402, "grad_norm": 1.4839482307434082, "learning_rate": 1.9980466218770175e-05, "loss": 1.0855, "step": 1107 }, { "epoch": 0.1481679593474191, "grad_norm": 1.3857457637786865, "learning_rate": 1.9980375918821904e-05, "loss": 1.0393, "step": 1108 }, { "epoch": 0.14830168494249799, "grad_norm": 1.2539458274841309, "learning_rate": 1.998028541084225e-05, "loss": 0.9052, "step": 1109 }, { "epoch": 0.1484354105375769, "grad_norm": 1.4512742757797241, "learning_rate": 1.9980194694833096e-05, "loss": 1.1834, "step": 1110 }, { "epoch": 0.1485691361326558, "grad_norm": 1.3668287992477417, "learning_rate": 1.998010377079633e-05, "loss": 1.0449, "step": 1111 }, { "epoch": 0.1487028617277347, "grad_norm": 1.4858283996582031, "learning_rate": 1.9980012638733852e-05, "loss": 1.1704, "step": 1112 }, { "epoch": 0.14883658732281357, "grad_norm": 1.3831220865249634, "learning_rate": 1.997992129864756e-05, "loss": 1.0653, "step": 1113 }, { "epoch": 0.14897031291789248, "grad_norm": 1.3879042863845825, "learning_rate": 1.997982975053936e-05, "loss": 1.0165, "step": 1114 }, { "epoch": 0.14910403851297138, "grad_norm": 1.38655424118042, "learning_rate": 1.997973799441116e-05, "loss": 1.1114, "step": 1115 }, { "epoch": 0.14923776410805029, "grad_norm": 1.1419224739074707, "learning_rate": 1.9979646030264867e-05, "loss": 0.9912, "step": 1116 }, { "epoch": 0.1493714897031292, "grad_norm": 1.633781909942627, "learning_rate": 1.9979553858102407e-05, "loss": 1.1575, "step": 1117 }, { "epoch": 0.14950521529820807, "grad_norm": 1.3146897554397583, "learning_rate": 1.9979461477925693e-05, "loss": 1.0874, "step": 1118 }, { "epoch": 0.14963894089328697, "grad_norm": 1.3741525411605835, "learning_rate": 1.997936888973665e-05, "loss": 1.0763, "step": 1119 }, { "epoch": 0.14977266648836587, "grad_norm": 1.2338871955871582, "learning_rate": 1.9979276093537216e-05, "loss": 1.0018, "step": 1120 }, { "epoch": 0.14990639208344478, "grad_norm": 1.4803754091262817, "learning_rate": 1.997918308932932e-05, "loss": 1.0732, "step": 1121 }, { "epoch": 0.15004011767852368, "grad_norm": 1.306488037109375, "learning_rate": 1.9979089877114905e-05, "loss": 1.0397, "step": 1122 }, { "epoch": 0.15017384327360256, "grad_norm": 1.3968250751495361, "learning_rate": 1.9978996456895906e-05, "loss": 1.1004, "step": 1123 }, { "epoch": 0.15030756886868146, "grad_norm": 1.3843406438827515, "learning_rate": 1.997890282867428e-05, "loss": 1.0359, "step": 1124 }, { "epoch": 0.15044129446376037, "grad_norm": 1.3199306726455688, "learning_rate": 1.9978808992451968e-05, "loss": 0.989, "step": 1125 }, { "epoch": 0.15057502005883927, "grad_norm": 1.3507875204086304, "learning_rate": 1.9978714948230932e-05, "loss": 1.002, "step": 1126 }, { "epoch": 0.15070874565391815, "grad_norm": 1.3643404245376587, "learning_rate": 1.9978620696013133e-05, "loss": 1.1206, "step": 1127 }, { "epoch": 0.15084247124899705, "grad_norm": 1.2881428003311157, "learning_rate": 1.9978526235800535e-05, "loss": 1.0239, "step": 1128 }, { "epoch": 0.15097619684407595, "grad_norm": 1.2554134130477905, "learning_rate": 1.9978431567595104e-05, "loss": 0.9482, "step": 1129 }, { "epoch": 0.15110992243915486, "grad_norm": 1.3809139728546143, "learning_rate": 1.9978336691398815e-05, "loss": 1.1723, "step": 1130 }, { "epoch": 0.15124364803423376, "grad_norm": 1.2555856704711914, "learning_rate": 1.9978241607213647e-05, "loss": 1.0094, "step": 1131 }, { "epoch": 0.15137737362931264, "grad_norm": 1.4045881032943726, "learning_rate": 1.997814631504158e-05, "loss": 1.1343, "step": 1132 }, { "epoch": 0.15151109922439154, "grad_norm": 1.335708737373352, "learning_rate": 1.9978050814884602e-05, "loss": 1.1089, "step": 1133 }, { "epoch": 0.15164482481947045, "grad_norm": 1.3846192359924316, "learning_rate": 1.9977955106744706e-05, "loss": 1.1987, "step": 1134 }, { "epoch": 0.15177855041454935, "grad_norm": 1.4184974431991577, "learning_rate": 1.997785919062388e-05, "loss": 1.0251, "step": 1135 }, { "epoch": 0.15191227600962826, "grad_norm": 1.2271558046340942, "learning_rate": 1.9977763066524124e-05, "loss": 0.9184, "step": 1136 }, { "epoch": 0.15204600160470713, "grad_norm": 1.3015462160110474, "learning_rate": 1.997766673444745e-05, "loss": 0.8884, "step": 1137 }, { "epoch": 0.15217972719978604, "grad_norm": 1.3521124124526978, "learning_rate": 1.9977570194395855e-05, "loss": 1.0723, "step": 1138 }, { "epoch": 0.15231345279486494, "grad_norm": 1.348889708518982, "learning_rate": 1.9977473446371363e-05, "loss": 1.0096, "step": 1139 }, { "epoch": 0.15244717838994384, "grad_norm": 1.286650538444519, "learning_rate": 1.997737649037598e-05, "loss": 1.0671, "step": 1140 }, { "epoch": 0.15258090398502272, "grad_norm": 1.2506932020187378, "learning_rate": 1.9977279326411734e-05, "loss": 1.0175, "step": 1141 }, { "epoch": 0.15271462958010162, "grad_norm": 1.1981256008148193, "learning_rate": 1.9977181954480646e-05, "loss": 0.9685, "step": 1142 }, { "epoch": 0.15284835517518053, "grad_norm": 1.505600929260254, "learning_rate": 1.9977084374584747e-05, "loss": 1.0145, "step": 1143 }, { "epoch": 0.15298208077025943, "grad_norm": 1.3016210794448853, "learning_rate": 1.9976986586726072e-05, "loss": 1.1631, "step": 1144 }, { "epoch": 0.15311580636533834, "grad_norm": 1.21498703956604, "learning_rate": 1.997688859090666e-05, "loss": 1.1183, "step": 1145 }, { "epoch": 0.1532495319604172, "grad_norm": 1.3456931114196777, "learning_rate": 1.9976790387128552e-05, "loss": 1.0023, "step": 1146 }, { "epoch": 0.15338325755549612, "grad_norm": 1.3332698345184326, "learning_rate": 1.997669197539379e-05, "loss": 1.1219, "step": 1147 }, { "epoch": 0.15351698315057502, "grad_norm": 1.3518805503845215, "learning_rate": 1.9976593355704438e-05, "loss": 0.9721, "step": 1148 }, { "epoch": 0.15365070874565392, "grad_norm": 1.445204496383667, "learning_rate": 1.9976494528062536e-05, "loss": 1.1067, "step": 1149 }, { "epoch": 0.15378443434073283, "grad_norm": 1.3716354370117188, "learning_rate": 1.997639549247016e-05, "loss": 0.9584, "step": 1150 }, { "epoch": 0.1539181599358117, "grad_norm": 1.3474990129470825, "learning_rate": 1.9976296248929362e-05, "loss": 0.9458, "step": 1151 }, { "epoch": 0.1540518855308906, "grad_norm": 1.4277440309524536, "learning_rate": 1.9976196797442213e-05, "loss": 1.0691, "step": 1152 }, { "epoch": 0.1541856111259695, "grad_norm": 1.3777574300765991, "learning_rate": 1.9976097138010793e-05, "loss": 1.0879, "step": 1153 }, { "epoch": 0.15431933672104842, "grad_norm": 1.5025060176849365, "learning_rate": 1.9975997270637172e-05, "loss": 1.1637, "step": 1154 }, { "epoch": 0.1544530623161273, "grad_norm": 1.3024978637695312, "learning_rate": 1.9975897195323434e-05, "loss": 0.9827, "step": 1155 }, { "epoch": 0.1545867879112062, "grad_norm": 1.532265067100525, "learning_rate": 1.9975796912071662e-05, "loss": 1.0997, "step": 1156 }, { "epoch": 0.1547205135062851, "grad_norm": 1.4757241010665894, "learning_rate": 1.9975696420883954e-05, "loss": 1.0537, "step": 1157 }, { "epoch": 0.154854239101364, "grad_norm": 1.3645095825195312, "learning_rate": 1.9975595721762397e-05, "loss": 1.023, "step": 1158 }, { "epoch": 0.1549879646964429, "grad_norm": 1.2984682321548462, "learning_rate": 1.997549481470909e-05, "loss": 1.2142, "step": 1159 }, { "epoch": 0.15512169029152179, "grad_norm": 1.3631954193115234, "learning_rate": 1.9975393699726148e-05, "loss": 1.0574, "step": 1160 }, { "epoch": 0.1552554158866007, "grad_norm": 1.3911529779434204, "learning_rate": 1.9975292376815664e-05, "loss": 0.9859, "step": 1161 }, { "epoch": 0.1553891414816796, "grad_norm": 1.2998430728912354, "learning_rate": 1.9975190845979754e-05, "loss": 1.0417, "step": 1162 }, { "epoch": 0.1555228670767585, "grad_norm": 1.3048453330993652, "learning_rate": 1.997508910722054e-05, "loss": 1.1224, "step": 1163 }, { "epoch": 0.1556565926718374, "grad_norm": 1.3541834354400635, "learning_rate": 1.9974987160540132e-05, "loss": 0.9705, "step": 1164 }, { "epoch": 0.15579031826691628, "grad_norm": 1.2443692684173584, "learning_rate": 1.997488500594067e-05, "loss": 1.0154, "step": 1165 }, { "epoch": 0.15592404386199518, "grad_norm": 1.503941297531128, "learning_rate": 1.997478264342427e-05, "loss": 1.0481, "step": 1166 }, { "epoch": 0.15605776945707409, "grad_norm": 1.3050588369369507, "learning_rate": 1.997468007299307e-05, "loss": 0.9695, "step": 1167 }, { "epoch": 0.156191495052153, "grad_norm": 1.386452555656433, "learning_rate": 1.9974577294649214e-05, "loss": 1.0114, "step": 1168 }, { "epoch": 0.1563252206472319, "grad_norm": 1.2783297300338745, "learning_rate": 1.9974474308394835e-05, "loss": 1.0403, "step": 1169 }, { "epoch": 0.15645894624231077, "grad_norm": 1.362115740776062, "learning_rate": 1.9974371114232083e-05, "loss": 0.9695, "step": 1170 }, { "epoch": 0.15659267183738967, "grad_norm": 1.2652329206466675, "learning_rate": 1.9974267712163112e-05, "loss": 0.9681, "step": 1171 }, { "epoch": 0.15672639743246858, "grad_norm": 1.5758148431777954, "learning_rate": 1.9974164102190074e-05, "loss": 1.1511, "step": 1172 }, { "epoch": 0.15686012302754748, "grad_norm": 1.4159247875213623, "learning_rate": 1.9974060284315126e-05, "loss": 1.114, "step": 1173 }, { "epoch": 0.15699384862262636, "grad_norm": 1.31596040725708, "learning_rate": 1.9973956258540438e-05, "loss": 1.0421, "step": 1174 }, { "epoch": 0.15712757421770526, "grad_norm": 1.4015370607376099, "learning_rate": 1.997385202486818e-05, "loss": 1.125, "step": 1175 }, { "epoch": 0.15726129981278417, "grad_norm": 1.600057601928711, "learning_rate": 1.9973747583300515e-05, "loss": 1.0565, "step": 1176 }, { "epoch": 0.15739502540786307, "grad_norm": 1.380564570426941, "learning_rate": 1.9973642933839628e-05, "loss": 1.0088, "step": 1177 }, { "epoch": 0.15752875100294197, "grad_norm": 1.3065496683120728, "learning_rate": 1.9973538076487697e-05, "loss": 1.0885, "step": 1178 }, { "epoch": 0.15766247659802085, "grad_norm": 1.2923986911773682, "learning_rate": 1.997343301124691e-05, "loss": 0.9865, "step": 1179 }, { "epoch": 0.15779620219309975, "grad_norm": 1.3223302364349365, "learning_rate": 1.9973327738119453e-05, "loss": 1.0333, "step": 1180 }, { "epoch": 0.15792992778817866, "grad_norm": 1.3359018564224243, "learning_rate": 1.9973222257107524e-05, "loss": 1.0729, "step": 1181 }, { "epoch": 0.15806365338325756, "grad_norm": 1.3748308420181274, "learning_rate": 1.997311656821332e-05, "loss": 0.904, "step": 1182 }, { "epoch": 0.15819737897833647, "grad_norm": 1.4424338340759277, "learning_rate": 1.9973010671439044e-05, "loss": 1.1533, "step": 1183 }, { "epoch": 0.15833110457341534, "grad_norm": 1.3110949993133545, "learning_rate": 1.9972904566786903e-05, "loss": 1.1767, "step": 1184 }, { "epoch": 0.15846483016849425, "grad_norm": 1.3076509237289429, "learning_rate": 1.9972798254259112e-05, "loss": 1.1519, "step": 1185 }, { "epoch": 0.15859855576357315, "grad_norm": 1.3424559831619263, "learning_rate": 1.997269173385788e-05, "loss": 1.0194, "step": 1186 }, { "epoch": 0.15873228135865206, "grad_norm": 1.4475231170654297, "learning_rate": 1.9972585005585435e-05, "loss": 0.9277, "step": 1187 }, { "epoch": 0.15886600695373093, "grad_norm": 1.3346045017242432, "learning_rate": 1.9972478069444e-05, "loss": 1.0138, "step": 1188 }, { "epoch": 0.15899973254880984, "grad_norm": 1.25464928150177, "learning_rate": 1.9972370925435797e-05, "loss": 1.1007, "step": 1189 }, { "epoch": 0.15913345814388874, "grad_norm": 1.3148994445800781, "learning_rate": 1.997226357356307e-05, "loss": 0.9114, "step": 1190 }, { "epoch": 0.15926718373896764, "grad_norm": 1.3009276390075684, "learning_rate": 1.9972156013828048e-05, "loss": 0.9254, "step": 1191 }, { "epoch": 0.15940090933404655, "grad_norm": 1.4865949153900146, "learning_rate": 1.997204824623298e-05, "loss": 1.1972, "step": 1192 }, { "epoch": 0.15953463492912542, "grad_norm": 1.5413349866867065, "learning_rate": 1.9971940270780103e-05, "loss": 1.1475, "step": 1193 }, { "epoch": 0.15966836052420433, "grad_norm": 1.3938820362091064, "learning_rate": 1.9971832087471678e-05, "loss": 1.2655, "step": 1194 }, { "epoch": 0.15980208611928323, "grad_norm": 1.3588309288024902, "learning_rate": 1.9971723696309953e-05, "loss": 0.9761, "step": 1195 }, { "epoch": 0.15993581171436214, "grad_norm": 1.2254605293273926, "learning_rate": 1.997161509729719e-05, "loss": 0.919, "step": 1196 }, { "epoch": 0.16006953730944104, "grad_norm": 1.4604758024215698, "learning_rate": 1.997150629043565e-05, "loss": 1.129, "step": 1197 }, { "epoch": 0.16020326290451992, "grad_norm": 1.281382441520691, "learning_rate": 1.9971397275727603e-05, "loss": 0.9982, "step": 1198 }, { "epoch": 0.16033698849959882, "grad_norm": 1.2233285903930664, "learning_rate": 1.9971288053175323e-05, "loss": 1.1027, "step": 1199 }, { "epoch": 0.16047071409467772, "grad_norm": 1.2381975650787354, "learning_rate": 1.9971178622781086e-05, "loss": 1.0045, "step": 1200 }, { "epoch": 0.16060443968975663, "grad_norm": 1.284056544303894, "learning_rate": 1.997106898454717e-05, "loss": 1.0338, "step": 1201 }, { "epoch": 0.1607381652848355, "grad_norm": 1.1576988697052002, "learning_rate": 1.9970959138475864e-05, "loss": 1.0281, "step": 1202 }, { "epoch": 0.1608718908799144, "grad_norm": 1.3140467405319214, "learning_rate": 1.9970849084569456e-05, "loss": 1.0018, "step": 1203 }, { "epoch": 0.1610056164749933, "grad_norm": 1.5472242832183838, "learning_rate": 1.9970738822830237e-05, "loss": 0.9519, "step": 1204 }, { "epoch": 0.16113934207007222, "grad_norm": 1.15891695022583, "learning_rate": 1.997062835326051e-05, "loss": 0.9989, "step": 1205 }, { "epoch": 0.16127306766515112, "grad_norm": 1.415971040725708, "learning_rate": 1.997051767586258e-05, "loss": 1.0005, "step": 1206 }, { "epoch": 0.16140679326023, "grad_norm": 1.4245842695236206, "learning_rate": 1.9970406790638745e-05, "loss": 1.2001, "step": 1207 }, { "epoch": 0.1615405188553089, "grad_norm": 1.3909989595413208, "learning_rate": 1.997029569759132e-05, "loss": 1.1613, "step": 1208 }, { "epoch": 0.1616742444503878, "grad_norm": 1.3046021461486816, "learning_rate": 1.9970184396722623e-05, "loss": 1.0694, "step": 1209 }, { "epoch": 0.1618079700454667, "grad_norm": 1.3252949714660645, "learning_rate": 1.9970072888034973e-05, "loss": 1.0544, "step": 1210 }, { "epoch": 0.1619416956405456, "grad_norm": 1.2924394607543945, "learning_rate": 1.9969961171530694e-05, "loss": 1.0484, "step": 1211 }, { "epoch": 0.1620754212356245, "grad_norm": 1.455288052558899, "learning_rate": 1.9969849247212116e-05, "loss": 1.0527, "step": 1212 }, { "epoch": 0.1622091468307034, "grad_norm": 1.3105442523956299, "learning_rate": 1.996973711508157e-05, "loss": 0.9414, "step": 1213 }, { "epoch": 0.1623428724257823, "grad_norm": 1.2527780532836914, "learning_rate": 1.9969624775141393e-05, "loss": 1.0454, "step": 1214 }, { "epoch": 0.1624765980208612, "grad_norm": 1.344901204109192, "learning_rate": 1.9969512227393925e-05, "loss": 0.9768, "step": 1215 }, { "epoch": 0.16261032361594008, "grad_norm": 1.3147163391113281, "learning_rate": 1.996939947184152e-05, "loss": 0.965, "step": 1216 }, { "epoch": 0.16274404921101898, "grad_norm": 1.4896321296691895, "learning_rate": 1.996928650848652e-05, "loss": 1.0783, "step": 1217 }, { "epoch": 0.16287777480609789, "grad_norm": 1.345624566078186, "learning_rate": 1.9969173337331283e-05, "loss": 0.963, "step": 1218 }, { "epoch": 0.1630115004011768, "grad_norm": 1.3063095808029175, "learning_rate": 1.9969059958378165e-05, "loss": 1.061, "step": 1219 }, { "epoch": 0.1631452259962557, "grad_norm": 1.2775745391845703, "learning_rate": 1.9968946371629533e-05, "loss": 1.047, "step": 1220 }, { "epoch": 0.16327895159133457, "grad_norm": 1.2119871377944946, "learning_rate": 1.9968832577087754e-05, "loss": 1.0177, "step": 1221 }, { "epoch": 0.16341267718641347, "grad_norm": 1.2533262968063354, "learning_rate": 1.9968718574755196e-05, "loss": 1.043, "step": 1222 }, { "epoch": 0.16354640278149238, "grad_norm": 1.2240618467330933, "learning_rate": 1.996860436463424e-05, "loss": 0.9752, "step": 1223 }, { "epoch": 0.16368012837657128, "grad_norm": 1.190651297569275, "learning_rate": 1.9968489946727265e-05, "loss": 0.8847, "step": 1224 }, { "epoch": 0.1638138539716502, "grad_norm": 1.3423857688903809, "learning_rate": 1.996837532103666e-05, "loss": 1.0516, "step": 1225 }, { "epoch": 0.16394757956672906, "grad_norm": 1.2970129251480103, "learning_rate": 1.9968260487564803e-05, "loss": 1.0122, "step": 1226 }, { "epoch": 0.16408130516180797, "grad_norm": 1.3332903385162354, "learning_rate": 1.99681454463141e-05, "loss": 1.0891, "step": 1227 }, { "epoch": 0.16421503075688687, "grad_norm": 1.2251132726669312, "learning_rate": 1.996803019728694e-05, "loss": 1.0937, "step": 1228 }, { "epoch": 0.16434875635196577, "grad_norm": 1.2835623025894165, "learning_rate": 1.996791474048573e-05, "loss": 1.0257, "step": 1229 }, { "epoch": 0.16448248194704465, "grad_norm": 1.2625056505203247, "learning_rate": 1.9967799075912878e-05, "loss": 1.026, "step": 1230 }, { "epoch": 0.16461620754212355, "grad_norm": 1.4082592725753784, "learning_rate": 1.996768320357079e-05, "loss": 1.016, "step": 1231 }, { "epoch": 0.16474993313720246, "grad_norm": 1.3528183698654175, "learning_rate": 1.9967567123461884e-05, "loss": 1.0833, "step": 1232 }, { "epoch": 0.16488365873228136, "grad_norm": 1.350899577140808, "learning_rate": 1.996745083558858e-05, "loss": 1.1091, "step": 1233 }, { "epoch": 0.16501738432736027, "grad_norm": 1.4369655847549438, "learning_rate": 1.9967334339953303e-05, "loss": 1.1446, "step": 1234 }, { "epoch": 0.16515110992243914, "grad_norm": 1.309149146080017, "learning_rate": 1.9967217636558474e-05, "loss": 1.071, "step": 1235 }, { "epoch": 0.16528483551751805, "grad_norm": 1.5724811553955078, "learning_rate": 1.9967100725406535e-05, "loss": 1.2274, "step": 1236 }, { "epoch": 0.16541856111259695, "grad_norm": 1.6956839561462402, "learning_rate": 1.9966983606499918e-05, "loss": 1.0835, "step": 1237 }, { "epoch": 0.16555228670767586, "grad_norm": 1.6455678939819336, "learning_rate": 1.9966866279841065e-05, "loss": 1.1027, "step": 1238 }, { "epoch": 0.16568601230275476, "grad_norm": 1.4267914295196533, "learning_rate": 1.996674874543242e-05, "loss": 1.0841, "step": 1239 }, { "epoch": 0.16581973789783364, "grad_norm": 1.3009576797485352, "learning_rate": 1.9966631003276436e-05, "loss": 1.1291, "step": 1240 }, { "epoch": 0.16595346349291254, "grad_norm": 1.4338685274124146, "learning_rate": 1.9966513053375566e-05, "loss": 1.1698, "step": 1241 }, { "epoch": 0.16608718908799144, "grad_norm": 1.2473822832107544, "learning_rate": 1.996639489573227e-05, "loss": 1.0553, "step": 1242 }, { "epoch": 0.16622091468307035, "grad_norm": 1.3874975442886353, "learning_rate": 1.9966276530349005e-05, "loss": 1.0887, "step": 1243 }, { "epoch": 0.16635464027814922, "grad_norm": 1.316266655921936, "learning_rate": 1.996615795722825e-05, "loss": 1.1605, "step": 1244 }, { "epoch": 0.16648836587322813, "grad_norm": 1.4590569734573364, "learning_rate": 1.996603917637246e-05, "loss": 1.0834, "step": 1245 }, { "epoch": 0.16662209146830703, "grad_norm": 1.267220139503479, "learning_rate": 1.9965920187784124e-05, "loss": 1.0875, "step": 1246 }, { "epoch": 0.16675581706338594, "grad_norm": 1.3364901542663574, "learning_rate": 1.9965800991465717e-05, "loss": 1.02, "step": 1247 }, { "epoch": 0.16688954265846484, "grad_norm": 1.3357272148132324, "learning_rate": 1.9965681587419726e-05, "loss": 1.0284, "step": 1248 }, { "epoch": 0.16702326825354372, "grad_norm": 1.268760085105896, "learning_rate": 1.9965561975648636e-05, "loss": 1.0813, "step": 1249 }, { "epoch": 0.16715699384862262, "grad_norm": 1.3667329549789429, "learning_rate": 1.9965442156154947e-05, "loss": 1.1272, "step": 1250 }, { "epoch": 0.16729071944370152, "grad_norm": 1.3121461868286133, "learning_rate": 1.996532212894115e-05, "loss": 1.0629, "step": 1251 }, { "epoch": 0.16742444503878043, "grad_norm": 1.3465462923049927, "learning_rate": 1.996520189400975e-05, "loss": 0.9768, "step": 1252 }, { "epoch": 0.16755817063385933, "grad_norm": 1.2989013195037842, "learning_rate": 1.996508145136325e-05, "loss": 1.1003, "step": 1253 }, { "epoch": 0.1676918962289382, "grad_norm": 1.1840544939041138, "learning_rate": 1.9964960801004164e-05, "loss": 0.9842, "step": 1254 }, { "epoch": 0.1678256218240171, "grad_norm": 1.3746353387832642, "learning_rate": 1.9964839942935002e-05, "loss": 1.0815, "step": 1255 }, { "epoch": 0.16795934741909602, "grad_norm": 1.4120936393737793, "learning_rate": 1.9964718877158292e-05, "loss": 1.0286, "step": 1256 }, { "epoch": 0.16809307301417492, "grad_norm": 1.222840428352356, "learning_rate": 1.996459760367655e-05, "loss": 1.0126, "step": 1257 }, { "epoch": 0.16822679860925382, "grad_norm": 1.3401939868927002, "learning_rate": 1.9964476122492304e-05, "loss": 1.1791, "step": 1258 }, { "epoch": 0.1683605242043327, "grad_norm": 1.2811201810836792, "learning_rate": 1.996435443360809e-05, "loss": 0.9881, "step": 1259 }, { "epoch": 0.1684942497994116, "grad_norm": 1.3692951202392578, "learning_rate": 1.9964232537026446e-05, "loss": 1.1729, "step": 1260 }, { "epoch": 0.1686279753944905, "grad_norm": 1.3178520202636719, "learning_rate": 1.9964110432749903e-05, "loss": 1.1177, "step": 1261 }, { "epoch": 0.1687617009895694, "grad_norm": 1.36300528049469, "learning_rate": 1.9963988120781014e-05, "loss": 1.0651, "step": 1262 }, { "epoch": 0.1688954265846483, "grad_norm": 1.2798807621002197, "learning_rate": 1.996386560112233e-05, "loss": 1.1263, "step": 1263 }, { "epoch": 0.1690291521797272, "grad_norm": 1.288643717765808, "learning_rate": 1.99637428737764e-05, "loss": 0.9759, "step": 1264 }, { "epoch": 0.1691628777748061, "grad_norm": 1.2387903928756714, "learning_rate": 1.9963619938745787e-05, "loss": 0.985, "step": 1265 }, { "epoch": 0.169296603369885, "grad_norm": 1.3293169736862183, "learning_rate": 1.9963496796033048e-05, "loss": 1.0464, "step": 1266 }, { "epoch": 0.1694303289649639, "grad_norm": 1.161393165588379, "learning_rate": 1.9963373445640753e-05, "loss": 1.0886, "step": 1267 }, { "epoch": 0.16956405456004278, "grad_norm": 1.2038220167160034, "learning_rate": 1.9963249887571473e-05, "loss": 1.0475, "step": 1268 }, { "epoch": 0.16969778015512169, "grad_norm": 1.2192479372024536, "learning_rate": 1.996312612182778e-05, "loss": 1.101, "step": 1269 }, { "epoch": 0.1698315057502006, "grad_norm": 1.4098663330078125, "learning_rate": 1.9963002148412262e-05, "loss": 1.0537, "step": 1270 }, { "epoch": 0.1699652313452795, "grad_norm": 1.220025897026062, "learning_rate": 1.9962877967327494e-05, "loss": 0.9807, "step": 1271 }, { "epoch": 0.1700989569403584, "grad_norm": 1.2133280038833618, "learning_rate": 1.996275357857607e-05, "loss": 0.9028, "step": 1272 }, { "epoch": 0.17023268253543727, "grad_norm": 1.3840975761413574, "learning_rate": 1.996262898216058e-05, "loss": 0.8801, "step": 1273 }, { "epoch": 0.17036640813051618, "grad_norm": 1.3612234592437744, "learning_rate": 1.996250417808362e-05, "loss": 1.1266, "step": 1274 }, { "epoch": 0.17050013372559508, "grad_norm": 1.4440313577651978, "learning_rate": 1.9962379166347797e-05, "loss": 0.9896, "step": 1275 }, { "epoch": 0.170633859320674, "grad_norm": 1.4276864528656006, "learning_rate": 1.996225394695571e-05, "loss": 1.0774, "step": 1276 }, { "epoch": 0.17076758491575286, "grad_norm": 1.4699558019638062, "learning_rate": 1.9962128519909975e-05, "loss": 1.2069, "step": 1277 }, { "epoch": 0.17090131051083177, "grad_norm": 1.4333025217056274, "learning_rate": 1.99620028852132e-05, "loss": 1.0759, "step": 1278 }, { "epoch": 0.17103503610591067, "grad_norm": 1.3049767017364502, "learning_rate": 1.996187704286801e-05, "loss": 1.1315, "step": 1279 }, { "epoch": 0.17116876170098957, "grad_norm": 1.3423848152160645, "learning_rate": 1.9961750992877027e-05, "loss": 1.0708, "step": 1280 }, { "epoch": 0.17130248729606848, "grad_norm": 1.4286481142044067, "learning_rate": 1.9961624735242875e-05, "loss": 1.1587, "step": 1281 }, { "epoch": 0.17143621289114735, "grad_norm": 1.2878954410552979, "learning_rate": 1.9961498269968187e-05, "loss": 1.01, "step": 1282 }, { "epoch": 0.17156993848622626, "grad_norm": 1.2917498350143433, "learning_rate": 1.99613715970556e-05, "loss": 1.0719, "step": 1283 }, { "epoch": 0.17170366408130516, "grad_norm": 1.370892882347107, "learning_rate": 1.9961244716507757e-05, "loss": 1.1881, "step": 1284 }, { "epoch": 0.17183738967638407, "grad_norm": 1.1769497394561768, "learning_rate": 1.9961117628327296e-05, "loss": 1.0336, "step": 1285 }, { "epoch": 0.17197111527146297, "grad_norm": 1.8080333471298218, "learning_rate": 1.9960990332516875e-05, "loss": 1.1228, "step": 1286 }, { "epoch": 0.17210484086654185, "grad_norm": 1.5088346004486084, "learning_rate": 1.996086282907914e-05, "loss": 1.1282, "step": 1287 }, { "epoch": 0.17223856646162075, "grad_norm": 1.364235520362854, "learning_rate": 1.9960735118016744e-05, "loss": 1.0958, "step": 1288 }, { "epoch": 0.17237229205669966, "grad_norm": 1.3936492204666138, "learning_rate": 1.996060719933236e-05, "loss": 1.021, "step": 1289 }, { "epoch": 0.17250601765177856, "grad_norm": 1.279437780380249, "learning_rate": 1.9960479073028655e-05, "loss": 1.126, "step": 1290 }, { "epoch": 0.17263974324685744, "grad_norm": 1.3440513610839844, "learning_rate": 1.996035073910829e-05, "loss": 1.0751, "step": 1291 }, { "epoch": 0.17277346884193634, "grad_norm": 1.33357834815979, "learning_rate": 1.9960222197573948e-05, "loss": 1.038, "step": 1292 }, { "epoch": 0.17290719443701524, "grad_norm": 1.2166985273361206, "learning_rate": 1.9960093448428305e-05, "loss": 0.8946, "step": 1293 }, { "epoch": 0.17304092003209415, "grad_norm": 1.257196068763733, "learning_rate": 1.995996449167404e-05, "loss": 0.9538, "step": 1294 }, { "epoch": 0.17317464562717305, "grad_norm": 1.1668092012405396, "learning_rate": 1.9959835327313853e-05, "loss": 0.9678, "step": 1295 }, { "epoch": 0.17330837122225193, "grad_norm": 1.3494043350219727, "learning_rate": 1.9959705955350425e-05, "loss": 1.0017, "step": 1296 }, { "epoch": 0.17344209681733083, "grad_norm": 1.2298766374588013, "learning_rate": 1.9959576375786454e-05, "loss": 1.0822, "step": 1297 }, { "epoch": 0.17357582241240974, "grad_norm": 1.4576905965805054, "learning_rate": 1.995944658862465e-05, "loss": 1.0572, "step": 1298 }, { "epoch": 0.17370954800748864, "grad_norm": 1.4217324256896973, "learning_rate": 1.995931659386771e-05, "loss": 1.1083, "step": 1299 }, { "epoch": 0.17384327360256754, "grad_norm": 1.297057032585144, "learning_rate": 1.9959186391518342e-05, "loss": 1.0294, "step": 1300 }, { "epoch": 0.17397699919764642, "grad_norm": 1.2857576608657837, "learning_rate": 1.9959055981579266e-05, "loss": 0.9504, "step": 1301 }, { "epoch": 0.17411072479272532, "grad_norm": 1.191306710243225, "learning_rate": 1.9958925364053197e-05, "loss": 0.9992, "step": 1302 }, { "epoch": 0.17424445038780423, "grad_norm": 1.1694518327713013, "learning_rate": 1.995879453894286e-05, "loss": 0.9396, "step": 1303 }, { "epoch": 0.17437817598288313, "grad_norm": 1.2788891792297363, "learning_rate": 1.995866350625098e-05, "loss": 1.0031, "step": 1304 }, { "epoch": 0.174511901577962, "grad_norm": 1.2893421649932861, "learning_rate": 1.9958532265980288e-05, "loss": 0.9913, "step": 1305 }, { "epoch": 0.1746456271730409, "grad_norm": 1.2778571844100952, "learning_rate": 1.995840081813352e-05, "loss": 1.1676, "step": 1306 }, { "epoch": 0.17477935276811982, "grad_norm": 1.305558681488037, "learning_rate": 1.9958269162713417e-05, "loss": 0.9545, "step": 1307 }, { "epoch": 0.17491307836319872, "grad_norm": 1.2032405138015747, "learning_rate": 1.9958137299722723e-05, "loss": 0.9236, "step": 1308 }, { "epoch": 0.17504680395827762, "grad_norm": 1.246390700340271, "learning_rate": 1.9958005229164182e-05, "loss": 0.9839, "step": 1309 }, { "epoch": 0.1751805295533565, "grad_norm": 1.317458152770996, "learning_rate": 1.9957872951040554e-05, "loss": 1.0407, "step": 1310 }, { "epoch": 0.1753142551484354, "grad_norm": 1.3092089891433716, "learning_rate": 1.9957740465354592e-05, "loss": 1.088, "step": 1311 }, { "epoch": 0.1754479807435143, "grad_norm": 1.306917667388916, "learning_rate": 1.995760777210906e-05, "loss": 1.0827, "step": 1312 }, { "epoch": 0.1755817063385932, "grad_norm": 1.2757031917572021, "learning_rate": 1.995747487130672e-05, "loss": 1.1113, "step": 1313 }, { "epoch": 0.17571543193367212, "grad_norm": 1.2835592031478882, "learning_rate": 1.9957341762950346e-05, "loss": 1.0935, "step": 1314 }, { "epoch": 0.175849157528751, "grad_norm": 1.319815993309021, "learning_rate": 1.995720844704271e-05, "loss": 1.0768, "step": 1315 }, { "epoch": 0.1759828831238299, "grad_norm": 1.3786754608154297, "learning_rate": 1.9957074923586594e-05, "loss": 1.1047, "step": 1316 }, { "epoch": 0.1761166087189088, "grad_norm": 1.2249692678451538, "learning_rate": 1.995694119258478e-05, "loss": 1.0078, "step": 1317 }, { "epoch": 0.1762503343139877, "grad_norm": 1.3519737720489502, "learning_rate": 1.9956807254040052e-05, "loss": 0.9356, "step": 1318 }, { "epoch": 0.17638405990906658, "grad_norm": 1.4273687601089478, "learning_rate": 1.9956673107955204e-05, "loss": 0.9754, "step": 1319 }, { "epoch": 0.17651778550414549, "grad_norm": 1.398954153060913, "learning_rate": 1.9956538754333033e-05, "loss": 1.0354, "step": 1320 }, { "epoch": 0.1766515110992244, "grad_norm": 1.56248140335083, "learning_rate": 1.995640419317634e-05, "loss": 0.9987, "step": 1321 }, { "epoch": 0.1767852366943033, "grad_norm": 1.3443942070007324, "learning_rate": 1.995626942448793e-05, "loss": 1.129, "step": 1322 }, { "epoch": 0.1769189622893822, "grad_norm": 1.3465995788574219, "learning_rate": 1.9956134448270608e-05, "loss": 1.0367, "step": 1323 }, { "epoch": 0.17705268788446107, "grad_norm": 1.3278660774230957, "learning_rate": 1.9955999264527194e-05, "loss": 1.0766, "step": 1324 }, { "epoch": 0.17718641347953998, "grad_norm": 1.3544702529907227, "learning_rate": 1.9955863873260498e-05, "loss": 1.1711, "step": 1325 }, { "epoch": 0.17732013907461888, "grad_norm": 1.3461451530456543, "learning_rate": 1.995572827447335e-05, "loss": 1.0088, "step": 1326 }, { "epoch": 0.1774538646696978, "grad_norm": 1.4259815216064453, "learning_rate": 1.995559246816857e-05, "loss": 1.1703, "step": 1327 }, { "epoch": 0.1775875902647767, "grad_norm": 1.197901964187622, "learning_rate": 1.9955456454348993e-05, "loss": 0.9623, "step": 1328 }, { "epoch": 0.17772131585985557, "grad_norm": 1.2175984382629395, "learning_rate": 1.9955320233017456e-05, "loss": 1.0177, "step": 1329 }, { "epoch": 0.17785504145493447, "grad_norm": 1.2803279161453247, "learning_rate": 1.995518380417679e-05, "loss": 1.1427, "step": 1330 }, { "epoch": 0.17798876705001337, "grad_norm": 1.0316349267959595, "learning_rate": 1.995504716782984e-05, "loss": 0.9121, "step": 1331 }, { "epoch": 0.17812249264509228, "grad_norm": 1.2992193698883057, "learning_rate": 1.9954910323979465e-05, "loss": 1.02, "step": 1332 }, { "epoch": 0.17825621824017118, "grad_norm": 1.2969260215759277, "learning_rate": 1.9954773272628508e-05, "loss": 1.0496, "step": 1333 }, { "epoch": 0.17838994383525006, "grad_norm": 1.2382975816726685, "learning_rate": 1.9954636013779826e-05, "loss": 0.9581, "step": 1334 }, { "epoch": 0.17852366943032896, "grad_norm": 1.1821105480194092, "learning_rate": 1.9954498547436284e-05, "loss": 1.023, "step": 1335 }, { "epoch": 0.17865739502540787, "grad_norm": 1.2189511060714722, "learning_rate": 1.9954360873600746e-05, "loss": 1.0656, "step": 1336 }, { "epoch": 0.17879112062048677, "grad_norm": 1.2334539890289307, "learning_rate": 1.995422299227608e-05, "loss": 1.0026, "step": 1337 }, { "epoch": 0.17892484621556565, "grad_norm": 1.2316467761993408, "learning_rate": 1.9954084903465158e-05, "loss": 0.9333, "step": 1338 }, { "epoch": 0.17905857181064455, "grad_norm": 1.1920417547225952, "learning_rate": 1.9953946607170867e-05, "loss": 1.0144, "step": 1339 }, { "epoch": 0.17919229740572346, "grad_norm": 1.293245553970337, "learning_rate": 1.995380810339608e-05, "loss": 1.0004, "step": 1340 }, { "epoch": 0.17932602300080236, "grad_norm": 1.2394880056381226, "learning_rate": 1.9953669392143685e-05, "loss": 0.9442, "step": 1341 }, { "epoch": 0.17945974859588126, "grad_norm": 1.2206966876983643, "learning_rate": 1.995353047341658e-05, "loss": 1.0577, "step": 1342 }, { "epoch": 0.17959347419096014, "grad_norm": 1.3495227098464966, "learning_rate": 1.995339134721766e-05, "loss": 1.138, "step": 1343 }, { "epoch": 0.17972719978603904, "grad_norm": 1.3647313117980957, "learning_rate": 1.9953252013549816e-05, "loss": 0.943, "step": 1344 }, { "epoch": 0.17986092538111795, "grad_norm": 1.1911678314208984, "learning_rate": 1.995311247241596e-05, "loss": 1.0972, "step": 1345 }, { "epoch": 0.17999465097619685, "grad_norm": 1.279129981994629, "learning_rate": 1.9952972723819e-05, "loss": 0.9772, "step": 1346 }, { "epoch": 0.18012837657127576, "grad_norm": 1.4000297784805298, "learning_rate": 1.9952832767761845e-05, "loss": 1.1036, "step": 1347 }, { "epoch": 0.18026210216635463, "grad_norm": 1.1939113140106201, "learning_rate": 1.9952692604247414e-05, "loss": 0.9358, "step": 1348 }, { "epoch": 0.18039582776143354, "grad_norm": 1.2493308782577515, "learning_rate": 1.995255223327863e-05, "loss": 0.9997, "step": 1349 }, { "epoch": 0.18052955335651244, "grad_norm": 1.3002734184265137, "learning_rate": 1.9952411654858423e-05, "loss": 1.0661, "step": 1350 }, { "epoch": 0.18066327895159134, "grad_norm": 1.3944391012191772, "learning_rate": 1.995227086898971e-05, "loss": 1.1317, "step": 1351 }, { "epoch": 0.18079700454667022, "grad_norm": 1.3092198371887207, "learning_rate": 1.9952129875675442e-05, "loss": 1.0812, "step": 1352 }, { "epoch": 0.18093073014174912, "grad_norm": 1.34480619430542, "learning_rate": 1.9951988674918548e-05, "loss": 1.0949, "step": 1353 }, { "epoch": 0.18106445573682803, "grad_norm": 1.3518292903900146, "learning_rate": 1.995184726672197e-05, "loss": 1.0952, "step": 1354 }, { "epoch": 0.18119818133190693, "grad_norm": 1.2242733240127563, "learning_rate": 1.995170565108866e-05, "loss": 1.0297, "step": 1355 }, { "epoch": 0.18133190692698584, "grad_norm": 1.3117685317993164, "learning_rate": 1.995156382802157e-05, "loss": 1.0777, "step": 1356 }, { "epoch": 0.1814656325220647, "grad_norm": 1.244023323059082, "learning_rate": 1.9951421797523652e-05, "loss": 1.0503, "step": 1357 }, { "epoch": 0.18159935811714362, "grad_norm": 1.1908466815948486, "learning_rate": 1.9951279559597872e-05, "loss": 0.9458, "step": 1358 }, { "epoch": 0.18173308371222252, "grad_norm": 1.2295639514923096, "learning_rate": 1.995113711424719e-05, "loss": 1.1538, "step": 1359 }, { "epoch": 0.18186680930730142, "grad_norm": 1.342751383781433, "learning_rate": 1.995099446147458e-05, "loss": 1.0869, "step": 1360 }, { "epoch": 0.18200053490238033, "grad_norm": 1.2374080419540405, "learning_rate": 1.995085160128301e-05, "loss": 0.9759, "step": 1361 }, { "epoch": 0.1821342604974592, "grad_norm": 1.3634812831878662, "learning_rate": 1.9950708533675457e-05, "loss": 0.9992, "step": 1362 }, { "epoch": 0.1822679860925381, "grad_norm": 1.1515052318572998, "learning_rate": 1.9950565258654913e-05, "loss": 1.0069, "step": 1363 }, { "epoch": 0.182401711687617, "grad_norm": 1.192280650138855, "learning_rate": 1.9950421776224353e-05, "loss": 0.9908, "step": 1364 }, { "epoch": 0.18253543728269592, "grad_norm": 1.164850115776062, "learning_rate": 1.9950278086386774e-05, "loss": 0.9073, "step": 1365 }, { "epoch": 0.1826691628777748, "grad_norm": 1.2419682741165161, "learning_rate": 1.995013418914517e-05, "loss": 1.0791, "step": 1366 }, { "epoch": 0.1828028884728537, "grad_norm": 1.2415947914123535, "learning_rate": 1.994999008450254e-05, "loss": 1.0605, "step": 1367 }, { "epoch": 0.1829366140679326, "grad_norm": 1.348641037940979, "learning_rate": 1.9949845772461887e-05, "loss": 1.1143, "step": 1368 }, { "epoch": 0.1830703396630115, "grad_norm": 1.2559539079666138, "learning_rate": 1.9949701253026223e-05, "loss": 0.9883, "step": 1369 }, { "epoch": 0.1832040652580904, "grad_norm": 1.0846824645996094, "learning_rate": 1.9949556526198553e-05, "loss": 0.9167, "step": 1370 }, { "epoch": 0.18333779085316929, "grad_norm": 1.1999516487121582, "learning_rate": 1.9949411591981904e-05, "loss": 1.0101, "step": 1371 }, { "epoch": 0.1834715164482482, "grad_norm": 1.2889819145202637, "learning_rate": 1.9949266450379286e-05, "loss": 1.1621, "step": 1372 }, { "epoch": 0.1836052420433271, "grad_norm": 1.2377021312713623, "learning_rate": 1.994912110139373e-05, "loss": 0.9699, "step": 1373 }, { "epoch": 0.183738967638406, "grad_norm": 1.2206727266311646, "learning_rate": 1.9948975545028263e-05, "loss": 0.9373, "step": 1374 }, { "epoch": 0.1838726932334849, "grad_norm": 1.3375742435455322, "learning_rate": 1.9948829781285922e-05, "loss": 1.1474, "step": 1375 }, { "epoch": 0.18400641882856378, "grad_norm": 1.2649837732315063, "learning_rate": 1.9948683810169746e-05, "loss": 1.0659, "step": 1376 }, { "epoch": 0.18414014442364268, "grad_norm": 1.2327882051467896, "learning_rate": 1.9948537631682778e-05, "loss": 1.0321, "step": 1377 }, { "epoch": 0.18427387001872159, "grad_norm": 1.2740992307662964, "learning_rate": 1.994839124582806e-05, "loss": 0.9341, "step": 1378 }, { "epoch": 0.1844075956138005, "grad_norm": 1.3299682140350342, "learning_rate": 1.994824465260864e-05, "loss": 1.0699, "step": 1379 }, { "epoch": 0.18454132120887937, "grad_norm": 1.3049708604812622, "learning_rate": 1.9948097852027587e-05, "loss": 0.9865, "step": 1380 }, { "epoch": 0.18467504680395827, "grad_norm": 1.448488473892212, "learning_rate": 1.9947950844087952e-05, "loss": 1.1393, "step": 1381 }, { "epoch": 0.18480877239903717, "grad_norm": 1.2646312713623047, "learning_rate": 1.99478036287928e-05, "loss": 1.0687, "step": 1382 }, { "epoch": 0.18494249799411608, "grad_norm": 1.277761459350586, "learning_rate": 1.9947656206145202e-05, "loss": 0.933, "step": 1383 }, { "epoch": 0.18507622358919498, "grad_norm": 1.4586188793182373, "learning_rate": 1.994750857614823e-05, "loss": 1.1912, "step": 1384 }, { "epoch": 0.18520994918427386, "grad_norm": 1.2215498685836792, "learning_rate": 1.9947360738804958e-05, "loss": 1.034, "step": 1385 }, { "epoch": 0.18534367477935276, "grad_norm": 1.3679231405258179, "learning_rate": 1.9947212694118473e-05, "loss": 1.1094, "step": 1386 }, { "epoch": 0.18547740037443167, "grad_norm": 1.2719625234603882, "learning_rate": 1.9947064442091854e-05, "loss": 0.9533, "step": 1387 }, { "epoch": 0.18561112596951057, "grad_norm": 1.2588043212890625, "learning_rate": 1.9946915982728196e-05, "loss": 0.9401, "step": 1388 }, { "epoch": 0.18574485156458947, "grad_norm": 1.3275405168533325, "learning_rate": 1.9946767316030595e-05, "loss": 1.0378, "step": 1389 }, { "epoch": 0.18587857715966835, "grad_norm": 1.361557126045227, "learning_rate": 1.9946618442002147e-05, "loss": 1.1666, "step": 1390 }, { "epoch": 0.18601230275474726, "grad_norm": 1.176049828529358, "learning_rate": 1.9946469360645953e-05, "loss": 0.9804, "step": 1391 }, { "epoch": 0.18614602834982616, "grad_norm": 1.1640080213546753, "learning_rate": 1.9946320071965122e-05, "loss": 1.0387, "step": 1392 }, { "epoch": 0.18627975394490506, "grad_norm": 1.232293725013733, "learning_rate": 1.994617057596277e-05, "loss": 1.056, "step": 1393 }, { "epoch": 0.18641347953998394, "grad_norm": 1.3213332891464233, "learning_rate": 1.994602087264201e-05, "loss": 1.003, "step": 1394 }, { "epoch": 0.18654720513506284, "grad_norm": 1.1948961019515991, "learning_rate": 1.9945870962005957e-05, "loss": 1.0284, "step": 1395 }, { "epoch": 0.18668093073014175, "grad_norm": 1.2003235816955566, "learning_rate": 1.9945720844057747e-05, "loss": 1.0704, "step": 1396 }, { "epoch": 0.18681465632522065, "grad_norm": 1.253259539604187, "learning_rate": 1.99455705188005e-05, "loss": 1.0141, "step": 1397 }, { "epoch": 0.18694838192029956, "grad_norm": 1.4753155708312988, "learning_rate": 1.9945419986237353e-05, "loss": 1.2368, "step": 1398 }, { "epoch": 0.18708210751537843, "grad_norm": 1.2579237222671509, "learning_rate": 1.9945269246371444e-05, "loss": 1.0123, "step": 1399 }, { "epoch": 0.18721583311045734, "grad_norm": 1.238404393196106, "learning_rate": 1.994511829920591e-05, "loss": 0.9939, "step": 1400 }, { "epoch": 0.18734955870553624, "grad_norm": 1.303276538848877, "learning_rate": 1.9944967144743907e-05, "loss": 1.054, "step": 1401 }, { "epoch": 0.18748328430061514, "grad_norm": 1.1089861392974854, "learning_rate": 1.994481578298858e-05, "loss": 0.99, "step": 1402 }, { "epoch": 0.18761700989569405, "grad_norm": 1.1575955152511597, "learning_rate": 1.994466421394308e-05, "loss": 1.0127, "step": 1403 }, { "epoch": 0.18775073549077292, "grad_norm": 1.1977077722549438, "learning_rate": 1.994451243761057e-05, "loss": 0.9783, "step": 1404 }, { "epoch": 0.18788446108585183, "grad_norm": 1.2853716611862183, "learning_rate": 1.994436045399422e-05, "loss": 1.1231, "step": 1405 }, { "epoch": 0.18801818668093073, "grad_norm": 1.2845269441604614, "learning_rate": 1.9944208263097188e-05, "loss": 1.0318, "step": 1406 }, { "epoch": 0.18815191227600964, "grad_norm": 1.4264994859695435, "learning_rate": 1.994405586492265e-05, "loss": 1.1893, "step": 1407 }, { "epoch": 0.1882856378710885, "grad_norm": 1.207533836364746, "learning_rate": 1.9943903259473783e-05, "loss": 0.9881, "step": 1408 }, { "epoch": 0.18841936346616742, "grad_norm": 1.5068691968917847, "learning_rate": 1.9943750446753772e-05, "loss": 1.0961, "step": 1409 }, { "epoch": 0.18855308906124632, "grad_norm": 1.2391252517700195, "learning_rate": 1.9943597426765792e-05, "loss": 0.9771, "step": 1410 }, { "epoch": 0.18868681465632522, "grad_norm": 1.2755193710327148, "learning_rate": 1.9943444199513044e-05, "loss": 1.0449, "step": 1411 }, { "epoch": 0.18882054025140413, "grad_norm": 1.1802613735198975, "learning_rate": 1.9943290764998716e-05, "loss": 0.9202, "step": 1412 }, { "epoch": 0.188954265846483, "grad_norm": 1.3295230865478516, "learning_rate": 1.9943137123226e-05, "loss": 1.0596, "step": 1413 }, { "epoch": 0.1890879914415619, "grad_norm": 1.2529610395431519, "learning_rate": 1.994298327419811e-05, "loss": 1.0356, "step": 1414 }, { "epoch": 0.1892217170366408, "grad_norm": 1.292808175086975, "learning_rate": 1.9942829217918248e-05, "loss": 1.0633, "step": 1415 }, { "epoch": 0.18935544263171972, "grad_norm": 1.1281393766403198, "learning_rate": 1.9942674954389627e-05, "loss": 0.9636, "step": 1416 }, { "epoch": 0.18948916822679862, "grad_norm": 1.2608211040496826, "learning_rate": 1.994252048361546e-05, "loss": 1.1608, "step": 1417 }, { "epoch": 0.1896228938218775, "grad_norm": 1.260239839553833, "learning_rate": 1.9942365805598967e-05, "loss": 1.0354, "step": 1418 }, { "epoch": 0.1897566194169564, "grad_norm": 1.2576032876968384, "learning_rate": 1.9942210920343372e-05, "loss": 0.8684, "step": 1419 }, { "epoch": 0.1898903450120353, "grad_norm": 1.155906081199646, "learning_rate": 1.9942055827851903e-05, "loss": 1.0674, "step": 1420 }, { "epoch": 0.1900240706071142, "grad_norm": 1.3107454776763916, "learning_rate": 1.9941900528127793e-05, "loss": 1.1328, "step": 1421 }, { "epoch": 0.1901577962021931, "grad_norm": 1.3248958587646484, "learning_rate": 1.9941745021174284e-05, "loss": 1.077, "step": 1422 }, { "epoch": 0.190291521797272, "grad_norm": 1.2896544933319092, "learning_rate": 1.9941589306994612e-05, "loss": 1.0823, "step": 1423 }, { "epoch": 0.1904252473923509, "grad_norm": 1.2361563444137573, "learning_rate": 1.9941433385592022e-05, "loss": 0.9757, "step": 1424 }, { "epoch": 0.1905589729874298, "grad_norm": 1.2389726638793945, "learning_rate": 1.9941277256969768e-05, "loss": 1.0236, "step": 1425 }, { "epoch": 0.1906926985825087, "grad_norm": 1.3151332139968872, "learning_rate": 1.99411209211311e-05, "loss": 1.0675, "step": 1426 }, { "epoch": 0.19082642417758758, "grad_norm": 1.3759338855743408, "learning_rate": 1.994096437807928e-05, "loss": 1.1237, "step": 1427 }, { "epoch": 0.19096014977266648, "grad_norm": 1.1903740167617798, "learning_rate": 1.9940807627817568e-05, "loss": 1.0254, "step": 1428 }, { "epoch": 0.19109387536774539, "grad_norm": 1.4401644468307495, "learning_rate": 1.9940650670349237e-05, "loss": 1.2035, "step": 1429 }, { "epoch": 0.1912276009628243, "grad_norm": 1.4020029306411743, "learning_rate": 1.9940493505677556e-05, "loss": 1.0789, "step": 1430 }, { "epoch": 0.1913613265579032, "grad_norm": 1.400356411933899, "learning_rate": 1.9940336133805796e-05, "loss": 1.1033, "step": 1431 }, { "epoch": 0.19149505215298207, "grad_norm": 1.2576701641082764, "learning_rate": 1.994017855473724e-05, "loss": 1.0703, "step": 1432 }, { "epoch": 0.19162877774806097, "grad_norm": 1.3306386470794678, "learning_rate": 1.994002076847518e-05, "loss": 0.9429, "step": 1433 }, { "epoch": 0.19176250334313988, "grad_norm": 1.3621129989624023, "learning_rate": 1.9939862775022893e-05, "loss": 1.0264, "step": 1434 }, { "epoch": 0.19189622893821878, "grad_norm": 1.266137957572937, "learning_rate": 1.993970457438368e-05, "loss": 0.9725, "step": 1435 }, { "epoch": 0.1920299545332977, "grad_norm": 1.253928303718567, "learning_rate": 1.9939546166560837e-05, "loss": 0.9758, "step": 1436 }, { "epoch": 0.19216368012837656, "grad_norm": 1.315943956375122, "learning_rate": 1.9939387551557666e-05, "loss": 1.1382, "step": 1437 }, { "epoch": 0.19229740572345547, "grad_norm": 1.3408398628234863, "learning_rate": 1.993922872937747e-05, "loss": 1.0115, "step": 1438 }, { "epoch": 0.19243113131853437, "grad_norm": 1.4184273481369019, "learning_rate": 1.9939069700023564e-05, "loss": 1.1077, "step": 1439 }, { "epoch": 0.19256485691361327, "grad_norm": 1.3159000873565674, "learning_rate": 1.993891046349926e-05, "loss": 1.0301, "step": 1440 }, { "epoch": 0.19269858250869215, "grad_norm": 1.2756348848342896, "learning_rate": 1.9938751019807874e-05, "loss": 1.1011, "step": 1441 }, { "epoch": 0.19283230810377106, "grad_norm": 1.2336317300796509, "learning_rate": 1.993859136895274e-05, "loss": 1.0357, "step": 1442 }, { "epoch": 0.19296603369884996, "grad_norm": 1.3553413152694702, "learning_rate": 1.9938431510937172e-05, "loss": 1.0071, "step": 1443 }, { "epoch": 0.19309975929392886, "grad_norm": 1.193389892578125, "learning_rate": 1.9938271445764515e-05, "loss": 0.9765, "step": 1444 }, { "epoch": 0.19323348488900777, "grad_norm": 1.336923360824585, "learning_rate": 1.99381111734381e-05, "loss": 1.0809, "step": 1445 }, { "epoch": 0.19336721048408664, "grad_norm": 1.2722831964492798, "learning_rate": 1.9937950693961264e-05, "loss": 0.9952, "step": 1446 }, { "epoch": 0.19350093607916555, "grad_norm": 1.5109161138534546, "learning_rate": 1.9937790007337355e-05, "loss": 0.9625, "step": 1447 }, { "epoch": 0.19363466167424445, "grad_norm": 1.2593663930892944, "learning_rate": 1.9937629113569727e-05, "loss": 1.0227, "step": 1448 }, { "epoch": 0.19376838726932336, "grad_norm": 1.2797428369522095, "learning_rate": 1.9937468012661726e-05, "loss": 1.0656, "step": 1449 }, { "epoch": 0.19390211286440226, "grad_norm": 1.318403959274292, "learning_rate": 1.9937306704616713e-05, "loss": 1.0834, "step": 1450 }, { "epoch": 0.19403583845948114, "grad_norm": 1.3928052186965942, "learning_rate": 1.993714518943805e-05, "loss": 1.0339, "step": 1451 }, { "epoch": 0.19416956405456004, "grad_norm": 1.3587417602539062, "learning_rate": 1.9936983467129108e-05, "loss": 0.9698, "step": 1452 }, { "epoch": 0.19430328964963894, "grad_norm": 1.3251879215240479, "learning_rate": 1.993682153769325e-05, "loss": 1.0764, "step": 1453 }, { "epoch": 0.19443701524471785, "grad_norm": 1.264893651008606, "learning_rate": 1.993665940113386e-05, "loss": 0.9309, "step": 1454 }, { "epoch": 0.19457074083979672, "grad_norm": 1.3603886365890503, "learning_rate": 1.9936497057454312e-05, "loss": 1.0698, "step": 1455 }, { "epoch": 0.19470446643487563, "grad_norm": 1.3581825494766235, "learning_rate": 1.993633450665799e-05, "loss": 1.0823, "step": 1456 }, { "epoch": 0.19483819202995453, "grad_norm": 1.3192652463912964, "learning_rate": 1.9936171748748284e-05, "loss": 1.0462, "step": 1457 }, { "epoch": 0.19497191762503344, "grad_norm": 1.4890048503875732, "learning_rate": 1.9936008783728583e-05, "loss": 1.1289, "step": 1458 }, { "epoch": 0.19510564322011234, "grad_norm": 1.3101075887680054, "learning_rate": 1.993584561160229e-05, "loss": 0.9737, "step": 1459 }, { "epoch": 0.19523936881519122, "grad_norm": 1.2497665882110596, "learning_rate": 1.9935682232372803e-05, "loss": 1.0344, "step": 1460 }, { "epoch": 0.19537309441027012, "grad_norm": 1.5189787149429321, "learning_rate": 1.9935518646043523e-05, "loss": 1.2251, "step": 1461 }, { "epoch": 0.19550682000534902, "grad_norm": 1.1921664476394653, "learning_rate": 1.993535485261787e-05, "loss": 0.9189, "step": 1462 }, { "epoch": 0.19564054560042793, "grad_norm": 1.348024845123291, "learning_rate": 1.993519085209925e-05, "loss": 1.0057, "step": 1463 }, { "epoch": 0.19577427119550683, "grad_norm": 1.3215440511703491, "learning_rate": 1.9935026644491082e-05, "loss": 1.0547, "step": 1464 }, { "epoch": 0.1959079967905857, "grad_norm": 1.1223944425582886, "learning_rate": 1.9934862229796793e-05, "loss": 0.9329, "step": 1465 }, { "epoch": 0.1960417223856646, "grad_norm": 1.2073897123336792, "learning_rate": 1.9934697608019805e-05, "loss": 0.9396, "step": 1466 }, { "epoch": 0.19617544798074352, "grad_norm": 1.1717466115951538, "learning_rate": 1.9934532779163553e-05, "loss": 1.0107, "step": 1467 }, { "epoch": 0.19630917357582242, "grad_norm": 1.2075103521347046, "learning_rate": 1.993436774323147e-05, "loss": 1.1506, "step": 1468 }, { "epoch": 0.1964428991709013, "grad_norm": 1.2787638902664185, "learning_rate": 1.9934202500226994e-05, "loss": 0.9664, "step": 1469 }, { "epoch": 0.1965766247659802, "grad_norm": 1.402434229850769, "learning_rate": 1.993403705015358e-05, "loss": 1.1229, "step": 1470 }, { "epoch": 0.1967103503610591, "grad_norm": 1.2485862970352173, "learning_rate": 1.9933871393014668e-05, "loss": 1.0393, "step": 1471 }, { "epoch": 0.196844075956138, "grad_norm": 1.2507344484329224, "learning_rate": 1.9933705528813713e-05, "loss": 1.0099, "step": 1472 }, { "epoch": 0.1969778015512169, "grad_norm": 1.2450827360153198, "learning_rate": 1.993353945755417e-05, "loss": 1.1463, "step": 1473 }, { "epoch": 0.1971115271462958, "grad_norm": 1.2674734592437744, "learning_rate": 1.99333731792395e-05, "loss": 1.0849, "step": 1474 }, { "epoch": 0.1972452527413747, "grad_norm": 1.402221918106079, "learning_rate": 1.9933206693873175e-05, "loss": 1.068, "step": 1475 }, { "epoch": 0.1973789783364536, "grad_norm": 1.2720280885696411, "learning_rate": 1.993304000145866e-05, "loss": 0.961, "step": 1476 }, { "epoch": 0.1975127039315325, "grad_norm": 1.2979453802108765, "learning_rate": 1.9932873101999433e-05, "loss": 1.1093, "step": 1477 }, { "epoch": 0.1976464295266114, "grad_norm": 1.3291829824447632, "learning_rate": 1.9932705995498968e-05, "loss": 1.0021, "step": 1478 }, { "epoch": 0.19778015512169028, "grad_norm": 1.380653977394104, "learning_rate": 1.9932538681960754e-05, "loss": 1.0836, "step": 1479 }, { "epoch": 0.19791388071676919, "grad_norm": 1.121737003326416, "learning_rate": 1.9932371161388274e-05, "loss": 0.9976, "step": 1480 }, { "epoch": 0.1980476063118481, "grad_norm": 1.1978175640106201, "learning_rate": 1.993220343378502e-05, "loss": 0.9092, "step": 1481 }, { "epoch": 0.198181331906927, "grad_norm": 1.2795583009719849, "learning_rate": 1.993203549915449e-05, "loss": 0.9456, "step": 1482 }, { "epoch": 0.19831505750200587, "grad_norm": 1.299006700515747, "learning_rate": 1.9931867357500184e-05, "loss": 1.0889, "step": 1483 }, { "epoch": 0.19844878309708477, "grad_norm": 1.191469430923462, "learning_rate": 1.993169900882561e-05, "loss": 0.9848, "step": 1484 }, { "epoch": 0.19858250869216368, "grad_norm": 1.231117606163025, "learning_rate": 1.993153045313427e-05, "loss": 0.9918, "step": 1485 }, { "epoch": 0.19871623428724258, "grad_norm": 1.269965648651123, "learning_rate": 1.9931361690429685e-05, "loss": 1.0111, "step": 1486 }, { "epoch": 0.1988499598823215, "grad_norm": 1.4489976167678833, "learning_rate": 1.9931192720715366e-05, "loss": 1.0607, "step": 1487 }, { "epoch": 0.19898368547740036, "grad_norm": 1.2560759782791138, "learning_rate": 1.9931023543994837e-05, "loss": 1.0016, "step": 1488 }, { "epoch": 0.19911741107247927, "grad_norm": 1.2509862184524536, "learning_rate": 1.9930854160271627e-05, "loss": 0.9656, "step": 1489 }, { "epoch": 0.19925113666755817, "grad_norm": 1.2689875364303589, "learning_rate": 1.9930684569549265e-05, "loss": 1.0097, "step": 1490 }, { "epoch": 0.19938486226263707, "grad_norm": 1.373589038848877, "learning_rate": 1.9930514771831285e-05, "loss": 1.0059, "step": 1491 }, { "epoch": 0.19951858785771598, "grad_norm": 1.2807098627090454, "learning_rate": 1.9930344767121225e-05, "loss": 0.9371, "step": 1492 }, { "epoch": 0.19965231345279486, "grad_norm": 1.4251853227615356, "learning_rate": 1.9930174555422634e-05, "loss": 1.1385, "step": 1493 }, { "epoch": 0.19978603904787376, "grad_norm": 1.179208755493164, "learning_rate": 1.9930004136739058e-05, "loss": 1.1001, "step": 1494 }, { "epoch": 0.19991976464295266, "grad_norm": 1.296034336090088, "learning_rate": 1.9929833511074043e-05, "loss": 1.0038, "step": 1495 }, { "epoch": 0.20005349023803157, "grad_norm": 1.381422996520996, "learning_rate": 1.9929662678431154e-05, "loss": 0.9869, "step": 1496 }, { "epoch": 0.20018721583311047, "grad_norm": 1.2675572633743286, "learning_rate": 1.9929491638813944e-05, "loss": 1.0377, "step": 1497 }, { "epoch": 0.20032094142818935, "grad_norm": 1.2095751762390137, "learning_rate": 1.9929320392225986e-05, "loss": 0.9932, "step": 1498 }, { "epoch": 0.20045466702326825, "grad_norm": 1.3412342071533203, "learning_rate": 1.9929148938670843e-05, "loss": 1.0222, "step": 1499 }, { "epoch": 0.20058839261834716, "grad_norm": 1.2667109966278076, "learning_rate": 1.9928977278152093e-05, "loss": 1.0213, "step": 1500 }, { "epoch": 0.20072211821342606, "grad_norm": 1.2945703268051147, "learning_rate": 1.9928805410673315e-05, "loss": 1.0451, "step": 1501 }, { "epoch": 0.20085584380850494, "grad_norm": 1.4261442422866821, "learning_rate": 1.9928633336238085e-05, "loss": 1.0747, "step": 1502 }, { "epoch": 0.20098956940358384, "grad_norm": 1.3149077892303467, "learning_rate": 1.9928461054849995e-05, "loss": 0.9554, "step": 1503 }, { "epoch": 0.20112329499866274, "grad_norm": 1.2049176692962646, "learning_rate": 1.9928288566512638e-05, "loss": 0.9615, "step": 1504 }, { "epoch": 0.20125702059374165, "grad_norm": 1.2378418445587158, "learning_rate": 1.9928115871229603e-05, "loss": 1.1586, "step": 1505 }, { "epoch": 0.20139074618882055, "grad_norm": 1.1850234270095825, "learning_rate": 1.9927942969004493e-05, "loss": 1.0283, "step": 1506 }, { "epoch": 0.20152447178389943, "grad_norm": 1.2999416589736938, "learning_rate": 1.992776985984091e-05, "loss": 0.9703, "step": 1507 }, { "epoch": 0.20165819737897833, "grad_norm": 1.3573546409606934, "learning_rate": 1.9927596543742468e-05, "loss": 1.1586, "step": 1508 }, { "epoch": 0.20179192297405724, "grad_norm": 1.2939001321792603, "learning_rate": 1.9927423020712772e-05, "loss": 0.9394, "step": 1509 }, { "epoch": 0.20192564856913614, "grad_norm": 1.2368680238723755, "learning_rate": 1.9927249290755445e-05, "loss": 1.1177, "step": 1510 }, { "epoch": 0.20205937416421504, "grad_norm": 1.3958741426467896, "learning_rate": 1.992707535387411e-05, "loss": 1.0623, "step": 1511 }, { "epoch": 0.20219309975929392, "grad_norm": 1.2417049407958984, "learning_rate": 1.992690121007238e-05, "loss": 0.9578, "step": 1512 }, { "epoch": 0.20232682535437282, "grad_norm": 1.2945034503936768, "learning_rate": 1.9926726859353897e-05, "loss": 1.0729, "step": 1513 }, { "epoch": 0.20246055094945173, "grad_norm": 1.2284021377563477, "learning_rate": 1.992655230172229e-05, "loss": 1.0441, "step": 1514 }, { "epoch": 0.20259427654453063, "grad_norm": 1.2182552814483643, "learning_rate": 1.9926377537181204e-05, "loss": 1.0401, "step": 1515 }, { "epoch": 0.2027280021396095, "grad_norm": 1.2048964500427246, "learning_rate": 1.9926202565734272e-05, "loss": 1.0128, "step": 1516 }, { "epoch": 0.2028617277346884, "grad_norm": 1.4047183990478516, "learning_rate": 1.9926027387385147e-05, "loss": 1.0301, "step": 1517 }, { "epoch": 0.20299545332976732, "grad_norm": 1.2000986337661743, "learning_rate": 1.992585200213748e-05, "loss": 1.0274, "step": 1518 }, { "epoch": 0.20312917892484622, "grad_norm": 1.3622068166732788, "learning_rate": 1.9925676409994927e-05, "loss": 1.1481, "step": 1519 }, { "epoch": 0.20326290451992512, "grad_norm": 1.6251646280288696, "learning_rate": 1.9925500610961146e-05, "loss": 1.1738, "step": 1520 }, { "epoch": 0.203396630115004, "grad_norm": 1.2874807119369507, "learning_rate": 1.99253246050398e-05, "loss": 0.978, "step": 1521 }, { "epoch": 0.2035303557100829, "grad_norm": 1.247368335723877, "learning_rate": 1.9925148392234562e-05, "loss": 1.069, "step": 1522 }, { "epoch": 0.2036640813051618, "grad_norm": 1.4271094799041748, "learning_rate": 1.9924971972549105e-05, "loss": 0.9888, "step": 1523 }, { "epoch": 0.2037978069002407, "grad_norm": 1.2225266695022583, "learning_rate": 1.9924795345987103e-05, "loss": 0.939, "step": 1524 }, { "epoch": 0.20393153249531962, "grad_norm": 1.163801908493042, "learning_rate": 1.992461851255224e-05, "loss": 1.0913, "step": 1525 }, { "epoch": 0.2040652580903985, "grad_norm": 1.389091968536377, "learning_rate": 1.9924441472248197e-05, "loss": 0.9915, "step": 1526 }, { "epoch": 0.2041989836854774, "grad_norm": 1.1570316553115845, "learning_rate": 1.992426422507867e-05, "loss": 1.0477, "step": 1527 }, { "epoch": 0.2043327092805563, "grad_norm": 1.2367157936096191, "learning_rate": 1.9924086771047352e-05, "loss": 1.0609, "step": 1528 }, { "epoch": 0.2044664348756352, "grad_norm": 1.1785728931427002, "learning_rate": 1.9923909110157945e-05, "loss": 0.977, "step": 1529 }, { "epoch": 0.20460016047071408, "grad_norm": 1.268080711364746, "learning_rate": 1.9923731242414143e-05, "loss": 1.0666, "step": 1530 }, { "epoch": 0.20473388606579299, "grad_norm": 1.1620804071426392, "learning_rate": 1.9923553167819665e-05, "loss": 1.0662, "step": 1531 }, { "epoch": 0.2048676116608719, "grad_norm": 1.2658056020736694, "learning_rate": 1.9923374886378212e-05, "loss": 0.8609, "step": 1532 }, { "epoch": 0.2050013372559508, "grad_norm": 1.1860029697418213, "learning_rate": 1.9923196398093506e-05, "loss": 0.9042, "step": 1533 }, { "epoch": 0.2051350628510297, "grad_norm": 1.4202818870544434, "learning_rate": 1.992301770296927e-05, "loss": 1.0598, "step": 1534 }, { "epoch": 0.20526878844610857, "grad_norm": 1.126663088798523, "learning_rate": 1.992283880100922e-05, "loss": 0.9607, "step": 1535 }, { "epoch": 0.20540251404118748, "grad_norm": 1.1963708400726318, "learning_rate": 1.9922659692217096e-05, "loss": 0.9042, "step": 1536 }, { "epoch": 0.20553623963626638, "grad_norm": 1.3543401956558228, "learning_rate": 1.992248037659662e-05, "loss": 1.0229, "step": 1537 }, { "epoch": 0.2056699652313453, "grad_norm": 1.3436503410339355, "learning_rate": 1.992230085415154e-05, "loss": 1.066, "step": 1538 }, { "epoch": 0.2058036908264242, "grad_norm": 1.2635085582733154, "learning_rate": 1.9922121124885593e-05, "loss": 0.943, "step": 1539 }, { "epoch": 0.20593741642150307, "grad_norm": 1.617545247077942, "learning_rate": 1.9921941188802524e-05, "loss": 1.1816, "step": 1540 }, { "epoch": 0.20607114201658197, "grad_norm": 1.163333773612976, "learning_rate": 1.9921761045906085e-05, "loss": 1.0044, "step": 1541 }, { "epoch": 0.20620486761166087, "grad_norm": 1.404226541519165, "learning_rate": 1.992158069620003e-05, "loss": 1.1155, "step": 1542 }, { "epoch": 0.20633859320673978, "grad_norm": 1.2709497213363647, "learning_rate": 1.9921400139688125e-05, "loss": 1.0189, "step": 1543 }, { "epoch": 0.20647231880181866, "grad_norm": 1.216774821281433, "learning_rate": 1.9921219376374123e-05, "loss": 0.9423, "step": 1544 }, { "epoch": 0.20660604439689756, "grad_norm": 1.2736154794692993, "learning_rate": 1.9921038406261798e-05, "loss": 1.1163, "step": 1545 }, { "epoch": 0.20673976999197646, "grad_norm": 1.4089024066925049, "learning_rate": 1.992085722935492e-05, "loss": 1.1179, "step": 1546 }, { "epoch": 0.20687349558705537, "grad_norm": 1.4038814306259155, "learning_rate": 1.9920675845657266e-05, "loss": 1.0696, "step": 1547 }, { "epoch": 0.20700722118213427, "grad_norm": 1.3698562383651733, "learning_rate": 1.9920494255172616e-05, "loss": 1.1089, "step": 1548 }, { "epoch": 0.20714094677721315, "grad_norm": 1.236655831336975, "learning_rate": 1.992031245790476e-05, "loss": 1.1088, "step": 1549 }, { "epoch": 0.20727467237229205, "grad_norm": 1.162148356437683, "learning_rate": 1.992013045385748e-05, "loss": 0.9601, "step": 1550 }, { "epoch": 0.20740839796737096, "grad_norm": 1.3058711290359497, "learning_rate": 1.9919948243034576e-05, "loss": 1.0112, "step": 1551 }, { "epoch": 0.20754212356244986, "grad_norm": 1.4047051668167114, "learning_rate": 1.991976582543984e-05, "loss": 1.0547, "step": 1552 }, { "epoch": 0.20767584915752876, "grad_norm": 1.4283814430236816, "learning_rate": 1.991958320107708e-05, "loss": 1.2141, "step": 1553 }, { "epoch": 0.20780957475260764, "grad_norm": 1.3425837755203247, "learning_rate": 1.99194003699501e-05, "loss": 0.9936, "step": 1554 }, { "epoch": 0.20794330034768654, "grad_norm": 1.2524040937423706, "learning_rate": 1.991921733206271e-05, "loss": 1.1153, "step": 1555 }, { "epoch": 0.20807702594276545, "grad_norm": 1.2638416290283203, "learning_rate": 1.9919034087418726e-05, "loss": 1.098, "step": 1556 }, { "epoch": 0.20821075153784435, "grad_norm": 1.4367287158966064, "learning_rate": 1.991885063602197e-05, "loss": 1.054, "step": 1557 }, { "epoch": 0.20834447713292323, "grad_norm": 1.2431389093399048, "learning_rate": 1.991866697787626e-05, "loss": 1.0866, "step": 1558 }, { "epoch": 0.20847820272800213, "grad_norm": 1.234021544456482, "learning_rate": 1.9918483112985433e-05, "loss": 1.109, "step": 1559 }, { "epoch": 0.20861192832308104, "grad_norm": 1.3274897336959839, "learning_rate": 1.9918299041353313e-05, "loss": 1.0185, "step": 1560 }, { "epoch": 0.20874565391815994, "grad_norm": 1.324401617050171, "learning_rate": 1.991811476298374e-05, "loss": 1.0422, "step": 1561 }, { "epoch": 0.20887937951323884, "grad_norm": 1.4328031539916992, "learning_rate": 1.991793027788056e-05, "loss": 0.9468, "step": 1562 }, { "epoch": 0.20901310510831772, "grad_norm": 1.367896556854248, "learning_rate": 1.991774558604761e-05, "loss": 1.0001, "step": 1563 }, { "epoch": 0.20914683070339662, "grad_norm": 1.3652435541152954, "learning_rate": 1.9917560687488743e-05, "loss": 1.0621, "step": 1564 }, { "epoch": 0.20928055629847553, "grad_norm": 1.1415847539901733, "learning_rate": 1.9917375582207813e-05, "loss": 0.9613, "step": 1565 }, { "epoch": 0.20941428189355443, "grad_norm": 1.2082712650299072, "learning_rate": 1.9917190270208683e-05, "loss": 1.0375, "step": 1566 }, { "epoch": 0.20954800748863334, "grad_norm": 1.186031699180603, "learning_rate": 1.991700475149521e-05, "loss": 1.0266, "step": 1567 }, { "epoch": 0.2096817330837122, "grad_norm": 1.3374103307724, "learning_rate": 1.9916819026071258e-05, "loss": 1.0311, "step": 1568 }, { "epoch": 0.20981545867879112, "grad_norm": 1.3388482332229614, "learning_rate": 1.991663309394071e-05, "loss": 0.9521, "step": 1569 }, { "epoch": 0.20994918427387002, "grad_norm": 1.242008924484253, "learning_rate": 1.991644695510743e-05, "loss": 1.0781, "step": 1570 }, { "epoch": 0.21008290986894892, "grad_norm": 1.2420485019683838, "learning_rate": 1.9916260609575302e-05, "loss": 1.0552, "step": 1571 }, { "epoch": 0.2102166354640278, "grad_norm": 1.2954745292663574, "learning_rate": 1.9916074057348213e-05, "loss": 0.9518, "step": 1572 }, { "epoch": 0.2103503610591067, "grad_norm": 1.2827038764953613, "learning_rate": 1.9915887298430044e-05, "loss": 0.9229, "step": 1573 }, { "epoch": 0.2104840866541856, "grad_norm": 1.330902099609375, "learning_rate": 1.9915700332824696e-05, "loss": 1.009, "step": 1574 }, { "epoch": 0.2106178122492645, "grad_norm": 1.5019946098327637, "learning_rate": 1.9915513160536066e-05, "loss": 1.1248, "step": 1575 }, { "epoch": 0.21075153784434342, "grad_norm": 1.0698785781860352, "learning_rate": 1.9915325781568048e-05, "loss": 0.9301, "step": 1576 }, { "epoch": 0.2108852634394223, "grad_norm": 1.2372167110443115, "learning_rate": 1.9915138195924554e-05, "loss": 0.9797, "step": 1577 }, { "epoch": 0.2110189890345012, "grad_norm": 1.1669971942901611, "learning_rate": 1.991495040360949e-05, "loss": 1.012, "step": 1578 }, { "epoch": 0.2111527146295801, "grad_norm": 1.4012240171432495, "learning_rate": 1.9914762404626775e-05, "loss": 1.04, "step": 1579 }, { "epoch": 0.211286440224659, "grad_norm": 1.27386474609375, "learning_rate": 1.991457419898032e-05, "loss": 1.122, "step": 1580 }, { "epoch": 0.2114201658197379, "grad_norm": 1.2302204370498657, "learning_rate": 1.9914385786674056e-05, "loss": 1.0593, "step": 1581 }, { "epoch": 0.21155389141481679, "grad_norm": 1.4075682163238525, "learning_rate": 1.9914197167711912e-05, "loss": 1.2212, "step": 1582 }, { "epoch": 0.2116876170098957, "grad_norm": 1.3643218278884888, "learning_rate": 1.991400834209781e-05, "loss": 1.0413, "step": 1583 }, { "epoch": 0.2118213426049746, "grad_norm": 1.2387275695800781, "learning_rate": 1.991381930983569e-05, "loss": 1.135, "step": 1584 }, { "epoch": 0.2119550682000535, "grad_norm": 1.2716870307922363, "learning_rate": 1.9913630070929496e-05, "loss": 1.0013, "step": 1585 }, { "epoch": 0.2120887937951324, "grad_norm": 1.1887692213058472, "learning_rate": 1.991344062538317e-05, "loss": 1.0176, "step": 1586 }, { "epoch": 0.21222251939021128, "grad_norm": 1.2463284730911255, "learning_rate": 1.9913250973200657e-05, "loss": 1.0395, "step": 1587 }, { "epoch": 0.21235624498529018, "grad_norm": 1.3631348609924316, "learning_rate": 1.9913061114385915e-05, "loss": 0.9055, "step": 1588 }, { "epoch": 0.2124899705803691, "grad_norm": 1.260302186012268, "learning_rate": 1.99128710489429e-05, "loss": 1.1092, "step": 1589 }, { "epoch": 0.212623696175448, "grad_norm": 1.17875075340271, "learning_rate": 1.9912680776875572e-05, "loss": 1.1438, "step": 1590 }, { "epoch": 0.21275742177052687, "grad_norm": 1.179146409034729, "learning_rate": 1.9912490298187902e-05, "loss": 0.9248, "step": 1591 }, { "epoch": 0.21289114736560577, "grad_norm": 1.2902222871780396, "learning_rate": 1.9912299612883855e-05, "loss": 1.166, "step": 1592 }, { "epoch": 0.21302487296068467, "grad_norm": 1.2124801874160767, "learning_rate": 1.9912108720967408e-05, "loss": 1.1121, "step": 1593 }, { "epoch": 0.21315859855576358, "grad_norm": 1.3027839660644531, "learning_rate": 1.991191762244254e-05, "loss": 1.0752, "step": 1594 }, { "epoch": 0.21329232415084248, "grad_norm": 1.3690192699432373, "learning_rate": 1.9911726317313232e-05, "loss": 0.9135, "step": 1595 }, { "epoch": 0.21342604974592136, "grad_norm": 1.2532507181167603, "learning_rate": 1.991153480558347e-05, "loss": 1.1035, "step": 1596 }, { "epoch": 0.21355977534100026, "grad_norm": 1.315988302230835, "learning_rate": 1.9911343087257256e-05, "loss": 1.0304, "step": 1597 }, { "epoch": 0.21369350093607917, "grad_norm": 0.9959737658500671, "learning_rate": 1.9911151162338577e-05, "loss": 0.9683, "step": 1598 }, { "epoch": 0.21382722653115807, "grad_norm": 1.29546058177948, "learning_rate": 1.9910959030831438e-05, "loss": 1.0407, "step": 1599 }, { "epoch": 0.21396095212623698, "grad_norm": 1.2687528133392334, "learning_rate": 1.9910766692739837e-05, "loss": 1.0348, "step": 1600 }, { "epoch": 0.21409467772131585, "grad_norm": 1.2766495943069458, "learning_rate": 1.991057414806779e-05, "loss": 0.9425, "step": 1601 }, { "epoch": 0.21422840331639476, "grad_norm": 1.2151179313659668, "learning_rate": 1.9910381396819313e-05, "loss": 1.0909, "step": 1602 }, { "epoch": 0.21436212891147366, "grad_norm": 1.2215831279754639, "learning_rate": 1.991018843899841e-05, "loss": 0.9567, "step": 1603 }, { "epoch": 0.21449585450655256, "grad_norm": 1.289377212524414, "learning_rate": 1.990999527460912e-05, "loss": 0.989, "step": 1604 }, { "epoch": 0.21462958010163144, "grad_norm": 1.3698750734329224, "learning_rate": 1.9909801903655456e-05, "loss": 1.0122, "step": 1605 }, { "epoch": 0.21476330569671034, "grad_norm": 1.2772101163864136, "learning_rate": 1.990960832614146e-05, "loss": 1.0785, "step": 1606 }, { "epoch": 0.21489703129178925, "grad_norm": 1.4027246236801147, "learning_rate": 1.9909414542071153e-05, "loss": 1.1017, "step": 1607 }, { "epoch": 0.21503075688686815, "grad_norm": 1.1750060319900513, "learning_rate": 1.990922055144859e-05, "loss": 0.9895, "step": 1608 }, { "epoch": 0.21516448248194706, "grad_norm": 1.3451895713806152, "learning_rate": 1.99090263542778e-05, "loss": 1.0147, "step": 1609 }, { "epoch": 0.21529820807702593, "grad_norm": 1.2734516859054565, "learning_rate": 1.9908831950562843e-05, "loss": 0.9915, "step": 1610 }, { "epoch": 0.21543193367210484, "grad_norm": 1.2370436191558838, "learning_rate": 1.9908637340307764e-05, "loss": 1.0139, "step": 1611 }, { "epoch": 0.21556565926718374, "grad_norm": 1.312137246131897, "learning_rate": 1.990844252351662e-05, "loss": 1.1411, "step": 1612 }, { "epoch": 0.21569938486226264, "grad_norm": 1.324389934539795, "learning_rate": 1.9908247500193473e-05, "loss": 0.9942, "step": 1613 }, { "epoch": 0.21583311045734155, "grad_norm": 1.3687394857406616, "learning_rate": 1.990805227034239e-05, "loss": 1.0887, "step": 1614 }, { "epoch": 0.21596683605242042, "grad_norm": 1.3292369842529297, "learning_rate": 1.9907856833967436e-05, "loss": 1.0825, "step": 1615 }, { "epoch": 0.21610056164749933, "grad_norm": 1.1241214275360107, "learning_rate": 1.990766119107269e-05, "loss": 0.9711, "step": 1616 }, { "epoch": 0.21623428724257823, "grad_norm": 1.1808533668518066, "learning_rate": 1.990746534166222e-05, "loss": 1.0161, "step": 1617 }, { "epoch": 0.21636801283765714, "grad_norm": 1.2656877040863037, "learning_rate": 1.990726928574012e-05, "loss": 1.0263, "step": 1618 }, { "epoch": 0.216501738432736, "grad_norm": 1.174050211906433, "learning_rate": 1.9907073023310476e-05, "loss": 1.0665, "step": 1619 }, { "epoch": 0.21663546402781492, "grad_norm": 1.1500409841537476, "learning_rate": 1.990687655437737e-05, "loss": 0.9714, "step": 1620 }, { "epoch": 0.21676918962289382, "grad_norm": 1.264868974685669, "learning_rate": 1.9906679878944903e-05, "loss": 0.94, "step": 1621 }, { "epoch": 0.21690291521797272, "grad_norm": 1.253832221031189, "learning_rate": 1.9906482997017174e-05, "loss": 1.0126, "step": 1622 }, { "epoch": 0.21703664081305163, "grad_norm": 1.3297213315963745, "learning_rate": 1.9906285908598285e-05, "loss": 1.0788, "step": 1623 }, { "epoch": 0.2171703664081305, "grad_norm": 1.0515578985214233, "learning_rate": 1.9906088613692348e-05, "loss": 0.9869, "step": 1624 }, { "epoch": 0.2173040920032094, "grad_norm": 1.1616219282150269, "learning_rate": 1.990589111230347e-05, "loss": 0.9937, "step": 1625 }, { "epoch": 0.2174378175982883, "grad_norm": 1.2673543691635132, "learning_rate": 1.990569340443577e-05, "loss": 0.9701, "step": 1626 }, { "epoch": 0.21757154319336722, "grad_norm": 1.2754830121994019, "learning_rate": 1.9905495490093376e-05, "loss": 1.045, "step": 1627 }, { "epoch": 0.21770526878844612, "grad_norm": 1.0595667362213135, "learning_rate": 1.9905297369280404e-05, "loss": 0.8422, "step": 1628 }, { "epoch": 0.217838994383525, "grad_norm": 1.2227568626403809, "learning_rate": 1.9905099042000983e-05, "loss": 1.0865, "step": 1629 }, { "epoch": 0.2179727199786039, "grad_norm": 1.2605799436569214, "learning_rate": 1.9904900508259257e-05, "loss": 0.9798, "step": 1630 }, { "epoch": 0.2181064455736828, "grad_norm": 1.396668791770935, "learning_rate": 1.9904701768059355e-05, "loss": 1.1154, "step": 1631 }, { "epoch": 0.2182401711687617, "grad_norm": 1.300413966178894, "learning_rate": 1.9904502821405418e-05, "loss": 1.0275, "step": 1632 }, { "epoch": 0.21837389676384059, "grad_norm": 1.278496503829956, "learning_rate": 1.9904303668301603e-05, "loss": 0.8988, "step": 1633 }, { "epoch": 0.2185076223589195, "grad_norm": 1.1647870540618896, "learning_rate": 1.9904104308752053e-05, "loss": 0.8864, "step": 1634 }, { "epoch": 0.2186413479539984, "grad_norm": 1.229142665863037, "learning_rate": 1.9903904742760927e-05, "loss": 0.9457, "step": 1635 }, { "epoch": 0.2187750735490773, "grad_norm": 1.2911765575408936, "learning_rate": 1.9903704970332384e-05, "loss": 1.0182, "step": 1636 }, { "epoch": 0.2189087991441562, "grad_norm": 1.2222390174865723, "learning_rate": 1.9903504991470582e-05, "loss": 1.1717, "step": 1637 }, { "epoch": 0.21904252473923508, "grad_norm": 1.186946153640747, "learning_rate": 1.9903304806179702e-05, "loss": 0.9279, "step": 1638 }, { "epoch": 0.21917625033431398, "grad_norm": 1.3122774362564087, "learning_rate": 1.9903104414463907e-05, "loss": 0.9592, "step": 1639 }, { "epoch": 0.2193099759293929, "grad_norm": 1.3455449342727661, "learning_rate": 1.990290381632738e-05, "loss": 0.9985, "step": 1640 }, { "epoch": 0.2194437015244718, "grad_norm": 1.3660632371902466, "learning_rate": 1.9902703011774297e-05, "loss": 1.128, "step": 1641 }, { "epoch": 0.2195774271195507, "grad_norm": 1.2830744981765747, "learning_rate": 1.9902502000808842e-05, "loss": 0.9833, "step": 1642 }, { "epoch": 0.21971115271462957, "grad_norm": 1.2083070278167725, "learning_rate": 1.990230078343521e-05, "loss": 1.1279, "step": 1643 }, { "epoch": 0.21984487830970847, "grad_norm": 1.2849210500717163, "learning_rate": 1.9902099359657597e-05, "loss": 1.0157, "step": 1644 }, { "epoch": 0.21997860390478738, "grad_norm": 1.1608140468597412, "learning_rate": 1.9901897729480195e-05, "loss": 0.9763, "step": 1645 }, { "epoch": 0.22011232949986628, "grad_norm": 1.4480957984924316, "learning_rate": 1.990169589290721e-05, "loss": 1.0631, "step": 1646 }, { "epoch": 0.22024605509494516, "grad_norm": 1.2743417024612427, "learning_rate": 1.990149384994285e-05, "loss": 1.0972, "step": 1647 }, { "epoch": 0.22037978069002406, "grad_norm": 1.2544283866882324, "learning_rate": 1.9901291600591328e-05, "loss": 1.0169, "step": 1648 }, { "epoch": 0.22051350628510297, "grad_norm": 1.1500414609909058, "learning_rate": 1.9901089144856852e-05, "loss": 1.1333, "step": 1649 }, { "epoch": 0.22064723188018187, "grad_norm": 1.3139997720718384, "learning_rate": 1.990088648274365e-05, "loss": 1.162, "step": 1650 }, { "epoch": 0.22078095747526078, "grad_norm": 1.2062839269638062, "learning_rate": 1.9900683614255945e-05, "loss": 1.0533, "step": 1651 }, { "epoch": 0.22091468307033965, "grad_norm": 1.181854486465454, "learning_rate": 1.9900480539397962e-05, "loss": 0.8994, "step": 1652 }, { "epoch": 0.22104840866541856, "grad_norm": 1.1434767246246338, "learning_rate": 1.9900277258173935e-05, "loss": 1.1453, "step": 1653 }, { "epoch": 0.22118213426049746, "grad_norm": 1.2050495147705078, "learning_rate": 1.9900073770588104e-05, "loss": 1.0205, "step": 1654 }, { "epoch": 0.22131585985557636, "grad_norm": 1.141922116279602, "learning_rate": 1.9899870076644708e-05, "loss": 0.8358, "step": 1655 }, { "epoch": 0.22144958545065527, "grad_norm": 1.3196815252304077, "learning_rate": 1.9899666176347993e-05, "loss": 1.0192, "step": 1656 }, { "epoch": 0.22158331104573414, "grad_norm": 1.3875477313995361, "learning_rate": 1.989946206970221e-05, "loss": 1.0939, "step": 1657 }, { "epoch": 0.22171703664081305, "grad_norm": 1.3519939184188843, "learning_rate": 1.989925775671161e-05, "loss": 0.9693, "step": 1658 }, { "epoch": 0.22185076223589195, "grad_norm": 1.156368613243103, "learning_rate": 1.9899053237380457e-05, "loss": 1.013, "step": 1659 }, { "epoch": 0.22198448783097086, "grad_norm": 1.2275915145874023, "learning_rate": 1.989884851171301e-05, "loss": 1.0088, "step": 1660 }, { "epoch": 0.22211821342604973, "grad_norm": 1.2816598415374756, "learning_rate": 1.989864357971354e-05, "loss": 1.0689, "step": 1661 }, { "epoch": 0.22225193902112864, "grad_norm": 1.2513022422790527, "learning_rate": 1.9898438441386317e-05, "loss": 1.0517, "step": 1662 }, { "epoch": 0.22238566461620754, "grad_norm": 1.2330793142318726, "learning_rate": 1.9898233096735617e-05, "loss": 1.0726, "step": 1663 }, { "epoch": 0.22251939021128644, "grad_norm": 1.4202208518981934, "learning_rate": 1.9898027545765715e-05, "loss": 1.0313, "step": 1664 }, { "epoch": 0.22265311580636535, "grad_norm": 1.3555512428283691, "learning_rate": 1.9897821788480906e-05, "loss": 1.1387, "step": 1665 }, { "epoch": 0.22278684140144422, "grad_norm": 1.2360832691192627, "learning_rate": 1.989761582488547e-05, "loss": 1.0887, "step": 1666 }, { "epoch": 0.22292056699652313, "grad_norm": 1.4536596536636353, "learning_rate": 1.9897409654983705e-05, "loss": 1.1604, "step": 1667 }, { "epoch": 0.22305429259160203, "grad_norm": 1.3999823331832886, "learning_rate": 1.9897203278779903e-05, "loss": 1.0132, "step": 1668 }, { "epoch": 0.22318801818668094, "grad_norm": 1.256679654121399, "learning_rate": 1.989699669627837e-05, "loss": 1.0332, "step": 1669 }, { "epoch": 0.22332174378175984, "grad_norm": 1.1690373420715332, "learning_rate": 1.9896789907483414e-05, "loss": 0.9846, "step": 1670 }, { "epoch": 0.22345546937683872, "grad_norm": 1.2324342727661133, "learning_rate": 1.989658291239934e-05, "loss": 1.1134, "step": 1671 }, { "epoch": 0.22358919497191762, "grad_norm": 1.4841729402542114, "learning_rate": 1.989637571103047e-05, "loss": 1.1622, "step": 1672 }, { "epoch": 0.22372292056699652, "grad_norm": 1.4637506008148193, "learning_rate": 1.989616830338111e-05, "loss": 0.9991, "step": 1673 }, { "epoch": 0.22385664616207543, "grad_norm": 1.3393809795379639, "learning_rate": 1.9895960689455598e-05, "loss": 1.0458, "step": 1674 }, { "epoch": 0.22399037175715433, "grad_norm": 1.3536499738693237, "learning_rate": 1.9895752869258254e-05, "loss": 1.1588, "step": 1675 }, { "epoch": 0.2241240973522332, "grad_norm": 1.2027587890625, "learning_rate": 1.989554484279341e-05, "loss": 0.9346, "step": 1676 }, { "epoch": 0.2242578229473121, "grad_norm": 1.1732772588729858, "learning_rate": 1.98953366100654e-05, "loss": 1.0172, "step": 1677 }, { "epoch": 0.22439154854239102, "grad_norm": 1.2582857608795166, "learning_rate": 1.989512817107857e-05, "loss": 0.9574, "step": 1678 }, { "epoch": 0.22452527413746992, "grad_norm": 1.3563693761825562, "learning_rate": 1.989491952583726e-05, "loss": 1.1232, "step": 1679 }, { "epoch": 0.2246589997325488, "grad_norm": 1.392820119857788, "learning_rate": 1.989471067434582e-05, "loss": 1.0483, "step": 1680 }, { "epoch": 0.2247927253276277, "grad_norm": 1.2169671058654785, "learning_rate": 1.9894501616608608e-05, "loss": 1.0356, "step": 1681 }, { "epoch": 0.2249264509227066, "grad_norm": 1.2124875783920288, "learning_rate": 1.9894292352629975e-05, "loss": 1.0111, "step": 1682 }, { "epoch": 0.2250601765177855, "grad_norm": 1.3783941268920898, "learning_rate": 1.9894082882414287e-05, "loss": 1.1084, "step": 1683 }, { "epoch": 0.2251939021128644, "grad_norm": 1.305086374282837, "learning_rate": 1.989387320596591e-05, "loss": 0.9498, "step": 1684 }, { "epoch": 0.2253276277079433, "grad_norm": 1.2112503051757812, "learning_rate": 1.989366332328921e-05, "loss": 1.0315, "step": 1685 }, { "epoch": 0.2254613533030222, "grad_norm": 1.25102698802948, "learning_rate": 1.989345323438857e-05, "loss": 0.9133, "step": 1686 }, { "epoch": 0.2255950788981011, "grad_norm": 1.3424426317214966, "learning_rate": 1.9893242939268363e-05, "loss": 1.0717, "step": 1687 }, { "epoch": 0.22572880449318, "grad_norm": 1.1164089441299438, "learning_rate": 1.989303243793297e-05, "loss": 1.0333, "step": 1688 }, { "epoch": 0.2258625300882589, "grad_norm": 1.3928332328796387, "learning_rate": 1.9892821730386784e-05, "loss": 1.1341, "step": 1689 }, { "epoch": 0.22599625568333778, "grad_norm": 1.2704873085021973, "learning_rate": 1.9892610816634196e-05, "loss": 1.018, "step": 1690 }, { "epoch": 0.2261299812784167, "grad_norm": 1.2805418968200684, "learning_rate": 1.9892399696679602e-05, "loss": 1.0936, "step": 1691 }, { "epoch": 0.2262637068734956, "grad_norm": 1.1365346908569336, "learning_rate": 1.9892188370527403e-05, "loss": 0.8885, "step": 1692 }, { "epoch": 0.2263974324685745, "grad_norm": 1.1580543518066406, "learning_rate": 1.9891976838182005e-05, "loss": 0.8722, "step": 1693 }, { "epoch": 0.22653115806365337, "grad_norm": 1.2320460081100464, "learning_rate": 1.989176509964781e-05, "loss": 0.9496, "step": 1694 }, { "epoch": 0.22666488365873227, "grad_norm": 1.2560293674468994, "learning_rate": 1.989155315492924e-05, "loss": 1.0095, "step": 1695 }, { "epoch": 0.22679860925381118, "grad_norm": 1.1870598793029785, "learning_rate": 1.989134100403071e-05, "loss": 1.0448, "step": 1696 }, { "epoch": 0.22693233484889008, "grad_norm": 1.3791059255599976, "learning_rate": 1.989112864695664e-05, "loss": 1.0746, "step": 1697 }, { "epoch": 0.227066060443969, "grad_norm": 1.163436770439148, "learning_rate": 1.9890916083711463e-05, "loss": 1.0365, "step": 1698 }, { "epoch": 0.22719978603904786, "grad_norm": 1.240439534187317, "learning_rate": 1.98907033142996e-05, "loss": 1.0096, "step": 1699 }, { "epoch": 0.22733351163412677, "grad_norm": 1.5015690326690674, "learning_rate": 1.989049033872549e-05, "loss": 1.0967, "step": 1700 }, { "epoch": 0.22746723722920567, "grad_norm": 1.276943564414978, "learning_rate": 1.9890277156993578e-05, "loss": 0.9603, "step": 1701 }, { "epoch": 0.22760096282428458, "grad_norm": 1.2091472148895264, "learning_rate": 1.9890063769108298e-05, "loss": 0.9708, "step": 1702 }, { "epoch": 0.22773468841936348, "grad_norm": 1.246006965637207, "learning_rate": 1.9889850175074105e-05, "loss": 1.0199, "step": 1703 }, { "epoch": 0.22786841401444236, "grad_norm": 1.18378746509552, "learning_rate": 1.988963637489545e-05, "loss": 1.0072, "step": 1704 }, { "epoch": 0.22800213960952126, "grad_norm": 1.30490243434906, "learning_rate": 1.988942236857678e-05, "loss": 1.1602, "step": 1705 }, { "epoch": 0.22813586520460016, "grad_norm": 1.2406933307647705, "learning_rate": 1.9889208156122573e-05, "loss": 0.913, "step": 1706 }, { "epoch": 0.22826959079967907, "grad_norm": 1.1971371173858643, "learning_rate": 1.9888993737537282e-05, "loss": 1.0402, "step": 1707 }, { "epoch": 0.22840331639475794, "grad_norm": 1.2137484550476074, "learning_rate": 1.988877911282538e-05, "loss": 0.953, "step": 1708 }, { "epoch": 0.22853704198983685, "grad_norm": 1.301276445388794, "learning_rate": 1.988856428199134e-05, "loss": 1.1484, "step": 1709 }, { "epoch": 0.22867076758491575, "grad_norm": 1.167849063873291, "learning_rate": 1.9888349245039637e-05, "loss": 1.019, "step": 1710 }, { "epoch": 0.22880449317999466, "grad_norm": 1.2915928363800049, "learning_rate": 1.9888134001974756e-05, "loss": 0.9844, "step": 1711 }, { "epoch": 0.22893821877507356, "grad_norm": 1.3031436204910278, "learning_rate": 1.9887918552801188e-05, "loss": 1.1605, "step": 1712 }, { "epoch": 0.22907194437015244, "grad_norm": 1.208278775215149, "learning_rate": 1.9887702897523414e-05, "loss": 1.1268, "step": 1713 }, { "epoch": 0.22920566996523134, "grad_norm": 1.351426362991333, "learning_rate": 1.9887487036145942e-05, "loss": 1.0266, "step": 1714 }, { "epoch": 0.22933939556031024, "grad_norm": 1.3134797811508179, "learning_rate": 1.9887270968673258e-05, "loss": 1.0484, "step": 1715 }, { "epoch": 0.22947312115538915, "grad_norm": 1.340421199798584, "learning_rate": 1.9887054695109872e-05, "loss": 1.0812, "step": 1716 }, { "epoch": 0.22960684675046805, "grad_norm": 1.2396200895309448, "learning_rate": 1.9886838215460297e-05, "loss": 0.9824, "step": 1717 }, { "epoch": 0.22974057234554693, "grad_norm": 1.2036662101745605, "learning_rate": 1.9886621529729036e-05, "loss": 1.021, "step": 1718 }, { "epoch": 0.22987429794062583, "grad_norm": 1.3463480472564697, "learning_rate": 1.9886404637920605e-05, "loss": 1.1, "step": 1719 }, { "epoch": 0.23000802353570474, "grad_norm": 1.241208791732788, "learning_rate": 1.9886187540039537e-05, "loss": 0.9654, "step": 1720 }, { "epoch": 0.23014174913078364, "grad_norm": 1.2407405376434326, "learning_rate": 1.988597023609035e-05, "loss": 1.049, "step": 1721 }, { "epoch": 0.23027547472586252, "grad_norm": 1.3173632621765137, "learning_rate": 1.9885752726077568e-05, "loss": 1.1569, "step": 1722 }, { "epoch": 0.23040920032094142, "grad_norm": 1.1023324728012085, "learning_rate": 1.9885535010005733e-05, "loss": 0.9903, "step": 1723 }, { "epoch": 0.23054292591602032, "grad_norm": 1.255845546722412, "learning_rate": 1.9885317087879378e-05, "loss": 1.0736, "step": 1724 }, { "epoch": 0.23067665151109923, "grad_norm": 1.1861283779144287, "learning_rate": 1.9885098959703052e-05, "loss": 0.9095, "step": 1725 }, { "epoch": 0.23081037710617813, "grad_norm": 1.2510441541671753, "learning_rate": 1.9884880625481294e-05, "loss": 0.9712, "step": 1726 }, { "epoch": 0.230944102701257, "grad_norm": 1.257417917251587, "learning_rate": 1.988466208521866e-05, "loss": 1.0272, "step": 1727 }, { "epoch": 0.2310778282963359, "grad_norm": 1.2012909650802612, "learning_rate": 1.98844433389197e-05, "loss": 0.9785, "step": 1728 }, { "epoch": 0.23121155389141482, "grad_norm": 1.2837928533554077, "learning_rate": 1.9884224386588982e-05, "loss": 1.0321, "step": 1729 }, { "epoch": 0.23134527948649372, "grad_norm": 1.1135002374649048, "learning_rate": 1.988400522823106e-05, "loss": 0.9341, "step": 1730 }, { "epoch": 0.23147900508157263, "grad_norm": 1.1023718118667603, "learning_rate": 1.988378586385051e-05, "loss": 0.9866, "step": 1731 }, { "epoch": 0.2316127306766515, "grad_norm": 1.1702369451522827, "learning_rate": 1.98835662934519e-05, "loss": 0.9643, "step": 1732 }, { "epoch": 0.2317464562717304, "grad_norm": 1.3463718891143799, "learning_rate": 1.9883346517039806e-05, "loss": 1.0697, "step": 1733 }, { "epoch": 0.2318801818668093, "grad_norm": 1.3385719060897827, "learning_rate": 1.9883126534618818e-05, "loss": 1.0988, "step": 1734 }, { "epoch": 0.2320139074618882, "grad_norm": 1.2779313325881958, "learning_rate": 1.9882906346193508e-05, "loss": 1.036, "step": 1735 }, { "epoch": 0.2321476330569671, "grad_norm": 1.1645325422286987, "learning_rate": 1.9882685951768477e-05, "loss": 0.9172, "step": 1736 }, { "epoch": 0.232281358652046, "grad_norm": 1.2101105451583862, "learning_rate": 1.988246535134831e-05, "loss": 1.073, "step": 1737 }, { "epoch": 0.2324150842471249, "grad_norm": 1.2771438360214233, "learning_rate": 1.988224454493761e-05, "loss": 1.0163, "step": 1738 }, { "epoch": 0.2325488098422038, "grad_norm": 1.2770310640335083, "learning_rate": 1.9882023532540978e-05, "loss": 1.0251, "step": 1739 }, { "epoch": 0.2326825354372827, "grad_norm": 1.2106629610061646, "learning_rate": 1.9881802314163025e-05, "loss": 0.9619, "step": 1740 }, { "epoch": 0.23281626103236158, "grad_norm": 1.279738426208496, "learning_rate": 1.9881580889808357e-05, "loss": 1.0065, "step": 1741 }, { "epoch": 0.2329499866274405, "grad_norm": 1.2920054197311401, "learning_rate": 1.988135925948159e-05, "loss": 1.0278, "step": 1742 }, { "epoch": 0.2330837122225194, "grad_norm": 1.2487566471099854, "learning_rate": 1.9881137423187343e-05, "loss": 1.0988, "step": 1743 }, { "epoch": 0.2332174378175983, "grad_norm": 1.3338139057159424, "learning_rate": 1.9880915380930245e-05, "loss": 1.063, "step": 1744 }, { "epoch": 0.2333511634126772, "grad_norm": 1.2368102073669434, "learning_rate": 1.988069313271492e-05, "loss": 1.0815, "step": 1745 }, { "epoch": 0.23348488900775607, "grad_norm": 1.1691656112670898, "learning_rate": 1.9880470678546004e-05, "loss": 0.9781, "step": 1746 }, { "epoch": 0.23361861460283498, "grad_norm": 1.3426061868667603, "learning_rate": 1.9880248018428124e-05, "loss": 0.9763, "step": 1747 }, { "epoch": 0.23375234019791388, "grad_norm": 1.316106915473938, "learning_rate": 1.9880025152365934e-05, "loss": 1.0609, "step": 1748 }, { "epoch": 0.2338860657929928, "grad_norm": 1.4029062986373901, "learning_rate": 1.9879802080364075e-05, "loss": 0.9971, "step": 1749 }, { "epoch": 0.2340197913880717, "grad_norm": 1.3324915170669556, "learning_rate": 1.9879578802427194e-05, "loss": 1.1213, "step": 1750 }, { "epoch": 0.23415351698315057, "grad_norm": 1.4130549430847168, "learning_rate": 1.9879355318559945e-05, "loss": 1.1817, "step": 1751 }, { "epoch": 0.23428724257822947, "grad_norm": 1.16493558883667, "learning_rate": 1.987913162876699e-05, "loss": 1.0426, "step": 1752 }, { "epoch": 0.23442096817330837, "grad_norm": 1.2090052366256714, "learning_rate": 1.9878907733052988e-05, "loss": 1.0109, "step": 1753 }, { "epoch": 0.23455469376838728, "grad_norm": 1.4182623624801636, "learning_rate": 1.9878683631422605e-05, "loss": 1.0022, "step": 1754 }, { "epoch": 0.23468841936346616, "grad_norm": 1.4664738178253174, "learning_rate": 1.987845932388052e-05, "loss": 1.0944, "step": 1755 }, { "epoch": 0.23482214495854506, "grad_norm": 1.3089425563812256, "learning_rate": 1.98782348104314e-05, "loss": 1.0131, "step": 1756 }, { "epoch": 0.23495587055362396, "grad_norm": 1.240602970123291, "learning_rate": 1.987801009107993e-05, "loss": 1.0855, "step": 1757 }, { "epoch": 0.23508959614870287, "grad_norm": 1.3846957683563232, "learning_rate": 1.9877785165830786e-05, "loss": 1.1221, "step": 1758 }, { "epoch": 0.23522332174378177, "grad_norm": 1.1926441192626953, "learning_rate": 1.9877560034688667e-05, "loss": 0.9657, "step": 1759 }, { "epoch": 0.23535704733886065, "grad_norm": 1.1520521640777588, "learning_rate": 1.987733469765826e-05, "loss": 0.9896, "step": 1760 }, { "epoch": 0.23549077293393955, "grad_norm": 1.2510225772857666, "learning_rate": 1.9877109154744264e-05, "loss": 1.0193, "step": 1761 }, { "epoch": 0.23562449852901846, "grad_norm": 1.2094061374664307, "learning_rate": 1.9876883405951378e-05, "loss": 0.9699, "step": 1762 }, { "epoch": 0.23575822412409736, "grad_norm": 1.159404993057251, "learning_rate": 1.987665745128431e-05, "loss": 1.017, "step": 1763 }, { "epoch": 0.23589194971917626, "grad_norm": 1.3118197917938232, "learning_rate": 1.9876431290747766e-05, "loss": 0.9993, "step": 1764 }, { "epoch": 0.23602567531425514, "grad_norm": 1.3398816585540771, "learning_rate": 1.987620492434646e-05, "loss": 1.1052, "step": 1765 }, { "epoch": 0.23615940090933404, "grad_norm": 1.1165670156478882, "learning_rate": 1.987597835208512e-05, "loss": 1.0392, "step": 1766 }, { "epoch": 0.23629312650441295, "grad_norm": 1.3947433233261108, "learning_rate": 1.9875751573968458e-05, "loss": 1.0299, "step": 1767 }, { "epoch": 0.23642685209949185, "grad_norm": 1.347752332687378, "learning_rate": 1.9875524590001205e-05, "loss": 0.9734, "step": 1768 }, { "epoch": 0.23656057769457073, "grad_norm": 1.3289934396743774, "learning_rate": 1.987529740018809e-05, "loss": 1.0205, "step": 1769 }, { "epoch": 0.23669430328964963, "grad_norm": 1.2924273014068604, "learning_rate": 1.9875070004533852e-05, "loss": 1.0705, "step": 1770 }, { "epoch": 0.23682802888472854, "grad_norm": 1.3444517850875854, "learning_rate": 1.987484240304323e-05, "loss": 0.9281, "step": 1771 }, { "epoch": 0.23696175447980744, "grad_norm": 1.1850080490112305, "learning_rate": 1.9874614595720965e-05, "loss": 1.0179, "step": 1772 }, { "epoch": 0.23709548007488634, "grad_norm": 1.2029681205749512, "learning_rate": 1.987438658257181e-05, "loss": 0.9856, "step": 1773 }, { "epoch": 0.23722920566996522, "grad_norm": 1.2050890922546387, "learning_rate": 1.9874158363600513e-05, "loss": 1.0652, "step": 1774 }, { "epoch": 0.23736293126504412, "grad_norm": 1.3133703470230103, "learning_rate": 1.9873929938811836e-05, "loss": 1.0136, "step": 1775 }, { "epoch": 0.23749665686012303, "grad_norm": 1.3208703994750977, "learning_rate": 1.9873701308210534e-05, "loss": 1.012, "step": 1776 }, { "epoch": 0.23763038245520193, "grad_norm": 1.2444545030593872, "learning_rate": 1.987347247180138e-05, "loss": 1.0115, "step": 1777 }, { "epoch": 0.23776410805028084, "grad_norm": 1.168429970741272, "learning_rate": 1.987324342958914e-05, "loss": 0.986, "step": 1778 }, { "epoch": 0.2378978336453597, "grad_norm": 1.1798261404037476, "learning_rate": 1.9873014181578588e-05, "loss": 1.06, "step": 1779 }, { "epoch": 0.23803155924043862, "grad_norm": 1.0831444263458252, "learning_rate": 1.98727847277745e-05, "loss": 0.9218, "step": 1780 }, { "epoch": 0.23816528483551752, "grad_norm": 1.2060914039611816, "learning_rate": 1.9872555068181663e-05, "loss": 1.0372, "step": 1781 }, { "epoch": 0.23829901043059643, "grad_norm": 1.4274358749389648, "learning_rate": 1.9872325202804866e-05, "loss": 1.0208, "step": 1782 }, { "epoch": 0.2384327360256753, "grad_norm": 1.2167881727218628, "learning_rate": 1.9872095131648892e-05, "loss": 1.0397, "step": 1783 }, { "epoch": 0.2385664616207542, "grad_norm": 1.255710244178772, "learning_rate": 1.9871864854718545e-05, "loss": 1.0658, "step": 1784 }, { "epoch": 0.2387001872158331, "grad_norm": 1.086239218711853, "learning_rate": 1.9871634372018616e-05, "loss": 0.968, "step": 1785 }, { "epoch": 0.238833912810912, "grad_norm": 1.3361839056015015, "learning_rate": 1.9871403683553924e-05, "loss": 1.0524, "step": 1786 }, { "epoch": 0.23896763840599092, "grad_norm": 1.3780192136764526, "learning_rate": 1.9871172789329262e-05, "loss": 1.0848, "step": 1787 }, { "epoch": 0.2391013640010698, "grad_norm": 1.2757710218429565, "learning_rate": 1.9870941689349448e-05, "loss": 1.0566, "step": 1788 }, { "epoch": 0.2392350895961487, "grad_norm": 1.1849250793457031, "learning_rate": 1.9870710383619304e-05, "loss": 0.8845, "step": 1789 }, { "epoch": 0.2393688151912276, "grad_norm": 1.2763221263885498, "learning_rate": 1.9870478872143644e-05, "loss": 1.0722, "step": 1790 }, { "epoch": 0.2395025407863065, "grad_norm": 1.4032946825027466, "learning_rate": 1.9870247154927297e-05, "loss": 1.0866, "step": 1791 }, { "epoch": 0.2396362663813854, "grad_norm": 1.346529245376587, "learning_rate": 1.9870015231975096e-05, "loss": 1.0401, "step": 1792 }, { "epoch": 0.2397699919764643, "grad_norm": 1.2019823789596558, "learning_rate": 1.9869783103291867e-05, "loss": 1.0495, "step": 1793 }, { "epoch": 0.2399037175715432, "grad_norm": 1.2543443441390991, "learning_rate": 1.986955076888246e-05, "loss": 1.1553, "step": 1794 }, { "epoch": 0.2400374431666221, "grad_norm": 1.270207166671753, "learning_rate": 1.9869318228751705e-05, "loss": 1.1326, "step": 1795 }, { "epoch": 0.240171168761701, "grad_norm": 1.2198190689086914, "learning_rate": 1.986908548290446e-05, "loss": 0.9553, "step": 1796 }, { "epoch": 0.24030489435677987, "grad_norm": 1.3369131088256836, "learning_rate": 1.986885253134557e-05, "loss": 1.0625, "step": 1797 }, { "epoch": 0.24043861995185878, "grad_norm": 1.166910171508789, "learning_rate": 1.9868619374079894e-05, "loss": 1.0121, "step": 1798 }, { "epoch": 0.24057234554693768, "grad_norm": 1.1858009099960327, "learning_rate": 1.9868386011112286e-05, "loss": 0.9834, "step": 1799 }, { "epoch": 0.2407060711420166, "grad_norm": 1.1821870803833008, "learning_rate": 1.986815244244762e-05, "loss": 1.1104, "step": 1800 }, { "epoch": 0.2408397967370955, "grad_norm": 1.1741657257080078, "learning_rate": 1.9867918668090755e-05, "loss": 0.9798, "step": 1801 }, { "epoch": 0.24097352233217437, "grad_norm": 1.139931082725525, "learning_rate": 1.986768468804657e-05, "loss": 1.0376, "step": 1802 }, { "epoch": 0.24110724792725327, "grad_norm": 1.3490135669708252, "learning_rate": 1.986745050231994e-05, "loss": 1.0519, "step": 1803 }, { "epoch": 0.24124097352233217, "grad_norm": 1.3365484476089478, "learning_rate": 1.9867216110915745e-05, "loss": 0.9338, "step": 1804 }, { "epoch": 0.24137469911741108, "grad_norm": 1.2020443677902222, "learning_rate": 1.9866981513838876e-05, "loss": 1.1121, "step": 1805 }, { "epoch": 0.24150842471248998, "grad_norm": 1.2612922191619873, "learning_rate": 1.9866746711094215e-05, "loss": 0.991, "step": 1806 }, { "epoch": 0.24164215030756886, "grad_norm": 1.2415944337844849, "learning_rate": 1.986651170268666e-05, "loss": 1.0719, "step": 1807 }, { "epoch": 0.24177587590264776, "grad_norm": 1.2530544996261597, "learning_rate": 1.986627648862111e-05, "loss": 1.0552, "step": 1808 }, { "epoch": 0.24190960149772667, "grad_norm": 1.235971212387085, "learning_rate": 1.9866041068902472e-05, "loss": 1.0245, "step": 1809 }, { "epoch": 0.24204332709280557, "grad_norm": 1.2835874557495117, "learning_rate": 1.9865805443535646e-05, "loss": 1.041, "step": 1810 }, { "epoch": 0.24217705268788445, "grad_norm": 1.2614458799362183, "learning_rate": 1.9865569612525544e-05, "loss": 1.0502, "step": 1811 }, { "epoch": 0.24231077828296335, "grad_norm": 1.2657123804092407, "learning_rate": 1.9865333575877085e-05, "loss": 0.9503, "step": 1812 }, { "epoch": 0.24244450387804226, "grad_norm": 1.1897975206375122, "learning_rate": 1.986509733359519e-05, "loss": 0.9872, "step": 1813 }, { "epoch": 0.24257822947312116, "grad_norm": 1.2130753993988037, "learning_rate": 1.986486088568478e-05, "loss": 0.984, "step": 1814 }, { "epoch": 0.24271195506820006, "grad_norm": 1.267230749130249, "learning_rate": 1.986462423215078e-05, "loss": 0.9928, "step": 1815 }, { "epoch": 0.24284568066327894, "grad_norm": 1.2979862689971924, "learning_rate": 1.9864387372998135e-05, "loss": 0.9632, "step": 1816 }, { "epoch": 0.24297940625835784, "grad_norm": 1.2650824785232544, "learning_rate": 1.9864150308231768e-05, "loss": 1.1465, "step": 1817 }, { "epoch": 0.24311313185343675, "grad_norm": 1.186245322227478, "learning_rate": 1.9863913037856627e-05, "loss": 1.0361, "step": 1818 }, { "epoch": 0.24324685744851565, "grad_norm": 1.198009967803955, "learning_rate": 1.986367556187766e-05, "loss": 0.9723, "step": 1819 }, { "epoch": 0.24338058304359456, "grad_norm": 1.321725845336914, "learning_rate": 1.9863437880299815e-05, "loss": 1.0475, "step": 1820 }, { "epoch": 0.24351430863867343, "grad_norm": 1.2239506244659424, "learning_rate": 1.9863199993128045e-05, "loss": 0.9676, "step": 1821 }, { "epoch": 0.24364803423375234, "grad_norm": 1.2939268350601196, "learning_rate": 1.9862961900367308e-05, "loss": 1.0887, "step": 1822 }, { "epoch": 0.24378175982883124, "grad_norm": 1.4115878343582153, "learning_rate": 1.986272360202257e-05, "loss": 1.1622, "step": 1823 }, { "epoch": 0.24391548542391014, "grad_norm": 1.0778623819351196, "learning_rate": 1.9862485098098796e-05, "loss": 0.9204, "step": 1824 }, { "epoch": 0.24404921101898902, "grad_norm": 1.1318458318710327, "learning_rate": 1.9862246388600956e-05, "loss": 0.8863, "step": 1825 }, { "epoch": 0.24418293661406792, "grad_norm": 1.1461611986160278, "learning_rate": 1.9862007473534026e-05, "loss": 0.8984, "step": 1826 }, { "epoch": 0.24431666220914683, "grad_norm": 1.3042157888412476, "learning_rate": 1.9861768352902992e-05, "loss": 1.0036, "step": 1827 }, { "epoch": 0.24445038780422573, "grad_norm": 1.2648496627807617, "learning_rate": 1.986152902671283e-05, "loss": 1.104, "step": 1828 }, { "epoch": 0.24458411339930464, "grad_norm": 1.2864179611206055, "learning_rate": 1.986128949496853e-05, "loss": 1.13, "step": 1829 }, { "epoch": 0.2447178389943835, "grad_norm": 1.35065758228302, "learning_rate": 1.9861049757675087e-05, "loss": 1.0656, "step": 1830 }, { "epoch": 0.24485156458946242, "grad_norm": 1.259787917137146, "learning_rate": 1.9860809814837502e-05, "loss": 0.9792, "step": 1831 }, { "epoch": 0.24498529018454132, "grad_norm": 1.155753254890442, "learning_rate": 1.986056966646077e-05, "loss": 1.0063, "step": 1832 }, { "epoch": 0.24511901577962023, "grad_norm": 1.2548549175262451, "learning_rate": 1.98603293125499e-05, "loss": 0.922, "step": 1833 }, { "epoch": 0.24525274137469913, "grad_norm": 1.1855548620224, "learning_rate": 1.9860088753109896e-05, "loss": 1.091, "step": 1834 }, { "epoch": 0.245386466969778, "grad_norm": 1.2218815088272095, "learning_rate": 1.985984798814578e-05, "loss": 0.9777, "step": 1835 }, { "epoch": 0.2455201925648569, "grad_norm": 1.2663795948028564, "learning_rate": 1.985960701766257e-05, "loss": 0.9608, "step": 1836 }, { "epoch": 0.2456539181599358, "grad_norm": 1.3177436590194702, "learning_rate": 1.9859365841665285e-05, "loss": 1.0368, "step": 1837 }, { "epoch": 0.24578764375501472, "grad_norm": 1.1861021518707275, "learning_rate": 1.9859124460158953e-05, "loss": 1.0009, "step": 1838 }, { "epoch": 0.24592136935009362, "grad_norm": 1.2948068380355835, "learning_rate": 1.9858882873148604e-05, "loss": 0.9578, "step": 1839 }, { "epoch": 0.2460550949451725, "grad_norm": 1.122073769569397, "learning_rate": 1.9858641080639277e-05, "loss": 0.9502, "step": 1840 }, { "epoch": 0.2461888205402514, "grad_norm": 1.0474636554718018, "learning_rate": 1.985839908263601e-05, "loss": 1.0072, "step": 1841 }, { "epoch": 0.2463225461353303, "grad_norm": 1.3063892126083374, "learning_rate": 1.985815687914385e-05, "loss": 1.0707, "step": 1842 }, { "epoch": 0.2464562717304092, "grad_norm": 1.2954157590866089, "learning_rate": 1.985791447016784e-05, "loss": 1.111, "step": 1843 }, { "epoch": 0.2465899973254881, "grad_norm": 1.1754432916641235, "learning_rate": 1.9857671855713038e-05, "loss": 0.989, "step": 1844 }, { "epoch": 0.246723722920567, "grad_norm": 1.278006911277771, "learning_rate": 1.9857429035784496e-05, "loss": 1.1338, "step": 1845 }, { "epoch": 0.2468574485156459, "grad_norm": 1.0881412029266357, "learning_rate": 1.985718601038728e-05, "loss": 1.0527, "step": 1846 }, { "epoch": 0.2469911741107248, "grad_norm": 1.1560137271881104, "learning_rate": 1.9856942779526452e-05, "loss": 0.9168, "step": 1847 }, { "epoch": 0.2471248997058037, "grad_norm": 1.2778552770614624, "learning_rate": 1.9856699343207088e-05, "loss": 1.1296, "step": 1848 }, { "epoch": 0.24725862530088258, "grad_norm": 1.2186923027038574, "learning_rate": 1.9856455701434254e-05, "loss": 1.1431, "step": 1849 }, { "epoch": 0.24739235089596148, "grad_norm": 1.1788978576660156, "learning_rate": 1.9856211854213034e-05, "loss": 0.9745, "step": 1850 }, { "epoch": 0.2475260764910404, "grad_norm": 1.2383781671524048, "learning_rate": 1.9855967801548512e-05, "loss": 1.0432, "step": 1851 }, { "epoch": 0.2476598020861193, "grad_norm": 1.1831271648406982, "learning_rate": 1.9855723543445768e-05, "loss": 1.0206, "step": 1852 }, { "epoch": 0.2477935276811982, "grad_norm": 1.1931451559066772, "learning_rate": 1.98554790799099e-05, "loss": 0.9704, "step": 1853 }, { "epoch": 0.24792725327627707, "grad_norm": 1.212536096572876, "learning_rate": 1.9855234410946002e-05, "loss": 1.0877, "step": 1854 }, { "epoch": 0.24806097887135597, "grad_norm": 1.2455673217773438, "learning_rate": 1.9854989536559172e-05, "loss": 1.0371, "step": 1855 }, { "epoch": 0.24819470446643488, "grad_norm": 1.284547209739685, "learning_rate": 1.9854744456754516e-05, "loss": 0.9894, "step": 1856 }, { "epoch": 0.24832843006151378, "grad_norm": 1.3304446935653687, "learning_rate": 1.985449917153714e-05, "loss": 1.1516, "step": 1857 }, { "epoch": 0.24846215565659266, "grad_norm": 1.2684322595596313, "learning_rate": 1.985425368091216e-05, "loss": 0.9932, "step": 1858 }, { "epoch": 0.24859588125167156, "grad_norm": 1.2613474130630493, "learning_rate": 1.9854007984884692e-05, "loss": 1.0205, "step": 1859 }, { "epoch": 0.24872960684675047, "grad_norm": 1.196800708770752, "learning_rate": 1.9853762083459856e-05, "loss": 0.9871, "step": 1860 }, { "epoch": 0.24886333244182937, "grad_norm": 1.1161975860595703, "learning_rate": 1.9853515976642778e-05, "loss": 0.9205, "step": 1861 }, { "epoch": 0.24899705803690828, "grad_norm": 1.1643733978271484, "learning_rate": 1.9853269664438587e-05, "loss": 0.9888, "step": 1862 }, { "epoch": 0.24913078363198715, "grad_norm": 1.4770944118499756, "learning_rate": 1.985302314685242e-05, "loss": 1.0691, "step": 1863 }, { "epoch": 0.24926450922706606, "grad_norm": 1.3144875764846802, "learning_rate": 1.9852776423889414e-05, "loss": 1.0569, "step": 1864 }, { "epoch": 0.24939823482214496, "grad_norm": 1.2212207317352295, "learning_rate": 1.985252949555471e-05, "loss": 1.0425, "step": 1865 }, { "epoch": 0.24953196041722386, "grad_norm": 1.1877015829086304, "learning_rate": 1.9852282361853458e-05, "loss": 0.9706, "step": 1866 }, { "epoch": 0.24966568601230277, "grad_norm": 1.2472724914550781, "learning_rate": 1.985203502279081e-05, "loss": 0.9921, "step": 1867 }, { "epoch": 0.24979941160738164, "grad_norm": 1.0756787061691284, "learning_rate": 1.9851787478371916e-05, "loss": 0.9435, "step": 1868 }, { "epoch": 0.24993313720246055, "grad_norm": 1.1400564908981323, "learning_rate": 1.9851539728601937e-05, "loss": 1.0747, "step": 1869 }, { "epoch": 0.2500668627975394, "grad_norm": 1.1055908203125, "learning_rate": 1.9851291773486045e-05, "loss": 1.0435, "step": 1870 }, { "epoch": 0.25020058839261833, "grad_norm": 1.2730300426483154, "learning_rate": 1.98510436130294e-05, "loss": 0.9609, "step": 1871 }, { "epoch": 0.25033431398769723, "grad_norm": 1.2270588874816895, "learning_rate": 1.9850795247237177e-05, "loss": 1.1107, "step": 1872 }, { "epoch": 0.25046803958277614, "grad_norm": 1.3665003776550293, "learning_rate": 1.9850546676114555e-05, "loss": 1.0609, "step": 1873 }, { "epoch": 0.25060176517785504, "grad_norm": 1.22626793384552, "learning_rate": 1.985029789966671e-05, "loss": 1.0594, "step": 1874 }, { "epoch": 0.25073549077293394, "grad_norm": 1.2743902206420898, "learning_rate": 1.9850048917898833e-05, "loss": 1.0707, "step": 1875 }, { "epoch": 0.25086921636801285, "grad_norm": 1.333679437637329, "learning_rate": 1.9849799730816112e-05, "loss": 1.1224, "step": 1876 }, { "epoch": 0.25100294196309175, "grad_norm": 1.2121695280075073, "learning_rate": 1.984955033842374e-05, "loss": 0.9937, "step": 1877 }, { "epoch": 0.25113666755817066, "grad_norm": 1.1435086727142334, "learning_rate": 1.9849300740726917e-05, "loss": 0.9828, "step": 1878 }, { "epoch": 0.2512703931532495, "grad_norm": 1.276832938194275, "learning_rate": 1.9849050937730846e-05, "loss": 0.9817, "step": 1879 }, { "epoch": 0.2514041187483284, "grad_norm": 1.2391608953475952, "learning_rate": 1.984880092944073e-05, "loss": 1.0029, "step": 1880 }, { "epoch": 0.2515378443434073, "grad_norm": 1.222477912902832, "learning_rate": 1.9848550715861786e-05, "loss": 1.0535, "step": 1881 }, { "epoch": 0.2516715699384862, "grad_norm": 1.1950234174728394, "learning_rate": 1.9848300296999222e-05, "loss": 1.1018, "step": 1882 }, { "epoch": 0.2518052955335651, "grad_norm": 1.319090485572815, "learning_rate": 1.9848049672858268e-05, "loss": 1.1281, "step": 1883 }, { "epoch": 0.251939021128644, "grad_norm": 1.2932372093200684, "learning_rate": 1.984779884344414e-05, "loss": 0.9921, "step": 1884 }, { "epoch": 0.25207274672372293, "grad_norm": 1.2814209461212158, "learning_rate": 1.9847547808762065e-05, "loss": 1.1613, "step": 1885 }, { "epoch": 0.25220647231880183, "grad_norm": 1.1482468843460083, "learning_rate": 1.984729656881728e-05, "loss": 0.9871, "step": 1886 }, { "epoch": 0.25234019791388074, "grad_norm": 1.1294058561325073, "learning_rate": 1.9847045123615024e-05, "loss": 0.9976, "step": 1887 }, { "epoch": 0.25247392350895964, "grad_norm": 1.2763704061508179, "learning_rate": 1.984679347316053e-05, "loss": 1.0715, "step": 1888 }, { "epoch": 0.2526076491040385, "grad_norm": 1.3573757410049438, "learning_rate": 1.9846541617459056e-05, "loss": 1.0977, "step": 1889 }, { "epoch": 0.2527413746991174, "grad_norm": 1.1321908235549927, "learning_rate": 1.9846289556515835e-05, "loss": 0.8935, "step": 1890 }, { "epoch": 0.2528751002941963, "grad_norm": 1.089170217514038, "learning_rate": 1.984603729033614e-05, "loss": 0.9726, "step": 1891 }, { "epoch": 0.2530088258892752, "grad_norm": 1.240153193473816, "learning_rate": 1.984578481892521e-05, "loss": 1.1333, "step": 1892 }, { "epoch": 0.2531425514843541, "grad_norm": 1.1886579990386963, "learning_rate": 1.984553214228832e-05, "loss": 1.0124, "step": 1893 }, { "epoch": 0.253276277079433, "grad_norm": 1.142903447151184, "learning_rate": 1.984527926043074e-05, "loss": 0.9633, "step": 1894 }, { "epoch": 0.2534100026745119, "grad_norm": 1.073736548423767, "learning_rate": 1.9845026173357725e-05, "loss": 1.0262, "step": 1895 }, { "epoch": 0.2535437282695908, "grad_norm": 1.2479990720748901, "learning_rate": 1.9844772881074568e-05, "loss": 1.0899, "step": 1896 }, { "epoch": 0.2536774538646697, "grad_norm": 1.1725611686706543, "learning_rate": 1.9844519383586536e-05, "loss": 0.8376, "step": 1897 }, { "epoch": 0.25381117945974857, "grad_norm": 1.2881478071212769, "learning_rate": 1.9844265680898917e-05, "loss": 1.1059, "step": 1898 }, { "epoch": 0.2539449050548275, "grad_norm": 1.1993621587753296, "learning_rate": 1.9844011773017e-05, "loss": 1.0495, "step": 1899 }, { "epoch": 0.2540786306499064, "grad_norm": 1.1411255598068237, "learning_rate": 1.984375765994608e-05, "loss": 0.9184, "step": 1900 }, { "epoch": 0.2542123562449853, "grad_norm": 1.1564644575119019, "learning_rate": 1.984350334169145e-05, "loss": 1.0139, "step": 1901 }, { "epoch": 0.2543460818400642, "grad_norm": 1.2602437734603882, "learning_rate": 1.9843248818258413e-05, "loss": 1.197, "step": 1902 }, { "epoch": 0.2544798074351431, "grad_norm": 1.305450677871704, "learning_rate": 1.984299408965227e-05, "loss": 1.134, "step": 1903 }, { "epoch": 0.254613533030222, "grad_norm": 1.1321536302566528, "learning_rate": 1.9842739155878337e-05, "loss": 1.0486, "step": 1904 }, { "epoch": 0.2547472586253009, "grad_norm": 1.204583764076233, "learning_rate": 1.9842484016941928e-05, "loss": 1.0183, "step": 1905 }, { "epoch": 0.2548809842203798, "grad_norm": 1.3423759937286377, "learning_rate": 1.984222867284835e-05, "loss": 0.9066, "step": 1906 }, { "epoch": 0.25501470981545865, "grad_norm": 1.114795207977295, "learning_rate": 1.9841973123602937e-05, "loss": 1.058, "step": 1907 }, { "epoch": 0.25514843541053756, "grad_norm": 1.4561917781829834, "learning_rate": 1.9841717369211016e-05, "loss": 1.2868, "step": 1908 }, { "epoch": 0.25528216100561646, "grad_norm": 1.2238914966583252, "learning_rate": 1.984146140967791e-05, "loss": 1.0454, "step": 1909 }, { "epoch": 0.25541588660069536, "grad_norm": 1.235836148262024, "learning_rate": 1.9841205245008955e-05, "loss": 1.0489, "step": 1910 }, { "epoch": 0.25554961219577427, "grad_norm": 1.2881172895431519, "learning_rate": 1.9840948875209498e-05, "loss": 1.0434, "step": 1911 }, { "epoch": 0.25568333779085317, "grad_norm": 1.2685083150863647, "learning_rate": 1.984069230028488e-05, "loss": 1.2239, "step": 1912 }, { "epoch": 0.2558170633859321, "grad_norm": 1.251535415649414, "learning_rate": 1.9840435520240443e-05, "loss": 1.0182, "step": 1913 }, { "epoch": 0.255950788981011, "grad_norm": 1.3424879312515259, "learning_rate": 1.9840178535081548e-05, "loss": 1.0421, "step": 1914 }, { "epoch": 0.2560845145760899, "grad_norm": 1.2001901865005493, "learning_rate": 1.9839921344813544e-05, "loss": 0.9788, "step": 1915 }, { "epoch": 0.2562182401711688, "grad_norm": 1.1476601362228394, "learning_rate": 1.9839663949441793e-05, "loss": 1.0128, "step": 1916 }, { "epoch": 0.25635196576624764, "grad_norm": 1.3407857418060303, "learning_rate": 1.983940634897167e-05, "loss": 1.0341, "step": 1917 }, { "epoch": 0.25648569136132654, "grad_norm": 1.2388408184051514, "learning_rate": 1.983914854340853e-05, "loss": 1.0779, "step": 1918 }, { "epoch": 0.25661941695640544, "grad_norm": 1.117304801940918, "learning_rate": 1.983889053275776e-05, "loss": 0.9571, "step": 1919 }, { "epoch": 0.25675314255148435, "grad_norm": 1.2159969806671143, "learning_rate": 1.9838632317024728e-05, "loss": 1.0526, "step": 1920 }, { "epoch": 0.25688686814656325, "grad_norm": 1.1933813095092773, "learning_rate": 1.983837389621482e-05, "loss": 0.9465, "step": 1921 }, { "epoch": 0.25702059374164216, "grad_norm": 1.138527750968933, "learning_rate": 1.983811527033342e-05, "loss": 0.8981, "step": 1922 }, { "epoch": 0.25715431933672106, "grad_norm": 1.2149871587753296, "learning_rate": 1.9837856439385925e-05, "loss": 1.0463, "step": 1923 }, { "epoch": 0.25728804493179996, "grad_norm": 1.156000018119812, "learning_rate": 1.9837597403377726e-05, "loss": 0.9269, "step": 1924 }, { "epoch": 0.25742177052687887, "grad_norm": 1.2171753644943237, "learning_rate": 1.983733816231422e-05, "loss": 1.0003, "step": 1925 }, { "epoch": 0.2575554961219577, "grad_norm": 1.1531507968902588, "learning_rate": 1.983707871620082e-05, "loss": 0.9261, "step": 1926 }, { "epoch": 0.2576892217170366, "grad_norm": 1.3676788806915283, "learning_rate": 1.983681906504292e-05, "loss": 1.2445, "step": 1927 }, { "epoch": 0.2578229473121155, "grad_norm": 1.2666972875595093, "learning_rate": 1.983655920884594e-05, "loss": 1.0857, "step": 1928 }, { "epoch": 0.25795667290719443, "grad_norm": 1.0382033586502075, "learning_rate": 1.98362991476153e-05, "loss": 0.9179, "step": 1929 }, { "epoch": 0.25809039850227333, "grad_norm": 1.2534615993499756, "learning_rate": 1.9836038881356415e-05, "loss": 0.9913, "step": 1930 }, { "epoch": 0.25822412409735224, "grad_norm": 1.2451242208480835, "learning_rate": 1.9835778410074712e-05, "loss": 1.0611, "step": 1931 }, { "epoch": 0.25835784969243114, "grad_norm": 1.1600897312164307, "learning_rate": 1.9835517733775616e-05, "loss": 0.8278, "step": 1932 }, { "epoch": 0.25849157528751004, "grad_norm": 1.2249618768692017, "learning_rate": 1.983525685246457e-05, "loss": 1.0727, "step": 1933 }, { "epoch": 0.25862530088258895, "grad_norm": 1.169432282447815, "learning_rate": 1.9834995766147e-05, "loss": 1.0738, "step": 1934 }, { "epoch": 0.25875902647766785, "grad_norm": 1.1673609018325806, "learning_rate": 1.983473447482836e-05, "loss": 0.9899, "step": 1935 }, { "epoch": 0.2588927520727467, "grad_norm": 1.309316873550415, "learning_rate": 1.983447297851409e-05, "loss": 1.0505, "step": 1936 }, { "epoch": 0.2590264776678256, "grad_norm": 1.3655641078948975, "learning_rate": 1.983421127720964e-05, "loss": 1.033, "step": 1937 }, { "epoch": 0.2591602032629045, "grad_norm": 1.1614853143692017, "learning_rate": 1.9833949370920465e-05, "loss": 0.9626, "step": 1938 }, { "epoch": 0.2592939288579834, "grad_norm": 1.0599461793899536, "learning_rate": 1.9833687259652025e-05, "loss": 0.8945, "step": 1939 }, { "epoch": 0.2594276544530623, "grad_norm": 1.1576303243637085, "learning_rate": 1.9833424943409784e-05, "loss": 1.1173, "step": 1940 }, { "epoch": 0.2595613800481412, "grad_norm": 1.1299622058868408, "learning_rate": 1.9833162422199213e-05, "loss": 1.0097, "step": 1941 }, { "epoch": 0.2596951056432201, "grad_norm": 1.1205140352249146, "learning_rate": 1.983289969602578e-05, "loss": 1.0306, "step": 1942 }, { "epoch": 0.25982883123829903, "grad_norm": 1.246517539024353, "learning_rate": 1.983263676489496e-05, "loss": 0.9248, "step": 1943 }, { "epoch": 0.25996255683337793, "grad_norm": 1.2294942140579224, "learning_rate": 1.9832373628812235e-05, "loss": 1.0918, "step": 1944 }, { "epoch": 0.2600962824284568, "grad_norm": 1.1532258987426758, "learning_rate": 1.983211028778309e-05, "loss": 0.9346, "step": 1945 }, { "epoch": 0.2602300080235357, "grad_norm": 1.1911040544509888, "learning_rate": 1.9831846741813018e-05, "loss": 0.8848, "step": 1946 }, { "epoch": 0.2603637336186146, "grad_norm": 1.1791915893554688, "learning_rate": 1.9831582990907506e-05, "loss": 1.0596, "step": 1947 }, { "epoch": 0.2604974592136935, "grad_norm": 1.1919529438018799, "learning_rate": 1.9831319035072053e-05, "loss": 1.0281, "step": 1948 }, { "epoch": 0.2606311848087724, "grad_norm": 1.1895248889923096, "learning_rate": 1.9831054874312167e-05, "loss": 0.9538, "step": 1949 }, { "epoch": 0.2607649104038513, "grad_norm": 1.0642296075820923, "learning_rate": 1.9830790508633343e-05, "loss": 0.9838, "step": 1950 }, { "epoch": 0.2608986359989302, "grad_norm": 1.2523521184921265, "learning_rate": 1.9830525938041102e-05, "loss": 0.9918, "step": 1951 }, { "epoch": 0.2610323615940091, "grad_norm": 1.2694766521453857, "learning_rate": 1.9830261162540956e-05, "loss": 1.022, "step": 1952 }, { "epoch": 0.261166087189088, "grad_norm": 1.053975224494934, "learning_rate": 1.982999618213842e-05, "loss": 0.9706, "step": 1953 }, { "epoch": 0.26129981278416686, "grad_norm": 1.1439032554626465, "learning_rate": 1.982973099683902e-05, "loss": 0.8719, "step": 1954 }, { "epoch": 0.26143353837924577, "grad_norm": 1.177422285079956, "learning_rate": 1.982946560664828e-05, "loss": 1.098, "step": 1955 }, { "epoch": 0.26156726397432467, "grad_norm": 1.075361967086792, "learning_rate": 1.982920001157174e-05, "loss": 0.9716, "step": 1956 }, { "epoch": 0.2617009895694036, "grad_norm": 1.1265277862548828, "learning_rate": 1.982893421161493e-05, "loss": 0.9133, "step": 1957 }, { "epoch": 0.2618347151644825, "grad_norm": 1.2292710542678833, "learning_rate": 1.9828668206783393e-05, "loss": 1.0538, "step": 1958 }, { "epoch": 0.2619684407595614, "grad_norm": 1.3366189002990723, "learning_rate": 1.9828401997082673e-05, "loss": 1.0706, "step": 1959 }, { "epoch": 0.2621021663546403, "grad_norm": 1.1893885135650635, "learning_rate": 1.9828135582518317e-05, "loss": 1.0161, "step": 1960 }, { "epoch": 0.2622358919497192, "grad_norm": 1.3990225791931152, "learning_rate": 1.9827868963095878e-05, "loss": 1.3058, "step": 1961 }, { "epoch": 0.2623696175447981, "grad_norm": 1.1887881755828857, "learning_rate": 1.9827602138820916e-05, "loss": 0.9939, "step": 1962 }, { "epoch": 0.262503343139877, "grad_norm": 1.180745244026184, "learning_rate": 1.982733510969899e-05, "loss": 0.9585, "step": 1963 }, { "epoch": 0.26263706873495585, "grad_norm": 1.2435686588287354, "learning_rate": 1.9827067875735667e-05, "loss": 1.0804, "step": 1964 }, { "epoch": 0.26277079433003475, "grad_norm": 1.2494195699691772, "learning_rate": 1.982680043693652e-05, "loss": 1.0272, "step": 1965 }, { "epoch": 0.26290451992511366, "grad_norm": 1.1703543663024902, "learning_rate": 1.982653279330712e-05, "loss": 0.9312, "step": 1966 }, { "epoch": 0.26303824552019256, "grad_norm": 1.1306079626083374, "learning_rate": 1.9826264944853047e-05, "loss": 0.9045, "step": 1967 }, { "epoch": 0.26317197111527146, "grad_norm": 1.3017314672470093, "learning_rate": 1.9825996891579882e-05, "loss": 0.9724, "step": 1968 }, { "epoch": 0.26330569671035037, "grad_norm": 1.2433143854141235, "learning_rate": 1.9825728633493216e-05, "loss": 1.034, "step": 1969 }, { "epoch": 0.26343942230542927, "grad_norm": 1.1472232341766357, "learning_rate": 1.9825460170598642e-05, "loss": 1.0474, "step": 1970 }, { "epoch": 0.2635731479005082, "grad_norm": 1.262888789176941, "learning_rate": 1.9825191502901746e-05, "loss": 1.0239, "step": 1971 }, { "epoch": 0.2637068734955871, "grad_norm": 1.0604875087738037, "learning_rate": 1.9824922630408138e-05, "loss": 1.0335, "step": 1972 }, { "epoch": 0.26384059909066593, "grad_norm": 1.200919270515442, "learning_rate": 1.982465355312342e-05, "loss": 1.1168, "step": 1973 }, { "epoch": 0.26397432468574483, "grad_norm": 1.1589775085449219, "learning_rate": 1.98243842710532e-05, "loss": 1.1048, "step": 1974 }, { "epoch": 0.26410805028082374, "grad_norm": 1.2466036081314087, "learning_rate": 1.9824114784203086e-05, "loss": 1.0275, "step": 1975 }, { "epoch": 0.26424177587590264, "grad_norm": 1.201248049736023, "learning_rate": 1.9823845092578707e-05, "loss": 1.086, "step": 1976 }, { "epoch": 0.26437550147098154, "grad_norm": 1.2614853382110596, "learning_rate": 1.9823575196185674e-05, "loss": 0.976, "step": 1977 }, { "epoch": 0.26450922706606045, "grad_norm": 1.2242079973220825, "learning_rate": 1.982330509502962e-05, "loss": 0.9678, "step": 1978 }, { "epoch": 0.26464295266113935, "grad_norm": 1.2078566551208496, "learning_rate": 1.9823034789116168e-05, "loss": 1.0589, "step": 1979 }, { "epoch": 0.26477667825621826, "grad_norm": 1.152355432510376, "learning_rate": 1.9822764278450952e-05, "loss": 0.993, "step": 1980 }, { "epoch": 0.26491040385129716, "grad_norm": 1.0910531282424927, "learning_rate": 1.9822493563039618e-05, "loss": 0.8957, "step": 1981 }, { "epoch": 0.265044129446376, "grad_norm": 1.2131431102752686, "learning_rate": 1.9822222642887804e-05, "loss": 1.0142, "step": 1982 }, { "epoch": 0.2651778550414549, "grad_norm": 1.2003467082977295, "learning_rate": 1.9821951518001156e-05, "loss": 0.9248, "step": 1983 }, { "epoch": 0.2653115806365338, "grad_norm": 1.2202606201171875, "learning_rate": 1.9821680188385334e-05, "loss": 1.0293, "step": 1984 }, { "epoch": 0.2654453062316127, "grad_norm": 1.2327533960342407, "learning_rate": 1.982140865404598e-05, "loss": 1.0535, "step": 1985 }, { "epoch": 0.2655790318266916, "grad_norm": 1.2835808992385864, "learning_rate": 1.982113691498876e-05, "loss": 1.0629, "step": 1986 }, { "epoch": 0.26571275742177053, "grad_norm": 1.2795361280441284, "learning_rate": 1.982086497121934e-05, "loss": 1.0403, "step": 1987 }, { "epoch": 0.26584648301684943, "grad_norm": 1.1625521183013916, "learning_rate": 1.9820592822743393e-05, "loss": 1.0305, "step": 1988 }, { "epoch": 0.26598020861192834, "grad_norm": 1.1160461902618408, "learning_rate": 1.982032046956658e-05, "loss": 1.0245, "step": 1989 }, { "epoch": 0.26611393420700724, "grad_norm": 1.2058887481689453, "learning_rate": 1.9820047911694584e-05, "loss": 1.0332, "step": 1990 }, { "epoch": 0.26624765980208615, "grad_norm": 1.1065646409988403, "learning_rate": 1.981977514913309e-05, "loss": 0.9288, "step": 1991 }, { "epoch": 0.266381385397165, "grad_norm": 1.178249716758728, "learning_rate": 1.9819502181887777e-05, "loss": 1.0043, "step": 1992 }, { "epoch": 0.2665151109922439, "grad_norm": 1.0966213941574097, "learning_rate": 1.9819229009964337e-05, "loss": 0.9295, "step": 1993 }, { "epoch": 0.2666488365873228, "grad_norm": 1.2853267192840576, "learning_rate": 1.9818955633368464e-05, "loss": 1.1404, "step": 1994 }, { "epoch": 0.2667825621824017, "grad_norm": 1.2749323844909668, "learning_rate": 1.9818682052105856e-05, "loss": 0.9866, "step": 1995 }, { "epoch": 0.2669162877774806, "grad_norm": 1.1184386014938354, "learning_rate": 1.981840826618222e-05, "loss": 1.021, "step": 1996 }, { "epoch": 0.2670500133725595, "grad_norm": 1.258388876914978, "learning_rate": 1.9818134275603253e-05, "loss": 0.9191, "step": 1997 }, { "epoch": 0.2671837389676384, "grad_norm": 1.1967415809631348, "learning_rate": 1.9817860080374674e-05, "loss": 1.0192, "step": 1998 }, { "epoch": 0.2673174645627173, "grad_norm": 1.3043500185012817, "learning_rate": 1.98175856805022e-05, "loss": 1.0995, "step": 1999 }, { "epoch": 0.2674511901577962, "grad_norm": 1.2193313837051392, "learning_rate": 1.9817311075991545e-05, "loss": 1.0968, "step": 2000 }, { "epoch": 0.2675849157528751, "grad_norm": 1.4520708322525024, "learning_rate": 1.981703626684843e-05, "loss": 1.0412, "step": 2001 }, { "epoch": 0.267718641347954, "grad_norm": 1.2047863006591797, "learning_rate": 1.9816761253078594e-05, "loss": 1.0384, "step": 2002 }, { "epoch": 0.2678523669430329, "grad_norm": 1.18962824344635, "learning_rate": 1.9816486034687762e-05, "loss": 0.9181, "step": 2003 }, { "epoch": 0.2679860925381118, "grad_norm": 1.2128196954727173, "learning_rate": 1.981621061168167e-05, "loss": 1.0559, "step": 2004 }, { "epoch": 0.2681198181331907, "grad_norm": 1.2461825609207153, "learning_rate": 1.981593498406606e-05, "loss": 1.0356, "step": 2005 }, { "epoch": 0.2682535437282696, "grad_norm": 1.342537760734558, "learning_rate": 1.9815659151846684e-05, "loss": 1.1618, "step": 2006 }, { "epoch": 0.2683872693233485, "grad_norm": 1.2740074396133423, "learning_rate": 1.981538311502928e-05, "loss": 1.1024, "step": 2007 }, { "epoch": 0.2685209949184274, "grad_norm": 1.3248499631881714, "learning_rate": 1.981510687361961e-05, "loss": 1.044, "step": 2008 }, { "epoch": 0.2686547205135063, "grad_norm": 1.3921226263046265, "learning_rate": 1.9814830427623426e-05, "loss": 1.0748, "step": 2009 }, { "epoch": 0.2687884461085852, "grad_norm": 1.1772385835647583, "learning_rate": 1.9814553777046497e-05, "loss": 0.9867, "step": 2010 }, { "epoch": 0.26892217170366406, "grad_norm": 1.255858063697815, "learning_rate": 1.9814276921894585e-05, "loss": 1.038, "step": 2011 }, { "epoch": 0.26905589729874296, "grad_norm": 1.111238718032837, "learning_rate": 1.9813999862173462e-05, "loss": 0.9807, "step": 2012 }, { "epoch": 0.26918962289382187, "grad_norm": 1.0777053833007812, "learning_rate": 1.98137225978889e-05, "loss": 0.9882, "step": 2013 }, { "epoch": 0.26932334848890077, "grad_norm": 1.3408069610595703, "learning_rate": 1.9813445129046685e-05, "loss": 1.1319, "step": 2014 }, { "epoch": 0.2694570740839797, "grad_norm": 1.153422474861145, "learning_rate": 1.9813167455652597e-05, "loss": 1.0102, "step": 2015 }, { "epoch": 0.2695907996790586, "grad_norm": 1.0136988162994385, "learning_rate": 1.981288957771242e-05, "loss": 0.8948, "step": 2016 }, { "epoch": 0.2697245252741375, "grad_norm": 1.1669692993164062, "learning_rate": 1.9812611495231952e-05, "loss": 1.0621, "step": 2017 }, { "epoch": 0.2698582508692164, "grad_norm": 1.274993658065796, "learning_rate": 1.981233320821699e-05, "loss": 0.9683, "step": 2018 }, { "epoch": 0.2699919764642953, "grad_norm": 1.1367610692977905, "learning_rate": 1.9812054716673327e-05, "loss": 0.9924, "step": 2019 }, { "epoch": 0.27012570205937414, "grad_norm": 1.1012234687805176, "learning_rate": 1.9811776020606773e-05, "loss": 1.0732, "step": 2020 }, { "epoch": 0.27025942765445304, "grad_norm": 1.2960808277130127, "learning_rate": 1.9811497120023136e-05, "loss": 1.1048, "step": 2021 }, { "epoch": 0.27039315324953195, "grad_norm": 1.1298588514328003, "learning_rate": 1.981121801492823e-05, "loss": 1.1276, "step": 2022 }, { "epoch": 0.27052687884461085, "grad_norm": 1.178869605064392, "learning_rate": 1.9810938705327873e-05, "loss": 0.9577, "step": 2023 }, { "epoch": 0.27066060443968976, "grad_norm": 1.2661691904067993, "learning_rate": 1.981065919122789e-05, "loss": 0.9624, "step": 2024 }, { "epoch": 0.27079433003476866, "grad_norm": 1.345145583152771, "learning_rate": 1.9810379472634103e-05, "loss": 1.0668, "step": 2025 }, { "epoch": 0.27092805562984756, "grad_norm": 1.1975499391555786, "learning_rate": 1.9810099549552343e-05, "loss": 0.9159, "step": 2026 }, { "epoch": 0.27106178122492647, "grad_norm": 1.1761215925216675, "learning_rate": 1.9809819421988443e-05, "loss": 1.1029, "step": 2027 }, { "epoch": 0.27119550682000537, "grad_norm": 1.1685346364974976, "learning_rate": 1.9809539089948245e-05, "loss": 0.9854, "step": 2028 }, { "epoch": 0.2713292324150842, "grad_norm": 1.090917706489563, "learning_rate": 1.980925855343759e-05, "loss": 0.9307, "step": 2029 }, { "epoch": 0.2714629580101631, "grad_norm": 1.30075204372406, "learning_rate": 1.9808977812462334e-05, "loss": 1.0652, "step": 2030 }, { "epoch": 0.27159668360524203, "grad_norm": 1.1271679401397705, "learning_rate": 1.9808696867028313e-05, "loss": 0.9973, "step": 2031 }, { "epoch": 0.27173040920032093, "grad_norm": 1.3763184547424316, "learning_rate": 1.9808415717141396e-05, "loss": 1.128, "step": 2032 }, { "epoch": 0.27186413479539984, "grad_norm": 1.2016905546188354, "learning_rate": 1.980813436280744e-05, "loss": 0.9957, "step": 2033 }, { "epoch": 0.27199786039047874, "grad_norm": 1.1422648429870605, "learning_rate": 1.9807852804032306e-05, "loss": 0.995, "step": 2034 }, { "epoch": 0.27213158598555764, "grad_norm": 1.2023279666900635, "learning_rate": 1.9807571040821866e-05, "loss": 1.0463, "step": 2035 }, { "epoch": 0.27226531158063655, "grad_norm": 1.230514407157898, "learning_rate": 1.9807289073181996e-05, "loss": 1.1129, "step": 2036 }, { "epoch": 0.27239903717571545, "grad_norm": 1.382514238357544, "learning_rate": 1.9807006901118564e-05, "loss": 1.1117, "step": 2037 }, { "epoch": 0.27253276277079436, "grad_norm": 1.1618587970733643, "learning_rate": 1.980672452463746e-05, "loss": 1.0056, "step": 2038 }, { "epoch": 0.2726664883658732, "grad_norm": 1.195986270904541, "learning_rate": 1.9806441943744567e-05, "loss": 1.0821, "step": 2039 }, { "epoch": 0.2728002139609521, "grad_norm": 1.1434705257415771, "learning_rate": 1.9806159158445774e-05, "loss": 1.0246, "step": 2040 }, { "epoch": 0.272933939556031, "grad_norm": 1.2295584678649902, "learning_rate": 1.9805876168746982e-05, "loss": 1.0765, "step": 2041 }, { "epoch": 0.2730676651511099, "grad_norm": 1.2134474515914917, "learning_rate": 1.980559297465408e-05, "loss": 0.9261, "step": 2042 }, { "epoch": 0.2732013907461888, "grad_norm": 1.0921227931976318, "learning_rate": 1.9805309576172976e-05, "loss": 0.9343, "step": 2043 }, { "epoch": 0.2733351163412677, "grad_norm": 1.2065128087997437, "learning_rate": 1.9805025973309577e-05, "loss": 0.9736, "step": 2044 }, { "epoch": 0.27346884193634663, "grad_norm": 1.186004400253296, "learning_rate": 1.9804742166069793e-05, "loss": 1.0101, "step": 2045 }, { "epoch": 0.27360256753142553, "grad_norm": 1.2628732919692993, "learning_rate": 1.9804458154459543e-05, "loss": 0.9957, "step": 2046 }, { "epoch": 0.27373629312650444, "grad_norm": 1.296435832977295, "learning_rate": 1.9804173938484742e-05, "loss": 1.0725, "step": 2047 }, { "epoch": 0.2738700187215833, "grad_norm": 1.1615389585494995, "learning_rate": 1.980388951815132e-05, "loss": 1.048, "step": 2048 }, { "epoch": 0.2740037443166622, "grad_norm": 1.1285301446914673, "learning_rate": 1.9803604893465202e-05, "loss": 0.9858, "step": 2049 }, { "epoch": 0.2741374699117411, "grad_norm": 1.0909568071365356, "learning_rate": 1.9803320064432318e-05, "loss": 0.8651, "step": 2050 }, { "epoch": 0.27427119550682, "grad_norm": 1.0887646675109863, "learning_rate": 1.9803035031058607e-05, "loss": 1.0906, "step": 2051 }, { "epoch": 0.2744049211018989, "grad_norm": 1.1738389730453491, "learning_rate": 1.9802749793350015e-05, "loss": 1.1135, "step": 2052 }, { "epoch": 0.2745386466969778, "grad_norm": 1.2649611234664917, "learning_rate": 1.9802464351312482e-05, "loss": 1.0186, "step": 2053 }, { "epoch": 0.2746723722920567, "grad_norm": 1.2647650241851807, "learning_rate": 1.980217870495196e-05, "loss": 1.0318, "step": 2054 }, { "epoch": 0.2748060978871356, "grad_norm": 1.2093485593795776, "learning_rate": 1.9801892854274404e-05, "loss": 1.0852, "step": 2055 }, { "epoch": 0.2749398234822145, "grad_norm": 1.2293505668640137, "learning_rate": 1.9801606799285768e-05, "loss": 1.0037, "step": 2056 }, { "epoch": 0.27507354907729337, "grad_norm": 1.216928482055664, "learning_rate": 1.980132053999202e-05, "loss": 1.0162, "step": 2057 }, { "epoch": 0.27520727467237227, "grad_norm": 1.1991596221923828, "learning_rate": 1.9801034076399125e-05, "loss": 1.08, "step": 2058 }, { "epoch": 0.2753410002674512, "grad_norm": 1.0891691446304321, "learning_rate": 1.980074740851305e-05, "loss": 1.1318, "step": 2059 }, { "epoch": 0.2754747258625301, "grad_norm": 1.264926791191101, "learning_rate": 1.9800460536339773e-05, "loss": 1.0582, "step": 2060 }, { "epoch": 0.275608451457609, "grad_norm": 1.1377394199371338, "learning_rate": 1.9800173459885277e-05, "loss": 0.9436, "step": 2061 }, { "epoch": 0.2757421770526879, "grad_norm": 1.2865523099899292, "learning_rate": 1.979988617915554e-05, "loss": 1.0339, "step": 2062 }, { "epoch": 0.2758759026477668, "grad_norm": 1.1571788787841797, "learning_rate": 1.9799598694156555e-05, "loss": 0.8308, "step": 2063 }, { "epoch": 0.2760096282428457, "grad_norm": 1.079723834991455, "learning_rate": 1.9799311004894314e-05, "loss": 0.9586, "step": 2064 }, { "epoch": 0.2761433538379246, "grad_norm": 1.264616847038269, "learning_rate": 1.979902311137481e-05, "loss": 1.0503, "step": 2065 }, { "epoch": 0.2762770794330035, "grad_norm": 1.3007923364639282, "learning_rate": 1.9798735013604047e-05, "loss": 1.1323, "step": 2066 }, { "epoch": 0.27641080502808235, "grad_norm": 0.9994477033615112, "learning_rate": 1.9798446711588028e-05, "loss": 0.8716, "step": 2067 }, { "epoch": 0.27654453062316126, "grad_norm": 1.094146490097046, "learning_rate": 1.9798158205332765e-05, "loss": 0.9982, "step": 2068 }, { "epoch": 0.27667825621824016, "grad_norm": 1.174985408782959, "learning_rate": 1.979786949484427e-05, "loss": 1.0327, "step": 2069 }, { "epoch": 0.27681198181331906, "grad_norm": 1.0805283784866333, "learning_rate": 1.979758058012856e-05, "loss": 0.9298, "step": 2070 }, { "epoch": 0.27694570740839797, "grad_norm": 1.2051345109939575, "learning_rate": 1.9797291461191655e-05, "loss": 0.9725, "step": 2071 }, { "epoch": 0.27707943300347687, "grad_norm": 1.1798629760742188, "learning_rate": 1.979700213803959e-05, "loss": 0.972, "step": 2072 }, { "epoch": 0.2772131585985558, "grad_norm": 1.143319845199585, "learning_rate": 1.9796712610678387e-05, "loss": 1.0372, "step": 2073 }, { "epoch": 0.2773468841936347, "grad_norm": 1.196845531463623, "learning_rate": 1.9796422879114082e-05, "loss": 0.9918, "step": 2074 }, { "epoch": 0.2774806097887136, "grad_norm": 1.2316111326217651, "learning_rate": 1.979613294335272e-05, "loss": 1.0823, "step": 2075 }, { "epoch": 0.27761433538379243, "grad_norm": 1.2022000551223755, "learning_rate": 1.979584280340034e-05, "loss": 1.0708, "step": 2076 }, { "epoch": 0.27774806097887134, "grad_norm": 1.1477826833724976, "learning_rate": 1.979555245926299e-05, "loss": 1.073, "step": 2077 }, { "epoch": 0.27788178657395024, "grad_norm": 1.1513049602508545, "learning_rate": 1.9795261910946723e-05, "loss": 1.0145, "step": 2078 }, { "epoch": 0.27801551216902914, "grad_norm": 1.114238977432251, "learning_rate": 1.979497115845759e-05, "loss": 1.1122, "step": 2079 }, { "epoch": 0.27814923776410805, "grad_norm": 1.0765010118484497, "learning_rate": 1.979468020180166e-05, "loss": 1.0476, "step": 2080 }, { "epoch": 0.27828296335918695, "grad_norm": 1.2604873180389404, "learning_rate": 1.9794389040984995e-05, "loss": 1.0258, "step": 2081 }, { "epoch": 0.27841668895426586, "grad_norm": 1.376246690750122, "learning_rate": 1.979409767601366e-05, "loss": 0.9719, "step": 2082 }, { "epoch": 0.27855041454934476, "grad_norm": 1.198371410369873, "learning_rate": 1.9793806106893735e-05, "loss": 0.9634, "step": 2083 }, { "epoch": 0.27868414014442366, "grad_norm": 1.2040808200836182, "learning_rate": 1.9793514333631287e-05, "loss": 1.117, "step": 2084 }, { "epoch": 0.27881786573950257, "grad_norm": 1.2664235830307007, "learning_rate": 1.979322235623241e-05, "loss": 1.0356, "step": 2085 }, { "epoch": 0.2789515913345814, "grad_norm": 1.3336726427078247, "learning_rate": 1.979293017470318e-05, "loss": 1.0523, "step": 2086 }, { "epoch": 0.2790853169296603, "grad_norm": 1.2350406646728516, "learning_rate": 1.9792637789049692e-05, "loss": 0.9748, "step": 2087 }, { "epoch": 0.2792190425247392, "grad_norm": 1.2094125747680664, "learning_rate": 1.979234519927804e-05, "loss": 0.9428, "step": 2088 }, { "epoch": 0.27935276811981813, "grad_norm": 1.2660276889801025, "learning_rate": 1.9792052405394324e-05, "loss": 1.0692, "step": 2089 }, { "epoch": 0.27948649371489703, "grad_norm": 1.3712819814682007, "learning_rate": 1.9791759407404644e-05, "loss": 1.0491, "step": 2090 }, { "epoch": 0.27962021930997594, "grad_norm": 1.2285821437835693, "learning_rate": 1.979146620531511e-05, "loss": 1.1753, "step": 2091 }, { "epoch": 0.27975394490505484, "grad_norm": 1.1159719228744507, "learning_rate": 1.979117279913183e-05, "loss": 0.9741, "step": 2092 }, { "epoch": 0.27988767050013375, "grad_norm": 1.3301478624343872, "learning_rate": 1.9790879188860927e-05, "loss": 1.2729, "step": 2093 }, { "epoch": 0.28002139609521265, "grad_norm": 1.1380884647369385, "learning_rate": 1.979058537450851e-05, "loss": 0.9453, "step": 2094 }, { "epoch": 0.2801551216902915, "grad_norm": 1.0854499340057373, "learning_rate": 1.9790291356080713e-05, "loss": 1.0756, "step": 2095 }, { "epoch": 0.2802888472853704, "grad_norm": 1.1421220302581787, "learning_rate": 1.9789997133583662e-05, "loss": 0.9802, "step": 2096 }, { "epoch": 0.2804225728804493, "grad_norm": 1.1509255170822144, "learning_rate": 1.9789702707023487e-05, "loss": 1.009, "step": 2097 }, { "epoch": 0.2805562984755282, "grad_norm": 1.2597370147705078, "learning_rate": 1.978940807640633e-05, "loss": 1.0477, "step": 2098 }, { "epoch": 0.2806900240706071, "grad_norm": 1.136932134628296, "learning_rate": 1.9789113241738323e-05, "loss": 0.9799, "step": 2099 }, { "epoch": 0.280823749665686, "grad_norm": 1.2583529949188232, "learning_rate": 1.978881820302562e-05, "loss": 1.0124, "step": 2100 }, { "epoch": 0.2809574752607649, "grad_norm": 1.2207236289978027, "learning_rate": 1.978852296027437e-05, "loss": 1.0581, "step": 2101 }, { "epoch": 0.2810912008558438, "grad_norm": 1.1520092487335205, "learning_rate": 1.9788227513490724e-05, "loss": 1.0098, "step": 2102 }, { "epoch": 0.28122492645092273, "grad_norm": 1.19609534740448, "learning_rate": 1.9787931862680843e-05, "loss": 1.0697, "step": 2103 }, { "epoch": 0.2813586520460016, "grad_norm": 1.2715709209442139, "learning_rate": 1.978763600785089e-05, "loss": 0.9648, "step": 2104 }, { "epoch": 0.2814923776410805, "grad_norm": 1.2218003273010254, "learning_rate": 1.9787339949007026e-05, "loss": 0.9728, "step": 2105 }, { "epoch": 0.2816261032361594, "grad_norm": 1.1784186363220215, "learning_rate": 1.978704368615543e-05, "loss": 1.0295, "step": 2106 }, { "epoch": 0.2817598288312383, "grad_norm": 1.194023609161377, "learning_rate": 1.978674721930227e-05, "loss": 1.0152, "step": 2107 }, { "epoch": 0.2818935544263172, "grad_norm": 1.112650752067566, "learning_rate": 1.9786450548453733e-05, "loss": 0.9971, "step": 2108 }, { "epoch": 0.2820272800213961, "grad_norm": 1.1879955530166626, "learning_rate": 1.9786153673615994e-05, "loss": 0.9128, "step": 2109 }, { "epoch": 0.282161005616475, "grad_norm": 1.1506524085998535, "learning_rate": 1.9785856594795253e-05, "loss": 1.0528, "step": 2110 }, { "epoch": 0.2822947312115539, "grad_norm": 1.0440477132797241, "learning_rate": 1.978555931199769e-05, "loss": 0.9084, "step": 2111 }, { "epoch": 0.2824284568066328, "grad_norm": 1.0785073041915894, "learning_rate": 1.9785261825229508e-05, "loss": 1.0204, "step": 2112 }, { "epoch": 0.2825621824017117, "grad_norm": 1.2903238534927368, "learning_rate": 1.9784964134496905e-05, "loss": 1.2216, "step": 2113 }, { "epoch": 0.28269590799679056, "grad_norm": 1.2642194032669067, "learning_rate": 1.978466623980609e-05, "loss": 0.9946, "step": 2114 }, { "epoch": 0.28282963359186947, "grad_norm": 1.2489137649536133, "learning_rate": 1.9784368141163274e-05, "loss": 0.974, "step": 2115 }, { "epoch": 0.28296335918694837, "grad_norm": 1.2502025365829468, "learning_rate": 1.978406983857466e-05, "loss": 1.1027, "step": 2116 }, { "epoch": 0.2830970847820273, "grad_norm": 1.3404505252838135, "learning_rate": 1.9783771332046476e-05, "loss": 1.103, "step": 2117 }, { "epoch": 0.2832308103771062, "grad_norm": 1.1972593069076538, "learning_rate": 1.978347262158494e-05, "loss": 0.9717, "step": 2118 }, { "epoch": 0.2833645359721851, "grad_norm": 1.2726585865020752, "learning_rate": 1.9783173707196278e-05, "loss": 1.0328, "step": 2119 }, { "epoch": 0.283498261567264, "grad_norm": 1.118133544921875, "learning_rate": 1.9782874588886723e-05, "loss": 0.9091, "step": 2120 }, { "epoch": 0.2836319871623429, "grad_norm": 1.1184636354446411, "learning_rate": 1.9782575266662507e-05, "loss": 0.9327, "step": 2121 }, { "epoch": 0.2837657127574218, "grad_norm": 1.2418081760406494, "learning_rate": 1.978227574052987e-05, "loss": 1.1151, "step": 2122 }, { "epoch": 0.28389943835250064, "grad_norm": 1.1560598611831665, "learning_rate": 1.978197601049506e-05, "loss": 0.8818, "step": 2123 }, { "epoch": 0.28403316394757955, "grad_norm": 1.1912810802459717, "learning_rate": 1.9781676076564316e-05, "loss": 1.0155, "step": 2124 }, { "epoch": 0.28416688954265845, "grad_norm": 1.1632498502731323, "learning_rate": 1.9781375938743896e-05, "loss": 1.0062, "step": 2125 }, { "epoch": 0.28430061513773736, "grad_norm": 1.1767581701278687, "learning_rate": 1.9781075597040054e-05, "loss": 1.0265, "step": 2126 }, { "epoch": 0.28443434073281626, "grad_norm": 1.186914324760437, "learning_rate": 1.978077505145905e-05, "loss": 0.8598, "step": 2127 }, { "epoch": 0.28456806632789516, "grad_norm": 1.2211833000183105, "learning_rate": 1.9780474302007148e-05, "loss": 1.094, "step": 2128 }, { "epoch": 0.28470179192297407, "grad_norm": 1.230087399482727, "learning_rate": 1.9780173348690623e-05, "loss": 0.94, "step": 2129 }, { "epoch": 0.28483551751805297, "grad_norm": 1.1156785488128662, "learning_rate": 1.977987219151574e-05, "loss": 0.8633, "step": 2130 }, { "epoch": 0.2849692431131319, "grad_norm": 1.3601911067962646, "learning_rate": 1.977957083048878e-05, "loss": 1.0123, "step": 2131 }, { "epoch": 0.2851029687082107, "grad_norm": 1.2156695127487183, "learning_rate": 1.9779269265616024e-05, "loss": 1.0344, "step": 2132 }, { "epoch": 0.28523669430328963, "grad_norm": 1.3115962743759155, "learning_rate": 1.9778967496903755e-05, "loss": 1.1399, "step": 2133 }, { "epoch": 0.28537041989836853, "grad_norm": 1.3130463361740112, "learning_rate": 1.977866552435827e-05, "loss": 1.0799, "step": 2134 }, { "epoch": 0.28550414549344744, "grad_norm": 1.1409205198287964, "learning_rate": 1.9778363347985857e-05, "loss": 0.9963, "step": 2135 }, { "epoch": 0.28563787108852634, "grad_norm": 1.238593339920044, "learning_rate": 1.977806096779282e-05, "loss": 1.0965, "step": 2136 }, { "epoch": 0.28577159668360524, "grad_norm": 1.2488987445831299, "learning_rate": 1.9777758383785455e-05, "loss": 1.0005, "step": 2137 }, { "epoch": 0.28590532227868415, "grad_norm": 1.1833666563034058, "learning_rate": 1.9777455595970073e-05, "loss": 0.9943, "step": 2138 }, { "epoch": 0.28603904787376305, "grad_norm": 1.1530786752700806, "learning_rate": 1.9777152604352986e-05, "loss": 1.0572, "step": 2139 }, { "epoch": 0.28617277346884196, "grad_norm": 1.0481016635894775, "learning_rate": 1.9776849408940508e-05, "loss": 1.0091, "step": 2140 }, { "epoch": 0.28630649906392086, "grad_norm": 1.2476197481155396, "learning_rate": 1.9776546009738963e-05, "loss": 0.9741, "step": 2141 }, { "epoch": 0.2864402246589997, "grad_norm": 1.258022665977478, "learning_rate": 1.9776242406754668e-05, "loss": 0.9666, "step": 2142 }, { "epoch": 0.2865739502540786, "grad_norm": 1.2579277753829956, "learning_rate": 1.9775938599993957e-05, "loss": 0.9849, "step": 2143 }, { "epoch": 0.2867076758491575, "grad_norm": 1.1031885147094727, "learning_rate": 1.9775634589463158e-05, "loss": 0.8727, "step": 2144 }, { "epoch": 0.2868414014442364, "grad_norm": 1.2367734909057617, "learning_rate": 1.9775330375168615e-05, "loss": 1.0247, "step": 2145 }, { "epoch": 0.2869751270393153, "grad_norm": 1.1762510538101196, "learning_rate": 1.9775025957116657e-05, "loss": 0.9424, "step": 2146 }, { "epoch": 0.28710885263439423, "grad_norm": 1.2029376029968262, "learning_rate": 1.977472133531364e-05, "loss": 0.9862, "step": 2147 }, { "epoch": 0.28724257822947313, "grad_norm": 1.2394628524780273, "learning_rate": 1.9774416509765914e-05, "loss": 1.0256, "step": 2148 }, { "epoch": 0.28737630382455204, "grad_norm": 1.2521089315414429, "learning_rate": 1.9774111480479827e-05, "loss": 1.061, "step": 2149 }, { "epoch": 0.28751002941963094, "grad_norm": 1.2654383182525635, "learning_rate": 1.9773806247461736e-05, "loss": 1.0511, "step": 2150 }, { "epoch": 0.2876437550147098, "grad_norm": 1.1485753059387207, "learning_rate": 1.977350081071801e-05, "loss": 0.9951, "step": 2151 }, { "epoch": 0.2877774806097887, "grad_norm": 1.2468384504318237, "learning_rate": 1.9773195170255007e-05, "loss": 1.0315, "step": 2152 }, { "epoch": 0.2879112062048676, "grad_norm": 1.2603297233581543, "learning_rate": 1.9772889326079104e-05, "loss": 0.9493, "step": 2153 }, { "epoch": 0.2880449317999465, "grad_norm": 1.1580828428268433, "learning_rate": 1.9772583278196677e-05, "loss": 1.0671, "step": 2154 }, { "epoch": 0.2881786573950254, "grad_norm": 1.109904170036316, "learning_rate": 1.97722770266141e-05, "loss": 1.0332, "step": 2155 }, { "epoch": 0.2883123829901043, "grad_norm": 1.3298914432525635, "learning_rate": 1.9771970571337764e-05, "loss": 1.1072, "step": 2156 }, { "epoch": 0.2884461085851832, "grad_norm": 1.2414443492889404, "learning_rate": 1.977166391237405e-05, "loss": 1.0372, "step": 2157 }, { "epoch": 0.2885798341802621, "grad_norm": 1.157848596572876, "learning_rate": 1.9771357049729353e-05, "loss": 0.9193, "step": 2158 }, { "epoch": 0.288713559775341, "grad_norm": 1.233790397644043, "learning_rate": 1.9771049983410068e-05, "loss": 0.9575, "step": 2159 }, { "epoch": 0.28884728537041987, "grad_norm": 1.459373950958252, "learning_rate": 1.9770742713422595e-05, "loss": 1.0812, "step": 2160 }, { "epoch": 0.2889810109654988, "grad_norm": 1.1394226551055908, "learning_rate": 1.977043523977334e-05, "loss": 0.9546, "step": 2161 }, { "epoch": 0.2891147365605777, "grad_norm": 1.1912246942520142, "learning_rate": 1.977012756246871e-05, "loss": 0.9978, "step": 2162 }, { "epoch": 0.2892484621556566, "grad_norm": 1.2213622331619263, "learning_rate": 1.9769819681515124e-05, "loss": 1.0146, "step": 2163 }, { "epoch": 0.2893821877507355, "grad_norm": 1.353010892868042, "learning_rate": 1.976951159691899e-05, "loss": 1.1299, "step": 2164 }, { "epoch": 0.2895159133458144, "grad_norm": 1.2113394737243652, "learning_rate": 1.976920330868674e-05, "loss": 1.0198, "step": 2165 }, { "epoch": 0.2896496389408933, "grad_norm": 1.1398069858551025, "learning_rate": 1.9768894816824795e-05, "loss": 0.957, "step": 2166 }, { "epoch": 0.2897833645359722, "grad_norm": 1.1836832761764526, "learning_rate": 1.976858612133958e-05, "loss": 0.93, "step": 2167 }, { "epoch": 0.2899170901310511, "grad_norm": 1.1302859783172607, "learning_rate": 1.976827722223754e-05, "loss": 1.0515, "step": 2168 }, { "epoch": 0.29005081572613, "grad_norm": 1.1368863582611084, "learning_rate": 1.9767968119525107e-05, "loss": 1.0261, "step": 2169 }, { "epoch": 0.29018454132120886, "grad_norm": 1.2033138275146484, "learning_rate": 1.9767658813208725e-05, "loss": 0.988, "step": 2170 }, { "epoch": 0.29031826691628776, "grad_norm": 1.3053178787231445, "learning_rate": 1.976734930329484e-05, "loss": 1.1476, "step": 2171 }, { "epoch": 0.29045199251136666, "grad_norm": 1.2360053062438965, "learning_rate": 1.976703958978991e-05, "loss": 1.1277, "step": 2172 }, { "epoch": 0.29058571810644557, "grad_norm": 1.1918829679489136, "learning_rate": 1.9766729672700384e-05, "loss": 1.0967, "step": 2173 }, { "epoch": 0.29071944370152447, "grad_norm": 1.1298093795776367, "learning_rate": 1.9766419552032723e-05, "loss": 0.8845, "step": 2174 }, { "epoch": 0.2908531692966034, "grad_norm": 1.1508750915527344, "learning_rate": 1.9766109227793392e-05, "loss": 0.9392, "step": 2175 }, { "epoch": 0.2909868948916823, "grad_norm": 1.164705753326416, "learning_rate": 1.976579869998886e-05, "loss": 1.0301, "step": 2176 }, { "epoch": 0.2911206204867612, "grad_norm": 1.1448957920074463, "learning_rate": 1.9765487968625598e-05, "loss": 1.0325, "step": 2177 }, { "epoch": 0.2912543460818401, "grad_norm": 1.1842694282531738, "learning_rate": 1.976517703371008e-05, "loss": 0.9806, "step": 2178 }, { "epoch": 0.29138807167691894, "grad_norm": 1.2840756177902222, "learning_rate": 1.9764865895248796e-05, "loss": 1.0796, "step": 2179 }, { "epoch": 0.29152179727199784, "grad_norm": 1.1901638507843018, "learning_rate": 1.9764554553248227e-05, "loss": 0.9933, "step": 2180 }, { "epoch": 0.29165552286707674, "grad_norm": 1.1696873903274536, "learning_rate": 1.976424300771486e-05, "loss": 1.0102, "step": 2181 }, { "epoch": 0.29178924846215565, "grad_norm": 1.4394396543502808, "learning_rate": 1.9763931258655192e-05, "loss": 0.9474, "step": 2182 }, { "epoch": 0.29192297405723455, "grad_norm": 1.3579124212265015, "learning_rate": 1.9763619306075718e-05, "loss": 1.0434, "step": 2183 }, { "epoch": 0.29205669965231346, "grad_norm": 1.296350121498108, "learning_rate": 1.9763307149982945e-05, "loss": 1.0776, "step": 2184 }, { "epoch": 0.29219042524739236, "grad_norm": 1.0558109283447266, "learning_rate": 1.9762994790383378e-05, "loss": 0.9698, "step": 2185 }, { "epoch": 0.29232415084247126, "grad_norm": 1.145194172859192, "learning_rate": 1.976268222728352e-05, "loss": 1.0556, "step": 2186 }, { "epoch": 0.29245787643755017, "grad_norm": 1.0450239181518555, "learning_rate": 1.9762369460689898e-05, "loss": 0.8883, "step": 2187 }, { "epoch": 0.2925916020326291, "grad_norm": 1.1923145055770874, "learning_rate": 1.9762056490609026e-05, "loss": 0.9768, "step": 2188 }, { "epoch": 0.2927253276277079, "grad_norm": 1.1975294351577759, "learning_rate": 1.9761743317047426e-05, "loss": 0.9298, "step": 2189 }, { "epoch": 0.2928590532227868, "grad_norm": 1.128410816192627, "learning_rate": 1.9761429940011628e-05, "loss": 0.9782, "step": 2190 }, { "epoch": 0.29299277881786573, "grad_norm": 1.1872555017471313, "learning_rate": 1.9761116359508166e-05, "loss": 1.0959, "step": 2191 }, { "epoch": 0.29312650441294463, "grad_norm": 1.122725486755371, "learning_rate": 1.976080257554357e-05, "loss": 0.9913, "step": 2192 }, { "epoch": 0.29326023000802354, "grad_norm": 1.1662139892578125, "learning_rate": 1.9760488588124386e-05, "loss": 0.9348, "step": 2193 }, { "epoch": 0.29339395560310244, "grad_norm": 1.1060807704925537, "learning_rate": 1.9760174397257158e-05, "loss": 0.9915, "step": 2194 }, { "epoch": 0.29352768119818134, "grad_norm": 1.083228588104248, "learning_rate": 1.9759860002948435e-05, "loss": 0.9153, "step": 2195 }, { "epoch": 0.29366140679326025, "grad_norm": 1.185099720954895, "learning_rate": 1.975954540520477e-05, "loss": 1.0591, "step": 2196 }, { "epoch": 0.29379513238833915, "grad_norm": 1.2541698217391968, "learning_rate": 1.9759230604032714e-05, "loss": 0.9767, "step": 2197 }, { "epoch": 0.293928857983418, "grad_norm": 1.2425549030303955, "learning_rate": 1.975891559943884e-05, "loss": 1.0153, "step": 2198 }, { "epoch": 0.2940625835784969, "grad_norm": 1.1876336336135864, "learning_rate": 1.9758600391429708e-05, "loss": 0.9501, "step": 2199 }, { "epoch": 0.2941963091735758, "grad_norm": 1.2072685956954956, "learning_rate": 1.975828498001189e-05, "loss": 1.0263, "step": 2200 }, { "epoch": 0.2943300347686547, "grad_norm": 1.2293521165847778, "learning_rate": 1.9757969365191955e-05, "loss": 1.0329, "step": 2201 }, { "epoch": 0.2944637603637336, "grad_norm": 1.1694260835647583, "learning_rate": 1.9757653546976486e-05, "loss": 1.0037, "step": 2202 }, { "epoch": 0.2945974859588125, "grad_norm": 1.2036082744598389, "learning_rate": 1.975733752537207e-05, "loss": 1.0584, "step": 2203 }, { "epoch": 0.2947312115538914, "grad_norm": 1.2488198280334473, "learning_rate": 1.9757021300385288e-05, "loss": 0.9947, "step": 2204 }, { "epoch": 0.29486493714897033, "grad_norm": 1.394675850868225, "learning_rate": 1.9756704872022734e-05, "loss": 1.0504, "step": 2205 }, { "epoch": 0.29499866274404923, "grad_norm": 1.1404701471328735, "learning_rate": 1.9756388240291002e-05, "loss": 0.8769, "step": 2206 }, { "epoch": 0.2951323883391281, "grad_norm": 1.2411913871765137, "learning_rate": 1.9756071405196692e-05, "loss": 1.0807, "step": 2207 }, { "epoch": 0.295266113934207, "grad_norm": 1.3194738626480103, "learning_rate": 1.975575436674641e-05, "loss": 1.1141, "step": 2208 }, { "epoch": 0.2953998395292859, "grad_norm": 1.1970902681350708, "learning_rate": 1.9755437124946767e-05, "loss": 0.9878, "step": 2209 }, { "epoch": 0.2955335651243648, "grad_norm": 1.111989974975586, "learning_rate": 1.975511967980437e-05, "loss": 0.9501, "step": 2210 }, { "epoch": 0.2956672907194437, "grad_norm": 1.1697918176651, "learning_rate": 1.9754802031325835e-05, "loss": 0.8758, "step": 2211 }, { "epoch": 0.2958010163145226, "grad_norm": 1.0515260696411133, "learning_rate": 1.975448417951779e-05, "loss": 1.0099, "step": 2212 }, { "epoch": 0.2959347419096015, "grad_norm": 1.5506232976913452, "learning_rate": 1.9754166124386854e-05, "loss": 1.1191, "step": 2213 }, { "epoch": 0.2960684675046804, "grad_norm": 1.1169734001159668, "learning_rate": 1.9753847865939657e-05, "loss": 1.0127, "step": 2214 }, { "epoch": 0.2962021930997593, "grad_norm": 1.2566661834716797, "learning_rate": 1.9753529404182837e-05, "loss": 1.1001, "step": 2215 }, { "epoch": 0.2963359186948382, "grad_norm": 1.1956835985183716, "learning_rate": 1.9753210739123033e-05, "loss": 1.0767, "step": 2216 }, { "epoch": 0.29646964428991707, "grad_norm": 1.16459059715271, "learning_rate": 1.9752891870766875e-05, "loss": 1.0828, "step": 2217 }, { "epoch": 0.29660336988499597, "grad_norm": 1.1912364959716797, "learning_rate": 1.9752572799121028e-05, "loss": 0.9906, "step": 2218 }, { "epoch": 0.2967370954800749, "grad_norm": 1.2806622982025146, "learning_rate": 1.975225352419213e-05, "loss": 0.9819, "step": 2219 }, { "epoch": 0.2968708210751538, "grad_norm": 1.1616157293319702, "learning_rate": 1.9751934045986834e-05, "loss": 0.9004, "step": 2220 }, { "epoch": 0.2970045466702327, "grad_norm": 1.1637113094329834, "learning_rate": 1.975161436451181e-05, "loss": 0.9883, "step": 2221 }, { "epoch": 0.2971382722653116, "grad_norm": 1.173741102218628, "learning_rate": 1.9751294479773717e-05, "loss": 0.9525, "step": 2222 }, { "epoch": 0.2972719978603905, "grad_norm": 1.216117262840271, "learning_rate": 1.975097439177922e-05, "loss": 0.94, "step": 2223 }, { "epoch": 0.2974057234554694, "grad_norm": 1.183048129081726, "learning_rate": 1.9750654100534992e-05, "loss": 1.052, "step": 2224 }, { "epoch": 0.2975394490505483, "grad_norm": 1.1433331966400146, "learning_rate": 1.975033360604771e-05, "loss": 0.9419, "step": 2225 }, { "epoch": 0.29767317464562715, "grad_norm": 1.189164400100708, "learning_rate": 1.9750012908324053e-05, "loss": 1.0725, "step": 2226 }, { "epoch": 0.29780690024070605, "grad_norm": 1.1216585636138916, "learning_rate": 1.9749692007370704e-05, "loss": 1.0431, "step": 2227 }, { "epoch": 0.29794062583578496, "grad_norm": 1.3941398859024048, "learning_rate": 1.9749370903194358e-05, "loss": 1.1377, "step": 2228 }, { "epoch": 0.29807435143086386, "grad_norm": 1.1828982830047607, "learning_rate": 1.9749049595801705e-05, "loss": 1.052, "step": 2229 }, { "epoch": 0.29820807702594276, "grad_norm": 1.1926177740097046, "learning_rate": 1.9748728085199442e-05, "loss": 1.0206, "step": 2230 }, { "epoch": 0.29834180262102167, "grad_norm": 1.3010913133621216, "learning_rate": 1.974840637139427e-05, "loss": 0.9751, "step": 2231 }, { "epoch": 0.29847552821610057, "grad_norm": 1.172194242477417, "learning_rate": 1.9748084454392896e-05, "loss": 1.0683, "step": 2232 }, { "epoch": 0.2986092538111795, "grad_norm": 1.2202507257461548, "learning_rate": 1.9747762334202027e-05, "loss": 1.0031, "step": 2233 }, { "epoch": 0.2987429794062584, "grad_norm": 1.1875579357147217, "learning_rate": 1.9747440010828384e-05, "loss": 0.9749, "step": 2234 }, { "epoch": 0.29887670500133723, "grad_norm": 1.1717984676361084, "learning_rate": 1.9747117484278676e-05, "loss": 1.0852, "step": 2235 }, { "epoch": 0.29901043059641613, "grad_norm": 1.0917893648147583, "learning_rate": 1.9746794754559635e-05, "loss": 0.9491, "step": 2236 }, { "epoch": 0.29914415619149504, "grad_norm": 1.1625267267227173, "learning_rate": 1.9746471821677984e-05, "loss": 1.0101, "step": 2237 }, { "epoch": 0.29927788178657394, "grad_norm": 1.028390884399414, "learning_rate": 1.974614868564045e-05, "loss": 0.9509, "step": 2238 }, { "epoch": 0.29941160738165284, "grad_norm": 1.2056939601898193, "learning_rate": 1.9745825346453777e-05, "loss": 1.1086, "step": 2239 }, { "epoch": 0.29954533297673175, "grad_norm": 1.0379281044006348, "learning_rate": 1.97455018041247e-05, "loss": 0.9287, "step": 2240 }, { "epoch": 0.29967905857181065, "grad_norm": 1.165000319480896, "learning_rate": 1.974517805865996e-05, "loss": 0.9207, "step": 2241 }, { "epoch": 0.29981278416688956, "grad_norm": 1.1398564577102661, "learning_rate": 1.9744854110066313e-05, "loss": 1.0619, "step": 2242 }, { "epoch": 0.29994650976196846, "grad_norm": 1.1573647260665894, "learning_rate": 1.9744529958350505e-05, "loss": 1.0359, "step": 2243 }, { "epoch": 0.30008023535704736, "grad_norm": 1.2807424068450928, "learning_rate": 1.9744205603519293e-05, "loss": 1.2654, "step": 2244 }, { "epoch": 0.3002139609521262, "grad_norm": 1.160482406616211, "learning_rate": 1.974388104557944e-05, "loss": 1.0263, "step": 2245 }, { "epoch": 0.3003476865472051, "grad_norm": 1.1286901235580444, "learning_rate": 1.974355628453771e-05, "loss": 0.9853, "step": 2246 }, { "epoch": 0.300481412142284, "grad_norm": 1.2090590000152588, "learning_rate": 1.9743231320400877e-05, "loss": 0.8374, "step": 2247 }, { "epoch": 0.3006151377373629, "grad_norm": 1.1466095447540283, "learning_rate": 1.9742906153175707e-05, "loss": 0.9463, "step": 2248 }, { "epoch": 0.30074886333244183, "grad_norm": 1.1246426105499268, "learning_rate": 1.9742580782868983e-05, "loss": 0.8882, "step": 2249 }, { "epoch": 0.30088258892752073, "grad_norm": 1.1797356605529785, "learning_rate": 1.9742255209487483e-05, "loss": 0.9906, "step": 2250 }, { "epoch": 0.30101631452259964, "grad_norm": 1.1286355257034302, "learning_rate": 1.9741929433037996e-05, "loss": 1.0426, "step": 2251 }, { "epoch": 0.30115004011767854, "grad_norm": 1.132765769958496, "learning_rate": 1.9741603453527314e-05, "loss": 0.9923, "step": 2252 }, { "epoch": 0.30128376571275745, "grad_norm": 1.15614914894104, "learning_rate": 1.9741277270962225e-05, "loss": 1.1228, "step": 2253 }, { "epoch": 0.3014174913078363, "grad_norm": 1.1618907451629639, "learning_rate": 1.9740950885349536e-05, "loss": 1.0237, "step": 2254 }, { "epoch": 0.3015512169029152, "grad_norm": 1.3347283601760864, "learning_rate": 1.974062429669605e-05, "loss": 1.022, "step": 2255 }, { "epoch": 0.3016849424979941, "grad_norm": 0.9908042550086975, "learning_rate": 1.9740297505008565e-05, "loss": 0.9313, "step": 2256 }, { "epoch": 0.301818668093073, "grad_norm": 1.2134469747543335, "learning_rate": 1.9739970510293903e-05, "loss": 1.0773, "step": 2257 }, { "epoch": 0.3019523936881519, "grad_norm": 1.1534372568130493, "learning_rate": 1.9739643312558875e-05, "loss": 0.8971, "step": 2258 }, { "epoch": 0.3020861192832308, "grad_norm": 1.1988004446029663, "learning_rate": 1.97393159118103e-05, "loss": 0.9809, "step": 2259 }, { "epoch": 0.3022198448783097, "grad_norm": 1.278108835220337, "learning_rate": 1.9738988308055006e-05, "loss": 1.0303, "step": 2260 }, { "epoch": 0.3023535704733886, "grad_norm": 1.265594720840454, "learning_rate": 1.9738660501299823e-05, "loss": 1.0356, "step": 2261 }, { "epoch": 0.3024872960684675, "grad_norm": 1.1128697395324707, "learning_rate": 1.9738332491551574e-05, "loss": 1.0335, "step": 2262 }, { "epoch": 0.30262102166354643, "grad_norm": 1.1275129318237305, "learning_rate": 1.9738004278817107e-05, "loss": 0.9489, "step": 2263 }, { "epoch": 0.3027547472586253, "grad_norm": 1.2506957054138184, "learning_rate": 1.9737675863103257e-05, "loss": 1.1969, "step": 2264 }, { "epoch": 0.3028884728537042, "grad_norm": 1.1707462072372437, "learning_rate": 1.9737347244416876e-05, "loss": 0.8943, "step": 2265 }, { "epoch": 0.3030221984487831, "grad_norm": 1.3129950761795044, "learning_rate": 1.9737018422764803e-05, "loss": 0.957, "step": 2266 }, { "epoch": 0.303155924043862, "grad_norm": 1.3260670900344849, "learning_rate": 1.9736689398153905e-05, "loss": 1.0951, "step": 2267 }, { "epoch": 0.3032896496389409, "grad_norm": 1.1444002389907837, "learning_rate": 1.973636017059103e-05, "loss": 1.0037, "step": 2268 }, { "epoch": 0.3034233752340198, "grad_norm": 1.2704200744628906, "learning_rate": 1.9736030740083045e-05, "loss": 0.9369, "step": 2269 }, { "epoch": 0.3035571008290987, "grad_norm": 1.2839304208755493, "learning_rate": 1.9735701106636814e-05, "loss": 1.093, "step": 2270 }, { "epoch": 0.3036908264241776, "grad_norm": 1.1554511785507202, "learning_rate": 1.973537127025921e-05, "loss": 0.9884, "step": 2271 }, { "epoch": 0.3038245520192565, "grad_norm": 1.139846682548523, "learning_rate": 1.9735041230957108e-05, "loss": 0.9654, "step": 2272 }, { "epoch": 0.30395827761433536, "grad_norm": 1.1091350317001343, "learning_rate": 1.9734710988737385e-05, "loss": 0.9497, "step": 2273 }, { "epoch": 0.30409200320941426, "grad_norm": 1.1415122747421265, "learning_rate": 1.9734380543606932e-05, "loss": 0.9337, "step": 2274 }, { "epoch": 0.30422572880449317, "grad_norm": 1.2031067609786987, "learning_rate": 1.9734049895572626e-05, "loss": 0.9536, "step": 2275 }, { "epoch": 0.30435945439957207, "grad_norm": 1.1437950134277344, "learning_rate": 1.9733719044641366e-05, "loss": 1.0396, "step": 2276 }, { "epoch": 0.304493179994651, "grad_norm": 1.2494534254074097, "learning_rate": 1.9733387990820047e-05, "loss": 0.9652, "step": 2277 }, { "epoch": 0.3046269055897299, "grad_norm": 1.2008118629455566, "learning_rate": 1.9733056734115567e-05, "loss": 1.0456, "step": 2278 }, { "epoch": 0.3047606311848088, "grad_norm": 1.1481235027313232, "learning_rate": 1.9732725274534837e-05, "loss": 0.9644, "step": 2279 }, { "epoch": 0.3048943567798877, "grad_norm": 1.2427749633789062, "learning_rate": 1.973239361208476e-05, "loss": 0.9663, "step": 2280 }, { "epoch": 0.3050280823749666, "grad_norm": 1.0829435586929321, "learning_rate": 1.973206174677225e-05, "loss": 1.0344, "step": 2281 }, { "epoch": 0.30516180797004544, "grad_norm": 1.0766233205795288, "learning_rate": 1.9731729678604226e-05, "loss": 1.0859, "step": 2282 }, { "epoch": 0.30529553356512434, "grad_norm": 1.216006875038147, "learning_rate": 1.973139740758761e-05, "loss": 0.9386, "step": 2283 }, { "epoch": 0.30542925916020325, "grad_norm": 1.1430648565292358, "learning_rate": 1.9731064933729324e-05, "loss": 1.0004, "step": 2284 }, { "epoch": 0.30556298475528215, "grad_norm": 1.1239149570465088, "learning_rate": 1.9730732257036303e-05, "loss": 0.9436, "step": 2285 }, { "epoch": 0.30569671035036106, "grad_norm": 1.2132402658462524, "learning_rate": 1.973039937751548e-05, "loss": 0.9276, "step": 2286 }, { "epoch": 0.30583043594543996, "grad_norm": 1.2516506910324097, "learning_rate": 1.9730066295173794e-05, "loss": 1.0626, "step": 2287 }, { "epoch": 0.30596416154051886, "grad_norm": 1.1240605115890503, "learning_rate": 1.9729733010018186e-05, "loss": 0.9528, "step": 2288 }, { "epoch": 0.30609788713559777, "grad_norm": 1.1625926494598389, "learning_rate": 1.9729399522055603e-05, "loss": 1.0005, "step": 2289 }, { "epoch": 0.30623161273067667, "grad_norm": 1.4405685663223267, "learning_rate": 1.9729065831292996e-05, "loss": 1.2219, "step": 2290 }, { "epoch": 0.3063653383257556, "grad_norm": 1.1085350513458252, "learning_rate": 1.9728731937737326e-05, "loss": 0.9635, "step": 2291 }, { "epoch": 0.3064990639208344, "grad_norm": 1.276872158050537, "learning_rate": 1.9728397841395544e-05, "loss": 1.0476, "step": 2292 }, { "epoch": 0.30663278951591333, "grad_norm": 1.1097266674041748, "learning_rate": 1.9728063542274617e-05, "loss": 0.9807, "step": 2293 }, { "epoch": 0.30676651511099223, "grad_norm": 1.1223084926605225, "learning_rate": 1.9727729040381517e-05, "loss": 1.0409, "step": 2294 }, { "epoch": 0.30690024070607114, "grad_norm": 1.1173349618911743, "learning_rate": 1.972739433572321e-05, "loss": 0.9245, "step": 2295 }, { "epoch": 0.30703396630115004, "grad_norm": 1.2066407203674316, "learning_rate": 1.972705942830668e-05, "loss": 1.0129, "step": 2296 }, { "epoch": 0.30716769189622894, "grad_norm": 1.1713558435440063, "learning_rate": 1.9726724318138905e-05, "loss": 0.969, "step": 2297 }, { "epoch": 0.30730141749130785, "grad_norm": 1.2510708570480347, "learning_rate": 1.9726389005226865e-05, "loss": 0.9516, "step": 2298 }, { "epoch": 0.30743514308638675, "grad_norm": 1.0327454805374146, "learning_rate": 1.9726053489577555e-05, "loss": 0.8915, "step": 2299 }, { "epoch": 0.30756886868146566, "grad_norm": 1.1685277223587036, "learning_rate": 1.972571777119797e-05, "loss": 1.0306, "step": 2300 }, { "epoch": 0.3077025942765445, "grad_norm": 1.1453516483306885, "learning_rate": 1.97253818500951e-05, "loss": 1.0247, "step": 2301 }, { "epoch": 0.3078363198716234, "grad_norm": 1.2007924318313599, "learning_rate": 1.9725045726275954e-05, "loss": 0.943, "step": 2302 }, { "epoch": 0.3079700454667023, "grad_norm": 1.3448234796524048, "learning_rate": 1.9724709399747532e-05, "loss": 1.0375, "step": 2303 }, { "epoch": 0.3081037710617812, "grad_norm": 1.2502800226211548, "learning_rate": 1.972437287051685e-05, "loss": 1.0505, "step": 2304 }, { "epoch": 0.3082374966568601, "grad_norm": 1.3218092918395996, "learning_rate": 1.9724036138590926e-05, "loss": 1.0447, "step": 2305 }, { "epoch": 0.308371222251939, "grad_norm": 1.1700409650802612, "learning_rate": 1.9723699203976768e-05, "loss": 0.9997, "step": 2306 }, { "epoch": 0.30850494784701793, "grad_norm": 1.1452702283859253, "learning_rate": 1.9723362066681403e-05, "loss": 1.0042, "step": 2307 }, { "epoch": 0.30863867344209683, "grad_norm": 1.1776940822601318, "learning_rate": 1.9723024726711866e-05, "loss": 1.1546, "step": 2308 }, { "epoch": 0.30877239903717574, "grad_norm": 1.200139045715332, "learning_rate": 1.972268718407518e-05, "loss": 0.9703, "step": 2309 }, { "epoch": 0.3089061246322546, "grad_norm": 1.2481821775436401, "learning_rate": 1.972234943877838e-05, "loss": 1.0634, "step": 2310 }, { "epoch": 0.3090398502273335, "grad_norm": 1.1221435070037842, "learning_rate": 1.9722011490828514e-05, "loss": 0.9114, "step": 2311 }, { "epoch": 0.3091735758224124, "grad_norm": 1.2518023252487183, "learning_rate": 1.9721673340232617e-05, "loss": 1.0682, "step": 2312 }, { "epoch": 0.3093073014174913, "grad_norm": 1.2186800241470337, "learning_rate": 1.9721334986997746e-05, "loss": 0.9866, "step": 2313 }, { "epoch": 0.3094410270125702, "grad_norm": 1.3271985054016113, "learning_rate": 1.9720996431130946e-05, "loss": 0.9743, "step": 2314 }, { "epoch": 0.3095747526076491, "grad_norm": 1.083513617515564, "learning_rate": 1.972065767263928e-05, "loss": 1.0378, "step": 2315 }, { "epoch": 0.309708478202728, "grad_norm": 1.1666163206100464, "learning_rate": 1.9720318711529804e-05, "loss": 1.0245, "step": 2316 }, { "epoch": 0.3098422037978069, "grad_norm": 1.1062474250793457, "learning_rate": 1.971997954780959e-05, "loss": 1.0461, "step": 2317 }, { "epoch": 0.3099759293928858, "grad_norm": 1.1604300737380981, "learning_rate": 1.97196401814857e-05, "loss": 1.054, "step": 2318 }, { "epoch": 0.3101096549879647, "grad_norm": 1.190425157546997, "learning_rate": 1.9719300612565214e-05, "loss": 1.0135, "step": 2319 }, { "epoch": 0.31024338058304357, "grad_norm": 1.3250095844268799, "learning_rate": 1.97189608410552e-05, "loss": 1.144, "step": 2320 }, { "epoch": 0.3103771061781225, "grad_norm": 1.1069201231002808, "learning_rate": 1.9718620866962754e-05, "loss": 0.9722, "step": 2321 }, { "epoch": 0.3105108317732014, "grad_norm": 1.240280270576477, "learning_rate": 1.9718280690294954e-05, "loss": 1.0689, "step": 2322 }, { "epoch": 0.3106445573682803, "grad_norm": 1.2191075086593628, "learning_rate": 1.9717940311058893e-05, "loss": 0.9788, "step": 2323 }, { "epoch": 0.3107782829633592, "grad_norm": 1.1158037185668945, "learning_rate": 1.9717599729261666e-05, "loss": 0.9691, "step": 2324 }, { "epoch": 0.3109120085584381, "grad_norm": 1.203933835029602, "learning_rate": 1.9717258944910366e-05, "loss": 0.9183, "step": 2325 }, { "epoch": 0.311045734153517, "grad_norm": 1.279782772064209, "learning_rate": 1.9716917958012106e-05, "loss": 1.0965, "step": 2326 }, { "epoch": 0.3111794597485959, "grad_norm": 1.1461976766586304, "learning_rate": 1.971657676857399e-05, "loss": 1.0505, "step": 2327 }, { "epoch": 0.3113131853436748, "grad_norm": 1.2181671857833862, "learning_rate": 1.971623537660313e-05, "loss": 1.008, "step": 2328 }, { "epoch": 0.31144691093875365, "grad_norm": 1.1614128351211548, "learning_rate": 1.9715893782106638e-05, "loss": 0.8455, "step": 2329 }, { "epoch": 0.31158063653383256, "grad_norm": 1.2468427419662476, "learning_rate": 1.9715551985091637e-05, "loss": 1.1784, "step": 2330 }, { "epoch": 0.31171436212891146, "grad_norm": 1.2512284517288208, "learning_rate": 1.9715209985565252e-05, "loss": 1.1879, "step": 2331 }, { "epoch": 0.31184808772399036, "grad_norm": 1.11635422706604, "learning_rate": 1.9714867783534614e-05, "loss": 0.9189, "step": 2332 }, { "epoch": 0.31198181331906927, "grad_norm": 1.2817426919937134, "learning_rate": 1.971452537900685e-05, "loss": 1.1133, "step": 2333 }, { "epoch": 0.31211553891414817, "grad_norm": 1.0824156999588013, "learning_rate": 1.97141827719891e-05, "loss": 0.9566, "step": 2334 }, { "epoch": 0.3122492645092271, "grad_norm": 1.1821709871292114, "learning_rate": 1.971383996248851e-05, "loss": 0.996, "step": 2335 }, { "epoch": 0.312382990104306, "grad_norm": 1.177978515625, "learning_rate": 1.9713496950512217e-05, "loss": 0.9692, "step": 2336 }, { "epoch": 0.3125167156993849, "grad_norm": 1.3418282270431519, "learning_rate": 1.9713153736067377e-05, "loss": 1.0135, "step": 2337 }, { "epoch": 0.3126504412944638, "grad_norm": 1.167195439338684, "learning_rate": 1.971281031916114e-05, "loss": 1.0505, "step": 2338 }, { "epoch": 0.31278416688954264, "grad_norm": 1.3744072914123535, "learning_rate": 1.971246669980067e-05, "loss": 1.1362, "step": 2339 }, { "epoch": 0.31291789248462154, "grad_norm": 1.084775447845459, "learning_rate": 1.971212287799312e-05, "loss": 0.9365, "step": 2340 }, { "epoch": 0.31305161807970044, "grad_norm": 1.0068764686584473, "learning_rate": 1.9711778853745663e-05, "loss": 0.8748, "step": 2341 }, { "epoch": 0.31318534367477935, "grad_norm": 1.0703853368759155, "learning_rate": 1.9711434627065472e-05, "loss": 0.996, "step": 2342 }, { "epoch": 0.31331906926985825, "grad_norm": 1.1515754461288452, "learning_rate": 1.9711090197959715e-05, "loss": 1.0193, "step": 2343 }, { "epoch": 0.31345279486493716, "grad_norm": 1.1545875072479248, "learning_rate": 1.9710745566435578e-05, "loss": 1.0018, "step": 2344 }, { "epoch": 0.31358652046001606, "grad_norm": 1.1640560626983643, "learning_rate": 1.9710400732500242e-05, "loss": 0.9701, "step": 2345 }, { "epoch": 0.31372024605509496, "grad_norm": 1.0496866703033447, "learning_rate": 1.9710055696160895e-05, "loss": 0.8721, "step": 2346 }, { "epoch": 0.31385397165017387, "grad_norm": 1.3501888513565063, "learning_rate": 1.970971045742473e-05, "loss": 1.0636, "step": 2347 }, { "epoch": 0.3139876972452527, "grad_norm": 1.1900006532669067, "learning_rate": 1.970936501629894e-05, "loss": 1.0052, "step": 2348 }, { "epoch": 0.3141214228403316, "grad_norm": 1.0351680517196655, "learning_rate": 1.9709019372790722e-05, "loss": 0.9268, "step": 2349 }, { "epoch": 0.3142551484354105, "grad_norm": 1.0425423383712769, "learning_rate": 1.9708673526907293e-05, "loss": 0.9444, "step": 2350 }, { "epoch": 0.31438887403048943, "grad_norm": 1.2512761354446411, "learning_rate": 1.9708327478655855e-05, "loss": 1.1876, "step": 2351 }, { "epoch": 0.31452259962556833, "grad_norm": 1.3124017715454102, "learning_rate": 1.9707981228043614e-05, "loss": 1.0344, "step": 2352 }, { "epoch": 0.31465632522064724, "grad_norm": 1.0086859464645386, "learning_rate": 1.9707634775077797e-05, "loss": 0.8714, "step": 2353 }, { "epoch": 0.31479005081572614, "grad_norm": 1.2256962060928345, "learning_rate": 1.9707288119765625e-05, "loss": 1.0197, "step": 2354 }, { "epoch": 0.31492377641080505, "grad_norm": 1.1449984312057495, "learning_rate": 1.9706941262114317e-05, "loss": 1.0212, "step": 2355 }, { "epoch": 0.31505750200588395, "grad_norm": 1.1958261728286743, "learning_rate": 1.9706594202131107e-05, "loss": 1.1267, "step": 2356 }, { "epoch": 0.3151912276009628, "grad_norm": 1.1744670867919922, "learning_rate": 1.9706246939823232e-05, "loss": 1.0381, "step": 2357 }, { "epoch": 0.3153249531960417, "grad_norm": 1.2177389860153198, "learning_rate": 1.9705899475197926e-05, "loss": 1.1104, "step": 2358 }, { "epoch": 0.3154586787911206, "grad_norm": 1.1450207233428955, "learning_rate": 1.9705551808262432e-05, "loss": 0.929, "step": 2359 }, { "epoch": 0.3155924043861995, "grad_norm": 1.0978758335113525, "learning_rate": 1.9705203939024e-05, "loss": 0.9712, "step": 2360 }, { "epoch": 0.3157261299812784, "grad_norm": 1.1750407218933105, "learning_rate": 1.9704855867489876e-05, "loss": 1.0172, "step": 2361 }, { "epoch": 0.3158598555763573, "grad_norm": 1.1775720119476318, "learning_rate": 1.970450759366732e-05, "loss": 0.9547, "step": 2362 }, { "epoch": 0.3159935811714362, "grad_norm": 1.0643346309661865, "learning_rate": 1.9704159117563587e-05, "loss": 0.8894, "step": 2363 }, { "epoch": 0.3161273067665151, "grad_norm": 1.1409015655517578, "learning_rate": 1.9703810439185946e-05, "loss": 1.0195, "step": 2364 }, { "epoch": 0.31626103236159403, "grad_norm": 1.1865304708480835, "learning_rate": 1.9703461558541662e-05, "loss": 1.0843, "step": 2365 }, { "epoch": 0.31639475795667293, "grad_norm": 1.1936390399932861, "learning_rate": 1.9703112475638003e-05, "loss": 1.11, "step": 2366 }, { "epoch": 0.3165284835517518, "grad_norm": 1.2269513607025146, "learning_rate": 1.9702763190482256e-05, "loss": 0.9617, "step": 2367 }, { "epoch": 0.3166622091468307, "grad_norm": 1.1934444904327393, "learning_rate": 1.970241370308169e-05, "loss": 1.0168, "step": 2368 }, { "epoch": 0.3167959347419096, "grad_norm": 1.138992190361023, "learning_rate": 1.9702064013443592e-05, "loss": 1.111, "step": 2369 }, { "epoch": 0.3169296603369885, "grad_norm": 1.2658984661102295, "learning_rate": 1.970171412157526e-05, "loss": 1.1461, "step": 2370 }, { "epoch": 0.3170633859320674, "grad_norm": 1.1690174341201782, "learning_rate": 1.970136402748398e-05, "loss": 0.9606, "step": 2371 }, { "epoch": 0.3171971115271463, "grad_norm": 1.230116844177246, "learning_rate": 1.9701013731177047e-05, "loss": 1.0524, "step": 2372 }, { "epoch": 0.3173308371222252, "grad_norm": 1.0801745653152466, "learning_rate": 1.9700663232661765e-05, "loss": 0.9436, "step": 2373 }, { "epoch": 0.3174645627173041, "grad_norm": 1.3025161027908325, "learning_rate": 1.9700312531945444e-05, "loss": 1.0195, "step": 2374 }, { "epoch": 0.317598288312383, "grad_norm": 1.1869661808013916, "learning_rate": 1.9699961629035386e-05, "loss": 1.0454, "step": 2375 }, { "epoch": 0.31773201390746186, "grad_norm": 1.2095932960510254, "learning_rate": 1.9699610523938912e-05, "loss": 0.9587, "step": 2376 }, { "epoch": 0.31786573950254077, "grad_norm": 1.0450241565704346, "learning_rate": 1.9699259216663338e-05, "loss": 0.9343, "step": 2377 }, { "epoch": 0.31799946509761967, "grad_norm": 1.1858789920806885, "learning_rate": 1.9698907707215985e-05, "loss": 0.9498, "step": 2378 }, { "epoch": 0.3181331906926986, "grad_norm": 1.230066180229187, "learning_rate": 1.9698555995604188e-05, "loss": 1.0616, "step": 2379 }, { "epoch": 0.3182669162877775, "grad_norm": 1.2173399925231934, "learning_rate": 1.9698204081835266e-05, "loss": 1.0992, "step": 2380 }, { "epoch": 0.3184006418828564, "grad_norm": 1.163827896118164, "learning_rate": 1.969785196591656e-05, "loss": 0.9741, "step": 2381 }, { "epoch": 0.3185343674779353, "grad_norm": 1.1509188413619995, "learning_rate": 1.9697499647855413e-05, "loss": 0.9972, "step": 2382 }, { "epoch": 0.3186680930730142, "grad_norm": 1.130071997642517, "learning_rate": 1.969714712765916e-05, "loss": 1.0734, "step": 2383 }, { "epoch": 0.3188018186680931, "grad_norm": 1.1836953163146973, "learning_rate": 1.969679440533516e-05, "loss": 1.0602, "step": 2384 }, { "epoch": 0.31893554426317194, "grad_norm": 1.176977276802063, "learning_rate": 1.9696441480890757e-05, "loss": 1.0698, "step": 2385 }, { "epoch": 0.31906926985825085, "grad_norm": 1.1579760313034058, "learning_rate": 1.9696088354333313e-05, "loss": 1.0044, "step": 2386 }, { "epoch": 0.31920299545332975, "grad_norm": 1.1565437316894531, "learning_rate": 1.9695735025670178e-05, "loss": 0.9129, "step": 2387 }, { "epoch": 0.31933672104840866, "grad_norm": 1.2603696584701538, "learning_rate": 1.9695381494908733e-05, "loss": 1.0412, "step": 2388 }, { "epoch": 0.31947044664348756, "grad_norm": 1.1807267665863037, "learning_rate": 1.9695027762056333e-05, "loss": 1.0563, "step": 2389 }, { "epoch": 0.31960417223856646, "grad_norm": 1.134080171585083, "learning_rate": 1.9694673827120354e-05, "loss": 1.0307, "step": 2390 }, { "epoch": 0.31973789783364537, "grad_norm": 1.1582611799240112, "learning_rate": 1.9694319690108182e-05, "loss": 1.0194, "step": 2391 }, { "epoch": 0.31987162342872427, "grad_norm": 1.3401755094528198, "learning_rate": 1.969396535102719e-05, "loss": 1.042, "step": 2392 }, { "epoch": 0.3200053490238032, "grad_norm": 1.2861007452011108, "learning_rate": 1.9693610809884764e-05, "loss": 1.0981, "step": 2393 }, { "epoch": 0.3201390746188821, "grad_norm": 1.0507349967956543, "learning_rate": 1.96932560666883e-05, "loss": 0.9389, "step": 2394 }, { "epoch": 0.32027280021396093, "grad_norm": 1.3202192783355713, "learning_rate": 1.9692901121445187e-05, "loss": 1.0343, "step": 2395 }, { "epoch": 0.32040652580903983, "grad_norm": 1.1251357793807983, "learning_rate": 1.9692545974162826e-05, "loss": 1.0231, "step": 2396 }, { "epoch": 0.32054025140411874, "grad_norm": 1.2302676439285278, "learning_rate": 1.9692190624848616e-05, "loss": 0.9627, "step": 2397 }, { "epoch": 0.32067397699919764, "grad_norm": 1.1330833435058594, "learning_rate": 1.969183507350997e-05, "loss": 0.8752, "step": 2398 }, { "epoch": 0.32080770259427654, "grad_norm": 1.0865366458892822, "learning_rate": 1.9691479320154295e-05, "loss": 1.0059, "step": 2399 }, { "epoch": 0.32094142818935545, "grad_norm": 1.3230291604995728, "learning_rate": 1.9691123364789008e-05, "loss": 1.0611, "step": 2400 }, { "epoch": 0.32107515378443435, "grad_norm": 1.3397996425628662, "learning_rate": 1.9690767207421527e-05, "loss": 1.069, "step": 2401 }, { "epoch": 0.32120887937951326, "grad_norm": 1.2741390466690063, "learning_rate": 1.9690410848059278e-05, "loss": 0.9918, "step": 2402 }, { "epoch": 0.32134260497459216, "grad_norm": 1.1550568342208862, "learning_rate": 1.969005428670969e-05, "loss": 1.1248, "step": 2403 }, { "epoch": 0.321476330569671, "grad_norm": 1.2188063859939575, "learning_rate": 1.968969752338019e-05, "loss": 0.9138, "step": 2404 }, { "epoch": 0.3216100561647499, "grad_norm": 1.1910172700881958, "learning_rate": 1.9689340558078212e-05, "loss": 0.975, "step": 2405 }, { "epoch": 0.3217437817598288, "grad_norm": 1.1405029296875, "learning_rate": 1.9688983390811204e-05, "loss": 0.8924, "step": 2406 }, { "epoch": 0.3218775073549077, "grad_norm": 1.1406763792037964, "learning_rate": 1.9688626021586615e-05, "loss": 0.913, "step": 2407 }, { "epoch": 0.3220112329499866, "grad_norm": 1.1816368103027344, "learning_rate": 1.9688268450411882e-05, "loss": 0.968, "step": 2408 }, { "epoch": 0.32214495854506553, "grad_norm": 1.2005079984664917, "learning_rate": 1.9687910677294466e-05, "loss": 1.0293, "step": 2409 }, { "epoch": 0.32227868414014443, "grad_norm": 1.2041183710098267, "learning_rate": 1.9687552702241823e-05, "loss": 1.0668, "step": 2410 }, { "epoch": 0.32241240973522334, "grad_norm": 1.1517561674118042, "learning_rate": 1.9687194525261408e-05, "loss": 0.9578, "step": 2411 }, { "epoch": 0.32254613533030224, "grad_norm": 1.372638463973999, "learning_rate": 1.9686836146360698e-05, "loss": 1.1175, "step": 2412 }, { "epoch": 0.32267986092538115, "grad_norm": 1.1384968757629395, "learning_rate": 1.9686477565547157e-05, "loss": 0.9554, "step": 2413 }, { "epoch": 0.32281358652046, "grad_norm": 1.1989781856536865, "learning_rate": 1.968611878282826e-05, "loss": 0.9901, "step": 2414 }, { "epoch": 0.3229473121155389, "grad_norm": 1.212981939315796, "learning_rate": 1.9685759798211488e-05, "loss": 1.0625, "step": 2415 }, { "epoch": 0.3230810377106178, "grad_norm": 1.1808278560638428, "learning_rate": 1.968540061170432e-05, "loss": 1.0136, "step": 2416 }, { "epoch": 0.3232147633056967, "grad_norm": 1.2867447137832642, "learning_rate": 1.968504122331424e-05, "loss": 0.9276, "step": 2417 }, { "epoch": 0.3233484889007756, "grad_norm": 1.2021349668502808, "learning_rate": 1.9684681633048748e-05, "loss": 1.0046, "step": 2418 }, { "epoch": 0.3234822144958545, "grad_norm": 1.2332921028137207, "learning_rate": 1.968432184091533e-05, "loss": 1.049, "step": 2419 }, { "epoch": 0.3236159400909334, "grad_norm": 1.0550178289413452, "learning_rate": 1.9683961846921495e-05, "loss": 0.9516, "step": 2420 }, { "epoch": 0.3237496656860123, "grad_norm": 1.1444095373153687, "learning_rate": 1.9683601651074743e-05, "loss": 1.045, "step": 2421 }, { "epoch": 0.3238833912810912, "grad_norm": 1.3518046140670776, "learning_rate": 1.9683241253382578e-05, "loss": 1.1205, "step": 2422 }, { "epoch": 0.3240171168761701, "grad_norm": 1.0830843448638916, "learning_rate": 1.968288065385251e-05, "loss": 1.0283, "step": 2423 }, { "epoch": 0.324150842471249, "grad_norm": 1.2061160802841187, "learning_rate": 1.9682519852492066e-05, "loss": 0.938, "step": 2424 }, { "epoch": 0.3242845680663279, "grad_norm": 1.095863699913025, "learning_rate": 1.968215884930876e-05, "loss": 0.8912, "step": 2425 }, { "epoch": 0.3244182936614068, "grad_norm": 1.141638159751892, "learning_rate": 1.9681797644310116e-05, "loss": 0.9213, "step": 2426 }, { "epoch": 0.3245520192564857, "grad_norm": 1.1542855501174927, "learning_rate": 1.9681436237503667e-05, "loss": 0.872, "step": 2427 }, { "epoch": 0.3246857448515646, "grad_norm": 1.1600905656814575, "learning_rate": 1.9681074628896945e-05, "loss": 1.0385, "step": 2428 }, { "epoch": 0.3248194704466435, "grad_norm": 1.1197657585144043, "learning_rate": 1.9680712818497484e-05, "loss": 0.9619, "step": 2429 }, { "epoch": 0.3249531960417224, "grad_norm": 1.2793159484863281, "learning_rate": 1.9680350806312826e-05, "loss": 1.0009, "step": 2430 }, { "epoch": 0.3250869216368013, "grad_norm": 1.1968907117843628, "learning_rate": 1.967998859235052e-05, "loss": 1.0387, "step": 2431 }, { "epoch": 0.32522064723188016, "grad_norm": 1.0916651487350464, "learning_rate": 1.9679626176618118e-05, "loss": 0.9038, "step": 2432 }, { "epoch": 0.32535437282695906, "grad_norm": 1.24396550655365, "learning_rate": 1.9679263559123164e-05, "loss": 1.0481, "step": 2433 }, { "epoch": 0.32548809842203796, "grad_norm": 1.1350520849227905, "learning_rate": 1.967890073987323e-05, "loss": 0.9202, "step": 2434 }, { "epoch": 0.32562182401711687, "grad_norm": 1.1618800163269043, "learning_rate": 1.9678537718875865e-05, "loss": 1.0256, "step": 2435 }, { "epoch": 0.32575554961219577, "grad_norm": 1.1498866081237793, "learning_rate": 1.9678174496138645e-05, "loss": 1.0106, "step": 2436 }, { "epoch": 0.3258892752072747, "grad_norm": 1.1057292222976685, "learning_rate": 1.967781107166914e-05, "loss": 0.9647, "step": 2437 }, { "epoch": 0.3260230008023536, "grad_norm": 1.298959732055664, "learning_rate": 1.9677447445474923e-05, "loss": 1.2417, "step": 2438 }, { "epoch": 0.3261567263974325, "grad_norm": 1.2618036270141602, "learning_rate": 1.967708361756358e-05, "loss": 0.9229, "step": 2439 }, { "epoch": 0.3262904519925114, "grad_norm": 1.0797914266586304, "learning_rate": 1.967671958794268e-05, "loss": 0.9972, "step": 2440 }, { "epoch": 0.3264241775875903, "grad_norm": 1.0676758289337158, "learning_rate": 1.9676355356619824e-05, "loss": 1.0198, "step": 2441 }, { "epoch": 0.32655790318266914, "grad_norm": 1.1564595699310303, "learning_rate": 1.96759909236026e-05, "loss": 0.941, "step": 2442 }, { "epoch": 0.32669162877774804, "grad_norm": 1.1766642332077026, "learning_rate": 1.9675626288898604e-05, "loss": 0.9186, "step": 2443 }, { "epoch": 0.32682535437282695, "grad_norm": 1.1774756908416748, "learning_rate": 1.9675261452515434e-05, "loss": 1.0513, "step": 2444 }, { "epoch": 0.32695907996790585, "grad_norm": 1.0944279432296753, "learning_rate": 1.96748964144607e-05, "loss": 1.0382, "step": 2445 }, { "epoch": 0.32709280556298476, "grad_norm": 1.17111074924469, "learning_rate": 1.9674531174742007e-05, "loss": 1.098, "step": 2446 }, { "epoch": 0.32722653115806366, "grad_norm": 1.1919169425964355, "learning_rate": 1.967416573336697e-05, "loss": 0.9892, "step": 2447 }, { "epoch": 0.32736025675314256, "grad_norm": 1.2979373931884766, "learning_rate": 1.9673800090343204e-05, "loss": 0.9587, "step": 2448 }, { "epoch": 0.32749398234822147, "grad_norm": 1.210742712020874, "learning_rate": 1.9673434245678335e-05, "loss": 1.0121, "step": 2449 }, { "epoch": 0.3276277079433004, "grad_norm": 1.227232813835144, "learning_rate": 1.9673068199379984e-05, "loss": 1.1142, "step": 2450 }, { "epoch": 0.3277614335383792, "grad_norm": 1.1151500940322876, "learning_rate": 1.967270195145578e-05, "loss": 1.0396, "step": 2451 }, { "epoch": 0.3278951591334581, "grad_norm": 1.2713627815246582, "learning_rate": 1.9672335501913365e-05, "loss": 1.0332, "step": 2452 }, { "epoch": 0.32802888472853703, "grad_norm": 1.1099375486373901, "learning_rate": 1.9671968850760366e-05, "loss": 1.1004, "step": 2453 }, { "epoch": 0.32816261032361593, "grad_norm": 1.2335171699523926, "learning_rate": 1.9671601998004436e-05, "loss": 1.1221, "step": 2454 }, { "epoch": 0.32829633591869484, "grad_norm": 1.2816839218139648, "learning_rate": 1.9671234943653215e-05, "loss": 1.1262, "step": 2455 }, { "epoch": 0.32843006151377374, "grad_norm": 1.1292667388916016, "learning_rate": 1.9670867687714356e-05, "loss": 0.9708, "step": 2456 }, { "epoch": 0.32856378710885265, "grad_norm": 1.2714191675186157, "learning_rate": 1.9670500230195512e-05, "loss": 0.8945, "step": 2457 }, { "epoch": 0.32869751270393155, "grad_norm": 1.2258857488632202, "learning_rate": 1.967013257110435e-05, "loss": 0.9633, "step": 2458 }, { "epoch": 0.32883123829901045, "grad_norm": 1.1638267040252686, "learning_rate": 1.9669764710448523e-05, "loss": 0.9807, "step": 2459 }, { "epoch": 0.3289649638940893, "grad_norm": 1.1591609716415405, "learning_rate": 1.9669396648235704e-05, "loss": 1.1655, "step": 2460 }, { "epoch": 0.3290986894891682, "grad_norm": 1.1324043273925781, "learning_rate": 1.9669028384473568e-05, "loss": 0.9203, "step": 2461 }, { "epoch": 0.3292324150842471, "grad_norm": 1.1558243036270142, "learning_rate": 1.9668659919169785e-05, "loss": 1.0153, "step": 2462 }, { "epoch": 0.329366140679326, "grad_norm": 1.1760532855987549, "learning_rate": 1.9668291252332038e-05, "loss": 0.8862, "step": 2463 }, { "epoch": 0.3294998662744049, "grad_norm": 1.3157655000686646, "learning_rate": 1.966792238396801e-05, "loss": 1.0399, "step": 2464 }, { "epoch": 0.3296335918694838, "grad_norm": 1.1519900560379028, "learning_rate": 1.966755331408539e-05, "loss": 1.06, "step": 2465 }, { "epoch": 0.3297673174645627, "grad_norm": 1.1726974248886108, "learning_rate": 1.9667184042691877e-05, "loss": 0.9835, "step": 2466 }, { "epoch": 0.32990104305964163, "grad_norm": 1.2968918085098267, "learning_rate": 1.966681456979516e-05, "loss": 0.9068, "step": 2467 }, { "epoch": 0.33003476865472053, "grad_norm": 1.1878401041030884, "learning_rate": 1.9666444895402942e-05, "loss": 0.9437, "step": 2468 }, { "epoch": 0.33016849424979944, "grad_norm": 1.1700770854949951, "learning_rate": 1.9666075019522933e-05, "loss": 1.0268, "step": 2469 }, { "epoch": 0.3303022198448783, "grad_norm": 1.2303813695907593, "learning_rate": 1.966570494216284e-05, "loss": 1.0115, "step": 2470 }, { "epoch": 0.3304359454399572, "grad_norm": 1.2742059230804443, "learning_rate": 1.9665334663330372e-05, "loss": 1.0371, "step": 2471 }, { "epoch": 0.3305696710350361, "grad_norm": 1.163232684135437, "learning_rate": 1.9664964183033256e-05, "loss": 1.0544, "step": 2472 }, { "epoch": 0.330703396630115, "grad_norm": 1.1946009397506714, "learning_rate": 1.966459350127921e-05, "loss": 1.1024, "step": 2473 }, { "epoch": 0.3308371222251939, "grad_norm": 1.2083193063735962, "learning_rate": 1.9664222618075958e-05, "loss": 0.9295, "step": 2474 }, { "epoch": 0.3309708478202728, "grad_norm": 1.2728837728500366, "learning_rate": 1.9663851533431236e-05, "loss": 1.1697, "step": 2475 }, { "epoch": 0.3311045734153517, "grad_norm": 1.3240692615509033, "learning_rate": 1.9663480247352775e-05, "loss": 0.9949, "step": 2476 }, { "epoch": 0.3312382990104306, "grad_norm": 1.1284722089767456, "learning_rate": 1.9663108759848314e-05, "loss": 0.956, "step": 2477 }, { "epoch": 0.3313720246055095, "grad_norm": 1.1202340126037598, "learning_rate": 1.96627370709256e-05, "loss": 0.9896, "step": 2478 }, { "epoch": 0.33150575020058837, "grad_norm": 1.4902220964431763, "learning_rate": 1.9662365180592372e-05, "loss": 1.0591, "step": 2479 }, { "epoch": 0.33163947579566727, "grad_norm": 1.1558111906051636, "learning_rate": 1.9661993088856395e-05, "loss": 0.9949, "step": 2480 }, { "epoch": 0.3317732013907462, "grad_norm": 1.227022647857666, "learning_rate": 1.9661620795725413e-05, "loss": 1.0558, "step": 2481 }, { "epoch": 0.3319069269858251, "grad_norm": 1.1978788375854492, "learning_rate": 1.966124830120719e-05, "loss": 1.125, "step": 2482 }, { "epoch": 0.332040652580904, "grad_norm": 1.2000869512557983, "learning_rate": 1.96608756053095e-05, "loss": 1.1443, "step": 2483 }, { "epoch": 0.3321743781759829, "grad_norm": 1.0709697008132935, "learning_rate": 1.9660502708040094e-05, "loss": 1.0164, "step": 2484 }, { "epoch": 0.3323081037710618, "grad_norm": 1.1124541759490967, "learning_rate": 1.9660129609406752e-05, "loss": 1.0067, "step": 2485 }, { "epoch": 0.3324418293661407, "grad_norm": 1.237353801727295, "learning_rate": 1.9659756309417254e-05, "loss": 0.9994, "step": 2486 }, { "epoch": 0.3325755549612196, "grad_norm": 1.1384249925613403, "learning_rate": 1.965938280807938e-05, "loss": 0.8429, "step": 2487 }, { "epoch": 0.33270928055629845, "grad_norm": 1.0440430641174316, "learning_rate": 1.9659009105400915e-05, "loss": 0.9322, "step": 2488 }, { "epoch": 0.33284300615137735, "grad_norm": 1.0262411832809448, "learning_rate": 1.9658635201389646e-05, "loss": 0.9499, "step": 2489 }, { "epoch": 0.33297673174645626, "grad_norm": 1.113940954208374, "learning_rate": 1.965826109605337e-05, "loss": 1.1036, "step": 2490 }, { "epoch": 0.33311045734153516, "grad_norm": 1.0630565881729126, "learning_rate": 1.9657886789399882e-05, "loss": 1.0036, "step": 2491 }, { "epoch": 0.33324418293661406, "grad_norm": 1.3706883192062378, "learning_rate": 1.965751228143699e-05, "loss": 0.979, "step": 2492 }, { "epoch": 0.33337790853169297, "grad_norm": 1.0768769979476929, "learning_rate": 1.965713757217249e-05, "loss": 0.9954, "step": 2493 }, { "epoch": 0.33351163412677187, "grad_norm": 1.0911844968795776, "learning_rate": 1.96567626616142e-05, "loss": 0.9375, "step": 2494 }, { "epoch": 0.3336453597218508, "grad_norm": 1.1118284463882446, "learning_rate": 1.9656387549769934e-05, "loss": 0.9625, "step": 2495 }, { "epoch": 0.3337790853169297, "grad_norm": 1.3816057443618774, "learning_rate": 1.965601223664751e-05, "loss": 0.9775, "step": 2496 }, { "epoch": 0.3339128109120086, "grad_norm": 1.3033983707427979, "learning_rate": 1.965563672225475e-05, "loss": 1.1202, "step": 2497 }, { "epoch": 0.33404653650708743, "grad_norm": 1.0809283256530762, "learning_rate": 1.9655261006599482e-05, "loss": 0.9068, "step": 2498 }, { "epoch": 0.33418026210216634, "grad_norm": 1.182268500328064, "learning_rate": 1.9654885089689537e-05, "loss": 0.9733, "step": 2499 }, { "epoch": 0.33431398769724524, "grad_norm": 1.1254799365997314, "learning_rate": 1.965450897153275e-05, "loss": 1.0003, "step": 2500 }, { "epoch": 0.33444771329232414, "grad_norm": 1.1354291439056396, "learning_rate": 1.9654132652136964e-05, "loss": 1.1529, "step": 2501 }, { "epoch": 0.33458143888740305, "grad_norm": 1.1071571111679077, "learning_rate": 1.965375613151002e-05, "loss": 0.9225, "step": 2502 }, { "epoch": 0.33471516448248195, "grad_norm": 1.3543483018875122, "learning_rate": 1.9653379409659767e-05, "loss": 1.06, "step": 2503 }, { "epoch": 0.33484889007756086, "grad_norm": 1.1036163568496704, "learning_rate": 1.9653002486594057e-05, "loss": 0.8874, "step": 2504 }, { "epoch": 0.33498261567263976, "grad_norm": 1.0290050506591797, "learning_rate": 1.9652625362320746e-05, "loss": 0.8567, "step": 2505 }, { "epoch": 0.33511634126771866, "grad_norm": 1.1527010202407837, "learning_rate": 1.9652248036847698e-05, "loss": 0.922, "step": 2506 }, { "epoch": 0.3352500668627975, "grad_norm": 1.2125111818313599, "learning_rate": 1.9651870510182776e-05, "loss": 1.1178, "step": 2507 }, { "epoch": 0.3353837924578764, "grad_norm": 1.2517215013504028, "learning_rate": 1.9651492782333848e-05, "loss": 1.0948, "step": 2508 }, { "epoch": 0.3355175180529553, "grad_norm": 1.2690868377685547, "learning_rate": 1.9651114853308788e-05, "loss": 0.9732, "step": 2509 }, { "epoch": 0.3356512436480342, "grad_norm": 1.1586898565292358, "learning_rate": 1.9650736723115476e-05, "loss": 1.0289, "step": 2510 }, { "epoch": 0.33578496924311313, "grad_norm": 1.2338892221450806, "learning_rate": 1.965035839176179e-05, "loss": 0.9628, "step": 2511 }, { "epoch": 0.33591869483819203, "grad_norm": 1.228184700012207, "learning_rate": 1.9649979859255618e-05, "loss": 0.9847, "step": 2512 }, { "epoch": 0.33605242043327094, "grad_norm": 1.3086342811584473, "learning_rate": 1.964960112560485e-05, "loss": 1.0722, "step": 2513 }, { "epoch": 0.33618614602834984, "grad_norm": 1.1865824460983276, "learning_rate": 1.9649222190817382e-05, "loss": 1.0829, "step": 2514 }, { "epoch": 0.33631987162342875, "grad_norm": 1.2394098043441772, "learning_rate": 1.9648843054901106e-05, "loss": 0.9169, "step": 2515 }, { "epoch": 0.33645359721850765, "grad_norm": 1.1646184921264648, "learning_rate": 1.9648463717863935e-05, "loss": 0.9327, "step": 2516 }, { "epoch": 0.3365873228135865, "grad_norm": 1.1969743967056274, "learning_rate": 1.9648084179713766e-05, "loss": 1.012, "step": 2517 }, { "epoch": 0.3367210484086654, "grad_norm": 1.1722489595413208, "learning_rate": 1.9647704440458518e-05, "loss": 0.995, "step": 2518 }, { "epoch": 0.3368547740037443, "grad_norm": 1.1746480464935303, "learning_rate": 1.96473245001061e-05, "loss": 1.0475, "step": 2519 }, { "epoch": 0.3369884995988232, "grad_norm": 1.1708028316497803, "learning_rate": 1.9646944358664436e-05, "loss": 1.099, "step": 2520 }, { "epoch": 0.3371222251939021, "grad_norm": 1.0921833515167236, "learning_rate": 1.9646564016141447e-05, "loss": 0.9723, "step": 2521 }, { "epoch": 0.337255950788981, "grad_norm": 1.1508148908615112, "learning_rate": 1.9646183472545063e-05, "loss": 1.0105, "step": 2522 }, { "epoch": 0.3373896763840599, "grad_norm": 1.2986013889312744, "learning_rate": 1.964580272788321e-05, "loss": 0.9449, "step": 2523 }, { "epoch": 0.3375234019791388, "grad_norm": 1.2493939399719238, "learning_rate": 1.9645421782163838e-05, "loss": 1.005, "step": 2524 }, { "epoch": 0.33765712757421773, "grad_norm": 1.093065857887268, "learning_rate": 1.9645040635394876e-05, "loss": 0.8448, "step": 2525 }, { "epoch": 0.3377908531692966, "grad_norm": 1.2449997663497925, "learning_rate": 1.9644659287584263e-05, "loss": 1.1083, "step": 2526 }, { "epoch": 0.3379245787643755, "grad_norm": 1.1653188467025757, "learning_rate": 1.9644277738739966e-05, "loss": 0.977, "step": 2527 }, { "epoch": 0.3380583043594544, "grad_norm": 1.2044494152069092, "learning_rate": 1.9643895988869922e-05, "loss": 1.032, "step": 2528 }, { "epoch": 0.3381920299545333, "grad_norm": 1.1300307512283325, "learning_rate": 1.96435140379821e-05, "loss": 0.9607, "step": 2529 }, { "epoch": 0.3383257555496122, "grad_norm": 1.1526036262512207, "learning_rate": 1.964313188608445e-05, "loss": 0.9449, "step": 2530 }, { "epoch": 0.3384594811446911, "grad_norm": 1.13448166847229, "learning_rate": 1.9642749533184945e-05, "loss": 0.9135, "step": 2531 }, { "epoch": 0.33859320673977, "grad_norm": 1.1744157075881958, "learning_rate": 1.9642366979291555e-05, "loss": 1.1695, "step": 2532 }, { "epoch": 0.3387269323348489, "grad_norm": 1.0801098346710205, "learning_rate": 1.964198422441225e-05, "loss": 0.9579, "step": 2533 }, { "epoch": 0.3388606579299278, "grad_norm": 1.310989260673523, "learning_rate": 1.964160126855501e-05, "loss": 1.1205, "step": 2534 }, { "epoch": 0.33899438352500666, "grad_norm": 1.3216352462768555, "learning_rate": 1.964121811172782e-05, "loss": 1.0463, "step": 2535 }, { "epoch": 0.33912810912008556, "grad_norm": 1.2654401063919067, "learning_rate": 1.9640834753938663e-05, "loss": 0.9809, "step": 2536 }, { "epoch": 0.33926183471516447, "grad_norm": 1.1328372955322266, "learning_rate": 1.9640451195195533e-05, "loss": 0.9372, "step": 2537 }, { "epoch": 0.33939556031024337, "grad_norm": 1.2147736549377441, "learning_rate": 1.9640067435506416e-05, "loss": 1.036, "step": 2538 }, { "epoch": 0.3395292859053223, "grad_norm": 1.2760734558105469, "learning_rate": 1.9639683474879326e-05, "loss": 1.0111, "step": 2539 }, { "epoch": 0.3396630115004012, "grad_norm": 1.22752046585083, "learning_rate": 1.963929931332225e-05, "loss": 1.0139, "step": 2540 }, { "epoch": 0.3397967370954801, "grad_norm": 1.0937491655349731, "learning_rate": 1.9638914950843212e-05, "loss": 0.956, "step": 2541 }, { "epoch": 0.339930462690559, "grad_norm": 1.2286529541015625, "learning_rate": 1.963853038745021e-05, "loss": 1.042, "step": 2542 }, { "epoch": 0.3400641882856379, "grad_norm": 1.168082594871521, "learning_rate": 1.9638145623151267e-05, "loss": 1.0048, "step": 2543 }, { "epoch": 0.3401979138807168, "grad_norm": 1.2270926237106323, "learning_rate": 1.96377606579544e-05, "loss": 1.097, "step": 2544 }, { "epoch": 0.34033163947579564, "grad_norm": 1.1742442846298218, "learning_rate": 1.9637375491867636e-05, "loss": 1.0339, "step": 2545 }, { "epoch": 0.34046536507087455, "grad_norm": 1.164702296257019, "learning_rate": 1.9636990124899e-05, "loss": 0.9833, "step": 2546 }, { "epoch": 0.34059909066595345, "grad_norm": 1.129084825515747, "learning_rate": 1.963660455705653e-05, "loss": 0.9439, "step": 2547 }, { "epoch": 0.34073281626103236, "grad_norm": 1.0737391710281372, "learning_rate": 1.9636218788348254e-05, "loss": 0.9155, "step": 2548 }, { "epoch": 0.34086654185611126, "grad_norm": 1.1754376888275146, "learning_rate": 1.963583281878222e-05, "loss": 1.0652, "step": 2549 }, { "epoch": 0.34100026745119016, "grad_norm": 1.1493417024612427, "learning_rate": 1.9635446648366473e-05, "loss": 0.988, "step": 2550 }, { "epoch": 0.34113399304626907, "grad_norm": 1.085188388824463, "learning_rate": 1.963506027710906e-05, "loss": 0.9255, "step": 2551 }, { "epoch": 0.341267718641348, "grad_norm": 1.1129672527313232, "learning_rate": 1.9634673705018034e-05, "loss": 1.0145, "step": 2552 }, { "epoch": 0.3414014442364269, "grad_norm": 1.2364767789840698, "learning_rate": 1.9634286932101457e-05, "loss": 0.9954, "step": 2553 }, { "epoch": 0.3415351698315057, "grad_norm": 1.079734444618225, "learning_rate": 1.9633899958367384e-05, "loss": 0.8759, "step": 2554 }, { "epoch": 0.34166889542658463, "grad_norm": 1.1786879301071167, "learning_rate": 1.9633512783823887e-05, "loss": 0.9272, "step": 2555 }, { "epoch": 0.34180262102166353, "grad_norm": 1.183010220527649, "learning_rate": 1.9633125408479035e-05, "loss": 0.9312, "step": 2556 }, { "epoch": 0.34193634661674244, "grad_norm": 1.05107843875885, "learning_rate": 1.9632737832340904e-05, "loss": 0.9726, "step": 2557 }, { "epoch": 0.34207007221182134, "grad_norm": 1.1555575132369995, "learning_rate": 1.9632350055417566e-05, "loss": 1.0098, "step": 2558 }, { "epoch": 0.34220379780690025, "grad_norm": 1.201690912246704, "learning_rate": 1.963196207771711e-05, "loss": 0.9987, "step": 2559 }, { "epoch": 0.34233752340197915, "grad_norm": 1.2961421012878418, "learning_rate": 1.963157389924762e-05, "loss": 1.1288, "step": 2560 }, { "epoch": 0.34247124899705805, "grad_norm": 1.1089577674865723, "learning_rate": 1.9631185520017187e-05, "loss": 1.0613, "step": 2561 }, { "epoch": 0.34260497459213696, "grad_norm": 1.1423362493515015, "learning_rate": 1.9630796940033913e-05, "loss": 1.0191, "step": 2562 }, { "epoch": 0.3427387001872158, "grad_norm": 1.1997482776641846, "learning_rate": 1.963040815930589e-05, "loss": 0.9508, "step": 2563 }, { "epoch": 0.3428724257822947, "grad_norm": 1.1286191940307617, "learning_rate": 1.9630019177841224e-05, "loss": 0.9615, "step": 2564 }, { "epoch": 0.3430061513773736, "grad_norm": 1.072165608406067, "learning_rate": 1.9629629995648024e-05, "loss": 1.0301, "step": 2565 }, { "epoch": 0.3431398769724525, "grad_norm": 1.2226704359054565, "learning_rate": 1.96292406127344e-05, "loss": 1.0236, "step": 2566 }, { "epoch": 0.3432736025675314, "grad_norm": 1.1634501218795776, "learning_rate": 1.962885102910847e-05, "loss": 0.9152, "step": 2567 }, { "epoch": 0.3434073281626103, "grad_norm": 1.1952215433120728, "learning_rate": 1.9628461244778356e-05, "loss": 0.9922, "step": 2568 }, { "epoch": 0.34354105375768923, "grad_norm": 1.2677711248397827, "learning_rate": 1.9628071259752177e-05, "loss": 0.9343, "step": 2569 }, { "epoch": 0.34367477935276813, "grad_norm": 1.1028345823287964, "learning_rate": 1.962768107403807e-05, "loss": 0.9223, "step": 2570 }, { "epoch": 0.34380850494784704, "grad_norm": 1.1565215587615967, "learning_rate": 1.962729068764416e-05, "loss": 1.0954, "step": 2571 }, { "epoch": 0.34394223054292594, "grad_norm": 1.2226780652999878, "learning_rate": 1.962690010057859e-05, "loss": 1.1138, "step": 2572 }, { "epoch": 0.3440759561380048, "grad_norm": 1.1678746938705444, "learning_rate": 1.96265093128495e-05, "loss": 1.0102, "step": 2573 }, { "epoch": 0.3442096817330837, "grad_norm": 1.349263072013855, "learning_rate": 1.9626118324465035e-05, "loss": 1.0013, "step": 2574 }, { "epoch": 0.3443434073281626, "grad_norm": 1.0769171714782715, "learning_rate": 1.9625727135433343e-05, "loss": 0.9626, "step": 2575 }, { "epoch": 0.3444771329232415, "grad_norm": 1.0992207527160645, "learning_rate": 1.9625335745762578e-05, "loss": 1.0471, "step": 2576 }, { "epoch": 0.3446108585183204, "grad_norm": 1.2378076314926147, "learning_rate": 1.96249441554609e-05, "loss": 1.0813, "step": 2577 }, { "epoch": 0.3447445841133993, "grad_norm": 1.1264938116073608, "learning_rate": 1.9624552364536472e-05, "loss": 0.9162, "step": 2578 }, { "epoch": 0.3448783097084782, "grad_norm": 1.243513822555542, "learning_rate": 1.962416037299746e-05, "loss": 1.1321, "step": 2579 }, { "epoch": 0.3450120353035571, "grad_norm": 1.0973551273345947, "learning_rate": 1.962376818085204e-05, "loss": 0.9682, "step": 2580 }, { "epoch": 0.345145760898636, "grad_norm": 1.0493675470352173, "learning_rate": 1.9623375788108373e-05, "loss": 0.9831, "step": 2581 }, { "epoch": 0.34527948649371487, "grad_norm": 1.1050320863723755, "learning_rate": 1.9622983194774652e-05, "loss": 0.9248, "step": 2582 }, { "epoch": 0.3454132120887938, "grad_norm": 1.0662256479263306, "learning_rate": 1.962259040085905e-05, "loss": 0.9449, "step": 2583 }, { "epoch": 0.3455469376838727, "grad_norm": 1.118995189666748, "learning_rate": 1.9622197406369764e-05, "loss": 1.0101, "step": 2584 }, { "epoch": 0.3456806632789516, "grad_norm": 1.1912171840667725, "learning_rate": 1.9621804211314974e-05, "loss": 1.0218, "step": 2585 }, { "epoch": 0.3458143888740305, "grad_norm": 1.166723370552063, "learning_rate": 1.9621410815702888e-05, "loss": 1.0849, "step": 2586 }, { "epoch": 0.3459481144691094, "grad_norm": 1.1717168092727661, "learning_rate": 1.9621017219541694e-05, "loss": 1.0346, "step": 2587 }, { "epoch": 0.3460818400641883, "grad_norm": 1.158998727798462, "learning_rate": 1.962062342283961e-05, "loss": 0.9908, "step": 2588 }, { "epoch": 0.3462155656592672, "grad_norm": 1.2118558883666992, "learning_rate": 1.962022942560483e-05, "loss": 1.0466, "step": 2589 }, { "epoch": 0.3463492912543461, "grad_norm": 1.2053078413009644, "learning_rate": 1.9619835227845582e-05, "loss": 0.9992, "step": 2590 }, { "epoch": 0.346483016849425, "grad_norm": 1.1855584383010864, "learning_rate": 1.9619440829570065e-05, "loss": 0.9243, "step": 2591 }, { "epoch": 0.34661674244450386, "grad_norm": 1.1357593536376953, "learning_rate": 1.9619046230786512e-05, "loss": 0.8814, "step": 2592 }, { "epoch": 0.34675046803958276, "grad_norm": 1.271559715270996, "learning_rate": 1.9618651431503146e-05, "loss": 1.0791, "step": 2593 }, { "epoch": 0.34688419363466166, "grad_norm": 1.1946696043014526, "learning_rate": 1.961825643172819e-05, "loss": 1.0079, "step": 2594 }, { "epoch": 0.34701791922974057, "grad_norm": 1.1071274280548096, "learning_rate": 1.9617861231469887e-05, "loss": 0.9431, "step": 2595 }, { "epoch": 0.34715164482481947, "grad_norm": 1.2470589876174927, "learning_rate": 1.961746583073647e-05, "loss": 1.0815, "step": 2596 }, { "epoch": 0.3472853704198984, "grad_norm": 1.1656633615493774, "learning_rate": 1.9617070229536178e-05, "loss": 1.0213, "step": 2597 }, { "epoch": 0.3474190960149773, "grad_norm": 1.1932566165924072, "learning_rate": 1.9616674427877264e-05, "loss": 0.9887, "step": 2598 }, { "epoch": 0.3475528216100562, "grad_norm": 1.1705557107925415, "learning_rate": 1.961627842576797e-05, "loss": 0.9304, "step": 2599 }, { "epoch": 0.3476865472051351, "grad_norm": 1.2132103443145752, "learning_rate": 1.9615882223216553e-05, "loss": 1.0532, "step": 2600 }, { "epoch": 0.34782027280021394, "grad_norm": 1.261538028717041, "learning_rate": 1.9615485820231278e-05, "loss": 0.9883, "step": 2601 }, { "epoch": 0.34795399839529284, "grad_norm": 1.2422410249710083, "learning_rate": 1.9615089216820395e-05, "loss": 1.0481, "step": 2602 }, { "epoch": 0.34808772399037174, "grad_norm": 1.1227924823760986, "learning_rate": 1.9614692412992183e-05, "loss": 1.0819, "step": 2603 }, { "epoch": 0.34822144958545065, "grad_norm": 1.2238742113113403, "learning_rate": 1.9614295408754908e-05, "loss": 1.1976, "step": 2604 }, { "epoch": 0.34835517518052955, "grad_norm": 1.1077107191085815, "learning_rate": 1.961389820411684e-05, "loss": 1.0073, "step": 2605 }, { "epoch": 0.34848890077560846, "grad_norm": 1.2013999223709106, "learning_rate": 1.9613500799086266e-05, "loss": 1.0746, "step": 2606 }, { "epoch": 0.34862262637068736, "grad_norm": 1.076201319694519, "learning_rate": 1.9613103193671466e-05, "loss": 0.9325, "step": 2607 }, { "epoch": 0.34875635196576626, "grad_norm": 1.078354001045227, "learning_rate": 1.9612705387880733e-05, "loss": 1.0074, "step": 2608 }, { "epoch": 0.34889007756084517, "grad_norm": 1.1448390483856201, "learning_rate": 1.961230738172235e-05, "loss": 0.9253, "step": 2609 }, { "epoch": 0.349023803155924, "grad_norm": 1.0853244066238403, "learning_rate": 1.961190917520462e-05, "loss": 1.0108, "step": 2610 }, { "epoch": 0.3491575287510029, "grad_norm": 1.1311365365982056, "learning_rate": 1.9611510768335842e-05, "loss": 1.0537, "step": 2611 }, { "epoch": 0.3492912543460818, "grad_norm": 1.0610649585723877, "learning_rate": 1.961111216112432e-05, "loss": 0.877, "step": 2612 }, { "epoch": 0.34942497994116073, "grad_norm": 1.1435920000076294, "learning_rate": 1.9610713353578356e-05, "loss": 0.9543, "step": 2613 }, { "epoch": 0.34955870553623963, "grad_norm": 3.3594019412994385, "learning_rate": 1.9610314345706275e-05, "loss": 0.9889, "step": 2614 }, { "epoch": 0.34969243113131854, "grad_norm": 1.2156792879104614, "learning_rate": 1.9609915137516383e-05, "loss": 1.0147, "step": 2615 }, { "epoch": 0.34982615672639744, "grad_norm": 1.363714575767517, "learning_rate": 1.9609515729017006e-05, "loss": 1.1006, "step": 2616 }, { "epoch": 0.34995988232147635, "grad_norm": 1.108022689819336, "learning_rate": 1.960911612021647e-05, "loss": 1.0501, "step": 2617 }, { "epoch": 0.35009360791655525, "grad_norm": 1.1953414678573608, "learning_rate": 1.9608716311123107e-05, "loss": 1.0165, "step": 2618 }, { "epoch": 0.35022733351163415, "grad_norm": 1.0880476236343384, "learning_rate": 1.9608316301745242e-05, "loss": 0.9524, "step": 2619 }, { "epoch": 0.350361059106713, "grad_norm": 1.113537073135376, "learning_rate": 1.960791609209122e-05, "loss": 1.0349, "step": 2620 }, { "epoch": 0.3504947847017919, "grad_norm": 1.159740924835205, "learning_rate": 1.9607515682169378e-05, "loss": 0.9616, "step": 2621 }, { "epoch": 0.3506285102968708, "grad_norm": 1.044344425201416, "learning_rate": 1.9607115071988068e-05, "loss": 0.7935, "step": 2622 }, { "epoch": 0.3507622358919497, "grad_norm": 1.2492702007293701, "learning_rate": 1.9606714261555637e-05, "loss": 1.098, "step": 2623 }, { "epoch": 0.3508959614870286, "grad_norm": 1.1514935493469238, "learning_rate": 1.960631325088044e-05, "loss": 0.9106, "step": 2624 }, { "epoch": 0.3510296870821075, "grad_norm": 1.0382087230682373, "learning_rate": 1.9605912039970835e-05, "loss": 0.9279, "step": 2625 }, { "epoch": 0.3511634126771864, "grad_norm": 1.158911943435669, "learning_rate": 1.9605510628835184e-05, "loss": 1.1021, "step": 2626 }, { "epoch": 0.35129713827226533, "grad_norm": 1.0473262071609497, "learning_rate": 1.960510901748186e-05, "loss": 0.9501, "step": 2627 }, { "epoch": 0.35143086386734423, "grad_norm": 1.1491297483444214, "learning_rate": 1.9604707205919223e-05, "loss": 1.0231, "step": 2628 }, { "epoch": 0.3515645894624231, "grad_norm": 1.1306887865066528, "learning_rate": 1.960430519415566e-05, "loss": 0.9688, "step": 2629 }, { "epoch": 0.351698315057502, "grad_norm": 1.2194674015045166, "learning_rate": 1.9603902982199544e-05, "loss": 0.9622, "step": 2630 }, { "epoch": 0.3518320406525809, "grad_norm": 1.2383387088775635, "learning_rate": 1.9603500570059258e-05, "loss": 1.1039, "step": 2631 }, { "epoch": 0.3519657662476598, "grad_norm": 1.1345744132995605, "learning_rate": 1.9603097957743197e-05, "loss": 0.9986, "step": 2632 }, { "epoch": 0.3520994918427387, "grad_norm": 1.085554599761963, "learning_rate": 1.9602695145259744e-05, "loss": 0.86, "step": 2633 }, { "epoch": 0.3522332174378176, "grad_norm": 1.1948943138122559, "learning_rate": 1.96022921326173e-05, "loss": 1.0867, "step": 2634 }, { "epoch": 0.3523669430328965, "grad_norm": 1.3336191177368164, "learning_rate": 1.960188891982427e-05, "loss": 0.9797, "step": 2635 }, { "epoch": 0.3525006686279754, "grad_norm": 1.1102896928787231, "learning_rate": 1.9601485506889047e-05, "loss": 0.8849, "step": 2636 }, { "epoch": 0.3526343942230543, "grad_norm": 1.0755975246429443, "learning_rate": 1.9601081893820048e-05, "loss": 0.9583, "step": 2637 }, { "epoch": 0.35276811981813316, "grad_norm": 1.2134389877319336, "learning_rate": 1.9600678080625685e-05, "loss": 0.9901, "step": 2638 }, { "epoch": 0.35290184541321207, "grad_norm": 1.1847506761550903, "learning_rate": 1.9600274067314374e-05, "loss": 1.0353, "step": 2639 }, { "epoch": 0.35303557100829097, "grad_norm": 1.3278470039367676, "learning_rate": 1.959986985389454e-05, "loss": 0.9557, "step": 2640 }, { "epoch": 0.3531692966033699, "grad_norm": 1.1818082332611084, "learning_rate": 1.95994654403746e-05, "loss": 1.133, "step": 2641 }, { "epoch": 0.3533030221984488, "grad_norm": 1.100904107093811, "learning_rate": 1.959906082676299e-05, "loss": 0.9336, "step": 2642 }, { "epoch": 0.3534367477935277, "grad_norm": 1.0586740970611572, "learning_rate": 1.9598656013068145e-05, "loss": 0.8484, "step": 2643 }, { "epoch": 0.3535704733886066, "grad_norm": 1.056347131729126, "learning_rate": 1.9598250999298495e-05, "loss": 0.9348, "step": 2644 }, { "epoch": 0.3537041989836855, "grad_norm": 1.1483207941055298, "learning_rate": 1.9597845785462492e-05, "loss": 0.9324, "step": 2645 }, { "epoch": 0.3538379245787644, "grad_norm": 1.149651288986206, "learning_rate": 1.9597440371568576e-05, "loss": 1.0206, "step": 2646 }, { "epoch": 0.3539716501738433, "grad_norm": 1.1656427383422852, "learning_rate": 1.95970347576252e-05, "loss": 0.9694, "step": 2647 }, { "epoch": 0.35410537576892215, "grad_norm": 1.1961395740509033, "learning_rate": 1.9596628943640817e-05, "loss": 0.999, "step": 2648 }, { "epoch": 0.35423910136400105, "grad_norm": 1.1476325988769531, "learning_rate": 1.9596222929623888e-05, "loss": 1.0927, "step": 2649 }, { "epoch": 0.35437282695907996, "grad_norm": 1.179354190826416, "learning_rate": 1.9595816715582873e-05, "loss": 0.9684, "step": 2650 }, { "epoch": 0.35450655255415886, "grad_norm": 1.2051736116409302, "learning_rate": 1.959541030152624e-05, "loss": 1.045, "step": 2651 }, { "epoch": 0.35464027814923776, "grad_norm": 1.286818504333496, "learning_rate": 1.9595003687462463e-05, "loss": 1.0269, "step": 2652 }, { "epoch": 0.35477400374431667, "grad_norm": 1.108031988143921, "learning_rate": 1.9594596873400015e-05, "loss": 1.0408, "step": 2653 }, { "epoch": 0.3549077293393956, "grad_norm": 1.1158322095870972, "learning_rate": 1.9594189859347376e-05, "loss": 0.9333, "step": 2654 }, { "epoch": 0.3550414549344745, "grad_norm": 1.1174850463867188, "learning_rate": 1.959378264531303e-05, "loss": 0.8743, "step": 2655 }, { "epoch": 0.3551751805295534, "grad_norm": 1.0827534198760986, "learning_rate": 1.9593375231305466e-05, "loss": 0.8946, "step": 2656 }, { "epoch": 0.35530890612463223, "grad_norm": 1.028654932975769, "learning_rate": 1.959296761733317e-05, "loss": 0.893, "step": 2657 }, { "epoch": 0.35544263171971113, "grad_norm": 1.186279296875, "learning_rate": 1.9592559803404652e-05, "loss": 0.9932, "step": 2658 }, { "epoch": 0.35557635731479004, "grad_norm": 1.1797289848327637, "learning_rate": 1.9592151789528397e-05, "loss": 1.0447, "step": 2659 }, { "epoch": 0.35571008290986894, "grad_norm": 1.1956654787063599, "learning_rate": 1.959174357571292e-05, "loss": 1.0292, "step": 2660 }, { "epoch": 0.35584380850494784, "grad_norm": 1.1413626670837402, "learning_rate": 1.9591335161966725e-05, "loss": 1.0862, "step": 2661 }, { "epoch": 0.35597753410002675, "grad_norm": 1.0182641744613647, "learning_rate": 1.959092654829833e-05, "loss": 0.9734, "step": 2662 }, { "epoch": 0.35611125969510565, "grad_norm": 1.2872415781021118, "learning_rate": 1.9590517734716244e-05, "loss": 1.0722, "step": 2663 }, { "epoch": 0.35624498529018456, "grad_norm": 1.2341710329055786, "learning_rate": 1.9590108721228994e-05, "loss": 1.0597, "step": 2664 }, { "epoch": 0.35637871088526346, "grad_norm": 1.129207968711853, "learning_rate": 1.9589699507845106e-05, "loss": 1.1569, "step": 2665 }, { "epoch": 0.35651243648034237, "grad_norm": 1.3279445171356201, "learning_rate": 1.958929009457311e-05, "loss": 1.0741, "step": 2666 }, { "epoch": 0.3566461620754212, "grad_norm": 1.0417251586914062, "learning_rate": 1.9588880481421537e-05, "loss": 0.9052, "step": 2667 }, { "epoch": 0.3567798876705001, "grad_norm": 1.086450457572937, "learning_rate": 1.958847066839892e-05, "loss": 0.9439, "step": 2668 }, { "epoch": 0.356913613265579, "grad_norm": 1.0320312976837158, "learning_rate": 1.9588060655513814e-05, "loss": 1.0193, "step": 2669 }, { "epoch": 0.3570473388606579, "grad_norm": 1.278128981590271, "learning_rate": 1.9587650442774756e-05, "loss": 1.041, "step": 2670 }, { "epoch": 0.35718106445573683, "grad_norm": 1.1445777416229248, "learning_rate": 1.9587240030190298e-05, "loss": 0.9989, "step": 2671 }, { "epoch": 0.35731479005081573, "grad_norm": 1.210056185722351, "learning_rate": 1.9586829417768995e-05, "loss": 0.9621, "step": 2672 }, { "epoch": 0.35744851564589464, "grad_norm": 1.16221284866333, "learning_rate": 1.9586418605519407e-05, "loss": 0.9983, "step": 2673 }, { "epoch": 0.35758224124097354, "grad_norm": 1.1673552989959717, "learning_rate": 1.9586007593450098e-05, "loss": 0.9709, "step": 2674 }, { "epoch": 0.35771596683605245, "grad_norm": 1.076001763343811, "learning_rate": 1.958559638156963e-05, "loss": 0.9639, "step": 2675 }, { "epoch": 0.3578496924311313, "grad_norm": 1.1253043413162231, "learning_rate": 1.9585184969886585e-05, "loss": 0.993, "step": 2676 }, { "epoch": 0.3579834180262102, "grad_norm": 1.1168324947357178, "learning_rate": 1.9584773358409525e-05, "loss": 1.0439, "step": 2677 }, { "epoch": 0.3581171436212891, "grad_norm": 1.2184659242630005, "learning_rate": 1.9584361547147036e-05, "loss": 1.0543, "step": 2678 }, { "epoch": 0.358250869216368, "grad_norm": 1.2141841650009155, "learning_rate": 1.9583949536107706e-05, "loss": 1.0282, "step": 2679 }, { "epoch": 0.3583845948114469, "grad_norm": 1.1196305751800537, "learning_rate": 1.9583537325300118e-05, "loss": 0.9203, "step": 2680 }, { "epoch": 0.3585183204065258, "grad_norm": 1.2981374263763428, "learning_rate": 1.958312491473286e-05, "loss": 0.9521, "step": 2681 }, { "epoch": 0.3586520460016047, "grad_norm": 1.1890085935592651, "learning_rate": 1.9582712304414538e-05, "loss": 0.9953, "step": 2682 }, { "epoch": 0.3587857715966836, "grad_norm": 1.1681146621704102, "learning_rate": 1.958229949435375e-05, "loss": 1.0206, "step": 2683 }, { "epoch": 0.3589194971917625, "grad_norm": 1.2158714532852173, "learning_rate": 1.958188648455909e-05, "loss": 1.0129, "step": 2684 }, { "epoch": 0.3590532227868414, "grad_norm": 1.080311894416809, "learning_rate": 1.958147327503918e-05, "loss": 0.8771, "step": 2685 }, { "epoch": 0.3591869483819203, "grad_norm": 1.1317156553268433, "learning_rate": 1.9581059865802627e-05, "loss": 1.0446, "step": 2686 }, { "epoch": 0.3593206739769992, "grad_norm": 1.175309419631958, "learning_rate": 1.9580646256858048e-05, "loss": 0.9078, "step": 2687 }, { "epoch": 0.3594543995720781, "grad_norm": 1.1587311029434204, "learning_rate": 1.9580232448214067e-05, "loss": 0.946, "step": 2688 }, { "epoch": 0.359588125167157, "grad_norm": 1.2271808385849, "learning_rate": 1.957981843987931e-05, "loss": 1.1262, "step": 2689 }, { "epoch": 0.3597218507622359, "grad_norm": 1.142259120941162, "learning_rate": 1.9579404231862403e-05, "loss": 0.9796, "step": 2690 }, { "epoch": 0.3598555763573148, "grad_norm": 1.247002124786377, "learning_rate": 1.9578989824171982e-05, "loss": 0.9748, "step": 2691 }, { "epoch": 0.3599893019523937, "grad_norm": 1.1332519054412842, "learning_rate": 1.957857521681668e-05, "loss": 0.9902, "step": 2692 }, { "epoch": 0.3601230275474726, "grad_norm": 1.2020732164382935, "learning_rate": 1.957816040980515e-05, "loss": 0.9782, "step": 2693 }, { "epoch": 0.3602567531425515, "grad_norm": 1.2204875946044922, "learning_rate": 1.9577745403146026e-05, "loss": 0.9771, "step": 2694 }, { "epoch": 0.36039047873763036, "grad_norm": 1.0782824754714966, "learning_rate": 1.9577330196847965e-05, "loss": 1.038, "step": 2695 }, { "epoch": 0.36052420433270926, "grad_norm": 1.1685690879821777, "learning_rate": 1.9576914790919624e-05, "loss": 1.0298, "step": 2696 }, { "epoch": 0.36065792992778817, "grad_norm": 1.0536532402038574, "learning_rate": 1.9576499185369652e-05, "loss": 0.9098, "step": 2697 }, { "epoch": 0.36079165552286707, "grad_norm": 1.263819932937622, "learning_rate": 1.9576083380206724e-05, "loss": 0.9821, "step": 2698 }, { "epoch": 0.360925381117946, "grad_norm": 1.3008877038955688, "learning_rate": 1.95756673754395e-05, "loss": 1.0891, "step": 2699 }, { "epoch": 0.3610591067130249, "grad_norm": 1.2156957387924194, "learning_rate": 1.9575251171076652e-05, "loss": 0.926, "step": 2700 }, { "epoch": 0.3611928323081038, "grad_norm": 1.1306465864181519, "learning_rate": 1.9574834767126855e-05, "loss": 1.0309, "step": 2701 }, { "epoch": 0.3613265579031827, "grad_norm": 1.0821365118026733, "learning_rate": 1.957441816359879e-05, "loss": 0.9483, "step": 2702 }, { "epoch": 0.3614602834982616, "grad_norm": 1.6093029975891113, "learning_rate": 1.957400136050114e-05, "loss": 1.0879, "step": 2703 }, { "epoch": 0.36159400909334044, "grad_norm": 1.121168851852417, "learning_rate": 1.9573584357842592e-05, "loss": 1.092, "step": 2704 }, { "epoch": 0.36172773468841934, "grad_norm": 1.1248654127120972, "learning_rate": 1.957316715563184e-05, "loss": 1.0473, "step": 2705 }, { "epoch": 0.36186146028349825, "grad_norm": 1.2085644006729126, "learning_rate": 1.957274975387758e-05, "loss": 1.0133, "step": 2706 }, { "epoch": 0.36199518587857715, "grad_norm": 1.1050665378570557, "learning_rate": 1.9572332152588513e-05, "loss": 0.9706, "step": 2707 }, { "epoch": 0.36212891147365606, "grad_norm": 1.1249905824661255, "learning_rate": 1.957191435177334e-05, "loss": 0.9016, "step": 2708 }, { "epoch": 0.36226263706873496, "grad_norm": 1.1558479070663452, "learning_rate": 1.957149635144077e-05, "loss": 0.9942, "step": 2709 }, { "epoch": 0.36239636266381386, "grad_norm": 1.2220560312271118, "learning_rate": 1.9571078151599517e-05, "loss": 1.0187, "step": 2710 }, { "epoch": 0.36253008825889277, "grad_norm": 1.073351263999939, "learning_rate": 1.9570659752258302e-05, "loss": 0.922, "step": 2711 }, { "epoch": 0.3626638138539717, "grad_norm": 1.1340545415878296, "learning_rate": 1.9570241153425842e-05, "loss": 1.0319, "step": 2712 }, { "epoch": 0.3627975394490505, "grad_norm": 1.2663789987564087, "learning_rate": 1.956982235511086e-05, "loss": 0.9037, "step": 2713 }, { "epoch": 0.3629312650441294, "grad_norm": 1.3487099409103394, "learning_rate": 1.956940335732209e-05, "loss": 1.0822, "step": 2714 }, { "epoch": 0.36306499063920833, "grad_norm": 1.1533018350601196, "learning_rate": 1.9568984160068263e-05, "loss": 0.9797, "step": 2715 }, { "epoch": 0.36319871623428723, "grad_norm": 1.0488159656524658, "learning_rate": 1.956856476335812e-05, "loss": 0.9819, "step": 2716 }, { "epoch": 0.36333244182936614, "grad_norm": 1.123511552810669, "learning_rate": 1.9568145167200397e-05, "loss": 1.0159, "step": 2717 }, { "epoch": 0.36346616742444504, "grad_norm": 1.1428534984588623, "learning_rate": 1.9567725371603848e-05, "loss": 0.9759, "step": 2718 }, { "epoch": 0.36359989301952395, "grad_norm": 1.1470234394073486, "learning_rate": 1.956730537657722e-05, "loss": 0.9382, "step": 2719 }, { "epoch": 0.36373361861460285, "grad_norm": 1.197984218597412, "learning_rate": 1.956688518212926e-05, "loss": 1.0164, "step": 2720 }, { "epoch": 0.36386734420968175, "grad_norm": 1.1778687238693237, "learning_rate": 1.9566464788268737e-05, "loss": 0.9922, "step": 2721 }, { "epoch": 0.36400106980476066, "grad_norm": 1.0675179958343506, "learning_rate": 1.956604419500441e-05, "loss": 0.9117, "step": 2722 }, { "epoch": 0.3641347953998395, "grad_norm": 1.2712956666946411, "learning_rate": 1.9565623402345045e-05, "loss": 0.9949, "step": 2723 }, { "epoch": 0.3642685209949184, "grad_norm": 1.1661655902862549, "learning_rate": 1.9565202410299415e-05, "loss": 0.9704, "step": 2724 }, { "epoch": 0.3644022465899973, "grad_norm": 1.1678333282470703, "learning_rate": 1.956478121887629e-05, "loss": 1.0302, "step": 2725 }, { "epoch": 0.3645359721850762, "grad_norm": 1.1373299360275269, "learning_rate": 1.9564359828084454e-05, "loss": 0.9866, "step": 2726 }, { "epoch": 0.3646696977801551, "grad_norm": 1.1422022581100464, "learning_rate": 1.9563938237932688e-05, "loss": 1.0216, "step": 2727 }, { "epoch": 0.364803423375234, "grad_norm": 1.2675966024398804, "learning_rate": 1.9563516448429783e-05, "loss": 1.0579, "step": 2728 }, { "epoch": 0.36493714897031293, "grad_norm": 1.1172945499420166, "learning_rate": 1.9563094459584532e-05, "loss": 0.9668, "step": 2729 }, { "epoch": 0.36507087456539183, "grad_norm": 1.0568033456802368, "learning_rate": 1.9562672271405723e-05, "loss": 1.0171, "step": 2730 }, { "epoch": 0.36520460016047074, "grad_norm": 1.3010711669921875, "learning_rate": 1.956224988390216e-05, "loss": 1.1311, "step": 2731 }, { "epoch": 0.3653383257555496, "grad_norm": 1.1235120296478271, "learning_rate": 1.9561827297082658e-05, "loss": 0.9701, "step": 2732 }, { "epoch": 0.3654720513506285, "grad_norm": 1.2145131826400757, "learning_rate": 1.9561404510956006e-05, "loss": 1.0043, "step": 2733 }, { "epoch": 0.3656057769457074, "grad_norm": 1.1204999685287476, "learning_rate": 1.9560981525531027e-05, "loss": 1.0026, "step": 2734 }, { "epoch": 0.3657395025407863, "grad_norm": 1.080398440361023, "learning_rate": 1.956055834081654e-05, "loss": 0.9443, "step": 2735 }, { "epoch": 0.3658732281358652, "grad_norm": 1.1875252723693848, "learning_rate": 1.9560134956821362e-05, "loss": 0.9752, "step": 2736 }, { "epoch": 0.3660069537309441, "grad_norm": 1.1252415180206299, "learning_rate": 1.955971137355432e-05, "loss": 1.0312, "step": 2737 }, { "epoch": 0.366140679326023, "grad_norm": 1.080429196357727, "learning_rate": 1.9559287591024237e-05, "loss": 0.9836, "step": 2738 }, { "epoch": 0.3662744049211019, "grad_norm": 1.1133793592453003, "learning_rate": 1.955886360923996e-05, "loss": 0.9282, "step": 2739 }, { "epoch": 0.3664081305161808, "grad_norm": 1.1525962352752686, "learning_rate": 1.9558439428210312e-05, "loss": 0.9629, "step": 2740 }, { "epoch": 0.36654185611125967, "grad_norm": 1.160780906677246, "learning_rate": 1.955801504794414e-05, "loss": 1.0303, "step": 2741 }, { "epoch": 0.36667558170633857, "grad_norm": 1.1687520742416382, "learning_rate": 1.9557590468450294e-05, "loss": 0.9728, "step": 2742 }, { "epoch": 0.3668093073014175, "grad_norm": 1.0965487957000732, "learning_rate": 1.955716568973762e-05, "loss": 0.8744, "step": 2743 }, { "epoch": 0.3669430328964964, "grad_norm": 1.1608115434646606, "learning_rate": 1.955674071181497e-05, "loss": 0.9731, "step": 2744 }, { "epoch": 0.3670767584915753, "grad_norm": 0.9959310293197632, "learning_rate": 1.9556315534691204e-05, "loss": 0.9334, "step": 2745 }, { "epoch": 0.3672104840866542, "grad_norm": 0.9976779818534851, "learning_rate": 1.9555890158375188e-05, "loss": 0.9926, "step": 2746 }, { "epoch": 0.3673442096817331, "grad_norm": 1.0713155269622803, "learning_rate": 1.9555464582875783e-05, "loss": 1.0225, "step": 2747 }, { "epoch": 0.367477935276812, "grad_norm": 1.1484497785568237, "learning_rate": 1.9555038808201866e-05, "loss": 0.9535, "step": 2748 }, { "epoch": 0.3676116608718909, "grad_norm": 1.1695374250411987, "learning_rate": 1.9554612834362304e-05, "loss": 0.979, "step": 2749 }, { "epoch": 0.3677453864669698, "grad_norm": 1.320141077041626, "learning_rate": 1.955418666136598e-05, "loss": 1.0258, "step": 2750 }, { "epoch": 0.36787911206204865, "grad_norm": 1.0721712112426758, "learning_rate": 1.955376028922178e-05, "loss": 0.9083, "step": 2751 }, { "epoch": 0.36801283765712756, "grad_norm": 1.1393400430679321, "learning_rate": 1.955333371793859e-05, "loss": 0.9897, "step": 2752 }, { "epoch": 0.36814656325220646, "grad_norm": 1.088148593902588, "learning_rate": 1.9552906947525295e-05, "loss": 0.9625, "step": 2753 }, { "epoch": 0.36828028884728536, "grad_norm": 1.153430461883545, "learning_rate": 1.9552479977990802e-05, "loss": 1.0234, "step": 2754 }, { "epoch": 0.36841401444236427, "grad_norm": 1.0644422769546509, "learning_rate": 1.9552052809344004e-05, "loss": 0.9467, "step": 2755 }, { "epoch": 0.36854774003744317, "grad_norm": 1.189340353012085, "learning_rate": 1.95516254415938e-05, "loss": 1.0651, "step": 2756 }, { "epoch": 0.3686814656325221, "grad_norm": 1.1223186254501343, "learning_rate": 1.9551197874749107e-05, "loss": 1.0251, "step": 2757 }, { "epoch": 0.368815191227601, "grad_norm": 1.1488653421401978, "learning_rate": 1.955077010881883e-05, "loss": 0.8895, "step": 2758 }, { "epoch": 0.3689489168226799, "grad_norm": 1.2532360553741455, "learning_rate": 1.9550342143811896e-05, "loss": 1.029, "step": 2759 }, { "epoch": 0.36908264241775873, "grad_norm": 1.2013119459152222, "learning_rate": 1.954991397973722e-05, "loss": 1.0284, "step": 2760 }, { "epoch": 0.36921636801283764, "grad_norm": 1.2756202220916748, "learning_rate": 1.9549485616603718e-05, "loss": 1.1279, "step": 2761 }, { "epoch": 0.36935009360791654, "grad_norm": 1.0860332250595093, "learning_rate": 1.954905705442033e-05, "loss": 1.0546, "step": 2762 }, { "epoch": 0.36948381920299544, "grad_norm": 1.2069071531295776, "learning_rate": 1.9548628293195983e-05, "loss": 0.8869, "step": 2763 }, { "epoch": 0.36961754479807435, "grad_norm": 1.208526611328125, "learning_rate": 1.954819933293962e-05, "loss": 0.9653, "step": 2764 }, { "epoch": 0.36975127039315325, "grad_norm": 1.1659077405929565, "learning_rate": 1.9547770173660173e-05, "loss": 0.9589, "step": 2765 }, { "epoch": 0.36988499598823216, "grad_norm": 1.0698506832122803, "learning_rate": 1.9547340815366595e-05, "loss": 0.9502, "step": 2766 }, { "epoch": 0.37001872158331106, "grad_norm": 1.1477301120758057, "learning_rate": 1.9546911258067836e-05, "loss": 1.0648, "step": 2767 }, { "epoch": 0.37015244717838997, "grad_norm": 1.1000534296035767, "learning_rate": 1.9546481501772846e-05, "loss": 1.0517, "step": 2768 }, { "epoch": 0.37028617277346887, "grad_norm": 1.1689552068710327, "learning_rate": 1.9546051546490586e-05, "loss": 0.9205, "step": 2769 }, { "epoch": 0.3704198983685477, "grad_norm": 1.1020498275756836, "learning_rate": 1.9545621392230013e-05, "loss": 0.9347, "step": 2770 }, { "epoch": 0.3705536239636266, "grad_norm": 1.0748441219329834, "learning_rate": 1.9545191039000096e-05, "loss": 0.97, "step": 2771 }, { "epoch": 0.3706873495587055, "grad_norm": 1.2570973634719849, "learning_rate": 1.9544760486809808e-05, "loss": 0.9091, "step": 2772 }, { "epoch": 0.37082107515378443, "grad_norm": 1.1747227907180786, "learning_rate": 1.954432973566812e-05, "loss": 1.0013, "step": 2773 }, { "epoch": 0.37095480074886333, "grad_norm": 1.249719262123108, "learning_rate": 1.954389878558401e-05, "loss": 0.9705, "step": 2774 }, { "epoch": 0.37108852634394224, "grad_norm": 1.1774156093597412, "learning_rate": 1.9543467636566463e-05, "loss": 0.9938, "step": 2775 }, { "epoch": 0.37122225193902114, "grad_norm": 1.0194612741470337, "learning_rate": 1.9543036288624465e-05, "loss": 0.9119, "step": 2776 }, { "epoch": 0.37135597753410005, "grad_norm": 1.1757391691207886, "learning_rate": 1.954260474176701e-05, "loss": 0.9419, "step": 2777 }, { "epoch": 0.37148970312917895, "grad_norm": 1.1901240348815918, "learning_rate": 1.954217299600309e-05, "loss": 1.0257, "step": 2778 }, { "epoch": 0.3716234287242578, "grad_norm": 1.302170991897583, "learning_rate": 1.95417410513417e-05, "loss": 1.1054, "step": 2779 }, { "epoch": 0.3717571543193367, "grad_norm": 1.1504501104354858, "learning_rate": 1.9541308907791854e-05, "loss": 0.9817, "step": 2780 }, { "epoch": 0.3718908799144156, "grad_norm": 1.1870187520980835, "learning_rate": 1.954087656536255e-05, "loss": 1.0348, "step": 2781 }, { "epoch": 0.3720246055094945, "grad_norm": 0.997534453868866, "learning_rate": 1.9540444024062807e-05, "loss": 0.8195, "step": 2782 }, { "epoch": 0.3721583311045734, "grad_norm": 1.0806421041488647, "learning_rate": 1.9540011283901635e-05, "loss": 0.8837, "step": 2783 }, { "epoch": 0.3722920566996523, "grad_norm": 1.0678706169128418, "learning_rate": 1.9539578344888057e-05, "loss": 0.9852, "step": 2784 }, { "epoch": 0.3724257822947312, "grad_norm": 1.1064411401748657, "learning_rate": 1.95391452070311e-05, "loss": 0.9974, "step": 2785 }, { "epoch": 0.3725595078898101, "grad_norm": 1.1063323020935059, "learning_rate": 1.953871187033978e-05, "loss": 0.8882, "step": 2786 }, { "epoch": 0.37269323348488903, "grad_norm": 1.0284321308135986, "learning_rate": 1.9538278334823148e-05, "loss": 0.8442, "step": 2787 }, { "epoch": 0.3728269590799679, "grad_norm": 1.0987207889556885, "learning_rate": 1.9537844600490227e-05, "loss": 0.9145, "step": 2788 }, { "epoch": 0.3729606846750468, "grad_norm": 1.2421810626983643, "learning_rate": 1.9537410667350064e-05, "loss": 1.1888, "step": 2789 }, { "epoch": 0.3730944102701257, "grad_norm": 1.0936285257339478, "learning_rate": 1.95369765354117e-05, "loss": 0.9538, "step": 2790 }, { "epoch": 0.3732281358652046, "grad_norm": 1.1306225061416626, "learning_rate": 1.9536542204684187e-05, "loss": 0.9783, "step": 2791 }, { "epoch": 0.3733618614602835, "grad_norm": 1.174850344657898, "learning_rate": 1.953610767517658e-05, "loss": 0.8413, "step": 2792 }, { "epoch": 0.3734955870553624, "grad_norm": 1.167121410369873, "learning_rate": 1.953567294689793e-05, "loss": 1.0104, "step": 2793 }, { "epoch": 0.3736293126504413, "grad_norm": 1.197521686553955, "learning_rate": 1.95352380198573e-05, "loss": 1.1348, "step": 2794 }, { "epoch": 0.3737630382455202, "grad_norm": 1.1388661861419678, "learning_rate": 1.9534802894063764e-05, "loss": 1.0382, "step": 2795 }, { "epoch": 0.3738967638405991, "grad_norm": 1.040999174118042, "learning_rate": 1.953436756952638e-05, "loss": 0.8724, "step": 2796 }, { "epoch": 0.374030489435678, "grad_norm": 1.170767068862915, "learning_rate": 1.953393204625423e-05, "loss": 1.0474, "step": 2797 }, { "epoch": 0.37416421503075686, "grad_norm": 1.1557772159576416, "learning_rate": 1.953349632425639e-05, "loss": 1.0461, "step": 2798 }, { "epoch": 0.37429794062583577, "grad_norm": 1.1081714630126953, "learning_rate": 1.9533060403541937e-05, "loss": 0.9683, "step": 2799 }, { "epoch": 0.37443166622091467, "grad_norm": 1.2257702350616455, "learning_rate": 1.953262428411997e-05, "loss": 1.0143, "step": 2800 }, { "epoch": 0.3745653918159936, "grad_norm": 1.2131214141845703, "learning_rate": 1.9532187965999565e-05, "loss": 1.0098, "step": 2801 }, { "epoch": 0.3746991174110725, "grad_norm": 1.0500364303588867, "learning_rate": 1.9531751449189826e-05, "loss": 1.0206, "step": 2802 }, { "epoch": 0.3748328430061514, "grad_norm": 1.1045676469802856, "learning_rate": 1.953131473369985e-05, "loss": 0.916, "step": 2803 }, { "epoch": 0.3749665686012303, "grad_norm": 1.1069353818893433, "learning_rate": 1.9530877819538736e-05, "loss": 0.9999, "step": 2804 }, { "epoch": 0.3751002941963092, "grad_norm": 0.9986951351165771, "learning_rate": 1.9530440706715595e-05, "loss": 0.8825, "step": 2805 }, { "epoch": 0.3752340197913881, "grad_norm": 1.0826143026351929, "learning_rate": 1.9530003395239538e-05, "loss": 1.0313, "step": 2806 }, { "epoch": 0.37536774538646694, "grad_norm": 1.1827342510223389, "learning_rate": 1.9529565885119676e-05, "loss": 0.997, "step": 2807 }, { "epoch": 0.37550147098154585, "grad_norm": 1.122597098350525, "learning_rate": 1.9529128176365137e-05, "loss": 0.8826, "step": 2808 }, { "epoch": 0.37563519657662475, "grad_norm": 1.0263030529022217, "learning_rate": 1.9528690268985037e-05, "loss": 0.8772, "step": 2809 }, { "epoch": 0.37576892217170366, "grad_norm": 1.1097997426986694, "learning_rate": 1.9528252162988505e-05, "loss": 0.9948, "step": 2810 }, { "epoch": 0.37590264776678256, "grad_norm": 1.2263058423995972, "learning_rate": 1.9527813858384678e-05, "loss": 1.1656, "step": 2811 }, { "epoch": 0.37603637336186146, "grad_norm": 1.264751672744751, "learning_rate": 1.9527375355182684e-05, "loss": 1.1627, "step": 2812 }, { "epoch": 0.37617009895694037, "grad_norm": 1.0346555709838867, "learning_rate": 1.952693665339167e-05, "loss": 0.9163, "step": 2813 }, { "epoch": 0.3763038245520193, "grad_norm": 1.1582435369491577, "learning_rate": 1.9526497753020776e-05, "loss": 0.9958, "step": 2814 }, { "epoch": 0.3764375501470982, "grad_norm": 1.1092829704284668, "learning_rate": 1.9526058654079155e-05, "loss": 0.9527, "step": 2815 }, { "epoch": 0.376571275742177, "grad_norm": 1.1325383186340332, "learning_rate": 1.9525619356575955e-05, "loss": 1.0141, "step": 2816 }, { "epoch": 0.37670500133725593, "grad_norm": 1.1630785465240479, "learning_rate": 1.9525179860520334e-05, "loss": 0.9861, "step": 2817 }, { "epoch": 0.37683872693233483, "grad_norm": 1.2063350677490234, "learning_rate": 1.9524740165921454e-05, "loss": 0.958, "step": 2818 }, { "epoch": 0.37697245252741374, "grad_norm": 1.213502049446106, "learning_rate": 1.9524300272788477e-05, "loss": 0.9974, "step": 2819 }, { "epoch": 0.37710617812249264, "grad_norm": 1.0902763605117798, "learning_rate": 1.952386018113058e-05, "loss": 1.06, "step": 2820 }, { "epoch": 0.37723990371757155, "grad_norm": 1.064086675643921, "learning_rate": 1.9523419890956927e-05, "loss": 0.8734, "step": 2821 }, { "epoch": 0.37737362931265045, "grad_norm": 1.1437721252441406, "learning_rate": 1.9522979402276704e-05, "loss": 1.1146, "step": 2822 }, { "epoch": 0.37750735490772935, "grad_norm": 1.127562403678894, "learning_rate": 1.952253871509908e-05, "loss": 0.9732, "step": 2823 }, { "epoch": 0.37764108050280826, "grad_norm": 1.2367345094680786, "learning_rate": 1.9522097829433252e-05, "loss": 1.1216, "step": 2824 }, { "epoch": 0.37777480609788716, "grad_norm": 1.2287040948867798, "learning_rate": 1.952165674528841e-05, "loss": 1.0174, "step": 2825 }, { "epoch": 0.377908531692966, "grad_norm": 1.0770084857940674, "learning_rate": 1.9521215462673743e-05, "loss": 0.9984, "step": 2826 }, { "epoch": 0.3780422572880449, "grad_norm": 1.1580950021743774, "learning_rate": 1.9520773981598446e-05, "loss": 1.0212, "step": 2827 }, { "epoch": 0.3781759828831238, "grad_norm": 1.1108931303024292, "learning_rate": 1.952033230207173e-05, "loss": 0.9547, "step": 2828 }, { "epoch": 0.3783097084782027, "grad_norm": 1.1535866260528564, "learning_rate": 1.9519890424102795e-05, "loss": 1.0631, "step": 2829 }, { "epoch": 0.3784434340732816, "grad_norm": 1.2534866333007812, "learning_rate": 1.9519448347700855e-05, "loss": 1.0352, "step": 2830 }, { "epoch": 0.37857715966836053, "grad_norm": 1.1554940938949585, "learning_rate": 1.951900607287512e-05, "loss": 1.0495, "step": 2831 }, { "epoch": 0.37871088526343943, "grad_norm": 1.0243728160858154, "learning_rate": 1.9518563599634815e-05, "loss": 0.9284, "step": 2832 }, { "epoch": 0.37884461085851834, "grad_norm": 1.187166690826416, "learning_rate": 1.951812092798916e-05, "loss": 0.8786, "step": 2833 }, { "epoch": 0.37897833645359724, "grad_norm": 1.212857961654663, "learning_rate": 1.9517678057947385e-05, "loss": 0.9292, "step": 2834 }, { "epoch": 0.3791120620486761, "grad_norm": 1.006543755531311, "learning_rate": 1.9517234989518715e-05, "loss": 0.8352, "step": 2835 }, { "epoch": 0.379245787643755, "grad_norm": 1.2194923162460327, "learning_rate": 1.9516791722712388e-05, "loss": 1.1225, "step": 2836 }, { "epoch": 0.3793795132388339, "grad_norm": 1.0792709589004517, "learning_rate": 1.9516348257537646e-05, "loss": 1.0428, "step": 2837 }, { "epoch": 0.3795132388339128, "grad_norm": 1.2383439540863037, "learning_rate": 1.951590459400373e-05, "loss": 1.0347, "step": 2838 }, { "epoch": 0.3796469644289917, "grad_norm": 1.1096854209899902, "learning_rate": 1.9515460732119887e-05, "loss": 1.0192, "step": 2839 }, { "epoch": 0.3797806900240706, "grad_norm": 1.0400632619857788, "learning_rate": 1.9515016671895373e-05, "loss": 1.0516, "step": 2840 }, { "epoch": 0.3799144156191495, "grad_norm": 1.222752332687378, "learning_rate": 1.9514572413339442e-05, "loss": 1.1986, "step": 2841 }, { "epoch": 0.3800481412142284, "grad_norm": 1.0761499404907227, "learning_rate": 1.9514127956461348e-05, "loss": 0.8845, "step": 2842 }, { "epoch": 0.3801818668093073, "grad_norm": 1.014450192451477, "learning_rate": 1.9513683301270364e-05, "loss": 0.8417, "step": 2843 }, { "epoch": 0.3803155924043862, "grad_norm": 1.1654932498931885, "learning_rate": 1.9513238447775757e-05, "loss": 0.9648, "step": 2844 }, { "epoch": 0.3804493179994651, "grad_norm": 1.1912479400634766, "learning_rate": 1.9512793395986796e-05, "loss": 1.104, "step": 2845 }, { "epoch": 0.380583043594544, "grad_norm": 1.302092432975769, "learning_rate": 1.951234814591276e-05, "loss": 1.0041, "step": 2846 }, { "epoch": 0.3807167691896229, "grad_norm": 1.2056795358657837, "learning_rate": 1.951190269756293e-05, "loss": 1.1675, "step": 2847 }, { "epoch": 0.3808504947847018, "grad_norm": 1.1844807863235474, "learning_rate": 1.9511457050946586e-05, "loss": 0.9501, "step": 2848 }, { "epoch": 0.3809842203797807, "grad_norm": 1.2032376527786255, "learning_rate": 1.9511011206073026e-05, "loss": 1.0995, "step": 2849 }, { "epoch": 0.3811179459748596, "grad_norm": 1.1233458518981934, "learning_rate": 1.9510565162951538e-05, "loss": 0.9985, "step": 2850 }, { "epoch": 0.3812516715699385, "grad_norm": 1.1801958084106445, "learning_rate": 1.9510118921591417e-05, "loss": 1.0025, "step": 2851 }, { "epoch": 0.3813853971650174, "grad_norm": 0.9823777675628662, "learning_rate": 1.9509672482001968e-05, "loss": 1.0633, "step": 2852 }, { "epoch": 0.3815191227600963, "grad_norm": 1.221772313117981, "learning_rate": 1.9509225844192498e-05, "loss": 0.9752, "step": 2853 }, { "epoch": 0.38165284835517516, "grad_norm": 1.2179268598556519, "learning_rate": 1.9508779008172314e-05, "loss": 1.0377, "step": 2854 }, { "epoch": 0.38178657395025406, "grad_norm": 1.0407724380493164, "learning_rate": 1.950833197395073e-05, "loss": 0.8815, "step": 2855 }, { "epoch": 0.38192029954533296, "grad_norm": 1.1152023077011108, "learning_rate": 1.9507884741537063e-05, "loss": 0.9635, "step": 2856 }, { "epoch": 0.38205402514041187, "grad_norm": 1.1226112842559814, "learning_rate": 1.950743731094064e-05, "loss": 0.9984, "step": 2857 }, { "epoch": 0.38218775073549077, "grad_norm": 1.1583904027938843, "learning_rate": 1.9506989682170782e-05, "loss": 0.8843, "step": 2858 }, { "epoch": 0.3823214763305697, "grad_norm": 1.120026707649231, "learning_rate": 1.950654185523682e-05, "loss": 0.9417, "step": 2859 }, { "epoch": 0.3824552019256486, "grad_norm": 1.2614694833755493, "learning_rate": 1.950609383014809e-05, "loss": 1.0384, "step": 2860 }, { "epoch": 0.3825889275207275, "grad_norm": 1.121084451675415, "learning_rate": 1.950564560691393e-05, "loss": 0.9409, "step": 2861 }, { "epoch": 0.3827226531158064, "grad_norm": 1.1301448345184326, "learning_rate": 1.9505197185543688e-05, "loss": 1.0272, "step": 2862 }, { "epoch": 0.38285637871088524, "grad_norm": 1.1735868453979492, "learning_rate": 1.9504748566046702e-05, "loss": 0.9279, "step": 2863 }, { "epoch": 0.38299010430596414, "grad_norm": 1.1865742206573486, "learning_rate": 1.9504299748432328e-05, "loss": 1.0019, "step": 2864 }, { "epoch": 0.38312382990104304, "grad_norm": 1.2304835319519043, "learning_rate": 1.9503850732709918e-05, "loss": 1.0377, "step": 2865 }, { "epoch": 0.38325755549612195, "grad_norm": 1.2325650453567505, "learning_rate": 1.950340151888884e-05, "loss": 1.0499, "step": 2866 }, { "epoch": 0.38339128109120085, "grad_norm": 1.0876178741455078, "learning_rate": 1.9502952106978447e-05, "loss": 0.9706, "step": 2867 }, { "epoch": 0.38352500668627976, "grad_norm": 1.2775626182556152, "learning_rate": 1.950250249698811e-05, "loss": 0.9256, "step": 2868 }, { "epoch": 0.38365873228135866, "grad_norm": 1.1436847448349, "learning_rate": 1.9502052688927203e-05, "loss": 1.0165, "step": 2869 }, { "epoch": 0.38379245787643756, "grad_norm": 1.235756754875183, "learning_rate": 1.95016026828051e-05, "loss": 1.0759, "step": 2870 }, { "epoch": 0.38392618347151647, "grad_norm": 1.212841272354126, "learning_rate": 1.9501152478631177e-05, "loss": 0.9744, "step": 2871 }, { "epoch": 0.3840599090665954, "grad_norm": 1.2590534687042236, "learning_rate": 1.9500702076414827e-05, "loss": 1.0691, "step": 2872 }, { "epoch": 0.3841936346616742, "grad_norm": 0.9985617995262146, "learning_rate": 1.9500251476165432e-05, "loss": 0.8867, "step": 2873 }, { "epoch": 0.3843273602567531, "grad_norm": 1.1148408651351929, "learning_rate": 1.9499800677892386e-05, "loss": 0.9833, "step": 2874 }, { "epoch": 0.38446108585183203, "grad_norm": 1.1358232498168945, "learning_rate": 1.9499349681605087e-05, "loss": 0.9318, "step": 2875 }, { "epoch": 0.38459481144691093, "grad_norm": 1.1911143064498901, "learning_rate": 1.949889848731293e-05, "loss": 0.9853, "step": 2876 }, { "epoch": 0.38472853704198984, "grad_norm": 1.1217687129974365, "learning_rate": 1.9498447095025324e-05, "loss": 0.9325, "step": 2877 }, { "epoch": 0.38486226263706874, "grad_norm": 1.1208195686340332, "learning_rate": 1.949799550475168e-05, "loss": 0.9799, "step": 2878 }, { "epoch": 0.38499598823214765, "grad_norm": 1.069865345954895, "learning_rate": 1.9497543716501404e-05, "loss": 0.8813, "step": 2879 }, { "epoch": 0.38512971382722655, "grad_norm": 1.076357126235962, "learning_rate": 1.949709173028392e-05, "loss": 0.8909, "step": 2880 }, { "epoch": 0.38526343942230545, "grad_norm": 1.1292835474014282, "learning_rate": 1.949663954610865e-05, "loss": 0.9698, "step": 2881 }, { "epoch": 0.3853971650173843, "grad_norm": 1.1143873929977417, "learning_rate": 1.9496187163985012e-05, "loss": 0.964, "step": 2882 }, { "epoch": 0.3855308906124632, "grad_norm": 1.1518305540084839, "learning_rate": 1.949573458392244e-05, "loss": 0.9965, "step": 2883 }, { "epoch": 0.3856646162075421, "grad_norm": 1.1327941417694092, "learning_rate": 1.949528180593037e-05, "loss": 0.9174, "step": 2884 }, { "epoch": 0.385798341802621, "grad_norm": 1.0774791240692139, "learning_rate": 1.9494828830018232e-05, "loss": 0.9655, "step": 2885 }, { "epoch": 0.3859320673976999, "grad_norm": 1.1973756551742554, "learning_rate": 1.9494375656195475e-05, "loss": 1.0465, "step": 2886 }, { "epoch": 0.3860657929927788, "grad_norm": 1.221407413482666, "learning_rate": 1.9493922284471543e-05, "loss": 0.9981, "step": 2887 }, { "epoch": 0.3861995185878577, "grad_norm": 1.1910771131515503, "learning_rate": 1.9493468714855887e-05, "loss": 1.028, "step": 2888 }, { "epoch": 0.38633324418293663, "grad_norm": 1.173493504524231, "learning_rate": 1.9493014947357955e-05, "loss": 0.9901, "step": 2889 }, { "epoch": 0.38646696977801553, "grad_norm": 1.1722590923309326, "learning_rate": 1.9492560981987215e-05, "loss": 1.0734, "step": 2890 }, { "epoch": 0.3866006953730944, "grad_norm": 1.0097779035568237, "learning_rate": 1.949210681875312e-05, "loss": 0.9698, "step": 2891 }, { "epoch": 0.3867344209681733, "grad_norm": 1.1475749015808105, "learning_rate": 1.9491652457665146e-05, "loss": 1.0029, "step": 2892 }, { "epoch": 0.3868681465632522, "grad_norm": 1.246366024017334, "learning_rate": 1.9491197898732758e-05, "loss": 1.16, "step": 2893 }, { "epoch": 0.3870018721583311, "grad_norm": 1.1189351081848145, "learning_rate": 1.949074314196543e-05, "loss": 1.0476, "step": 2894 }, { "epoch": 0.38713559775341, "grad_norm": 1.1763771772384644, "learning_rate": 1.9490288187372642e-05, "loss": 1.0936, "step": 2895 }, { "epoch": 0.3872693233484889, "grad_norm": 1.1193116903305054, "learning_rate": 1.948983303496388e-05, "loss": 0.9663, "step": 2896 }, { "epoch": 0.3874030489435678, "grad_norm": 1.2144430875778198, "learning_rate": 1.9489377684748628e-05, "loss": 1.0633, "step": 2897 }, { "epoch": 0.3875367745386467, "grad_norm": 1.2183281183242798, "learning_rate": 1.9488922136736382e-05, "loss": 1.0913, "step": 2898 }, { "epoch": 0.3876705001337256, "grad_norm": 1.0372185707092285, "learning_rate": 1.948846639093663e-05, "loss": 1.0226, "step": 2899 }, { "epoch": 0.3878042257288045, "grad_norm": 1.1767842769622803, "learning_rate": 1.948801044735888e-05, "loss": 0.967, "step": 2900 }, { "epoch": 0.38793795132388337, "grad_norm": 1.1482349634170532, "learning_rate": 1.9487554306012625e-05, "loss": 1.0494, "step": 2901 }, { "epoch": 0.38807167691896227, "grad_norm": 1.1677573919296265, "learning_rate": 1.9487097966907385e-05, "loss": 1.0827, "step": 2902 }, { "epoch": 0.3882054025140412, "grad_norm": 1.0432132482528687, "learning_rate": 1.9486641430052664e-05, "loss": 0.9769, "step": 2903 }, { "epoch": 0.3883391281091201, "grad_norm": 1.225866675376892, "learning_rate": 1.948618469545798e-05, "loss": 1.02, "step": 2904 }, { "epoch": 0.388472853704199, "grad_norm": 1.1266239881515503, "learning_rate": 1.9485727763132853e-05, "loss": 1.0114, "step": 2905 }, { "epoch": 0.3886065792992779, "grad_norm": 1.1278605461120605, "learning_rate": 1.9485270633086807e-05, "loss": 1.0389, "step": 2906 }, { "epoch": 0.3887403048943568, "grad_norm": 1.2346082925796509, "learning_rate": 1.948481330532937e-05, "loss": 1.098, "step": 2907 }, { "epoch": 0.3888740304894357, "grad_norm": 1.0492706298828125, "learning_rate": 1.9484355779870078e-05, "loss": 0.9568, "step": 2908 }, { "epoch": 0.3890077560845146, "grad_norm": 1.157475233078003, "learning_rate": 1.9483898056718464e-05, "loss": 0.9138, "step": 2909 }, { "epoch": 0.38914148167959345, "grad_norm": 1.0891684293746948, "learning_rate": 1.948344013588407e-05, "loss": 0.9448, "step": 2910 }, { "epoch": 0.38927520727467235, "grad_norm": 1.0841211080551147, "learning_rate": 1.9482982017376444e-05, "loss": 0.9558, "step": 2911 }, { "epoch": 0.38940893286975126, "grad_norm": 1.029944658279419, "learning_rate": 1.948252370120513e-05, "loss": 0.9462, "step": 2912 }, { "epoch": 0.38954265846483016, "grad_norm": 1.0911540985107422, "learning_rate": 1.9482065187379682e-05, "loss": 1.002, "step": 2913 }, { "epoch": 0.38967638405990906, "grad_norm": 1.0668919086456299, "learning_rate": 1.948160647590966e-05, "loss": 0.9342, "step": 2914 }, { "epoch": 0.38981010965498797, "grad_norm": 1.1697531938552856, "learning_rate": 1.9481147566804623e-05, "loss": 1.0074, "step": 2915 }, { "epoch": 0.3899438352500669, "grad_norm": 1.1550745964050293, "learning_rate": 1.9480688460074136e-05, "loss": 0.9349, "step": 2916 }, { "epoch": 0.3900775608451458, "grad_norm": 1.1940799951553345, "learning_rate": 1.9480229155727776e-05, "loss": 0.9116, "step": 2917 }, { "epoch": 0.3902112864402247, "grad_norm": 1.0912806987762451, "learning_rate": 1.9479769653775107e-05, "loss": 0.8927, "step": 2918 }, { "epoch": 0.3903450120353036, "grad_norm": 1.2202863693237305, "learning_rate": 1.947930995422571e-05, "loss": 0.9929, "step": 2919 }, { "epoch": 0.39047873763038243, "grad_norm": 1.2713217735290527, "learning_rate": 1.9478850057089168e-05, "loss": 1.157, "step": 2920 }, { "epoch": 0.39061246322546134, "grad_norm": 1.1349575519561768, "learning_rate": 1.947838996237507e-05, "loss": 0.895, "step": 2921 }, { "epoch": 0.39074618882054024, "grad_norm": 1.2043986320495605, "learning_rate": 1.9477929670092997e-05, "loss": 1.0364, "step": 2922 }, { "epoch": 0.39087991441561915, "grad_norm": 1.1637495756149292, "learning_rate": 1.947746918025255e-05, "loss": 0.9952, "step": 2923 }, { "epoch": 0.39101364001069805, "grad_norm": 1.1131538152694702, "learning_rate": 1.947700849286333e-05, "loss": 0.9592, "step": 2924 }, { "epoch": 0.39114736560577695, "grad_norm": 1.221379280090332, "learning_rate": 1.9476547607934937e-05, "loss": 0.9818, "step": 2925 }, { "epoch": 0.39128109120085586, "grad_norm": 1.0862956047058105, "learning_rate": 1.9476086525476977e-05, "loss": 0.9342, "step": 2926 }, { "epoch": 0.39141481679593476, "grad_norm": 1.2069255113601685, "learning_rate": 1.947562524549906e-05, "loss": 0.9713, "step": 2927 }, { "epoch": 0.39154854239101367, "grad_norm": 1.136892318725586, "learning_rate": 1.9475163768010802e-05, "loss": 0.9268, "step": 2928 }, { "epoch": 0.3916822679860925, "grad_norm": 1.2015352249145508, "learning_rate": 1.9474702093021823e-05, "loss": 1.1889, "step": 2929 }, { "epoch": 0.3918159935811714, "grad_norm": 1.1238012313842773, "learning_rate": 1.9474240220541745e-05, "loss": 0.9225, "step": 2930 }, { "epoch": 0.3919497191762503, "grad_norm": 1.0850963592529297, "learning_rate": 1.9473778150580194e-05, "loss": 0.9251, "step": 2931 }, { "epoch": 0.3920834447713292, "grad_norm": 1.1071081161499023, "learning_rate": 1.9473315883146803e-05, "loss": 1.033, "step": 2932 }, { "epoch": 0.39221717036640813, "grad_norm": 1.2282966375350952, "learning_rate": 1.947285341825121e-05, "loss": 1.2082, "step": 2933 }, { "epoch": 0.39235089596148703, "grad_norm": 1.1162861585617065, "learning_rate": 1.947239075590305e-05, "loss": 0.9242, "step": 2934 }, { "epoch": 0.39248462155656594, "grad_norm": 1.0945684909820557, "learning_rate": 1.9471927896111967e-05, "loss": 1.0234, "step": 2935 }, { "epoch": 0.39261834715164484, "grad_norm": 1.1530365943908691, "learning_rate": 1.9471464838887614e-05, "loss": 0.9675, "step": 2936 }, { "epoch": 0.39275207274672375, "grad_norm": 1.1861568689346313, "learning_rate": 1.9471001584239637e-05, "loss": 0.9735, "step": 2937 }, { "epoch": 0.3928857983418026, "grad_norm": 1.0933645963668823, "learning_rate": 1.9470538132177696e-05, "loss": 0.8628, "step": 2938 }, { "epoch": 0.3930195239368815, "grad_norm": 1.1958930492401123, "learning_rate": 1.947007448271145e-05, "loss": 1.0275, "step": 2939 }, { "epoch": 0.3931532495319604, "grad_norm": 1.1497830152511597, "learning_rate": 1.9469610635850566e-05, "loss": 0.9463, "step": 2940 }, { "epoch": 0.3932869751270393, "grad_norm": 1.118687629699707, "learning_rate": 1.9469146591604703e-05, "loss": 1.0117, "step": 2941 }, { "epoch": 0.3934207007221182, "grad_norm": 1.1976524591445923, "learning_rate": 1.9468682349983544e-05, "loss": 0.9626, "step": 2942 }, { "epoch": 0.3935544263171971, "grad_norm": 1.1083488464355469, "learning_rate": 1.9468217910996767e-05, "loss": 1.0688, "step": 2943 }, { "epoch": 0.393688151912276, "grad_norm": 1.0521825551986694, "learning_rate": 1.946775327465404e-05, "loss": 0.9823, "step": 2944 }, { "epoch": 0.3938218775073549, "grad_norm": 1.0379694700241089, "learning_rate": 1.946728844096506e-05, "loss": 0.9873, "step": 2945 }, { "epoch": 0.3939556031024338, "grad_norm": 1.135482668876648, "learning_rate": 1.946682340993951e-05, "loss": 0.9242, "step": 2946 }, { "epoch": 0.39408932869751273, "grad_norm": 1.2415028810501099, "learning_rate": 1.9466358181587085e-05, "loss": 1.1414, "step": 2947 }, { "epoch": 0.3942230542925916, "grad_norm": 1.1224685907363892, "learning_rate": 1.9465892755917482e-05, "loss": 1.0327, "step": 2948 }, { "epoch": 0.3943567798876705, "grad_norm": 1.1045244932174683, "learning_rate": 1.9465427132940404e-05, "loss": 1.0562, "step": 2949 }, { "epoch": 0.3944905054827494, "grad_norm": 1.1819961071014404, "learning_rate": 1.946496131266555e-05, "loss": 0.941, "step": 2950 }, { "epoch": 0.3946242310778283, "grad_norm": 1.1414289474487305, "learning_rate": 1.946449529510264e-05, "loss": 1.0656, "step": 2951 }, { "epoch": 0.3947579566729072, "grad_norm": 1.454622745513916, "learning_rate": 1.946402908026138e-05, "loss": 1.0048, "step": 2952 }, { "epoch": 0.3948916822679861, "grad_norm": 1.0038478374481201, "learning_rate": 1.946356266815149e-05, "loss": 0.925, "step": 2953 }, { "epoch": 0.395025407863065, "grad_norm": 1.0276093482971191, "learning_rate": 1.946309605878269e-05, "loss": 0.9707, "step": 2954 }, { "epoch": 0.3951591334581439, "grad_norm": 1.1300634145736694, "learning_rate": 1.9462629252164712e-05, "loss": 1.0376, "step": 2955 }, { "epoch": 0.3952928590532228, "grad_norm": 1.034170389175415, "learning_rate": 1.9462162248307276e-05, "loss": 0.9123, "step": 2956 }, { "epoch": 0.39542658464830166, "grad_norm": 1.1481757164001465, "learning_rate": 1.9461695047220125e-05, "loss": 0.8317, "step": 2957 }, { "epoch": 0.39556031024338056, "grad_norm": 1.1233325004577637, "learning_rate": 1.9461227648912998e-05, "loss": 0.8334, "step": 2958 }, { "epoch": 0.39569403583845947, "grad_norm": 1.3017017841339111, "learning_rate": 1.9460760053395628e-05, "loss": 1.028, "step": 2959 }, { "epoch": 0.39582776143353837, "grad_norm": 1.1644599437713623, "learning_rate": 1.9460292260677773e-05, "loss": 1.0041, "step": 2960 }, { "epoch": 0.3959614870286173, "grad_norm": 1.05825674533844, "learning_rate": 1.9459824270769178e-05, "loss": 0.9709, "step": 2961 }, { "epoch": 0.3960952126236962, "grad_norm": 1.1265883445739746, "learning_rate": 1.9459356083679596e-05, "loss": 0.9406, "step": 2962 }, { "epoch": 0.3962289382187751, "grad_norm": 1.0600440502166748, "learning_rate": 1.9458887699418786e-05, "loss": 0.959, "step": 2963 }, { "epoch": 0.396362663813854, "grad_norm": 1.0856757164001465, "learning_rate": 1.9458419117996516e-05, "loss": 0.9721, "step": 2964 }, { "epoch": 0.3964963894089329, "grad_norm": 1.1213642358779907, "learning_rate": 1.945795033942255e-05, "loss": 0.954, "step": 2965 }, { "epoch": 0.39663011500401174, "grad_norm": 0.9919081330299377, "learning_rate": 1.945748136370666e-05, "loss": 0.9499, "step": 2966 }, { "epoch": 0.39676384059909064, "grad_norm": 1.1107960939407349, "learning_rate": 1.945701219085862e-05, "loss": 0.9882, "step": 2967 }, { "epoch": 0.39689756619416955, "grad_norm": 1.348785400390625, "learning_rate": 1.9456542820888212e-05, "loss": 0.9671, "step": 2968 }, { "epoch": 0.39703129178924845, "grad_norm": 1.0729196071624756, "learning_rate": 1.9456073253805214e-05, "loss": 0.9435, "step": 2969 }, { "epoch": 0.39716501738432736, "grad_norm": 1.0821197032928467, "learning_rate": 1.945560348961942e-05, "loss": 0.9303, "step": 2970 }, { "epoch": 0.39729874297940626, "grad_norm": 1.1943069696426392, "learning_rate": 1.945513352834062e-05, "loss": 0.997, "step": 2971 }, { "epoch": 0.39743246857448516, "grad_norm": 1.0914510488510132, "learning_rate": 1.945466336997861e-05, "loss": 1.0456, "step": 2972 }, { "epoch": 0.39756619416956407, "grad_norm": 1.1857821941375732, "learning_rate": 1.9454193014543185e-05, "loss": 0.9323, "step": 2973 }, { "epoch": 0.397699919764643, "grad_norm": 1.286543369293213, "learning_rate": 1.9453722462044157e-05, "loss": 1.0902, "step": 2974 }, { "epoch": 0.3978336453597219, "grad_norm": 1.052204966545105, "learning_rate": 1.9453251712491326e-05, "loss": 0.9273, "step": 2975 }, { "epoch": 0.3979673709548007, "grad_norm": 1.0948431491851807, "learning_rate": 1.9452780765894516e-05, "loss": 1.0412, "step": 2976 }, { "epoch": 0.39810109654987963, "grad_norm": 1.1378690004348755, "learning_rate": 1.945230962226353e-05, "loss": 0.8893, "step": 2977 }, { "epoch": 0.39823482214495853, "grad_norm": 1.2577379941940308, "learning_rate": 1.94518382816082e-05, "loss": 0.9549, "step": 2978 }, { "epoch": 0.39836854774003744, "grad_norm": 1.0572412014007568, "learning_rate": 1.945136674393834e-05, "loss": 0.955, "step": 2979 }, { "epoch": 0.39850227333511634, "grad_norm": 1.176315188407898, "learning_rate": 1.9450895009263786e-05, "loss": 0.933, "step": 2980 }, { "epoch": 0.39863599893019525, "grad_norm": 1.030555009841919, "learning_rate": 1.9450423077594373e-05, "loss": 0.955, "step": 2981 }, { "epoch": 0.39876972452527415, "grad_norm": 1.1320264339447021, "learning_rate": 1.944995094893993e-05, "loss": 0.9533, "step": 2982 }, { "epoch": 0.39890345012035305, "grad_norm": 1.2765610218048096, "learning_rate": 1.94494786233103e-05, "loss": 1.1805, "step": 2983 }, { "epoch": 0.39903717571543196, "grad_norm": 1.199271321296692, "learning_rate": 1.9449006100715334e-05, "loss": 1.1222, "step": 2984 }, { "epoch": 0.3991709013105108, "grad_norm": 1.1603643894195557, "learning_rate": 1.9448533381164876e-05, "loss": 0.9553, "step": 2985 }, { "epoch": 0.3993046269055897, "grad_norm": 1.1358752250671387, "learning_rate": 1.944806046466878e-05, "loss": 0.9784, "step": 2986 }, { "epoch": 0.3994383525006686, "grad_norm": 1.0535459518432617, "learning_rate": 1.9447587351236907e-05, "loss": 0.8616, "step": 2987 }, { "epoch": 0.3995720780957475, "grad_norm": 1.1692144870758057, "learning_rate": 1.9447114040879115e-05, "loss": 0.8675, "step": 2988 }, { "epoch": 0.3997058036908264, "grad_norm": 1.0296725034713745, "learning_rate": 1.9446640533605272e-05, "loss": 0.9998, "step": 2989 }, { "epoch": 0.3998395292859053, "grad_norm": 1.220070242881775, "learning_rate": 1.9446166829425244e-05, "loss": 1.0327, "step": 2990 }, { "epoch": 0.39997325488098423, "grad_norm": 1.226694941520691, "learning_rate": 1.944569292834891e-05, "loss": 1.1087, "step": 2991 }, { "epoch": 0.40010698047606313, "grad_norm": 1.043750286102295, "learning_rate": 1.944521883038614e-05, "loss": 0.9626, "step": 2992 }, { "epoch": 0.40024070607114204, "grad_norm": 1.0843777656555176, "learning_rate": 1.9444744535546827e-05, "loss": 0.9701, "step": 2993 }, { "epoch": 0.40037443166622094, "grad_norm": 1.1172428131103516, "learning_rate": 1.9444270043840854e-05, "loss": 0.8868, "step": 2994 }, { "epoch": 0.4005081572612998, "grad_norm": 1.0918567180633545, "learning_rate": 1.9443795355278105e-05, "loss": 1.0663, "step": 2995 }, { "epoch": 0.4006418828563787, "grad_norm": 1.0752836465835571, "learning_rate": 1.944332046986848e-05, "loss": 0.9964, "step": 2996 }, { "epoch": 0.4007756084514576, "grad_norm": 1.24544095993042, "learning_rate": 1.9442845387621876e-05, "loss": 0.9478, "step": 2997 }, { "epoch": 0.4009093340465365, "grad_norm": 0.9910604357719421, "learning_rate": 1.9442370108548194e-05, "loss": 0.8961, "step": 2998 }, { "epoch": 0.4010430596416154, "grad_norm": 1.1637219190597534, "learning_rate": 1.9441894632657343e-05, "loss": 1.0771, "step": 2999 }, { "epoch": 0.4011767852366943, "grad_norm": 1.2301656007766724, "learning_rate": 1.9441418959959237e-05, "loss": 1.1962, "step": 3000 }, { "epoch": 0.4013105108317732, "grad_norm": 1.0858397483825684, "learning_rate": 1.9440943090463783e-05, "loss": 1.1201, "step": 3001 }, { "epoch": 0.4014442364268521, "grad_norm": 1.1521450281143188, "learning_rate": 1.94404670241809e-05, "loss": 1.0366, "step": 3002 }, { "epoch": 0.401577962021931, "grad_norm": 1.0844823122024536, "learning_rate": 1.9439990761120523e-05, "loss": 1.0114, "step": 3003 }, { "epoch": 0.40171168761700987, "grad_norm": 1.101545810699463, "learning_rate": 1.943951430129257e-05, "loss": 1.0517, "step": 3004 }, { "epoch": 0.4018454132120888, "grad_norm": 1.1841049194335938, "learning_rate": 1.9439037644706974e-05, "loss": 1.0221, "step": 3005 }, { "epoch": 0.4019791388071677, "grad_norm": 1.16738760471344, "learning_rate": 1.9438560791373668e-05, "loss": 1.0512, "step": 3006 }, { "epoch": 0.4021128644022466, "grad_norm": 1.2542275190353394, "learning_rate": 1.9438083741302598e-05, "loss": 1.0459, "step": 3007 }, { "epoch": 0.4022465899973255, "grad_norm": 1.157622218132019, "learning_rate": 1.94376064945037e-05, "loss": 1.0963, "step": 3008 }, { "epoch": 0.4023803155924044, "grad_norm": 1.1552412509918213, "learning_rate": 1.9437129050986928e-05, "loss": 1.0438, "step": 3009 }, { "epoch": 0.4025140411874833, "grad_norm": 1.0435905456542969, "learning_rate": 1.943665141076223e-05, "loss": 0.9397, "step": 3010 }, { "epoch": 0.4026477667825622, "grad_norm": 1.1731706857681274, "learning_rate": 1.9436173573839565e-05, "loss": 1.0182, "step": 3011 }, { "epoch": 0.4027814923776411, "grad_norm": 1.1348472833633423, "learning_rate": 1.943569554022889e-05, "loss": 1.0548, "step": 3012 }, { "epoch": 0.40291521797271995, "grad_norm": 1.1312835216522217, "learning_rate": 1.943521730994017e-05, "loss": 1.0425, "step": 3013 }, { "epoch": 0.40304894356779886, "grad_norm": 1.1038933992385864, "learning_rate": 1.9434738882983373e-05, "loss": 1.0935, "step": 3014 }, { "epoch": 0.40318266916287776, "grad_norm": 1.1634535789489746, "learning_rate": 1.9434260259368473e-05, "loss": 0.9917, "step": 3015 }, { "epoch": 0.40331639475795666, "grad_norm": 1.065834879875183, "learning_rate": 1.9433781439105446e-05, "loss": 0.8737, "step": 3016 }, { "epoch": 0.40345012035303557, "grad_norm": 1.089040994644165, "learning_rate": 1.9433302422204272e-05, "loss": 1.0413, "step": 3017 }, { "epoch": 0.4035838459481145, "grad_norm": 1.160011649131775, "learning_rate": 1.9432823208674936e-05, "loss": 1.0662, "step": 3018 }, { "epoch": 0.4037175715431934, "grad_norm": 1.0672634840011597, "learning_rate": 1.9432343798527427e-05, "loss": 0.9126, "step": 3019 }, { "epoch": 0.4038512971382723, "grad_norm": 1.1072423458099365, "learning_rate": 1.9431864191771733e-05, "loss": 0.8837, "step": 3020 }, { "epoch": 0.4039850227333512, "grad_norm": 1.2003124952316284, "learning_rate": 1.943138438841786e-05, "loss": 0.9467, "step": 3021 }, { "epoch": 0.4041187483284301, "grad_norm": 1.2278048992156982, "learning_rate": 1.9430904388475803e-05, "loss": 1.1152, "step": 3022 }, { "epoch": 0.40425247392350894, "grad_norm": 1.2416614294052124, "learning_rate": 1.9430424191955567e-05, "loss": 1.0251, "step": 3023 }, { "epoch": 0.40438619951858784, "grad_norm": 1.1391545534133911, "learning_rate": 1.9429943798867163e-05, "loss": 0.9551, "step": 3024 }, { "epoch": 0.40451992511366675, "grad_norm": 0.9949235320091248, "learning_rate": 1.9429463209220604e-05, "loss": 0.9185, "step": 3025 }, { "epoch": 0.40465365070874565, "grad_norm": 1.070574164390564, "learning_rate": 1.942898242302591e-05, "loss": 0.954, "step": 3026 }, { "epoch": 0.40478737630382455, "grad_norm": 1.1673306226730347, "learning_rate": 1.9428501440293098e-05, "loss": 1.0681, "step": 3027 }, { "epoch": 0.40492110189890346, "grad_norm": 1.080773949623108, "learning_rate": 1.9428020261032196e-05, "loss": 0.9421, "step": 3028 }, { "epoch": 0.40505482749398236, "grad_norm": 1.1092828512191772, "learning_rate": 1.9427538885253233e-05, "loss": 0.9367, "step": 3029 }, { "epoch": 0.40518855308906127, "grad_norm": 1.0429340600967407, "learning_rate": 1.942705731296624e-05, "loss": 0.8834, "step": 3030 }, { "epoch": 0.40532227868414017, "grad_norm": 1.245124340057373, "learning_rate": 1.9426575544181263e-05, "loss": 1.0274, "step": 3031 }, { "epoch": 0.405456004279219, "grad_norm": 1.1455271244049072, "learning_rate": 1.9426093578908335e-05, "loss": 0.9729, "step": 3032 }, { "epoch": 0.4055897298742979, "grad_norm": 1.1643540859222412, "learning_rate": 1.9425611417157512e-05, "loss": 0.9866, "step": 3033 }, { "epoch": 0.4057234554693768, "grad_norm": 1.2185564041137695, "learning_rate": 1.9425129058938833e-05, "loss": 0.9572, "step": 3034 }, { "epoch": 0.40585718106445573, "grad_norm": 1.0965440273284912, "learning_rate": 1.942464650426236e-05, "loss": 0.9759, "step": 3035 }, { "epoch": 0.40599090665953463, "grad_norm": 1.1587576866149902, "learning_rate": 1.9424163753138144e-05, "loss": 1.0272, "step": 3036 }, { "epoch": 0.40612463225461354, "grad_norm": 1.0783741474151611, "learning_rate": 1.942368080557626e-05, "loss": 0.9277, "step": 3037 }, { "epoch": 0.40625835784969244, "grad_norm": 1.0751574039459229, "learning_rate": 1.9423197661586765e-05, "loss": 1.0057, "step": 3038 }, { "epoch": 0.40639208344477135, "grad_norm": 1.169594407081604, "learning_rate": 1.942271432117973e-05, "loss": 0.9691, "step": 3039 }, { "epoch": 0.40652580903985025, "grad_norm": 1.227099061012268, "learning_rate": 1.942223078436523e-05, "loss": 1.0368, "step": 3040 }, { "epoch": 0.4066595346349291, "grad_norm": 1.2535454034805298, "learning_rate": 1.942174705115335e-05, "loss": 1.0428, "step": 3041 }, { "epoch": 0.406793260230008, "grad_norm": 1.1885732412338257, "learning_rate": 1.9421263121554163e-05, "loss": 1.0246, "step": 3042 }, { "epoch": 0.4069269858250869, "grad_norm": 1.1729487180709839, "learning_rate": 1.9420778995577768e-05, "loss": 1.0452, "step": 3043 }, { "epoch": 0.4070607114201658, "grad_norm": 1.1860896348953247, "learning_rate": 1.9420294673234243e-05, "loss": 1.1481, "step": 3044 }, { "epoch": 0.4071944370152447, "grad_norm": 1.3106626272201538, "learning_rate": 1.9419810154533694e-05, "loss": 1.0033, "step": 3045 }, { "epoch": 0.4073281626103236, "grad_norm": 1.2986791133880615, "learning_rate": 1.9419325439486213e-05, "loss": 1.0379, "step": 3046 }, { "epoch": 0.4074618882054025, "grad_norm": 1.1774249076843262, "learning_rate": 1.941884052810191e-05, "loss": 1.1168, "step": 3047 }, { "epoch": 0.4075956138004814, "grad_norm": 1.2179279327392578, "learning_rate": 1.9418355420390885e-05, "loss": 0.9946, "step": 3048 }, { "epoch": 0.40772933939556033, "grad_norm": 1.1613017320632935, "learning_rate": 1.941787011636326e-05, "loss": 1.0165, "step": 3049 }, { "epoch": 0.40786306499063923, "grad_norm": 1.1061269044876099, "learning_rate": 1.9417384616029137e-05, "loss": 0.9082, "step": 3050 }, { "epoch": 0.4079967905857181, "grad_norm": 1.0965896844863892, "learning_rate": 1.9416898919398646e-05, "loss": 0.9004, "step": 3051 }, { "epoch": 0.408130516180797, "grad_norm": 1.0301753282546997, "learning_rate": 1.9416413026481907e-05, "loss": 0.8921, "step": 3052 }, { "epoch": 0.4082642417758759, "grad_norm": 1.2491261959075928, "learning_rate": 1.9415926937289054e-05, "loss": 0.9608, "step": 3053 }, { "epoch": 0.4083979673709548, "grad_norm": 1.0893139839172363, "learning_rate": 1.941544065183021e-05, "loss": 1.0465, "step": 3054 }, { "epoch": 0.4085316929660337, "grad_norm": 1.0968921184539795, "learning_rate": 1.9414954170115516e-05, "loss": 0.9938, "step": 3055 }, { "epoch": 0.4086654185611126, "grad_norm": 1.203365445137024, "learning_rate": 1.9414467492155113e-05, "loss": 0.9408, "step": 3056 }, { "epoch": 0.4087991441561915, "grad_norm": 1.0484868288040161, "learning_rate": 1.9413980617959137e-05, "loss": 1.0231, "step": 3057 }, { "epoch": 0.4089328697512704, "grad_norm": 1.158787488937378, "learning_rate": 1.941349354753775e-05, "loss": 0.962, "step": 3058 }, { "epoch": 0.4090665953463493, "grad_norm": 1.2331900596618652, "learning_rate": 1.9413006280901098e-05, "loss": 0.9851, "step": 3059 }, { "epoch": 0.40920032094142816, "grad_norm": 1.1405267715454102, "learning_rate": 1.9412518818059335e-05, "loss": 0.9297, "step": 3060 }, { "epoch": 0.40933404653650707, "grad_norm": 1.1454408168792725, "learning_rate": 1.9412031159022624e-05, "loss": 1.0723, "step": 3061 }, { "epoch": 0.40946777213158597, "grad_norm": 1.1048020124435425, "learning_rate": 1.941154330380113e-05, "loss": 0.891, "step": 3062 }, { "epoch": 0.4096014977266649, "grad_norm": 1.266876220703125, "learning_rate": 1.9411055252405022e-05, "loss": 0.9157, "step": 3063 }, { "epoch": 0.4097352233217438, "grad_norm": 1.1568734645843506, "learning_rate": 1.9410567004844473e-05, "loss": 1.0723, "step": 3064 }, { "epoch": 0.4098689489168227, "grad_norm": 1.2634341716766357, "learning_rate": 1.9410078561129657e-05, "loss": 1.0037, "step": 3065 }, { "epoch": 0.4100026745119016, "grad_norm": 1.0296622514724731, "learning_rate": 1.9409589921270758e-05, "loss": 0.9384, "step": 3066 }, { "epoch": 0.4101364001069805, "grad_norm": 1.115875005722046, "learning_rate": 1.9409101085277966e-05, "loss": 0.9314, "step": 3067 }, { "epoch": 0.4102701257020594, "grad_norm": 1.1686104536056519, "learning_rate": 1.9408612053161464e-05, "loss": 1.0656, "step": 3068 }, { "epoch": 0.41040385129713824, "grad_norm": 1.030661940574646, "learning_rate": 1.9408122824931444e-05, "loss": 0.8682, "step": 3069 }, { "epoch": 0.41053757689221715, "grad_norm": 1.24689519405365, "learning_rate": 1.9407633400598107e-05, "loss": 1.061, "step": 3070 }, { "epoch": 0.41067130248729605, "grad_norm": 1.0940386056900024, "learning_rate": 1.9407143780171656e-05, "loss": 1.0071, "step": 3071 }, { "epoch": 0.41080502808237496, "grad_norm": 1.0001624822616577, "learning_rate": 1.9406653963662293e-05, "loss": 0.897, "step": 3072 }, { "epoch": 0.41093875367745386, "grad_norm": 1.158817172050476, "learning_rate": 1.9406163951080228e-05, "loss": 1.0116, "step": 3073 }, { "epoch": 0.41107247927253276, "grad_norm": 1.0831011533737183, "learning_rate": 1.9405673742435677e-05, "loss": 0.976, "step": 3074 }, { "epoch": 0.41120620486761167, "grad_norm": 1.1321932077407837, "learning_rate": 1.940518333773886e-05, "loss": 1.0039, "step": 3075 }, { "epoch": 0.4113399304626906, "grad_norm": 1.2162421941757202, "learning_rate": 1.940469273699999e-05, "loss": 1.0027, "step": 3076 }, { "epoch": 0.4114736560577695, "grad_norm": 1.2145994901657104, "learning_rate": 1.9404201940229305e-05, "loss": 1.044, "step": 3077 }, { "epoch": 0.4116073816528484, "grad_norm": 1.2762770652770996, "learning_rate": 1.9403710947437027e-05, "loss": 1.1144, "step": 3078 }, { "epoch": 0.41174110724792723, "grad_norm": 1.1265672445297241, "learning_rate": 1.9403219758633397e-05, "loss": 0.9767, "step": 3079 }, { "epoch": 0.41187483284300613, "grad_norm": 1.2573039531707764, "learning_rate": 1.9402728373828643e-05, "loss": 1.0313, "step": 3080 }, { "epoch": 0.41200855843808504, "grad_norm": 1.22186279296875, "learning_rate": 1.9402236793033015e-05, "loss": 1.0282, "step": 3081 }, { "epoch": 0.41214228403316394, "grad_norm": 1.14968740940094, "learning_rate": 1.940174501625676e-05, "loss": 1.0852, "step": 3082 }, { "epoch": 0.41227600962824285, "grad_norm": 1.2463802099227905, "learning_rate": 1.9401253043510126e-05, "loss": 1.0022, "step": 3083 }, { "epoch": 0.41240973522332175, "grad_norm": 0.998227596282959, "learning_rate": 1.9400760874803366e-05, "loss": 0.9332, "step": 3084 }, { "epoch": 0.41254346081840065, "grad_norm": 1.1748766899108887, "learning_rate": 1.940026851014674e-05, "loss": 0.9516, "step": 3085 }, { "epoch": 0.41267718641347956, "grad_norm": 1.0752381086349487, "learning_rate": 1.9399775949550516e-05, "loss": 0.9656, "step": 3086 }, { "epoch": 0.41281091200855846, "grad_norm": 1.2725883722305298, "learning_rate": 1.9399283193024957e-05, "loss": 1.1041, "step": 3087 }, { "epoch": 0.4129446376036373, "grad_norm": 1.0904481410980225, "learning_rate": 1.9398790240580333e-05, "loss": 0.9853, "step": 3088 }, { "epoch": 0.4130783631987162, "grad_norm": 0.9968140125274658, "learning_rate": 1.9398297092226918e-05, "loss": 0.977, "step": 3089 }, { "epoch": 0.4132120887937951, "grad_norm": 1.0959019660949707, "learning_rate": 1.9397803747974996e-05, "loss": 1.0732, "step": 3090 }, { "epoch": 0.413345814388874, "grad_norm": 1.2900850772857666, "learning_rate": 1.9397310207834847e-05, "loss": 1.0574, "step": 3091 }, { "epoch": 0.4134795399839529, "grad_norm": 1.2283388376235962, "learning_rate": 1.9396816471816756e-05, "loss": 0.9684, "step": 3092 }, { "epoch": 0.41361326557903183, "grad_norm": 1.1191096305847168, "learning_rate": 1.9396322539931025e-05, "loss": 0.9693, "step": 3093 }, { "epoch": 0.41374699117411073, "grad_norm": 1.1841137409210205, "learning_rate": 1.9395828412187935e-05, "loss": 0.9633, "step": 3094 }, { "epoch": 0.41388071676918964, "grad_norm": 1.0935860872268677, "learning_rate": 1.9395334088597793e-05, "loss": 1.0126, "step": 3095 }, { "epoch": 0.41401444236426854, "grad_norm": 1.212520718574524, "learning_rate": 1.9394839569170907e-05, "loss": 1.1544, "step": 3096 }, { "epoch": 0.41414816795934745, "grad_norm": 1.0677695274353027, "learning_rate": 1.9394344853917575e-05, "loss": 1.0052, "step": 3097 }, { "epoch": 0.4142818935544263, "grad_norm": 1.2430074214935303, "learning_rate": 1.9393849942848116e-05, "loss": 1.0152, "step": 3098 }, { "epoch": 0.4144156191495052, "grad_norm": 1.2330094575881958, "learning_rate": 1.9393354835972846e-05, "loss": 1.0846, "step": 3099 }, { "epoch": 0.4145493447445841, "grad_norm": 1.1950565576553345, "learning_rate": 1.9392859533302077e-05, "loss": 1.0088, "step": 3100 }, { "epoch": 0.414683070339663, "grad_norm": 1.3253813982009888, "learning_rate": 1.9392364034846145e-05, "loss": 0.9647, "step": 3101 }, { "epoch": 0.4148167959347419, "grad_norm": 1.0363998413085938, "learning_rate": 1.9391868340615366e-05, "loss": 0.9103, "step": 3102 }, { "epoch": 0.4149505215298208, "grad_norm": 1.0540871620178223, "learning_rate": 1.9391372450620087e-05, "loss": 0.8239, "step": 3103 }, { "epoch": 0.4150842471248997, "grad_norm": 1.19146728515625, "learning_rate": 1.939087636487063e-05, "loss": 1.1811, "step": 3104 }, { "epoch": 0.4152179727199786, "grad_norm": 1.251397967338562, "learning_rate": 1.939038008337734e-05, "loss": 1.2203, "step": 3105 }, { "epoch": 0.4153516983150575, "grad_norm": 1.1168112754821777, "learning_rate": 1.938988360615057e-05, "loss": 0.9466, "step": 3106 }, { "epoch": 0.4154854239101364, "grad_norm": 1.0601052045822144, "learning_rate": 1.9389386933200653e-05, "loss": 1.0691, "step": 3107 }, { "epoch": 0.4156191495052153, "grad_norm": 1.2435392141342163, "learning_rate": 1.9388890064537954e-05, "loss": 0.9285, "step": 3108 }, { "epoch": 0.4157528751002942, "grad_norm": 1.3495535850524902, "learning_rate": 1.9388393000172825e-05, "loss": 0.9492, "step": 3109 }, { "epoch": 0.4158866006953731, "grad_norm": 1.1146191358566284, "learning_rate": 1.9387895740115628e-05, "loss": 0.9502, "step": 3110 }, { "epoch": 0.416020326290452, "grad_norm": 1.157058835029602, "learning_rate": 1.9387398284376727e-05, "loss": 1.0556, "step": 3111 }, { "epoch": 0.4161540518855309, "grad_norm": 1.2075316905975342, "learning_rate": 1.9386900632966494e-05, "loss": 1.009, "step": 3112 }, { "epoch": 0.4162877774806098, "grad_norm": 1.1105562448501587, "learning_rate": 1.93864027858953e-05, "loss": 1.0024, "step": 3113 }, { "epoch": 0.4164215030756887, "grad_norm": 1.0195220708847046, "learning_rate": 1.938590474317352e-05, "loss": 0.9076, "step": 3114 }, { "epoch": 0.4165552286707676, "grad_norm": 1.2019658088684082, "learning_rate": 1.9385406504811534e-05, "loss": 0.9827, "step": 3115 }, { "epoch": 0.41668895426584646, "grad_norm": 1.2836029529571533, "learning_rate": 1.9384908070819733e-05, "loss": 1.0408, "step": 3116 }, { "epoch": 0.41682267986092536, "grad_norm": 1.196293592453003, "learning_rate": 1.9384409441208503e-05, "loss": 1.0515, "step": 3117 }, { "epoch": 0.41695640545600426, "grad_norm": 1.4096437692642212, "learning_rate": 1.9383910615988238e-05, "loss": 1.1003, "step": 3118 }, { "epoch": 0.41709013105108317, "grad_norm": 1.0785411596298218, "learning_rate": 1.9383411595169335e-05, "loss": 0.9615, "step": 3119 }, { "epoch": 0.4172238566461621, "grad_norm": 1.1911101341247559, "learning_rate": 1.9382912378762197e-05, "loss": 1.0025, "step": 3120 }, { "epoch": 0.417357582241241, "grad_norm": 1.0992122888565063, "learning_rate": 1.938241296677723e-05, "loss": 0.9557, "step": 3121 }, { "epoch": 0.4174913078363199, "grad_norm": 1.1893155574798584, "learning_rate": 1.9381913359224844e-05, "loss": 1.1261, "step": 3122 }, { "epoch": 0.4176250334313988, "grad_norm": 1.04340398311615, "learning_rate": 1.9381413556115446e-05, "loss": 0.919, "step": 3123 }, { "epoch": 0.4177587590264777, "grad_norm": 1.1195671558380127, "learning_rate": 1.9380913557459466e-05, "loss": 1.0603, "step": 3124 }, { "epoch": 0.4178924846215566, "grad_norm": 1.0873807668685913, "learning_rate": 1.9380413363267315e-05, "loss": 0.9804, "step": 3125 }, { "epoch": 0.41802621021663544, "grad_norm": 1.2355629205703735, "learning_rate": 1.9379912973549427e-05, "loss": 0.9814, "step": 3126 }, { "epoch": 0.41815993581171435, "grad_norm": 1.0799921751022339, "learning_rate": 1.9379412388316226e-05, "loss": 0.9747, "step": 3127 }, { "epoch": 0.41829366140679325, "grad_norm": 1.3345062732696533, "learning_rate": 1.9378911607578148e-05, "loss": 0.963, "step": 3128 }, { "epoch": 0.41842738700187215, "grad_norm": 1.0292000770568848, "learning_rate": 1.9378410631345634e-05, "loss": 0.9439, "step": 3129 }, { "epoch": 0.41856111259695106, "grad_norm": 1.104035496711731, "learning_rate": 1.9377909459629125e-05, "loss": 0.9696, "step": 3130 }, { "epoch": 0.41869483819202996, "grad_norm": 1.1421610116958618, "learning_rate": 1.9377408092439064e-05, "loss": 1.0003, "step": 3131 }, { "epoch": 0.41882856378710887, "grad_norm": 1.242640733718872, "learning_rate": 1.937690652978591e-05, "loss": 0.9891, "step": 3132 }, { "epoch": 0.41896228938218777, "grad_norm": 1.0270787477493286, "learning_rate": 1.9376404771680107e-05, "loss": 1.0013, "step": 3133 }, { "epoch": 0.4190960149772667, "grad_norm": 1.0702391862869263, "learning_rate": 1.9375902818132123e-05, "loss": 1.0552, "step": 3134 }, { "epoch": 0.4192297405723455, "grad_norm": 1.0596153736114502, "learning_rate": 1.9375400669152414e-05, "loss": 0.933, "step": 3135 }, { "epoch": 0.4193634661674244, "grad_norm": 1.139905333518982, "learning_rate": 1.9374898324751447e-05, "loss": 1.0294, "step": 3136 }, { "epoch": 0.41949719176250333, "grad_norm": 1.2888685464859009, "learning_rate": 1.9374395784939698e-05, "loss": 1.1142, "step": 3137 }, { "epoch": 0.41963091735758223, "grad_norm": 1.2379025220870972, "learning_rate": 1.9373893049727643e-05, "loss": 1.0417, "step": 3138 }, { "epoch": 0.41976464295266114, "grad_norm": 1.2016184329986572, "learning_rate": 1.937339011912575e-05, "loss": 1.0129, "step": 3139 }, { "epoch": 0.41989836854774004, "grad_norm": 1.1542670726776123, "learning_rate": 1.937288699314451e-05, "loss": 1.0602, "step": 3140 }, { "epoch": 0.42003209414281895, "grad_norm": 1.1953070163726807, "learning_rate": 1.9372383671794415e-05, "loss": 1.0738, "step": 3141 }, { "epoch": 0.42016581973789785, "grad_norm": 1.0075047016143799, "learning_rate": 1.9371880155085948e-05, "loss": 0.9126, "step": 3142 }, { "epoch": 0.42029954533297675, "grad_norm": 1.151087999343872, "learning_rate": 1.937137644302961e-05, "loss": 1.0463, "step": 3143 }, { "epoch": 0.4204332709280556, "grad_norm": 1.0070656538009644, "learning_rate": 1.937087253563589e-05, "loss": 0.8486, "step": 3144 }, { "epoch": 0.4205669965231345, "grad_norm": 1.0248143672943115, "learning_rate": 1.9370368432915306e-05, "loss": 0.9569, "step": 3145 }, { "epoch": 0.4207007221182134, "grad_norm": 1.0660024881362915, "learning_rate": 1.9369864134878352e-05, "loss": 0.9551, "step": 3146 }, { "epoch": 0.4208344477132923, "grad_norm": 1.2577528953552246, "learning_rate": 1.9369359641535554e-05, "loss": 1.1584, "step": 3147 }, { "epoch": 0.4209681733083712, "grad_norm": 1.084814190864563, "learning_rate": 1.9368854952897416e-05, "loss": 0.9724, "step": 3148 }, { "epoch": 0.4211018989034501, "grad_norm": 1.2028355598449707, "learning_rate": 1.936835006897446e-05, "loss": 1.0177, "step": 3149 }, { "epoch": 0.421235624498529, "grad_norm": 1.0317140817642212, "learning_rate": 1.936784498977721e-05, "loss": 1.0347, "step": 3150 }, { "epoch": 0.42136935009360793, "grad_norm": 1.0233596563339233, "learning_rate": 1.93673397153162e-05, "loss": 0.8617, "step": 3151 }, { "epoch": 0.42150307568868683, "grad_norm": 1.1906932592391968, "learning_rate": 1.9366834245601955e-05, "loss": 1.0344, "step": 3152 }, { "epoch": 0.42163680128376574, "grad_norm": 1.1631810665130615, "learning_rate": 1.9366328580645013e-05, "loss": 1.1191, "step": 3153 }, { "epoch": 0.4217705268788446, "grad_norm": 1.1617748737335205, "learning_rate": 1.9365822720455915e-05, "loss": 1.0361, "step": 3154 }, { "epoch": 0.4219042524739235, "grad_norm": 1.077890396118164, "learning_rate": 1.9365316665045204e-05, "loss": 0.9957, "step": 3155 }, { "epoch": 0.4220379780690024, "grad_norm": 1.1680024862289429, "learning_rate": 1.9364810414423428e-05, "loss": 1.0546, "step": 3156 }, { "epoch": 0.4221717036640813, "grad_norm": 1.2040678262710571, "learning_rate": 1.936430396860114e-05, "loss": 0.9844, "step": 3157 }, { "epoch": 0.4223054292591602, "grad_norm": 1.217994213104248, "learning_rate": 1.93637973275889e-05, "loss": 0.8955, "step": 3158 }, { "epoch": 0.4224391548542391, "grad_norm": 1.1274604797363281, "learning_rate": 1.936329049139726e-05, "loss": 1.0135, "step": 3159 }, { "epoch": 0.422572880449318, "grad_norm": 1.092786431312561, "learning_rate": 1.9362783460036794e-05, "loss": 0.9295, "step": 3160 }, { "epoch": 0.4227066060443969, "grad_norm": 1.092376947402954, "learning_rate": 1.9362276233518063e-05, "loss": 0.9725, "step": 3161 }, { "epoch": 0.4228403316394758, "grad_norm": 1.1029256582260132, "learning_rate": 1.936176881185164e-05, "loss": 0.9192, "step": 3162 }, { "epoch": 0.42297405723455467, "grad_norm": 1.1618587970733643, "learning_rate": 1.936126119504811e-05, "loss": 1.0509, "step": 3163 }, { "epoch": 0.42310778282963357, "grad_norm": 1.178165078163147, "learning_rate": 1.9360753383118048e-05, "loss": 0.9596, "step": 3164 }, { "epoch": 0.4232415084247125, "grad_norm": 1.1823660135269165, "learning_rate": 1.9360245376072035e-05, "loss": 1.0452, "step": 3165 }, { "epoch": 0.4233752340197914, "grad_norm": 1.1500324010849, "learning_rate": 1.9359737173920667e-05, "loss": 1.0456, "step": 3166 }, { "epoch": 0.4235089596148703, "grad_norm": 1.0814740657806396, "learning_rate": 1.935922877667453e-05, "loss": 0.9361, "step": 3167 }, { "epoch": 0.4236426852099492, "grad_norm": 1.0281052589416504, "learning_rate": 1.935872018434423e-05, "loss": 0.9499, "step": 3168 }, { "epoch": 0.4237764108050281, "grad_norm": 1.0019768476486206, "learning_rate": 1.9358211396940358e-05, "loss": 0.8612, "step": 3169 }, { "epoch": 0.423910136400107, "grad_norm": 1.1525962352752686, "learning_rate": 1.9357702414473528e-05, "loss": 1.0215, "step": 3170 }, { "epoch": 0.4240438619951859, "grad_norm": 1.1244524717330933, "learning_rate": 1.9357193236954342e-05, "loss": 1.0456, "step": 3171 }, { "epoch": 0.4241775875902648, "grad_norm": 1.05103600025177, "learning_rate": 1.9356683864393424e-05, "loss": 0.8903, "step": 3172 }, { "epoch": 0.42431131318534365, "grad_norm": 1.1584497690200806, "learning_rate": 1.9356174296801376e-05, "loss": 1.0763, "step": 3173 }, { "epoch": 0.42444503878042256, "grad_norm": 1.1762516498565674, "learning_rate": 1.9355664534188833e-05, "loss": 0.9392, "step": 3174 }, { "epoch": 0.42457876437550146, "grad_norm": 1.168289303779602, "learning_rate": 1.9355154576566414e-05, "loss": 0.9417, "step": 3175 }, { "epoch": 0.42471248997058036, "grad_norm": 1.1297686100006104, "learning_rate": 1.9354644423944747e-05, "loss": 0.9293, "step": 3176 }, { "epoch": 0.42484621556565927, "grad_norm": 1.2507750988006592, "learning_rate": 1.935413407633447e-05, "loss": 1.0181, "step": 3177 }, { "epoch": 0.4249799411607382, "grad_norm": 1.1753677129745483, "learning_rate": 1.935362353374622e-05, "loss": 1.042, "step": 3178 }, { "epoch": 0.4251136667558171, "grad_norm": 1.0900845527648926, "learning_rate": 1.9353112796190637e-05, "loss": 0.9168, "step": 3179 }, { "epoch": 0.425247392350896, "grad_norm": 1.1463525295257568, "learning_rate": 1.935260186367837e-05, "loss": 1.0905, "step": 3180 }, { "epoch": 0.4253811179459749, "grad_norm": 1.1809686422348022, "learning_rate": 1.9352090736220065e-05, "loss": 1.1016, "step": 3181 }, { "epoch": 0.42551484354105373, "grad_norm": 1.1723607778549194, "learning_rate": 1.9351579413826375e-05, "loss": 1.0922, "step": 3182 }, { "epoch": 0.42564856913613264, "grad_norm": 1.196715235710144, "learning_rate": 1.9351067896507964e-05, "loss": 1.0483, "step": 3183 }, { "epoch": 0.42578229473121154, "grad_norm": 0.9938531517982483, "learning_rate": 1.935055618427549e-05, "loss": 0.839, "step": 3184 }, { "epoch": 0.42591602032629045, "grad_norm": 1.0874520540237427, "learning_rate": 1.935004427713962e-05, "loss": 0.91, "step": 3185 }, { "epoch": 0.42604974592136935, "grad_norm": 1.1492681503295898, "learning_rate": 1.9349532175111023e-05, "loss": 1.0289, "step": 3186 }, { "epoch": 0.42618347151644825, "grad_norm": 1.1197785139083862, "learning_rate": 1.9349019878200374e-05, "loss": 1.0175, "step": 3187 }, { "epoch": 0.42631719711152716, "grad_norm": 1.2450969219207764, "learning_rate": 1.9348507386418354e-05, "loss": 0.9558, "step": 3188 }, { "epoch": 0.42645092270660606, "grad_norm": 1.167239785194397, "learning_rate": 1.934799469977564e-05, "loss": 0.9513, "step": 3189 }, { "epoch": 0.42658464830168497, "grad_norm": 0.9940695762634277, "learning_rate": 1.9347481818282927e-05, "loss": 0.9621, "step": 3190 }, { "epoch": 0.4267183738967638, "grad_norm": 1.1731464862823486, "learning_rate": 1.9346968741950896e-05, "loss": 1.0492, "step": 3191 }, { "epoch": 0.4268520994918427, "grad_norm": 1.1179336309432983, "learning_rate": 1.9346455470790245e-05, "loss": 0.9194, "step": 3192 }, { "epoch": 0.4269858250869216, "grad_norm": 1.0085368156433105, "learning_rate": 1.9345942004811674e-05, "loss": 0.8502, "step": 3193 }, { "epoch": 0.4271195506820005, "grad_norm": 1.1526203155517578, "learning_rate": 1.9345428344025883e-05, "loss": 0.9507, "step": 3194 }, { "epoch": 0.42725327627707943, "grad_norm": 1.0069924592971802, "learning_rate": 1.9344914488443585e-05, "loss": 0.981, "step": 3195 }, { "epoch": 0.42738700187215833, "grad_norm": 1.0836031436920166, "learning_rate": 1.9344400438075487e-05, "loss": 1.0073, "step": 3196 }, { "epoch": 0.42752072746723724, "grad_norm": 1.18271803855896, "learning_rate": 1.93438861929323e-05, "loss": 1.0621, "step": 3197 }, { "epoch": 0.42765445306231614, "grad_norm": 1.1367918252944946, "learning_rate": 1.9343371753024747e-05, "loss": 0.9822, "step": 3198 }, { "epoch": 0.42778817865739505, "grad_norm": 1.1420409679412842, "learning_rate": 1.934285711836355e-05, "loss": 1.0373, "step": 3199 }, { "epoch": 0.42792190425247395, "grad_norm": 1.1827529668807983, "learning_rate": 1.934234228895944e-05, "loss": 0.9689, "step": 3200 }, { "epoch": 0.4280556298475528, "grad_norm": 1.2171211242675781, "learning_rate": 1.9341827264823142e-05, "loss": 1.023, "step": 3201 }, { "epoch": 0.4281893554426317, "grad_norm": 1.1928479671478271, "learning_rate": 1.934131204596539e-05, "loss": 1.1413, "step": 3202 }, { "epoch": 0.4283230810377106, "grad_norm": 1.2026665210723877, "learning_rate": 1.9340796632396935e-05, "loss": 1.0534, "step": 3203 }, { "epoch": 0.4284568066327895, "grad_norm": 1.0762709379196167, "learning_rate": 1.934028102412851e-05, "loss": 0.8741, "step": 3204 }, { "epoch": 0.4285905322278684, "grad_norm": 1.2451571226119995, "learning_rate": 1.933976522117086e-05, "loss": 1.042, "step": 3205 }, { "epoch": 0.4287242578229473, "grad_norm": 1.1522217988967896, "learning_rate": 1.9339249223534743e-05, "loss": 1.0747, "step": 3206 }, { "epoch": 0.4288579834180262, "grad_norm": 1.00810706615448, "learning_rate": 1.9338733031230917e-05, "loss": 0.9487, "step": 3207 }, { "epoch": 0.4289917090131051, "grad_norm": 1.1863644123077393, "learning_rate": 1.9338216644270134e-05, "loss": 0.9177, "step": 3208 }, { "epoch": 0.42912543460818403, "grad_norm": 1.1721563339233398, "learning_rate": 1.933770006266316e-05, "loss": 1.0805, "step": 3209 }, { "epoch": 0.4292591602032629, "grad_norm": 1.0565379858016968, "learning_rate": 1.9337183286420764e-05, "loss": 0.9603, "step": 3210 }, { "epoch": 0.4293928857983418, "grad_norm": 1.0838137865066528, "learning_rate": 1.933666631555372e-05, "loss": 0.958, "step": 3211 }, { "epoch": 0.4295266113934207, "grad_norm": 1.149985432624817, "learning_rate": 1.9336149150072795e-05, "loss": 0.8762, "step": 3212 }, { "epoch": 0.4296603369884996, "grad_norm": 1.2659294605255127, "learning_rate": 1.933563178998878e-05, "loss": 1.0185, "step": 3213 }, { "epoch": 0.4297940625835785, "grad_norm": 1.0330573320388794, "learning_rate": 1.933511423531245e-05, "loss": 1.0157, "step": 3214 }, { "epoch": 0.4299277881786574, "grad_norm": 1.2409868240356445, "learning_rate": 1.93345964860546e-05, "loss": 0.9575, "step": 3215 }, { "epoch": 0.4300615137737363, "grad_norm": 1.226502776145935, "learning_rate": 1.9334078542226015e-05, "loss": 1.0152, "step": 3216 }, { "epoch": 0.4301952393688152, "grad_norm": 1.1387345790863037, "learning_rate": 1.9333560403837497e-05, "loss": 1.0068, "step": 3217 }, { "epoch": 0.4303289649638941, "grad_norm": 1.123658299446106, "learning_rate": 1.933304207089984e-05, "loss": 1.0246, "step": 3218 }, { "epoch": 0.43046269055897296, "grad_norm": 1.132745623588562, "learning_rate": 1.9332523543423858e-05, "loss": 0.9516, "step": 3219 }, { "epoch": 0.43059641615405186, "grad_norm": 1.17822265625, "learning_rate": 1.9332004821420346e-05, "loss": 0.9207, "step": 3220 }, { "epoch": 0.43073014174913077, "grad_norm": 1.1625081300735474, "learning_rate": 1.933148590490013e-05, "loss": 0.9799, "step": 3221 }, { "epoch": 0.4308638673442097, "grad_norm": 1.1458196640014648, "learning_rate": 1.9330966793874015e-05, "loss": 0.9598, "step": 3222 }, { "epoch": 0.4309975929392886, "grad_norm": 1.0614173412322998, "learning_rate": 1.933044748835283e-05, "loss": 0.9867, "step": 3223 }, { "epoch": 0.4311313185343675, "grad_norm": 1.1612673997879028, "learning_rate": 1.932992798834739e-05, "loss": 0.9199, "step": 3224 }, { "epoch": 0.4312650441294464, "grad_norm": 1.1997700929641724, "learning_rate": 1.9329408293868533e-05, "loss": 0.9475, "step": 3225 }, { "epoch": 0.4313987697245253, "grad_norm": 1.2088536024093628, "learning_rate": 1.9328888404927086e-05, "loss": 1.0813, "step": 3226 }, { "epoch": 0.4315324953196042, "grad_norm": 1.263952612876892, "learning_rate": 1.9328368321533885e-05, "loss": 1.0938, "step": 3227 }, { "epoch": 0.4316662209146831, "grad_norm": 1.1620732545852661, "learning_rate": 1.9327848043699774e-05, "loss": 0.9589, "step": 3228 }, { "epoch": 0.43179994650976194, "grad_norm": 1.4293076992034912, "learning_rate": 1.9327327571435597e-05, "loss": 1.0728, "step": 3229 }, { "epoch": 0.43193367210484085, "grad_norm": 1.212754487991333, "learning_rate": 1.93268069047522e-05, "loss": 0.9247, "step": 3230 }, { "epoch": 0.43206739769991975, "grad_norm": 1.1887993812561035, "learning_rate": 1.9326286043660442e-05, "loss": 0.9545, "step": 3231 }, { "epoch": 0.43220112329499866, "grad_norm": 1.0981377363204956, "learning_rate": 1.9325764988171173e-05, "loss": 0.9823, "step": 3232 }, { "epoch": 0.43233484889007756, "grad_norm": 1.1061948537826538, "learning_rate": 1.932524373829526e-05, "loss": 0.899, "step": 3233 }, { "epoch": 0.43246857448515647, "grad_norm": 1.14080011844635, "learning_rate": 1.932472229404356e-05, "loss": 1.0584, "step": 3234 }, { "epoch": 0.43260230008023537, "grad_norm": 1.0690444707870483, "learning_rate": 1.932420065542695e-05, "loss": 1.0231, "step": 3235 }, { "epoch": 0.4327360256753143, "grad_norm": 1.1371108293533325, "learning_rate": 1.9323678822456296e-05, "loss": 1.0213, "step": 3236 }, { "epoch": 0.4328697512703932, "grad_norm": 1.081855297088623, "learning_rate": 1.932315679514248e-05, "loss": 0.9224, "step": 3237 }, { "epoch": 0.433003476865472, "grad_norm": 1.0324413776397705, "learning_rate": 1.9322634573496383e-05, "loss": 0.8645, "step": 3238 }, { "epoch": 0.43313720246055093, "grad_norm": 1.0652774572372437, "learning_rate": 1.9322112157528886e-05, "loss": 1.0141, "step": 3239 }, { "epoch": 0.43327092805562983, "grad_norm": 1.16048002243042, "learning_rate": 1.932158954725089e-05, "loss": 1.0183, "step": 3240 }, { "epoch": 0.43340465365070874, "grad_norm": 1.1730974912643433, "learning_rate": 1.932106674267327e-05, "loss": 1.0345, "step": 3241 }, { "epoch": 0.43353837924578764, "grad_norm": 1.2496461868286133, "learning_rate": 1.9320543743806936e-05, "loss": 0.9948, "step": 3242 }, { "epoch": 0.43367210484086655, "grad_norm": 1.0477992296218872, "learning_rate": 1.932002055066279e-05, "loss": 0.883, "step": 3243 }, { "epoch": 0.43380583043594545, "grad_norm": 1.1289085149765015, "learning_rate": 1.9319497163251728e-05, "loss": 1.0263, "step": 3244 }, { "epoch": 0.43393955603102435, "grad_norm": 1.1679236888885498, "learning_rate": 1.931897358158467e-05, "loss": 0.9508, "step": 3245 }, { "epoch": 0.43407328162610326, "grad_norm": 1.1121903657913208, "learning_rate": 1.9318449805672524e-05, "loss": 0.9105, "step": 3246 }, { "epoch": 0.43420700722118216, "grad_norm": 1.1972988843917847, "learning_rate": 1.9317925835526206e-05, "loss": 1.154, "step": 3247 }, { "epoch": 0.434340732816261, "grad_norm": 1.1384882926940918, "learning_rate": 1.931740167115664e-05, "loss": 1.0495, "step": 3248 }, { "epoch": 0.4344744584113399, "grad_norm": 1.253135085105896, "learning_rate": 1.9316877312574756e-05, "loss": 1.1052, "step": 3249 }, { "epoch": 0.4346081840064188, "grad_norm": 1.188148021697998, "learning_rate": 1.931635275979148e-05, "loss": 0.9202, "step": 3250 }, { "epoch": 0.4347419096014977, "grad_norm": 1.1647379398345947, "learning_rate": 1.9315828012817742e-05, "loss": 1.0526, "step": 3251 }, { "epoch": 0.4348756351965766, "grad_norm": 1.0021169185638428, "learning_rate": 1.9315303071664486e-05, "loss": 1.0161, "step": 3252 }, { "epoch": 0.43500936079165553, "grad_norm": 1.1428781747817993, "learning_rate": 1.9314777936342648e-05, "loss": 0.9394, "step": 3253 }, { "epoch": 0.43514308638673443, "grad_norm": 0.9778270721435547, "learning_rate": 1.931425260686318e-05, "loss": 1.0201, "step": 3254 }, { "epoch": 0.43527681198181334, "grad_norm": 1.0943289995193481, "learning_rate": 1.9313727083237028e-05, "loss": 1.0505, "step": 3255 }, { "epoch": 0.43541053757689224, "grad_norm": 1.1592936515808105, "learning_rate": 1.9313201365475146e-05, "loss": 0.9662, "step": 3256 }, { "epoch": 0.4355442631719711, "grad_norm": 1.143384575843811, "learning_rate": 1.93126754535885e-05, "loss": 0.9783, "step": 3257 }, { "epoch": 0.43567798876705, "grad_norm": 1.1319254636764526, "learning_rate": 1.9312149347588035e-05, "loss": 0.9328, "step": 3258 }, { "epoch": 0.4358117143621289, "grad_norm": 0.9889384508132935, "learning_rate": 1.9311623047484734e-05, "loss": 0.9043, "step": 3259 }, { "epoch": 0.4359454399572078, "grad_norm": 1.17056143283844, "learning_rate": 1.9311096553289563e-05, "loss": 1.0451, "step": 3260 }, { "epoch": 0.4360791655522867, "grad_norm": 0.9741213917732239, "learning_rate": 1.9310569865013488e-05, "loss": 0.8717, "step": 3261 }, { "epoch": 0.4362128911473656, "grad_norm": 1.2895677089691162, "learning_rate": 1.9310042982667498e-05, "loss": 0.9078, "step": 3262 }, { "epoch": 0.4363466167424445, "grad_norm": 1.124538540840149, "learning_rate": 1.930951590626257e-05, "loss": 0.9789, "step": 3263 }, { "epoch": 0.4364803423375234, "grad_norm": 1.1540082693099976, "learning_rate": 1.9308988635809688e-05, "loss": 0.917, "step": 3264 }, { "epoch": 0.4366140679326023, "grad_norm": 1.3382575511932373, "learning_rate": 1.930846117131985e-05, "loss": 1.1223, "step": 3265 }, { "epoch": 0.43674779352768117, "grad_norm": 1.3084815740585327, "learning_rate": 1.930793351280404e-05, "loss": 1.0398, "step": 3266 }, { "epoch": 0.4368815191227601, "grad_norm": 1.111212968826294, "learning_rate": 1.930740566027327e-05, "loss": 0.9553, "step": 3267 }, { "epoch": 0.437015244717839, "grad_norm": 1.0764597654342651, "learning_rate": 1.9306877613738532e-05, "loss": 0.9113, "step": 3268 }, { "epoch": 0.4371489703129179, "grad_norm": 1.0475043058395386, "learning_rate": 1.9306349373210834e-05, "loss": 1.0067, "step": 3269 }, { "epoch": 0.4372826959079968, "grad_norm": 1.1076101064682007, "learning_rate": 1.9305820938701193e-05, "loss": 1.0595, "step": 3270 }, { "epoch": 0.4374164215030757, "grad_norm": 1.059186577796936, "learning_rate": 1.9305292310220614e-05, "loss": 0.8583, "step": 3271 }, { "epoch": 0.4375501470981546, "grad_norm": 1.1533136367797852, "learning_rate": 1.9304763487780125e-05, "loss": 1.0495, "step": 3272 }, { "epoch": 0.4376838726932335, "grad_norm": 1.1041620969772339, "learning_rate": 1.9304234471390742e-05, "loss": 0.9711, "step": 3273 }, { "epoch": 0.4378175982883124, "grad_norm": 1.0773154497146606, "learning_rate": 1.9303705261063496e-05, "loss": 1.0145, "step": 3274 }, { "epoch": 0.4379513238833913, "grad_norm": 1.1122961044311523, "learning_rate": 1.930317585680942e-05, "loss": 1.0048, "step": 3275 }, { "epoch": 0.43808504947847016, "grad_norm": 1.1464418172836304, "learning_rate": 1.9302646258639538e-05, "loss": 0.9541, "step": 3276 }, { "epoch": 0.43821877507354906, "grad_norm": 1.2643078565597534, "learning_rate": 1.93021164665649e-05, "loss": 1.0563, "step": 3277 }, { "epoch": 0.43835250066862796, "grad_norm": 1.1109564304351807, "learning_rate": 1.9301586480596547e-05, "loss": 0.9657, "step": 3278 }, { "epoch": 0.43848622626370687, "grad_norm": 1.0269380807876587, "learning_rate": 1.9301056300745523e-05, "loss": 0.8434, "step": 3279 }, { "epoch": 0.4386199518587858, "grad_norm": 1.0329316854476929, "learning_rate": 1.930052592702288e-05, "loss": 0.9783, "step": 3280 }, { "epoch": 0.4387536774538647, "grad_norm": 1.1038655042648315, "learning_rate": 1.9299995359439672e-05, "loss": 0.9532, "step": 3281 }, { "epoch": 0.4388874030489436, "grad_norm": 1.0996888875961304, "learning_rate": 1.9299464598006964e-05, "loss": 0.9495, "step": 3282 }, { "epoch": 0.4390211286440225, "grad_norm": 1.1291358470916748, "learning_rate": 1.9298933642735817e-05, "loss": 1.044, "step": 3283 }, { "epoch": 0.4391548542391014, "grad_norm": 1.1658300161361694, "learning_rate": 1.929840249363729e-05, "loss": 1.0118, "step": 3284 }, { "epoch": 0.43928857983418024, "grad_norm": 1.310865879058838, "learning_rate": 1.9297871150722463e-05, "loss": 1.1532, "step": 3285 }, { "epoch": 0.43942230542925914, "grad_norm": 1.1090534925460815, "learning_rate": 1.9297339614002412e-05, "loss": 0.9064, "step": 3286 }, { "epoch": 0.43955603102433805, "grad_norm": 1.0887482166290283, "learning_rate": 1.929680788348821e-05, "loss": 0.9505, "step": 3287 }, { "epoch": 0.43968975661941695, "grad_norm": 1.1095621585845947, "learning_rate": 1.9296275959190943e-05, "loss": 1.0883, "step": 3288 }, { "epoch": 0.43982348221449585, "grad_norm": 1.1983813047409058, "learning_rate": 1.92957438411217e-05, "loss": 1.0304, "step": 3289 }, { "epoch": 0.43995720780957476, "grad_norm": 0.9838898777961731, "learning_rate": 1.9295211529291574e-05, "loss": 0.9279, "step": 3290 }, { "epoch": 0.44009093340465366, "grad_norm": 1.1803792715072632, "learning_rate": 1.9294679023711653e-05, "loss": 1.029, "step": 3291 }, { "epoch": 0.44022465899973257, "grad_norm": 1.231771469116211, "learning_rate": 1.9294146324393047e-05, "loss": 1.0151, "step": 3292 }, { "epoch": 0.44035838459481147, "grad_norm": 1.0567387342453003, "learning_rate": 1.9293613431346853e-05, "loss": 0.9662, "step": 3293 }, { "epoch": 0.4404921101898903, "grad_norm": 0.9989197254180908, "learning_rate": 1.929308034458418e-05, "loss": 0.8648, "step": 3294 }, { "epoch": 0.4406258357849692, "grad_norm": 1.073473334312439, "learning_rate": 1.929254706411614e-05, "loss": 1.0167, "step": 3295 }, { "epoch": 0.4407595613800481, "grad_norm": 1.144068956375122, "learning_rate": 1.9292013589953847e-05, "loss": 0.991, "step": 3296 }, { "epoch": 0.44089328697512703, "grad_norm": 1.1017115116119385, "learning_rate": 1.929147992210842e-05, "loss": 0.9076, "step": 3297 }, { "epoch": 0.44102701257020593, "grad_norm": 1.3337068557739258, "learning_rate": 1.9290946060590992e-05, "loss": 1.0691, "step": 3298 }, { "epoch": 0.44116073816528484, "grad_norm": 1.0668264627456665, "learning_rate": 1.9290412005412676e-05, "loss": 0.9559, "step": 3299 }, { "epoch": 0.44129446376036374, "grad_norm": 0.9895573258399963, "learning_rate": 1.9289877756584618e-05, "loss": 0.8681, "step": 3300 }, { "epoch": 0.44142818935544265, "grad_norm": 1.1195969581604004, "learning_rate": 1.9289343314117946e-05, "loss": 1.0162, "step": 3301 }, { "epoch": 0.44156191495052155, "grad_norm": 1.0682613849639893, "learning_rate": 1.92888086780238e-05, "loss": 1.0334, "step": 3302 }, { "epoch": 0.44169564054560045, "grad_norm": 1.1326122283935547, "learning_rate": 1.9288273848313325e-05, "loss": 1.0388, "step": 3303 }, { "epoch": 0.4418293661406793, "grad_norm": 1.0917998552322388, "learning_rate": 1.9287738824997672e-05, "loss": 1.0486, "step": 3304 }, { "epoch": 0.4419630917357582, "grad_norm": 1.100752353668213, "learning_rate": 1.9287203608087987e-05, "loss": 1.0108, "step": 3305 }, { "epoch": 0.4420968173308371, "grad_norm": 1.1760727167129517, "learning_rate": 1.928666819759543e-05, "loss": 0.9483, "step": 3306 }, { "epoch": 0.442230542925916, "grad_norm": 1.0925190448760986, "learning_rate": 1.9286132593531167e-05, "loss": 0.9873, "step": 3307 }, { "epoch": 0.4423642685209949, "grad_norm": 1.2291847467422485, "learning_rate": 1.9285596795906353e-05, "loss": 0.9629, "step": 3308 }, { "epoch": 0.4424979941160738, "grad_norm": 1.081689476966858, "learning_rate": 1.928506080473216e-05, "loss": 0.9286, "step": 3309 }, { "epoch": 0.4426317197111527, "grad_norm": 1.132133960723877, "learning_rate": 1.9284524620019756e-05, "loss": 1.0137, "step": 3310 }, { "epoch": 0.44276544530623163, "grad_norm": 1.086695909500122, "learning_rate": 1.928398824178032e-05, "loss": 0.9395, "step": 3311 }, { "epoch": 0.44289917090131053, "grad_norm": 1.1986316442489624, "learning_rate": 1.9283451670025035e-05, "loss": 1.0323, "step": 3312 }, { "epoch": 0.4430328964963894, "grad_norm": 1.0736405849456787, "learning_rate": 1.9282914904765083e-05, "loss": 1.0116, "step": 3313 }, { "epoch": 0.4431666220914683, "grad_norm": 1.133349061012268, "learning_rate": 1.928237794601165e-05, "loss": 0.9197, "step": 3314 }, { "epoch": 0.4433003476865472, "grad_norm": 1.0145351886749268, "learning_rate": 1.928184079377594e-05, "loss": 0.8588, "step": 3315 }, { "epoch": 0.4434340732816261, "grad_norm": 1.0097167491912842, "learning_rate": 1.9281303448069132e-05, "loss": 0.9751, "step": 3316 }, { "epoch": 0.443567798876705, "grad_norm": 1.193129539489746, "learning_rate": 1.9280765908902437e-05, "loss": 0.9229, "step": 3317 }, { "epoch": 0.4437015244717839, "grad_norm": 1.1657564640045166, "learning_rate": 1.9280228176287057e-05, "loss": 0.9527, "step": 3318 }, { "epoch": 0.4438352500668628, "grad_norm": 1.0933988094329834, "learning_rate": 1.92796902502342e-05, "loss": 1.0438, "step": 3319 }, { "epoch": 0.4439689756619417, "grad_norm": 1.2894423007965088, "learning_rate": 1.9279152130755082e-05, "loss": 0.9411, "step": 3320 }, { "epoch": 0.4441027012570206, "grad_norm": 1.0571297407150269, "learning_rate": 1.9278613817860917e-05, "loss": 0.9331, "step": 3321 }, { "epoch": 0.44423642685209946, "grad_norm": 1.0643575191497803, "learning_rate": 1.9278075311562922e-05, "loss": 0.88, "step": 3322 }, { "epoch": 0.44437015244717837, "grad_norm": 1.0989140272140503, "learning_rate": 1.9277536611872327e-05, "loss": 0.9576, "step": 3323 }, { "epoch": 0.44450387804225727, "grad_norm": 1.154719591140747, "learning_rate": 1.9276997718800362e-05, "loss": 1.0726, "step": 3324 }, { "epoch": 0.4446376036373362, "grad_norm": 1.1565909385681152, "learning_rate": 1.9276458632358253e-05, "loss": 1.0416, "step": 3325 }, { "epoch": 0.4447713292324151, "grad_norm": 1.086600422859192, "learning_rate": 1.9275919352557242e-05, "loss": 0.9912, "step": 3326 }, { "epoch": 0.444905054827494, "grad_norm": 1.1150155067443848, "learning_rate": 1.927537987940857e-05, "loss": 1.0216, "step": 3327 }, { "epoch": 0.4450387804225729, "grad_norm": 1.0587728023529053, "learning_rate": 1.9274840212923476e-05, "loss": 0.9119, "step": 3328 }, { "epoch": 0.4451725060176518, "grad_norm": 1.2374671697616577, "learning_rate": 1.9274300353113212e-05, "loss": 0.9733, "step": 3329 }, { "epoch": 0.4453062316127307, "grad_norm": 1.161790132522583, "learning_rate": 1.9273760299989036e-05, "loss": 1.0323, "step": 3330 }, { "epoch": 0.4454399572078096, "grad_norm": 1.1512707471847534, "learning_rate": 1.92732200535622e-05, "loss": 0.9728, "step": 3331 }, { "epoch": 0.44557368280288845, "grad_norm": 1.2696303129196167, "learning_rate": 1.9272679613843962e-05, "loss": 0.963, "step": 3332 }, { "epoch": 0.44570740839796735, "grad_norm": 1.1581220626831055, "learning_rate": 1.9272138980845595e-05, "loss": 1.074, "step": 3333 }, { "epoch": 0.44584113399304626, "grad_norm": 1.1378134489059448, "learning_rate": 1.927159815457836e-05, "loss": 1.125, "step": 3334 }, { "epoch": 0.44597485958812516, "grad_norm": 1.1823351383209229, "learning_rate": 1.9271057135053537e-05, "loss": 0.9902, "step": 3335 }, { "epoch": 0.44610858518320406, "grad_norm": 0.9492054581642151, "learning_rate": 1.9270515922282394e-05, "loss": 0.908, "step": 3336 }, { "epoch": 0.44624231077828297, "grad_norm": 1.2447816133499146, "learning_rate": 1.9269974516276223e-05, "loss": 1.0801, "step": 3337 }, { "epoch": 0.4463760363733619, "grad_norm": 1.0827890634536743, "learning_rate": 1.9269432917046302e-05, "loss": 0.9343, "step": 3338 }, { "epoch": 0.4465097619684408, "grad_norm": 1.0911729335784912, "learning_rate": 1.926889112460392e-05, "loss": 0.9262, "step": 3339 }, { "epoch": 0.4466434875635197, "grad_norm": 1.197203516960144, "learning_rate": 1.9268349138960374e-05, "loss": 1.0089, "step": 3340 }, { "epoch": 0.44677721315859853, "grad_norm": 1.2953457832336426, "learning_rate": 1.926780696012696e-05, "loss": 1.0306, "step": 3341 }, { "epoch": 0.44691093875367743, "grad_norm": 1.0786229372024536, "learning_rate": 1.9267264588114975e-05, "loss": 0.9684, "step": 3342 }, { "epoch": 0.44704466434875634, "grad_norm": 1.0888077020645142, "learning_rate": 1.9266722022935728e-05, "loss": 0.9538, "step": 3343 }, { "epoch": 0.44717838994383524, "grad_norm": 1.1159228086471558, "learning_rate": 1.9266179264600527e-05, "loss": 0.9176, "step": 3344 }, { "epoch": 0.44731211553891415, "grad_norm": 1.1443184614181519, "learning_rate": 1.9265636313120687e-05, "loss": 1.0072, "step": 3345 }, { "epoch": 0.44744584113399305, "grad_norm": 1.2469744682312012, "learning_rate": 1.9265093168507525e-05, "loss": 1.0627, "step": 3346 }, { "epoch": 0.44757956672907195, "grad_norm": 1.0613532066345215, "learning_rate": 1.9264549830772363e-05, "loss": 0.9925, "step": 3347 }, { "epoch": 0.44771329232415086, "grad_norm": 1.0912984609603882, "learning_rate": 1.9264006299926523e-05, "loss": 0.9961, "step": 3348 }, { "epoch": 0.44784701791922976, "grad_norm": 1.2709434032440186, "learning_rate": 1.926346257598134e-05, "loss": 1.1751, "step": 3349 }, { "epoch": 0.44798074351430867, "grad_norm": 1.1200724840164185, "learning_rate": 1.9262918658948137e-05, "loss": 1.0059, "step": 3350 }, { "epoch": 0.4481144691093875, "grad_norm": 1.1213024854660034, "learning_rate": 1.9262374548838264e-05, "loss": 0.9931, "step": 3351 }, { "epoch": 0.4482481947044664, "grad_norm": 1.0249545574188232, "learning_rate": 1.9261830245663053e-05, "loss": 0.9238, "step": 3352 }, { "epoch": 0.4483819202995453, "grad_norm": 1.0901380777359009, "learning_rate": 1.9261285749433854e-05, "loss": 1.012, "step": 3353 }, { "epoch": 0.4485156458946242, "grad_norm": 1.2205151319503784, "learning_rate": 1.9260741060162015e-05, "loss": 1.0555, "step": 3354 }, { "epoch": 0.44864937148970313, "grad_norm": 1.1517947912216187, "learning_rate": 1.9260196177858892e-05, "loss": 1.1466, "step": 3355 }, { "epoch": 0.44878309708478203, "grad_norm": 1.0503699779510498, "learning_rate": 1.925965110253584e-05, "loss": 0.9033, "step": 3356 }, { "epoch": 0.44891682267986094, "grad_norm": 1.060001015663147, "learning_rate": 1.925910583420422e-05, "loss": 0.9939, "step": 3357 }, { "epoch": 0.44905054827493984, "grad_norm": 1.1567180156707764, "learning_rate": 1.9258560372875402e-05, "loss": 1.0456, "step": 3358 }, { "epoch": 0.44918427387001875, "grad_norm": 0.9911651611328125, "learning_rate": 1.9258014718560752e-05, "loss": 0.9523, "step": 3359 }, { "epoch": 0.4493179994650976, "grad_norm": 1.210352897644043, "learning_rate": 1.925746887127164e-05, "loss": 1.0788, "step": 3360 }, { "epoch": 0.4494517250601765, "grad_norm": 1.0245184898376465, "learning_rate": 1.9256922831019453e-05, "loss": 0.9591, "step": 3361 }, { "epoch": 0.4495854506552554, "grad_norm": 1.110620379447937, "learning_rate": 1.9256376597815565e-05, "loss": 0.9033, "step": 3362 }, { "epoch": 0.4497191762503343, "grad_norm": 1.0918771028518677, "learning_rate": 1.9255830171671364e-05, "loss": 0.9059, "step": 3363 }, { "epoch": 0.4498529018454132, "grad_norm": 1.0640569925308228, "learning_rate": 1.9255283552598242e-05, "loss": 1.026, "step": 3364 }, { "epoch": 0.4499866274404921, "grad_norm": 1.1876285076141357, "learning_rate": 1.9254736740607586e-05, "loss": 0.9007, "step": 3365 }, { "epoch": 0.450120353035571, "grad_norm": 1.1135878562927246, "learning_rate": 1.9254189735710805e-05, "loss": 1.0842, "step": 3366 }, { "epoch": 0.4502540786306499, "grad_norm": 1.2246215343475342, "learning_rate": 1.9253642537919288e-05, "loss": 1.0785, "step": 3367 }, { "epoch": 0.4503878042257288, "grad_norm": 1.175704002380371, "learning_rate": 1.925309514724445e-05, "loss": 1.084, "step": 3368 }, { "epoch": 0.4505215298208077, "grad_norm": 1.1717102527618408, "learning_rate": 1.92525475636977e-05, "loss": 0.9904, "step": 3369 }, { "epoch": 0.4506552554158866, "grad_norm": 1.1339911222457886, "learning_rate": 1.9251999787290445e-05, "loss": 0.9728, "step": 3370 }, { "epoch": 0.4507889810109655, "grad_norm": 1.2592439651489258, "learning_rate": 1.925145181803411e-05, "loss": 1.0138, "step": 3371 }, { "epoch": 0.4509227066060444, "grad_norm": 1.1844533681869507, "learning_rate": 1.9250903655940116e-05, "loss": 0.9797, "step": 3372 }, { "epoch": 0.4510564322011233, "grad_norm": 1.150302767753601, "learning_rate": 1.9250355301019885e-05, "loss": 1.0744, "step": 3373 }, { "epoch": 0.4511901577962022, "grad_norm": 1.041264533996582, "learning_rate": 1.924980675328485e-05, "loss": 1.093, "step": 3374 }, { "epoch": 0.4513238833912811, "grad_norm": 1.2654507160186768, "learning_rate": 1.9249258012746447e-05, "loss": 1.1424, "step": 3375 }, { "epoch": 0.45145760898636, "grad_norm": 1.0421652793884277, "learning_rate": 1.9248709079416107e-05, "loss": 0.9184, "step": 3376 }, { "epoch": 0.4515913345814389, "grad_norm": 1.1006495952606201, "learning_rate": 1.924815995330528e-05, "loss": 0.9203, "step": 3377 }, { "epoch": 0.4517250601765178, "grad_norm": 1.4314602613449097, "learning_rate": 1.9247610634425407e-05, "loss": 1.0535, "step": 3378 }, { "epoch": 0.45185878577159666, "grad_norm": 1.1162046194076538, "learning_rate": 1.9247061122787936e-05, "loss": 1.006, "step": 3379 }, { "epoch": 0.45199251136667556, "grad_norm": 0.9385702610015869, "learning_rate": 1.924651141840433e-05, "loss": 0.8776, "step": 3380 }, { "epoch": 0.45212623696175447, "grad_norm": 1.198063850402832, "learning_rate": 1.924596152128604e-05, "loss": 0.9883, "step": 3381 }, { "epoch": 0.4522599625568334, "grad_norm": 1.1203556060791016, "learning_rate": 1.9245411431444526e-05, "loss": 1.0444, "step": 3382 }, { "epoch": 0.4523936881519123, "grad_norm": 1.0597683191299438, "learning_rate": 1.924486114889126e-05, "loss": 0.9729, "step": 3383 }, { "epoch": 0.4525274137469912, "grad_norm": 1.0010021924972534, "learning_rate": 1.924431067363771e-05, "loss": 0.9988, "step": 3384 }, { "epoch": 0.4526611393420701, "grad_norm": 1.0378679037094116, "learning_rate": 1.924376000569535e-05, "loss": 1.0205, "step": 3385 }, { "epoch": 0.452794864937149, "grad_norm": 1.0878831148147583, "learning_rate": 1.9243209145075656e-05, "loss": 0.9553, "step": 3386 }, { "epoch": 0.4529285905322279, "grad_norm": 1.3530305624008179, "learning_rate": 1.9242658091790118e-05, "loss": 1.008, "step": 3387 }, { "epoch": 0.45306231612730674, "grad_norm": 1.2059698104858398, "learning_rate": 1.9242106845850208e-05, "loss": 1.0446, "step": 3388 }, { "epoch": 0.45319604172238565, "grad_norm": 1.076590657234192, "learning_rate": 1.924155540726743e-05, "loss": 0.9387, "step": 3389 }, { "epoch": 0.45332976731746455, "grad_norm": 1.1469002962112427, "learning_rate": 1.9241003776053273e-05, "loss": 1.0034, "step": 3390 }, { "epoch": 0.45346349291254345, "grad_norm": 1.130566120147705, "learning_rate": 1.9240451952219232e-05, "loss": 1.0745, "step": 3391 }, { "epoch": 0.45359721850762236, "grad_norm": 1.1840704679489136, "learning_rate": 1.9239899935776812e-05, "loss": 0.9338, "step": 3392 }, { "epoch": 0.45373094410270126, "grad_norm": 1.2050395011901855, "learning_rate": 1.9239347726737524e-05, "loss": 0.9768, "step": 3393 }, { "epoch": 0.45386466969778017, "grad_norm": 1.029349684715271, "learning_rate": 1.9238795325112867e-05, "loss": 0.9451, "step": 3394 }, { "epoch": 0.45399839529285907, "grad_norm": 1.068260908126831, "learning_rate": 1.923824273091437e-05, "loss": 1.0074, "step": 3395 }, { "epoch": 0.454132120887938, "grad_norm": 1.1231591701507568, "learning_rate": 1.9237689944153535e-05, "loss": 1.0076, "step": 3396 }, { "epoch": 0.4542658464830168, "grad_norm": 1.1692661046981812, "learning_rate": 1.92371369648419e-05, "loss": 0.9961, "step": 3397 }, { "epoch": 0.4543995720780957, "grad_norm": 1.1960813999176025, "learning_rate": 1.923658379299098e-05, "loss": 0.8335, "step": 3398 }, { "epoch": 0.45453329767317463, "grad_norm": 1.298230767250061, "learning_rate": 1.9236030428612307e-05, "loss": 1.0425, "step": 3399 }, { "epoch": 0.45466702326825353, "grad_norm": 1.0371997356414795, "learning_rate": 1.9235476871717422e-05, "loss": 0.7899, "step": 3400 }, { "epoch": 0.45480074886333244, "grad_norm": 1.0718671083450317, "learning_rate": 1.923492312231786e-05, "loss": 0.9279, "step": 3401 }, { "epoch": 0.45493447445841134, "grad_norm": 1.1243482828140259, "learning_rate": 1.923436918042516e-05, "loss": 1.0515, "step": 3402 }, { "epoch": 0.45506820005349025, "grad_norm": 1.146529197692871, "learning_rate": 1.9233815046050867e-05, "loss": 0.9858, "step": 3403 }, { "epoch": 0.45520192564856915, "grad_norm": 1.1278969049453735, "learning_rate": 1.9233260719206543e-05, "loss": 0.8909, "step": 3404 }, { "epoch": 0.45533565124364805, "grad_norm": 0.9876331686973572, "learning_rate": 1.923270619990373e-05, "loss": 0.961, "step": 3405 }, { "epoch": 0.45546937683872696, "grad_norm": 1.1827300786972046, "learning_rate": 1.923215148815399e-05, "loss": 1.0153, "step": 3406 }, { "epoch": 0.4556031024338058, "grad_norm": 1.1657085418701172, "learning_rate": 1.9231596583968888e-05, "loss": 0.9652, "step": 3407 }, { "epoch": 0.4557368280288847, "grad_norm": 1.0937281847000122, "learning_rate": 1.9231041487359988e-05, "loss": 0.9458, "step": 3408 }, { "epoch": 0.4558705536239636, "grad_norm": 1.1266882419586182, "learning_rate": 1.9230486198338863e-05, "loss": 1.0025, "step": 3409 }, { "epoch": 0.4560042792190425, "grad_norm": 0.9347115159034729, "learning_rate": 1.9229930716917085e-05, "loss": 0.8005, "step": 3410 }, { "epoch": 0.4561380048141214, "grad_norm": 1.0955075025558472, "learning_rate": 1.9229375043106233e-05, "loss": 0.9616, "step": 3411 }, { "epoch": 0.4562717304092003, "grad_norm": 1.1396260261535645, "learning_rate": 1.922881917691789e-05, "loss": 0.9329, "step": 3412 }, { "epoch": 0.45640545600427923, "grad_norm": 1.0695569515228271, "learning_rate": 1.922826311836364e-05, "loss": 1.0297, "step": 3413 }, { "epoch": 0.45653918159935813, "grad_norm": 1.126440167427063, "learning_rate": 1.922770686745508e-05, "loss": 1.0094, "step": 3414 }, { "epoch": 0.45667290719443704, "grad_norm": 1.039226770401001, "learning_rate": 1.92271504242038e-05, "loss": 0.9809, "step": 3415 }, { "epoch": 0.4568066327895159, "grad_norm": 1.430786371231079, "learning_rate": 1.9226593788621393e-05, "loss": 1.0911, "step": 3416 }, { "epoch": 0.4569403583845948, "grad_norm": 1.1412203311920166, "learning_rate": 1.9226036960719474e-05, "loss": 0.9621, "step": 3417 }, { "epoch": 0.4570740839796737, "grad_norm": 1.1158143281936646, "learning_rate": 1.922547994050964e-05, "loss": 1.0723, "step": 3418 }, { "epoch": 0.4572078095747526, "grad_norm": 1.1592521667480469, "learning_rate": 1.9224922728003507e-05, "loss": 1.0132, "step": 3419 }, { "epoch": 0.4573415351698315, "grad_norm": 1.0948034524917603, "learning_rate": 1.9224365323212685e-05, "loss": 0.9764, "step": 3420 }, { "epoch": 0.4574752607649104, "grad_norm": 1.1486250162124634, "learning_rate": 1.9223807726148792e-05, "loss": 0.8994, "step": 3421 }, { "epoch": 0.4576089863599893, "grad_norm": 1.1484692096710205, "learning_rate": 1.9223249936823457e-05, "loss": 1.0021, "step": 3422 }, { "epoch": 0.4577427119550682, "grad_norm": 1.1607353687286377, "learning_rate": 1.92226919552483e-05, "loss": 1.1202, "step": 3423 }, { "epoch": 0.4578764375501471, "grad_norm": 1.087226152420044, "learning_rate": 1.922213378143496e-05, "loss": 0.9923, "step": 3424 }, { "epoch": 0.458010163145226, "grad_norm": 1.1255282163619995, "learning_rate": 1.9221575415395058e-05, "loss": 0.8913, "step": 3425 }, { "epoch": 0.45814388874030487, "grad_norm": 1.1019564867019653, "learning_rate": 1.9221016857140244e-05, "loss": 0.9961, "step": 3426 }, { "epoch": 0.4582776143353838, "grad_norm": 1.1333547830581665, "learning_rate": 1.922045810668216e-05, "loss": 1.0114, "step": 3427 }, { "epoch": 0.4584113399304627, "grad_norm": 1.2449817657470703, "learning_rate": 1.9219899164032446e-05, "loss": 1.1199, "step": 3428 }, { "epoch": 0.4585450655255416, "grad_norm": 1.0216999053955078, "learning_rate": 1.921934002920276e-05, "loss": 0.8471, "step": 3429 }, { "epoch": 0.4586787911206205, "grad_norm": 1.1862415075302124, "learning_rate": 1.921878070220475e-05, "loss": 1.0736, "step": 3430 }, { "epoch": 0.4588125167156994, "grad_norm": 1.1326826810836792, "learning_rate": 1.921822118305008e-05, "loss": 0.9986, "step": 3431 }, { "epoch": 0.4589462423107783, "grad_norm": 1.077890157699585, "learning_rate": 1.9217661471750406e-05, "loss": 1.0586, "step": 3432 }, { "epoch": 0.4590799679058572, "grad_norm": 1.1471023559570312, "learning_rate": 1.9217101568317402e-05, "loss": 1.0043, "step": 3433 }, { "epoch": 0.4592136935009361, "grad_norm": 1.126162052154541, "learning_rate": 1.9216541472762736e-05, "loss": 1.0345, "step": 3434 }, { "epoch": 0.45934741909601495, "grad_norm": 1.087627649307251, "learning_rate": 1.9215981185098083e-05, "loss": 0.9682, "step": 3435 }, { "epoch": 0.45948114469109386, "grad_norm": 1.0893338918685913, "learning_rate": 1.9215420705335117e-05, "loss": 1.0124, "step": 3436 }, { "epoch": 0.45961487028617276, "grad_norm": 1.1037579774856567, "learning_rate": 1.921486003348553e-05, "loss": 1.0602, "step": 3437 }, { "epoch": 0.45974859588125166, "grad_norm": 1.064774990081787, "learning_rate": 1.9214299169561e-05, "loss": 0.9902, "step": 3438 }, { "epoch": 0.45988232147633057, "grad_norm": 1.067426085472107, "learning_rate": 1.921373811357322e-05, "loss": 1.0371, "step": 3439 }, { "epoch": 0.4600160470714095, "grad_norm": 1.1470178365707397, "learning_rate": 1.9213176865533887e-05, "loss": 1.0329, "step": 3440 }, { "epoch": 0.4601497726664884, "grad_norm": 1.0724917650222778, "learning_rate": 1.92126154254547e-05, "loss": 0.9333, "step": 3441 }, { "epoch": 0.4602834982615673, "grad_norm": 1.1104001998901367, "learning_rate": 1.921205379334736e-05, "loss": 0.9744, "step": 3442 }, { "epoch": 0.4604172238566462, "grad_norm": 1.1794346570968628, "learning_rate": 1.921149196922357e-05, "loss": 1.0706, "step": 3443 }, { "epoch": 0.46055094945172503, "grad_norm": 1.1289557218551636, "learning_rate": 1.9210929953095047e-05, "loss": 0.8562, "step": 3444 }, { "epoch": 0.46068467504680394, "grad_norm": 1.0861027240753174, "learning_rate": 1.9210367744973498e-05, "loss": 0.9655, "step": 3445 }, { "epoch": 0.46081840064188284, "grad_norm": 1.247092843055725, "learning_rate": 1.9209805344870654e-05, "loss": 0.9527, "step": 3446 }, { "epoch": 0.46095212623696175, "grad_norm": 1.2231624126434326, "learning_rate": 1.9209242752798225e-05, "loss": 1.0268, "step": 3447 }, { "epoch": 0.46108585183204065, "grad_norm": 1.1294763088226318, "learning_rate": 1.9208679968767947e-05, "loss": 0.9184, "step": 3448 }, { "epoch": 0.46121957742711955, "grad_norm": 1.0424902439117432, "learning_rate": 1.9208116992791546e-05, "loss": 0.948, "step": 3449 }, { "epoch": 0.46135330302219846, "grad_norm": 1.1189802885055542, "learning_rate": 1.920755382488076e-05, "loss": 0.9674, "step": 3450 }, { "epoch": 0.46148702861727736, "grad_norm": 1.1505566835403442, "learning_rate": 1.9206990465047316e-05, "loss": 0.9225, "step": 3451 }, { "epoch": 0.46162075421235627, "grad_norm": 1.078946590423584, "learning_rate": 1.9206426913302976e-05, "loss": 0.9448, "step": 3452 }, { "epoch": 0.46175447980743517, "grad_norm": 1.070104956626892, "learning_rate": 1.920586316965947e-05, "loss": 0.9962, "step": 3453 }, { "epoch": 0.461888205402514, "grad_norm": 1.1646274328231812, "learning_rate": 1.9205299234128558e-05, "loss": 0.9946, "step": 3454 }, { "epoch": 0.4620219309975929, "grad_norm": 1.1912081241607666, "learning_rate": 1.9204735106721992e-05, "loss": 1.0148, "step": 3455 }, { "epoch": 0.4621556565926718, "grad_norm": 1.134006381034851, "learning_rate": 1.920417078745153e-05, "loss": 1.0132, "step": 3456 }, { "epoch": 0.46228938218775073, "grad_norm": 1.0826951265335083, "learning_rate": 1.9203606276328937e-05, "loss": 0.9198, "step": 3457 }, { "epoch": 0.46242310778282963, "grad_norm": 0.9836342334747314, "learning_rate": 1.9203041573365978e-05, "loss": 0.8228, "step": 3458 }, { "epoch": 0.46255683337790854, "grad_norm": 1.1260766983032227, "learning_rate": 1.9202476678574424e-05, "loss": 0.9493, "step": 3459 }, { "epoch": 0.46269055897298744, "grad_norm": 1.1229695081710815, "learning_rate": 1.9201911591966045e-05, "loss": 1.003, "step": 3460 }, { "epoch": 0.46282428456806635, "grad_norm": 1.2000818252563477, "learning_rate": 1.9201346313552628e-05, "loss": 1.0445, "step": 3461 }, { "epoch": 0.46295801016314525, "grad_norm": 1.0836660861968994, "learning_rate": 1.920078084334595e-05, "loss": 0.8961, "step": 3462 }, { "epoch": 0.4630917357582241, "grad_norm": 1.1743868589401245, "learning_rate": 1.9200215181357798e-05, "loss": 0.9747, "step": 3463 }, { "epoch": 0.463225461353303, "grad_norm": 1.1364059448242188, "learning_rate": 1.919964932759997e-05, "loss": 0.9822, "step": 3464 }, { "epoch": 0.4633591869483819, "grad_norm": 1.0988980531692505, "learning_rate": 1.9199083282084253e-05, "loss": 0.9867, "step": 3465 }, { "epoch": 0.4634929125434608, "grad_norm": 1.0937973260879517, "learning_rate": 1.9198517044822445e-05, "loss": 0.9703, "step": 3466 }, { "epoch": 0.4636266381385397, "grad_norm": 1.0952130556106567, "learning_rate": 1.9197950615826354e-05, "loss": 0.923, "step": 3467 }, { "epoch": 0.4637603637336186, "grad_norm": 1.1552884578704834, "learning_rate": 1.919738399510778e-05, "loss": 0.9464, "step": 3468 }, { "epoch": 0.4638940893286975, "grad_norm": 1.0457805395126343, "learning_rate": 1.919681718267854e-05, "loss": 0.8644, "step": 3469 }, { "epoch": 0.4640278149237764, "grad_norm": 1.085153579711914, "learning_rate": 1.9196250178550447e-05, "loss": 1.0576, "step": 3470 }, { "epoch": 0.46416154051885533, "grad_norm": 1.2174235582351685, "learning_rate": 1.9195682982735317e-05, "loss": 1.0834, "step": 3471 }, { "epoch": 0.4642952661139342, "grad_norm": 1.2986717224121094, "learning_rate": 1.9195115595244976e-05, "loss": 0.9687, "step": 3472 }, { "epoch": 0.4644289917090131, "grad_norm": 1.1093624830245972, "learning_rate": 1.919454801609125e-05, "loss": 0.8698, "step": 3473 }, { "epoch": 0.464562717304092, "grad_norm": 1.1158854961395264, "learning_rate": 1.9193980245285967e-05, "loss": 0.9729, "step": 3474 }, { "epoch": 0.4646964428991709, "grad_norm": 1.2086210250854492, "learning_rate": 1.9193412282840965e-05, "loss": 0.9811, "step": 3475 }, { "epoch": 0.4648301684942498, "grad_norm": 1.0425963401794434, "learning_rate": 1.9192844128768077e-05, "loss": 0.9405, "step": 3476 }, { "epoch": 0.4649638940893287, "grad_norm": 1.2545664310455322, "learning_rate": 1.9192275783079155e-05, "loss": 1.0837, "step": 3477 }, { "epoch": 0.4650976196844076, "grad_norm": 1.0331977605819702, "learning_rate": 1.9191707245786038e-05, "loss": 0.9364, "step": 3478 }, { "epoch": 0.4652313452794865, "grad_norm": 1.200106143951416, "learning_rate": 1.919113851690058e-05, "loss": 0.9805, "step": 3479 }, { "epoch": 0.4653650708745654, "grad_norm": 1.121775507926941, "learning_rate": 1.9190569596434635e-05, "loss": 1.0226, "step": 3480 }, { "epoch": 0.4654987964696443, "grad_norm": 1.0887154340744019, "learning_rate": 1.9190000484400058e-05, "loss": 0.9592, "step": 3481 }, { "epoch": 0.46563252206472316, "grad_norm": 1.155894160270691, "learning_rate": 1.9189431180808715e-05, "loss": 0.919, "step": 3482 }, { "epoch": 0.46576624765980207, "grad_norm": 1.1092969179153442, "learning_rate": 1.9188861685672475e-05, "loss": 1.1008, "step": 3483 }, { "epoch": 0.465899973254881, "grad_norm": 1.0001921653747559, "learning_rate": 1.9188291999003207e-05, "loss": 0.8381, "step": 3484 }, { "epoch": 0.4660336988499599, "grad_norm": 0.9587041735649109, "learning_rate": 1.9187722120812783e-05, "loss": 0.9104, "step": 3485 }, { "epoch": 0.4661674244450388, "grad_norm": 1.0774333477020264, "learning_rate": 1.9187152051113082e-05, "loss": 0.9604, "step": 3486 }, { "epoch": 0.4663011500401177, "grad_norm": 1.054100513458252, "learning_rate": 1.918658178991599e-05, "loss": 0.9771, "step": 3487 }, { "epoch": 0.4664348756351966, "grad_norm": 1.1179627180099487, "learning_rate": 1.9186011337233387e-05, "loss": 0.8995, "step": 3488 }, { "epoch": 0.4665686012302755, "grad_norm": 1.2614140510559082, "learning_rate": 1.9185440693077168e-05, "loss": 1.0134, "step": 3489 }, { "epoch": 0.4667023268253544, "grad_norm": 1.060590386390686, "learning_rate": 1.9184869857459233e-05, "loss": 0.9413, "step": 3490 }, { "epoch": 0.46683605242043325, "grad_norm": 1.0428732633590698, "learning_rate": 1.918429883039147e-05, "loss": 0.8798, "step": 3491 }, { "epoch": 0.46696977801551215, "grad_norm": 1.1563969850540161, "learning_rate": 1.9183727611885787e-05, "loss": 1.0264, "step": 3492 }, { "epoch": 0.46710350361059105, "grad_norm": 1.1321064233779907, "learning_rate": 1.918315620195409e-05, "loss": 1.027, "step": 3493 }, { "epoch": 0.46723722920566996, "grad_norm": 1.073287844657898, "learning_rate": 1.918258460060829e-05, "loss": 1.0025, "step": 3494 }, { "epoch": 0.46737095480074886, "grad_norm": 1.0472468137741089, "learning_rate": 1.91820128078603e-05, "loss": 0.9481, "step": 3495 }, { "epoch": 0.46750468039582777, "grad_norm": 1.2310487031936646, "learning_rate": 1.9181440823722043e-05, "loss": 1.0559, "step": 3496 }, { "epoch": 0.46763840599090667, "grad_norm": 1.2014554738998413, "learning_rate": 1.9180868648205435e-05, "loss": 0.9462, "step": 3497 }, { "epoch": 0.4677721315859856, "grad_norm": 1.1634538173675537, "learning_rate": 1.9180296281322402e-05, "loss": 1.0781, "step": 3498 }, { "epoch": 0.4679058571810645, "grad_norm": 1.16202974319458, "learning_rate": 1.917972372308488e-05, "loss": 1.1788, "step": 3499 }, { "epoch": 0.4680395827761434, "grad_norm": 1.0067589282989502, "learning_rate": 1.91791509735048e-05, "loss": 0.8972, "step": 3500 }, { "epoch": 0.46817330837122223, "grad_norm": 1.0489157438278198, "learning_rate": 1.9178578032594105e-05, "loss": 0.9096, "step": 3501 }, { "epoch": 0.46830703396630113, "grad_norm": 1.1619493961334229, "learning_rate": 1.917800490036473e-05, "loss": 0.9396, "step": 3502 }, { "epoch": 0.46844075956138004, "grad_norm": 1.153590440750122, "learning_rate": 1.9177431576828626e-05, "loss": 0.9995, "step": 3503 }, { "epoch": 0.46857448515645894, "grad_norm": 1.1528078317642212, "learning_rate": 1.9176858061997744e-05, "loss": 1.1274, "step": 3504 }, { "epoch": 0.46870821075153785, "grad_norm": 1.1239203214645386, "learning_rate": 1.9176284355884038e-05, "loss": 1.0372, "step": 3505 }, { "epoch": 0.46884193634661675, "grad_norm": 1.1865296363830566, "learning_rate": 1.9175710458499464e-05, "loss": 0.962, "step": 3506 }, { "epoch": 0.46897566194169565, "grad_norm": 1.1185070276260376, "learning_rate": 1.9175136369855985e-05, "loss": 1.0542, "step": 3507 }, { "epoch": 0.46910938753677456, "grad_norm": 1.1700890064239502, "learning_rate": 1.917456208996557e-05, "loss": 0.9607, "step": 3508 }, { "epoch": 0.46924311313185346, "grad_norm": 1.0757564306259155, "learning_rate": 1.9173987618840185e-05, "loss": 0.923, "step": 3509 }, { "epoch": 0.4693768387269323, "grad_norm": 1.0142959356307983, "learning_rate": 1.9173412956491808e-05, "loss": 0.8703, "step": 3510 }, { "epoch": 0.4695105643220112, "grad_norm": 1.0474114418029785, "learning_rate": 1.9172838102932414e-05, "loss": 0.8675, "step": 3511 }, { "epoch": 0.4696442899170901, "grad_norm": 1.0961796045303345, "learning_rate": 1.917226305817399e-05, "loss": 0.8893, "step": 3512 }, { "epoch": 0.469778015512169, "grad_norm": 1.0665099620819092, "learning_rate": 1.917168782222852e-05, "loss": 0.9378, "step": 3513 }, { "epoch": 0.4699117411072479, "grad_norm": 1.0473741292953491, "learning_rate": 1.9171112395107988e-05, "loss": 0.8196, "step": 3514 }, { "epoch": 0.47004546670232683, "grad_norm": 1.1318310499191284, "learning_rate": 1.9170536776824396e-05, "loss": 1.1079, "step": 3515 }, { "epoch": 0.47017919229740573, "grad_norm": 1.0326460599899292, "learning_rate": 1.9169960967389744e-05, "loss": 0.9907, "step": 3516 }, { "epoch": 0.47031291789248464, "grad_norm": 1.258815050125122, "learning_rate": 1.9169384966816026e-05, "loss": 0.9849, "step": 3517 }, { "epoch": 0.47044664348756354, "grad_norm": 1.0982081890106201, "learning_rate": 1.9168808775115256e-05, "loss": 0.9476, "step": 3518 }, { "epoch": 0.4705803690826424, "grad_norm": 1.0521838665008545, "learning_rate": 1.916823239229944e-05, "loss": 1.0291, "step": 3519 }, { "epoch": 0.4707140946777213, "grad_norm": 1.1831716299057007, "learning_rate": 1.9167655818380594e-05, "loss": 0.9748, "step": 3520 }, { "epoch": 0.4708478202728002, "grad_norm": 1.128922462463379, "learning_rate": 1.916707905337073e-05, "loss": 0.9398, "step": 3521 }, { "epoch": 0.4709815458678791, "grad_norm": 1.2630934715270996, "learning_rate": 1.9166502097281882e-05, "loss": 0.9049, "step": 3522 }, { "epoch": 0.471115271462958, "grad_norm": 1.207610011100769, "learning_rate": 1.9165924950126064e-05, "loss": 1.059, "step": 3523 }, { "epoch": 0.4712489970580369, "grad_norm": 1.01205575466156, "learning_rate": 1.9165347611915313e-05, "loss": 0.9014, "step": 3524 }, { "epoch": 0.4713827226531158, "grad_norm": 1.217113971710205, "learning_rate": 1.9164770082661662e-05, "loss": 1.0318, "step": 3525 }, { "epoch": 0.4715164482481947, "grad_norm": 1.0785239934921265, "learning_rate": 1.9164192362377144e-05, "loss": 0.9314, "step": 3526 }, { "epoch": 0.4716501738432736, "grad_norm": 1.1666805744171143, "learning_rate": 1.9163614451073812e-05, "loss": 0.9629, "step": 3527 }, { "epoch": 0.4717838994383525, "grad_norm": 1.0410642623901367, "learning_rate": 1.91630363487637e-05, "loss": 0.8615, "step": 3528 }, { "epoch": 0.4719176250334314, "grad_norm": 1.0157328844070435, "learning_rate": 1.9162458055458866e-05, "loss": 0.887, "step": 3529 }, { "epoch": 0.4720513506285103, "grad_norm": 1.1191221475601196, "learning_rate": 1.916187957117136e-05, "loss": 1.0339, "step": 3530 }, { "epoch": 0.4721850762235892, "grad_norm": 1.1440049409866333, "learning_rate": 1.9161300895913242e-05, "loss": 0.9753, "step": 3531 }, { "epoch": 0.4723188018186681, "grad_norm": 1.07695734500885, "learning_rate": 1.9160722029696573e-05, "loss": 1.0189, "step": 3532 }, { "epoch": 0.472452527413747, "grad_norm": 1.1916295289993286, "learning_rate": 1.9160142972533423e-05, "loss": 1.0896, "step": 3533 }, { "epoch": 0.4725862530088259, "grad_norm": 1.0217418670654297, "learning_rate": 1.9159563724435852e-05, "loss": 1.0067, "step": 3534 }, { "epoch": 0.4727199786039048, "grad_norm": 1.1300748586654663, "learning_rate": 1.915898428541594e-05, "loss": 0.9931, "step": 3535 }, { "epoch": 0.4728537041989837, "grad_norm": 1.1691640615463257, "learning_rate": 1.915840465548577e-05, "loss": 1.076, "step": 3536 }, { "epoch": 0.4729874297940626, "grad_norm": 1.0145046710968018, "learning_rate": 1.9157824834657413e-05, "loss": 0.9771, "step": 3537 }, { "epoch": 0.47312115538914146, "grad_norm": 1.168656826019287, "learning_rate": 1.9157244822942965e-05, "loss": 1.0824, "step": 3538 }, { "epoch": 0.47325488098422036, "grad_norm": 1.1399892568588257, "learning_rate": 1.9156664620354514e-05, "loss": 1.0226, "step": 3539 }, { "epoch": 0.47338860657929926, "grad_norm": 1.123217225074768, "learning_rate": 1.9156084226904142e-05, "loss": 1.0251, "step": 3540 }, { "epoch": 0.47352233217437817, "grad_norm": 1.085670828819275, "learning_rate": 1.9155503642603963e-05, "loss": 1.0382, "step": 3541 }, { "epoch": 0.4736560577694571, "grad_norm": 0.9745550751686096, "learning_rate": 1.9154922867466067e-05, "loss": 0.906, "step": 3542 }, { "epoch": 0.473789783364536, "grad_norm": 1.0427231788635254, "learning_rate": 1.9154341901502566e-05, "loss": 1.0389, "step": 3543 }, { "epoch": 0.4739235089596149, "grad_norm": 1.0810281038284302, "learning_rate": 1.915376074472557e-05, "loss": 0.9746, "step": 3544 }, { "epoch": 0.4740572345546938, "grad_norm": 1.0047287940979004, "learning_rate": 1.9153179397147187e-05, "loss": 0.8923, "step": 3545 }, { "epoch": 0.4741909601497727, "grad_norm": 1.0907237529754639, "learning_rate": 1.9152597858779538e-05, "loss": 0.9467, "step": 3546 }, { "epoch": 0.47432468574485154, "grad_norm": 1.0600823163986206, "learning_rate": 1.9152016129634746e-05, "loss": 0.9208, "step": 3547 }, { "epoch": 0.47445841133993044, "grad_norm": 1.0306575298309326, "learning_rate": 1.9151434209724935e-05, "loss": 0.879, "step": 3548 }, { "epoch": 0.47459213693500935, "grad_norm": 1.1240202188491821, "learning_rate": 1.9150852099062236e-05, "loss": 0.9873, "step": 3549 }, { "epoch": 0.47472586253008825, "grad_norm": 1.038956642150879, "learning_rate": 1.915026979765878e-05, "loss": 0.9618, "step": 3550 }, { "epoch": 0.47485958812516715, "grad_norm": 1.1260778903961182, "learning_rate": 1.9149687305526704e-05, "loss": 1.0747, "step": 3551 }, { "epoch": 0.47499331372024606, "grad_norm": 1.0979074239730835, "learning_rate": 1.9149104622678155e-05, "loss": 0.9437, "step": 3552 }, { "epoch": 0.47512703931532496, "grad_norm": 1.1374695301055908, "learning_rate": 1.9148521749125275e-05, "loss": 0.9802, "step": 3553 }, { "epoch": 0.47526076491040387, "grad_norm": 1.1580686569213867, "learning_rate": 1.9147938684880213e-05, "loss": 0.938, "step": 3554 }, { "epoch": 0.47539449050548277, "grad_norm": 1.1435892581939697, "learning_rate": 1.9147355429955123e-05, "loss": 0.9127, "step": 3555 }, { "epoch": 0.4755282161005617, "grad_norm": 1.1364918947219849, "learning_rate": 1.9146771984362157e-05, "loss": 0.8869, "step": 3556 }, { "epoch": 0.4756619416956405, "grad_norm": 1.1763559579849243, "learning_rate": 1.9146188348113486e-05, "loss": 0.9242, "step": 3557 }, { "epoch": 0.4757956672907194, "grad_norm": 1.1106432676315308, "learning_rate": 1.914560452122127e-05, "loss": 0.9453, "step": 3558 }, { "epoch": 0.47592939288579833, "grad_norm": 0.9965659976005554, "learning_rate": 1.914502050369768e-05, "loss": 0.9085, "step": 3559 }, { "epoch": 0.47606311848087723, "grad_norm": 1.2233017683029175, "learning_rate": 1.9144436295554885e-05, "loss": 1.0362, "step": 3560 }, { "epoch": 0.47619684407595614, "grad_norm": 1.0986855030059814, "learning_rate": 1.914385189680507e-05, "loss": 0.9245, "step": 3561 }, { "epoch": 0.47633056967103504, "grad_norm": 1.04866623878479, "learning_rate": 1.914326730746041e-05, "loss": 0.9465, "step": 3562 }, { "epoch": 0.47646429526611395, "grad_norm": 1.1320934295654297, "learning_rate": 1.9142682527533095e-05, "loss": 0.974, "step": 3563 }, { "epoch": 0.47659802086119285, "grad_norm": 1.139564871788025, "learning_rate": 1.914209755703531e-05, "loss": 1.0573, "step": 3564 }, { "epoch": 0.47673174645627175, "grad_norm": 1.2128188610076904, "learning_rate": 1.914151239597925e-05, "loss": 1.1402, "step": 3565 }, { "epoch": 0.4768654720513506, "grad_norm": 1.164311408996582, "learning_rate": 1.9140927044377105e-05, "loss": 0.9737, "step": 3566 }, { "epoch": 0.4769991976464295, "grad_norm": 1.1340677738189697, "learning_rate": 1.9140341502241087e-05, "loss": 0.8472, "step": 3567 }, { "epoch": 0.4771329232415084, "grad_norm": 1.14836847782135, "learning_rate": 1.9139755769583398e-05, "loss": 1.0217, "step": 3568 }, { "epoch": 0.4772666488365873, "grad_norm": 1.1419048309326172, "learning_rate": 1.913916984641625e-05, "loss": 1.0192, "step": 3569 }, { "epoch": 0.4774003744316662, "grad_norm": 1.0963401794433594, "learning_rate": 1.913858373275184e-05, "loss": 0.8833, "step": 3570 }, { "epoch": 0.4775341000267451, "grad_norm": 1.1476385593414307, "learning_rate": 1.9137997428602406e-05, "loss": 0.9956, "step": 3571 }, { "epoch": 0.477667825621824, "grad_norm": 1.1408270597457886, "learning_rate": 1.913741093398016e-05, "loss": 0.9826, "step": 3572 }, { "epoch": 0.47780155121690293, "grad_norm": 1.1750731468200684, "learning_rate": 1.913682424889732e-05, "loss": 0.9487, "step": 3573 }, { "epoch": 0.47793527681198184, "grad_norm": 1.2140734195709229, "learning_rate": 1.9136237373366126e-05, "loss": 1.0776, "step": 3574 }, { "epoch": 0.47806900240706074, "grad_norm": 1.0570056438446045, "learning_rate": 1.9135650307398808e-05, "loss": 0.937, "step": 3575 }, { "epoch": 0.4782027280021396, "grad_norm": 1.1552014350891113, "learning_rate": 1.9135063051007597e-05, "loss": 0.9467, "step": 3576 }, { "epoch": 0.4783364535972185, "grad_norm": 1.0553832054138184, "learning_rate": 1.9134475604204742e-05, "loss": 0.9091, "step": 3577 }, { "epoch": 0.4784701791922974, "grad_norm": 1.1314283609390259, "learning_rate": 1.9133887967002483e-05, "loss": 1.0257, "step": 3578 }, { "epoch": 0.4786039047873763, "grad_norm": 1.2739524841308594, "learning_rate": 1.9133300139413067e-05, "loss": 1.0577, "step": 3579 }, { "epoch": 0.4787376303824552, "grad_norm": 1.1427751779556274, "learning_rate": 1.913271212144875e-05, "loss": 0.9154, "step": 3580 }, { "epoch": 0.4788713559775341, "grad_norm": 1.1865460872650146, "learning_rate": 1.913212391312179e-05, "loss": 1.0924, "step": 3581 }, { "epoch": 0.479005081572613, "grad_norm": 1.1503925323486328, "learning_rate": 1.9131535514444445e-05, "loss": 0.9781, "step": 3582 }, { "epoch": 0.4791388071676919, "grad_norm": 1.193393588066101, "learning_rate": 1.913094692542898e-05, "loss": 1.0568, "step": 3583 }, { "epoch": 0.4792725327627708, "grad_norm": 1.0923079252243042, "learning_rate": 1.913035814608766e-05, "loss": 0.9596, "step": 3584 }, { "epoch": 0.47940625835784967, "grad_norm": 1.068599820137024, "learning_rate": 1.9129769176432768e-05, "loss": 0.983, "step": 3585 }, { "epoch": 0.4795399839529286, "grad_norm": 1.0484039783477783, "learning_rate": 1.9129180016476568e-05, "loss": 0.9609, "step": 3586 }, { "epoch": 0.4796737095480075, "grad_norm": 1.0019081830978394, "learning_rate": 1.9128590666231347e-05, "loss": 0.9995, "step": 3587 }, { "epoch": 0.4798074351430864, "grad_norm": 1.1352434158325195, "learning_rate": 1.912800112570939e-05, "loss": 0.8628, "step": 3588 }, { "epoch": 0.4799411607381653, "grad_norm": 1.2107622623443604, "learning_rate": 1.9127411394922982e-05, "loss": 0.9074, "step": 3589 }, { "epoch": 0.4800748863332442, "grad_norm": 0.9772448539733887, "learning_rate": 1.9126821473884423e-05, "loss": 0.9309, "step": 3590 }, { "epoch": 0.4802086119283231, "grad_norm": 1.0572630167007446, "learning_rate": 1.9126231362605997e-05, "loss": 0.8697, "step": 3591 }, { "epoch": 0.480342337523402, "grad_norm": 1.1788923740386963, "learning_rate": 1.9125641061100014e-05, "loss": 0.8581, "step": 3592 }, { "epoch": 0.4804760631184809, "grad_norm": 1.1551872491836548, "learning_rate": 1.9125050569378777e-05, "loss": 1.0829, "step": 3593 }, { "epoch": 0.48060978871355975, "grad_norm": 1.131706714630127, "learning_rate": 1.912445988745459e-05, "loss": 0.9155, "step": 3594 }, { "epoch": 0.48074351430863865, "grad_norm": 1.1873979568481445, "learning_rate": 1.912386901533977e-05, "loss": 0.9594, "step": 3595 }, { "epoch": 0.48087723990371756, "grad_norm": 1.1682223081588745, "learning_rate": 1.912327795304663e-05, "loss": 1.0303, "step": 3596 }, { "epoch": 0.48101096549879646, "grad_norm": 1.195803165435791, "learning_rate": 1.912268670058749e-05, "loss": 1.1797, "step": 3597 }, { "epoch": 0.48114469109387537, "grad_norm": 1.2576552629470825, "learning_rate": 1.9122095257974676e-05, "loss": 0.9474, "step": 3598 }, { "epoch": 0.48127841668895427, "grad_norm": 1.1217975616455078, "learning_rate": 1.9121503625220515e-05, "loss": 1.0134, "step": 3599 }, { "epoch": 0.4814121422840332, "grad_norm": 1.1104400157928467, "learning_rate": 1.912091180233734e-05, "loss": 0.9757, "step": 3600 }, { "epoch": 0.4815458678791121, "grad_norm": 1.099687933921814, "learning_rate": 1.912031978933749e-05, "loss": 0.9546, "step": 3601 }, { "epoch": 0.481679593474191, "grad_norm": 1.0479400157928467, "learning_rate": 1.9119727586233295e-05, "loss": 0.9478, "step": 3602 }, { "epoch": 0.4818133190692699, "grad_norm": 1.0492379665374756, "learning_rate": 1.9119135193037108e-05, "loss": 1.0929, "step": 3603 }, { "epoch": 0.48194704466434873, "grad_norm": 1.0451514720916748, "learning_rate": 1.9118542609761273e-05, "loss": 1.037, "step": 3604 }, { "epoch": 0.48208077025942764, "grad_norm": 1.1628445386886597, "learning_rate": 1.9117949836418143e-05, "loss": 1.031, "step": 3605 }, { "epoch": 0.48221449585450654, "grad_norm": 1.1809580326080322, "learning_rate": 1.9117356873020075e-05, "loss": 1.0823, "step": 3606 }, { "epoch": 0.48234822144958545, "grad_norm": 1.0655548572540283, "learning_rate": 1.9116763719579424e-05, "loss": 1.0181, "step": 3607 }, { "epoch": 0.48248194704466435, "grad_norm": 1.0812218189239502, "learning_rate": 1.911617037610856e-05, "loss": 1.086, "step": 3608 }, { "epoch": 0.48261567263974325, "grad_norm": 1.0576560497283936, "learning_rate": 1.9115576842619846e-05, "loss": 0.895, "step": 3609 }, { "epoch": 0.48274939823482216, "grad_norm": 1.1840145587921143, "learning_rate": 1.911498311912566e-05, "loss": 1.0846, "step": 3610 }, { "epoch": 0.48288312382990106, "grad_norm": 1.1334906816482544, "learning_rate": 1.9114389205638367e-05, "loss": 1.0237, "step": 3611 }, { "epoch": 0.48301684942497997, "grad_norm": 1.1529263257980347, "learning_rate": 1.9113795102170357e-05, "loss": 0.9556, "step": 3612 }, { "epoch": 0.4831505750200588, "grad_norm": 1.0640449523925781, "learning_rate": 1.9113200808734005e-05, "loss": 0.8203, "step": 3613 }, { "epoch": 0.4832843006151377, "grad_norm": 1.05023992061615, "learning_rate": 1.9112606325341706e-05, "loss": 0.9326, "step": 3614 }, { "epoch": 0.4834180262102166, "grad_norm": 1.056945562362671, "learning_rate": 1.9112011652005843e-05, "loss": 0.8566, "step": 3615 }, { "epoch": 0.4835517518052955, "grad_norm": 1.1845916509628296, "learning_rate": 1.911141678873882e-05, "loss": 1.0519, "step": 3616 }, { "epoch": 0.48368547740037443, "grad_norm": 1.0535677671432495, "learning_rate": 1.9110821735553034e-05, "loss": 0.909, "step": 3617 }, { "epoch": 0.48381920299545333, "grad_norm": 1.175420880317688, "learning_rate": 1.9110226492460886e-05, "loss": 1.0694, "step": 3618 }, { "epoch": 0.48395292859053224, "grad_norm": 1.0371235609054565, "learning_rate": 1.9109631059474783e-05, "loss": 0.8209, "step": 3619 }, { "epoch": 0.48408665418561114, "grad_norm": 1.1100915670394897, "learning_rate": 1.9109035436607136e-05, "loss": 1.0566, "step": 3620 }, { "epoch": 0.48422037978069005, "grad_norm": 1.1361162662506104, "learning_rate": 1.910843962387037e-05, "loss": 1.0044, "step": 3621 }, { "epoch": 0.4843541053757689, "grad_norm": 1.0493508577346802, "learning_rate": 1.9107843621276886e-05, "loss": 0.9127, "step": 3622 }, { "epoch": 0.4844878309708478, "grad_norm": 1.0796613693237305, "learning_rate": 1.910724742883912e-05, "loss": 0.9193, "step": 3623 }, { "epoch": 0.4846215565659267, "grad_norm": 1.1538848876953125, "learning_rate": 1.91066510465695e-05, "loss": 0.9153, "step": 3624 }, { "epoch": 0.4847552821610056, "grad_norm": 1.111733078956604, "learning_rate": 1.9106054474480448e-05, "loss": 0.9949, "step": 3625 }, { "epoch": 0.4848890077560845, "grad_norm": 1.3412001132965088, "learning_rate": 1.9105457712584405e-05, "loss": 0.9663, "step": 3626 }, { "epoch": 0.4850227333511634, "grad_norm": 1.0330497026443481, "learning_rate": 1.9104860760893808e-05, "loss": 0.9699, "step": 3627 }, { "epoch": 0.4851564589462423, "grad_norm": 1.23576021194458, "learning_rate": 1.9104263619421105e-05, "loss": 1.0132, "step": 3628 }, { "epoch": 0.4852901845413212, "grad_norm": 1.037109613418579, "learning_rate": 1.9103666288178737e-05, "loss": 0.9309, "step": 3629 }, { "epoch": 0.4854239101364001, "grad_norm": 1.1156550645828247, "learning_rate": 1.9103068767179156e-05, "loss": 0.9313, "step": 3630 }, { "epoch": 0.48555763573147903, "grad_norm": 1.2146857976913452, "learning_rate": 1.9102471056434816e-05, "loss": 1.0601, "step": 3631 }, { "epoch": 0.4856913613265579, "grad_norm": 1.1898068189620972, "learning_rate": 1.910187315595818e-05, "loss": 1.0331, "step": 3632 }, { "epoch": 0.4858250869216368, "grad_norm": 1.020881175994873, "learning_rate": 1.9101275065761705e-05, "loss": 0.9203, "step": 3633 }, { "epoch": 0.4859588125167157, "grad_norm": 1.0890753269195557, "learning_rate": 1.9100676785857862e-05, "loss": 1.0052, "step": 3634 }, { "epoch": 0.4860925381117946, "grad_norm": 1.1613149642944336, "learning_rate": 1.9100078316259118e-05, "loss": 1.0049, "step": 3635 }, { "epoch": 0.4862262637068735, "grad_norm": 1.2421503067016602, "learning_rate": 1.909947965697795e-05, "loss": 1.0787, "step": 3636 }, { "epoch": 0.4863599893019524, "grad_norm": 1.1563407182693481, "learning_rate": 1.9098880808026832e-05, "loss": 0.9664, "step": 3637 }, { "epoch": 0.4864937148970313, "grad_norm": 1.1479296684265137, "learning_rate": 1.909828176941826e-05, "loss": 1.0166, "step": 3638 }, { "epoch": 0.4866274404921102, "grad_norm": 1.2485655546188354, "learning_rate": 1.90976825411647e-05, "loss": 1.1338, "step": 3639 }, { "epoch": 0.4867611660871891, "grad_norm": 1.0930095911026, "learning_rate": 1.909708312327866e-05, "loss": 1.048, "step": 3640 }, { "epoch": 0.48689489168226796, "grad_norm": 1.124192714691162, "learning_rate": 1.9096483515772625e-05, "loss": 1.0228, "step": 3641 }, { "epoch": 0.48702861727734686, "grad_norm": 1.0576257705688477, "learning_rate": 1.9095883718659095e-05, "loss": 0.9589, "step": 3642 }, { "epoch": 0.48716234287242577, "grad_norm": 1.0012192726135254, "learning_rate": 1.9095283731950572e-05, "loss": 0.9364, "step": 3643 }, { "epoch": 0.4872960684675047, "grad_norm": 1.049712896347046, "learning_rate": 1.9094683555659565e-05, "loss": 0.9965, "step": 3644 }, { "epoch": 0.4874297940625836, "grad_norm": 1.155009388923645, "learning_rate": 1.9094083189798583e-05, "loss": 1.0536, "step": 3645 }, { "epoch": 0.4875635196576625, "grad_norm": 1.1310431957244873, "learning_rate": 1.9093482634380135e-05, "loss": 0.9225, "step": 3646 }, { "epoch": 0.4876972452527414, "grad_norm": 1.1689939498901367, "learning_rate": 1.9092881889416744e-05, "loss": 1.017, "step": 3647 }, { "epoch": 0.4878309708478203, "grad_norm": 1.155153751373291, "learning_rate": 1.9092280954920935e-05, "loss": 0.9707, "step": 3648 }, { "epoch": 0.4879646964428992, "grad_norm": 1.1819772720336914, "learning_rate": 1.9091679830905225e-05, "loss": 0.9849, "step": 3649 }, { "epoch": 0.48809842203797804, "grad_norm": 1.2240902185440063, "learning_rate": 1.909107851738215e-05, "loss": 0.9631, "step": 3650 }, { "epoch": 0.48823214763305695, "grad_norm": 1.084999680519104, "learning_rate": 1.9090477014364242e-05, "loss": 0.9902, "step": 3651 }, { "epoch": 0.48836587322813585, "grad_norm": 1.1480823755264282, "learning_rate": 1.9089875321864043e-05, "loss": 0.9614, "step": 3652 }, { "epoch": 0.48849959882321475, "grad_norm": 1.0436360836029053, "learning_rate": 1.908927343989409e-05, "loss": 0.9406, "step": 3653 }, { "epoch": 0.48863332441829366, "grad_norm": 1.0209296941757202, "learning_rate": 1.9088671368466928e-05, "loss": 0.9442, "step": 3654 }, { "epoch": 0.48876705001337256, "grad_norm": 1.1864526271820068, "learning_rate": 1.9088069107595105e-05, "loss": 1.0133, "step": 3655 }, { "epoch": 0.48890077560845147, "grad_norm": 1.2133468389511108, "learning_rate": 1.908746665729118e-05, "loss": 0.9762, "step": 3656 }, { "epoch": 0.48903450120353037, "grad_norm": 1.1199297904968262, "learning_rate": 1.908686401756771e-05, "loss": 1.0346, "step": 3657 }, { "epoch": 0.4891682267986093, "grad_norm": 1.1451926231384277, "learning_rate": 1.9086261188437255e-05, "loss": 0.9842, "step": 3658 }, { "epoch": 0.4893019523936882, "grad_norm": 1.033084511756897, "learning_rate": 1.908565816991238e-05, "loss": 0.9413, "step": 3659 }, { "epoch": 0.489435677988767, "grad_norm": 1.0193250179290771, "learning_rate": 1.908505496200565e-05, "loss": 0.9432, "step": 3660 }, { "epoch": 0.48956940358384593, "grad_norm": 1.1539981365203857, "learning_rate": 1.908445156472965e-05, "loss": 0.9584, "step": 3661 }, { "epoch": 0.48970312917892483, "grad_norm": 1.114689826965332, "learning_rate": 1.9083847978096944e-05, "loss": 1.0218, "step": 3662 }, { "epoch": 0.48983685477400374, "grad_norm": 1.058713436126709, "learning_rate": 1.9083244202120124e-05, "loss": 0.9672, "step": 3663 }, { "epoch": 0.48997058036908264, "grad_norm": 1.150854468345642, "learning_rate": 1.9082640236811766e-05, "loss": 0.9935, "step": 3664 }, { "epoch": 0.49010430596416155, "grad_norm": 1.0963890552520752, "learning_rate": 1.9082036082184466e-05, "loss": 0.9814, "step": 3665 }, { "epoch": 0.49023803155924045, "grad_norm": 1.1178874969482422, "learning_rate": 1.9081431738250815e-05, "loss": 0.8903, "step": 3666 }, { "epoch": 0.49037175715431935, "grad_norm": 1.0414948463439941, "learning_rate": 1.908082720502341e-05, "loss": 1.0131, "step": 3667 }, { "epoch": 0.49050548274939826, "grad_norm": 1.0815478563308716, "learning_rate": 1.9080222482514847e-05, "loss": 0.9576, "step": 3668 }, { "epoch": 0.4906392083444771, "grad_norm": 1.0705641508102417, "learning_rate": 1.9079617570737738e-05, "loss": 0.9399, "step": 3669 }, { "epoch": 0.490772933939556, "grad_norm": 1.0730514526367188, "learning_rate": 1.907901246970469e-05, "loss": 0.9902, "step": 3670 }, { "epoch": 0.4909066595346349, "grad_norm": 1.1241930723190308, "learning_rate": 1.9078407179428313e-05, "loss": 1.1435, "step": 3671 }, { "epoch": 0.4910403851297138, "grad_norm": 1.034538745880127, "learning_rate": 1.9077801699921225e-05, "loss": 0.9647, "step": 3672 }, { "epoch": 0.4911741107247927, "grad_norm": 1.032455563545227, "learning_rate": 1.9077196031196047e-05, "loss": 0.9686, "step": 3673 }, { "epoch": 0.4913078363198716, "grad_norm": 1.2188570499420166, "learning_rate": 1.9076590173265406e-05, "loss": 1.0638, "step": 3674 }, { "epoch": 0.49144156191495053, "grad_norm": 1.1617692708969116, "learning_rate": 1.9075984126141927e-05, "loss": 0.9833, "step": 3675 }, { "epoch": 0.49157528751002944, "grad_norm": 1.257301688194275, "learning_rate": 1.9075377889838243e-05, "loss": 1.1119, "step": 3676 }, { "epoch": 0.49170901310510834, "grad_norm": 1.0904678106307983, "learning_rate": 1.907477146436699e-05, "loss": 0.9529, "step": 3677 }, { "epoch": 0.49184273870018724, "grad_norm": 1.039637804031372, "learning_rate": 1.9074164849740813e-05, "loss": 0.9508, "step": 3678 }, { "epoch": 0.4919764642952661, "grad_norm": 1.1693003177642822, "learning_rate": 1.9073558045972352e-05, "loss": 0.9997, "step": 3679 }, { "epoch": 0.492110189890345, "grad_norm": 1.0231982469558716, "learning_rate": 1.9072951053074252e-05, "loss": 0.9573, "step": 3680 }, { "epoch": 0.4922439154854239, "grad_norm": 1.1048824787139893, "learning_rate": 1.907234387105917e-05, "loss": 1.078, "step": 3681 }, { "epoch": 0.4923776410805028, "grad_norm": 1.1571928262710571, "learning_rate": 1.9071736499939765e-05, "loss": 0.9978, "step": 3682 }, { "epoch": 0.4925113666755817, "grad_norm": 1.0723716020584106, "learning_rate": 1.9071128939728693e-05, "loss": 1.0189, "step": 3683 }, { "epoch": 0.4926450922706606, "grad_norm": 0.9494854211807251, "learning_rate": 1.9070521190438618e-05, "loss": 0.886, "step": 3684 }, { "epoch": 0.4927788178657395, "grad_norm": 1.0872453451156616, "learning_rate": 1.9069913252082207e-05, "loss": 0.9689, "step": 3685 }, { "epoch": 0.4929125434608184, "grad_norm": 1.2586435079574585, "learning_rate": 1.9069305124672134e-05, "loss": 0.9871, "step": 3686 }, { "epoch": 0.4930462690558973, "grad_norm": 1.0400748252868652, "learning_rate": 1.9068696808221073e-05, "loss": 0.9015, "step": 3687 }, { "epoch": 0.4931799946509762, "grad_norm": 1.0710338354110718, "learning_rate": 1.9068088302741703e-05, "loss": 1.0109, "step": 3688 }, { "epoch": 0.4933137202460551, "grad_norm": 1.1903811693191528, "learning_rate": 1.906747960824671e-05, "loss": 1.0345, "step": 3689 }, { "epoch": 0.493447445841134, "grad_norm": 1.2121220827102661, "learning_rate": 1.9066870724748786e-05, "loss": 1.0551, "step": 3690 }, { "epoch": 0.4935811714362129, "grad_norm": 0.9684620499610901, "learning_rate": 1.9066261652260615e-05, "loss": 0.9148, "step": 3691 }, { "epoch": 0.4937148970312918, "grad_norm": 1.090959906578064, "learning_rate": 1.9065652390794894e-05, "loss": 1.0012, "step": 3692 }, { "epoch": 0.4938486226263707, "grad_norm": 1.0787307024002075, "learning_rate": 1.9065042940364326e-05, "loss": 0.9869, "step": 3693 }, { "epoch": 0.4939823482214496, "grad_norm": 1.052585482597351, "learning_rate": 1.906443330098161e-05, "loss": 0.8817, "step": 3694 }, { "epoch": 0.4941160738165285, "grad_norm": 1.048724889755249, "learning_rate": 1.9063823472659457e-05, "loss": 1.0274, "step": 3695 }, { "epoch": 0.4942497994116074, "grad_norm": 1.2308967113494873, "learning_rate": 1.9063213455410577e-05, "loss": 0.9794, "step": 3696 }, { "epoch": 0.49438352500668625, "grad_norm": 1.2070680856704712, "learning_rate": 1.9062603249247686e-05, "loss": 0.8997, "step": 3697 }, { "epoch": 0.49451725060176516, "grad_norm": 1.032382845878601, "learning_rate": 1.90619928541835e-05, "loss": 0.9202, "step": 3698 }, { "epoch": 0.49465097619684406, "grad_norm": 1.2193373441696167, "learning_rate": 1.9061382270230745e-05, "loss": 1.0274, "step": 3699 }, { "epoch": 0.49478470179192297, "grad_norm": 1.241326928138733, "learning_rate": 1.9060771497402147e-05, "loss": 1.1391, "step": 3700 }, { "epoch": 0.49491842738700187, "grad_norm": 1.0512620210647583, "learning_rate": 1.9060160535710438e-05, "loss": 0.94, "step": 3701 }, { "epoch": 0.4950521529820808, "grad_norm": 1.3783785104751587, "learning_rate": 1.9059549385168355e-05, "loss": 0.9942, "step": 3702 }, { "epoch": 0.4951858785771597, "grad_norm": 1.0376447439193726, "learning_rate": 1.905893804578863e-05, "loss": 0.8717, "step": 3703 }, { "epoch": 0.4953196041722386, "grad_norm": 1.1338492631912231, "learning_rate": 1.9058326517584014e-05, "loss": 1.0457, "step": 3704 }, { "epoch": 0.4954533297673175, "grad_norm": 1.1192903518676758, "learning_rate": 1.9057714800567244e-05, "loss": 0.9726, "step": 3705 }, { "epoch": 0.4955870553623964, "grad_norm": 1.0879130363464355, "learning_rate": 1.905710289475108e-05, "loss": 0.9052, "step": 3706 }, { "epoch": 0.49572078095747524, "grad_norm": 1.03330397605896, "learning_rate": 1.9056490800148273e-05, "loss": 0.9178, "step": 3707 }, { "epoch": 0.49585450655255414, "grad_norm": 1.083507776260376, "learning_rate": 1.905587851677158e-05, "loss": 1.0012, "step": 3708 }, { "epoch": 0.49598823214763305, "grad_norm": 1.0827792882919312, "learning_rate": 1.9055266044633765e-05, "loss": 0.8887, "step": 3709 }, { "epoch": 0.49612195774271195, "grad_norm": 1.1803441047668457, "learning_rate": 1.9054653383747593e-05, "loss": 1.1565, "step": 3710 }, { "epoch": 0.49625568333779085, "grad_norm": 1.2359166145324707, "learning_rate": 1.905404053412584e-05, "loss": 1.0697, "step": 3711 }, { "epoch": 0.49638940893286976, "grad_norm": 1.18559992313385, "learning_rate": 1.9053427495781273e-05, "loss": 1.0273, "step": 3712 }, { "epoch": 0.49652313452794866, "grad_norm": 1.1418718099594116, "learning_rate": 1.905281426872667e-05, "loss": 1.0001, "step": 3713 }, { "epoch": 0.49665686012302757, "grad_norm": 1.032114863395691, "learning_rate": 1.905220085297482e-05, "loss": 0.8729, "step": 3714 }, { "epoch": 0.49679058571810647, "grad_norm": 1.1375812292099, "learning_rate": 1.9051587248538505e-05, "loss": 0.9755, "step": 3715 }, { "epoch": 0.4969243113131853, "grad_norm": 1.0829858779907227, "learning_rate": 1.9050973455430517e-05, "loss": 0.949, "step": 3716 }, { "epoch": 0.4970580369082642, "grad_norm": 1.0053060054779053, "learning_rate": 1.9050359473663644e-05, "loss": 0.8596, "step": 3717 }, { "epoch": 0.4971917625033431, "grad_norm": 1.0411442518234253, "learning_rate": 1.9049745303250692e-05, "loss": 0.9247, "step": 3718 }, { "epoch": 0.49732548809842203, "grad_norm": 1.0137289762496948, "learning_rate": 1.9049130944204454e-05, "loss": 0.9255, "step": 3719 }, { "epoch": 0.49745921369350093, "grad_norm": 1.199967384338379, "learning_rate": 1.9048516396537745e-05, "loss": 1.0447, "step": 3720 }, { "epoch": 0.49759293928857984, "grad_norm": 1.1437036991119385, "learning_rate": 1.9047901660263372e-05, "loss": 1.0075, "step": 3721 }, { "epoch": 0.49772666488365874, "grad_norm": 1.3072922229766846, "learning_rate": 1.904728673539414e-05, "loss": 0.9919, "step": 3722 }, { "epoch": 0.49786039047873765, "grad_norm": 1.213537335395813, "learning_rate": 1.904667162194288e-05, "loss": 1.0676, "step": 3723 }, { "epoch": 0.49799411607381655, "grad_norm": 1.12119460105896, "learning_rate": 1.9046056319922403e-05, "loss": 0.9715, "step": 3724 }, { "epoch": 0.4981278416688954, "grad_norm": 1.0706086158752441, "learning_rate": 1.9045440829345536e-05, "loss": 1.0197, "step": 3725 }, { "epoch": 0.4982615672639743, "grad_norm": 1.054457187652588, "learning_rate": 1.904482515022511e-05, "loss": 0.9707, "step": 3726 }, { "epoch": 0.4983952928590532, "grad_norm": 1.1057053804397583, "learning_rate": 1.9044209282573963e-05, "loss": 0.9691, "step": 3727 }, { "epoch": 0.4985290184541321, "grad_norm": 1.1541610956192017, "learning_rate": 1.9043593226404927e-05, "loss": 0.9649, "step": 3728 }, { "epoch": 0.498662744049211, "grad_norm": 1.0658810138702393, "learning_rate": 1.9042976981730845e-05, "loss": 1.0062, "step": 3729 }, { "epoch": 0.4987964696442899, "grad_norm": 1.13431978225708, "learning_rate": 1.9042360548564557e-05, "loss": 1.0002, "step": 3730 }, { "epoch": 0.4989301952393688, "grad_norm": 1.0684891939163208, "learning_rate": 1.904174392691892e-05, "loss": 0.9908, "step": 3731 }, { "epoch": 0.4990639208344477, "grad_norm": 1.1629993915557861, "learning_rate": 1.9041127116806782e-05, "loss": 1.0002, "step": 3732 }, { "epoch": 0.49919764642952663, "grad_norm": 1.0453673601150513, "learning_rate": 1.9040510118241e-05, "loss": 1.0335, "step": 3733 }, { "epoch": 0.49933137202460554, "grad_norm": 1.224331259727478, "learning_rate": 1.9039892931234434e-05, "loss": 1.0918, "step": 3734 }, { "epoch": 0.4994650976196844, "grad_norm": 1.0447088479995728, "learning_rate": 1.903927555579995e-05, "loss": 0.9817, "step": 3735 }, { "epoch": 0.4995988232147633, "grad_norm": 1.0892528295516968, "learning_rate": 1.903865799195042e-05, "loss": 1.012, "step": 3736 }, { "epoch": 0.4997325488098422, "grad_norm": 1.1643753051757812, "learning_rate": 1.9038040239698712e-05, "loss": 1.145, "step": 3737 }, { "epoch": 0.4998662744049211, "grad_norm": 1.215293288230896, "learning_rate": 1.9037422299057703e-05, "loss": 0.9291, "step": 3738 }, { "epoch": 0.5, "grad_norm": 1.1376841068267822, "learning_rate": 1.9036804170040277e-05, "loss": 1.0363, "step": 3739 }, { "epoch": 0.5001337255950788, "grad_norm": 1.058864712715149, "learning_rate": 1.903618585265931e-05, "loss": 0.9304, "step": 3740 }, { "epoch": 0.5002674511901578, "grad_norm": 1.107782006263733, "learning_rate": 1.9035567346927698e-05, "loss": 0.9755, "step": 3741 }, { "epoch": 0.5004011767852367, "grad_norm": 1.1619786024093628, "learning_rate": 1.9034948652858333e-05, "loss": 1.0345, "step": 3742 }, { "epoch": 0.5005349023803156, "grad_norm": 1.1246308088302612, "learning_rate": 1.9034329770464107e-05, "loss": 0.8764, "step": 3743 }, { "epoch": 0.5006686279753945, "grad_norm": 0.9970629215240479, "learning_rate": 1.903371069975792e-05, "loss": 0.9618, "step": 3744 }, { "epoch": 0.5008023535704734, "grad_norm": 0.9689397215843201, "learning_rate": 1.9033091440752677e-05, "loss": 0.9536, "step": 3745 }, { "epoch": 0.5009360791655523, "grad_norm": 1.151862382888794, "learning_rate": 1.903247199346129e-05, "loss": 0.859, "step": 3746 }, { "epoch": 0.5010698047606312, "grad_norm": 1.1340296268463135, "learning_rate": 1.9031852357896667e-05, "loss": 1.0223, "step": 3747 }, { "epoch": 0.5012035303557101, "grad_norm": 1.0682473182678223, "learning_rate": 1.903123253407172e-05, "loss": 0.8795, "step": 3748 }, { "epoch": 0.5013372559507889, "grad_norm": 1.12838613986969, "learning_rate": 1.903061252199938e-05, "loss": 1.0473, "step": 3749 }, { "epoch": 0.5014709815458679, "grad_norm": 1.1914901733398438, "learning_rate": 1.902999232169256e-05, "loss": 0.9686, "step": 3750 }, { "epoch": 0.5016047071409467, "grad_norm": 1.078313946723938, "learning_rate": 1.9029371933164192e-05, "loss": 0.9996, "step": 3751 }, { "epoch": 0.5017384327360257, "grad_norm": 1.182842493057251, "learning_rate": 1.90287513564272e-05, "loss": 0.9314, "step": 3752 }, { "epoch": 0.5018721583311045, "grad_norm": 1.1277867555618286, "learning_rate": 1.9028130591494532e-05, "loss": 0.9839, "step": 3753 }, { "epoch": 0.5020058839261835, "grad_norm": 1.0247992277145386, "learning_rate": 1.9027509638379122e-05, "loss": 0.9676, "step": 3754 }, { "epoch": 0.5021396095212624, "grad_norm": 1.071983814239502, "learning_rate": 1.902688849709391e-05, "loss": 0.9449, "step": 3755 }, { "epoch": 0.5022733351163413, "grad_norm": 1.0668017864227295, "learning_rate": 1.902626716765184e-05, "loss": 0.9601, "step": 3756 }, { "epoch": 0.5024070607114202, "grad_norm": 1.1264066696166992, "learning_rate": 1.9025645650065874e-05, "loss": 0.9724, "step": 3757 }, { "epoch": 0.502540786306499, "grad_norm": 1.080678105354309, "learning_rate": 1.9025023944348957e-05, "loss": 1.0995, "step": 3758 }, { "epoch": 0.502674511901578, "grad_norm": 1.193969964981079, "learning_rate": 1.9024402050514056e-05, "loss": 0.9914, "step": 3759 }, { "epoch": 0.5028082374966568, "grad_norm": 1.1350390911102295, "learning_rate": 1.9023779968574127e-05, "loss": 0.9721, "step": 3760 }, { "epoch": 0.5029419630917358, "grad_norm": 1.1308635473251343, "learning_rate": 1.902315769854214e-05, "loss": 0.8765, "step": 3761 }, { "epoch": 0.5030756886868146, "grad_norm": 1.0877312421798706, "learning_rate": 1.9022535240431066e-05, "loss": 0.9379, "step": 3762 }, { "epoch": 0.5032094142818936, "grad_norm": 1.036550521850586, "learning_rate": 1.902191259425388e-05, "loss": 0.9568, "step": 3763 }, { "epoch": 0.5033431398769724, "grad_norm": 1.0522043704986572, "learning_rate": 1.9021289760023555e-05, "loss": 0.8939, "step": 3764 }, { "epoch": 0.5034768654720514, "grad_norm": 1.1509100198745728, "learning_rate": 1.902066673775308e-05, "loss": 0.9778, "step": 3765 }, { "epoch": 0.5036105910671302, "grad_norm": 1.0133944749832153, "learning_rate": 1.9020043527455438e-05, "loss": 0.9107, "step": 3766 }, { "epoch": 0.5037443166622092, "grad_norm": 1.0727956295013428, "learning_rate": 1.9019420129143618e-05, "loss": 0.8588, "step": 3767 }, { "epoch": 0.503878042257288, "grad_norm": 1.1666569709777832, "learning_rate": 1.9018796542830616e-05, "loss": 1.053, "step": 3768 }, { "epoch": 0.5040117678523669, "grad_norm": 1.1905138492584229, "learning_rate": 1.9018172768529433e-05, "loss": 1.0018, "step": 3769 }, { "epoch": 0.5041454934474459, "grad_norm": 1.1711221933364868, "learning_rate": 1.9017548806253068e-05, "loss": 1.0021, "step": 3770 }, { "epoch": 0.5042792190425247, "grad_norm": 0.9530136585235596, "learning_rate": 1.9016924656014525e-05, "loss": 0.9986, "step": 3771 }, { "epoch": 0.5044129446376037, "grad_norm": 1.056572437286377, "learning_rate": 1.901630031782682e-05, "loss": 1.0203, "step": 3772 }, { "epoch": 0.5045466702326825, "grad_norm": 1.1881023645401, "learning_rate": 1.9015675791702956e-05, "loss": 1.0977, "step": 3773 }, { "epoch": 0.5046803958277615, "grad_norm": 1.1704810857772827, "learning_rate": 1.9015051077655963e-05, "loss": 1.0022, "step": 3774 }, { "epoch": 0.5048141214228403, "grad_norm": 1.1093477010726929, "learning_rate": 1.901442617569885e-05, "loss": 1.0099, "step": 3775 }, { "epoch": 0.5049478470179193, "grad_norm": 1.2047743797302246, "learning_rate": 1.9013801085844655e-05, "loss": 1.0734, "step": 3776 }, { "epoch": 0.5050815726129981, "grad_norm": 0.972062349319458, "learning_rate": 1.90131758081064e-05, "loss": 0.9386, "step": 3777 }, { "epoch": 0.505215298208077, "grad_norm": 1.0205680131912231, "learning_rate": 1.901255034249712e-05, "loss": 0.9765, "step": 3778 }, { "epoch": 0.5053490238031559, "grad_norm": 1.0622607469558716, "learning_rate": 1.9011924689029856e-05, "loss": 0.9258, "step": 3779 }, { "epoch": 0.5054827493982348, "grad_norm": 1.0987156629562378, "learning_rate": 1.901129884771764e-05, "loss": 0.9289, "step": 3780 }, { "epoch": 0.5056164749933137, "grad_norm": 1.1513290405273438, "learning_rate": 1.9010672818573522e-05, "loss": 0.9944, "step": 3781 }, { "epoch": 0.5057502005883926, "grad_norm": 1.1628025770187378, "learning_rate": 1.9010046601610557e-05, "loss": 0.9449, "step": 3782 }, { "epoch": 0.5058839261834716, "grad_norm": 1.044317603111267, "learning_rate": 1.9009420196841786e-05, "loss": 0.9575, "step": 3783 }, { "epoch": 0.5060176517785504, "grad_norm": 1.1931627988815308, "learning_rate": 1.9008793604280275e-05, "loss": 0.9242, "step": 3784 }, { "epoch": 0.5061513773736294, "grad_norm": 1.124172568321228, "learning_rate": 1.900816682393908e-05, "loss": 0.9965, "step": 3785 }, { "epoch": 0.5062851029687082, "grad_norm": 1.0585620403289795, "learning_rate": 1.9007539855831272e-05, "loss": 0.9878, "step": 3786 }, { "epoch": 0.5064188285637871, "grad_norm": 1.2777968645095825, "learning_rate": 1.900691269996991e-05, "loss": 1.0873, "step": 3787 }, { "epoch": 0.506552554158866, "grad_norm": 1.1473510265350342, "learning_rate": 1.9006285356368076e-05, "loss": 0.991, "step": 3788 }, { "epoch": 0.5066862797539449, "grad_norm": 1.0634920597076416, "learning_rate": 1.9005657825038838e-05, "loss": 1.0212, "step": 3789 }, { "epoch": 0.5068200053490238, "grad_norm": 1.2837328910827637, "learning_rate": 1.900503010599528e-05, "loss": 1.0114, "step": 3790 }, { "epoch": 0.5069537309441027, "grad_norm": 1.1228044033050537, "learning_rate": 1.900440219925049e-05, "loss": 1.0315, "step": 3791 }, { "epoch": 0.5070874565391816, "grad_norm": 1.097066044807434, "learning_rate": 1.900377410481755e-05, "loss": 0.9859, "step": 3792 }, { "epoch": 0.5072211821342605, "grad_norm": 1.1513768434524536, "learning_rate": 1.9003145822709553e-05, "loss": 0.9559, "step": 3793 }, { "epoch": 0.5073549077293394, "grad_norm": 1.0075641870498657, "learning_rate": 1.90025173529396e-05, "loss": 0.9548, "step": 3794 }, { "epoch": 0.5074886333244183, "grad_norm": 1.0988287925720215, "learning_rate": 1.9001888695520785e-05, "loss": 0.9136, "step": 3795 }, { "epoch": 0.5076223589194971, "grad_norm": 1.1111541986465454, "learning_rate": 1.9001259850466214e-05, "loss": 1.1613, "step": 3796 }, { "epoch": 0.5077560845145761, "grad_norm": 1.156724214553833, "learning_rate": 1.9000630817788994e-05, "loss": 0.9497, "step": 3797 }, { "epoch": 0.507889810109655, "grad_norm": 1.051926612854004, "learning_rate": 1.900000159750224e-05, "loss": 0.8897, "step": 3798 }, { "epoch": 0.5080235357047339, "grad_norm": 1.1226952075958252, "learning_rate": 1.8999372189619062e-05, "loss": 0.9084, "step": 3799 }, { "epoch": 0.5081572612998128, "grad_norm": 1.1156808137893677, "learning_rate": 1.8998742594152585e-05, "loss": 0.9657, "step": 3800 }, { "epoch": 0.5082909868948917, "grad_norm": 1.051435112953186, "learning_rate": 1.8998112811115924e-05, "loss": 0.8922, "step": 3801 }, { "epoch": 0.5084247124899706, "grad_norm": 0.98753422498703, "learning_rate": 1.8997482840522218e-05, "loss": 0.9868, "step": 3802 }, { "epoch": 0.5085584380850495, "grad_norm": 1.0197744369506836, "learning_rate": 1.899685268238459e-05, "loss": 0.8274, "step": 3803 }, { "epoch": 0.5086921636801284, "grad_norm": 1.183379054069519, "learning_rate": 1.8996222336716172e-05, "loss": 1.1568, "step": 3804 }, { "epoch": 0.5088258892752072, "grad_norm": 1.0052472352981567, "learning_rate": 1.8995591803530115e-05, "loss": 0.9507, "step": 3805 }, { "epoch": 0.5089596148702862, "grad_norm": 1.1517760753631592, "learning_rate": 1.8994961082839548e-05, "loss": 0.9336, "step": 3806 }, { "epoch": 0.509093340465365, "grad_norm": 1.1153533458709717, "learning_rate": 1.899433017465763e-05, "loss": 0.8832, "step": 3807 }, { "epoch": 0.509227066060444, "grad_norm": 1.1506197452545166, "learning_rate": 1.8993699078997506e-05, "loss": 0.9337, "step": 3808 }, { "epoch": 0.5093607916555228, "grad_norm": 1.1759499311447144, "learning_rate": 1.899306779587233e-05, "loss": 0.9719, "step": 3809 }, { "epoch": 0.5094945172506018, "grad_norm": 0.9596386551856995, "learning_rate": 1.8992436325295258e-05, "loss": 0.8986, "step": 3810 }, { "epoch": 0.5096282428456806, "grad_norm": 1.219245195388794, "learning_rate": 1.8991804667279455e-05, "loss": 1.002, "step": 3811 }, { "epoch": 0.5097619684407596, "grad_norm": 1.2183505296707153, "learning_rate": 1.8991172821838093e-05, "loss": 0.9283, "step": 3812 }, { "epoch": 0.5098956940358385, "grad_norm": 1.310115098953247, "learning_rate": 1.8990540788984336e-05, "loss": 1.0332, "step": 3813 }, { "epoch": 0.5100294196309173, "grad_norm": 1.111986517906189, "learning_rate": 1.8989908568731356e-05, "loss": 0.9784, "step": 3814 }, { "epoch": 0.5101631452259963, "grad_norm": 1.2221065759658813, "learning_rate": 1.8989276161092337e-05, "loss": 1.1051, "step": 3815 }, { "epoch": 0.5102968708210751, "grad_norm": 1.1372051239013672, "learning_rate": 1.898864356608046e-05, "loss": 0.9523, "step": 3816 }, { "epoch": 0.5104305964161541, "grad_norm": 1.1853171586990356, "learning_rate": 1.8988010783708906e-05, "loss": 1.05, "step": 3817 }, { "epoch": 0.5105643220112329, "grad_norm": 1.1441550254821777, "learning_rate": 1.8987377813990867e-05, "loss": 1.0411, "step": 3818 }, { "epoch": 0.5106980476063119, "grad_norm": 1.1583088636398315, "learning_rate": 1.898674465693954e-05, "loss": 0.9104, "step": 3819 }, { "epoch": 0.5108317732013907, "grad_norm": 1.1596380472183228, "learning_rate": 1.8986111312568118e-05, "loss": 0.9962, "step": 3820 }, { "epoch": 0.5109654987964697, "grad_norm": 1.1127177476882935, "learning_rate": 1.8985477780889808e-05, "loss": 0.9593, "step": 3821 }, { "epoch": 0.5110992243915485, "grad_norm": 1.0773991346359253, "learning_rate": 1.8984844061917805e-05, "loss": 0.9177, "step": 3822 }, { "epoch": 0.5112329499866275, "grad_norm": 1.1321998834609985, "learning_rate": 1.898421015566533e-05, "loss": 1.0687, "step": 3823 }, { "epoch": 0.5113666755817063, "grad_norm": 1.1707265377044678, "learning_rate": 1.8983576062145594e-05, "loss": 1.16, "step": 3824 }, { "epoch": 0.5115004011767852, "grad_norm": 1.043716311454773, "learning_rate": 1.8982941781371807e-05, "loss": 0.8821, "step": 3825 }, { "epoch": 0.5116341267718642, "grad_norm": 1.0262171030044556, "learning_rate": 1.8982307313357195e-05, "loss": 0.891, "step": 3826 }, { "epoch": 0.511767852366943, "grad_norm": 1.095699429512024, "learning_rate": 1.8981672658114983e-05, "loss": 0.9601, "step": 3827 }, { "epoch": 0.511901577962022, "grad_norm": 1.1988531351089478, "learning_rate": 1.8981037815658398e-05, "loss": 0.9624, "step": 3828 }, { "epoch": 0.5120353035571008, "grad_norm": 1.1508054733276367, "learning_rate": 1.8980402786000677e-05, "loss": 1.0078, "step": 3829 }, { "epoch": 0.5121690291521798, "grad_norm": 1.093957543373108, "learning_rate": 1.8979767569155048e-05, "loss": 1.0322, "step": 3830 }, { "epoch": 0.5123027547472586, "grad_norm": 1.0881657600402832, "learning_rate": 1.897913216513476e-05, "loss": 0.8891, "step": 3831 }, { "epoch": 0.5124364803423376, "grad_norm": 1.05989670753479, "learning_rate": 1.8978496573953052e-05, "loss": 0.87, "step": 3832 }, { "epoch": 0.5125702059374164, "grad_norm": 1.113887906074524, "learning_rate": 1.8977860795623178e-05, "loss": 0.8853, "step": 3833 }, { "epoch": 0.5127039315324953, "grad_norm": 1.0479919910430908, "learning_rate": 1.897722483015838e-05, "loss": 1.051, "step": 3834 }, { "epoch": 0.5128376571275742, "grad_norm": 1.0667577981948853, "learning_rate": 1.897658867757193e-05, "loss": 1.0021, "step": 3835 }, { "epoch": 0.5129713827226531, "grad_norm": 1.096814513206482, "learning_rate": 1.897595233787707e-05, "loss": 0.9569, "step": 3836 }, { "epoch": 0.513105108317732, "grad_norm": 1.0351325273513794, "learning_rate": 1.8975315811087077e-05, "loss": 0.9629, "step": 3837 }, { "epoch": 0.5132388339128109, "grad_norm": 1.2123996019363403, "learning_rate": 1.8974679097215214e-05, "loss": 0.9604, "step": 3838 }, { "epoch": 0.5133725595078898, "grad_norm": 1.076482892036438, "learning_rate": 1.8974042196274752e-05, "loss": 0.8971, "step": 3839 }, { "epoch": 0.5135062851029687, "grad_norm": 1.0794481039047241, "learning_rate": 1.8973405108278967e-05, "loss": 0.959, "step": 3840 }, { "epoch": 0.5136400106980477, "grad_norm": 1.1245522499084473, "learning_rate": 1.8972767833241142e-05, "loss": 1.0548, "step": 3841 }, { "epoch": 0.5137737362931265, "grad_norm": 1.0636851787567139, "learning_rate": 1.8972130371174557e-05, "loss": 0.9976, "step": 3842 }, { "epoch": 0.5139074618882054, "grad_norm": 1.137702226638794, "learning_rate": 1.89714927220925e-05, "loss": 0.936, "step": 3843 }, { "epoch": 0.5140411874832843, "grad_norm": 1.1484642028808594, "learning_rate": 1.897085488600826e-05, "loss": 0.9398, "step": 3844 }, { "epoch": 0.5141749130783632, "grad_norm": 1.1954537630081177, "learning_rate": 1.8970216862935134e-05, "loss": 1.113, "step": 3845 }, { "epoch": 0.5143086386734421, "grad_norm": 1.1452709436416626, "learning_rate": 1.896957865288642e-05, "loss": 0.9846, "step": 3846 }, { "epoch": 0.514442364268521, "grad_norm": 1.0287585258483887, "learning_rate": 1.8968940255875426e-05, "loss": 1.0308, "step": 3847 }, { "epoch": 0.5145760898635999, "grad_norm": 1.0327305793762207, "learning_rate": 1.8968301671915454e-05, "loss": 1.0187, "step": 3848 }, { "epoch": 0.5147098154586788, "grad_norm": 0.9759637117385864, "learning_rate": 1.8967662901019813e-05, "loss": 1.072, "step": 3849 }, { "epoch": 0.5148435410537577, "grad_norm": 1.0826876163482666, "learning_rate": 1.8967023943201818e-05, "loss": 1.0316, "step": 3850 }, { "epoch": 0.5149772666488366, "grad_norm": 1.0880807638168335, "learning_rate": 1.8966384798474793e-05, "loss": 0.902, "step": 3851 }, { "epoch": 0.5151109922439154, "grad_norm": 1.1165785789489746, "learning_rate": 1.8965745466852055e-05, "loss": 0.9522, "step": 3852 }, { "epoch": 0.5152447178389944, "grad_norm": 1.1486557722091675, "learning_rate": 1.8965105948346934e-05, "loss": 1.0999, "step": 3853 }, { "epoch": 0.5153784434340732, "grad_norm": 1.2315080165863037, "learning_rate": 1.8964466242972758e-05, "loss": 1.161, "step": 3854 }, { "epoch": 0.5155121690291522, "grad_norm": 1.1569840908050537, "learning_rate": 1.896382635074286e-05, "loss": 1.0412, "step": 3855 }, { "epoch": 0.515645894624231, "grad_norm": 1.0345807075500488, "learning_rate": 1.8963186271670578e-05, "loss": 0.8947, "step": 3856 }, { "epoch": 0.51577962021931, "grad_norm": 1.2175929546356201, "learning_rate": 1.896254600576926e-05, "loss": 1.0052, "step": 3857 }, { "epoch": 0.5159133458143889, "grad_norm": 0.9854939579963684, "learning_rate": 1.896190555305224e-05, "loss": 0.8375, "step": 3858 }, { "epoch": 0.5160470714094678, "grad_norm": 1.0953930616378784, "learning_rate": 1.8961264913532876e-05, "loss": 0.9554, "step": 3859 }, { "epoch": 0.5161807970045467, "grad_norm": 1.3187388181686401, "learning_rate": 1.8960624087224527e-05, "loss": 1.0183, "step": 3860 }, { "epoch": 0.5163145225996255, "grad_norm": 1.0151300430297852, "learning_rate": 1.8959983074140535e-05, "loss": 0.8611, "step": 3861 }, { "epoch": 0.5164482481947045, "grad_norm": 1.0359649658203125, "learning_rate": 1.895934187429427e-05, "loss": 1.0728, "step": 3862 }, { "epoch": 0.5165819737897833, "grad_norm": 1.208021879196167, "learning_rate": 1.8958700487699103e-05, "loss": 1.1133, "step": 3863 }, { "epoch": 0.5167156993848623, "grad_norm": 1.060405969619751, "learning_rate": 1.8958058914368393e-05, "loss": 0.9771, "step": 3864 }, { "epoch": 0.5168494249799411, "grad_norm": 1.0115083456039429, "learning_rate": 1.8957417154315517e-05, "loss": 0.8762, "step": 3865 }, { "epoch": 0.5169831505750201, "grad_norm": 0.9833860993385315, "learning_rate": 1.8956775207553853e-05, "loss": 1.0173, "step": 3866 }, { "epoch": 0.5171168761700989, "grad_norm": 1.0816105604171753, "learning_rate": 1.895613307409678e-05, "loss": 1.106, "step": 3867 }, { "epoch": 0.5172506017651779, "grad_norm": 1.2732880115509033, "learning_rate": 1.8955490753957678e-05, "loss": 0.9081, "step": 3868 }, { "epoch": 0.5173843273602567, "grad_norm": 1.0721365213394165, "learning_rate": 1.8954848247149948e-05, "loss": 0.9248, "step": 3869 }, { "epoch": 0.5175180529553357, "grad_norm": 1.0737065076828003, "learning_rate": 1.895420555368697e-05, "loss": 0.8849, "step": 3870 }, { "epoch": 0.5176517785504146, "grad_norm": 1.1095443964004517, "learning_rate": 1.895356267358215e-05, "loss": 1.0696, "step": 3871 }, { "epoch": 0.5177855041454934, "grad_norm": 1.0778032541275024, "learning_rate": 1.8952919606848882e-05, "loss": 1.0665, "step": 3872 }, { "epoch": 0.5179192297405724, "grad_norm": 1.0335626602172852, "learning_rate": 1.895227635350057e-05, "loss": 1.0059, "step": 3873 }, { "epoch": 0.5180529553356512, "grad_norm": 1.0494685173034668, "learning_rate": 1.8951632913550625e-05, "loss": 0.9658, "step": 3874 }, { "epoch": 0.5181866809307302, "grad_norm": 1.127772331237793, "learning_rate": 1.8950989287012457e-05, "loss": 0.9021, "step": 3875 }, { "epoch": 0.518320406525809, "grad_norm": 1.1766126155853271, "learning_rate": 1.8950345473899484e-05, "loss": 1.0194, "step": 3876 }, { "epoch": 0.518454132120888, "grad_norm": 1.1021366119384766, "learning_rate": 1.8949701474225123e-05, "loss": 0.9546, "step": 3877 }, { "epoch": 0.5185878577159668, "grad_norm": 1.020088791847229, "learning_rate": 1.89490572880028e-05, "loss": 0.9591, "step": 3878 }, { "epoch": 0.5187215833110458, "grad_norm": 1.1151494979858398, "learning_rate": 1.894841291524594e-05, "loss": 0.9431, "step": 3879 }, { "epoch": 0.5188553089061246, "grad_norm": 1.071023941040039, "learning_rate": 1.8947768355967975e-05, "loss": 1.0015, "step": 3880 }, { "epoch": 0.5189890345012035, "grad_norm": 0.9775139093399048, "learning_rate": 1.8947123610182342e-05, "loss": 1.0225, "step": 3881 }, { "epoch": 0.5191227600962824, "grad_norm": 1.0321720838546753, "learning_rate": 1.894647867790248e-05, "loss": 0.8393, "step": 3882 }, { "epoch": 0.5192564856913613, "grad_norm": 1.1958746910095215, "learning_rate": 1.8945833559141825e-05, "loss": 1.0616, "step": 3883 }, { "epoch": 0.5193902112864403, "grad_norm": 1.1184295415878296, "learning_rate": 1.8945188253913837e-05, "loss": 1.1117, "step": 3884 }, { "epoch": 0.5195239368815191, "grad_norm": 1.1438792943954468, "learning_rate": 1.8944542762231955e-05, "loss": 0.9481, "step": 3885 }, { "epoch": 0.5196576624765981, "grad_norm": 1.1248042583465576, "learning_rate": 1.8943897084109638e-05, "loss": 0.8317, "step": 3886 }, { "epoch": 0.5197913880716769, "grad_norm": 1.2589408159255981, "learning_rate": 1.8943251219560347e-05, "loss": 1.034, "step": 3887 }, { "epoch": 0.5199251136667559, "grad_norm": 1.0450526475906372, "learning_rate": 1.8942605168597542e-05, "loss": 0.9552, "step": 3888 }, { "epoch": 0.5200588392618347, "grad_norm": 1.2703546285629272, "learning_rate": 1.894195893123469e-05, "loss": 0.9432, "step": 3889 }, { "epoch": 0.5201925648569136, "grad_norm": 1.1543128490447998, "learning_rate": 1.894131250748526e-05, "loss": 1.0503, "step": 3890 }, { "epoch": 0.5203262904519925, "grad_norm": 1.1224424839019775, "learning_rate": 1.8940665897362724e-05, "loss": 0.8552, "step": 3891 }, { "epoch": 0.5204600160470714, "grad_norm": 1.0187281370162964, "learning_rate": 1.8940019100880564e-05, "loss": 1.0045, "step": 3892 }, { "epoch": 0.5205937416421503, "grad_norm": 1.040170431137085, "learning_rate": 1.8939372118052263e-05, "loss": 0.922, "step": 3893 }, { "epoch": 0.5207274672372292, "grad_norm": 1.140371322631836, "learning_rate": 1.89387249488913e-05, "loss": 0.9302, "step": 3894 }, { "epoch": 0.5208611928323081, "grad_norm": 1.0946296453475952, "learning_rate": 1.8938077593411172e-05, "loss": 0.9625, "step": 3895 }, { "epoch": 0.520994918427387, "grad_norm": 1.0061008930206299, "learning_rate": 1.893743005162537e-05, "loss": 0.873, "step": 3896 }, { "epoch": 0.521128644022466, "grad_norm": 1.3982113599777222, "learning_rate": 1.8936782323547387e-05, "loss": 1.0298, "step": 3897 }, { "epoch": 0.5212623696175448, "grad_norm": 1.0664973258972168, "learning_rate": 1.893613440919073e-05, "loss": 1.1054, "step": 3898 }, { "epoch": 0.5213960952126236, "grad_norm": 1.007728099822998, "learning_rate": 1.8935486308568902e-05, "loss": 0.9052, "step": 3899 }, { "epoch": 0.5215298208077026, "grad_norm": 1.0875911712646484, "learning_rate": 1.8934838021695415e-05, "loss": 1.0236, "step": 3900 }, { "epoch": 0.5216635464027815, "grad_norm": 1.1241705417633057, "learning_rate": 1.8934189548583774e-05, "loss": 0.9267, "step": 3901 }, { "epoch": 0.5217972719978604, "grad_norm": 1.0521745681762695, "learning_rate": 1.8933540889247504e-05, "loss": 1.0, "step": 3902 }, { "epoch": 0.5219309975929393, "grad_norm": 1.100459337234497, "learning_rate": 1.8932892043700125e-05, "loss": 0.9531, "step": 3903 }, { "epoch": 0.5220647231880182, "grad_norm": 1.258790373802185, "learning_rate": 1.8932243011955154e-05, "loss": 1.0201, "step": 3904 }, { "epoch": 0.5221984487830971, "grad_norm": 1.1220483779907227, "learning_rate": 1.8931593794026128e-05, "loss": 0.9818, "step": 3905 }, { "epoch": 0.522332174378176, "grad_norm": 1.107544183731079, "learning_rate": 1.8930944389926575e-05, "loss": 0.919, "step": 3906 }, { "epoch": 0.5224658999732549, "grad_norm": 1.2315235137939453, "learning_rate": 1.8930294799670034e-05, "loss": 0.9646, "step": 3907 }, { "epoch": 0.5225996255683337, "grad_norm": 0.9500715136528015, "learning_rate": 1.892964502327004e-05, "loss": 0.7975, "step": 3908 }, { "epoch": 0.5227333511634127, "grad_norm": 1.1456037759780884, "learning_rate": 1.8928995060740144e-05, "loss": 0.9765, "step": 3909 }, { "epoch": 0.5228670767584915, "grad_norm": 1.0872883796691895, "learning_rate": 1.8928344912093887e-05, "loss": 0.9696, "step": 3910 }, { "epoch": 0.5230008023535705, "grad_norm": 1.057666540145874, "learning_rate": 1.8927694577344825e-05, "loss": 0.8645, "step": 3911 }, { "epoch": 0.5231345279486493, "grad_norm": 1.1444308757781982, "learning_rate": 1.892704405650651e-05, "loss": 0.9933, "step": 3912 }, { "epoch": 0.5232682535437283, "grad_norm": 1.1053740978240967, "learning_rate": 1.8926393349592506e-05, "loss": 0.9565, "step": 3913 }, { "epoch": 0.5234019791388071, "grad_norm": 1.1594345569610596, "learning_rate": 1.8925742456616375e-05, "loss": 1.0202, "step": 3914 }, { "epoch": 0.5235357047338861, "grad_norm": 1.0516413450241089, "learning_rate": 1.8925091377591684e-05, "loss": 0.983, "step": 3915 }, { "epoch": 0.523669430328965, "grad_norm": 1.1840918064117432, "learning_rate": 1.8924440112532e-05, "loss": 0.9984, "step": 3916 }, { "epoch": 0.5238031559240439, "grad_norm": 1.076615333557129, "learning_rate": 1.892378866145091e-05, "loss": 0.9514, "step": 3917 }, { "epoch": 0.5239368815191228, "grad_norm": 1.0013864040374756, "learning_rate": 1.8923137024361975e-05, "loss": 0.9191, "step": 3918 }, { "epoch": 0.5240706071142016, "grad_norm": 1.20210862159729, "learning_rate": 1.8922485201278792e-05, "loss": 1.0503, "step": 3919 }, { "epoch": 0.5242043327092806, "grad_norm": 1.0958980321884155, "learning_rate": 1.892183319221494e-05, "loss": 0.9515, "step": 3920 }, { "epoch": 0.5243380583043594, "grad_norm": 1.1728498935699463, "learning_rate": 1.8921180997184014e-05, "loss": 1.0282, "step": 3921 }, { "epoch": 0.5244717838994384, "grad_norm": 1.139930248260498, "learning_rate": 1.892052861619961e-05, "loss": 1.0716, "step": 3922 }, { "epoch": 0.5246055094945172, "grad_norm": 1.098024845123291, "learning_rate": 1.8919876049275318e-05, "loss": 0.9014, "step": 3923 }, { "epoch": 0.5247392350895962, "grad_norm": 1.1482025384902954, "learning_rate": 1.8919223296424746e-05, "loss": 1.0292, "step": 3924 }, { "epoch": 0.524872960684675, "grad_norm": 1.079440712928772, "learning_rate": 1.8918570357661502e-05, "loss": 1.0716, "step": 3925 }, { "epoch": 0.525006686279754, "grad_norm": 1.2071138620376587, "learning_rate": 1.891791723299919e-05, "loss": 1.0171, "step": 3926 }, { "epoch": 0.5251404118748328, "grad_norm": 1.0317797660827637, "learning_rate": 1.8917263922451427e-05, "loss": 0.993, "step": 3927 }, { "epoch": 0.5252741374699117, "grad_norm": 1.1713004112243652, "learning_rate": 1.8916610426031835e-05, "loss": 0.9571, "step": 3928 }, { "epoch": 0.5254078630649907, "grad_norm": 1.0108625888824463, "learning_rate": 1.8915956743754026e-05, "loss": 0.9371, "step": 3929 }, { "epoch": 0.5255415886600695, "grad_norm": 1.0294760465621948, "learning_rate": 1.8915302875631633e-05, "loss": 0.9245, "step": 3930 }, { "epoch": 0.5256753142551485, "grad_norm": 1.2941956520080566, "learning_rate": 1.8914648821678278e-05, "loss": 1.0639, "step": 3931 }, { "epoch": 0.5258090398502273, "grad_norm": 1.0763232707977295, "learning_rate": 1.8913994581907605e-05, "loss": 0.8877, "step": 3932 }, { "epoch": 0.5259427654453063, "grad_norm": 1.0422208309173584, "learning_rate": 1.891334015633324e-05, "loss": 0.9157, "step": 3933 }, { "epoch": 0.5260764910403851, "grad_norm": 1.0282213687896729, "learning_rate": 1.891268554496883e-05, "loss": 1.0086, "step": 3934 }, { "epoch": 0.5262102166354641, "grad_norm": 1.2093687057495117, "learning_rate": 1.8912030747828018e-05, "loss": 0.9986, "step": 3935 }, { "epoch": 0.5263439422305429, "grad_norm": 1.0463991165161133, "learning_rate": 1.8911375764924455e-05, "loss": 1.0043, "step": 3936 }, { "epoch": 0.5264776678256218, "grad_norm": 1.0864888429641724, "learning_rate": 1.8910720596271787e-05, "loss": 0.9172, "step": 3937 }, { "epoch": 0.5266113934207007, "grad_norm": 1.023023009300232, "learning_rate": 1.891006524188368e-05, "loss": 0.9725, "step": 3938 }, { "epoch": 0.5267451190157796, "grad_norm": 1.079361915588379, "learning_rate": 1.8909409701773787e-05, "loss": 0.8713, "step": 3939 }, { "epoch": 0.5268788446108585, "grad_norm": 1.0619900226593018, "learning_rate": 1.8908753975955772e-05, "loss": 0.924, "step": 3940 }, { "epoch": 0.5270125702059374, "grad_norm": 1.067112684249878, "learning_rate": 1.890809806444331e-05, "loss": 0.9711, "step": 3941 }, { "epoch": 0.5271462958010164, "grad_norm": 1.1576350927352905, "learning_rate": 1.8907441967250064e-05, "loss": 0.9091, "step": 3942 }, { "epoch": 0.5272800213960952, "grad_norm": 1.2047412395477295, "learning_rate": 1.8906785684389715e-05, "loss": 0.9792, "step": 3943 }, { "epoch": 0.5274137469911742, "grad_norm": 0.9922213554382324, "learning_rate": 1.8906129215875943e-05, "loss": 0.8706, "step": 3944 }, { "epoch": 0.527547472586253, "grad_norm": 1.1775661706924438, "learning_rate": 1.8905472561722425e-05, "loss": 1.0702, "step": 3945 }, { "epoch": 0.5276811981813319, "grad_norm": 1.1330151557922363, "learning_rate": 1.8904815721942857e-05, "loss": 1.0432, "step": 3946 }, { "epoch": 0.5278149237764108, "grad_norm": 1.0949708223342896, "learning_rate": 1.8904158696550927e-05, "loss": 1.0329, "step": 3947 }, { "epoch": 0.5279486493714897, "grad_norm": 1.1763720512390137, "learning_rate": 1.8903501485560328e-05, "loss": 1.0306, "step": 3948 }, { "epoch": 0.5280823749665686, "grad_norm": 1.034354329109192, "learning_rate": 1.8902844088984757e-05, "loss": 0.8144, "step": 3949 }, { "epoch": 0.5282161005616475, "grad_norm": 1.0692715644836426, "learning_rate": 1.8902186506837924e-05, "loss": 0.9686, "step": 3950 }, { "epoch": 0.5283498261567264, "grad_norm": 0.9756340384483337, "learning_rate": 1.890152873913353e-05, "loss": 0.8168, "step": 3951 }, { "epoch": 0.5284835517518053, "grad_norm": 1.1703331470489502, "learning_rate": 1.8900870785885288e-05, "loss": 1.0726, "step": 3952 }, { "epoch": 0.5286172773468842, "grad_norm": 1.0233592987060547, "learning_rate": 1.890021264710691e-05, "loss": 0.909, "step": 3953 }, { "epoch": 0.5287510029419631, "grad_norm": 1.038329839706421, "learning_rate": 1.889955432281212e-05, "loss": 0.9969, "step": 3954 }, { "epoch": 0.5288847285370419, "grad_norm": 1.1241778135299683, "learning_rate": 1.8898895813014633e-05, "loss": 0.995, "step": 3955 }, { "epoch": 0.5290184541321209, "grad_norm": 1.034817099571228, "learning_rate": 1.8898237117728177e-05, "loss": 0.8693, "step": 3956 }, { "epoch": 0.5291521797271997, "grad_norm": 1.0925058126449585, "learning_rate": 1.8897578236966486e-05, "loss": 0.9579, "step": 3957 }, { "epoch": 0.5292859053222787, "grad_norm": 1.1579203605651855, "learning_rate": 1.889691917074329e-05, "loss": 0.9749, "step": 3958 }, { "epoch": 0.5294196309173576, "grad_norm": 1.1268013715744019, "learning_rate": 1.8896259919072325e-05, "loss": 0.9824, "step": 3959 }, { "epoch": 0.5295533565124365, "grad_norm": 1.2145124673843384, "learning_rate": 1.8895600481967337e-05, "loss": 1.0323, "step": 3960 }, { "epoch": 0.5296870821075154, "grad_norm": 1.0292880535125732, "learning_rate": 1.889494085944207e-05, "loss": 0.9027, "step": 3961 }, { "epoch": 0.5298208077025943, "grad_norm": 1.286773443222046, "learning_rate": 1.8894281051510267e-05, "loss": 0.908, "step": 3962 }, { "epoch": 0.5299545332976732, "grad_norm": 1.1288243532180786, "learning_rate": 1.889362105818569e-05, "loss": 1.0053, "step": 3963 }, { "epoch": 0.530088258892752, "grad_norm": 1.1426972150802612, "learning_rate": 1.8892960879482092e-05, "loss": 0.9721, "step": 3964 }, { "epoch": 0.530221984487831, "grad_norm": 1.150708556175232, "learning_rate": 1.889230051541324e-05, "loss": 0.9593, "step": 3965 }, { "epoch": 0.5303557100829098, "grad_norm": 1.0496158599853516, "learning_rate": 1.8891639965992884e-05, "loss": 0.9213, "step": 3966 }, { "epoch": 0.5304894356779888, "grad_norm": 1.0713404417037964, "learning_rate": 1.8890979231234806e-05, "loss": 0.8702, "step": 3967 }, { "epoch": 0.5306231612730676, "grad_norm": 1.0482089519500732, "learning_rate": 1.8890318311152773e-05, "loss": 0.9381, "step": 3968 }, { "epoch": 0.5307568868681466, "grad_norm": 1.0940866470336914, "learning_rate": 1.888965720576056e-05, "loss": 0.9044, "step": 3969 }, { "epoch": 0.5308906124632254, "grad_norm": 1.1427652835845947, "learning_rate": 1.888899591507195e-05, "loss": 1.0506, "step": 3970 }, { "epoch": 0.5310243380583044, "grad_norm": 1.1380037069320679, "learning_rate": 1.8888334439100728e-05, "loss": 0.9982, "step": 3971 }, { "epoch": 0.5311580636533833, "grad_norm": 0.9526935815811157, "learning_rate": 1.8887672777860678e-05, "loss": 0.89, "step": 3972 }, { "epoch": 0.5312917892484622, "grad_norm": 1.0654829740524292, "learning_rate": 1.8887010931365592e-05, "loss": 0.9734, "step": 3973 }, { "epoch": 0.5314255148435411, "grad_norm": 1.1163285970687866, "learning_rate": 1.888634889962927e-05, "loss": 0.9873, "step": 3974 }, { "epoch": 0.5315592404386199, "grad_norm": 1.14678156375885, "learning_rate": 1.8885686682665505e-05, "loss": 0.8316, "step": 3975 }, { "epoch": 0.5316929660336989, "grad_norm": 1.165987253189087, "learning_rate": 1.8885024280488108e-05, "loss": 0.961, "step": 3976 }, { "epoch": 0.5318266916287777, "grad_norm": 1.1527067422866821, "learning_rate": 1.888436169311088e-05, "loss": 1.0257, "step": 3977 }, { "epoch": 0.5319604172238567, "grad_norm": 1.114498257637024, "learning_rate": 1.8883698920547633e-05, "loss": 0.9289, "step": 3978 }, { "epoch": 0.5320941428189355, "grad_norm": 1.0571329593658447, "learning_rate": 1.8883035962812184e-05, "loss": 0.8959, "step": 3979 }, { "epoch": 0.5322278684140145, "grad_norm": 1.1587417125701904, "learning_rate": 1.888237281991835e-05, "loss": 0.9485, "step": 3980 }, { "epoch": 0.5323615940090933, "grad_norm": 1.1801154613494873, "learning_rate": 1.8881709491879954e-05, "loss": 0.8969, "step": 3981 }, { "epoch": 0.5324953196041723, "grad_norm": 1.1700735092163086, "learning_rate": 1.8881045978710823e-05, "loss": 0.8846, "step": 3982 }, { "epoch": 0.5326290451992511, "grad_norm": 1.1310909986495972, "learning_rate": 1.8880382280424786e-05, "loss": 1.0252, "step": 3983 }, { "epoch": 0.53276277079433, "grad_norm": 1.061645269393921, "learning_rate": 1.887971839703568e-05, "loss": 1.0342, "step": 3984 }, { "epoch": 0.532896496389409, "grad_norm": 1.0959954261779785, "learning_rate": 1.887905432855734e-05, "loss": 0.9589, "step": 3985 }, { "epoch": 0.5330302219844878, "grad_norm": 1.0616424083709717, "learning_rate": 1.8878390075003607e-05, "loss": 0.9091, "step": 3986 }, { "epoch": 0.5331639475795668, "grad_norm": 1.180062174797058, "learning_rate": 1.8877725636388327e-05, "loss": 0.9624, "step": 3987 }, { "epoch": 0.5332976731746456, "grad_norm": 1.0381860733032227, "learning_rate": 1.8877061012725355e-05, "loss": 0.9843, "step": 3988 }, { "epoch": 0.5334313987697246, "grad_norm": 1.099104881286621, "learning_rate": 1.8876396204028543e-05, "loss": 0.9808, "step": 3989 }, { "epoch": 0.5335651243648034, "grad_norm": 1.3772532939910889, "learning_rate": 1.887573121031174e-05, "loss": 0.8673, "step": 3990 }, { "epoch": 0.5336988499598824, "grad_norm": 1.284437656402588, "learning_rate": 1.887506603158882e-05, "loss": 0.9918, "step": 3991 }, { "epoch": 0.5338325755549612, "grad_norm": 1.087471604347229, "learning_rate": 1.8874400667873634e-05, "loss": 1.0122, "step": 3992 }, { "epoch": 0.5339663011500401, "grad_norm": 1.028607964515686, "learning_rate": 1.887373511918006e-05, "loss": 0.9962, "step": 3993 }, { "epoch": 0.534100026745119, "grad_norm": 1.147425889968872, "learning_rate": 1.887306938552197e-05, "loss": 0.9383, "step": 3994 }, { "epoch": 0.5342337523401979, "grad_norm": 1.069148302078247, "learning_rate": 1.887240346691324e-05, "loss": 0.9109, "step": 3995 }, { "epoch": 0.5343674779352768, "grad_norm": 1.1001719236373901, "learning_rate": 1.8871737363367745e-05, "loss": 0.9228, "step": 3996 }, { "epoch": 0.5345012035303557, "grad_norm": 1.1183935403823853, "learning_rate": 1.887107107489938e-05, "loss": 1.0522, "step": 3997 }, { "epoch": 0.5346349291254346, "grad_norm": 1.1498290300369263, "learning_rate": 1.8870404601522022e-05, "loss": 0.9477, "step": 3998 }, { "epoch": 0.5347686547205135, "grad_norm": 1.1521180868148804, "learning_rate": 1.8869737943249572e-05, "loss": 0.9049, "step": 3999 }, { "epoch": 0.5349023803155925, "grad_norm": 1.210731029510498, "learning_rate": 1.8869071100095922e-05, "loss": 0.9458, "step": 4000 }, { "epoch": 0.5350361059106713, "grad_norm": 1.0592319965362549, "learning_rate": 1.886840407207497e-05, "loss": 1.0425, "step": 4001 }, { "epoch": 0.5351698315057501, "grad_norm": 1.1009807586669922, "learning_rate": 1.886773685920062e-05, "loss": 0.9616, "step": 4002 }, { "epoch": 0.5353035571008291, "grad_norm": 1.0995705127716064, "learning_rate": 1.8867069461486785e-05, "loss": 0.9673, "step": 4003 }, { "epoch": 0.535437282695908, "grad_norm": 1.076185941696167, "learning_rate": 1.8866401878947365e-05, "loss": 0.9884, "step": 4004 }, { "epoch": 0.5355710082909869, "grad_norm": 1.0944101810455322, "learning_rate": 1.886573411159629e-05, "loss": 1.032, "step": 4005 }, { "epoch": 0.5357047338860658, "grad_norm": 1.0662139654159546, "learning_rate": 1.8865066159447468e-05, "loss": 1.0553, "step": 4006 }, { "epoch": 0.5358384594811447, "grad_norm": 0.9646372199058533, "learning_rate": 1.8864398022514823e-05, "loss": 0.8748, "step": 4007 }, { "epoch": 0.5359721850762236, "grad_norm": 1.0678128004074097, "learning_rate": 1.8863729700812282e-05, "loss": 0.9366, "step": 4008 }, { "epoch": 0.5361059106713025, "grad_norm": 1.0341919660568237, "learning_rate": 1.886306119435378e-05, "loss": 0.8202, "step": 4009 }, { "epoch": 0.5362396362663814, "grad_norm": 1.1835156679153442, "learning_rate": 1.886239250315325e-05, "loss": 0.982, "step": 4010 }, { "epoch": 0.5363733618614602, "grad_norm": 1.1393098831176758, "learning_rate": 1.8861723627224627e-05, "loss": 0.9127, "step": 4011 }, { "epoch": 0.5365070874565392, "grad_norm": 1.1345680952072144, "learning_rate": 1.8861054566581852e-05, "loss": 0.9508, "step": 4012 }, { "epoch": 0.536640813051618, "grad_norm": 1.1731466054916382, "learning_rate": 1.8860385321238877e-05, "loss": 0.8737, "step": 4013 }, { "epoch": 0.536774538646697, "grad_norm": 1.1283605098724365, "learning_rate": 1.885971589120965e-05, "loss": 0.9562, "step": 4014 }, { "epoch": 0.5369082642417758, "grad_norm": 1.0630086660385132, "learning_rate": 1.8859046276508118e-05, "loss": 1.0774, "step": 4015 }, { "epoch": 0.5370419898368548, "grad_norm": 1.1081104278564453, "learning_rate": 1.885837647714825e-05, "loss": 0.9711, "step": 4016 }, { "epoch": 0.5371757154319337, "grad_norm": 0.9931021332740784, "learning_rate": 1.8857706493143995e-05, "loss": 0.979, "step": 4017 }, { "epoch": 0.5373094410270126, "grad_norm": 1.0917123556137085, "learning_rate": 1.8857036324509324e-05, "loss": 0.9207, "step": 4018 }, { "epoch": 0.5374431666220915, "grad_norm": 1.0740206241607666, "learning_rate": 1.8856365971258212e-05, "loss": 1.1062, "step": 4019 }, { "epoch": 0.5375768922171704, "grad_norm": 1.1552101373672485, "learning_rate": 1.885569543340462e-05, "loss": 1.0445, "step": 4020 }, { "epoch": 0.5377106178122493, "grad_norm": 1.117110013961792, "learning_rate": 1.8855024710962536e-05, "loss": 1.0089, "step": 4021 }, { "epoch": 0.5378443434073281, "grad_norm": 1.1631462574005127, "learning_rate": 1.885435380394593e-05, "loss": 0.9584, "step": 4022 }, { "epoch": 0.5379780690024071, "grad_norm": 1.017776370048523, "learning_rate": 1.8853682712368796e-05, "loss": 0.9404, "step": 4023 }, { "epoch": 0.5381117945974859, "grad_norm": 1.0239611864089966, "learning_rate": 1.8853011436245113e-05, "loss": 0.9939, "step": 4024 }, { "epoch": 0.5382455201925649, "grad_norm": 1.2450803518295288, "learning_rate": 1.885233997558888e-05, "loss": 1.1091, "step": 4025 }, { "epoch": 0.5383792457876437, "grad_norm": 1.122562050819397, "learning_rate": 1.8851668330414092e-05, "loss": 1.1424, "step": 4026 }, { "epoch": 0.5385129713827227, "grad_norm": 1.1152565479278564, "learning_rate": 1.885099650073475e-05, "loss": 0.9484, "step": 4027 }, { "epoch": 0.5386466969778015, "grad_norm": 1.0552746057510376, "learning_rate": 1.8850324486564853e-05, "loss": 0.8987, "step": 4028 }, { "epoch": 0.5387804225728805, "grad_norm": 1.0813008546829224, "learning_rate": 1.884965228791841e-05, "loss": 0.9762, "step": 4029 }, { "epoch": 0.5389141481679594, "grad_norm": 1.1637060642242432, "learning_rate": 1.8848979904809435e-05, "loss": 1.0349, "step": 4030 }, { "epoch": 0.5390478737630382, "grad_norm": 1.0969377756118774, "learning_rate": 1.884830733725194e-05, "loss": 0.9885, "step": 4031 }, { "epoch": 0.5391815993581172, "grad_norm": 1.0484496355056763, "learning_rate": 1.8847634585259948e-05, "loss": 0.9344, "step": 4032 }, { "epoch": 0.539315324953196, "grad_norm": 1.0504816770553589, "learning_rate": 1.8846961648847476e-05, "loss": 1.0066, "step": 4033 }, { "epoch": 0.539449050548275, "grad_norm": 1.1143165826797485, "learning_rate": 1.8846288528028555e-05, "loss": 1.0313, "step": 4034 }, { "epoch": 0.5395827761433538, "grad_norm": 1.118200421333313, "learning_rate": 1.8845615222817217e-05, "loss": 1.017, "step": 4035 }, { "epoch": 0.5397165017384328, "grad_norm": 1.1040101051330566, "learning_rate": 1.884494173322749e-05, "loss": 0.9642, "step": 4036 }, { "epoch": 0.5398502273335116, "grad_norm": 1.202311635017395, "learning_rate": 1.884426805927342e-05, "loss": 1.0766, "step": 4037 }, { "epoch": 0.5399839529285906, "grad_norm": 1.0037615299224854, "learning_rate": 1.8843594200969043e-05, "loss": 0.8847, "step": 4038 }, { "epoch": 0.5401176785236694, "grad_norm": 1.060538649559021, "learning_rate": 1.884292015832841e-05, "loss": 1.0628, "step": 4039 }, { "epoch": 0.5402514041187483, "grad_norm": 1.1091669797897339, "learning_rate": 1.8842245931365564e-05, "loss": 0.9419, "step": 4040 }, { "epoch": 0.5403851297138272, "grad_norm": 0.9443292617797852, "learning_rate": 1.8841571520094564e-05, "loss": 0.8612, "step": 4041 }, { "epoch": 0.5405188553089061, "grad_norm": 1.095067024230957, "learning_rate": 1.8840896924529466e-05, "loss": 0.9891, "step": 4042 }, { "epoch": 0.540652580903985, "grad_norm": 1.0677266120910645, "learning_rate": 1.8840222144684333e-05, "loss": 0.8, "step": 4043 }, { "epoch": 0.5407863064990639, "grad_norm": 1.0165082216262817, "learning_rate": 1.8839547180573228e-05, "loss": 0.9176, "step": 4044 }, { "epoch": 0.5409200320941429, "grad_norm": 1.2070832252502441, "learning_rate": 1.883887203221022e-05, "loss": 1.1452, "step": 4045 }, { "epoch": 0.5410537576892217, "grad_norm": 0.9099141955375671, "learning_rate": 1.8838196699609385e-05, "loss": 0.8936, "step": 4046 }, { "epoch": 0.5411874832843007, "grad_norm": 0.9718128442764282, "learning_rate": 1.8837521182784795e-05, "loss": 1.0062, "step": 4047 }, { "epoch": 0.5413212088793795, "grad_norm": 1.1335023641586304, "learning_rate": 1.8836845481750533e-05, "loss": 0.9909, "step": 4048 }, { "epoch": 0.5414549344744584, "grad_norm": 1.0748789310455322, "learning_rate": 1.8836169596520683e-05, "loss": 0.9943, "step": 4049 }, { "epoch": 0.5415886600695373, "grad_norm": 1.1526007652282715, "learning_rate": 1.883549352710933e-05, "loss": 0.9091, "step": 4050 }, { "epoch": 0.5417223856646162, "grad_norm": 1.204253911972046, "learning_rate": 1.8834817273530572e-05, "loss": 1.1026, "step": 4051 }, { "epoch": 0.5418561112596951, "grad_norm": 1.2260923385620117, "learning_rate": 1.88341408357985e-05, "loss": 1.0354, "step": 4052 }, { "epoch": 0.541989836854774, "grad_norm": 1.0631901025772095, "learning_rate": 1.8833464213927217e-05, "loss": 0.9088, "step": 4053 }, { "epoch": 0.5421235624498529, "grad_norm": 1.0479751825332642, "learning_rate": 1.8832787407930825e-05, "loss": 0.8789, "step": 4054 }, { "epoch": 0.5422572880449318, "grad_norm": 1.242635726928711, "learning_rate": 1.8832110417823433e-05, "loss": 1.014, "step": 4055 }, { "epoch": 0.5423910136400107, "grad_norm": 1.082195520401001, "learning_rate": 1.8831433243619148e-05, "loss": 0.9911, "step": 4056 }, { "epoch": 0.5425247392350896, "grad_norm": 1.1591027975082397, "learning_rate": 1.8830755885332087e-05, "loss": 1.037, "step": 4057 }, { "epoch": 0.5426584648301684, "grad_norm": 1.0585474967956543, "learning_rate": 1.8830078342976374e-05, "loss": 0.8676, "step": 4058 }, { "epoch": 0.5427921904252474, "grad_norm": 0.8934906125068665, "learning_rate": 1.8829400616566124e-05, "loss": 0.8947, "step": 4059 }, { "epoch": 0.5429259160203262, "grad_norm": 1.2074781656265259, "learning_rate": 1.882872270611547e-05, "loss": 1.1145, "step": 4060 }, { "epoch": 0.5430596416154052, "grad_norm": 1.0659806728363037, "learning_rate": 1.8828044611638538e-05, "loss": 0.9149, "step": 4061 }, { "epoch": 0.5431933672104841, "grad_norm": 1.1296091079711914, "learning_rate": 1.8827366333149465e-05, "loss": 1.0843, "step": 4062 }, { "epoch": 0.543327092805563, "grad_norm": 0.9791759848594666, "learning_rate": 1.8826687870662383e-05, "loss": 1.0003, "step": 4063 }, { "epoch": 0.5434608184006419, "grad_norm": 0.9883964657783508, "learning_rate": 1.882600922419144e-05, "loss": 0.8917, "step": 4064 }, { "epoch": 0.5435945439957208, "grad_norm": 1.1391581296920776, "learning_rate": 1.8825330393750783e-05, "loss": 1.0969, "step": 4065 }, { "epoch": 0.5437282695907997, "grad_norm": 1.1297281980514526, "learning_rate": 1.882465137935456e-05, "loss": 1.0222, "step": 4066 }, { "epoch": 0.5438619951858785, "grad_norm": 1.1820268630981445, "learning_rate": 1.8823972181016922e-05, "loss": 1.0208, "step": 4067 }, { "epoch": 0.5439957207809575, "grad_norm": 1.0535166263580322, "learning_rate": 1.8823292798752023e-05, "loss": 0.9482, "step": 4068 }, { "epoch": 0.5441294463760363, "grad_norm": 1.2228018045425415, "learning_rate": 1.8822613232574035e-05, "loss": 1.0862, "step": 4069 }, { "epoch": 0.5442631719711153, "grad_norm": 0.9343435168266296, "learning_rate": 1.882193348249711e-05, "loss": 0.9011, "step": 4070 }, { "epoch": 0.5443968975661941, "grad_norm": 1.1489194631576538, "learning_rate": 1.8821253548535427e-05, "loss": 1.0211, "step": 4071 }, { "epoch": 0.5445306231612731, "grad_norm": 1.1050649881362915, "learning_rate": 1.8820573430703155e-05, "loss": 1.0102, "step": 4072 }, { "epoch": 0.544664348756352, "grad_norm": 1.0614635944366455, "learning_rate": 1.881989312901447e-05, "loss": 0.9198, "step": 4073 }, { "epoch": 0.5447980743514309, "grad_norm": 0.9965329170227051, "learning_rate": 1.881921264348355e-05, "loss": 0.9444, "step": 4074 }, { "epoch": 0.5449317999465098, "grad_norm": 1.0792934894561768, "learning_rate": 1.8818531974124584e-05, "loss": 1.0339, "step": 4075 }, { "epoch": 0.5450655255415887, "grad_norm": 1.233396053314209, "learning_rate": 1.881785112095176e-05, "loss": 0.9802, "step": 4076 }, { "epoch": 0.5451992511366676, "grad_norm": 1.0449467897415161, "learning_rate": 1.8817170083979262e-05, "loss": 0.895, "step": 4077 }, { "epoch": 0.5453329767317464, "grad_norm": 1.0529789924621582, "learning_rate": 1.8816488863221294e-05, "loss": 0.9726, "step": 4078 }, { "epoch": 0.5454667023268254, "grad_norm": 1.057137370109558, "learning_rate": 1.881580745869205e-05, "loss": 0.9804, "step": 4079 }, { "epoch": 0.5456004279219042, "grad_norm": 1.1353020668029785, "learning_rate": 1.8815125870405738e-05, "loss": 0.9712, "step": 4080 }, { "epoch": 0.5457341535169832, "grad_norm": 1.165024995803833, "learning_rate": 1.8814444098376562e-05, "loss": 1.1781, "step": 4081 }, { "epoch": 0.545867879112062, "grad_norm": 1.25754976272583, "learning_rate": 1.881376214261873e-05, "loss": 1.0514, "step": 4082 }, { "epoch": 0.546001604707141, "grad_norm": 1.0897449254989624, "learning_rate": 1.8813080003146463e-05, "loss": 0.9676, "step": 4083 }, { "epoch": 0.5461353303022198, "grad_norm": 0.9986870884895325, "learning_rate": 1.8812397679973975e-05, "loss": 0.9263, "step": 4084 }, { "epoch": 0.5462690558972988, "grad_norm": 1.0525767803192139, "learning_rate": 1.8811715173115492e-05, "loss": 0.872, "step": 4085 }, { "epoch": 0.5464027814923776, "grad_norm": 1.033512830734253, "learning_rate": 1.8811032482585235e-05, "loss": 0.9688, "step": 4086 }, { "epoch": 0.5465365070874565, "grad_norm": 1.0833057165145874, "learning_rate": 1.881034960839744e-05, "loss": 0.8851, "step": 4087 }, { "epoch": 0.5466702326825355, "grad_norm": 1.0895195007324219, "learning_rate": 1.8809666550566334e-05, "loss": 0.9235, "step": 4088 }, { "epoch": 0.5468039582776143, "grad_norm": 1.0610026121139526, "learning_rate": 1.8808983309106164e-05, "loss": 0.8973, "step": 4089 }, { "epoch": 0.5469376838726933, "grad_norm": 1.1304194927215576, "learning_rate": 1.880829988403116e-05, "loss": 1.0943, "step": 4090 }, { "epoch": 0.5470714094677721, "grad_norm": 1.2175449132919312, "learning_rate": 1.880761627535558e-05, "loss": 0.967, "step": 4091 }, { "epoch": 0.5472051350628511, "grad_norm": 1.1401782035827637, "learning_rate": 1.8806932483093666e-05, "loss": 1.0145, "step": 4092 }, { "epoch": 0.5473388606579299, "grad_norm": 1.1192463636398315, "learning_rate": 1.8806248507259668e-05, "loss": 0.9546, "step": 4093 }, { "epoch": 0.5474725862530089, "grad_norm": 1.172466516494751, "learning_rate": 1.880556434786785e-05, "loss": 0.9377, "step": 4094 }, { "epoch": 0.5476063118480877, "grad_norm": 1.0581740140914917, "learning_rate": 1.8804880004932468e-05, "loss": 1.0167, "step": 4095 }, { "epoch": 0.5477400374431666, "grad_norm": 1.130346655845642, "learning_rate": 1.8804195478467785e-05, "loss": 1.0049, "step": 4096 }, { "epoch": 0.5478737630382455, "grad_norm": 1.031082272529602, "learning_rate": 1.8803510768488075e-05, "loss": 0.9096, "step": 4097 }, { "epoch": 0.5480074886333244, "grad_norm": 1.0581367015838623, "learning_rate": 1.8802825875007604e-05, "loss": 0.9791, "step": 4098 }, { "epoch": 0.5481412142284033, "grad_norm": 1.0102113485336304, "learning_rate": 1.8802140798040653e-05, "loss": 0.9072, "step": 4099 }, { "epoch": 0.5482749398234822, "grad_norm": 1.1040164232254028, "learning_rate": 1.88014555376015e-05, "loss": 0.9316, "step": 4100 }, { "epoch": 0.5484086654185611, "grad_norm": 1.0502278804779053, "learning_rate": 1.880077009370443e-05, "loss": 0.874, "step": 4101 }, { "epoch": 0.54854239101364, "grad_norm": 1.321721076965332, "learning_rate": 1.8800084466363726e-05, "loss": 1.0042, "step": 4102 }, { "epoch": 0.548676116608719, "grad_norm": 1.0465561151504517, "learning_rate": 1.8799398655593682e-05, "loss": 0.9917, "step": 4103 }, { "epoch": 0.5488098422037978, "grad_norm": 1.015295386314392, "learning_rate": 1.8798712661408594e-05, "loss": 0.9839, "step": 4104 }, { "epoch": 0.5489435677988767, "grad_norm": 1.0752264261245728, "learning_rate": 1.8798026483822763e-05, "loss": 0.905, "step": 4105 }, { "epoch": 0.5490772933939556, "grad_norm": 0.9739238023757935, "learning_rate": 1.8797340122850484e-05, "loss": 0.9949, "step": 4106 }, { "epoch": 0.5492110189890345, "grad_norm": 1.004654884338379, "learning_rate": 1.879665357850607e-05, "loss": 0.8715, "step": 4107 }, { "epoch": 0.5493447445841134, "grad_norm": 1.158569097518921, "learning_rate": 1.879596685080383e-05, "loss": 0.8753, "step": 4108 }, { "epoch": 0.5494784701791923, "grad_norm": 1.0677436590194702, "learning_rate": 1.8795279939758076e-05, "loss": 1.0681, "step": 4109 }, { "epoch": 0.5496121957742712, "grad_norm": 1.116233229637146, "learning_rate": 1.8794592845383133e-05, "loss": 1.0462, "step": 4110 }, { "epoch": 0.5497459213693501, "grad_norm": 1.1746715307235718, "learning_rate": 1.8793905567693313e-05, "loss": 0.889, "step": 4111 }, { "epoch": 0.549879646964429, "grad_norm": 1.1653187274932861, "learning_rate": 1.8793218106702947e-05, "loss": 0.9486, "step": 4112 }, { "epoch": 0.5500133725595079, "grad_norm": 1.1489580869674683, "learning_rate": 1.8792530462426364e-05, "loss": 0.9997, "step": 4113 }, { "epoch": 0.5501470981545867, "grad_norm": 1.076123595237732, "learning_rate": 1.87918426348779e-05, "loss": 0.9157, "step": 4114 }, { "epoch": 0.5502808237496657, "grad_norm": 1.066899299621582, "learning_rate": 1.8791154624071885e-05, "loss": 0.9784, "step": 4115 }, { "epoch": 0.5504145493447445, "grad_norm": 1.1230682134628296, "learning_rate": 1.8790466430022665e-05, "loss": 0.9966, "step": 4116 }, { "epoch": 0.5505482749398235, "grad_norm": 1.2411407232284546, "learning_rate": 1.8789778052744587e-05, "loss": 0.9593, "step": 4117 }, { "epoch": 0.5506820005349023, "grad_norm": 1.2296236753463745, "learning_rate": 1.878908949225199e-05, "loss": 1.025, "step": 4118 }, { "epoch": 0.5508157261299813, "grad_norm": 1.1244949102401733, "learning_rate": 1.878840074855924e-05, "loss": 0.9995, "step": 4119 }, { "epoch": 0.5509494517250602, "grad_norm": 1.1304404735565186, "learning_rate": 1.8787711821680682e-05, "loss": 1.0638, "step": 4120 }, { "epoch": 0.5510831773201391, "grad_norm": 1.1003179550170898, "learning_rate": 1.878702271163068e-05, "loss": 0.8397, "step": 4121 }, { "epoch": 0.551216902915218, "grad_norm": 1.2588616609573364, "learning_rate": 1.8786333418423597e-05, "loss": 1.0425, "step": 4122 }, { "epoch": 0.5513506285102969, "grad_norm": 1.2148840427398682, "learning_rate": 1.8785643942073804e-05, "loss": 1.1004, "step": 4123 }, { "epoch": 0.5514843541053758, "grad_norm": 0.9715263247489929, "learning_rate": 1.878495428259567e-05, "loss": 0.9765, "step": 4124 }, { "epoch": 0.5516180797004546, "grad_norm": 1.134539246559143, "learning_rate": 1.8784264440003567e-05, "loss": 1.0754, "step": 4125 }, { "epoch": 0.5517518052955336, "grad_norm": 1.012941837310791, "learning_rate": 1.878357441431188e-05, "loss": 1.0087, "step": 4126 }, { "epoch": 0.5518855308906124, "grad_norm": 1.0308141708374023, "learning_rate": 1.878288420553499e-05, "loss": 0.948, "step": 4127 }, { "epoch": 0.5520192564856914, "grad_norm": 1.0697585344314575, "learning_rate": 1.878219381368728e-05, "loss": 0.9309, "step": 4128 }, { "epoch": 0.5521529820807702, "grad_norm": 1.0895172357559204, "learning_rate": 1.8781503238783146e-05, "loss": 1.0639, "step": 4129 }, { "epoch": 0.5522867076758492, "grad_norm": 0.9940357804298401, "learning_rate": 1.878081248083698e-05, "loss": 0.8741, "step": 4130 }, { "epoch": 0.552420433270928, "grad_norm": 1.0215710401535034, "learning_rate": 1.8780121539863182e-05, "loss": 0.8339, "step": 4131 }, { "epoch": 0.552554158866007, "grad_norm": 1.1118327379226685, "learning_rate": 1.877943041587615e-05, "loss": 1.0038, "step": 4132 }, { "epoch": 0.5526878844610859, "grad_norm": 1.1815924644470215, "learning_rate": 1.877873910889029e-05, "loss": 0.9596, "step": 4133 }, { "epoch": 0.5528216100561647, "grad_norm": 1.1048861742019653, "learning_rate": 1.8778047618920016e-05, "loss": 0.873, "step": 4134 }, { "epoch": 0.5529553356512437, "grad_norm": 1.086790680885315, "learning_rate": 1.877735594597974e-05, "loss": 1.0458, "step": 4135 }, { "epoch": 0.5530890612463225, "grad_norm": 1.1849241256713867, "learning_rate": 1.8776664090083872e-05, "loss": 0.8433, "step": 4136 }, { "epoch": 0.5532227868414015, "grad_norm": 1.151999831199646, "learning_rate": 1.8775972051246846e-05, "loss": 0.996, "step": 4137 }, { "epoch": 0.5533565124364803, "grad_norm": 1.1179007291793823, "learning_rate": 1.877527982948308e-05, "loss": 0.984, "step": 4138 }, { "epoch": 0.5534902380315593, "grad_norm": 1.1322762966156006, "learning_rate": 1.8774587424807e-05, "loss": 1.0434, "step": 4139 }, { "epoch": 0.5536239636266381, "grad_norm": 1.1262671947479248, "learning_rate": 1.8773894837233044e-05, "loss": 0.9508, "step": 4140 }, { "epoch": 0.5537576892217171, "grad_norm": 1.2443318367004395, "learning_rate": 1.8773202066775646e-05, "loss": 1.0189, "step": 4141 }, { "epoch": 0.5538914148167959, "grad_norm": 1.0210678577423096, "learning_rate": 1.8772509113449243e-05, "loss": 0.8627, "step": 4142 }, { "epoch": 0.5540251404118748, "grad_norm": 1.1153596639633179, "learning_rate": 1.8771815977268284e-05, "loss": 1.01, "step": 4143 }, { "epoch": 0.5541588660069537, "grad_norm": 1.0814077854156494, "learning_rate": 1.8771122658247214e-05, "loss": 1.0077, "step": 4144 }, { "epoch": 0.5542925916020326, "grad_norm": 1.0816489458084106, "learning_rate": 1.877042915640049e-05, "loss": 1.0627, "step": 4145 }, { "epoch": 0.5544263171971116, "grad_norm": 1.2560906410217285, "learning_rate": 1.8769735471742555e-05, "loss": 0.9253, "step": 4146 }, { "epoch": 0.5545600427921904, "grad_norm": 1.0629435777664185, "learning_rate": 1.876904160428788e-05, "loss": 1.0188, "step": 4147 }, { "epoch": 0.5546937683872694, "grad_norm": 1.0532605648040771, "learning_rate": 1.8768347554050922e-05, "loss": 0.9647, "step": 4148 }, { "epoch": 0.5548274939823482, "grad_norm": 1.1149368286132812, "learning_rate": 1.8767653321046153e-05, "loss": 0.985, "step": 4149 }, { "epoch": 0.5549612195774272, "grad_norm": 1.1745245456695557, "learning_rate": 1.8766958905288035e-05, "loss": 0.9483, "step": 4150 }, { "epoch": 0.555094945172506, "grad_norm": 1.0421106815338135, "learning_rate": 1.876626430679105e-05, "loss": 1.0683, "step": 4151 }, { "epoch": 0.5552286707675849, "grad_norm": 1.0267407894134521, "learning_rate": 1.8765569525569677e-05, "loss": 0.9195, "step": 4152 }, { "epoch": 0.5553623963626638, "grad_norm": 1.0786383152008057, "learning_rate": 1.876487456163839e-05, "loss": 0.8578, "step": 4153 }, { "epoch": 0.5554961219577427, "grad_norm": 1.2095805406570435, "learning_rate": 1.876417941501168e-05, "loss": 0.9591, "step": 4154 }, { "epoch": 0.5556298475528216, "grad_norm": 1.0385119915008545, "learning_rate": 1.876348408570404e-05, "loss": 0.8748, "step": 4155 }, { "epoch": 0.5557635731479005, "grad_norm": 1.0932854413986206, "learning_rate": 1.876278857372996e-05, "loss": 0.8829, "step": 4156 }, { "epoch": 0.5558972987429794, "grad_norm": 0.9786622524261475, "learning_rate": 1.8762092879103938e-05, "loss": 0.9742, "step": 4157 }, { "epoch": 0.5560310243380583, "grad_norm": 1.1122028827667236, "learning_rate": 1.8761397001840472e-05, "loss": 0.9489, "step": 4158 }, { "epoch": 0.5561647499331372, "grad_norm": 1.1907130479812622, "learning_rate": 1.8760700941954066e-05, "loss": 1.0692, "step": 4159 }, { "epoch": 0.5562984755282161, "grad_norm": 1.1519546508789062, "learning_rate": 1.8760004699459236e-05, "loss": 1.0327, "step": 4160 }, { "epoch": 0.556432201123295, "grad_norm": 1.0557442903518677, "learning_rate": 1.8759308274370492e-05, "loss": 0.9116, "step": 4161 }, { "epoch": 0.5565659267183739, "grad_norm": 1.1056785583496094, "learning_rate": 1.8758611666702347e-05, "loss": 0.9897, "step": 4162 }, { "epoch": 0.5566996523134528, "grad_norm": 1.1571147441864014, "learning_rate": 1.875791487646932e-05, "loss": 0.9291, "step": 4163 }, { "epoch": 0.5568333779085317, "grad_norm": 1.0730870962142944, "learning_rate": 1.8757217903685943e-05, "loss": 1.0663, "step": 4164 }, { "epoch": 0.5569671035036106, "grad_norm": 1.1050481796264648, "learning_rate": 1.8756520748366735e-05, "loss": 0.9371, "step": 4165 }, { "epoch": 0.5571008290986895, "grad_norm": 1.100428819656372, "learning_rate": 1.875582341052623e-05, "loss": 0.9153, "step": 4166 }, { "epoch": 0.5572345546937684, "grad_norm": 1.1881496906280518, "learning_rate": 1.875512589017897e-05, "loss": 1.0603, "step": 4167 }, { "epoch": 0.5573682802888473, "grad_norm": 1.099585771560669, "learning_rate": 1.8754428187339484e-05, "loss": 1.0452, "step": 4168 }, { "epoch": 0.5575020058839262, "grad_norm": 1.0015478134155273, "learning_rate": 1.875373030202232e-05, "loss": 0.9291, "step": 4169 }, { "epoch": 0.5576357314790051, "grad_norm": 1.2411212921142578, "learning_rate": 1.8753032234242024e-05, "loss": 1.2123, "step": 4170 }, { "epoch": 0.557769457074084, "grad_norm": 0.9905581474304199, "learning_rate": 1.875233398401315e-05, "loss": 0.9302, "step": 4171 }, { "epoch": 0.5579031826691628, "grad_norm": 1.145564317703247, "learning_rate": 1.8751635551350243e-05, "loss": 0.9981, "step": 4172 }, { "epoch": 0.5580369082642418, "grad_norm": 1.1210523843765259, "learning_rate": 1.8750936936267874e-05, "loss": 0.922, "step": 4173 }, { "epoch": 0.5581706338593206, "grad_norm": 1.1233254671096802, "learning_rate": 1.8750238138780595e-05, "loss": 0.9693, "step": 4174 }, { "epoch": 0.5583043594543996, "grad_norm": 1.2204418182373047, "learning_rate": 1.8749539158902975e-05, "loss": 1.1435, "step": 4175 }, { "epoch": 0.5584380850494784, "grad_norm": 1.2252203226089478, "learning_rate": 1.8748839996649583e-05, "loss": 1.0065, "step": 4176 }, { "epoch": 0.5585718106445574, "grad_norm": 1.0889040231704712, "learning_rate": 1.8748140652034992e-05, "loss": 0.9989, "step": 4177 }, { "epoch": 0.5587055362396363, "grad_norm": 1.135770559310913, "learning_rate": 1.8747441125073784e-05, "loss": 0.9868, "step": 4178 }, { "epoch": 0.5588392618347152, "grad_norm": 1.039806842803955, "learning_rate": 1.8746741415780535e-05, "loss": 0.9424, "step": 4179 }, { "epoch": 0.5589729874297941, "grad_norm": 1.1514462232589722, "learning_rate": 1.874604152416983e-05, "loss": 0.9424, "step": 4180 }, { "epoch": 0.5591067130248729, "grad_norm": 1.019472599029541, "learning_rate": 1.874534145025626e-05, "loss": 0.8366, "step": 4181 }, { "epoch": 0.5592404386199519, "grad_norm": 1.045609712600708, "learning_rate": 1.8744641194054417e-05, "loss": 0.9199, "step": 4182 }, { "epoch": 0.5593741642150307, "grad_norm": 1.0890038013458252, "learning_rate": 1.874394075557889e-05, "loss": 1.0042, "step": 4183 }, { "epoch": 0.5595078898101097, "grad_norm": 1.1431515216827393, "learning_rate": 1.874324013484429e-05, "loss": 0.97, "step": 4184 }, { "epoch": 0.5596416154051885, "grad_norm": 1.0080128908157349, "learning_rate": 1.8742539331865214e-05, "loss": 0.8798, "step": 4185 }, { "epoch": 0.5597753410002675, "grad_norm": 1.025169849395752, "learning_rate": 1.8741838346656275e-05, "loss": 0.971, "step": 4186 }, { "epoch": 0.5599090665953463, "grad_norm": 1.0815359354019165, "learning_rate": 1.8741137179232077e-05, "loss": 1.051, "step": 4187 }, { "epoch": 0.5600427921904253, "grad_norm": 1.1972018480300903, "learning_rate": 1.8740435829607237e-05, "loss": 1.0302, "step": 4188 }, { "epoch": 0.5601765177855041, "grad_norm": 1.1575087308883667, "learning_rate": 1.873973429779638e-05, "loss": 1.0006, "step": 4189 }, { "epoch": 0.560310243380583, "grad_norm": 1.0021131038665771, "learning_rate": 1.8739032583814124e-05, "loss": 0.9701, "step": 4190 }, { "epoch": 0.560443968975662, "grad_norm": 1.107019305229187, "learning_rate": 1.8738330687675094e-05, "loss": 0.8772, "step": 4191 }, { "epoch": 0.5605776945707408, "grad_norm": 1.0211387872695923, "learning_rate": 1.8737628609393922e-05, "loss": 0.9294, "step": 4192 }, { "epoch": 0.5607114201658198, "grad_norm": 0.9384823441505432, "learning_rate": 1.8736926348985246e-05, "loss": 0.9133, "step": 4193 }, { "epoch": 0.5608451457608986, "grad_norm": 1.1420704126358032, "learning_rate": 1.8736223906463698e-05, "loss": 0.9611, "step": 4194 }, { "epoch": 0.5609788713559776, "grad_norm": 1.0146013498306274, "learning_rate": 1.8735521281843923e-05, "loss": 0.8352, "step": 4195 }, { "epoch": 0.5611125969510564, "grad_norm": 1.2548308372497559, "learning_rate": 1.8734818475140565e-05, "loss": 1.0533, "step": 4196 }, { "epoch": 0.5612463225461354, "grad_norm": 1.1899572610855103, "learning_rate": 1.8734115486368275e-05, "loss": 1.0579, "step": 4197 }, { "epoch": 0.5613800481412142, "grad_norm": 1.1153916120529175, "learning_rate": 1.8733412315541706e-05, "loss": 0.9172, "step": 4198 }, { "epoch": 0.5615137737362931, "grad_norm": 1.0486317873001099, "learning_rate": 1.8732708962675513e-05, "loss": 1.1226, "step": 4199 }, { "epoch": 0.561647499331372, "grad_norm": 1.20783531665802, "learning_rate": 1.8732005427784357e-05, "loss": 0.9975, "step": 4200 }, { "epoch": 0.5617812249264509, "grad_norm": 1.0829203128814697, "learning_rate": 1.8731301710882905e-05, "loss": 1.1299, "step": 4201 }, { "epoch": 0.5619149505215298, "grad_norm": 1.0710757970809937, "learning_rate": 1.8730597811985826e-05, "loss": 0.9116, "step": 4202 }, { "epoch": 0.5620486761166087, "grad_norm": 1.0341309309005737, "learning_rate": 1.872989373110779e-05, "loss": 0.8762, "step": 4203 }, { "epoch": 0.5621824017116877, "grad_norm": 1.127497673034668, "learning_rate": 1.8729189468263466e-05, "loss": 1.0129, "step": 4204 }, { "epoch": 0.5623161273067665, "grad_norm": 1.1886883974075317, "learning_rate": 1.8728485023467547e-05, "loss": 1.0923, "step": 4205 }, { "epoch": 0.5624498529018455, "grad_norm": 1.2200772762298584, "learning_rate": 1.8727780396734707e-05, "loss": 0.9698, "step": 4206 }, { "epoch": 0.5625835784969243, "grad_norm": 1.106721043586731, "learning_rate": 1.8727075588079638e-05, "loss": 1.0018, "step": 4207 }, { "epoch": 0.5627173040920032, "grad_norm": 0.8979536294937134, "learning_rate": 1.8726370597517026e-05, "loss": 0.904, "step": 4208 }, { "epoch": 0.5628510296870821, "grad_norm": 1.0531038045883179, "learning_rate": 1.8725665425061574e-05, "loss": 0.9454, "step": 4209 }, { "epoch": 0.562984755282161, "grad_norm": 1.0507900714874268, "learning_rate": 1.8724960070727974e-05, "loss": 0.8943, "step": 4210 }, { "epoch": 0.5631184808772399, "grad_norm": 1.1653261184692383, "learning_rate": 1.8724254534530926e-05, "loss": 1.0486, "step": 4211 }, { "epoch": 0.5632522064723188, "grad_norm": 1.126889705657959, "learning_rate": 1.8723548816485147e-05, "loss": 0.9609, "step": 4212 }, { "epoch": 0.5633859320673977, "grad_norm": 1.1318472623825073, "learning_rate": 1.8722842916605338e-05, "loss": 0.8531, "step": 4213 }, { "epoch": 0.5635196576624766, "grad_norm": 1.0813262462615967, "learning_rate": 1.8722136834906214e-05, "loss": 0.9788, "step": 4214 }, { "epoch": 0.5636533832575555, "grad_norm": 1.1463592052459717, "learning_rate": 1.8721430571402496e-05, "loss": 1.004, "step": 4215 }, { "epoch": 0.5637871088526344, "grad_norm": 1.1911218166351318, "learning_rate": 1.87207241261089e-05, "loss": 0.8571, "step": 4216 }, { "epoch": 0.5639208344477132, "grad_norm": 1.0400562286376953, "learning_rate": 1.8720017499040154e-05, "loss": 1.0385, "step": 4217 }, { "epoch": 0.5640545600427922, "grad_norm": 1.292311429977417, "learning_rate": 1.8719310690210993e-05, "loss": 1.0449, "step": 4218 }, { "epoch": 0.564188285637871, "grad_norm": 1.058998942375183, "learning_rate": 1.871860369963614e-05, "loss": 0.8971, "step": 4219 }, { "epoch": 0.56432201123295, "grad_norm": 0.9786389470100403, "learning_rate": 1.8717896527330334e-05, "loss": 0.9394, "step": 4220 }, { "epoch": 0.5644557368280289, "grad_norm": 1.1137664318084717, "learning_rate": 1.8717189173308322e-05, "loss": 1.0865, "step": 4221 }, { "epoch": 0.5645894624231078, "grad_norm": 1.2083667516708374, "learning_rate": 1.8716481637584838e-05, "loss": 1.0253, "step": 4222 }, { "epoch": 0.5647231880181867, "grad_norm": 1.041096806526184, "learning_rate": 1.871577392017464e-05, "loss": 0.8516, "step": 4223 }, { "epoch": 0.5648569136132656, "grad_norm": 1.1420881748199463, "learning_rate": 1.8715066021092472e-05, "loss": 1.0062, "step": 4224 }, { "epoch": 0.5649906392083445, "grad_norm": 1.0848466157913208, "learning_rate": 1.8714357940353092e-05, "loss": 0.9895, "step": 4225 }, { "epoch": 0.5651243648034234, "grad_norm": 1.1167887449264526, "learning_rate": 1.871364967797126e-05, "loss": 0.992, "step": 4226 }, { "epoch": 0.5652580903985023, "grad_norm": 1.1449971199035645, "learning_rate": 1.8712941233961736e-05, "loss": 1.0205, "step": 4227 }, { "epoch": 0.5653918159935811, "grad_norm": 1.130625605583191, "learning_rate": 1.8712232608339294e-05, "loss": 0.9632, "step": 4228 }, { "epoch": 0.5655255415886601, "grad_norm": 1.1609690189361572, "learning_rate": 1.8711523801118694e-05, "loss": 1.0497, "step": 4229 }, { "epoch": 0.5656592671837389, "grad_norm": 1.2421350479125977, "learning_rate": 1.8710814812314722e-05, "loss": 0.9908, "step": 4230 }, { "epoch": 0.5657929927788179, "grad_norm": 1.0364757776260376, "learning_rate": 1.871010564194215e-05, "loss": 0.9738, "step": 4231 }, { "epoch": 0.5659267183738967, "grad_norm": 1.160951852798462, "learning_rate": 1.870939629001576e-05, "loss": 1.0001, "step": 4232 }, { "epoch": 0.5660604439689757, "grad_norm": 1.1690552234649658, "learning_rate": 1.8708686756550338e-05, "loss": 0.9345, "step": 4233 }, { "epoch": 0.5661941695640546, "grad_norm": 1.1095682382583618, "learning_rate": 1.8707977041560673e-05, "loss": 0.9256, "step": 4234 }, { "epoch": 0.5663278951591335, "grad_norm": 1.1522551774978638, "learning_rate": 1.870726714506156e-05, "loss": 1.0911, "step": 4235 }, { "epoch": 0.5664616207542124, "grad_norm": 0.9971983432769775, "learning_rate": 1.8706557067067795e-05, "loss": 0.974, "step": 4236 }, { "epoch": 0.5665953463492912, "grad_norm": 1.1638718843460083, "learning_rate": 1.870584680759418e-05, "loss": 0.9948, "step": 4237 }, { "epoch": 0.5667290719443702, "grad_norm": 1.300297498703003, "learning_rate": 1.8705136366655518e-05, "loss": 1.013, "step": 4238 }, { "epoch": 0.566862797539449, "grad_norm": 0.9951872825622559, "learning_rate": 1.8704425744266616e-05, "loss": 0.7816, "step": 4239 }, { "epoch": 0.566996523134528, "grad_norm": 1.0650863647460938, "learning_rate": 1.8703714940442294e-05, "loss": 0.9191, "step": 4240 }, { "epoch": 0.5671302487296068, "grad_norm": 1.220794677734375, "learning_rate": 1.870300395519736e-05, "loss": 1.0494, "step": 4241 }, { "epoch": 0.5672639743246858, "grad_norm": 1.1574805974960327, "learning_rate": 1.8702292788546634e-05, "loss": 1.0441, "step": 4242 }, { "epoch": 0.5673976999197646, "grad_norm": 1.1586596965789795, "learning_rate": 1.8701581440504945e-05, "loss": 0.8493, "step": 4243 }, { "epoch": 0.5675314255148436, "grad_norm": 1.103170394897461, "learning_rate": 1.8700869911087115e-05, "loss": 0.9277, "step": 4244 }, { "epoch": 0.5676651511099224, "grad_norm": 1.0954698324203491, "learning_rate": 1.870015820030798e-05, "loss": 0.9952, "step": 4245 }, { "epoch": 0.5677988767050013, "grad_norm": 1.126454472541809, "learning_rate": 1.8699446308182372e-05, "loss": 1.025, "step": 4246 }, { "epoch": 0.5679326023000802, "grad_norm": 1.1199926137924194, "learning_rate": 1.869873423472513e-05, "loss": 0.9862, "step": 4247 }, { "epoch": 0.5680663278951591, "grad_norm": 1.0378814935684204, "learning_rate": 1.8698021979951096e-05, "loss": 0.9111, "step": 4248 }, { "epoch": 0.568200053490238, "grad_norm": 1.0390182733535767, "learning_rate": 1.8697309543875115e-05, "loss": 1.0847, "step": 4249 }, { "epoch": 0.5683337790853169, "grad_norm": 1.2294104099273682, "learning_rate": 1.8696596926512043e-05, "loss": 1.0063, "step": 4250 }, { "epoch": 0.5684675046803959, "grad_norm": 1.206725001335144, "learning_rate": 1.8695884127876728e-05, "loss": 1.0005, "step": 4251 }, { "epoch": 0.5686012302754747, "grad_norm": 1.1476114988327026, "learning_rate": 1.869517114798403e-05, "loss": 0.9163, "step": 4252 }, { "epoch": 0.5687349558705537, "grad_norm": 1.168081521987915, "learning_rate": 1.8694457986848808e-05, "loss": 0.9948, "step": 4253 }, { "epoch": 0.5688686814656325, "grad_norm": 1.1543513536453247, "learning_rate": 1.869374464448593e-05, "loss": 1.0201, "step": 4254 }, { "epoch": 0.5690024070607114, "grad_norm": 1.1034555435180664, "learning_rate": 1.8693031120910264e-05, "loss": 0.8849, "step": 4255 }, { "epoch": 0.5691361326557903, "grad_norm": 1.1699445247650146, "learning_rate": 1.8692317416136686e-05, "loss": 0.9204, "step": 4256 }, { "epoch": 0.5692698582508692, "grad_norm": 1.2645628452301025, "learning_rate": 1.8691603530180064e-05, "loss": 1.0621, "step": 4257 }, { "epoch": 0.5694035838459481, "grad_norm": 1.1419788599014282, "learning_rate": 1.8690889463055285e-05, "loss": 1.0086, "step": 4258 }, { "epoch": 0.569537309441027, "grad_norm": 1.179091453552246, "learning_rate": 1.8690175214777233e-05, "loss": 1.0043, "step": 4259 }, { "epoch": 0.5696710350361059, "grad_norm": 1.084663987159729, "learning_rate": 1.8689460785360792e-05, "loss": 1.0159, "step": 4260 }, { "epoch": 0.5698047606311848, "grad_norm": 1.1908880472183228, "learning_rate": 1.8688746174820857e-05, "loss": 1.0621, "step": 4261 }, { "epoch": 0.5699384862262638, "grad_norm": 1.1702381372451782, "learning_rate": 1.868803138317232e-05, "loss": 0.9558, "step": 4262 }, { "epoch": 0.5700722118213426, "grad_norm": 1.0802279710769653, "learning_rate": 1.8687316410430086e-05, "loss": 0.9667, "step": 4263 }, { "epoch": 0.5702059374164214, "grad_norm": 1.0563397407531738, "learning_rate": 1.8686601256609053e-05, "loss": 0.8215, "step": 4264 }, { "epoch": 0.5703396630115004, "grad_norm": 1.0943865776062012, "learning_rate": 1.868588592172413e-05, "loss": 1.0115, "step": 4265 }, { "epoch": 0.5704733886065793, "grad_norm": 1.1347267627716064, "learning_rate": 1.8685170405790222e-05, "loss": 0.9674, "step": 4266 }, { "epoch": 0.5706071142016582, "grad_norm": 1.0891984701156616, "learning_rate": 1.868445470882225e-05, "loss": 1.017, "step": 4267 }, { "epoch": 0.5707408397967371, "grad_norm": 1.0536088943481445, "learning_rate": 1.8683738830835132e-05, "loss": 1.0345, "step": 4268 }, { "epoch": 0.570874565391816, "grad_norm": 1.1744624376296997, "learning_rate": 1.8683022771843785e-05, "loss": 1.0318, "step": 4269 }, { "epoch": 0.5710082909868949, "grad_norm": 0.9553273916244507, "learning_rate": 1.8682306531863137e-05, "loss": 0.857, "step": 4270 }, { "epoch": 0.5711420165819738, "grad_norm": 1.1216747760772705, "learning_rate": 1.868159011090812e-05, "loss": 1.0382, "step": 4271 }, { "epoch": 0.5712757421770527, "grad_norm": 1.0053479671478271, "learning_rate": 1.868087350899366e-05, "loss": 0.9544, "step": 4272 }, { "epoch": 0.5714094677721316, "grad_norm": 1.147739291191101, "learning_rate": 1.8680156726134702e-05, "loss": 1.0534, "step": 4273 }, { "epoch": 0.5715431933672105, "grad_norm": 1.0907477140426636, "learning_rate": 1.8679439762346186e-05, "loss": 0.9791, "step": 4274 }, { "epoch": 0.5716769189622893, "grad_norm": 1.200300931930542, "learning_rate": 1.8678722617643047e-05, "loss": 1.0878, "step": 4275 }, { "epoch": 0.5718106445573683, "grad_norm": 1.168286919593811, "learning_rate": 1.8678005292040243e-05, "loss": 1.0301, "step": 4276 }, { "epoch": 0.5719443701524471, "grad_norm": 1.0300790071487427, "learning_rate": 1.8677287785552724e-05, "loss": 0.9846, "step": 4277 }, { "epoch": 0.5720780957475261, "grad_norm": 1.0243234634399414, "learning_rate": 1.8676570098195443e-05, "loss": 0.9177, "step": 4278 }, { "epoch": 0.572211821342605, "grad_norm": 1.092434048652649, "learning_rate": 1.867585222998336e-05, "loss": 0.8831, "step": 4279 }, { "epoch": 0.5723455469376839, "grad_norm": 1.1066855192184448, "learning_rate": 1.867513418093144e-05, "loss": 1.0003, "step": 4280 }, { "epoch": 0.5724792725327628, "grad_norm": 0.9998567700386047, "learning_rate": 1.8674415951054647e-05, "loss": 0.892, "step": 4281 }, { "epoch": 0.5726129981278417, "grad_norm": 0.9793708920478821, "learning_rate": 1.8673697540367957e-05, "loss": 0.8627, "step": 4282 }, { "epoch": 0.5727467237229206, "grad_norm": 1.2207691669464111, "learning_rate": 1.867297894888634e-05, "loss": 1.0422, "step": 4283 }, { "epoch": 0.5728804493179994, "grad_norm": 0.9690563082695007, "learning_rate": 1.8672260176624775e-05, "loss": 0.9389, "step": 4284 }, { "epoch": 0.5730141749130784, "grad_norm": 1.1791579723358154, "learning_rate": 1.8671541223598248e-05, "loss": 0.9732, "step": 4285 }, { "epoch": 0.5731479005081572, "grad_norm": 1.2792478799819946, "learning_rate": 1.867082208982174e-05, "loss": 1.0981, "step": 4286 }, { "epoch": 0.5732816261032362, "grad_norm": 1.2760359048843384, "learning_rate": 1.867010277531024e-05, "loss": 1.0178, "step": 4287 }, { "epoch": 0.573415351698315, "grad_norm": 1.3104729652404785, "learning_rate": 1.866938328007875e-05, "loss": 1.0475, "step": 4288 }, { "epoch": 0.573549077293394, "grad_norm": 1.0913432836532593, "learning_rate": 1.8668663604142257e-05, "loss": 0.9351, "step": 4289 }, { "epoch": 0.5736828028884728, "grad_norm": 1.0429764986038208, "learning_rate": 1.866794374751577e-05, "loss": 1.008, "step": 4290 }, { "epoch": 0.5738165284835518, "grad_norm": 1.0540709495544434, "learning_rate": 1.8667223710214286e-05, "loss": 1.0325, "step": 4291 }, { "epoch": 0.5739502540786307, "grad_norm": 1.1324586868286133, "learning_rate": 1.8666503492252818e-05, "loss": 0.9381, "step": 4292 }, { "epoch": 0.5740839796737095, "grad_norm": 1.1117823123931885, "learning_rate": 1.866578309364638e-05, "loss": 0.9818, "step": 4293 }, { "epoch": 0.5742177052687885, "grad_norm": 1.015079140663147, "learning_rate": 1.8665062514409985e-05, "loss": 1.0504, "step": 4294 }, { "epoch": 0.5743514308638673, "grad_norm": 1.011838674545288, "learning_rate": 1.866434175455865e-05, "loss": 0.9238, "step": 4295 }, { "epoch": 0.5744851564589463, "grad_norm": 1.219601035118103, "learning_rate": 1.8663620814107404e-05, "loss": 1.0773, "step": 4296 }, { "epoch": 0.5746188820540251, "grad_norm": 1.047299861907959, "learning_rate": 1.8662899693071276e-05, "loss": 0.9675, "step": 4297 }, { "epoch": 0.5747526076491041, "grad_norm": 1.1643438339233398, "learning_rate": 1.8662178391465288e-05, "loss": 0.8747, "step": 4298 }, { "epoch": 0.5748863332441829, "grad_norm": 1.1427836418151855, "learning_rate": 1.8661456909304482e-05, "loss": 1.0029, "step": 4299 }, { "epoch": 0.5750200588392619, "grad_norm": 1.093421220779419, "learning_rate": 1.8660735246603896e-05, "loss": 1.0718, "step": 4300 }, { "epoch": 0.5751537844343407, "grad_norm": 1.1446141004562378, "learning_rate": 1.866001340337857e-05, "loss": 0.9216, "step": 4301 }, { "epoch": 0.5752875100294196, "grad_norm": 1.064455270767212, "learning_rate": 1.8659291379643553e-05, "loss": 0.9749, "step": 4302 }, { "epoch": 0.5754212356244985, "grad_norm": 1.060905933380127, "learning_rate": 1.8658569175413893e-05, "loss": 0.9315, "step": 4303 }, { "epoch": 0.5755549612195774, "grad_norm": 1.0140661001205444, "learning_rate": 1.865784679070464e-05, "loss": 0.9146, "step": 4304 }, { "epoch": 0.5756886868146563, "grad_norm": 1.0355207920074463, "learning_rate": 1.8657124225530857e-05, "loss": 0.9328, "step": 4305 }, { "epoch": 0.5758224124097352, "grad_norm": 1.1159552335739136, "learning_rate": 1.8656401479907607e-05, "loss": 0.865, "step": 4306 }, { "epoch": 0.5759561380048142, "grad_norm": 1.05385160446167, "learning_rate": 1.865567855384995e-05, "loss": 0.9141, "step": 4307 }, { "epoch": 0.576089863599893, "grad_norm": 1.0680540800094604, "learning_rate": 1.8654955447372957e-05, "loss": 0.9237, "step": 4308 }, { "epoch": 0.576223589194972, "grad_norm": 1.0946894884109497, "learning_rate": 1.8654232160491696e-05, "loss": 0.8756, "step": 4309 }, { "epoch": 0.5763573147900508, "grad_norm": 1.0846202373504639, "learning_rate": 1.865350869322125e-05, "loss": 0.9475, "step": 4310 }, { "epoch": 0.5764910403851297, "grad_norm": 1.0334192514419556, "learning_rate": 1.8652785045576692e-05, "loss": 0.9424, "step": 4311 }, { "epoch": 0.5766247659802086, "grad_norm": 1.056339144706726, "learning_rate": 1.8652061217573115e-05, "loss": 0.9894, "step": 4312 }, { "epoch": 0.5767584915752875, "grad_norm": 1.25147545337677, "learning_rate": 1.8651337209225598e-05, "loss": 1.0031, "step": 4313 }, { "epoch": 0.5768922171703664, "grad_norm": 1.0496535301208496, "learning_rate": 1.8650613020549232e-05, "loss": 0.9595, "step": 4314 }, { "epoch": 0.5770259427654453, "grad_norm": 1.0301775932312012, "learning_rate": 1.8649888651559122e-05, "loss": 1.0372, "step": 4315 }, { "epoch": 0.5771596683605242, "grad_norm": 1.0061918497085571, "learning_rate": 1.8649164102270357e-05, "loss": 0.8965, "step": 4316 }, { "epoch": 0.5772933939556031, "grad_norm": 1.0771774053573608, "learning_rate": 1.8648439372698043e-05, "loss": 1.039, "step": 4317 }, { "epoch": 0.577427119550682, "grad_norm": 0.9801791906356812, "learning_rate": 1.8647714462857284e-05, "loss": 0.8077, "step": 4318 }, { "epoch": 0.5775608451457609, "grad_norm": 1.144612431526184, "learning_rate": 1.8646989372763194e-05, "loss": 0.9401, "step": 4319 }, { "epoch": 0.5776945707408397, "grad_norm": 1.1350862979888916, "learning_rate": 1.8646264102430884e-05, "loss": 1.1049, "step": 4320 }, { "epoch": 0.5778282963359187, "grad_norm": 1.1326942443847656, "learning_rate": 1.864553865187547e-05, "loss": 0.9418, "step": 4321 }, { "epoch": 0.5779620219309975, "grad_norm": 1.2036370038986206, "learning_rate": 1.864481302111208e-05, "loss": 0.993, "step": 4322 }, { "epoch": 0.5780957475260765, "grad_norm": 0.9390064477920532, "learning_rate": 1.8644087210155834e-05, "loss": 0.8166, "step": 4323 }, { "epoch": 0.5782294731211554, "grad_norm": 1.078291893005371, "learning_rate": 1.864336121902186e-05, "loss": 0.9941, "step": 4324 }, { "epoch": 0.5783631987162343, "grad_norm": 1.100733757019043, "learning_rate": 1.864263504772529e-05, "loss": 0.9133, "step": 4325 }, { "epoch": 0.5784969243113132, "grad_norm": 1.0020307302474976, "learning_rate": 1.864190869628127e-05, "loss": 1.0392, "step": 4326 }, { "epoch": 0.5786306499063921, "grad_norm": 1.1002930402755737, "learning_rate": 1.8641182164704924e-05, "loss": 0.9659, "step": 4327 }, { "epoch": 0.578764375501471, "grad_norm": 1.2451192140579224, "learning_rate": 1.864045545301141e-05, "loss": 1.068, "step": 4328 }, { "epoch": 0.5788981010965499, "grad_norm": 1.0918710231781006, "learning_rate": 1.863972856121587e-05, "loss": 0.9935, "step": 4329 }, { "epoch": 0.5790318266916288, "grad_norm": 1.054304838180542, "learning_rate": 1.8639001489333453e-05, "loss": 0.9033, "step": 4330 }, { "epoch": 0.5791655522867076, "grad_norm": 1.0869016647338867, "learning_rate": 1.8638274237379316e-05, "loss": 0.9684, "step": 4331 }, { "epoch": 0.5792992778817866, "grad_norm": 1.1065447330474854, "learning_rate": 1.863754680536862e-05, "loss": 1.0336, "step": 4332 }, { "epoch": 0.5794330034768654, "grad_norm": 1.0373461246490479, "learning_rate": 1.863681919331653e-05, "loss": 0.969, "step": 4333 }, { "epoch": 0.5795667290719444, "grad_norm": 1.161434292793274, "learning_rate": 1.86360914012382e-05, "loss": 0.9722, "step": 4334 }, { "epoch": 0.5797004546670232, "grad_norm": 1.1578494310379028, "learning_rate": 1.8635363429148816e-05, "loss": 0.9716, "step": 4335 }, { "epoch": 0.5798341802621022, "grad_norm": 1.0085622072219849, "learning_rate": 1.863463527706354e-05, "loss": 0.9918, "step": 4336 }, { "epoch": 0.579967905857181, "grad_norm": 1.1762139797210693, "learning_rate": 1.8633906944997557e-05, "loss": 0.8571, "step": 4337 }, { "epoch": 0.58010163145226, "grad_norm": 1.1650320291519165, "learning_rate": 1.8633178432966044e-05, "loss": 1.048, "step": 4338 }, { "epoch": 0.5802353570473389, "grad_norm": 1.022343635559082, "learning_rate": 1.8632449740984187e-05, "loss": 1.0059, "step": 4339 }, { "epoch": 0.5803690826424177, "grad_norm": 1.2207787036895752, "learning_rate": 1.863172086906718e-05, "loss": 0.9996, "step": 4340 }, { "epoch": 0.5805028082374967, "grad_norm": 1.0533626079559326, "learning_rate": 1.8630991817230205e-05, "loss": 0.9458, "step": 4341 }, { "epoch": 0.5806365338325755, "grad_norm": 1.092612624168396, "learning_rate": 1.8630262585488465e-05, "loss": 0.9323, "step": 4342 }, { "epoch": 0.5807702594276545, "grad_norm": 1.170183539390564, "learning_rate": 1.8629533173857164e-05, "loss": 0.9896, "step": 4343 }, { "epoch": 0.5809039850227333, "grad_norm": 0.9614370465278625, "learning_rate": 1.8628803582351497e-05, "loss": 0.9136, "step": 4344 }, { "epoch": 0.5810377106178123, "grad_norm": 1.0926681756973267, "learning_rate": 1.862807381098668e-05, "loss": 0.8956, "step": 4345 }, { "epoch": 0.5811714362128911, "grad_norm": 0.983894407749176, "learning_rate": 1.862734385977792e-05, "loss": 0.8954, "step": 4346 }, { "epoch": 0.5813051618079701, "grad_norm": 1.0330544710159302, "learning_rate": 1.862661372874043e-05, "loss": 0.8689, "step": 4347 }, { "epoch": 0.5814388874030489, "grad_norm": 1.0935121774673462, "learning_rate": 1.8625883417889435e-05, "loss": 1.044, "step": 4348 }, { "epoch": 0.5815726129981278, "grad_norm": 1.101121425628662, "learning_rate": 1.862515292724015e-05, "loss": 0.9675, "step": 4349 }, { "epoch": 0.5817063385932068, "grad_norm": 1.0263274908065796, "learning_rate": 1.862442225680781e-05, "loss": 0.733, "step": 4350 }, { "epoch": 0.5818400641882856, "grad_norm": 1.1773288249969482, "learning_rate": 1.862369140660764e-05, "loss": 1.0055, "step": 4351 }, { "epoch": 0.5819737897833646, "grad_norm": 1.075722336769104, "learning_rate": 1.8622960376654872e-05, "loss": 0.9618, "step": 4352 }, { "epoch": 0.5821075153784434, "grad_norm": 1.2372747659683228, "learning_rate": 1.8622229166964748e-05, "loss": 0.9435, "step": 4353 }, { "epoch": 0.5822412409735224, "grad_norm": 0.9891464114189148, "learning_rate": 1.8621497777552508e-05, "loss": 0.858, "step": 4354 }, { "epoch": 0.5823749665686012, "grad_norm": 1.172113299369812, "learning_rate": 1.8620766208433395e-05, "loss": 0.9022, "step": 4355 }, { "epoch": 0.5825086921636802, "grad_norm": 1.0522807836532593, "learning_rate": 1.8620034459622663e-05, "loss": 1.09, "step": 4356 }, { "epoch": 0.582642417758759, "grad_norm": 1.130573034286499, "learning_rate": 1.8619302531135555e-05, "loss": 1.1329, "step": 4357 }, { "epoch": 0.5827761433538379, "grad_norm": 1.1870092153549194, "learning_rate": 1.8618570422987342e-05, "loss": 0.9432, "step": 4358 }, { "epoch": 0.5829098689489168, "grad_norm": 1.079034447669983, "learning_rate": 1.861783813519327e-05, "loss": 0.9633, "step": 4359 }, { "epoch": 0.5830435945439957, "grad_norm": 0.9621286392211914, "learning_rate": 1.8617105667768607e-05, "loss": 0.9559, "step": 4360 }, { "epoch": 0.5831773201390746, "grad_norm": 1.178645372390747, "learning_rate": 1.8616373020728627e-05, "loss": 0.9894, "step": 4361 }, { "epoch": 0.5833110457341535, "grad_norm": 1.0526145696640015, "learning_rate": 1.8615640194088592e-05, "loss": 0.9668, "step": 4362 }, { "epoch": 0.5834447713292324, "grad_norm": 1.0943219661712646, "learning_rate": 1.8614907187863786e-05, "loss": 1.1666, "step": 4363 }, { "epoch": 0.5835784969243113, "grad_norm": 1.1866589784622192, "learning_rate": 1.861417400206948e-05, "loss": 1.1163, "step": 4364 }, { "epoch": 0.5837122225193903, "grad_norm": 1.2501007318496704, "learning_rate": 1.8613440636720958e-05, "loss": 1.1154, "step": 4365 }, { "epoch": 0.5838459481144691, "grad_norm": 0.9247719049453735, "learning_rate": 1.861270709183351e-05, "loss": 0.8368, "step": 4366 }, { "epoch": 0.583979673709548, "grad_norm": 1.028141975402832, "learning_rate": 1.8611973367422425e-05, "loss": 0.9193, "step": 4367 }, { "epoch": 0.5841133993046269, "grad_norm": 1.1229690313339233, "learning_rate": 1.8611239463502997e-05, "loss": 1.0214, "step": 4368 }, { "epoch": 0.5842471248997058, "grad_norm": 1.092471718788147, "learning_rate": 1.861050538009052e-05, "loss": 0.9486, "step": 4369 }, { "epoch": 0.5843808504947847, "grad_norm": 1.0574297904968262, "learning_rate": 1.86097711172003e-05, "loss": 0.8775, "step": 4370 }, { "epoch": 0.5845145760898636, "grad_norm": 1.1997524499893188, "learning_rate": 1.8609036674847635e-05, "loss": 0.9813, "step": 4371 }, { "epoch": 0.5846483016849425, "grad_norm": 1.069234848022461, "learning_rate": 1.8608302053047845e-05, "loss": 0.9694, "step": 4372 }, { "epoch": 0.5847820272800214, "grad_norm": 1.0913699865341187, "learning_rate": 1.8607567251816232e-05, "loss": 1.1134, "step": 4373 }, { "epoch": 0.5849157528751003, "grad_norm": 1.2003231048583984, "learning_rate": 1.8606832271168115e-05, "loss": 0.8635, "step": 4374 }, { "epoch": 0.5850494784701792, "grad_norm": 0.996042013168335, "learning_rate": 1.8606097111118817e-05, "loss": 0.9104, "step": 4375 }, { "epoch": 0.5851832040652581, "grad_norm": 1.040037989616394, "learning_rate": 1.860536177168366e-05, "loss": 1.0371, "step": 4376 }, { "epoch": 0.585316929660337, "grad_norm": 1.0615019798278809, "learning_rate": 1.8604626252877972e-05, "loss": 0.9577, "step": 4377 }, { "epoch": 0.5854506552554158, "grad_norm": 1.0888714790344238, "learning_rate": 1.8603890554717082e-05, "loss": 0.9777, "step": 4378 }, { "epoch": 0.5855843808504948, "grad_norm": 1.1677852869033813, "learning_rate": 1.8603154677216325e-05, "loss": 1.1023, "step": 4379 }, { "epoch": 0.5857181064455736, "grad_norm": 1.1060407161712646, "learning_rate": 1.8602418620391046e-05, "loss": 0.8889, "step": 4380 }, { "epoch": 0.5858518320406526, "grad_norm": 1.0657731294631958, "learning_rate": 1.8601682384256577e-05, "loss": 0.8008, "step": 4381 }, { "epoch": 0.5859855576357315, "grad_norm": 1.0989327430725098, "learning_rate": 1.8600945968828275e-05, "loss": 0.8763, "step": 4382 }, { "epoch": 0.5861192832308104, "grad_norm": 1.2909172773361206, "learning_rate": 1.860020937412148e-05, "loss": 1.059, "step": 4383 }, { "epoch": 0.5862530088258893, "grad_norm": 1.07817804813385, "learning_rate": 1.8599472600151555e-05, "loss": 0.9236, "step": 4384 }, { "epoch": 0.5863867344209682, "grad_norm": 1.0776126384735107, "learning_rate": 1.859873564693385e-05, "loss": 0.9189, "step": 4385 }, { "epoch": 0.5865204600160471, "grad_norm": 1.1654759645462036, "learning_rate": 1.8597998514483724e-05, "loss": 0.9382, "step": 4386 }, { "epoch": 0.5866541856111259, "grad_norm": 1.1703912019729614, "learning_rate": 1.8597261202816553e-05, "loss": 1.0317, "step": 4387 }, { "epoch": 0.5867879112062049, "grad_norm": 1.0751920938491821, "learning_rate": 1.8596523711947693e-05, "loss": 1.0388, "step": 4388 }, { "epoch": 0.5869216368012837, "grad_norm": 1.1128225326538086, "learning_rate": 1.8595786041892526e-05, "loss": 0.9699, "step": 4389 }, { "epoch": 0.5870553623963627, "grad_norm": 1.1440491676330566, "learning_rate": 1.8595048192666425e-05, "loss": 1.0197, "step": 4390 }, { "epoch": 0.5871890879914415, "grad_norm": 1.0496002435684204, "learning_rate": 1.8594310164284767e-05, "loss": 1.0001, "step": 4391 }, { "epoch": 0.5873228135865205, "grad_norm": 1.063289761543274, "learning_rate": 1.8593571956762937e-05, "loss": 1.0825, "step": 4392 }, { "epoch": 0.5874565391815993, "grad_norm": 1.088711142539978, "learning_rate": 1.8592833570116324e-05, "loss": 0.9692, "step": 4393 }, { "epoch": 0.5875902647766783, "grad_norm": 1.0396865606307983, "learning_rate": 1.8592095004360316e-05, "loss": 0.9048, "step": 4394 }, { "epoch": 0.5877239903717572, "grad_norm": 1.162926197052002, "learning_rate": 1.8591356259510315e-05, "loss": 1.0185, "step": 4395 }, { "epoch": 0.587857715966836, "grad_norm": 1.037520408630371, "learning_rate": 1.859061733558171e-05, "loss": 0.9886, "step": 4396 }, { "epoch": 0.587991441561915, "grad_norm": 1.1378631591796875, "learning_rate": 1.8589878232589904e-05, "loss": 0.9297, "step": 4397 }, { "epoch": 0.5881251671569938, "grad_norm": 1.198503851890564, "learning_rate": 1.858913895055031e-05, "loss": 0.9098, "step": 4398 }, { "epoch": 0.5882588927520728, "grad_norm": 1.0104840993881226, "learning_rate": 1.858839948947833e-05, "loss": 1.0114, "step": 4399 }, { "epoch": 0.5883926183471516, "grad_norm": 1.0440484285354614, "learning_rate": 1.8587659849389386e-05, "loss": 0.9127, "step": 4400 }, { "epoch": 0.5885263439422306, "grad_norm": 0.9837992787361145, "learning_rate": 1.8586920030298885e-05, "loss": 0.989, "step": 4401 }, { "epoch": 0.5886600695373094, "grad_norm": 1.0748567581176758, "learning_rate": 1.8586180032222255e-05, "loss": 1.0159, "step": 4402 }, { "epoch": 0.5887937951323884, "grad_norm": 1.2201601266860962, "learning_rate": 1.858543985517492e-05, "loss": 1.0879, "step": 4403 }, { "epoch": 0.5889275207274672, "grad_norm": 1.0066763162612915, "learning_rate": 1.8584699499172304e-05, "loss": 1.0039, "step": 4404 }, { "epoch": 0.5890612463225461, "grad_norm": 1.385453462600708, "learning_rate": 1.858395896422984e-05, "loss": 1.1156, "step": 4405 }, { "epoch": 0.589194971917625, "grad_norm": 1.074121356010437, "learning_rate": 1.8583218250362967e-05, "loss": 0.9929, "step": 4406 }, { "epoch": 0.5893286975127039, "grad_norm": 1.0838309526443481, "learning_rate": 1.8582477357587123e-05, "loss": 0.9797, "step": 4407 }, { "epoch": 0.5894624231077829, "grad_norm": 1.1560280323028564, "learning_rate": 1.858173628591775e-05, "loss": 1.0286, "step": 4408 }, { "epoch": 0.5895961487028617, "grad_norm": 1.151377558708191, "learning_rate": 1.85809950353703e-05, "loss": 0.9663, "step": 4409 }, { "epoch": 0.5897298742979407, "grad_norm": 1.1858372688293457, "learning_rate": 1.8580253605960215e-05, "loss": 0.9735, "step": 4410 }, { "epoch": 0.5898635998930195, "grad_norm": 1.083585500717163, "learning_rate": 1.8579511997702955e-05, "loss": 0.9989, "step": 4411 }, { "epoch": 0.5899973254880985, "grad_norm": 1.0679858922958374, "learning_rate": 1.857877021061398e-05, "loss": 0.9549, "step": 4412 }, { "epoch": 0.5901310510831773, "grad_norm": 1.0416409969329834, "learning_rate": 1.8578028244708747e-05, "loss": 0.9281, "step": 4413 }, { "epoch": 0.5902647766782562, "grad_norm": 1.0587183237075806, "learning_rate": 1.8577286100002723e-05, "loss": 0.9468, "step": 4414 }, { "epoch": 0.5903985022733351, "grad_norm": 1.1815359592437744, "learning_rate": 1.8576543776511378e-05, "loss": 1.0694, "step": 4415 }, { "epoch": 0.590532227868414, "grad_norm": 1.1404277086257935, "learning_rate": 1.8575801274250185e-05, "loss": 1.1438, "step": 4416 }, { "epoch": 0.5906659534634929, "grad_norm": 1.1776742935180664, "learning_rate": 1.857505859323462e-05, "loss": 1.0331, "step": 4417 }, { "epoch": 0.5907996790585718, "grad_norm": 0.981890082359314, "learning_rate": 1.8574315733480165e-05, "loss": 0.9235, "step": 4418 }, { "epoch": 0.5909334046536507, "grad_norm": 1.0318905115127563, "learning_rate": 1.85735726950023e-05, "loss": 0.9454, "step": 4419 }, { "epoch": 0.5910671302487296, "grad_norm": 1.1517056226730347, "learning_rate": 1.8572829477816522e-05, "loss": 1.0348, "step": 4420 }, { "epoch": 0.5912008558438085, "grad_norm": 1.0160032510757446, "learning_rate": 1.8572086081938315e-05, "loss": 0.9597, "step": 4421 }, { "epoch": 0.5913345814388874, "grad_norm": 1.0701489448547363, "learning_rate": 1.8571342507383175e-05, "loss": 0.9574, "step": 4422 }, { "epoch": 0.5914683070339664, "grad_norm": 1.0123778581619263, "learning_rate": 1.8570598754166602e-05, "loss": 0.945, "step": 4423 }, { "epoch": 0.5916020326290452, "grad_norm": 1.249263048171997, "learning_rate": 1.85698548223041e-05, "loss": 1.0119, "step": 4424 }, { "epoch": 0.591735758224124, "grad_norm": 1.0524859428405762, "learning_rate": 1.8569110711811173e-05, "loss": 0.9766, "step": 4425 }, { "epoch": 0.591869483819203, "grad_norm": 1.2803441286087036, "learning_rate": 1.8568366422703336e-05, "loss": 0.9684, "step": 4426 }, { "epoch": 0.5920032094142819, "grad_norm": 1.005005955696106, "learning_rate": 1.8567621954996098e-05, "loss": 0.9088, "step": 4427 }, { "epoch": 0.5921369350093608, "grad_norm": 1.062455654144287, "learning_rate": 1.8566877308704977e-05, "loss": 0.8976, "step": 4428 }, { "epoch": 0.5922706606044397, "grad_norm": 1.0038727521896362, "learning_rate": 1.8566132483845497e-05, "loss": 1.0398, "step": 4429 }, { "epoch": 0.5924043861995186, "grad_norm": 1.0474847555160522, "learning_rate": 1.8565387480433186e-05, "loss": 0.9291, "step": 4430 }, { "epoch": 0.5925381117945975, "grad_norm": 1.1138916015625, "learning_rate": 1.8564642298483565e-05, "loss": 1.0637, "step": 4431 }, { "epoch": 0.5926718373896764, "grad_norm": 1.0432411432266235, "learning_rate": 1.8563896938012173e-05, "loss": 0.9454, "step": 4432 }, { "epoch": 0.5928055629847553, "grad_norm": 1.147680401802063, "learning_rate": 1.8563151399034543e-05, "loss": 0.8906, "step": 4433 }, { "epoch": 0.5929392885798341, "grad_norm": 1.134974718093872, "learning_rate": 1.8562405681566217e-05, "loss": 1.0408, "step": 4434 }, { "epoch": 0.5930730141749131, "grad_norm": 1.0291316509246826, "learning_rate": 1.8561659785622737e-05, "loss": 0.9175, "step": 4435 }, { "epoch": 0.5932067397699919, "grad_norm": 0.9544959664344788, "learning_rate": 1.8560913711219653e-05, "loss": 1.0203, "step": 4436 }, { "epoch": 0.5933404653650709, "grad_norm": 1.1420345306396484, "learning_rate": 1.856016745837251e-05, "loss": 0.9704, "step": 4437 }, { "epoch": 0.5934741909601498, "grad_norm": 1.1274502277374268, "learning_rate": 1.8559421027096873e-05, "loss": 0.962, "step": 4438 }, { "epoch": 0.5936079165552287, "grad_norm": 1.0310615301132202, "learning_rate": 1.8558674417408293e-05, "loss": 0.9221, "step": 4439 }, { "epoch": 0.5937416421503076, "grad_norm": 1.0321381092071533, "learning_rate": 1.8557927629322333e-05, "loss": 0.9358, "step": 4440 }, { "epoch": 0.5938753677453865, "grad_norm": 1.0985547304153442, "learning_rate": 1.8557180662854565e-05, "loss": 0.9564, "step": 4441 }, { "epoch": 0.5940090933404654, "grad_norm": 1.0813101530075073, "learning_rate": 1.855643351802055e-05, "loss": 0.9261, "step": 4442 }, { "epoch": 0.5941428189355442, "grad_norm": 1.0591710805892944, "learning_rate": 1.8555686194835868e-05, "loss": 0.9867, "step": 4443 }, { "epoch": 0.5942765445306232, "grad_norm": 1.1935772895812988, "learning_rate": 1.8554938693316093e-05, "loss": 1.0344, "step": 4444 }, { "epoch": 0.594410270125702, "grad_norm": 1.1170843839645386, "learning_rate": 1.855419101347681e-05, "loss": 0.8892, "step": 4445 }, { "epoch": 0.594543995720781, "grad_norm": 1.0175094604492188, "learning_rate": 1.8553443155333596e-05, "loss": 0.8558, "step": 4446 }, { "epoch": 0.5946777213158598, "grad_norm": 1.2034450769424438, "learning_rate": 1.855269511890205e-05, "loss": 0.9747, "step": 4447 }, { "epoch": 0.5948114469109388, "grad_norm": 1.1066092252731323, "learning_rate": 1.8551946904197754e-05, "loss": 0.858, "step": 4448 }, { "epoch": 0.5949451725060176, "grad_norm": 1.0075006484985352, "learning_rate": 1.8551198511236308e-05, "loss": 0.8943, "step": 4449 }, { "epoch": 0.5950788981010966, "grad_norm": 1.1339243650436401, "learning_rate": 1.855044994003331e-05, "loss": 0.9346, "step": 4450 }, { "epoch": 0.5952126236961754, "grad_norm": 1.1379661560058594, "learning_rate": 1.854970119060437e-05, "loss": 1.044, "step": 4451 }, { "epoch": 0.5953463492912543, "grad_norm": 1.0578159093856812, "learning_rate": 1.854895226296509e-05, "loss": 0.9572, "step": 4452 }, { "epoch": 0.5954800748863333, "grad_norm": 1.1160355806350708, "learning_rate": 1.8548203157131074e-05, "loss": 0.9851, "step": 4453 }, { "epoch": 0.5956138004814121, "grad_norm": 1.2497044801712036, "learning_rate": 1.854745387311795e-05, "loss": 1.0036, "step": 4454 }, { "epoch": 0.5957475260764911, "grad_norm": 1.0638896226882935, "learning_rate": 1.8546704410941325e-05, "loss": 1.0284, "step": 4455 }, { "epoch": 0.5958812516715699, "grad_norm": 1.0296021699905396, "learning_rate": 1.8545954770616825e-05, "loss": 0.8751, "step": 4456 }, { "epoch": 0.5960149772666489, "grad_norm": 1.1330212354660034, "learning_rate": 1.8545204952160077e-05, "loss": 0.9918, "step": 4457 }, { "epoch": 0.5961487028617277, "grad_norm": 1.1670010089874268, "learning_rate": 1.8544454955586707e-05, "loss": 1.0835, "step": 4458 }, { "epoch": 0.5962824284568067, "grad_norm": 1.0553812980651855, "learning_rate": 1.8543704780912354e-05, "loss": 0.8798, "step": 4459 }, { "epoch": 0.5964161540518855, "grad_norm": 1.0852761268615723, "learning_rate": 1.8542954428152647e-05, "loss": 1.0532, "step": 4460 }, { "epoch": 0.5965498796469644, "grad_norm": 1.1600054502487183, "learning_rate": 1.8542203897323226e-05, "loss": 1.0638, "step": 4461 }, { "epoch": 0.5966836052420433, "grad_norm": 1.0125837326049805, "learning_rate": 1.8541453188439745e-05, "loss": 0.9601, "step": 4462 }, { "epoch": 0.5968173308371222, "grad_norm": 1.2771514654159546, "learning_rate": 1.854070230151784e-05, "loss": 1.0022, "step": 4463 }, { "epoch": 0.5969510564322011, "grad_norm": 1.2395879030227661, "learning_rate": 1.8539951236573173e-05, "loss": 0.9948, "step": 4464 }, { "epoch": 0.59708478202728, "grad_norm": 1.129096508026123, "learning_rate": 1.853919999362139e-05, "loss": 1.0318, "step": 4465 }, { "epoch": 0.597218507622359, "grad_norm": 1.0584124326705933, "learning_rate": 1.853844857267816e-05, "loss": 0.9854, "step": 4466 }, { "epoch": 0.5973522332174378, "grad_norm": 1.131452202796936, "learning_rate": 1.8537696973759135e-05, "loss": 0.918, "step": 4467 }, { "epoch": 0.5974859588125168, "grad_norm": 1.1946680545806885, "learning_rate": 1.853694519687999e-05, "loss": 1.0887, "step": 4468 }, { "epoch": 0.5976196844075956, "grad_norm": 1.147078514099121, "learning_rate": 1.8536193242056386e-05, "loss": 0.9997, "step": 4469 }, { "epoch": 0.5977534100026745, "grad_norm": 1.224615216255188, "learning_rate": 1.8535441109304006e-05, "loss": 1.1428, "step": 4470 }, { "epoch": 0.5978871355977534, "grad_norm": 1.0773061513900757, "learning_rate": 1.8534688798638524e-05, "loss": 1.0077, "step": 4471 }, { "epoch": 0.5980208611928323, "grad_norm": 1.1713714599609375, "learning_rate": 1.853393631007562e-05, "loss": 0.988, "step": 4472 }, { "epoch": 0.5981545867879112, "grad_norm": 1.0535506010055542, "learning_rate": 1.853318364363098e-05, "loss": 0.9334, "step": 4473 }, { "epoch": 0.5982883123829901, "grad_norm": 1.1029497385025024, "learning_rate": 1.853243079932029e-05, "loss": 1.0317, "step": 4474 }, { "epoch": 0.598422037978069, "grad_norm": 1.0632649660110474, "learning_rate": 1.8531677777159246e-05, "loss": 0.9816, "step": 4475 }, { "epoch": 0.5985557635731479, "grad_norm": 1.0627434253692627, "learning_rate": 1.8530924577163546e-05, "loss": 1.1314, "step": 4476 }, { "epoch": 0.5986894891682268, "grad_norm": 1.1574668884277344, "learning_rate": 1.853017119934888e-05, "loss": 1.0636, "step": 4477 }, { "epoch": 0.5988232147633057, "grad_norm": 1.1443142890930176, "learning_rate": 1.852941764373096e-05, "loss": 1.0335, "step": 4478 }, { "epoch": 0.5989569403583846, "grad_norm": 1.0488827228546143, "learning_rate": 1.8528663910325492e-05, "loss": 0.9357, "step": 4479 }, { "epoch": 0.5990906659534635, "grad_norm": 1.0668854713439941, "learning_rate": 1.852790999914819e-05, "loss": 0.9756, "step": 4480 }, { "epoch": 0.5992243915485423, "grad_norm": 1.1215001344680786, "learning_rate": 1.852715591021476e-05, "loss": 0.9152, "step": 4481 }, { "epoch": 0.5993581171436213, "grad_norm": 1.1761562824249268, "learning_rate": 1.8526401643540924e-05, "loss": 1.0071, "step": 4482 }, { "epoch": 0.5994918427387002, "grad_norm": 1.0299917459487915, "learning_rate": 1.8525647199142406e-05, "loss": 0.9649, "step": 4483 }, { "epoch": 0.5996255683337791, "grad_norm": 1.1721644401550293, "learning_rate": 1.8524892577034928e-05, "loss": 0.9146, "step": 4484 }, { "epoch": 0.599759293928858, "grad_norm": 1.0512962341308594, "learning_rate": 1.8524137777234226e-05, "loss": 0.8912, "step": 4485 }, { "epoch": 0.5998930195239369, "grad_norm": 1.1344468593597412, "learning_rate": 1.8523382799756024e-05, "loss": 1.0982, "step": 4486 }, { "epoch": 0.6000267451190158, "grad_norm": 1.016634464263916, "learning_rate": 1.8522627644616066e-05, "loss": 0.9431, "step": 4487 }, { "epoch": 0.6001604707140947, "grad_norm": 1.048527479171753, "learning_rate": 1.852187231183009e-05, "loss": 0.8622, "step": 4488 }, { "epoch": 0.6002941963091736, "grad_norm": 1.2555572986602783, "learning_rate": 1.852111680141384e-05, "loss": 1.0529, "step": 4489 }, { "epoch": 0.6004279219042524, "grad_norm": 1.0794832706451416, "learning_rate": 1.8520361113383068e-05, "loss": 1.0224, "step": 4490 }, { "epoch": 0.6005616474993314, "grad_norm": 1.0830272436141968, "learning_rate": 1.8519605247753517e-05, "loss": 0.9989, "step": 4491 }, { "epoch": 0.6006953730944102, "grad_norm": 1.099109411239624, "learning_rate": 1.8518849204540947e-05, "loss": 0.9453, "step": 4492 }, { "epoch": 0.6008290986894892, "grad_norm": 1.2155909538269043, "learning_rate": 1.8518092983761117e-05, "loss": 1.0033, "step": 4493 }, { "epoch": 0.600962824284568, "grad_norm": 1.1234641075134277, "learning_rate": 1.851733658542979e-05, "loss": 1.0491, "step": 4494 }, { "epoch": 0.601096549879647, "grad_norm": 1.1045058965682983, "learning_rate": 1.8516580009562734e-05, "loss": 0.9313, "step": 4495 }, { "epoch": 0.6012302754747259, "grad_norm": 1.1874134540557861, "learning_rate": 1.8515823256175716e-05, "loss": 0.9677, "step": 4496 }, { "epoch": 0.6013640010698048, "grad_norm": 1.1104332208633423, "learning_rate": 1.8515066325284513e-05, "loss": 0.9587, "step": 4497 }, { "epoch": 0.6014977266648837, "grad_norm": 1.0392301082611084, "learning_rate": 1.8514309216904895e-05, "loss": 0.9552, "step": 4498 }, { "epoch": 0.6016314522599625, "grad_norm": 1.0481865406036377, "learning_rate": 1.8513551931052654e-05, "loss": 0.9353, "step": 4499 }, { "epoch": 0.6017651778550415, "grad_norm": 1.0300705432891846, "learning_rate": 1.8512794467743567e-05, "loss": 0.9546, "step": 4500 }, { "epoch": 0.6018989034501203, "grad_norm": 1.1318790912628174, "learning_rate": 1.8512036826993425e-05, "loss": 1.0321, "step": 4501 }, { "epoch": 0.6020326290451993, "grad_norm": 1.0639405250549316, "learning_rate": 1.8511279008818022e-05, "loss": 0.9246, "step": 4502 }, { "epoch": 0.6021663546402781, "grad_norm": 1.2319903373718262, "learning_rate": 1.851052101323315e-05, "loss": 1.0496, "step": 4503 }, { "epoch": 0.6023000802353571, "grad_norm": 1.170634150505066, "learning_rate": 1.8509762840254613e-05, "loss": 0.9195, "step": 4504 }, { "epoch": 0.6024338058304359, "grad_norm": 1.0659806728363037, "learning_rate": 1.850900448989821e-05, "loss": 0.8844, "step": 4505 }, { "epoch": 0.6025675314255149, "grad_norm": 1.113992691040039, "learning_rate": 1.8508245962179755e-05, "loss": 0.989, "step": 4506 }, { "epoch": 0.6027012570205937, "grad_norm": 1.0443806648254395, "learning_rate": 1.8507487257115055e-05, "loss": 0.8596, "step": 4507 }, { "epoch": 0.6028349826156726, "grad_norm": 1.2125656604766846, "learning_rate": 1.850672837471992e-05, "loss": 1.0007, "step": 4508 }, { "epoch": 0.6029687082107515, "grad_norm": 1.0046961307525635, "learning_rate": 1.8505969315010175e-05, "loss": 0.9042, "step": 4509 }, { "epoch": 0.6031024338058304, "grad_norm": 1.0259705781936646, "learning_rate": 1.8505210078001635e-05, "loss": 0.978, "step": 4510 }, { "epoch": 0.6032361594009094, "grad_norm": 0.9716789722442627, "learning_rate": 1.8504450663710134e-05, "loss": 0.9601, "step": 4511 }, { "epoch": 0.6033698849959882, "grad_norm": 1.1140798330307007, "learning_rate": 1.8503691072151495e-05, "loss": 1.0877, "step": 4512 }, { "epoch": 0.6035036105910672, "grad_norm": 1.194087028503418, "learning_rate": 1.8502931303341553e-05, "loss": 0.9907, "step": 4513 }, { "epoch": 0.603637336186146, "grad_norm": 1.0034937858581543, "learning_rate": 1.8502171357296144e-05, "loss": 0.8972, "step": 4514 }, { "epoch": 0.603771061781225, "grad_norm": 0.9939236640930176, "learning_rate": 1.850141123403111e-05, "loss": 0.9661, "step": 4515 }, { "epoch": 0.6039047873763038, "grad_norm": 0.9628288745880127, "learning_rate": 1.850065093356229e-05, "loss": 0.8413, "step": 4516 }, { "epoch": 0.6040385129713827, "grad_norm": 1.0935051441192627, "learning_rate": 1.849989045590554e-05, "loss": 0.8985, "step": 4517 }, { "epoch": 0.6041722385664616, "grad_norm": 1.0853569507598877, "learning_rate": 1.8499129801076704e-05, "loss": 1.0148, "step": 4518 }, { "epoch": 0.6043059641615405, "grad_norm": 0.9970325827598572, "learning_rate": 1.849836896909164e-05, "loss": 0.9588, "step": 4519 }, { "epoch": 0.6044396897566194, "grad_norm": 1.0848073959350586, "learning_rate": 1.849760795996621e-05, "loss": 0.8414, "step": 4520 }, { "epoch": 0.6045734153516983, "grad_norm": 1.0946645736694336, "learning_rate": 1.8496846773716267e-05, "loss": 0.9546, "step": 4521 }, { "epoch": 0.6047071409467772, "grad_norm": 1.1542887687683105, "learning_rate": 1.849608541035769e-05, "loss": 0.9959, "step": 4522 }, { "epoch": 0.6048408665418561, "grad_norm": 1.093762993812561, "learning_rate": 1.8495323869906342e-05, "loss": 1.0061, "step": 4523 }, { "epoch": 0.604974592136935, "grad_norm": 1.123477578163147, "learning_rate": 1.8494562152378093e-05, "loss": 1.0387, "step": 4524 }, { "epoch": 0.6051083177320139, "grad_norm": 1.1093143224716187, "learning_rate": 1.849380025778883e-05, "loss": 0.935, "step": 4525 }, { "epoch": 0.6052420433270929, "grad_norm": 1.1035022735595703, "learning_rate": 1.8493038186154424e-05, "loss": 1.0147, "step": 4526 }, { "epoch": 0.6053757689221717, "grad_norm": 1.0522245168685913, "learning_rate": 1.8492275937490764e-05, "loss": 0.8308, "step": 4527 }, { "epoch": 0.6055094945172506, "grad_norm": 1.1079338788986206, "learning_rate": 1.849151351181374e-05, "loss": 1.0213, "step": 4528 }, { "epoch": 0.6056432201123295, "grad_norm": 1.0584173202514648, "learning_rate": 1.8490750909139242e-05, "loss": 1.0146, "step": 4529 }, { "epoch": 0.6057769457074084, "grad_norm": 1.1158702373504639, "learning_rate": 1.8489988129483167e-05, "loss": 0.8822, "step": 4530 }, { "epoch": 0.6059106713024873, "grad_norm": 1.125991702079773, "learning_rate": 1.848922517286141e-05, "loss": 1.0219, "step": 4531 }, { "epoch": 0.6060443968975662, "grad_norm": 1.1146489381790161, "learning_rate": 1.848846203928988e-05, "loss": 1.0876, "step": 4532 }, { "epoch": 0.6061781224926451, "grad_norm": 1.020655870437622, "learning_rate": 1.8487698728784482e-05, "loss": 0.9425, "step": 4533 }, { "epoch": 0.606311848087724, "grad_norm": 1.037375807762146, "learning_rate": 1.8486935241361127e-05, "loss": 1.0368, "step": 4534 }, { "epoch": 0.6064455736828029, "grad_norm": 1.2069827318191528, "learning_rate": 1.8486171577035727e-05, "loss": 1.0099, "step": 4535 }, { "epoch": 0.6065792992778818, "grad_norm": 1.0879740715026855, "learning_rate": 1.84854077358242e-05, "loss": 0.8572, "step": 4536 }, { "epoch": 0.6067130248729606, "grad_norm": 1.036346673965454, "learning_rate": 1.8484643717742465e-05, "loss": 1.0611, "step": 4537 }, { "epoch": 0.6068467504680396, "grad_norm": 1.044650673866272, "learning_rate": 1.8483879522806455e-05, "loss": 0.8069, "step": 4538 }, { "epoch": 0.6069804760631184, "grad_norm": 0.9842966794967651, "learning_rate": 1.8483115151032094e-05, "loss": 1.0056, "step": 4539 }, { "epoch": 0.6071142016581974, "grad_norm": 1.0881311893463135, "learning_rate": 1.8482350602435315e-05, "loss": 0.9188, "step": 4540 }, { "epoch": 0.6072479272532763, "grad_norm": 1.0662394762039185, "learning_rate": 1.8481585877032054e-05, "loss": 0.9113, "step": 4541 }, { "epoch": 0.6073816528483552, "grad_norm": 0.9918805360794067, "learning_rate": 1.848082097483825e-05, "loss": 0.8597, "step": 4542 }, { "epoch": 0.6075153784434341, "grad_norm": 1.0060417652130127, "learning_rate": 1.848005589586985e-05, "loss": 0.9096, "step": 4543 }, { "epoch": 0.607649104038513, "grad_norm": 1.0584622621536255, "learning_rate": 1.84792906401428e-05, "loss": 0.9055, "step": 4544 }, { "epoch": 0.6077828296335919, "grad_norm": 1.0492143630981445, "learning_rate": 1.847852520767305e-05, "loss": 0.9573, "step": 4545 }, { "epoch": 0.6079165552286707, "grad_norm": 1.0446584224700928, "learning_rate": 1.8477759598476556e-05, "loss": 1.0293, "step": 4546 }, { "epoch": 0.6080502808237497, "grad_norm": 1.0829218626022339, "learning_rate": 1.847699381256927e-05, "loss": 0.9103, "step": 4547 }, { "epoch": 0.6081840064188285, "grad_norm": 1.12076735496521, "learning_rate": 1.8476227849967166e-05, "loss": 0.9276, "step": 4548 }, { "epoch": 0.6083177320139075, "grad_norm": 1.1958202123641968, "learning_rate": 1.8475461710686202e-05, "loss": 0.9711, "step": 4549 }, { "epoch": 0.6084514576089863, "grad_norm": 1.0606281757354736, "learning_rate": 1.8474695394742345e-05, "loss": 0.8768, "step": 4550 }, { "epoch": 0.6085851832040653, "grad_norm": 1.0081276893615723, "learning_rate": 1.8473928902151576e-05, "loss": 0.9358, "step": 4551 }, { "epoch": 0.6087189087991441, "grad_norm": 1.0348228216171265, "learning_rate": 1.8473162232929867e-05, "loss": 0.9871, "step": 4552 }, { "epoch": 0.6088526343942231, "grad_norm": 1.1591606140136719, "learning_rate": 1.8472395387093195e-05, "loss": 0.901, "step": 4553 }, { "epoch": 0.608986359989302, "grad_norm": 1.1983684301376343, "learning_rate": 1.8471628364657555e-05, "loss": 0.9137, "step": 4554 }, { "epoch": 0.6091200855843808, "grad_norm": 1.1667745113372803, "learning_rate": 1.8470861165638926e-05, "loss": 1.0372, "step": 4555 }, { "epoch": 0.6092538111794598, "grad_norm": 1.0995213985443115, "learning_rate": 1.8470093790053297e-05, "loss": 1.234, "step": 4556 }, { "epoch": 0.6093875367745386, "grad_norm": 1.1155389547348022, "learning_rate": 1.8469326237916675e-05, "loss": 0.9736, "step": 4557 }, { "epoch": 0.6095212623696176, "grad_norm": 1.0100648403167725, "learning_rate": 1.846855850924505e-05, "loss": 0.9469, "step": 4558 }, { "epoch": 0.6096549879646964, "grad_norm": 1.1121280193328857, "learning_rate": 1.8467790604054423e-05, "loss": 1.0334, "step": 4559 }, { "epoch": 0.6097887135597754, "grad_norm": 1.0562087297439575, "learning_rate": 1.8467022522360805e-05, "loss": 0.921, "step": 4560 }, { "epoch": 0.6099224391548542, "grad_norm": 1.1882513761520386, "learning_rate": 1.8466254264180205e-05, "loss": 1.0534, "step": 4561 }, { "epoch": 0.6100561647499332, "grad_norm": 1.1301093101501465, "learning_rate": 1.846548582952864e-05, "loss": 0.9164, "step": 4562 }, { "epoch": 0.610189890345012, "grad_norm": 1.0955933332443237, "learning_rate": 1.8464717218422115e-05, "loss": 1.0461, "step": 4563 }, { "epoch": 0.6103236159400909, "grad_norm": 1.090499997138977, "learning_rate": 1.8463948430876667e-05, "loss": 1.012, "step": 4564 }, { "epoch": 0.6104573415351698, "grad_norm": 1.0175905227661133, "learning_rate": 1.846317946690831e-05, "loss": 0.7973, "step": 4565 }, { "epoch": 0.6105910671302487, "grad_norm": 1.081360936164856, "learning_rate": 1.8462410326533073e-05, "loss": 0.9581, "step": 4566 }, { "epoch": 0.6107247927253276, "grad_norm": 0.9667996764183044, "learning_rate": 1.8461641009766996e-05, "loss": 0.927, "step": 4567 }, { "epoch": 0.6108585183204065, "grad_norm": 1.0959899425506592, "learning_rate": 1.8460871516626105e-05, "loss": 0.9166, "step": 4568 }, { "epoch": 0.6109922439154855, "grad_norm": 1.0939836502075195, "learning_rate": 1.8460101847126445e-05, "loss": 1.0318, "step": 4569 }, { "epoch": 0.6111259695105643, "grad_norm": 0.9785194993019104, "learning_rate": 1.8459332001284057e-05, "loss": 0.9044, "step": 4570 }, { "epoch": 0.6112596951056433, "grad_norm": 1.062530517578125, "learning_rate": 1.845856197911499e-05, "loss": 0.9238, "step": 4571 }, { "epoch": 0.6113934207007221, "grad_norm": 1.0204249620437622, "learning_rate": 1.8457791780635288e-05, "loss": 0.8209, "step": 4572 }, { "epoch": 0.6115271462958011, "grad_norm": 1.0798455476760864, "learning_rate": 1.8457021405861014e-05, "loss": 0.9158, "step": 4573 }, { "epoch": 0.6116608718908799, "grad_norm": 0.983466386795044, "learning_rate": 1.845625085480822e-05, "loss": 0.831, "step": 4574 }, { "epoch": 0.6117945974859588, "grad_norm": 1.0896072387695312, "learning_rate": 1.8455480127492968e-05, "loss": 0.9387, "step": 4575 }, { "epoch": 0.6119283230810377, "grad_norm": 0.9671067595481873, "learning_rate": 1.8454709223931323e-05, "loss": 0.8393, "step": 4576 }, { "epoch": 0.6120620486761166, "grad_norm": 0.9905608892440796, "learning_rate": 1.8453938144139356e-05, "loss": 0.9594, "step": 4577 }, { "epoch": 0.6121957742711955, "grad_norm": 1.0986615419387817, "learning_rate": 1.845316688813314e-05, "loss": 1.0362, "step": 4578 }, { "epoch": 0.6123294998662744, "grad_norm": 1.175173282623291, "learning_rate": 1.8452395455928744e-05, "loss": 1.0637, "step": 4579 }, { "epoch": 0.6124632254613533, "grad_norm": 1.1355693340301514, "learning_rate": 1.8451623847542256e-05, "loss": 0.8776, "step": 4580 }, { "epoch": 0.6125969510564322, "grad_norm": 1.1460543870925903, "learning_rate": 1.8450852062989756e-05, "loss": 0.9882, "step": 4581 }, { "epoch": 0.6127306766515112, "grad_norm": 1.1756792068481445, "learning_rate": 1.845008010228733e-05, "loss": 0.9201, "step": 4582 }, { "epoch": 0.61286440224659, "grad_norm": 1.1689866781234741, "learning_rate": 1.844930796545107e-05, "loss": 1.0574, "step": 4583 }, { "epoch": 0.6129981278416688, "grad_norm": 1.0559935569763184, "learning_rate": 1.8448535652497073e-05, "loss": 1.0118, "step": 4584 }, { "epoch": 0.6131318534367478, "grad_norm": 1.1090352535247803, "learning_rate": 1.8447763163441433e-05, "loss": 0.8674, "step": 4585 }, { "epoch": 0.6132655790318267, "grad_norm": 1.0772432088851929, "learning_rate": 1.8446990498300254e-05, "loss": 0.9188, "step": 4586 }, { "epoch": 0.6133993046269056, "grad_norm": 1.3038002252578735, "learning_rate": 1.844621765708964e-05, "loss": 0.8902, "step": 4587 }, { "epoch": 0.6135330302219845, "grad_norm": 1.0212488174438477, "learning_rate": 1.84454446398257e-05, "loss": 0.9564, "step": 4588 }, { "epoch": 0.6136667558170634, "grad_norm": 1.185678482055664, "learning_rate": 1.8444671446524552e-05, "loss": 0.9754, "step": 4589 }, { "epoch": 0.6138004814121423, "grad_norm": 1.129547357559204, "learning_rate": 1.8443898077202306e-05, "loss": 0.8964, "step": 4590 }, { "epoch": 0.6139342070072212, "grad_norm": 1.1499437093734741, "learning_rate": 1.8443124531875086e-05, "loss": 1.0037, "step": 4591 }, { "epoch": 0.6140679326023001, "grad_norm": 1.084995985031128, "learning_rate": 1.8442350810559012e-05, "loss": 0.8568, "step": 4592 }, { "epoch": 0.6142016581973789, "grad_norm": 1.0891430377960205, "learning_rate": 1.8441576913270213e-05, "loss": 0.9021, "step": 4593 }, { "epoch": 0.6143353837924579, "grad_norm": 1.162308931350708, "learning_rate": 1.8440802840024824e-05, "loss": 1.0208, "step": 4594 }, { "epoch": 0.6144691093875367, "grad_norm": 1.1022157669067383, "learning_rate": 1.8440028590838975e-05, "loss": 0.923, "step": 4595 }, { "epoch": 0.6146028349826157, "grad_norm": 1.1547234058380127, "learning_rate": 1.8439254165728805e-05, "loss": 0.9396, "step": 4596 }, { "epoch": 0.6147365605776945, "grad_norm": 1.0485843420028687, "learning_rate": 1.8438479564710458e-05, "loss": 0.9575, "step": 4597 }, { "epoch": 0.6148702861727735, "grad_norm": 1.1971862316131592, "learning_rate": 1.8437704787800085e-05, "loss": 0.9345, "step": 4598 }, { "epoch": 0.6150040117678524, "grad_norm": 1.1647599935531616, "learning_rate": 1.8436929835013823e-05, "loss": 1.0097, "step": 4599 }, { "epoch": 0.6151377373629313, "grad_norm": 1.0963987112045288, "learning_rate": 1.843615470636783e-05, "loss": 1.0451, "step": 4600 }, { "epoch": 0.6152714629580102, "grad_norm": 1.0143883228302002, "learning_rate": 1.8435379401878274e-05, "loss": 0.9089, "step": 4601 }, { "epoch": 0.615405188553089, "grad_norm": 1.1572073698043823, "learning_rate": 1.84346039215613e-05, "loss": 0.8999, "step": 4602 }, { "epoch": 0.615538914148168, "grad_norm": 1.0570807456970215, "learning_rate": 1.8433828265433078e-05, "loss": 0.9531, "step": 4603 }, { "epoch": 0.6156726397432468, "grad_norm": 1.1528053283691406, "learning_rate": 1.843305243350978e-05, "loss": 1.0439, "step": 4604 }, { "epoch": 0.6158063653383258, "grad_norm": 1.0917670726776123, "learning_rate": 1.8432276425807566e-05, "loss": 1.0176, "step": 4605 }, { "epoch": 0.6159400909334046, "grad_norm": 1.0241259336471558, "learning_rate": 1.8431500242342623e-05, "loss": 0.94, "step": 4606 }, { "epoch": 0.6160738165284836, "grad_norm": 1.0566401481628418, "learning_rate": 1.843072388313113e-05, "loss": 0.916, "step": 4607 }, { "epoch": 0.6162075421235624, "grad_norm": 1.1511932611465454, "learning_rate": 1.8429947348189257e-05, "loss": 1.0375, "step": 4608 }, { "epoch": 0.6163412677186414, "grad_norm": 1.0721886157989502, "learning_rate": 1.8429170637533206e-05, "loss": 1.0052, "step": 4609 }, { "epoch": 0.6164749933137202, "grad_norm": 1.043841004371643, "learning_rate": 1.8428393751179154e-05, "loss": 1.0661, "step": 4610 }, { "epoch": 0.6166087189087991, "grad_norm": 1.049148440361023, "learning_rate": 1.84276166891433e-05, "loss": 1.0143, "step": 4611 }, { "epoch": 0.616742444503878, "grad_norm": 1.106191873550415, "learning_rate": 1.842683945144184e-05, "loss": 1.0137, "step": 4612 }, { "epoch": 0.6168761700989569, "grad_norm": 1.0513697862625122, "learning_rate": 1.8426062038090976e-05, "loss": 0.8247, "step": 4613 }, { "epoch": 0.6170098956940359, "grad_norm": 1.0746268033981323, "learning_rate": 1.8425284449106912e-05, "loss": 0.9646, "step": 4614 }, { "epoch": 0.6171436212891147, "grad_norm": 1.2171393632888794, "learning_rate": 1.8424506684505854e-05, "loss": 0.9417, "step": 4615 }, { "epoch": 0.6172773468841937, "grad_norm": 0.9602169990539551, "learning_rate": 1.8423728744304017e-05, "loss": 0.8997, "step": 4616 }, { "epoch": 0.6174110724792725, "grad_norm": 1.1689722537994385, "learning_rate": 1.8422950628517616e-05, "loss": 0.9718, "step": 4617 }, { "epoch": 0.6175447980743515, "grad_norm": 1.3014100790023804, "learning_rate": 1.8422172337162865e-05, "loss": 0.9334, "step": 4618 }, { "epoch": 0.6176785236694303, "grad_norm": 1.1799534559249878, "learning_rate": 1.8421393870255996e-05, "loss": 0.976, "step": 4619 }, { "epoch": 0.6178122492645092, "grad_norm": 1.1077040433883667, "learning_rate": 1.8420615227813227e-05, "loss": 0.9268, "step": 4620 }, { "epoch": 0.6179459748595881, "grad_norm": 1.1594727039337158, "learning_rate": 1.8419836409850794e-05, "loss": 0.9542, "step": 4621 }, { "epoch": 0.618079700454667, "grad_norm": 1.0005004405975342, "learning_rate": 1.8419057416384927e-05, "loss": 0.946, "step": 4622 }, { "epoch": 0.6182134260497459, "grad_norm": 1.129563808441162, "learning_rate": 1.8418278247431862e-05, "loss": 0.9058, "step": 4623 }, { "epoch": 0.6183471516448248, "grad_norm": 1.0353795289993286, "learning_rate": 1.8417498903007845e-05, "loss": 0.9461, "step": 4624 }, { "epoch": 0.6184808772399037, "grad_norm": 1.0598088502883911, "learning_rate": 1.8416719383129114e-05, "loss": 1.0126, "step": 4625 }, { "epoch": 0.6186146028349826, "grad_norm": 1.135843276977539, "learning_rate": 1.8415939687811927e-05, "loss": 1.058, "step": 4626 }, { "epoch": 0.6187483284300616, "grad_norm": 0.9938992857933044, "learning_rate": 1.8415159817072525e-05, "loss": 0.9312, "step": 4627 }, { "epoch": 0.6188820540251404, "grad_norm": 0.9811779856681824, "learning_rate": 1.841437977092717e-05, "loss": 0.9, "step": 4628 }, { "epoch": 0.6190157796202194, "grad_norm": 1.094675898551941, "learning_rate": 1.8413599549392126e-05, "loss": 1.0626, "step": 4629 }, { "epoch": 0.6191495052152982, "grad_norm": 1.2727317810058594, "learning_rate": 1.8412819152483643e-05, "loss": 0.9661, "step": 4630 }, { "epoch": 0.6192832308103771, "grad_norm": 1.0767731666564941, "learning_rate": 1.8412038580218002e-05, "loss": 1.0314, "step": 4631 }, { "epoch": 0.619416956405456, "grad_norm": 1.2193446159362793, "learning_rate": 1.8411257832611463e-05, "loss": 0.8313, "step": 4632 }, { "epoch": 0.6195506820005349, "grad_norm": 1.0851116180419922, "learning_rate": 1.84104769096803e-05, "loss": 0.9507, "step": 4633 }, { "epoch": 0.6196844075956138, "grad_norm": 1.064950942993164, "learning_rate": 1.8409695811440796e-05, "loss": 0.8756, "step": 4634 }, { "epoch": 0.6198181331906927, "grad_norm": 1.0337715148925781, "learning_rate": 1.840891453790923e-05, "loss": 0.8942, "step": 4635 }, { "epoch": 0.6199518587857716, "grad_norm": 1.1527636051177979, "learning_rate": 1.840813308910189e-05, "loss": 0.9237, "step": 4636 }, { "epoch": 0.6200855843808505, "grad_norm": 1.0866047143936157, "learning_rate": 1.8407351465035056e-05, "loss": 1.0748, "step": 4637 }, { "epoch": 0.6202193099759294, "grad_norm": 0.9458177089691162, "learning_rate": 1.8406569665725033e-05, "loss": 0.8488, "step": 4638 }, { "epoch": 0.6203530355710083, "grad_norm": 1.008725881576538, "learning_rate": 1.84057876911881e-05, "loss": 0.9238, "step": 4639 }, { "epoch": 0.6204867611660871, "grad_norm": 1.1742769479751587, "learning_rate": 1.840500554144057e-05, "loss": 1.0503, "step": 4640 }, { "epoch": 0.6206204867611661, "grad_norm": 1.0403498411178589, "learning_rate": 1.8404223216498747e-05, "loss": 0.8906, "step": 4641 }, { "epoch": 0.620754212356245, "grad_norm": 0.9641141295433044, "learning_rate": 1.840344071637893e-05, "loss": 0.8627, "step": 4642 }, { "epoch": 0.6208879379513239, "grad_norm": 0.9822632074356079, "learning_rate": 1.840265804109743e-05, "loss": 0.8605, "step": 4643 }, { "epoch": 0.6210216635464028, "grad_norm": 0.959027111530304, "learning_rate": 1.8401875190670565e-05, "loss": 0.8634, "step": 4644 }, { "epoch": 0.6211553891414817, "grad_norm": 1.1175315380096436, "learning_rate": 1.8401092165114654e-05, "loss": 0.9709, "step": 4645 }, { "epoch": 0.6212891147365606, "grad_norm": 1.069287657737732, "learning_rate": 1.840030896444601e-05, "loss": 1.0036, "step": 4646 }, { "epoch": 0.6214228403316395, "grad_norm": 1.1036072969436646, "learning_rate": 1.839952558868097e-05, "loss": 0.9933, "step": 4647 }, { "epoch": 0.6215565659267184, "grad_norm": 1.1730804443359375, "learning_rate": 1.8398742037835853e-05, "loss": 1.1598, "step": 4648 }, { "epoch": 0.6216902915217972, "grad_norm": 1.0492829084396362, "learning_rate": 1.8397958311927e-05, "loss": 0.8336, "step": 4649 }, { "epoch": 0.6218240171168762, "grad_norm": 0.9577750563621521, "learning_rate": 1.8397174410970736e-05, "loss": 0.9326, "step": 4650 }, { "epoch": 0.621957742711955, "grad_norm": 1.0941472053527832, "learning_rate": 1.8396390334983406e-05, "loss": 0.922, "step": 4651 }, { "epoch": 0.622091468307034, "grad_norm": 1.0802595615386963, "learning_rate": 1.839560608398136e-05, "loss": 0.8784, "step": 4652 }, { "epoch": 0.6222251939021128, "grad_norm": 1.0528788566589355, "learning_rate": 1.8394821657980936e-05, "loss": 0.8857, "step": 4653 }, { "epoch": 0.6223589194971918, "grad_norm": 1.1716103553771973, "learning_rate": 1.8394037056998485e-05, "loss": 0.9671, "step": 4654 }, { "epoch": 0.6224926450922706, "grad_norm": 1.091599941253662, "learning_rate": 1.8393252281050364e-05, "loss": 0.9228, "step": 4655 }, { "epoch": 0.6226263706873496, "grad_norm": 1.2274764776229858, "learning_rate": 1.839246733015293e-05, "loss": 0.9923, "step": 4656 }, { "epoch": 0.6227600962824285, "grad_norm": 1.0876737833023071, "learning_rate": 1.839168220432255e-05, "loss": 1.0606, "step": 4657 }, { "epoch": 0.6228938218775073, "grad_norm": 1.1105893850326538, "learning_rate": 1.8390896903575584e-05, "loss": 0.9703, "step": 4658 }, { "epoch": 0.6230275474725863, "grad_norm": 1.0752147436141968, "learning_rate": 1.8390111427928396e-05, "loss": 0.8449, "step": 4659 }, { "epoch": 0.6231612730676651, "grad_norm": 1.020026445388794, "learning_rate": 1.8389325777397368e-05, "loss": 0.9002, "step": 4660 }, { "epoch": 0.6232949986627441, "grad_norm": 1.0753370523452759, "learning_rate": 1.8388539951998875e-05, "loss": 1.0133, "step": 4661 }, { "epoch": 0.6234287242578229, "grad_norm": 1.231313705444336, "learning_rate": 1.8387753951749284e-05, "loss": 0.976, "step": 4662 }, { "epoch": 0.6235624498529019, "grad_norm": 1.132586121559143, "learning_rate": 1.8386967776664996e-05, "loss": 1.0082, "step": 4663 }, { "epoch": 0.6236961754479807, "grad_norm": 1.079953908920288, "learning_rate": 1.8386181426762387e-05, "loss": 1.018, "step": 4664 }, { "epoch": 0.6238299010430597, "grad_norm": 1.1663509607315063, "learning_rate": 1.8385394902057853e-05, "loss": 0.977, "step": 4665 }, { "epoch": 0.6239636266381385, "grad_norm": 1.2637856006622314, "learning_rate": 1.8384608202567786e-05, "loss": 0.9999, "step": 4666 }, { "epoch": 0.6240973522332174, "grad_norm": 1.0912624597549438, "learning_rate": 1.838382132830858e-05, "loss": 0.9518, "step": 4667 }, { "epoch": 0.6242310778282963, "grad_norm": 1.052543044090271, "learning_rate": 1.8383034279296646e-05, "loss": 0.9467, "step": 4668 }, { "epoch": 0.6243648034233752, "grad_norm": 1.0778412818908691, "learning_rate": 1.838224705554838e-05, "loss": 0.8677, "step": 4669 }, { "epoch": 0.6244985290184542, "grad_norm": 1.1696240901947021, "learning_rate": 1.83814596570802e-05, "loss": 0.9495, "step": 4670 }, { "epoch": 0.624632254613533, "grad_norm": 1.082571029663086, "learning_rate": 1.8380672083908512e-05, "loss": 0.9752, "step": 4671 }, { "epoch": 0.624765980208612, "grad_norm": 0.9724946618080139, "learning_rate": 1.837988433604973e-05, "loss": 0.915, "step": 4672 }, { "epoch": 0.6248997058036908, "grad_norm": 1.0701051950454712, "learning_rate": 1.837909641352028e-05, "loss": 1.0687, "step": 4673 }, { "epoch": 0.6250334313987698, "grad_norm": 1.0757858753204346, "learning_rate": 1.8378308316336585e-05, "loss": 0.9302, "step": 4674 }, { "epoch": 0.6251671569938486, "grad_norm": 1.1231738328933716, "learning_rate": 1.837752004451507e-05, "loss": 1.0059, "step": 4675 }, { "epoch": 0.6253008825889276, "grad_norm": 0.9858707785606384, "learning_rate": 1.837673159807216e-05, "loss": 1.0226, "step": 4676 }, { "epoch": 0.6254346081840064, "grad_norm": 1.1747640371322632, "learning_rate": 1.8375942977024305e-05, "loss": 1.1695, "step": 4677 }, { "epoch": 0.6255683337790853, "grad_norm": 1.0730839967727661, "learning_rate": 1.837515418138793e-05, "loss": 0.971, "step": 4678 }, { "epoch": 0.6257020593741642, "grad_norm": 1.0914748907089233, "learning_rate": 1.8374365211179475e-05, "loss": 1.0062, "step": 4679 }, { "epoch": 0.6258357849692431, "grad_norm": 1.0983752012252808, "learning_rate": 1.8373576066415397e-05, "loss": 1.0343, "step": 4680 }, { "epoch": 0.625969510564322, "grad_norm": 1.1198084354400635, "learning_rate": 1.8372786747112136e-05, "loss": 0.9457, "step": 4681 }, { "epoch": 0.6261032361594009, "grad_norm": 1.0994049310684204, "learning_rate": 1.8371997253286146e-05, "loss": 0.9689, "step": 4682 }, { "epoch": 0.6262369617544798, "grad_norm": 1.0492175817489624, "learning_rate": 1.8371207584953886e-05, "loss": 0.9985, "step": 4683 }, { "epoch": 0.6263706873495587, "grad_norm": 0.9940704107284546, "learning_rate": 1.8370417742131816e-05, "loss": 0.9362, "step": 4684 }, { "epoch": 0.6265044129446377, "grad_norm": 0.9964712858200073, "learning_rate": 1.8369627724836395e-05, "loss": 0.8798, "step": 4685 }, { "epoch": 0.6266381385397165, "grad_norm": 1.1672533750534058, "learning_rate": 1.8368837533084092e-05, "loss": 0.9855, "step": 4686 }, { "epoch": 0.6267718641347954, "grad_norm": 0.9894228577613831, "learning_rate": 1.8368047166891382e-05, "loss": 0.9179, "step": 4687 }, { "epoch": 0.6269055897298743, "grad_norm": 1.0386804342269897, "learning_rate": 1.8367256626274737e-05, "loss": 0.9276, "step": 4688 }, { "epoch": 0.6270393153249532, "grad_norm": 1.2990498542785645, "learning_rate": 1.836646591125063e-05, "loss": 0.9769, "step": 4689 }, { "epoch": 0.6271730409200321, "grad_norm": 1.1790125370025635, "learning_rate": 1.8365675021835548e-05, "loss": 1.0282, "step": 4690 }, { "epoch": 0.627306766515111, "grad_norm": 1.154527187347412, "learning_rate": 1.8364883958045978e-05, "loss": 0.9978, "step": 4691 }, { "epoch": 0.6274404921101899, "grad_norm": 1.2219253778457642, "learning_rate": 1.8364092719898402e-05, "loss": 0.9866, "step": 4692 }, { "epoch": 0.6275742177052688, "grad_norm": 1.1092414855957031, "learning_rate": 1.836330130740932e-05, "loss": 1.0046, "step": 4693 }, { "epoch": 0.6277079433003477, "grad_norm": 1.0951077938079834, "learning_rate": 1.8362509720595225e-05, "loss": 0.8354, "step": 4694 }, { "epoch": 0.6278416688954266, "grad_norm": 1.03300940990448, "learning_rate": 1.8361717959472618e-05, "loss": 0.902, "step": 4695 }, { "epoch": 0.6279753944905054, "grad_norm": 1.1207783222198486, "learning_rate": 1.8360926024058e-05, "loss": 0.9704, "step": 4696 }, { "epoch": 0.6281091200855844, "grad_norm": 1.087233543395996, "learning_rate": 1.836013391436788e-05, "loss": 0.9068, "step": 4697 }, { "epoch": 0.6282428456806632, "grad_norm": 0.927949845790863, "learning_rate": 1.8359341630418766e-05, "loss": 0.8789, "step": 4698 }, { "epoch": 0.6283765712757422, "grad_norm": 1.1531922817230225, "learning_rate": 1.8358549172227176e-05, "loss": 0.8717, "step": 4699 }, { "epoch": 0.628510296870821, "grad_norm": 1.162847638130188, "learning_rate": 1.8357756539809627e-05, "loss": 0.9495, "step": 4700 }, { "epoch": 0.6286440224659, "grad_norm": 1.2207088470458984, "learning_rate": 1.8356963733182642e-05, "loss": 0.9467, "step": 4701 }, { "epoch": 0.6287777480609789, "grad_norm": 1.0907632112503052, "learning_rate": 1.835617075236274e-05, "loss": 1.0881, "step": 4702 }, { "epoch": 0.6289114736560578, "grad_norm": 1.1067560911178589, "learning_rate": 1.835537759736646e-05, "loss": 0.949, "step": 4703 }, { "epoch": 0.6290451992511367, "grad_norm": 1.0939924716949463, "learning_rate": 1.8354584268210328e-05, "loss": 0.9371, "step": 4704 }, { "epoch": 0.6291789248462155, "grad_norm": 1.0594813823699951, "learning_rate": 1.835379076491088e-05, "loss": 0.9368, "step": 4705 }, { "epoch": 0.6293126504412945, "grad_norm": 1.1220465898513794, "learning_rate": 1.8352997087484657e-05, "loss": 0.9513, "step": 4706 }, { "epoch": 0.6294463760363733, "grad_norm": 1.0112025737762451, "learning_rate": 1.8352203235948202e-05, "loss": 0.9102, "step": 4707 }, { "epoch": 0.6295801016314523, "grad_norm": 1.080566644668579, "learning_rate": 1.8351409210318064e-05, "loss": 0.9121, "step": 4708 }, { "epoch": 0.6297138272265311, "grad_norm": 1.19144868850708, "learning_rate": 1.8350615010610796e-05, "loss": 0.9446, "step": 4709 }, { "epoch": 0.6298475528216101, "grad_norm": 1.1358686685562134, "learning_rate": 1.8349820636842944e-05, "loss": 0.9419, "step": 4710 }, { "epoch": 0.6299812784166889, "grad_norm": 1.1688398122787476, "learning_rate": 1.8349026089031072e-05, "loss": 0.9347, "step": 4711 }, { "epoch": 0.6301150040117679, "grad_norm": 1.1661232709884644, "learning_rate": 1.834823136719174e-05, "loss": 1.0436, "step": 4712 }, { "epoch": 0.6302487296068467, "grad_norm": 1.099495530128479, "learning_rate": 1.8347436471341514e-05, "loss": 0.8125, "step": 4713 }, { "epoch": 0.6303824552019256, "grad_norm": 1.0422277450561523, "learning_rate": 1.834664140149696e-05, "loss": 0.911, "step": 4714 }, { "epoch": 0.6305161807970046, "grad_norm": 1.060258150100708, "learning_rate": 1.8345846157674657e-05, "loss": 0.8364, "step": 4715 }, { "epoch": 0.6306499063920834, "grad_norm": 1.0634009838104248, "learning_rate": 1.8345050739891175e-05, "loss": 0.9344, "step": 4716 }, { "epoch": 0.6307836319871624, "grad_norm": 0.9452177286148071, "learning_rate": 1.8344255148163095e-05, "loss": 0.8351, "step": 4717 }, { "epoch": 0.6309173575822412, "grad_norm": 1.1992483139038086, "learning_rate": 1.8343459382507003e-05, "loss": 0.8849, "step": 4718 }, { "epoch": 0.6310510831773202, "grad_norm": 1.1165494918823242, "learning_rate": 1.834266344293948e-05, "loss": 0.9358, "step": 4719 }, { "epoch": 0.631184808772399, "grad_norm": 1.1300991773605347, "learning_rate": 1.8341867329477125e-05, "loss": 0.9112, "step": 4720 }, { "epoch": 0.631318534367478, "grad_norm": 1.1435790061950684, "learning_rate": 1.834107104213653e-05, "loss": 1.1174, "step": 4721 }, { "epoch": 0.6314522599625568, "grad_norm": 1.01833176612854, "learning_rate": 1.8340274580934284e-05, "loss": 0.9511, "step": 4722 }, { "epoch": 0.6315859855576357, "grad_norm": 1.0562607049942017, "learning_rate": 1.8339477945886998e-05, "loss": 0.9614, "step": 4723 }, { "epoch": 0.6317197111527146, "grad_norm": 1.1323667764663696, "learning_rate": 1.833868113701127e-05, "loss": 0.9349, "step": 4724 }, { "epoch": 0.6318534367477935, "grad_norm": 1.2900893688201904, "learning_rate": 1.833788415432372e-05, "loss": 1.084, "step": 4725 }, { "epoch": 0.6319871623428724, "grad_norm": 1.0599790811538696, "learning_rate": 1.8337086997840952e-05, "loss": 0.8973, "step": 4726 }, { "epoch": 0.6321208879379513, "grad_norm": 1.0545300245285034, "learning_rate": 1.833628966757958e-05, "loss": 0.8517, "step": 4727 }, { "epoch": 0.6322546135330303, "grad_norm": 1.0771454572677612, "learning_rate": 1.833549216355623e-05, "loss": 0.9529, "step": 4728 }, { "epoch": 0.6323883391281091, "grad_norm": 1.005878210067749, "learning_rate": 1.833469448578752e-05, "loss": 0.8878, "step": 4729 }, { "epoch": 0.6325220647231881, "grad_norm": 1.2105047702789307, "learning_rate": 1.833389663429008e-05, "loss": 0.9824, "step": 4730 }, { "epoch": 0.6326557903182669, "grad_norm": 1.0651311874389648, "learning_rate": 1.833309860908054e-05, "loss": 1.0532, "step": 4731 }, { "epoch": 0.6327895159133459, "grad_norm": 0.9635155200958252, "learning_rate": 1.833230041017553e-05, "loss": 0.9934, "step": 4732 }, { "epoch": 0.6329232415084247, "grad_norm": 1.1282401084899902, "learning_rate": 1.8331502037591696e-05, "loss": 0.8982, "step": 4733 }, { "epoch": 0.6330569671035036, "grad_norm": 1.0344957113265991, "learning_rate": 1.8330703491345668e-05, "loss": 0.9306, "step": 4734 }, { "epoch": 0.6331906926985825, "grad_norm": 0.9527806639671326, "learning_rate": 1.8329904771454105e-05, "loss": 0.8377, "step": 4735 }, { "epoch": 0.6333244182936614, "grad_norm": 1.1379767656326294, "learning_rate": 1.832910587793364e-05, "loss": 1.023, "step": 4736 }, { "epoch": 0.6334581438887403, "grad_norm": 0.9548843502998352, "learning_rate": 1.832830681080094e-05, "loss": 0.9354, "step": 4737 }, { "epoch": 0.6335918694838192, "grad_norm": 1.007220983505249, "learning_rate": 1.8327507570072648e-05, "loss": 0.9749, "step": 4738 }, { "epoch": 0.6337255950788981, "grad_norm": 1.145063042640686, "learning_rate": 1.8326708155765436e-05, "loss": 0.9314, "step": 4739 }, { "epoch": 0.633859320673977, "grad_norm": 1.093774437904358, "learning_rate": 1.8325908567895955e-05, "loss": 0.9659, "step": 4740 }, { "epoch": 0.633993046269056, "grad_norm": 1.2680492401123047, "learning_rate": 1.832510880648088e-05, "loss": 1.022, "step": 4741 }, { "epoch": 0.6341267718641348, "grad_norm": 0.9581674337387085, "learning_rate": 1.8324308871536877e-05, "loss": 0.9206, "step": 4742 }, { "epoch": 0.6342604974592136, "grad_norm": 1.139790654182434, "learning_rate": 1.832350876308062e-05, "loss": 0.9434, "step": 4743 }, { "epoch": 0.6343942230542926, "grad_norm": 1.0993587970733643, "learning_rate": 1.8322708481128787e-05, "loss": 1.1061, "step": 4744 }, { "epoch": 0.6345279486493715, "grad_norm": 0.9931357502937317, "learning_rate": 1.832190802569806e-05, "loss": 0.9053, "step": 4745 }, { "epoch": 0.6346616742444504, "grad_norm": 0.9859405159950256, "learning_rate": 1.8321107396805126e-05, "loss": 0.9323, "step": 4746 }, { "epoch": 0.6347953998395293, "grad_norm": 1.113785743713379, "learning_rate": 1.8320306594466667e-05, "loss": 0.9144, "step": 4747 }, { "epoch": 0.6349291254346082, "grad_norm": 1.0219230651855469, "learning_rate": 1.8319505618699384e-05, "loss": 0.8915, "step": 4748 }, { "epoch": 0.6350628510296871, "grad_norm": 1.0530736446380615, "learning_rate": 1.831870446951996e-05, "loss": 0.8833, "step": 4749 }, { "epoch": 0.635196576624766, "grad_norm": 1.1611578464508057, "learning_rate": 1.8317903146945106e-05, "loss": 0.961, "step": 4750 }, { "epoch": 0.6353303022198449, "grad_norm": 1.1257898807525635, "learning_rate": 1.831710165099152e-05, "loss": 1.0172, "step": 4751 }, { "epoch": 0.6354640278149237, "grad_norm": 1.1275643110275269, "learning_rate": 1.831629998167591e-05, "loss": 0.8632, "step": 4752 }, { "epoch": 0.6355977534100027, "grad_norm": 1.0739939212799072, "learning_rate": 1.8315498139014982e-05, "loss": 0.9498, "step": 4753 }, { "epoch": 0.6357314790050815, "grad_norm": 0.9668481945991516, "learning_rate": 1.8314696123025456e-05, "loss": 0.903, "step": 4754 }, { "epoch": 0.6358652046001605, "grad_norm": 1.1157217025756836, "learning_rate": 1.831389393372404e-05, "loss": 0.8521, "step": 4755 }, { "epoch": 0.6359989301952393, "grad_norm": 1.0479258298873901, "learning_rate": 1.8313091571127467e-05, "loss": 0.9637, "step": 4756 }, { "epoch": 0.6361326557903183, "grad_norm": 1.1885377168655396, "learning_rate": 1.8312289035252448e-05, "loss": 0.9181, "step": 4757 }, { "epoch": 0.6362663813853972, "grad_norm": 0.9766324758529663, "learning_rate": 1.8311486326115726e-05, "loss": 0.8511, "step": 4758 }, { "epoch": 0.6364001069804761, "grad_norm": 1.0711909532546997, "learning_rate": 1.8310683443734016e-05, "loss": 0.9584, "step": 4759 }, { "epoch": 0.636533832575555, "grad_norm": 1.1418178081512451, "learning_rate": 1.8309880388124067e-05, "loss": 0.9871, "step": 4760 }, { "epoch": 0.6366675581706338, "grad_norm": 0.9983953833580017, "learning_rate": 1.8309077159302612e-05, "loss": 0.9531, "step": 4761 }, { "epoch": 0.6368012837657128, "grad_norm": 1.1794579029083252, "learning_rate": 1.8308273757286396e-05, "loss": 1.0669, "step": 4762 }, { "epoch": 0.6369350093607916, "grad_norm": 1.1387255191802979, "learning_rate": 1.8307470182092163e-05, "loss": 0.907, "step": 4763 }, { "epoch": 0.6370687349558706, "grad_norm": 0.9812789559364319, "learning_rate": 1.8306666433736664e-05, "loss": 0.8724, "step": 4764 }, { "epoch": 0.6372024605509494, "grad_norm": 1.073772668838501, "learning_rate": 1.830586251223665e-05, "loss": 0.9547, "step": 4765 }, { "epoch": 0.6373361861460284, "grad_norm": 0.9252293109893799, "learning_rate": 1.830505841760888e-05, "loss": 0.8231, "step": 4766 }, { "epoch": 0.6374699117411072, "grad_norm": 1.2247083187103271, "learning_rate": 1.8304254149870114e-05, "loss": 1.0603, "step": 4767 }, { "epoch": 0.6376036373361862, "grad_norm": 1.1987895965576172, "learning_rate": 1.830344970903712e-05, "loss": 1.0693, "step": 4768 }, { "epoch": 0.637737362931265, "grad_norm": 1.0926916599273682, "learning_rate": 1.830264509512666e-05, "loss": 0.9995, "step": 4769 }, { "epoch": 0.6378710885263439, "grad_norm": 1.1352436542510986, "learning_rate": 1.8301840308155507e-05, "loss": 0.8883, "step": 4770 }, { "epoch": 0.6380048141214228, "grad_norm": 1.1419860124588013, "learning_rate": 1.830103534814044e-05, "loss": 0.9494, "step": 4771 }, { "epoch": 0.6381385397165017, "grad_norm": 1.1492091417312622, "learning_rate": 1.830023021509823e-05, "loss": 1.0767, "step": 4772 }, { "epoch": 0.6382722653115807, "grad_norm": 1.04586660861969, "learning_rate": 1.8299424909045665e-05, "loss": 0.9389, "step": 4773 }, { "epoch": 0.6384059909066595, "grad_norm": 1.0014731884002686, "learning_rate": 1.829861942999953e-05, "loss": 0.8989, "step": 4774 }, { "epoch": 0.6385397165017385, "grad_norm": 1.0885424613952637, "learning_rate": 1.8297813777976613e-05, "loss": 0.8818, "step": 4775 }, { "epoch": 0.6386734420968173, "grad_norm": 1.008792519569397, "learning_rate": 1.8297007952993713e-05, "loss": 0.8213, "step": 4776 }, { "epoch": 0.6388071676918963, "grad_norm": 1.1058796644210815, "learning_rate": 1.8296201955067614e-05, "loss": 0.9579, "step": 4777 }, { "epoch": 0.6389408932869751, "grad_norm": 0.9740918278694153, "learning_rate": 1.829539578421513e-05, "loss": 0.8771, "step": 4778 }, { "epoch": 0.6390746188820541, "grad_norm": 1.1766633987426758, "learning_rate": 1.8294589440453056e-05, "loss": 0.8778, "step": 4779 }, { "epoch": 0.6392083444771329, "grad_norm": 1.13731849193573, "learning_rate": 1.8293782923798203e-05, "loss": 0.9924, "step": 4780 }, { "epoch": 0.6393420700722118, "grad_norm": 1.1319187879562378, "learning_rate": 1.829297623426738e-05, "loss": 0.9942, "step": 4781 }, { "epoch": 0.6394757956672907, "grad_norm": 1.021061658859253, "learning_rate": 1.82921693718774e-05, "loss": 0.9521, "step": 4782 }, { "epoch": 0.6396095212623696, "grad_norm": 1.1099739074707031, "learning_rate": 1.8291362336645088e-05, "loss": 1.0728, "step": 4783 }, { "epoch": 0.6397432468574485, "grad_norm": 1.0654855966567993, "learning_rate": 1.8290555128587263e-05, "loss": 0.9691, "step": 4784 }, { "epoch": 0.6398769724525274, "grad_norm": 1.0759185552597046, "learning_rate": 1.8289747747720747e-05, "loss": 0.8999, "step": 4785 }, { "epoch": 0.6400106980476064, "grad_norm": 0.9984836578369141, "learning_rate": 1.8288940194062373e-05, "loss": 0.9299, "step": 4786 }, { "epoch": 0.6401444236426852, "grad_norm": 1.1172863245010376, "learning_rate": 1.8288132467628973e-05, "loss": 0.9654, "step": 4787 }, { "epoch": 0.6402781492377642, "grad_norm": 1.085368037223816, "learning_rate": 1.8287324568437383e-05, "loss": 0.9351, "step": 4788 }, { "epoch": 0.640411874832843, "grad_norm": 1.214124321937561, "learning_rate": 1.828651649650444e-05, "loss": 0.8691, "step": 4789 }, { "epoch": 0.6405456004279219, "grad_norm": 1.0281245708465576, "learning_rate": 1.8285708251846994e-05, "loss": 0.913, "step": 4790 }, { "epoch": 0.6406793260230008, "grad_norm": 1.0462946891784668, "learning_rate": 1.8284899834481883e-05, "loss": 0.897, "step": 4791 }, { "epoch": 0.6408130516180797, "grad_norm": 1.0465197563171387, "learning_rate": 1.8284091244425965e-05, "loss": 1.0796, "step": 4792 }, { "epoch": 0.6409467772131586, "grad_norm": 1.0862988233566284, "learning_rate": 1.8283282481696093e-05, "loss": 0.9644, "step": 4793 }, { "epoch": 0.6410805028082375, "grad_norm": 0.9503269791603088, "learning_rate": 1.828247354630912e-05, "loss": 0.7391, "step": 4794 }, { "epoch": 0.6412142284033164, "grad_norm": 0.9751207828521729, "learning_rate": 1.8281664438281918e-05, "loss": 0.8947, "step": 4795 }, { "epoch": 0.6413479539983953, "grad_norm": 0.9611235857009888, "learning_rate": 1.8280855157631337e-05, "loss": 0.773, "step": 4796 }, { "epoch": 0.6414816795934742, "grad_norm": 0.935499906539917, "learning_rate": 1.8280045704374263e-05, "loss": 0.8314, "step": 4797 }, { "epoch": 0.6416154051885531, "grad_norm": 1.11974036693573, "learning_rate": 1.8279236078527555e-05, "loss": 1.0479, "step": 4798 }, { "epoch": 0.6417491307836319, "grad_norm": 1.0581741333007812, "learning_rate": 1.8278426280108092e-05, "loss": 0.9767, "step": 4799 }, { "epoch": 0.6418828563787109, "grad_norm": 1.095953106880188, "learning_rate": 1.8277616309132758e-05, "loss": 1.1055, "step": 4800 }, { "epoch": 0.6420165819737897, "grad_norm": 1.1555147171020508, "learning_rate": 1.8276806165618432e-05, "loss": 0.9283, "step": 4801 }, { "epoch": 0.6421503075688687, "grad_norm": 1.1237616539001465, "learning_rate": 1.8275995849582e-05, "loss": 0.9517, "step": 4802 }, { "epoch": 0.6422840331639476, "grad_norm": 1.1533702611923218, "learning_rate": 1.8275185361040357e-05, "loss": 0.8827, "step": 4803 }, { "epoch": 0.6424177587590265, "grad_norm": 1.1576662063598633, "learning_rate": 1.8274374700010387e-05, "loss": 0.9852, "step": 4804 }, { "epoch": 0.6425514843541054, "grad_norm": 1.0092716217041016, "learning_rate": 1.8273563866509e-05, "loss": 0.8786, "step": 4805 }, { "epoch": 0.6426852099491843, "grad_norm": 1.0286104679107666, "learning_rate": 1.8272752860553088e-05, "loss": 0.9335, "step": 4806 }, { "epoch": 0.6428189355442632, "grad_norm": 1.1145694255828857, "learning_rate": 1.8271941682159562e-05, "loss": 1.0152, "step": 4807 }, { "epoch": 0.642952661139342, "grad_norm": 1.2004724740982056, "learning_rate": 1.8271130331345324e-05, "loss": 0.9799, "step": 4808 }, { "epoch": 0.643086386734421, "grad_norm": 1.161144733428955, "learning_rate": 1.827031880812729e-05, "loss": 0.9336, "step": 4809 }, { "epoch": 0.6432201123294998, "grad_norm": 1.011474370956421, "learning_rate": 1.8269507112522375e-05, "loss": 0.9388, "step": 4810 }, { "epoch": 0.6433538379245788, "grad_norm": 1.0157440900802612, "learning_rate": 1.82686952445475e-05, "loss": 0.9514, "step": 4811 }, { "epoch": 0.6434875635196576, "grad_norm": 1.1161648035049438, "learning_rate": 1.826788320421958e-05, "loss": 0.8761, "step": 4812 }, { "epoch": 0.6436212891147366, "grad_norm": 1.0362584590911865, "learning_rate": 1.8267070991555546e-05, "loss": 0.9569, "step": 4813 }, { "epoch": 0.6437550147098154, "grad_norm": 1.1404283046722412, "learning_rate": 1.826625860657233e-05, "loss": 0.9438, "step": 4814 }, { "epoch": 0.6438887403048944, "grad_norm": 0.9849143028259277, "learning_rate": 1.8265446049286864e-05, "loss": 0.883, "step": 4815 }, { "epoch": 0.6440224658999733, "grad_norm": 1.10081946849823, "learning_rate": 1.8264633319716084e-05, "loss": 0.8855, "step": 4816 }, { "epoch": 0.6441561914950521, "grad_norm": 1.2425425052642822, "learning_rate": 1.8263820417876926e-05, "loss": 1.0352, "step": 4817 }, { "epoch": 0.6442899170901311, "grad_norm": 0.9970521330833435, "learning_rate": 1.8263007343786347e-05, "loss": 1.0226, "step": 4818 }, { "epoch": 0.6444236426852099, "grad_norm": 0.9919441938400269, "learning_rate": 1.8262194097461284e-05, "loss": 0.9416, "step": 4819 }, { "epoch": 0.6445573682802889, "grad_norm": 1.0043584108352661, "learning_rate": 1.826138067891869e-05, "loss": 0.9224, "step": 4820 }, { "epoch": 0.6446910938753677, "grad_norm": 0.9892141819000244, "learning_rate": 1.826056708817552e-05, "loss": 0.9419, "step": 4821 }, { "epoch": 0.6448248194704467, "grad_norm": 0.9406230449676514, "learning_rate": 1.825975332524873e-05, "loss": 0.9413, "step": 4822 }, { "epoch": 0.6449585450655255, "grad_norm": 1.1169888973236084, "learning_rate": 1.8258939390155294e-05, "loss": 1.0176, "step": 4823 }, { "epoch": 0.6450922706606045, "grad_norm": 1.1389446258544922, "learning_rate": 1.8258125282912168e-05, "loss": 0.9899, "step": 4824 }, { "epoch": 0.6452259962556833, "grad_norm": 0.9809902906417847, "learning_rate": 1.8257311003536317e-05, "loss": 0.8799, "step": 4825 }, { "epoch": 0.6453597218507623, "grad_norm": 1.0323007106781006, "learning_rate": 1.8256496552044724e-05, "loss": 1.05, "step": 4826 }, { "epoch": 0.6454934474458411, "grad_norm": 1.0785446166992188, "learning_rate": 1.825568192845436e-05, "loss": 0.9471, "step": 4827 }, { "epoch": 0.64562717304092, "grad_norm": 0.957306444644928, "learning_rate": 1.8254867132782203e-05, "loss": 0.8708, "step": 4828 }, { "epoch": 0.645760898635999, "grad_norm": 1.0840375423431396, "learning_rate": 1.8254052165045245e-05, "loss": 0.9732, "step": 4829 }, { "epoch": 0.6458946242310778, "grad_norm": 1.1256452798843384, "learning_rate": 1.8253237025260465e-05, "loss": 0.9341, "step": 4830 }, { "epoch": 0.6460283498261568, "grad_norm": 1.032884120941162, "learning_rate": 1.8252421713444856e-05, "loss": 1.0918, "step": 4831 }, { "epoch": 0.6461620754212356, "grad_norm": 1.1249043941497803, "learning_rate": 1.8251606229615416e-05, "loss": 0.9002, "step": 4832 }, { "epoch": 0.6462958010163146, "grad_norm": 1.1865218877792358, "learning_rate": 1.8250790573789135e-05, "loss": 0.9437, "step": 4833 }, { "epoch": 0.6464295266113934, "grad_norm": 1.162433385848999, "learning_rate": 1.8249974745983023e-05, "loss": 0.9614, "step": 4834 }, { "epoch": 0.6465632522064724, "grad_norm": 1.1015671491622925, "learning_rate": 1.8249158746214085e-05, "loss": 0.8857, "step": 4835 }, { "epoch": 0.6466969778015512, "grad_norm": 1.0929932594299316, "learning_rate": 1.824834257449932e-05, "loss": 0.9356, "step": 4836 }, { "epoch": 0.6468307033966301, "grad_norm": 1.180952787399292, "learning_rate": 1.824752623085575e-05, "loss": 0.9681, "step": 4837 }, { "epoch": 0.646964428991709, "grad_norm": 1.0900529623031616, "learning_rate": 1.824670971530039e-05, "loss": 1.019, "step": 4838 }, { "epoch": 0.6470981545867879, "grad_norm": 1.1713682413101196, "learning_rate": 1.8245893027850255e-05, "loss": 1.0454, "step": 4839 }, { "epoch": 0.6472318801818668, "grad_norm": 1.0716418027877808, "learning_rate": 1.824507616852237e-05, "loss": 0.9767, "step": 4840 }, { "epoch": 0.6473656057769457, "grad_norm": 1.1568701267242432, "learning_rate": 1.8244259137333763e-05, "loss": 0.9264, "step": 4841 }, { "epoch": 0.6474993313720246, "grad_norm": 0.9616028070449829, "learning_rate": 1.8243441934301462e-05, "loss": 0.8802, "step": 4842 }, { "epoch": 0.6476330569671035, "grad_norm": 1.1977514028549194, "learning_rate": 1.82426245594425e-05, "loss": 1.0983, "step": 4843 }, { "epoch": 0.6477667825621825, "grad_norm": 1.1566635370254517, "learning_rate": 1.824180701277392e-05, "loss": 0.976, "step": 4844 }, { "epoch": 0.6479005081572613, "grad_norm": 1.0821622610092163, "learning_rate": 1.8240989294312758e-05, "loss": 0.8601, "step": 4845 }, { "epoch": 0.6480342337523401, "grad_norm": 1.052411675453186, "learning_rate": 1.824017140407606e-05, "loss": 0.903, "step": 4846 }, { "epoch": 0.6481679593474191, "grad_norm": 1.0975204706192017, "learning_rate": 1.8239353342080874e-05, "loss": 1.0223, "step": 4847 }, { "epoch": 0.648301684942498, "grad_norm": 1.213929533958435, "learning_rate": 1.8238535108344253e-05, "loss": 1.0235, "step": 4848 }, { "epoch": 0.6484354105375769, "grad_norm": 1.0271984338760376, "learning_rate": 1.823771670288325e-05, "loss": 0.988, "step": 4849 }, { "epoch": 0.6485691361326558, "grad_norm": 1.079950213432312, "learning_rate": 1.8236898125714925e-05, "loss": 0.8478, "step": 4850 }, { "epoch": 0.6487028617277347, "grad_norm": 1.2224106788635254, "learning_rate": 1.823607937685634e-05, "loss": 0.9977, "step": 4851 }, { "epoch": 0.6488365873228136, "grad_norm": 1.0794486999511719, "learning_rate": 1.8235260456324562e-05, "loss": 0.9593, "step": 4852 }, { "epoch": 0.6489703129178925, "grad_norm": 1.0531206130981445, "learning_rate": 1.823444136413666e-05, "loss": 1.0668, "step": 4853 }, { "epoch": 0.6491040385129714, "grad_norm": 1.0323549509048462, "learning_rate": 1.8233622100309705e-05, "loss": 0.9464, "step": 4854 }, { "epoch": 0.6492377641080502, "grad_norm": 1.0065720081329346, "learning_rate": 1.8232802664860783e-05, "loss": 0.9281, "step": 4855 }, { "epoch": 0.6493714897031292, "grad_norm": 1.1893373727798462, "learning_rate": 1.823198305780696e-05, "loss": 1.0031, "step": 4856 }, { "epoch": 0.649505215298208, "grad_norm": 1.0254460573196411, "learning_rate": 1.823116327916533e-05, "loss": 0.8221, "step": 4857 }, { "epoch": 0.649638940893287, "grad_norm": 1.0601791143417358, "learning_rate": 1.823034332895298e-05, "loss": 0.901, "step": 4858 }, { "epoch": 0.6497726664883658, "grad_norm": 1.052120566368103, "learning_rate": 1.8229523207186995e-05, "loss": 0.9601, "step": 4859 }, { "epoch": 0.6499063920834448, "grad_norm": 1.0642207860946655, "learning_rate": 1.8228702913884476e-05, "loss": 0.9759, "step": 4860 }, { "epoch": 0.6500401176785237, "grad_norm": 1.2952349185943604, "learning_rate": 1.8227882449062516e-05, "loss": 1.052, "step": 4861 }, { "epoch": 0.6501738432736026, "grad_norm": 1.1930335760116577, "learning_rate": 1.8227061812738223e-05, "loss": 1.0181, "step": 4862 }, { "epoch": 0.6503075688686815, "grad_norm": 1.1082086563110352, "learning_rate": 1.82262410049287e-05, "loss": 0.9708, "step": 4863 }, { "epoch": 0.6504412944637603, "grad_norm": 1.0963523387908936, "learning_rate": 1.822542002565105e-05, "loss": 0.9398, "step": 4864 }, { "epoch": 0.6505750200588393, "grad_norm": 1.01266610622406, "learning_rate": 1.822459887492239e-05, "loss": 0.9298, "step": 4865 }, { "epoch": 0.6507087456539181, "grad_norm": 1.793610692024231, "learning_rate": 1.822377755275984e-05, "loss": 0.9023, "step": 4866 }, { "epoch": 0.6508424712489971, "grad_norm": 1.0492956638336182, "learning_rate": 1.822295605918052e-05, "loss": 0.9671, "step": 4867 }, { "epoch": 0.6509761968440759, "grad_norm": 1.0646440982818604, "learning_rate": 1.8222134394201543e-05, "loss": 0.9851, "step": 4868 }, { "epoch": 0.6511099224391549, "grad_norm": 1.0914469957351685, "learning_rate": 1.8221312557840047e-05, "loss": 1.0447, "step": 4869 }, { "epoch": 0.6512436480342337, "grad_norm": 1.0594173669815063, "learning_rate": 1.8220490550113153e-05, "loss": 1.0577, "step": 4870 }, { "epoch": 0.6513773736293127, "grad_norm": 1.0965802669525146, "learning_rate": 1.8219668371038002e-05, "loss": 1.0581, "step": 4871 }, { "epoch": 0.6515110992243915, "grad_norm": 0.9848311543464661, "learning_rate": 1.8218846020631725e-05, "loss": 0.8925, "step": 4872 }, { "epoch": 0.6516448248194704, "grad_norm": 1.1393353939056396, "learning_rate": 1.8218023498911476e-05, "loss": 0.9456, "step": 4873 }, { "epoch": 0.6517785504145494, "grad_norm": 1.1741771697998047, "learning_rate": 1.8217200805894382e-05, "loss": 0.9856, "step": 4874 }, { "epoch": 0.6519122760096282, "grad_norm": 1.1804367303848267, "learning_rate": 1.8216377941597607e-05, "loss": 0.8658, "step": 4875 }, { "epoch": 0.6520460016047072, "grad_norm": 1.0566893815994263, "learning_rate": 1.8215554906038292e-05, "loss": 0.9917, "step": 4876 }, { "epoch": 0.652179727199786, "grad_norm": 1.1456278562545776, "learning_rate": 1.8214731699233597e-05, "loss": 1.0047, "step": 4877 }, { "epoch": 0.652313452794865, "grad_norm": 1.0901113748550415, "learning_rate": 1.821390832120068e-05, "loss": 0.9165, "step": 4878 }, { "epoch": 0.6524471783899438, "grad_norm": 1.0466879606246948, "learning_rate": 1.8213084771956707e-05, "loss": 0.9102, "step": 4879 }, { "epoch": 0.6525809039850228, "grad_norm": 1.1013215780258179, "learning_rate": 1.821226105151884e-05, "loss": 1.0052, "step": 4880 }, { "epoch": 0.6527146295801016, "grad_norm": 1.161557912826538, "learning_rate": 1.821143715990425e-05, "loss": 1.0318, "step": 4881 }, { "epoch": 0.6528483551751806, "grad_norm": 1.0900743007659912, "learning_rate": 1.821061309713011e-05, "loss": 0.894, "step": 4882 }, { "epoch": 0.6529820807702594, "grad_norm": 1.0907121896743774, "learning_rate": 1.8209788863213594e-05, "loss": 0.9536, "step": 4883 }, { "epoch": 0.6531158063653383, "grad_norm": 0.998594343662262, "learning_rate": 1.8208964458171884e-05, "loss": 0.9984, "step": 4884 }, { "epoch": 0.6532495319604172, "grad_norm": 1.1376252174377441, "learning_rate": 1.820813988202217e-05, "loss": 0.955, "step": 4885 }, { "epoch": 0.6533832575554961, "grad_norm": 1.083677887916565, "learning_rate": 1.8207315134781633e-05, "loss": 0.9364, "step": 4886 }, { "epoch": 0.653516983150575, "grad_norm": 1.0465039014816284, "learning_rate": 1.8206490216467464e-05, "loss": 0.8135, "step": 4887 }, { "epoch": 0.6536507087456539, "grad_norm": 0.9878882765769958, "learning_rate": 1.8205665127096855e-05, "loss": 0.8827, "step": 4888 }, { "epoch": 0.6537844343407329, "grad_norm": 1.1272354125976562, "learning_rate": 1.8204839866687014e-05, "loss": 1.0562, "step": 4889 }, { "epoch": 0.6539181599358117, "grad_norm": 1.004042387008667, "learning_rate": 1.8204014435255136e-05, "loss": 0.8929, "step": 4890 }, { "epoch": 0.6540518855308907, "grad_norm": 0.9849901795387268, "learning_rate": 1.820318883281843e-05, "loss": 0.9727, "step": 4891 }, { "epoch": 0.6541856111259695, "grad_norm": 1.2953550815582275, "learning_rate": 1.82023630593941e-05, "loss": 0.9918, "step": 4892 }, { "epoch": 0.6543193367210484, "grad_norm": 1.1145843267440796, "learning_rate": 1.820153711499936e-05, "loss": 0.9013, "step": 4893 }, { "epoch": 0.6544530623161273, "grad_norm": 1.1399295330047607, "learning_rate": 1.820071099965143e-05, "loss": 1.0358, "step": 4894 }, { "epoch": 0.6545867879112062, "grad_norm": 1.1147061586380005, "learning_rate": 1.8199884713367524e-05, "loss": 1.0702, "step": 4895 }, { "epoch": 0.6547205135062851, "grad_norm": 1.0737214088439941, "learning_rate": 1.8199058256164866e-05, "loss": 0.9767, "step": 4896 }, { "epoch": 0.654854239101364, "grad_norm": 1.1452678442001343, "learning_rate": 1.8198231628060686e-05, "loss": 0.9766, "step": 4897 }, { "epoch": 0.6549879646964429, "grad_norm": 1.0882238149642944, "learning_rate": 1.8197404829072214e-05, "loss": 1.0399, "step": 4898 }, { "epoch": 0.6551216902915218, "grad_norm": 1.237720251083374, "learning_rate": 1.819657785921668e-05, "loss": 0.9628, "step": 4899 }, { "epoch": 0.6552554158866007, "grad_norm": 1.051042914390564, "learning_rate": 1.8195750718511326e-05, "loss": 0.9597, "step": 4900 }, { "epoch": 0.6553891414816796, "grad_norm": 1.1524134874343872, "learning_rate": 1.819492340697339e-05, "loss": 0.9633, "step": 4901 }, { "epoch": 0.6555228670767584, "grad_norm": 1.1068754196166992, "learning_rate": 1.8194095924620114e-05, "loss": 0.8818, "step": 4902 }, { "epoch": 0.6556565926718374, "grad_norm": 1.1498146057128906, "learning_rate": 1.8193268271468754e-05, "loss": 1.0163, "step": 4903 }, { "epoch": 0.6557903182669163, "grad_norm": 1.1875187158584595, "learning_rate": 1.8192440447536554e-05, "loss": 1.0938, "step": 4904 }, { "epoch": 0.6559240438619952, "grad_norm": 0.9934622645378113, "learning_rate": 1.8191612452840775e-05, "loss": 0.8908, "step": 4905 }, { "epoch": 0.6560577694570741, "grad_norm": 1.1322556734085083, "learning_rate": 1.819078428739867e-05, "loss": 0.9663, "step": 4906 }, { "epoch": 0.656191495052153, "grad_norm": 1.1673023700714111, "learning_rate": 1.8189955951227504e-05, "loss": 0.8333, "step": 4907 }, { "epoch": 0.6563252206472319, "grad_norm": 1.0496773719787598, "learning_rate": 1.818912744434455e-05, "loss": 1.0686, "step": 4908 }, { "epoch": 0.6564589462423108, "grad_norm": 0.9572871327400208, "learning_rate": 1.818829876676706e-05, "loss": 0.8953, "step": 4909 }, { "epoch": 0.6565926718373897, "grad_norm": 1.0872960090637207, "learning_rate": 1.8187469918512323e-05, "loss": 0.874, "step": 4910 }, { "epoch": 0.6567263974324685, "grad_norm": 1.0465223789215088, "learning_rate": 1.8186640899597612e-05, "loss": 0.8465, "step": 4911 }, { "epoch": 0.6568601230275475, "grad_norm": 1.1264820098876953, "learning_rate": 1.8185811710040203e-05, "loss": 1.0422, "step": 4912 }, { "epoch": 0.6569938486226263, "grad_norm": 1.042545199394226, "learning_rate": 1.8184982349857384e-05, "loss": 0.9631, "step": 4913 }, { "epoch": 0.6571275742177053, "grad_norm": 1.063456416130066, "learning_rate": 1.8184152819066437e-05, "loss": 0.9864, "step": 4914 }, { "epoch": 0.6572612998127841, "grad_norm": 1.0736908912658691, "learning_rate": 1.8183323117684656e-05, "loss": 0.7838, "step": 4915 }, { "epoch": 0.6573950254078631, "grad_norm": 1.1113524436950684, "learning_rate": 1.818249324572934e-05, "loss": 0.9997, "step": 4916 }, { "epoch": 0.657528751002942, "grad_norm": 1.0285409688949585, "learning_rate": 1.8181663203217774e-05, "loss": 0.9389, "step": 4917 }, { "epoch": 0.6576624765980209, "grad_norm": 1.1099438667297363, "learning_rate": 1.8180832990167273e-05, "loss": 0.9968, "step": 4918 }, { "epoch": 0.6577962021930998, "grad_norm": 0.9810138940811157, "learning_rate": 1.8180002606595135e-05, "loss": 1.0279, "step": 4919 }, { "epoch": 0.6579299277881786, "grad_norm": 0.9956666827201843, "learning_rate": 1.817917205251867e-05, "loss": 1.0663, "step": 4920 }, { "epoch": 0.6580636533832576, "grad_norm": 1.0833066701889038, "learning_rate": 1.8178341327955193e-05, "loss": 0.9678, "step": 4921 }, { "epoch": 0.6581973789783364, "grad_norm": 1.0350220203399658, "learning_rate": 1.8177510432922013e-05, "loss": 1.0245, "step": 4922 }, { "epoch": 0.6583311045734154, "grad_norm": 1.1310279369354248, "learning_rate": 1.8176679367436453e-05, "loss": 1.0295, "step": 4923 }, { "epoch": 0.6584648301684942, "grad_norm": 0.9682749509811401, "learning_rate": 1.817584813151584e-05, "loss": 0.8932, "step": 4924 }, { "epoch": 0.6585985557635732, "grad_norm": 1.152813196182251, "learning_rate": 1.817501672517749e-05, "loss": 0.9556, "step": 4925 }, { "epoch": 0.658732281358652, "grad_norm": 1.0485787391662598, "learning_rate": 1.8174185148438745e-05, "loss": 0.9174, "step": 4926 }, { "epoch": 0.658866006953731, "grad_norm": 1.0092227458953857, "learning_rate": 1.817335340131693e-05, "loss": 0.9915, "step": 4927 }, { "epoch": 0.6589997325488098, "grad_norm": 1.175471544265747, "learning_rate": 1.8172521483829384e-05, "loss": 0.9766, "step": 4928 }, { "epoch": 0.6591334581438888, "grad_norm": 1.0688331127166748, "learning_rate": 1.8171689395993447e-05, "loss": 0.9493, "step": 4929 }, { "epoch": 0.6592671837389676, "grad_norm": 0.9807957410812378, "learning_rate": 1.8170857137826465e-05, "loss": 0.8672, "step": 4930 }, { "epoch": 0.6594009093340465, "grad_norm": 1.101035714149475, "learning_rate": 1.8170024709345786e-05, "loss": 1.0332, "step": 4931 }, { "epoch": 0.6595346349291255, "grad_norm": 1.2423990964889526, "learning_rate": 1.816919211056876e-05, "loss": 1.0438, "step": 4932 }, { "epoch": 0.6596683605242043, "grad_norm": 1.0998975038528442, "learning_rate": 1.816835934151274e-05, "loss": 0.9625, "step": 4933 }, { "epoch": 0.6598020861192833, "grad_norm": 1.059422254562378, "learning_rate": 1.8167526402195085e-05, "loss": 0.9311, "step": 4934 }, { "epoch": 0.6599358117143621, "grad_norm": 0.9626438617706299, "learning_rate": 1.816669329263316e-05, "loss": 0.9523, "step": 4935 }, { "epoch": 0.6600695373094411, "grad_norm": 1.1004456281661987, "learning_rate": 1.8165860012844325e-05, "loss": 0.9433, "step": 4936 }, { "epoch": 0.6602032629045199, "grad_norm": 1.078370451927185, "learning_rate": 1.8165026562845954e-05, "loss": 0.998, "step": 4937 }, { "epoch": 0.6603369884995989, "grad_norm": 1.0814099311828613, "learning_rate": 1.8164192942655418e-05, "loss": 0.9913, "step": 4938 }, { "epoch": 0.6604707140946777, "grad_norm": 1.044791579246521, "learning_rate": 1.816335915229009e-05, "loss": 0.9861, "step": 4939 }, { "epoch": 0.6606044396897566, "grad_norm": 1.0157090425491333, "learning_rate": 1.8162525191767354e-05, "loss": 0.945, "step": 4940 }, { "epoch": 0.6607381652848355, "grad_norm": 1.212355613708496, "learning_rate": 1.816169106110459e-05, "loss": 0.9928, "step": 4941 }, { "epoch": 0.6608718908799144, "grad_norm": 1.040511131286621, "learning_rate": 1.8160856760319186e-05, "loss": 1.0804, "step": 4942 }, { "epoch": 0.6610056164749933, "grad_norm": 1.191188097000122, "learning_rate": 1.816002228942853e-05, "loss": 0.9962, "step": 4943 }, { "epoch": 0.6611393420700722, "grad_norm": 1.2231699228286743, "learning_rate": 1.815918764845002e-05, "loss": 1.2826, "step": 4944 }, { "epoch": 0.6612730676651511, "grad_norm": 1.021012783050537, "learning_rate": 1.8158352837401052e-05, "loss": 0.9311, "step": 4945 }, { "epoch": 0.66140679326023, "grad_norm": 1.165655255317688, "learning_rate": 1.8157517856299024e-05, "loss": 0.9375, "step": 4946 }, { "epoch": 0.661540518855309, "grad_norm": 1.1837654113769531, "learning_rate": 1.815668270516134e-05, "loss": 0.9278, "step": 4947 }, { "epoch": 0.6616742444503878, "grad_norm": 1.0211386680603027, "learning_rate": 1.8155847384005417e-05, "loss": 0.8607, "step": 4948 }, { "epoch": 0.6618079700454667, "grad_norm": 1.158022403717041, "learning_rate": 1.8155011892848656e-05, "loss": 0.9783, "step": 4949 }, { "epoch": 0.6619416956405456, "grad_norm": 1.0513328313827515, "learning_rate": 1.8154176231708472e-05, "loss": 0.9936, "step": 4950 }, { "epoch": 0.6620754212356245, "grad_norm": 1.0957874059677124, "learning_rate": 1.815334040060229e-05, "loss": 0.8866, "step": 4951 }, { "epoch": 0.6622091468307034, "grad_norm": 1.163976788520813, "learning_rate": 1.815250439954753e-05, "loss": 1.0454, "step": 4952 }, { "epoch": 0.6623428724257823, "grad_norm": 0.9470677375793457, "learning_rate": 1.8151668228561616e-05, "loss": 0.8984, "step": 4953 }, { "epoch": 0.6624765980208612, "grad_norm": 1.0575108528137207, "learning_rate": 1.815083188766198e-05, "loss": 0.9072, "step": 4954 }, { "epoch": 0.6626103236159401, "grad_norm": 1.243083119392395, "learning_rate": 1.814999537686605e-05, "loss": 1.1552, "step": 4955 }, { "epoch": 0.662744049211019, "grad_norm": 1.0193355083465576, "learning_rate": 1.8149158696191268e-05, "loss": 0.8107, "step": 4956 }, { "epoch": 0.6628777748060979, "grad_norm": 0.9294533133506775, "learning_rate": 1.8148321845655066e-05, "loss": 0.8147, "step": 4957 }, { "epoch": 0.6630115004011767, "grad_norm": 0.9704387187957764, "learning_rate": 1.8147484825274895e-05, "loss": 0.8627, "step": 4958 }, { "epoch": 0.6631452259962557, "grad_norm": 1.010780930519104, "learning_rate": 1.81466476350682e-05, "loss": 0.8921, "step": 4959 }, { "epoch": 0.6632789515913345, "grad_norm": 1.1641600131988525, "learning_rate": 1.814581027505243e-05, "loss": 0.8169, "step": 4960 }, { "epoch": 0.6634126771864135, "grad_norm": 1.101241946220398, "learning_rate": 1.814497274524504e-05, "loss": 0.8898, "step": 4961 }, { "epoch": 0.6635464027814924, "grad_norm": 1.1946091651916504, "learning_rate": 1.8144135045663486e-05, "loss": 0.9805, "step": 4962 }, { "epoch": 0.6636801283765713, "grad_norm": 1.1613874435424805, "learning_rate": 1.814329717632523e-05, "loss": 0.9625, "step": 4963 }, { "epoch": 0.6638138539716502, "grad_norm": 1.202282428741455, "learning_rate": 1.814245913724774e-05, "loss": 1.1162, "step": 4964 }, { "epoch": 0.6639475795667291, "grad_norm": 1.077477216720581, "learning_rate": 1.8141620928448474e-05, "loss": 0.9634, "step": 4965 }, { "epoch": 0.664081305161808, "grad_norm": 1.1463258266448975, "learning_rate": 1.8140782549944915e-05, "loss": 0.977, "step": 4966 }, { "epoch": 0.6642150307568868, "grad_norm": 0.9715328812599182, "learning_rate": 1.8139944001754533e-05, "loss": 0.88, "step": 4967 }, { "epoch": 0.6643487563519658, "grad_norm": 1.2045345306396484, "learning_rate": 1.813910528389481e-05, "loss": 0.9837, "step": 4968 }, { "epoch": 0.6644824819470446, "grad_norm": 1.047640085220337, "learning_rate": 1.8138266396383222e-05, "loss": 1.0145, "step": 4969 }, { "epoch": 0.6646162075421236, "grad_norm": 1.0173547267913818, "learning_rate": 1.813742733923726e-05, "loss": 0.8953, "step": 4970 }, { "epoch": 0.6647499331372024, "grad_norm": 1.2930530309677124, "learning_rate": 1.813658811247441e-05, "loss": 0.9198, "step": 4971 }, { "epoch": 0.6648836587322814, "grad_norm": 1.1037321090698242, "learning_rate": 1.8135748716112168e-05, "loss": 0.9414, "step": 4972 }, { "epoch": 0.6650173843273602, "grad_norm": 1.1478307247161865, "learning_rate": 1.8134909150168028e-05, "loss": 0.9024, "step": 4973 }, { "epoch": 0.6651511099224392, "grad_norm": 1.0730478763580322, "learning_rate": 1.8134069414659496e-05, "loss": 0.8417, "step": 4974 }, { "epoch": 0.665284835517518, "grad_norm": 1.0726128816604614, "learning_rate": 1.813322950960406e-05, "loss": 1.0292, "step": 4975 }, { "epoch": 0.6654185611125969, "grad_norm": 1.0035371780395508, "learning_rate": 1.8132389435019248e-05, "loss": 0.9956, "step": 4976 }, { "epoch": 0.6655522867076759, "grad_norm": 1.1524064540863037, "learning_rate": 1.8131549190922556e-05, "loss": 0.8932, "step": 4977 }, { "epoch": 0.6656860123027547, "grad_norm": 1.0357332229614258, "learning_rate": 1.81307087773315e-05, "loss": 0.9171, "step": 4978 }, { "epoch": 0.6658197378978337, "grad_norm": 1.0936928987503052, "learning_rate": 1.81298681942636e-05, "loss": 1.0408, "step": 4979 }, { "epoch": 0.6659534634929125, "grad_norm": 1.0289288759231567, "learning_rate": 1.8129027441736382e-05, "loss": 0.9297, "step": 4980 }, { "epoch": 0.6660871890879915, "grad_norm": 1.031346321105957, "learning_rate": 1.8128186519767364e-05, "loss": 0.9367, "step": 4981 }, { "epoch": 0.6662209146830703, "grad_norm": 1.0336720943450928, "learning_rate": 1.8127345428374074e-05, "loss": 1.0336, "step": 4982 }, { "epoch": 0.6663546402781493, "grad_norm": 0.9850664138793945, "learning_rate": 1.8126504167574045e-05, "loss": 0.9371, "step": 4983 }, { "epoch": 0.6664883658732281, "grad_norm": 1.029054880142212, "learning_rate": 1.8125662737384814e-05, "loss": 0.9669, "step": 4984 }, { "epoch": 0.6666220914683071, "grad_norm": 1.0611985921859741, "learning_rate": 1.812482113782392e-05, "loss": 1.0181, "step": 4985 }, { "epoch": 0.6667558170633859, "grad_norm": 1.0016247034072876, "learning_rate": 1.81239793689089e-05, "loss": 0.9778, "step": 4986 }, { "epoch": 0.6668895426584648, "grad_norm": 1.0768470764160156, "learning_rate": 1.8123137430657308e-05, "loss": 0.8778, "step": 4987 }, { "epoch": 0.6670232682535437, "grad_norm": 1.0309611558914185, "learning_rate": 1.8122295323086688e-05, "loss": 0.9919, "step": 4988 }, { "epoch": 0.6671569938486226, "grad_norm": 1.0286513566970825, "learning_rate": 1.8121453046214593e-05, "loss": 0.8895, "step": 4989 }, { "epoch": 0.6672907194437016, "grad_norm": 1.0100020170211792, "learning_rate": 1.8120610600058582e-05, "loss": 0.8965, "step": 4990 }, { "epoch": 0.6674244450387804, "grad_norm": 1.101260781288147, "learning_rate": 1.8119767984636213e-05, "loss": 1.0634, "step": 4991 }, { "epoch": 0.6675581706338594, "grad_norm": 0.9628750681877136, "learning_rate": 1.811892519996505e-05, "loss": 0.8216, "step": 4992 }, { "epoch": 0.6676918962289382, "grad_norm": 1.0571770668029785, "learning_rate": 1.8118082246062657e-05, "loss": 0.9784, "step": 4993 }, { "epoch": 0.6678256218240172, "grad_norm": 1.1104413270950317, "learning_rate": 1.8117239122946615e-05, "loss": 0.9442, "step": 4994 }, { "epoch": 0.667959347419096, "grad_norm": 1.0943197011947632, "learning_rate": 1.8116395830634485e-05, "loss": 1.0236, "step": 4995 }, { "epoch": 0.6680930730141749, "grad_norm": 0.9976595044136047, "learning_rate": 1.8115552369143855e-05, "loss": 0.9944, "step": 4996 }, { "epoch": 0.6682267986092538, "grad_norm": 1.1618831157684326, "learning_rate": 1.81147087384923e-05, "loss": 1.0038, "step": 4997 }, { "epoch": 0.6683605242043327, "grad_norm": 1.1059714555740356, "learning_rate": 1.81138649386974e-05, "loss": 0.9281, "step": 4998 }, { "epoch": 0.6684942497994116, "grad_norm": 0.9660097360610962, "learning_rate": 1.8113020969776758e-05, "loss": 0.888, "step": 4999 }, { "epoch": 0.6686279753944905, "grad_norm": 1.064026117324829, "learning_rate": 1.8112176831747953e-05, "loss": 1.0256, "step": 5000 }, { "epoch": 0.6687617009895694, "grad_norm": 0.9980587959289551, "learning_rate": 1.8111332524628587e-05, "loss": 0.9215, "step": 5001 }, { "epoch": 0.6688954265846483, "grad_norm": 1.037880778312683, "learning_rate": 1.8110488048436254e-05, "loss": 0.9625, "step": 5002 }, { "epoch": 0.6690291521797272, "grad_norm": 1.139431118965149, "learning_rate": 1.8109643403188558e-05, "loss": 1.1008, "step": 5003 }, { "epoch": 0.6691628777748061, "grad_norm": 0.9601593613624573, "learning_rate": 1.8108798588903105e-05, "loss": 0.9325, "step": 5004 }, { "epoch": 0.669296603369885, "grad_norm": 1.069495677947998, "learning_rate": 1.8107953605597507e-05, "loss": 0.9648, "step": 5005 }, { "epoch": 0.6694303289649639, "grad_norm": 1.0853127241134644, "learning_rate": 1.8107108453289373e-05, "loss": 0.8966, "step": 5006 }, { "epoch": 0.6695640545600428, "grad_norm": 1.0191290378570557, "learning_rate": 1.810626313199632e-05, "loss": 0.9711, "step": 5007 }, { "epoch": 0.6696977801551217, "grad_norm": 1.1415996551513672, "learning_rate": 1.8105417641735974e-05, "loss": 1.0939, "step": 5008 }, { "epoch": 0.6698315057502006, "grad_norm": 0.9952882528305054, "learning_rate": 1.810457198252595e-05, "loss": 0.8584, "step": 5009 }, { "epoch": 0.6699652313452795, "grad_norm": 1.0715973377227783, "learning_rate": 1.8103726154383876e-05, "loss": 0.9274, "step": 5010 }, { "epoch": 0.6700989569403584, "grad_norm": 1.0314003229141235, "learning_rate": 1.8102880157327386e-05, "loss": 1.0282, "step": 5011 }, { "epoch": 0.6702326825354373, "grad_norm": 1.1185998916625977, "learning_rate": 1.8102033991374118e-05, "loss": 0.968, "step": 5012 }, { "epoch": 0.6703664081305162, "grad_norm": 1.0908783674240112, "learning_rate": 1.8101187656541695e-05, "loss": 1.0646, "step": 5013 }, { "epoch": 0.670500133725595, "grad_norm": 1.1463176012039185, "learning_rate": 1.8100341152847772e-05, "loss": 1.0432, "step": 5014 }, { "epoch": 0.670633859320674, "grad_norm": 1.1876200437545776, "learning_rate": 1.809949448030999e-05, "loss": 1.0687, "step": 5015 }, { "epoch": 0.6707675849157528, "grad_norm": 1.129399061203003, "learning_rate": 1.8098647638946e-05, "loss": 0.9486, "step": 5016 }, { "epoch": 0.6709013105108318, "grad_norm": 1.0842876434326172, "learning_rate": 1.809780062877344e-05, "loss": 1.0181, "step": 5017 }, { "epoch": 0.6710350361059106, "grad_norm": 1.132673740386963, "learning_rate": 1.8096953449809983e-05, "loss": 0.98, "step": 5018 }, { "epoch": 0.6711687617009896, "grad_norm": 0.9741018414497375, "learning_rate": 1.809610610207327e-05, "loss": 0.9816, "step": 5019 }, { "epoch": 0.6713024872960685, "grad_norm": 1.0211485624313354, "learning_rate": 1.8095258585580983e-05, "loss": 0.8669, "step": 5020 }, { "epoch": 0.6714362128911474, "grad_norm": 1.368371844291687, "learning_rate": 1.809441090035077e-05, "loss": 1.0076, "step": 5021 }, { "epoch": 0.6715699384862263, "grad_norm": 1.080718994140625, "learning_rate": 1.809356304640031e-05, "loss": 1.0083, "step": 5022 }, { "epoch": 0.6717036640813051, "grad_norm": 0.990145206451416, "learning_rate": 1.809271502374727e-05, "loss": 0.9311, "step": 5023 }, { "epoch": 0.6718373896763841, "grad_norm": 1.17551589012146, "learning_rate": 1.8091866832409332e-05, "loss": 1.0158, "step": 5024 }, { "epoch": 0.6719711152714629, "grad_norm": 1.1224229335784912, "learning_rate": 1.8091018472404172e-05, "loss": 1.1643, "step": 5025 }, { "epoch": 0.6721048408665419, "grad_norm": 1.0456095933914185, "learning_rate": 1.8090169943749477e-05, "loss": 0.9099, "step": 5026 }, { "epoch": 0.6722385664616207, "grad_norm": 0.9828181862831116, "learning_rate": 1.808932124646293e-05, "loss": 0.9243, "step": 5027 }, { "epoch": 0.6723722920566997, "grad_norm": 1.097732424736023, "learning_rate": 1.8088472380562218e-05, "loss": 0.989, "step": 5028 }, { "epoch": 0.6725060176517785, "grad_norm": 1.2297818660736084, "learning_rate": 1.808762334606504e-05, "loss": 1.0223, "step": 5029 }, { "epoch": 0.6726397432468575, "grad_norm": 1.1043789386749268, "learning_rate": 1.8086774142989095e-05, "loss": 0.9125, "step": 5030 }, { "epoch": 0.6727734688419363, "grad_norm": 1.0243536233901978, "learning_rate": 1.8085924771352083e-05, "loss": 0.8846, "step": 5031 }, { "epoch": 0.6729071944370153, "grad_norm": 0.9904436469078064, "learning_rate": 1.8085075231171702e-05, "loss": 0.9528, "step": 5032 }, { "epoch": 0.6730409200320941, "grad_norm": 1.0466152429580688, "learning_rate": 1.8084225522465667e-05, "loss": 0.9227, "step": 5033 }, { "epoch": 0.673174645627173, "grad_norm": 1.0991414785385132, "learning_rate": 1.8083375645251687e-05, "loss": 0.9701, "step": 5034 }, { "epoch": 0.673308371222252, "grad_norm": 1.1972569227218628, "learning_rate": 1.8082525599547474e-05, "loss": 0.9533, "step": 5035 }, { "epoch": 0.6734420968173308, "grad_norm": 1.0884032249450684, "learning_rate": 1.8081675385370753e-05, "loss": 0.8965, "step": 5036 }, { "epoch": 0.6735758224124098, "grad_norm": 1.0727729797363281, "learning_rate": 1.808082500273924e-05, "loss": 0.9585, "step": 5037 }, { "epoch": 0.6737095480074886, "grad_norm": 1.0311223268508911, "learning_rate": 1.807997445167066e-05, "loss": 0.865, "step": 5038 }, { "epoch": 0.6738432736025676, "grad_norm": 1.069775104522705, "learning_rate": 1.8079123732182748e-05, "loss": 0.9585, "step": 5039 }, { "epoch": 0.6739769991976464, "grad_norm": 1.1405057907104492, "learning_rate": 1.807827284429323e-05, "loss": 0.9612, "step": 5040 }, { "epoch": 0.6741107247927254, "grad_norm": 0.9590426087379456, "learning_rate": 1.8077421788019848e-05, "loss": 0.7721, "step": 5041 }, { "epoch": 0.6742444503878042, "grad_norm": 1.1761194467544556, "learning_rate": 1.8076570563380333e-05, "loss": 1.02, "step": 5042 }, { "epoch": 0.6743781759828831, "grad_norm": 1.163806676864624, "learning_rate": 1.8075719170392437e-05, "loss": 1.1724, "step": 5043 }, { "epoch": 0.674511901577962, "grad_norm": 1.0814969539642334, "learning_rate": 1.80748676090739e-05, "loss": 0.895, "step": 5044 }, { "epoch": 0.6746456271730409, "grad_norm": 1.1215808391571045, "learning_rate": 1.8074015879442475e-05, "loss": 1.0519, "step": 5045 }, { "epoch": 0.6747793527681198, "grad_norm": 1.0824809074401855, "learning_rate": 1.8073163981515915e-05, "loss": 0.9824, "step": 5046 }, { "epoch": 0.6749130783631987, "grad_norm": 1.1442539691925049, "learning_rate": 1.8072311915311978e-05, "loss": 1.0461, "step": 5047 }, { "epoch": 0.6750468039582777, "grad_norm": 1.0627573728561401, "learning_rate": 1.8071459680848423e-05, "loss": 0.8791, "step": 5048 }, { "epoch": 0.6751805295533565, "grad_norm": 1.005487322807312, "learning_rate": 1.8070607278143016e-05, "loss": 0.9051, "step": 5049 }, { "epoch": 0.6753142551484355, "grad_norm": 1.163400650024414, "learning_rate": 1.8069754707213522e-05, "loss": 0.9219, "step": 5050 }, { "epoch": 0.6754479807435143, "grad_norm": 1.077052354812622, "learning_rate": 1.806890196807771e-05, "loss": 0.9624, "step": 5051 }, { "epoch": 0.6755817063385932, "grad_norm": 0.980795681476593, "learning_rate": 1.8068049060753365e-05, "loss": 1.0012, "step": 5052 }, { "epoch": 0.6757154319336721, "grad_norm": 1.0475205183029175, "learning_rate": 1.8067195985258253e-05, "loss": 0.867, "step": 5053 }, { "epoch": 0.675849157528751, "grad_norm": 1.0309828519821167, "learning_rate": 1.8066342741610158e-05, "loss": 0.98, "step": 5054 }, { "epoch": 0.6759828831238299, "grad_norm": 1.0276451110839844, "learning_rate": 1.806548932982687e-05, "loss": 0.8414, "step": 5055 }, { "epoch": 0.6761166087189088, "grad_norm": 1.0409561395645142, "learning_rate": 1.8064635749926172e-05, "loss": 0.8625, "step": 5056 }, { "epoch": 0.6762503343139877, "grad_norm": 1.0347881317138672, "learning_rate": 1.8063782001925864e-05, "loss": 0.987, "step": 5057 }, { "epoch": 0.6763840599090666, "grad_norm": 1.0494024753570557, "learning_rate": 1.8062928085843732e-05, "loss": 0.9924, "step": 5058 }, { "epoch": 0.6765177855041455, "grad_norm": 1.0453131198883057, "learning_rate": 1.806207400169758e-05, "loss": 1.0123, "step": 5059 }, { "epoch": 0.6766515110992244, "grad_norm": 1.0931572914123535, "learning_rate": 1.806121974950521e-05, "loss": 0.9703, "step": 5060 }, { "epoch": 0.6767852366943032, "grad_norm": 1.053357481956482, "learning_rate": 1.806036532928443e-05, "loss": 0.9707, "step": 5061 }, { "epoch": 0.6769189622893822, "grad_norm": 1.0865283012390137, "learning_rate": 1.8059510741053045e-05, "loss": 0.941, "step": 5062 }, { "epoch": 0.677052687884461, "grad_norm": 1.1608012914657593, "learning_rate": 1.805865598482887e-05, "loss": 0.9522, "step": 5063 }, { "epoch": 0.67718641347954, "grad_norm": 1.0921530723571777, "learning_rate": 1.805780106062973e-05, "loss": 0.9258, "step": 5064 }, { "epoch": 0.6773201390746189, "grad_norm": 1.0793124437332153, "learning_rate": 1.805694596847343e-05, "loss": 0.9239, "step": 5065 }, { "epoch": 0.6774538646696978, "grad_norm": 1.0646467208862305, "learning_rate": 1.80560907083778e-05, "loss": 0.8461, "step": 5066 }, { "epoch": 0.6775875902647767, "grad_norm": 1.1142200231552124, "learning_rate": 1.8055235280360674e-05, "loss": 1.0139, "step": 5067 }, { "epoch": 0.6777213158598556, "grad_norm": 1.1605818271636963, "learning_rate": 1.8054379684439874e-05, "loss": 0.9115, "step": 5068 }, { "epoch": 0.6778550414549345, "grad_norm": 1.194240927696228, "learning_rate": 1.8053523920633235e-05, "loss": 1.0478, "step": 5069 }, { "epoch": 0.6779887670500133, "grad_norm": 0.9740921854972839, "learning_rate": 1.8052667988958597e-05, "loss": 0.9738, "step": 5070 }, { "epoch": 0.6781224926450923, "grad_norm": 1.2290987968444824, "learning_rate": 1.8051811889433803e-05, "loss": 0.8986, "step": 5071 }, { "epoch": 0.6782562182401711, "grad_norm": 1.0792953968048096, "learning_rate": 1.805095562207669e-05, "loss": 1.0764, "step": 5072 }, { "epoch": 0.6783899438352501, "grad_norm": 1.1804550886154175, "learning_rate": 1.8050099186905114e-05, "loss": 1.0404, "step": 5073 }, { "epoch": 0.6785236694303289, "grad_norm": 1.1123442649841309, "learning_rate": 1.8049242583936923e-05, "loss": 1.0377, "step": 5074 }, { "epoch": 0.6786573950254079, "grad_norm": 1.0268845558166504, "learning_rate": 1.8048385813189973e-05, "loss": 0.9334, "step": 5075 }, { "epoch": 0.6787911206204867, "grad_norm": 1.058103084564209, "learning_rate": 1.804752887468212e-05, "loss": 0.9569, "step": 5076 }, { "epoch": 0.6789248462155657, "grad_norm": 1.0855058431625366, "learning_rate": 1.8046671768431233e-05, "loss": 0.9504, "step": 5077 }, { "epoch": 0.6790585718106446, "grad_norm": 1.0597195625305176, "learning_rate": 1.804581449445517e-05, "loss": 0.9085, "step": 5078 }, { "epoch": 0.6791922974057235, "grad_norm": 1.0111112594604492, "learning_rate": 1.8044957052771803e-05, "loss": 1.0389, "step": 5079 }, { "epoch": 0.6793260230008024, "grad_norm": 0.8890573382377625, "learning_rate": 1.8044099443399003e-05, "loss": 0.9215, "step": 5080 }, { "epoch": 0.6794597485958812, "grad_norm": 1.094689130783081, "learning_rate": 1.804324166635465e-05, "loss": 0.9368, "step": 5081 }, { "epoch": 0.6795934741909602, "grad_norm": 1.1405119895935059, "learning_rate": 1.8042383721656617e-05, "loss": 0.9582, "step": 5082 }, { "epoch": 0.679727199786039, "grad_norm": 1.1554011106491089, "learning_rate": 1.8041525609322795e-05, "loss": 1.1045, "step": 5083 }, { "epoch": 0.679860925381118, "grad_norm": 1.1559550762176514, "learning_rate": 1.8040667329371063e-05, "loss": 1.0195, "step": 5084 }, { "epoch": 0.6799946509761968, "grad_norm": 1.0837669372558594, "learning_rate": 1.8039808881819318e-05, "loss": 0.9063, "step": 5085 }, { "epoch": 0.6801283765712758, "grad_norm": 1.0689849853515625, "learning_rate": 1.803895026668545e-05, "loss": 0.8929, "step": 5086 }, { "epoch": 0.6802621021663546, "grad_norm": 1.1741976737976074, "learning_rate": 1.8038091483987357e-05, "loss": 0.8775, "step": 5087 }, { "epoch": 0.6803958277614336, "grad_norm": 1.2029422521591187, "learning_rate": 1.8037232533742936e-05, "loss": 1.0531, "step": 5088 }, { "epoch": 0.6805295533565124, "grad_norm": 1.0770916938781738, "learning_rate": 1.8036373415970093e-05, "loss": 1.0407, "step": 5089 }, { "epoch": 0.6806632789515913, "grad_norm": 0.9712393879890442, "learning_rate": 1.8035514130686737e-05, "loss": 0.8879, "step": 5090 }, { "epoch": 0.6807970045466702, "grad_norm": 1.0829929113388062, "learning_rate": 1.803465467791078e-05, "loss": 0.9188, "step": 5091 }, { "epoch": 0.6809307301417491, "grad_norm": 1.0641124248504639, "learning_rate": 1.8033795057660134e-05, "loss": 0.7929, "step": 5092 }, { "epoch": 0.6810644557368281, "grad_norm": 1.576263666152954, "learning_rate": 1.8032935269952714e-05, "loss": 0.9511, "step": 5093 }, { "epoch": 0.6811981813319069, "grad_norm": 1.057915449142456, "learning_rate": 1.803207531480645e-05, "loss": 0.9749, "step": 5094 }, { "epoch": 0.6813319069269859, "grad_norm": 1.077998161315918, "learning_rate": 1.803121519223926e-05, "loss": 0.9927, "step": 5095 }, { "epoch": 0.6814656325220647, "grad_norm": 1.2218754291534424, "learning_rate": 1.8030354902269077e-05, "loss": 1.0748, "step": 5096 }, { "epoch": 0.6815993581171437, "grad_norm": 1.1164921522140503, "learning_rate": 1.8029494444913825e-05, "loss": 0.9096, "step": 5097 }, { "epoch": 0.6817330837122225, "grad_norm": 1.3206048011779785, "learning_rate": 1.8028633820191448e-05, "loss": 1.0513, "step": 5098 }, { "epoch": 0.6818668093073014, "grad_norm": 1.0226329565048218, "learning_rate": 1.8027773028119878e-05, "loss": 0.9239, "step": 5099 }, { "epoch": 0.6820005349023803, "grad_norm": 1.1730430126190186, "learning_rate": 1.8026912068717064e-05, "loss": 1.0135, "step": 5100 }, { "epoch": 0.6821342604974592, "grad_norm": 1.0840502977371216, "learning_rate": 1.8026050942000946e-05, "loss": 0.7907, "step": 5101 }, { "epoch": 0.6822679860925381, "grad_norm": 1.049568772315979, "learning_rate": 1.8025189647989483e-05, "loss": 0.9023, "step": 5102 }, { "epoch": 0.682401711687617, "grad_norm": 1.0245225429534912, "learning_rate": 1.8024328186700616e-05, "loss": 1.0354, "step": 5103 }, { "epoch": 0.682535437282696, "grad_norm": 0.9409737586975098, "learning_rate": 1.8023466558152308e-05, "loss": 0.9803, "step": 5104 }, { "epoch": 0.6826691628777748, "grad_norm": 1.1060967445373535, "learning_rate": 1.8022604762362514e-05, "loss": 0.9058, "step": 5105 }, { "epoch": 0.6828028884728538, "grad_norm": 1.1317620277404785, "learning_rate": 1.8021742799349206e-05, "loss": 0.9523, "step": 5106 }, { "epoch": 0.6829366140679326, "grad_norm": 1.2041938304901123, "learning_rate": 1.802088066913034e-05, "loss": 1.0111, "step": 5107 }, { "epoch": 0.6830703396630114, "grad_norm": 1.054218053817749, "learning_rate": 1.8020018371723895e-05, "loss": 0.9488, "step": 5108 }, { "epoch": 0.6832040652580904, "grad_norm": 1.1941221952438354, "learning_rate": 1.801915590714784e-05, "loss": 0.9669, "step": 5109 }, { "epoch": 0.6833377908531693, "grad_norm": 1.0763728618621826, "learning_rate": 1.8018293275420156e-05, "loss": 0.8966, "step": 5110 }, { "epoch": 0.6834715164482482, "grad_norm": 1.0471513271331787, "learning_rate": 1.801743047655882e-05, "loss": 0.8978, "step": 5111 }, { "epoch": 0.6836052420433271, "grad_norm": 1.0998284816741943, "learning_rate": 1.8016567510581814e-05, "loss": 0.9878, "step": 5112 }, { "epoch": 0.683738967638406, "grad_norm": 1.173107624053955, "learning_rate": 1.801570437750713e-05, "loss": 1.0458, "step": 5113 }, { "epoch": 0.6838726932334849, "grad_norm": 1.088143229484558, "learning_rate": 1.8014841077352764e-05, "loss": 0.9432, "step": 5114 }, { "epoch": 0.6840064188285638, "grad_norm": 1.123342514038086, "learning_rate": 1.8013977610136698e-05, "loss": 0.941, "step": 5115 }, { "epoch": 0.6841401444236427, "grad_norm": 1.0366772413253784, "learning_rate": 1.8013113975876942e-05, "loss": 0.808, "step": 5116 }, { "epoch": 0.6842738700187215, "grad_norm": 1.0562697649002075, "learning_rate": 1.8012250174591492e-05, "loss": 0.8577, "step": 5117 }, { "epoch": 0.6844075956138005, "grad_norm": 1.283618688583374, "learning_rate": 1.8011386206298357e-05, "loss": 1.095, "step": 5118 }, { "epoch": 0.6845413212088793, "grad_norm": 0.9584662318229675, "learning_rate": 1.8010522071015537e-05, "loss": 0.8278, "step": 5119 }, { "epoch": 0.6846750468039583, "grad_norm": 1.0604195594787598, "learning_rate": 1.8009657768761052e-05, "loss": 1.0009, "step": 5120 }, { "epoch": 0.6848087723990371, "grad_norm": 1.0978963375091553, "learning_rate": 1.8008793299552914e-05, "loss": 0.9388, "step": 5121 }, { "epoch": 0.6849424979941161, "grad_norm": 1.1427022218704224, "learning_rate": 1.8007928663409148e-05, "loss": 0.9831, "step": 5122 }, { "epoch": 0.685076223589195, "grad_norm": 1.060240387916565, "learning_rate": 1.8007063860347768e-05, "loss": 0.9301, "step": 5123 }, { "epoch": 0.6852099491842739, "grad_norm": 1.0550285577774048, "learning_rate": 1.8006198890386802e-05, "loss": 1.0026, "step": 5124 }, { "epoch": 0.6853436747793528, "grad_norm": 1.1321195363998413, "learning_rate": 1.8005333753544283e-05, "loss": 1.0482, "step": 5125 }, { "epoch": 0.6854774003744316, "grad_norm": 1.0665620565414429, "learning_rate": 1.8004468449838245e-05, "loss": 0.9728, "step": 5126 }, { "epoch": 0.6856111259695106, "grad_norm": 1.1393606662750244, "learning_rate": 1.8003602979286717e-05, "loss": 0.9197, "step": 5127 }, { "epoch": 0.6857448515645894, "grad_norm": 1.111890435218811, "learning_rate": 1.8002737341907743e-05, "loss": 1.0298, "step": 5128 }, { "epoch": 0.6858785771596684, "grad_norm": 1.1211916208267212, "learning_rate": 1.800187153771937e-05, "loss": 0.9258, "step": 5129 }, { "epoch": 0.6860123027547472, "grad_norm": 1.0774627923965454, "learning_rate": 1.800100556673964e-05, "loss": 1.0111, "step": 5130 }, { "epoch": 0.6861460283498262, "grad_norm": 0.9830366969108582, "learning_rate": 1.800013942898661e-05, "loss": 0.8513, "step": 5131 }, { "epoch": 0.686279753944905, "grad_norm": 1.2034239768981934, "learning_rate": 1.7999273124478324e-05, "loss": 0.9994, "step": 5132 }, { "epoch": 0.686413479539984, "grad_norm": 1.1258162260055542, "learning_rate": 1.7998406653232842e-05, "loss": 1.0047, "step": 5133 }, { "epoch": 0.6865472051350628, "grad_norm": 1.1947698593139648, "learning_rate": 1.7997540015268234e-05, "loss": 0.9751, "step": 5134 }, { "epoch": 0.6866809307301418, "grad_norm": 1.1146042346954346, "learning_rate": 1.7996673210602555e-05, "loss": 0.9367, "step": 5135 }, { "epoch": 0.6868146563252207, "grad_norm": 1.0870232582092285, "learning_rate": 1.7995806239253873e-05, "loss": 0.9517, "step": 5136 }, { "epoch": 0.6869483819202995, "grad_norm": 1.0905252695083618, "learning_rate": 1.799493910124026e-05, "loss": 0.9957, "step": 5137 }, { "epoch": 0.6870821075153785, "grad_norm": 1.0507646799087524, "learning_rate": 1.7994071796579794e-05, "loss": 0.9696, "step": 5138 }, { "epoch": 0.6872158331104573, "grad_norm": 1.0436795949935913, "learning_rate": 1.799320432529055e-05, "loss": 1.0902, "step": 5139 }, { "epoch": 0.6873495587055363, "grad_norm": 1.0312986373901367, "learning_rate": 1.799233668739061e-05, "loss": 0.8826, "step": 5140 }, { "epoch": 0.6874832843006151, "grad_norm": 1.0144051313400269, "learning_rate": 1.799146888289806e-05, "loss": 0.8882, "step": 5141 }, { "epoch": 0.6876170098956941, "grad_norm": 1.09243643283844, "learning_rate": 1.7990600911830988e-05, "loss": 0.938, "step": 5142 }, { "epoch": 0.6877507354907729, "grad_norm": 1.116445541381836, "learning_rate": 1.7989732774207486e-05, "loss": 0.9108, "step": 5143 }, { "epoch": 0.6878844610858519, "grad_norm": 1.0592668056488037, "learning_rate": 1.798886447004565e-05, "loss": 0.9364, "step": 5144 }, { "epoch": 0.6880181866809307, "grad_norm": 1.0879862308502197, "learning_rate": 1.798799599936358e-05, "loss": 1.0881, "step": 5145 }, { "epoch": 0.6881519122760096, "grad_norm": 1.022619366645813, "learning_rate": 1.7987127362179375e-05, "loss": 0.8993, "step": 5146 }, { "epoch": 0.6882856378710885, "grad_norm": 1.0596449375152588, "learning_rate": 1.7986258558511146e-05, "loss": 0.9809, "step": 5147 }, { "epoch": 0.6884193634661674, "grad_norm": 1.019476294517517, "learning_rate": 1.7985389588377e-05, "loss": 0.9455, "step": 5148 }, { "epoch": 0.6885530890612463, "grad_norm": 1.0632236003875732, "learning_rate": 1.7984520451795043e-05, "loss": 0.9762, "step": 5149 }, { "epoch": 0.6886868146563252, "grad_norm": 1.2100046873092651, "learning_rate": 1.7983651148783402e-05, "loss": 0.9919, "step": 5150 }, { "epoch": 0.6888205402514042, "grad_norm": 1.1318711042404175, "learning_rate": 1.798278167936019e-05, "loss": 0.9039, "step": 5151 }, { "epoch": 0.688954265846483, "grad_norm": 1.0051398277282715, "learning_rate": 1.7981912043543535e-05, "loss": 0.9316, "step": 5152 }, { "epoch": 0.689087991441562, "grad_norm": 0.9786632657051086, "learning_rate": 1.798104224135156e-05, "loss": 0.9173, "step": 5153 }, { "epoch": 0.6892217170366408, "grad_norm": 1.1040886640548706, "learning_rate": 1.7980172272802398e-05, "loss": 0.9628, "step": 5154 }, { "epoch": 0.6893554426317197, "grad_norm": 1.1284029483795166, "learning_rate": 1.797930213791418e-05, "loss": 0.9817, "step": 5155 }, { "epoch": 0.6894891682267986, "grad_norm": 1.1185822486877441, "learning_rate": 1.7978431836705043e-05, "loss": 0.9284, "step": 5156 }, { "epoch": 0.6896228938218775, "grad_norm": 0.9561588168144226, "learning_rate": 1.797756136919313e-05, "loss": 0.8564, "step": 5157 }, { "epoch": 0.6897566194169564, "grad_norm": 1.0426297187805176, "learning_rate": 1.7976690735396586e-05, "loss": 1.0143, "step": 5158 }, { "epoch": 0.6898903450120353, "grad_norm": 1.085315465927124, "learning_rate": 1.7975819935333554e-05, "loss": 0.9952, "step": 5159 }, { "epoch": 0.6900240706071142, "grad_norm": 0.9940829277038574, "learning_rate": 1.797494896902219e-05, "loss": 0.9741, "step": 5160 }, { "epoch": 0.6901577962021931, "grad_norm": 1.067638874053955, "learning_rate": 1.797407783648064e-05, "loss": 0.8824, "step": 5161 }, { "epoch": 0.690291521797272, "grad_norm": 1.098319172859192, "learning_rate": 1.797320653772707e-05, "loss": 1.0436, "step": 5162 }, { "epoch": 0.6904252473923509, "grad_norm": 1.1455984115600586, "learning_rate": 1.7972335072779646e-05, "loss": 1.0688, "step": 5163 }, { "epoch": 0.6905589729874297, "grad_norm": 1.0796681642532349, "learning_rate": 1.797146344165652e-05, "loss": 1.0348, "step": 5164 }, { "epoch": 0.6906926985825087, "grad_norm": 1.1698533296585083, "learning_rate": 1.797059164437587e-05, "loss": 1.0522, "step": 5165 }, { "epoch": 0.6908264241775876, "grad_norm": 1.1333894729614258, "learning_rate": 1.796971968095586e-05, "loss": 0.9504, "step": 5166 }, { "epoch": 0.6909601497726665, "grad_norm": 1.1816248893737793, "learning_rate": 1.796884755141467e-05, "loss": 1.1082, "step": 5167 }, { "epoch": 0.6910938753677454, "grad_norm": 1.1587681770324707, "learning_rate": 1.796797525577048e-05, "loss": 0.9955, "step": 5168 }, { "epoch": 0.6912276009628243, "grad_norm": 1.1130093336105347, "learning_rate": 1.796710279404147e-05, "loss": 0.9992, "step": 5169 }, { "epoch": 0.6913613265579032, "grad_norm": 1.2344483137130737, "learning_rate": 1.7966230166245825e-05, "loss": 0.9963, "step": 5170 }, { "epoch": 0.6914950521529821, "grad_norm": 1.0873537063598633, "learning_rate": 1.7965357372401733e-05, "loss": 0.9422, "step": 5171 }, { "epoch": 0.691628777748061, "grad_norm": 1.083786964416504, "learning_rate": 1.7964484412527394e-05, "loss": 1.038, "step": 5172 }, { "epoch": 0.6917625033431398, "grad_norm": 1.1155650615692139, "learning_rate": 1.7963611286640996e-05, "loss": 0.9729, "step": 5173 }, { "epoch": 0.6918962289382188, "grad_norm": 1.072467565536499, "learning_rate": 1.7962737994760743e-05, "loss": 0.9215, "step": 5174 }, { "epoch": 0.6920299545332976, "grad_norm": 1.2219685316085815, "learning_rate": 1.796186453690483e-05, "loss": 1.0741, "step": 5175 }, { "epoch": 0.6921636801283766, "grad_norm": 0.9055397510528564, "learning_rate": 1.7960990913091477e-05, "loss": 0.8628, "step": 5176 }, { "epoch": 0.6922974057234554, "grad_norm": 1.066775918006897, "learning_rate": 1.7960117123338884e-05, "loss": 0.986, "step": 5177 }, { "epoch": 0.6924311313185344, "grad_norm": 1.1202335357666016, "learning_rate": 1.7959243167665263e-05, "loss": 0.9648, "step": 5178 }, { "epoch": 0.6925648569136132, "grad_norm": 1.1116210222244263, "learning_rate": 1.7958369046088837e-05, "loss": 0.9279, "step": 5179 }, { "epoch": 0.6926985825086922, "grad_norm": 1.0598499774932861, "learning_rate": 1.7957494758627823e-05, "loss": 1.0667, "step": 5180 }, { "epoch": 0.692832308103771, "grad_norm": 1.0478835105895996, "learning_rate": 1.7956620305300444e-05, "loss": 0.9479, "step": 5181 }, { "epoch": 0.69296603369885, "grad_norm": 1.0910258293151855, "learning_rate": 1.795574568612493e-05, "loss": 1.0016, "step": 5182 }, { "epoch": 0.6930997592939289, "grad_norm": 1.0974817276000977, "learning_rate": 1.795487090111951e-05, "loss": 0.9586, "step": 5183 }, { "epoch": 0.6932334848890077, "grad_norm": 1.0336499214172363, "learning_rate": 1.795399595030242e-05, "loss": 1.0022, "step": 5184 }, { "epoch": 0.6933672104840867, "grad_norm": 1.0178587436676025, "learning_rate": 1.7953120833691894e-05, "loss": 0.8889, "step": 5185 }, { "epoch": 0.6935009360791655, "grad_norm": 1.0869948863983154, "learning_rate": 1.7952245551306173e-05, "loss": 0.8964, "step": 5186 }, { "epoch": 0.6936346616742445, "grad_norm": 1.051178216934204, "learning_rate": 1.7951370103163507e-05, "loss": 0.9192, "step": 5187 }, { "epoch": 0.6937683872693233, "grad_norm": 1.145849585533142, "learning_rate": 1.795049448928213e-05, "loss": 0.9115, "step": 5188 }, { "epoch": 0.6939021128644023, "grad_norm": 1.0809355974197388, "learning_rate": 1.7949618709680315e-05, "loss": 0.9271, "step": 5189 }, { "epoch": 0.6940358384594811, "grad_norm": 1.172094464302063, "learning_rate": 1.79487427643763e-05, "loss": 1.0029, "step": 5190 }, { "epoch": 0.6941695640545601, "grad_norm": 1.1019080877304077, "learning_rate": 1.7947866653388346e-05, "loss": 1.003, "step": 5191 }, { "epoch": 0.6943032896496389, "grad_norm": 1.0884426832199097, "learning_rate": 1.794699037673472e-05, "loss": 0.9258, "step": 5192 }, { "epoch": 0.6944370152447178, "grad_norm": 1.0800942182540894, "learning_rate": 1.7946113934433686e-05, "loss": 0.9453, "step": 5193 }, { "epoch": 0.6945707408397968, "grad_norm": 1.0571277141571045, "learning_rate": 1.7945237326503507e-05, "loss": 1.0003, "step": 5194 }, { "epoch": 0.6947044664348756, "grad_norm": 1.208843469619751, "learning_rate": 1.7944360552962455e-05, "loss": 0.9992, "step": 5195 }, { "epoch": 0.6948381920299546, "grad_norm": 1.2451117038726807, "learning_rate": 1.7943483613828817e-05, "loss": 0.9665, "step": 5196 }, { "epoch": 0.6949719176250334, "grad_norm": 1.166275978088379, "learning_rate": 1.7942606509120862e-05, "loss": 0.9463, "step": 5197 }, { "epoch": 0.6951056432201124, "grad_norm": 1.0716139078140259, "learning_rate": 1.7941729238856868e-05, "loss": 0.957, "step": 5198 }, { "epoch": 0.6952393688151912, "grad_norm": 1.001428246498108, "learning_rate": 1.7940851803055138e-05, "loss": 0.9251, "step": 5199 }, { "epoch": 0.6953730944102702, "grad_norm": 1.0861520767211914, "learning_rate": 1.7939974201733944e-05, "loss": 1.0275, "step": 5200 }, { "epoch": 0.695506820005349, "grad_norm": 1.0136325359344482, "learning_rate": 1.7939096434911586e-05, "loss": 0.8965, "step": 5201 }, { "epoch": 0.6956405456004279, "grad_norm": 1.1068634986877441, "learning_rate": 1.7938218502606362e-05, "loss": 1.0769, "step": 5202 }, { "epoch": 0.6957742711955068, "grad_norm": 1.0925811529159546, "learning_rate": 1.7937340404836566e-05, "loss": 0.9873, "step": 5203 }, { "epoch": 0.6959079967905857, "grad_norm": 0.9867472648620605, "learning_rate": 1.7936462141620507e-05, "loss": 0.982, "step": 5204 }, { "epoch": 0.6960417223856646, "grad_norm": 1.0225833654403687, "learning_rate": 1.7935583712976487e-05, "loss": 0.9542, "step": 5205 }, { "epoch": 0.6961754479807435, "grad_norm": 1.1636637449264526, "learning_rate": 1.7934705118922823e-05, "loss": 0.9161, "step": 5206 }, { "epoch": 0.6963091735758224, "grad_norm": 1.1225420236587524, "learning_rate": 1.793382635947782e-05, "loss": 0.9665, "step": 5207 }, { "epoch": 0.6964428991709013, "grad_norm": 1.0824493169784546, "learning_rate": 1.7932947434659796e-05, "loss": 1.0437, "step": 5208 }, { "epoch": 0.6965766247659803, "grad_norm": 0.9740232229232788, "learning_rate": 1.7932068344487076e-05, "loss": 0.9959, "step": 5209 }, { "epoch": 0.6967103503610591, "grad_norm": 1.0829992294311523, "learning_rate": 1.7931189088977984e-05, "loss": 1.0007, "step": 5210 }, { "epoch": 0.696844075956138, "grad_norm": 1.2006179094314575, "learning_rate": 1.793030966815084e-05, "loss": 0.9698, "step": 5211 }, { "epoch": 0.6969778015512169, "grad_norm": 1.1203135251998901, "learning_rate": 1.792943008202398e-05, "loss": 0.8842, "step": 5212 }, { "epoch": 0.6971115271462958, "grad_norm": 1.1312508583068848, "learning_rate": 1.7928550330615743e-05, "loss": 0.8798, "step": 5213 }, { "epoch": 0.6972452527413747, "grad_norm": 1.0806264877319336, "learning_rate": 1.7927670413944458e-05, "loss": 0.9134, "step": 5214 }, { "epoch": 0.6973789783364536, "grad_norm": 1.141685128211975, "learning_rate": 1.792679033202847e-05, "loss": 0.9105, "step": 5215 }, { "epoch": 0.6975127039315325, "grad_norm": 1.0786662101745605, "learning_rate": 1.792591008488612e-05, "loss": 0.8921, "step": 5216 }, { "epoch": 0.6976464295266114, "grad_norm": 1.2741613388061523, "learning_rate": 1.792502967253576e-05, "loss": 0.9572, "step": 5217 }, { "epoch": 0.6977801551216903, "grad_norm": 0.9734985828399658, "learning_rate": 1.792414909499574e-05, "loss": 0.9395, "step": 5218 }, { "epoch": 0.6979138807167692, "grad_norm": 1.041425108909607, "learning_rate": 1.7923268352284415e-05, "loss": 0.9642, "step": 5219 }, { "epoch": 0.698047606311848, "grad_norm": 1.037178874015808, "learning_rate": 1.7922387444420143e-05, "loss": 0.9762, "step": 5220 }, { "epoch": 0.698181331906927, "grad_norm": 1.1781412363052368, "learning_rate": 1.7921506371421285e-05, "loss": 0.873, "step": 5221 }, { "epoch": 0.6983150575020058, "grad_norm": 0.996019184589386, "learning_rate": 1.7920625133306205e-05, "loss": 0.8171, "step": 5222 }, { "epoch": 0.6984487830970848, "grad_norm": 1.1254467964172363, "learning_rate": 1.7919743730093278e-05, "loss": 1.031, "step": 5223 }, { "epoch": 0.6985825086921637, "grad_norm": 1.1469203233718872, "learning_rate": 1.791886216180087e-05, "loss": 1.1622, "step": 5224 }, { "epoch": 0.6987162342872426, "grad_norm": 1.1206374168395996, "learning_rate": 1.7917980428447356e-05, "loss": 1.0425, "step": 5225 }, { "epoch": 0.6988499598823215, "grad_norm": 1.1212012767791748, "learning_rate": 1.7917098530051117e-05, "loss": 0.918, "step": 5226 }, { "epoch": 0.6989836854774004, "grad_norm": 1.1862643957138062, "learning_rate": 1.7916216466630532e-05, "loss": 1.0259, "step": 5227 }, { "epoch": 0.6991174110724793, "grad_norm": 1.0381441116333008, "learning_rate": 1.7915334238203995e-05, "loss": 0.9888, "step": 5228 }, { "epoch": 0.6992511366675581, "grad_norm": 1.0241427421569824, "learning_rate": 1.7914451844789887e-05, "loss": 0.9357, "step": 5229 }, { "epoch": 0.6993848622626371, "grad_norm": 1.0289061069488525, "learning_rate": 1.7913569286406606e-05, "loss": 0.8757, "step": 5230 }, { "epoch": 0.6995185878577159, "grad_norm": 0.9870235323905945, "learning_rate": 1.7912686563072542e-05, "loss": 1.079, "step": 5231 }, { "epoch": 0.6996523134527949, "grad_norm": 0.9351276159286499, "learning_rate": 1.79118036748061e-05, "loss": 0.9584, "step": 5232 }, { "epoch": 0.6997860390478737, "grad_norm": 1.1108912229537964, "learning_rate": 1.791092062162568e-05, "loss": 0.9493, "step": 5233 }, { "epoch": 0.6999197646429527, "grad_norm": 1.1023406982421875, "learning_rate": 1.7910037403549695e-05, "loss": 0.9597, "step": 5234 }, { "epoch": 0.7000534902380315, "grad_norm": 0.9589882493019104, "learning_rate": 1.7909154020596543e-05, "loss": 0.8516, "step": 5235 }, { "epoch": 0.7001872158331105, "grad_norm": 1.09096360206604, "learning_rate": 1.7908270472784647e-05, "loss": 0.9421, "step": 5236 }, { "epoch": 0.7003209414281893, "grad_norm": 0.996152937412262, "learning_rate": 1.7907386760132418e-05, "loss": 0.9539, "step": 5237 }, { "epoch": 0.7004546670232683, "grad_norm": 1.2319047451019287, "learning_rate": 1.790650288265828e-05, "loss": 0.8822, "step": 5238 }, { "epoch": 0.7005883926183472, "grad_norm": 1.1690583229064941, "learning_rate": 1.7905618840380655e-05, "loss": 0.9331, "step": 5239 }, { "epoch": 0.700722118213426, "grad_norm": 1.053465723991394, "learning_rate": 1.790473463331797e-05, "loss": 0.7813, "step": 5240 }, { "epoch": 0.700855843808505, "grad_norm": 1.1357570886611938, "learning_rate": 1.7903850261488656e-05, "loss": 1.0106, "step": 5241 }, { "epoch": 0.7009895694035838, "grad_norm": 1.087565302848816, "learning_rate": 1.7902965724911148e-05, "loss": 1.0207, "step": 5242 }, { "epoch": 0.7011232949986628, "grad_norm": 1.0234267711639404, "learning_rate": 1.7902081023603878e-05, "loss": 0.8715, "step": 5243 }, { "epoch": 0.7012570205937416, "grad_norm": 1.0132678747177124, "learning_rate": 1.7901196157585296e-05, "loss": 0.8377, "step": 5244 }, { "epoch": 0.7013907461888206, "grad_norm": 1.1064453125, "learning_rate": 1.7900311126873835e-05, "loss": 1.0199, "step": 5245 }, { "epoch": 0.7015244717838994, "grad_norm": 1.1431941986083984, "learning_rate": 1.789942593148795e-05, "loss": 0.8774, "step": 5246 }, { "epoch": 0.7016581973789784, "grad_norm": 1.0045065879821777, "learning_rate": 1.7898540571446093e-05, "loss": 0.9808, "step": 5247 }, { "epoch": 0.7017919229740572, "grad_norm": 0.9827533960342407, "learning_rate": 1.7897655046766712e-05, "loss": 0.992, "step": 5248 }, { "epoch": 0.7019256485691361, "grad_norm": 1.1866761445999146, "learning_rate": 1.789676935746827e-05, "loss": 0.9527, "step": 5249 }, { "epoch": 0.702059374164215, "grad_norm": 1.115775465965271, "learning_rate": 1.7895883503569228e-05, "loss": 0.9541, "step": 5250 }, { "epoch": 0.7021930997592939, "grad_norm": 1.0478111505508423, "learning_rate": 1.789499748508805e-05, "loss": 0.8408, "step": 5251 }, { "epoch": 0.7023268253543729, "grad_norm": 0.9772509336471558, "learning_rate": 1.7894111302043203e-05, "loss": 0.9891, "step": 5252 }, { "epoch": 0.7024605509494517, "grad_norm": 0.959745466709137, "learning_rate": 1.7893224954453163e-05, "loss": 0.9032, "step": 5253 }, { "epoch": 0.7025942765445307, "grad_norm": 1.1236652135849, "learning_rate": 1.78923384423364e-05, "loss": 1.0313, "step": 5254 }, { "epoch": 0.7027280021396095, "grad_norm": 1.166336178779602, "learning_rate": 1.7891451765711393e-05, "loss": 1.0228, "step": 5255 }, { "epoch": 0.7028617277346885, "grad_norm": 1.0581203699111938, "learning_rate": 1.7890564924596624e-05, "loss": 0.9542, "step": 5256 }, { "epoch": 0.7029954533297673, "grad_norm": 1.1052253246307373, "learning_rate": 1.788967791901058e-05, "loss": 0.9759, "step": 5257 }, { "epoch": 0.7031291789248462, "grad_norm": 1.1265859603881836, "learning_rate": 1.7888790748971753e-05, "loss": 1.0141, "step": 5258 }, { "epoch": 0.7032629045199251, "grad_norm": 0.9762358069419861, "learning_rate": 1.7887903414498632e-05, "loss": 0.9018, "step": 5259 }, { "epoch": 0.703396630115004, "grad_norm": 1.12031090259552, "learning_rate": 1.7887015915609708e-05, "loss": 1.0562, "step": 5260 }, { "epoch": 0.7035303557100829, "grad_norm": 1.1069087982177734, "learning_rate": 1.7886128252323486e-05, "loss": 1.031, "step": 5261 }, { "epoch": 0.7036640813051618, "grad_norm": 1.1441650390625, "learning_rate": 1.7885240424658466e-05, "loss": 0.9809, "step": 5262 }, { "epoch": 0.7037978069002407, "grad_norm": 0.9909963607788086, "learning_rate": 1.7884352432633157e-05, "loss": 0.9829, "step": 5263 }, { "epoch": 0.7039315324953196, "grad_norm": 1.0335956811904907, "learning_rate": 1.7883464276266064e-05, "loss": 0.8514, "step": 5264 }, { "epoch": 0.7040652580903985, "grad_norm": 1.1265125274658203, "learning_rate": 1.7882575955575702e-05, "loss": 0.9805, "step": 5265 }, { "epoch": 0.7041989836854774, "grad_norm": 0.9583535194396973, "learning_rate": 1.788168747058059e-05, "loss": 0.8798, "step": 5266 }, { "epoch": 0.7043327092805562, "grad_norm": 1.2350503206253052, "learning_rate": 1.788079882129924e-05, "loss": 1.0675, "step": 5267 }, { "epoch": 0.7044664348756352, "grad_norm": 1.0673515796661377, "learning_rate": 1.7879910007750184e-05, "loss": 0.9671, "step": 5268 }, { "epoch": 0.704600160470714, "grad_norm": 1.1447023153305054, "learning_rate": 1.787902102995194e-05, "loss": 0.9373, "step": 5269 }, { "epoch": 0.704733886065793, "grad_norm": 1.0985013246536255, "learning_rate": 1.7878131887923045e-05, "loss": 0.8941, "step": 5270 }, { "epoch": 0.7048676116608719, "grad_norm": 1.0972760915756226, "learning_rate": 1.7877242581682028e-05, "loss": 0.9793, "step": 5271 }, { "epoch": 0.7050013372559508, "grad_norm": 1.0065748691558838, "learning_rate": 1.7876353111247425e-05, "loss": 0.9436, "step": 5272 }, { "epoch": 0.7051350628510297, "grad_norm": 1.0345087051391602, "learning_rate": 1.7875463476637783e-05, "loss": 0.9163, "step": 5273 }, { "epoch": 0.7052687884461086, "grad_norm": 1.1655449867248535, "learning_rate": 1.7874573677871638e-05, "loss": 0.904, "step": 5274 }, { "epoch": 0.7054025140411875, "grad_norm": 1.2329585552215576, "learning_rate": 1.787368371496754e-05, "loss": 0.9389, "step": 5275 }, { "epoch": 0.7055362396362663, "grad_norm": 1.0300840139389038, "learning_rate": 1.787279358794404e-05, "loss": 0.9652, "step": 5276 }, { "epoch": 0.7056699652313453, "grad_norm": 1.0218945741653442, "learning_rate": 1.787190329681969e-05, "loss": 0.987, "step": 5277 }, { "epoch": 0.7058036908264241, "grad_norm": 1.064854383468628, "learning_rate": 1.787101284161305e-05, "loss": 0.9745, "step": 5278 }, { "epoch": 0.7059374164215031, "grad_norm": 1.0169978141784668, "learning_rate": 1.787012222234268e-05, "loss": 0.9774, "step": 5279 }, { "epoch": 0.7060711420165819, "grad_norm": 1.0274205207824707, "learning_rate": 1.786923143902714e-05, "loss": 0.8909, "step": 5280 }, { "epoch": 0.7062048676116609, "grad_norm": 1.074730396270752, "learning_rate": 1.7868340491685e-05, "loss": 0.9401, "step": 5281 }, { "epoch": 0.7063385932067398, "grad_norm": 1.1362671852111816, "learning_rate": 1.7867449380334834e-05, "loss": 0.9214, "step": 5282 }, { "epoch": 0.7064723188018187, "grad_norm": 1.0211025476455688, "learning_rate": 1.7866558104995214e-05, "loss": 0.8922, "step": 5283 }, { "epoch": 0.7066060443968976, "grad_norm": 1.0863420963287354, "learning_rate": 1.786566666568472e-05, "loss": 0.9344, "step": 5284 }, { "epoch": 0.7067397699919765, "grad_norm": 1.0758394002914429, "learning_rate": 1.7864775062421924e-05, "loss": 1.0502, "step": 5285 }, { "epoch": 0.7068734955870554, "grad_norm": 1.0227526426315308, "learning_rate": 1.7863883295225423e-05, "loss": 1.0557, "step": 5286 }, { "epoch": 0.7070072211821342, "grad_norm": 1.0228816270828247, "learning_rate": 1.78629913641138e-05, "loss": 0.9861, "step": 5287 }, { "epoch": 0.7071409467772132, "grad_norm": 1.1481306552886963, "learning_rate": 1.7862099269105644e-05, "loss": 0.9826, "step": 5288 }, { "epoch": 0.707274672372292, "grad_norm": 1.1520885229110718, "learning_rate": 1.786120701021955e-05, "loss": 0.9349, "step": 5289 }, { "epoch": 0.707408397967371, "grad_norm": 1.0344934463500977, "learning_rate": 1.7860314587474125e-05, "loss": 0.8703, "step": 5290 }, { "epoch": 0.7075421235624498, "grad_norm": 1.1576783657073975, "learning_rate": 1.785942200088796e-05, "loss": 0.8574, "step": 5291 }, { "epoch": 0.7076758491575288, "grad_norm": 1.1413007974624634, "learning_rate": 1.785852925047966e-05, "loss": 1.0546, "step": 5292 }, { "epoch": 0.7078095747526076, "grad_norm": 1.1409422159194946, "learning_rate": 1.7857636336267843e-05, "loss": 0.9736, "step": 5293 }, { "epoch": 0.7079433003476866, "grad_norm": 1.0932285785675049, "learning_rate": 1.7856743258271115e-05, "loss": 1.0161, "step": 5294 }, { "epoch": 0.7080770259427654, "grad_norm": 1.1391288042068481, "learning_rate": 1.785585001650809e-05, "loss": 0.9781, "step": 5295 }, { "epoch": 0.7082107515378443, "grad_norm": 1.0212510824203491, "learning_rate": 1.7854956610997388e-05, "loss": 0.9149, "step": 5296 }, { "epoch": 0.7083444771329233, "grad_norm": 1.2093931436538696, "learning_rate": 1.7854063041757635e-05, "loss": 1.0497, "step": 5297 }, { "epoch": 0.7084782027280021, "grad_norm": 1.082269549369812, "learning_rate": 1.785316930880745e-05, "loss": 1.0709, "step": 5298 }, { "epoch": 0.7086119283230811, "grad_norm": 0.9924930930137634, "learning_rate": 1.7852275412165467e-05, "loss": 0.964, "step": 5299 }, { "epoch": 0.7087456539181599, "grad_norm": 1.0674864053726196, "learning_rate": 1.7851381351850318e-05, "loss": 0.9801, "step": 5300 }, { "epoch": 0.7088793795132389, "grad_norm": 1.0504636764526367, "learning_rate": 1.7850487127880636e-05, "loss": 0.9648, "step": 5301 }, { "epoch": 0.7090131051083177, "grad_norm": 1.0514013767242432, "learning_rate": 1.7849592740275063e-05, "loss": 0.9881, "step": 5302 }, { "epoch": 0.7091468307033967, "grad_norm": 1.1882227659225464, "learning_rate": 1.784869818905224e-05, "loss": 0.9545, "step": 5303 }, { "epoch": 0.7092805562984755, "grad_norm": 1.171319842338562, "learning_rate": 1.7847803474230813e-05, "loss": 1.0266, "step": 5304 }, { "epoch": 0.7094142818935544, "grad_norm": 1.018519639968872, "learning_rate": 1.7846908595829432e-05, "loss": 0.9881, "step": 5305 }, { "epoch": 0.7095480074886333, "grad_norm": 1.0081459283828735, "learning_rate": 1.7846013553866754e-05, "loss": 0.8423, "step": 5306 }, { "epoch": 0.7096817330837122, "grad_norm": 1.0839706659317017, "learning_rate": 1.7845118348361428e-05, "loss": 0.9642, "step": 5307 }, { "epoch": 0.7098154586787911, "grad_norm": 0.9726243615150452, "learning_rate": 1.7844222979332115e-05, "loss": 0.7332, "step": 5308 }, { "epoch": 0.70994918427387, "grad_norm": 1.054402470588684, "learning_rate": 1.7843327446797482e-05, "loss": 0.9754, "step": 5309 }, { "epoch": 0.710082909868949, "grad_norm": 1.0407793521881104, "learning_rate": 1.7842431750776196e-05, "loss": 0.9681, "step": 5310 }, { "epoch": 0.7102166354640278, "grad_norm": 0.9815563559532166, "learning_rate": 1.784153589128692e-05, "loss": 0.9874, "step": 5311 }, { "epoch": 0.7103503610591068, "grad_norm": 1.109031081199646, "learning_rate": 1.7840639868348338e-05, "loss": 1.008, "step": 5312 }, { "epoch": 0.7104840866541856, "grad_norm": 1.0666192770004272, "learning_rate": 1.7839743681979117e-05, "loss": 1.0199, "step": 5313 }, { "epoch": 0.7106178122492645, "grad_norm": 1.0544461011886597, "learning_rate": 1.783884733219794e-05, "loss": 0.8564, "step": 5314 }, { "epoch": 0.7107515378443434, "grad_norm": 0.9892165064811707, "learning_rate": 1.783795081902349e-05, "loss": 0.9478, "step": 5315 }, { "epoch": 0.7108852634394223, "grad_norm": 0.9916752576828003, "learning_rate": 1.783705414247446e-05, "loss": 0.8816, "step": 5316 }, { "epoch": 0.7110189890345012, "grad_norm": 1.0418808460235596, "learning_rate": 1.783615730256953e-05, "loss": 0.9885, "step": 5317 }, { "epoch": 0.7111527146295801, "grad_norm": 1.0031366348266602, "learning_rate": 1.7835260299327402e-05, "loss": 0.9534, "step": 5318 }, { "epoch": 0.711286440224659, "grad_norm": 1.0235954523086548, "learning_rate": 1.7834363132766772e-05, "loss": 0.9269, "step": 5319 }, { "epoch": 0.7114201658197379, "grad_norm": 1.0455982685089111, "learning_rate": 1.7833465802906338e-05, "loss": 1.0242, "step": 5320 }, { "epoch": 0.7115538914148168, "grad_norm": 1.2224328517913818, "learning_rate": 1.7832568309764802e-05, "loss": 0.9916, "step": 5321 }, { "epoch": 0.7116876170098957, "grad_norm": 0.9905663728713989, "learning_rate": 1.783167065336088e-05, "loss": 0.9772, "step": 5322 }, { "epoch": 0.7118213426049745, "grad_norm": 0.9096208810806274, "learning_rate": 1.7830772833713275e-05, "loss": 0.9369, "step": 5323 }, { "epoch": 0.7119550682000535, "grad_norm": 1.181073546409607, "learning_rate": 1.7829874850840705e-05, "loss": 1.0427, "step": 5324 }, { "epoch": 0.7120887937951323, "grad_norm": 1.0163829326629639, "learning_rate": 1.7828976704761884e-05, "loss": 0.9686, "step": 5325 }, { "epoch": 0.7122225193902113, "grad_norm": 1.2507660388946533, "learning_rate": 1.7828078395495536e-05, "loss": 0.8775, "step": 5326 }, { "epoch": 0.7123562449852902, "grad_norm": 1.048471212387085, "learning_rate": 1.7827179923060382e-05, "loss": 0.946, "step": 5327 }, { "epoch": 0.7124899705803691, "grad_norm": 1.0272212028503418, "learning_rate": 1.782628128747516e-05, "loss": 0.9341, "step": 5328 }, { "epoch": 0.712623696175448, "grad_norm": 1.1031184196472168, "learning_rate": 1.7825382488758585e-05, "loss": 1.0057, "step": 5329 }, { "epoch": 0.7127574217705269, "grad_norm": 1.1085314750671387, "learning_rate": 1.7824483526929403e-05, "loss": 1.1132, "step": 5330 }, { "epoch": 0.7128911473656058, "grad_norm": 1.0439192056655884, "learning_rate": 1.782358440200635e-05, "loss": 1.0181, "step": 5331 }, { "epoch": 0.7130248729606847, "grad_norm": 1.0995310544967651, "learning_rate": 1.782268511400817e-05, "loss": 1.0269, "step": 5332 }, { "epoch": 0.7131585985557636, "grad_norm": 1.021683692932129, "learning_rate": 1.7821785662953597e-05, "loss": 0.9717, "step": 5333 }, { "epoch": 0.7132923241508424, "grad_norm": 1.1692471504211426, "learning_rate": 1.782088604886139e-05, "loss": 0.9467, "step": 5334 }, { "epoch": 0.7134260497459214, "grad_norm": 1.189568281173706, "learning_rate": 1.7819986271750295e-05, "loss": 1.0362, "step": 5335 }, { "epoch": 0.7135597753410002, "grad_norm": 1.0767238140106201, "learning_rate": 1.781908633163907e-05, "loss": 0.8939, "step": 5336 }, { "epoch": 0.7136935009360792, "grad_norm": 0.966705858707428, "learning_rate": 1.7818186228546474e-05, "loss": 0.8912, "step": 5337 }, { "epoch": 0.713827226531158, "grad_norm": 1.1073014736175537, "learning_rate": 1.7817285962491268e-05, "loss": 0.8977, "step": 5338 }, { "epoch": 0.713960952126237, "grad_norm": 1.1901623010635376, "learning_rate": 1.7816385533492213e-05, "loss": 0.9191, "step": 5339 }, { "epoch": 0.7140946777213159, "grad_norm": 1.0701591968536377, "learning_rate": 1.7815484941568084e-05, "loss": 0.9866, "step": 5340 }, { "epoch": 0.7142284033163948, "grad_norm": 0.9914907813072205, "learning_rate": 1.781458418673765e-05, "loss": 0.9453, "step": 5341 }, { "epoch": 0.7143621289114737, "grad_norm": 1.0258045196533203, "learning_rate": 1.7813683269019682e-05, "loss": 0.9324, "step": 5342 }, { "epoch": 0.7144958545065525, "grad_norm": 0.9813135266304016, "learning_rate": 1.781278218843297e-05, "loss": 0.8608, "step": 5343 }, { "epoch": 0.7146295801016315, "grad_norm": 0.950508713722229, "learning_rate": 1.7811880944996285e-05, "loss": 0.9924, "step": 5344 }, { "epoch": 0.7147633056967103, "grad_norm": 1.1717063188552856, "learning_rate": 1.7810979538728416e-05, "loss": 1.0356, "step": 5345 }, { "epoch": 0.7148970312917893, "grad_norm": 1.1714346408843994, "learning_rate": 1.7810077969648157e-05, "loss": 1.0761, "step": 5346 }, { "epoch": 0.7150307568868681, "grad_norm": 1.1618902683258057, "learning_rate": 1.780917623777429e-05, "loss": 1.1361, "step": 5347 }, { "epoch": 0.7151644824819471, "grad_norm": 1.1420725584030151, "learning_rate": 1.7808274343125626e-05, "loss": 0.932, "step": 5348 }, { "epoch": 0.7152982080770259, "grad_norm": 1.1327266693115234, "learning_rate": 1.7807372285720945e-05, "loss": 0.936, "step": 5349 }, { "epoch": 0.7154319336721049, "grad_norm": 1.107387900352478, "learning_rate": 1.7806470065579064e-05, "loss": 1.022, "step": 5350 }, { "epoch": 0.7155656592671837, "grad_norm": 1.0707104206085205, "learning_rate": 1.7805567682718785e-05, "loss": 0.8787, "step": 5351 }, { "epoch": 0.7156993848622626, "grad_norm": 1.0453429222106934, "learning_rate": 1.7804665137158917e-05, "loss": 0.9422, "step": 5352 }, { "epoch": 0.7158331104573415, "grad_norm": 0.9811695218086243, "learning_rate": 1.780376242891827e-05, "loss": 0.86, "step": 5353 }, { "epoch": 0.7159668360524204, "grad_norm": 1.0117377042770386, "learning_rate": 1.7802859558015666e-05, "loss": 0.9357, "step": 5354 }, { "epoch": 0.7161005616474994, "grad_norm": 1.071099042892456, "learning_rate": 1.7801956524469922e-05, "loss": 0.9805, "step": 5355 }, { "epoch": 0.7162342872425782, "grad_norm": 1.0444166660308838, "learning_rate": 1.7801053328299856e-05, "loss": 0.9908, "step": 5356 }, { "epoch": 0.7163680128376572, "grad_norm": 1.1647387742996216, "learning_rate": 1.78001499695243e-05, "loss": 0.9765, "step": 5357 }, { "epoch": 0.716501738432736, "grad_norm": 1.1209625005722046, "learning_rate": 1.779924644816208e-05, "loss": 0.8907, "step": 5358 }, { "epoch": 0.716635464027815, "grad_norm": 1.054835319519043, "learning_rate": 1.779834276423203e-05, "loss": 0.9039, "step": 5359 }, { "epoch": 0.7167691896228938, "grad_norm": 0.9631587266921997, "learning_rate": 1.7797438917752992e-05, "loss": 0.8217, "step": 5360 }, { "epoch": 0.7169029152179727, "grad_norm": 1.1388700008392334, "learning_rate": 1.7796534908743798e-05, "loss": 0.9218, "step": 5361 }, { "epoch": 0.7170366408130516, "grad_norm": 1.0172324180603027, "learning_rate": 1.7795630737223296e-05, "loss": 0.9053, "step": 5362 }, { "epoch": 0.7171703664081305, "grad_norm": 1.015089511871338, "learning_rate": 1.7794726403210328e-05, "loss": 0.8661, "step": 5363 }, { "epoch": 0.7173040920032094, "grad_norm": 1.0246933698654175, "learning_rate": 1.779382190672375e-05, "loss": 0.8592, "step": 5364 }, { "epoch": 0.7174378175982883, "grad_norm": 1.292546272277832, "learning_rate": 1.779291724778241e-05, "loss": 0.8703, "step": 5365 }, { "epoch": 0.7175715431933672, "grad_norm": 1.070896863937378, "learning_rate": 1.779201242640517e-05, "loss": 1.0226, "step": 5366 }, { "epoch": 0.7177052687884461, "grad_norm": 1.0165013074874878, "learning_rate": 1.7791107442610886e-05, "loss": 0.9088, "step": 5367 }, { "epoch": 0.717838994383525, "grad_norm": 1.0338480472564697, "learning_rate": 1.779020229641842e-05, "loss": 1.0075, "step": 5368 }, { "epoch": 0.7179727199786039, "grad_norm": 1.1418612003326416, "learning_rate": 1.7789296987846644e-05, "loss": 1.0456, "step": 5369 }, { "epoch": 0.7181064455736828, "grad_norm": 1.0352901220321655, "learning_rate": 1.7788391516914422e-05, "loss": 0.8802, "step": 5370 }, { "epoch": 0.7182401711687617, "grad_norm": 1.0773141384124756, "learning_rate": 1.7787485883640635e-05, "loss": 0.9889, "step": 5371 }, { "epoch": 0.7183738967638406, "grad_norm": 1.1402558088302612, "learning_rate": 1.7786580088044157e-05, "loss": 0.9228, "step": 5372 }, { "epoch": 0.7185076223589195, "grad_norm": 1.1984896659851074, "learning_rate": 1.7785674130143865e-05, "loss": 1.1222, "step": 5373 }, { "epoch": 0.7186413479539984, "grad_norm": 0.9233139753341675, "learning_rate": 1.778476800995865e-05, "loss": 0.8696, "step": 5374 }, { "epoch": 0.7187750735490773, "grad_norm": 1.0708703994750977, "learning_rate": 1.7783861727507394e-05, "loss": 0.9305, "step": 5375 }, { "epoch": 0.7189087991441562, "grad_norm": 1.2617658376693726, "learning_rate": 1.7782955282808986e-05, "loss": 1.0838, "step": 5376 }, { "epoch": 0.7190425247392351, "grad_norm": 1.1590847969055176, "learning_rate": 1.7782048675882325e-05, "loss": 0.8672, "step": 5377 }, { "epoch": 0.719176250334314, "grad_norm": 1.036059021949768, "learning_rate": 1.7781141906746304e-05, "loss": 0.7874, "step": 5378 }, { "epoch": 0.7193099759293928, "grad_norm": 1.0484755039215088, "learning_rate": 1.7780234975419828e-05, "loss": 0.8291, "step": 5379 }, { "epoch": 0.7194437015244718, "grad_norm": 1.2048934698104858, "learning_rate": 1.77793278819218e-05, "loss": 1.0946, "step": 5380 }, { "epoch": 0.7195774271195506, "grad_norm": 1.1381714344024658, "learning_rate": 1.7778420626271123e-05, "loss": 0.9157, "step": 5381 }, { "epoch": 0.7197111527146296, "grad_norm": 1.1251357793807983, "learning_rate": 1.777751320848671e-05, "loss": 0.9915, "step": 5382 }, { "epoch": 0.7198448783097084, "grad_norm": 1.180052638053894, "learning_rate": 1.777660562858748e-05, "loss": 1.0945, "step": 5383 }, { "epoch": 0.7199786039047874, "grad_norm": 1.0401805639266968, "learning_rate": 1.7775697886592345e-05, "loss": 0.9261, "step": 5384 }, { "epoch": 0.7201123294998663, "grad_norm": 1.0714852809906006, "learning_rate": 1.777478998252023e-05, "loss": 0.9992, "step": 5385 }, { "epoch": 0.7202460550949452, "grad_norm": 1.098952054977417, "learning_rate": 1.7773881916390056e-05, "loss": 0.9417, "step": 5386 }, { "epoch": 0.7203797806900241, "grad_norm": 1.1172902584075928, "learning_rate": 1.777297368822075e-05, "loss": 0.9151, "step": 5387 }, { "epoch": 0.720513506285103, "grad_norm": 1.043253779411316, "learning_rate": 1.777206529803125e-05, "loss": 0.9418, "step": 5388 }, { "epoch": 0.7206472318801819, "grad_norm": 0.9360518455505371, "learning_rate": 1.7771156745840482e-05, "loss": 0.8409, "step": 5389 }, { "epoch": 0.7207809574752607, "grad_norm": 0.9903491139411926, "learning_rate": 1.777024803166739e-05, "loss": 0.9553, "step": 5390 }, { "epoch": 0.7209146830703397, "grad_norm": 1.061397910118103, "learning_rate": 1.7769339155530915e-05, "loss": 0.9157, "step": 5391 }, { "epoch": 0.7210484086654185, "grad_norm": 1.0103857517242432, "learning_rate": 1.7768430117449998e-05, "loss": 0.8587, "step": 5392 }, { "epoch": 0.7211821342604975, "grad_norm": 1.0836666822433472, "learning_rate": 1.7767520917443584e-05, "loss": 1.046, "step": 5393 }, { "epoch": 0.7213158598555763, "grad_norm": 1.131263017654419, "learning_rate": 1.7766611555530638e-05, "loss": 1.0568, "step": 5394 }, { "epoch": 0.7214495854506553, "grad_norm": 0.9226694107055664, "learning_rate": 1.7765702031730102e-05, "loss": 0.8317, "step": 5395 }, { "epoch": 0.7215833110457341, "grad_norm": 1.1343775987625122, "learning_rate": 1.7764792346060936e-05, "loss": 0.9089, "step": 5396 }, { "epoch": 0.7217170366408131, "grad_norm": 1.0138285160064697, "learning_rate": 1.7763882498542104e-05, "loss": 0.9279, "step": 5397 }, { "epoch": 0.721850762235892, "grad_norm": 1.101556658744812, "learning_rate": 1.7762972489192575e-05, "loss": 1.0081, "step": 5398 }, { "epoch": 0.7219844878309708, "grad_norm": 1.0840650796890259, "learning_rate": 1.7762062318031307e-05, "loss": 0.801, "step": 5399 }, { "epoch": 0.7221182134260498, "grad_norm": 1.1196093559265137, "learning_rate": 1.776115198507728e-05, "loss": 0.9397, "step": 5400 }, { "epoch": 0.7222519390211286, "grad_norm": 1.0707428455352783, "learning_rate": 1.776024149034947e-05, "loss": 0.9643, "step": 5401 }, { "epoch": 0.7223856646162076, "grad_norm": 1.1441192626953125, "learning_rate": 1.7759330833866847e-05, "loss": 0.9521, "step": 5402 }, { "epoch": 0.7225193902112864, "grad_norm": 1.0849087238311768, "learning_rate": 1.77584200156484e-05, "loss": 0.927, "step": 5403 }, { "epoch": 0.7226531158063654, "grad_norm": 0.9749464392662048, "learning_rate": 1.7757509035713107e-05, "loss": 0.8853, "step": 5404 }, { "epoch": 0.7227868414014442, "grad_norm": 0.947208046913147, "learning_rate": 1.7756597894079966e-05, "loss": 0.8962, "step": 5405 }, { "epoch": 0.7229205669965232, "grad_norm": 1.1464723348617554, "learning_rate": 1.7755686590767962e-05, "loss": 0.9572, "step": 5406 }, { "epoch": 0.723054292591602, "grad_norm": 1.189192533493042, "learning_rate": 1.7754775125796095e-05, "loss": 0.9651, "step": 5407 }, { "epoch": 0.7231880181866809, "grad_norm": 1.0269020795822144, "learning_rate": 1.7753863499183358e-05, "loss": 0.8988, "step": 5408 }, { "epoch": 0.7233217437817598, "grad_norm": 1.1524895429611206, "learning_rate": 1.775295171094876e-05, "loss": 1.2092, "step": 5409 }, { "epoch": 0.7234554693768387, "grad_norm": 1.0381126403808594, "learning_rate": 1.77520397611113e-05, "loss": 0.9952, "step": 5410 }, { "epoch": 0.7235891949719176, "grad_norm": 1.070483922958374, "learning_rate": 1.775112764968999e-05, "loss": 0.859, "step": 5411 }, { "epoch": 0.7237229205669965, "grad_norm": 1.022913932800293, "learning_rate": 1.775021537670384e-05, "loss": 0.7976, "step": 5412 }, { "epoch": 0.7238566461620755, "grad_norm": 1.089581847190857, "learning_rate": 1.7749302942171866e-05, "loss": 0.99, "step": 5413 }, { "epoch": 0.7239903717571543, "grad_norm": 1.1272262334823608, "learning_rate": 1.7748390346113085e-05, "loss": 1.0403, "step": 5414 }, { "epoch": 0.7241240973522333, "grad_norm": 1.1359671354293823, "learning_rate": 1.7747477588546528e-05, "loss": 0.9009, "step": 5415 }, { "epoch": 0.7242578229473121, "grad_norm": 1.015596866607666, "learning_rate": 1.774656466949121e-05, "loss": 0.8761, "step": 5416 }, { "epoch": 0.724391548542391, "grad_norm": 1.0954852104187012, "learning_rate": 1.7745651588966167e-05, "loss": 0.9472, "step": 5417 }, { "epoch": 0.7245252741374699, "grad_norm": 1.0864711999893188, "learning_rate": 1.7744738346990425e-05, "loss": 0.7797, "step": 5418 }, { "epoch": 0.7246589997325488, "grad_norm": 1.0508103370666504, "learning_rate": 1.7743824943583028e-05, "loss": 0.9695, "step": 5419 }, { "epoch": 0.7247927253276277, "grad_norm": 1.115043044090271, "learning_rate": 1.7742911378763006e-05, "loss": 0.9365, "step": 5420 }, { "epoch": 0.7249264509227066, "grad_norm": 1.0312976837158203, "learning_rate": 1.7741997652549408e-05, "loss": 1.0612, "step": 5421 }, { "epoch": 0.7250601765177855, "grad_norm": 1.1387337446212769, "learning_rate": 1.7741083764961274e-05, "loss": 0.9425, "step": 5422 }, { "epoch": 0.7251939021128644, "grad_norm": 1.0692471265792847, "learning_rate": 1.774016971601766e-05, "loss": 0.8531, "step": 5423 }, { "epoch": 0.7253276277079433, "grad_norm": 0.9859254956245422, "learning_rate": 1.773925550573761e-05, "loss": 0.9036, "step": 5424 }, { "epoch": 0.7254613533030222, "grad_norm": 1.1124447584152222, "learning_rate": 1.7738341134140188e-05, "loss": 0.9257, "step": 5425 }, { "epoch": 0.725595078898101, "grad_norm": 1.0326091051101685, "learning_rate": 1.773742660124445e-05, "loss": 1.0117, "step": 5426 }, { "epoch": 0.72572880449318, "grad_norm": 0.9975651502609253, "learning_rate": 1.7736511907069455e-05, "loss": 0.9069, "step": 5427 }, { "epoch": 0.7258625300882589, "grad_norm": 1.1344283819198608, "learning_rate": 1.7735597051634277e-05, "loss": 1.0254, "step": 5428 }, { "epoch": 0.7259962556833378, "grad_norm": 1.1387197971343994, "learning_rate": 1.773468203495798e-05, "loss": 0.8987, "step": 5429 }, { "epoch": 0.7261299812784167, "grad_norm": 1.1215769052505493, "learning_rate": 1.7733766857059635e-05, "loss": 1.008, "step": 5430 }, { "epoch": 0.7262637068734956, "grad_norm": 1.0588525533676147, "learning_rate": 1.773285151795832e-05, "loss": 0.8995, "step": 5431 }, { "epoch": 0.7263974324685745, "grad_norm": 1.140607476234436, "learning_rate": 1.7731936017673116e-05, "loss": 1.0114, "step": 5432 }, { "epoch": 0.7265311580636534, "grad_norm": 1.0446076393127441, "learning_rate": 1.7731020356223102e-05, "loss": 1.0907, "step": 5433 }, { "epoch": 0.7266648836587323, "grad_norm": 1.117741346359253, "learning_rate": 1.773010453362737e-05, "loss": 1.0299, "step": 5434 }, { "epoch": 0.7267986092538112, "grad_norm": 1.116154432296753, "learning_rate": 1.7729188549905004e-05, "loss": 1.012, "step": 5435 }, { "epoch": 0.7269323348488901, "grad_norm": 1.1451771259307861, "learning_rate": 1.77282724050751e-05, "loss": 1.0374, "step": 5436 }, { "epoch": 0.7270660604439689, "grad_norm": 1.016891360282898, "learning_rate": 1.7727356099156755e-05, "loss": 0.8703, "step": 5437 }, { "epoch": 0.7271997860390479, "grad_norm": 1.0598512887954712, "learning_rate": 1.7726439632169064e-05, "loss": 0.964, "step": 5438 }, { "epoch": 0.7273335116341267, "grad_norm": 1.020731806755066, "learning_rate": 1.772552300413113e-05, "loss": 0.8849, "step": 5439 }, { "epoch": 0.7274672372292057, "grad_norm": 1.1210649013519287, "learning_rate": 1.7724606215062065e-05, "loss": 0.9839, "step": 5440 }, { "epoch": 0.7276009628242845, "grad_norm": 1.1568015813827515, "learning_rate": 1.7723689264980974e-05, "loss": 0.9993, "step": 5441 }, { "epoch": 0.7277346884193635, "grad_norm": 1.0551351308822632, "learning_rate": 1.772277215390697e-05, "loss": 0.9102, "step": 5442 }, { "epoch": 0.7278684140144424, "grad_norm": 1.0488078594207764, "learning_rate": 1.7721854881859166e-05, "loss": 1.0048, "step": 5443 }, { "epoch": 0.7280021396095213, "grad_norm": 1.0173295736312866, "learning_rate": 1.7720937448856694e-05, "loss": 0.875, "step": 5444 }, { "epoch": 0.7281358652046002, "grad_norm": 1.01760995388031, "learning_rate": 1.7720019854918663e-05, "loss": 0.9178, "step": 5445 }, { "epoch": 0.728269590799679, "grad_norm": 1.0633618831634521, "learning_rate": 1.771910210006421e-05, "loss": 0.9401, "step": 5446 }, { "epoch": 0.728403316394758, "grad_norm": 0.9810612201690674, "learning_rate": 1.771818418431246e-05, "loss": 0.933, "step": 5447 }, { "epoch": 0.7285370419898368, "grad_norm": 1.0796051025390625, "learning_rate": 1.7717266107682544e-05, "loss": 1.0128, "step": 5448 }, { "epoch": 0.7286707675849158, "grad_norm": 1.2071588039398193, "learning_rate": 1.77163478701936e-05, "loss": 1.1054, "step": 5449 }, { "epoch": 0.7288044931799946, "grad_norm": 1.03304123878479, "learning_rate": 1.7715429471864768e-05, "loss": 1.023, "step": 5450 }, { "epoch": 0.7289382187750736, "grad_norm": 1.0942498445510864, "learning_rate": 1.7714510912715194e-05, "loss": 0.8822, "step": 5451 }, { "epoch": 0.7290719443701524, "grad_norm": 0.954436182975769, "learning_rate": 1.771359219276402e-05, "loss": 0.92, "step": 5452 }, { "epoch": 0.7292056699652314, "grad_norm": 1.010201096534729, "learning_rate": 1.77126733120304e-05, "loss": 0.8816, "step": 5453 }, { "epoch": 0.7293393955603102, "grad_norm": 0.9629737138748169, "learning_rate": 1.7711754270533483e-05, "loss": 0.9224, "step": 5454 }, { "epoch": 0.7294731211553891, "grad_norm": 1.0090998411178589, "learning_rate": 1.771083506829243e-05, "loss": 0.8518, "step": 5455 }, { "epoch": 0.729606846750468, "grad_norm": 0.9697344899177551, "learning_rate": 1.7709915705326394e-05, "loss": 0.8565, "step": 5456 }, { "epoch": 0.7297405723455469, "grad_norm": 1.096519947052002, "learning_rate": 1.770899618165455e-05, "loss": 0.9162, "step": 5457 }, { "epoch": 0.7298742979406259, "grad_norm": 1.0003653764724731, "learning_rate": 1.770807649729605e-05, "loss": 0.8868, "step": 5458 }, { "epoch": 0.7300080235357047, "grad_norm": 1.062525749206543, "learning_rate": 1.7707156652270076e-05, "loss": 0.9921, "step": 5459 }, { "epoch": 0.7301417491307837, "grad_norm": 1.144569754600525, "learning_rate": 1.7706236646595792e-05, "loss": 0.9239, "step": 5460 }, { "epoch": 0.7302754747258625, "grad_norm": 1.0911624431610107, "learning_rate": 1.7705316480292386e-05, "loss": 0.8827, "step": 5461 }, { "epoch": 0.7304092003209415, "grad_norm": 1.1237787008285522, "learning_rate": 1.7704396153379024e-05, "loss": 0.9305, "step": 5462 }, { "epoch": 0.7305429259160203, "grad_norm": 1.0386147499084473, "learning_rate": 1.77034756658749e-05, "loss": 0.9271, "step": 5463 }, { "epoch": 0.7306766515110992, "grad_norm": 1.1341667175292969, "learning_rate": 1.7702555017799197e-05, "loss": 0.8147, "step": 5464 }, { "epoch": 0.7308103771061781, "grad_norm": 1.025303602218628, "learning_rate": 1.7701634209171103e-05, "loss": 0.8925, "step": 5465 }, { "epoch": 0.730944102701257, "grad_norm": 1.1619781255722046, "learning_rate": 1.770071324000982e-05, "loss": 0.9704, "step": 5466 }, { "epoch": 0.7310778282963359, "grad_norm": 0.9426234364509583, "learning_rate": 1.769979211033453e-05, "loss": 0.8628, "step": 5467 }, { "epoch": 0.7312115538914148, "grad_norm": 1.0559861660003662, "learning_rate": 1.7698870820164448e-05, "loss": 1.0462, "step": 5468 }, { "epoch": 0.7313452794864937, "grad_norm": 0.9688773155212402, "learning_rate": 1.7697949369518766e-05, "loss": 0.7941, "step": 5469 }, { "epoch": 0.7314790050815726, "grad_norm": 1.1188685894012451, "learning_rate": 1.76970277584167e-05, "loss": 0.9583, "step": 5470 }, { "epoch": 0.7316127306766516, "grad_norm": 1.0497543811798096, "learning_rate": 1.769610598687745e-05, "loss": 1.0556, "step": 5471 }, { "epoch": 0.7317464562717304, "grad_norm": 1.0324809551239014, "learning_rate": 1.7695184054920236e-05, "loss": 0.9078, "step": 5472 }, { "epoch": 0.7318801818668093, "grad_norm": 1.0529309511184692, "learning_rate": 1.7694261962564278e-05, "loss": 0.969, "step": 5473 }, { "epoch": 0.7320139074618882, "grad_norm": 1.1453649997711182, "learning_rate": 1.769333970982879e-05, "loss": 0.9219, "step": 5474 }, { "epoch": 0.7321476330569671, "grad_norm": 1.103806734085083, "learning_rate": 1.7692417296733e-05, "loss": 1.0451, "step": 5475 }, { "epoch": 0.732281358652046, "grad_norm": 1.2688848972320557, "learning_rate": 1.769149472329613e-05, "loss": 0.9989, "step": 5476 }, { "epoch": 0.7324150842471249, "grad_norm": 1.1294771432876587, "learning_rate": 1.769057198953741e-05, "loss": 1.0707, "step": 5477 }, { "epoch": 0.7325488098422038, "grad_norm": 1.0375664234161377, "learning_rate": 1.7689649095476078e-05, "loss": 0.9184, "step": 5478 }, { "epoch": 0.7326825354372827, "grad_norm": 1.0189743041992188, "learning_rate": 1.768872604113137e-05, "loss": 0.9285, "step": 5479 }, { "epoch": 0.7328162610323616, "grad_norm": 1.1088390350341797, "learning_rate": 1.7687802826522525e-05, "loss": 1.014, "step": 5480 }, { "epoch": 0.7329499866274405, "grad_norm": 0.9751871824264526, "learning_rate": 1.7686879451668783e-05, "loss": 0.8401, "step": 5481 }, { "epoch": 0.7330837122225193, "grad_norm": 1.022199273109436, "learning_rate": 1.7685955916589396e-05, "loss": 0.8735, "step": 5482 }, { "epoch": 0.7332174378175983, "grad_norm": 1.0358741283416748, "learning_rate": 1.7685032221303616e-05, "loss": 0.9189, "step": 5483 }, { "epoch": 0.7333511634126771, "grad_norm": 1.0660679340362549, "learning_rate": 1.768410836583069e-05, "loss": 0.9417, "step": 5484 }, { "epoch": 0.7334848890077561, "grad_norm": 0.9852597713470459, "learning_rate": 1.7683184350189878e-05, "loss": 0.9258, "step": 5485 }, { "epoch": 0.733618614602835, "grad_norm": 0.9773516654968262, "learning_rate": 1.768226017440044e-05, "loss": 0.9559, "step": 5486 }, { "epoch": 0.7337523401979139, "grad_norm": 1.1555254459381104, "learning_rate": 1.768133583848164e-05, "loss": 1.011, "step": 5487 }, { "epoch": 0.7338860657929928, "grad_norm": 1.1057606935501099, "learning_rate": 1.768041134245275e-05, "loss": 1.0207, "step": 5488 }, { "epoch": 0.7340197913880717, "grad_norm": 1.0660011768341064, "learning_rate": 1.7679486686333027e-05, "loss": 1.0686, "step": 5489 }, { "epoch": 0.7341535169831506, "grad_norm": 1.104441523551941, "learning_rate": 1.7678561870141755e-05, "loss": 0.912, "step": 5490 }, { "epoch": 0.7342872425782295, "grad_norm": 1.0470383167266846, "learning_rate": 1.767763689389821e-05, "loss": 0.9346, "step": 5491 }, { "epoch": 0.7344209681733084, "grad_norm": 1.101184606552124, "learning_rate": 1.767671175762167e-05, "loss": 1.0052, "step": 5492 }, { "epoch": 0.7345546937683872, "grad_norm": 1.0381447076797485, "learning_rate": 1.767578646133142e-05, "loss": 0.9898, "step": 5493 }, { "epoch": 0.7346884193634662, "grad_norm": 1.0679866075515747, "learning_rate": 1.7674861005046743e-05, "loss": 1.0527, "step": 5494 }, { "epoch": 0.734822144958545, "grad_norm": 0.9806519746780396, "learning_rate": 1.7673935388786936e-05, "loss": 0.9514, "step": 5495 }, { "epoch": 0.734955870553624, "grad_norm": 0.9385021328926086, "learning_rate": 1.767300961257129e-05, "loss": 0.9131, "step": 5496 }, { "epoch": 0.7350895961487028, "grad_norm": 1.114537000656128, "learning_rate": 1.7672083676419095e-05, "loss": 0.9469, "step": 5497 }, { "epoch": 0.7352233217437818, "grad_norm": 1.0891109704971313, "learning_rate": 1.767115758034966e-05, "loss": 0.9819, "step": 5498 }, { "epoch": 0.7353570473388606, "grad_norm": 1.0426448583602905, "learning_rate": 1.767023132438229e-05, "loss": 1.0279, "step": 5499 }, { "epoch": 0.7354907729339396, "grad_norm": 0.9964267611503601, "learning_rate": 1.766930490853628e-05, "loss": 0.8487, "step": 5500 }, { "epoch": 0.7356244985290185, "grad_norm": 1.0381603240966797, "learning_rate": 1.7668378332830953e-05, "loss": 0.9124, "step": 5501 }, { "epoch": 0.7357582241240973, "grad_norm": 0.9481689929962158, "learning_rate": 1.7667451597285617e-05, "loss": 0.8301, "step": 5502 }, { "epoch": 0.7358919497191763, "grad_norm": 1.0289973020553589, "learning_rate": 1.7666524701919588e-05, "loss": 0.8151, "step": 5503 }, { "epoch": 0.7360256753142551, "grad_norm": 1.0425347089767456, "learning_rate": 1.7665597646752187e-05, "loss": 0.894, "step": 5504 }, { "epoch": 0.7361594009093341, "grad_norm": 1.006659746170044, "learning_rate": 1.766467043180274e-05, "loss": 0.9095, "step": 5505 }, { "epoch": 0.7362931265044129, "grad_norm": 1.0175583362579346, "learning_rate": 1.7663743057090572e-05, "loss": 0.9025, "step": 5506 }, { "epoch": 0.7364268520994919, "grad_norm": 1.0142004489898682, "learning_rate": 1.7662815522635016e-05, "loss": 0.8041, "step": 5507 }, { "epoch": 0.7365605776945707, "grad_norm": 1.0304288864135742, "learning_rate": 1.7661887828455396e-05, "loss": 0.928, "step": 5508 }, { "epoch": 0.7366943032896497, "grad_norm": 1.1089518070220947, "learning_rate": 1.7660959974571064e-05, "loss": 1.0912, "step": 5509 }, { "epoch": 0.7368280288847285, "grad_norm": 1.0991125106811523, "learning_rate": 1.7660031961001344e-05, "loss": 0.8898, "step": 5510 }, { "epoch": 0.7369617544798074, "grad_norm": 1.158766746520996, "learning_rate": 1.7659103787765594e-05, "loss": 1.1214, "step": 5511 }, { "epoch": 0.7370954800748863, "grad_norm": 1.1270241737365723, "learning_rate": 1.7658175454883152e-05, "loss": 0.964, "step": 5512 }, { "epoch": 0.7372292056699652, "grad_norm": 1.0338053703308105, "learning_rate": 1.765724696237337e-05, "loss": 1.036, "step": 5513 }, { "epoch": 0.7373629312650442, "grad_norm": 1.0214444398880005, "learning_rate": 1.7656318310255604e-05, "loss": 0.9089, "step": 5514 }, { "epoch": 0.737496656860123, "grad_norm": 1.1164906024932861, "learning_rate": 1.765538949854921e-05, "loss": 1.0408, "step": 5515 }, { "epoch": 0.737630382455202, "grad_norm": 1.0141122341156006, "learning_rate": 1.7654460527273543e-05, "loss": 0.8648, "step": 5516 }, { "epoch": 0.7377641080502808, "grad_norm": 1.1392110586166382, "learning_rate": 1.7653531396447975e-05, "loss": 1.0089, "step": 5517 }, { "epoch": 0.7378978336453598, "grad_norm": 1.0669268369674683, "learning_rate": 1.7652602106091866e-05, "loss": 0.9318, "step": 5518 }, { "epoch": 0.7380315592404386, "grad_norm": 1.0497102737426758, "learning_rate": 1.7651672656224592e-05, "loss": 0.9506, "step": 5519 }, { "epoch": 0.7381652848355175, "grad_norm": 1.0458214282989502, "learning_rate": 1.765074304686552e-05, "loss": 0.9395, "step": 5520 }, { "epoch": 0.7382990104305964, "grad_norm": 1.0274564027786255, "learning_rate": 1.7649813278034032e-05, "loss": 0.9141, "step": 5521 }, { "epoch": 0.7384327360256753, "grad_norm": 1.0003740787506104, "learning_rate": 1.7648883349749506e-05, "loss": 0.8309, "step": 5522 }, { "epoch": 0.7385664616207542, "grad_norm": 1.0978950262069702, "learning_rate": 1.7647953262031325e-05, "loss": 0.927, "step": 5523 }, { "epoch": 0.7387001872158331, "grad_norm": 1.028764009475708, "learning_rate": 1.7647023014898878e-05, "loss": 0.9315, "step": 5524 }, { "epoch": 0.738833912810912, "grad_norm": 1.088834285736084, "learning_rate": 1.7646092608371553e-05, "loss": 0.9202, "step": 5525 }, { "epoch": 0.7389676384059909, "grad_norm": 1.1014736890792847, "learning_rate": 1.7645162042468742e-05, "loss": 0.9356, "step": 5526 }, { "epoch": 0.7391013640010698, "grad_norm": 1.1460351943969727, "learning_rate": 1.764423131720985e-05, "loss": 0.9033, "step": 5527 }, { "epoch": 0.7392350895961487, "grad_norm": 1.1521360874176025, "learning_rate": 1.7643300432614262e-05, "loss": 0.8716, "step": 5528 }, { "epoch": 0.7393688151912275, "grad_norm": 0.9602109789848328, "learning_rate": 1.7642369388701394e-05, "loss": 0.8171, "step": 5529 }, { "epoch": 0.7395025407863065, "grad_norm": 1.0671948194503784, "learning_rate": 1.764143818549065e-05, "loss": 0.9885, "step": 5530 }, { "epoch": 0.7396362663813854, "grad_norm": 1.1693493127822876, "learning_rate": 1.764050682300144e-05, "loss": 1.0208, "step": 5531 }, { "epoch": 0.7397699919764643, "grad_norm": 1.0283278226852417, "learning_rate": 1.7639575301253174e-05, "loss": 0.9083, "step": 5532 }, { "epoch": 0.7399037175715432, "grad_norm": 1.1111806631088257, "learning_rate": 1.7638643620265275e-05, "loss": 0.9466, "step": 5533 }, { "epoch": 0.7400374431666221, "grad_norm": 1.0647213459014893, "learning_rate": 1.7637711780057157e-05, "loss": 0.856, "step": 5534 }, { "epoch": 0.740171168761701, "grad_norm": 1.0383224487304688, "learning_rate": 1.7636779780648244e-05, "loss": 0.9703, "step": 5535 }, { "epoch": 0.7403048943567799, "grad_norm": 1.2511208057403564, "learning_rate": 1.7635847622057967e-05, "loss": 0.9503, "step": 5536 }, { "epoch": 0.7404386199518588, "grad_norm": 0.9401532411575317, "learning_rate": 1.7634915304305752e-05, "loss": 0.8861, "step": 5537 }, { "epoch": 0.7405723455469377, "grad_norm": 1.1136353015899658, "learning_rate": 1.763398282741103e-05, "loss": 1.0631, "step": 5538 }, { "epoch": 0.7407060711420166, "grad_norm": 1.039443850517273, "learning_rate": 1.7633050191393243e-05, "loss": 0.9546, "step": 5539 }, { "epoch": 0.7408397967370954, "grad_norm": 1.177010416984558, "learning_rate": 1.763211739627183e-05, "loss": 1.0136, "step": 5540 }, { "epoch": 0.7409735223321744, "grad_norm": 1.1243691444396973, "learning_rate": 1.7631184442066232e-05, "loss": 0.946, "step": 5541 }, { "epoch": 0.7411072479272532, "grad_norm": 1.0923787355422974, "learning_rate": 1.76302513287959e-05, "loss": 0.9143, "step": 5542 }, { "epoch": 0.7412409735223322, "grad_norm": 1.1249938011169434, "learning_rate": 1.7629318056480276e-05, "loss": 1.0334, "step": 5543 }, { "epoch": 0.741374699117411, "grad_norm": 1.1163212060928345, "learning_rate": 1.7628384625138818e-05, "loss": 0.9399, "step": 5544 }, { "epoch": 0.74150842471249, "grad_norm": 0.9701418280601501, "learning_rate": 1.7627451034790983e-05, "loss": 0.7557, "step": 5545 }, { "epoch": 0.7416421503075689, "grad_norm": 1.0682822465896606, "learning_rate": 1.762651728545623e-05, "loss": 0.9188, "step": 5546 }, { "epoch": 0.7417758759026478, "grad_norm": 0.987820565700531, "learning_rate": 1.7625583377154023e-05, "loss": 0.9236, "step": 5547 }, { "epoch": 0.7419096014977267, "grad_norm": 1.1549816131591797, "learning_rate": 1.7624649309903824e-05, "loss": 1.0382, "step": 5548 }, { "epoch": 0.7420433270928055, "grad_norm": 1.1395118236541748, "learning_rate": 1.7623715083725107e-05, "loss": 0.8969, "step": 5549 }, { "epoch": 0.7421770526878845, "grad_norm": 1.0887000560760498, "learning_rate": 1.7622780698637348e-05, "loss": 0.8673, "step": 5550 }, { "epoch": 0.7423107782829633, "grad_norm": 1.0476871728897095, "learning_rate": 1.7621846154660017e-05, "loss": 0.9219, "step": 5551 }, { "epoch": 0.7424445038780423, "grad_norm": 0.9983686208724976, "learning_rate": 1.7620911451812595e-05, "loss": 0.9583, "step": 5552 }, { "epoch": 0.7425782294731211, "grad_norm": 1.0809341669082642, "learning_rate": 1.7619976590114568e-05, "loss": 0.9644, "step": 5553 }, { "epoch": 0.7427119550682001, "grad_norm": 1.088506817817688, "learning_rate": 1.761904156958542e-05, "loss": 1.0363, "step": 5554 }, { "epoch": 0.7428456806632789, "grad_norm": 0.9728800654411316, "learning_rate": 1.7618106390244643e-05, "loss": 0.8884, "step": 5555 }, { "epoch": 0.7429794062583579, "grad_norm": 1.0573627948760986, "learning_rate": 1.7617171052111722e-05, "loss": 0.9946, "step": 5556 }, { "epoch": 0.7431131318534367, "grad_norm": 1.1788753271102905, "learning_rate": 1.7616235555206165e-05, "loss": 0.9673, "step": 5557 }, { "epoch": 0.7432468574485156, "grad_norm": 1.0032631158828735, "learning_rate": 1.7615299899547466e-05, "loss": 0.8881, "step": 5558 }, { "epoch": 0.7433805830435946, "grad_norm": 1.1179721355438232, "learning_rate": 1.7614364085155126e-05, "loss": 0.8891, "step": 5559 }, { "epoch": 0.7435143086386734, "grad_norm": 1.1642725467681885, "learning_rate": 1.7613428112048652e-05, "loss": 1.0801, "step": 5560 }, { "epoch": 0.7436480342337524, "grad_norm": 1.1616088151931763, "learning_rate": 1.7612491980247553e-05, "loss": 0.9963, "step": 5561 }, { "epoch": 0.7437817598288312, "grad_norm": 1.0798288583755493, "learning_rate": 1.7611555689771346e-05, "loss": 0.9141, "step": 5562 }, { "epoch": 0.7439154854239102, "grad_norm": 1.0646347999572754, "learning_rate": 1.7610619240639545e-05, "loss": 0.9248, "step": 5563 }, { "epoch": 0.744049211018989, "grad_norm": 1.119341254234314, "learning_rate": 1.7609682632871664e-05, "loss": 0.7928, "step": 5564 }, { "epoch": 0.744182936614068, "grad_norm": 0.9966019988059998, "learning_rate": 1.7608745866487233e-05, "loss": 0.9003, "step": 5565 }, { "epoch": 0.7443166622091468, "grad_norm": 1.0849602222442627, "learning_rate": 1.7607808941505774e-05, "loss": 0.9232, "step": 5566 }, { "epoch": 0.7444503878042257, "grad_norm": 1.1072165966033936, "learning_rate": 1.7606871857946817e-05, "loss": 0.934, "step": 5567 }, { "epoch": 0.7445841133993046, "grad_norm": 1.032358169555664, "learning_rate": 1.7605934615829897e-05, "loss": 0.9402, "step": 5568 }, { "epoch": 0.7447178389943835, "grad_norm": 0.9713364243507385, "learning_rate": 1.760499721517455e-05, "loss": 1.003, "step": 5569 }, { "epoch": 0.7448515645894624, "grad_norm": 1.0515556335449219, "learning_rate": 1.7604059656000313e-05, "loss": 0.9463, "step": 5570 }, { "epoch": 0.7449852901845413, "grad_norm": 1.027031421661377, "learning_rate": 1.7603121938326726e-05, "loss": 0.9029, "step": 5571 }, { "epoch": 0.7451190157796203, "grad_norm": 1.1110166311264038, "learning_rate": 1.7602184062173338e-05, "loss": 1.0512, "step": 5572 }, { "epoch": 0.7452527413746991, "grad_norm": 1.0961997509002686, "learning_rate": 1.7601246027559697e-05, "loss": 0.8847, "step": 5573 }, { "epoch": 0.7453864669697781, "grad_norm": 1.0574986934661865, "learning_rate": 1.7600307834505358e-05, "loss": 0.9476, "step": 5574 }, { "epoch": 0.7455201925648569, "grad_norm": 1.1161792278289795, "learning_rate": 1.759936948302987e-05, "loss": 1.0106, "step": 5575 }, { "epoch": 0.7456539181599358, "grad_norm": 0.9402395486831665, "learning_rate": 1.7598430973152805e-05, "loss": 0.9083, "step": 5576 }, { "epoch": 0.7457876437550147, "grad_norm": 1.133420467376709, "learning_rate": 1.759749230489371e-05, "loss": 0.8799, "step": 5577 }, { "epoch": 0.7459213693500936, "grad_norm": 0.9927236437797546, "learning_rate": 1.759655347827216e-05, "loss": 1.0189, "step": 5578 }, { "epoch": 0.7460550949451725, "grad_norm": 1.092087984085083, "learning_rate": 1.7595614493307726e-05, "loss": 1.0268, "step": 5579 }, { "epoch": 0.7461888205402514, "grad_norm": 1.0169463157653809, "learning_rate": 1.7594675350019975e-05, "loss": 0.9565, "step": 5580 }, { "epoch": 0.7463225461353303, "grad_norm": 0.9976377487182617, "learning_rate": 1.759373604842848e-05, "loss": 0.9439, "step": 5581 }, { "epoch": 0.7464562717304092, "grad_norm": 1.0684986114501953, "learning_rate": 1.759279658855282e-05, "loss": 0.9419, "step": 5582 }, { "epoch": 0.7465899973254881, "grad_norm": 1.2004917860031128, "learning_rate": 1.759185697041259e-05, "loss": 1.0049, "step": 5583 }, { "epoch": 0.746723722920567, "grad_norm": 1.0028046369552612, "learning_rate": 1.759091719402736e-05, "loss": 1.0354, "step": 5584 }, { "epoch": 0.746857448515646, "grad_norm": 1.0717568397521973, "learning_rate": 1.7589977259416728e-05, "loss": 0.9992, "step": 5585 }, { "epoch": 0.7469911741107248, "grad_norm": 1.0084487199783325, "learning_rate": 1.7589037166600283e-05, "loss": 0.9196, "step": 5586 }, { "epoch": 0.7471248997058036, "grad_norm": 1.0724035501480103, "learning_rate": 1.758809691559762e-05, "loss": 0.9689, "step": 5587 }, { "epoch": 0.7472586253008826, "grad_norm": 1.0877522230148315, "learning_rate": 1.7587156506428337e-05, "loss": 1.0199, "step": 5588 }, { "epoch": 0.7473923508959615, "grad_norm": 0.9636661410331726, "learning_rate": 1.758621593911203e-05, "loss": 0.9479, "step": 5589 }, { "epoch": 0.7475260764910404, "grad_norm": 1.1206096410751343, "learning_rate": 1.758527521366832e-05, "loss": 0.9614, "step": 5590 }, { "epoch": 0.7476598020861193, "grad_norm": 0.9915058016777039, "learning_rate": 1.7584334330116807e-05, "loss": 0.9226, "step": 5591 }, { "epoch": 0.7477935276811982, "grad_norm": 1.0223729610443115, "learning_rate": 1.7583393288477097e-05, "loss": 0.9411, "step": 5592 }, { "epoch": 0.7479272532762771, "grad_norm": 0.9829967617988586, "learning_rate": 1.7582452088768814e-05, "loss": 0.9011, "step": 5593 }, { "epoch": 0.748060978871356, "grad_norm": 1.0687378644943237, "learning_rate": 1.758151073101157e-05, "loss": 0.9306, "step": 5594 }, { "epoch": 0.7481947044664349, "grad_norm": 1.1205363273620605, "learning_rate": 1.758056921522499e-05, "loss": 0.9327, "step": 5595 }, { "epoch": 0.7483284300615137, "grad_norm": 1.0322699546813965, "learning_rate": 1.7579627541428702e-05, "loss": 0.882, "step": 5596 }, { "epoch": 0.7484621556565927, "grad_norm": 1.1521402597427368, "learning_rate": 1.7578685709642327e-05, "loss": 0.9656, "step": 5597 }, { "epoch": 0.7485958812516715, "grad_norm": 1.1766597032546997, "learning_rate": 1.75777437198855e-05, "loss": 0.9424, "step": 5598 }, { "epoch": 0.7487296068467505, "grad_norm": 1.0219770669937134, "learning_rate": 1.7576801572177858e-05, "loss": 0.8523, "step": 5599 }, { "epoch": 0.7488633324418293, "grad_norm": 1.075208067893982, "learning_rate": 1.7575859266539036e-05, "loss": 1.0568, "step": 5600 }, { "epoch": 0.7489970580369083, "grad_norm": 1.033706784248352, "learning_rate": 1.757491680298868e-05, "loss": 0.8333, "step": 5601 }, { "epoch": 0.7491307836319872, "grad_norm": 0.9717497229576111, "learning_rate": 1.757397418154643e-05, "loss": 0.8621, "step": 5602 }, { "epoch": 0.7492645092270661, "grad_norm": 1.0269144773483276, "learning_rate": 1.7573031402231936e-05, "loss": 0.9406, "step": 5603 }, { "epoch": 0.749398234822145, "grad_norm": 1.1177387237548828, "learning_rate": 1.7572088465064847e-05, "loss": 0.9934, "step": 5604 }, { "epoch": 0.7495319604172238, "grad_norm": 1.0443004369735718, "learning_rate": 1.757114537006482e-05, "loss": 1.0455, "step": 5605 }, { "epoch": 0.7496656860123028, "grad_norm": 1.0846948623657227, "learning_rate": 1.7570202117251517e-05, "loss": 0.921, "step": 5606 }, { "epoch": 0.7497994116073816, "grad_norm": 1.1932439804077148, "learning_rate": 1.7569258706644588e-05, "loss": 0.9046, "step": 5607 }, { "epoch": 0.7499331372024606, "grad_norm": 1.0925523042678833, "learning_rate": 1.756831513826371e-05, "loss": 1.0535, "step": 5608 }, { "epoch": 0.7500668627975394, "grad_norm": 1.0358389616012573, "learning_rate": 1.7567371412128544e-05, "loss": 0.9062, "step": 5609 }, { "epoch": 0.7502005883926184, "grad_norm": 1.0828266143798828, "learning_rate": 1.7566427528258758e-05, "loss": 0.9396, "step": 5610 }, { "epoch": 0.7503343139876972, "grad_norm": 1.0721856355667114, "learning_rate": 1.7565483486674035e-05, "loss": 0.9433, "step": 5611 }, { "epoch": 0.7504680395827762, "grad_norm": 0.9857664704322815, "learning_rate": 1.7564539287394048e-05, "loss": 0.9331, "step": 5612 }, { "epoch": 0.750601765177855, "grad_norm": 1.0738693475723267, "learning_rate": 1.7563594930438475e-05, "loss": 1.064, "step": 5613 }, { "epoch": 0.7507354907729339, "grad_norm": 0.9988113045692444, "learning_rate": 1.7562650415827004e-05, "loss": 1.0144, "step": 5614 }, { "epoch": 0.7508692163680128, "grad_norm": 1.0331710577011108, "learning_rate": 1.7561705743579323e-05, "loss": 0.8437, "step": 5615 }, { "epoch": 0.7510029419630917, "grad_norm": 1.015241026878357, "learning_rate": 1.756076091371512e-05, "loss": 0.8231, "step": 5616 }, { "epoch": 0.7511366675581707, "grad_norm": 1.1775310039520264, "learning_rate": 1.755981592625409e-05, "loss": 0.9648, "step": 5617 }, { "epoch": 0.7512703931532495, "grad_norm": 1.0573056936264038, "learning_rate": 1.7558870781215936e-05, "loss": 0.9883, "step": 5618 }, { "epoch": 0.7514041187483285, "grad_norm": 1.0810927152633667, "learning_rate": 1.755792547862035e-05, "loss": 0.8707, "step": 5619 }, { "epoch": 0.7515378443434073, "grad_norm": 1.0351015329360962, "learning_rate": 1.7556980018487036e-05, "loss": 0.9802, "step": 5620 }, { "epoch": 0.7516715699384863, "grad_norm": 1.0617460012435913, "learning_rate": 1.7556034400835712e-05, "loss": 0.904, "step": 5621 }, { "epoch": 0.7518052955335651, "grad_norm": 1.2125509977340698, "learning_rate": 1.7555088625686075e-05, "loss": 1.0427, "step": 5622 }, { "epoch": 0.751939021128644, "grad_norm": 1.1726773977279663, "learning_rate": 1.7554142693057848e-05, "loss": 1.0246, "step": 5623 }, { "epoch": 0.7520727467237229, "grad_norm": 1.0637493133544922, "learning_rate": 1.7553196602970746e-05, "loss": 1.0829, "step": 5624 }, { "epoch": 0.7522064723188018, "grad_norm": 1.1356314420700073, "learning_rate": 1.7552250355444486e-05, "loss": 0.9245, "step": 5625 }, { "epoch": 0.7523401979138807, "grad_norm": 1.0804098844528198, "learning_rate": 1.75513039504988e-05, "loss": 1.0833, "step": 5626 }, { "epoch": 0.7524739235089596, "grad_norm": 0.9765375852584839, "learning_rate": 1.75503573881534e-05, "loss": 0.9018, "step": 5627 }, { "epoch": 0.7526076491040385, "grad_norm": 1.0798091888427734, "learning_rate": 1.754941066842803e-05, "loss": 0.9924, "step": 5628 }, { "epoch": 0.7527413746991174, "grad_norm": 1.0957142114639282, "learning_rate": 1.754846379134242e-05, "loss": 1.0673, "step": 5629 }, { "epoch": 0.7528751002941964, "grad_norm": 1.0026651620864868, "learning_rate": 1.7547516756916304e-05, "loss": 1.0201, "step": 5630 }, { "epoch": 0.7530088258892752, "grad_norm": 0.9785611629486084, "learning_rate": 1.7546569565169423e-05, "loss": 0.8949, "step": 5631 }, { "epoch": 0.753142551484354, "grad_norm": 1.0145694017410278, "learning_rate": 1.754562221612152e-05, "loss": 0.9347, "step": 5632 }, { "epoch": 0.753276277079433, "grad_norm": 1.1531141996383667, "learning_rate": 1.7544674709792343e-05, "loss": 0.9761, "step": 5633 }, { "epoch": 0.7534100026745119, "grad_norm": 1.1732995510101318, "learning_rate": 1.7543727046201642e-05, "loss": 1.1516, "step": 5634 }, { "epoch": 0.7535437282695908, "grad_norm": 1.114938497543335, "learning_rate": 1.754277922536917e-05, "loss": 0.8211, "step": 5635 }, { "epoch": 0.7536774538646697, "grad_norm": 1.1262239217758179, "learning_rate": 1.7541831247314678e-05, "loss": 1.0543, "step": 5636 }, { "epoch": 0.7538111794597486, "grad_norm": 1.1927224397659302, "learning_rate": 1.7540883112057933e-05, "loss": 1.0679, "step": 5637 }, { "epoch": 0.7539449050548275, "grad_norm": 1.0859661102294922, "learning_rate": 1.7539934819618696e-05, "loss": 1.0626, "step": 5638 }, { "epoch": 0.7540786306499064, "grad_norm": 1.0610706806182861, "learning_rate": 1.7538986370016732e-05, "loss": 0.9055, "step": 5639 }, { "epoch": 0.7542123562449853, "grad_norm": 1.0506479740142822, "learning_rate": 1.7538037763271812e-05, "loss": 0.8957, "step": 5640 }, { "epoch": 0.7543460818400642, "grad_norm": 0.9687379598617554, "learning_rate": 1.7537088999403708e-05, "loss": 0.9853, "step": 5641 }, { "epoch": 0.7544798074351431, "grad_norm": 0.9650346040725708, "learning_rate": 1.7536140078432194e-05, "loss": 0.8248, "step": 5642 }, { "epoch": 0.7546135330302219, "grad_norm": 1.0056564807891846, "learning_rate": 1.7535191000377055e-05, "loss": 0.9724, "step": 5643 }, { "epoch": 0.7547472586253009, "grad_norm": 0.97073894739151, "learning_rate": 1.753424176525807e-05, "loss": 1.0219, "step": 5644 }, { "epoch": 0.7548809842203797, "grad_norm": 1.0251795053482056, "learning_rate": 1.753329237309502e-05, "loss": 0.8723, "step": 5645 }, { "epoch": 0.7550147098154587, "grad_norm": 1.2767223119735718, "learning_rate": 1.75323428239077e-05, "loss": 1.0067, "step": 5646 }, { "epoch": 0.7551484354105376, "grad_norm": 1.0767724514007568, "learning_rate": 1.7531393117715906e-05, "loss": 0.9036, "step": 5647 }, { "epoch": 0.7552821610056165, "grad_norm": 0.9715018272399902, "learning_rate": 1.7530443254539426e-05, "loss": 0.801, "step": 5648 }, { "epoch": 0.7554158866006954, "grad_norm": 1.1763389110565186, "learning_rate": 1.7529493234398062e-05, "loss": 0.9739, "step": 5649 }, { "epoch": 0.7555496121957743, "grad_norm": 1.3050271272659302, "learning_rate": 1.752854305731162e-05, "loss": 0.9803, "step": 5650 }, { "epoch": 0.7556833377908532, "grad_norm": 1.058416724205017, "learning_rate": 1.75275927232999e-05, "loss": 1.0525, "step": 5651 }, { "epoch": 0.755817063385932, "grad_norm": 1.0298298597335815, "learning_rate": 1.752664223238271e-05, "loss": 0.9139, "step": 5652 }, { "epoch": 0.755950788981011, "grad_norm": 0.9952281713485718, "learning_rate": 1.7525691584579866e-05, "loss": 0.8472, "step": 5653 }, { "epoch": 0.7560845145760898, "grad_norm": 1.1030126810073853, "learning_rate": 1.7524740779911185e-05, "loss": 0.9506, "step": 5654 }, { "epoch": 0.7562182401711688, "grad_norm": 1.1117812395095825, "learning_rate": 1.752378981839648e-05, "loss": 0.9457, "step": 5655 }, { "epoch": 0.7563519657662476, "grad_norm": 1.0242729187011719, "learning_rate": 1.752283870005558e-05, "loss": 0.9337, "step": 5656 }, { "epoch": 0.7564856913613266, "grad_norm": 1.1097509860992432, "learning_rate": 1.7521887424908298e-05, "loss": 0.9674, "step": 5657 }, { "epoch": 0.7566194169564054, "grad_norm": 1.0772755146026611, "learning_rate": 1.7520935992974477e-05, "loss": 0.9165, "step": 5658 }, { "epoch": 0.7567531425514844, "grad_norm": 1.1165934801101685, "learning_rate": 1.7519984404273936e-05, "loss": 0.964, "step": 5659 }, { "epoch": 0.7568868681465633, "grad_norm": 1.1086770296096802, "learning_rate": 1.7519032658826523e-05, "loss": 0.9455, "step": 5660 }, { "epoch": 0.7570205937416421, "grad_norm": 1.1837263107299805, "learning_rate": 1.7518080756652068e-05, "loss": 1.0432, "step": 5661 }, { "epoch": 0.7571543193367211, "grad_norm": 1.078892707824707, "learning_rate": 1.751712869777041e-05, "loss": 1.0047, "step": 5662 }, { "epoch": 0.7572880449317999, "grad_norm": 1.0345041751861572, "learning_rate": 1.7516176482201397e-05, "loss": 0.9906, "step": 5663 }, { "epoch": 0.7574217705268789, "grad_norm": 1.018334150314331, "learning_rate": 1.751522410996488e-05, "loss": 0.8971, "step": 5664 }, { "epoch": 0.7575554961219577, "grad_norm": 1.1631557941436768, "learning_rate": 1.751427158108071e-05, "loss": 0.9846, "step": 5665 }, { "epoch": 0.7576892217170367, "grad_norm": 0.9003881216049194, "learning_rate": 1.7513318895568734e-05, "loss": 0.9793, "step": 5666 }, { "epoch": 0.7578229473121155, "grad_norm": 0.9781140089035034, "learning_rate": 1.7512366053448818e-05, "loss": 0.7924, "step": 5667 }, { "epoch": 0.7579566729071945, "grad_norm": 1.0694317817687988, "learning_rate": 1.751141305474082e-05, "loss": 0.9497, "step": 5668 }, { "epoch": 0.7580903985022733, "grad_norm": 1.1110020875930786, "learning_rate": 1.7510459899464604e-05, "loss": 0.9409, "step": 5669 }, { "epoch": 0.7582241240973522, "grad_norm": 0.9874710440635681, "learning_rate": 1.750950658764004e-05, "loss": 0.9847, "step": 5670 }, { "epoch": 0.7583578496924311, "grad_norm": 1.0974586009979248, "learning_rate": 1.7508553119286995e-05, "loss": 0.9138, "step": 5671 }, { "epoch": 0.75849157528751, "grad_norm": 1.0416758060455322, "learning_rate": 1.7507599494425344e-05, "loss": 0.8963, "step": 5672 }, { "epoch": 0.758625300882589, "grad_norm": 1.052480697631836, "learning_rate": 1.7506645713074967e-05, "loss": 1.042, "step": 5673 }, { "epoch": 0.7587590264776678, "grad_norm": 1.0267629623413086, "learning_rate": 1.7505691775255744e-05, "loss": 0.9767, "step": 5674 }, { "epoch": 0.7588927520727468, "grad_norm": 1.0133389234542847, "learning_rate": 1.7504737680987557e-05, "loss": 0.8877, "step": 5675 }, { "epoch": 0.7590264776678256, "grad_norm": 1.019167184829712, "learning_rate": 1.7503783430290295e-05, "loss": 0.8761, "step": 5676 }, { "epoch": 0.7591602032629046, "grad_norm": 1.1321409940719604, "learning_rate": 1.7502829023183848e-05, "loss": 1.0391, "step": 5677 }, { "epoch": 0.7592939288579834, "grad_norm": 1.2106661796569824, "learning_rate": 1.750187445968811e-05, "loss": 0.9548, "step": 5678 }, { "epoch": 0.7594276544530623, "grad_norm": 1.2190868854522705, "learning_rate": 1.7500919739822973e-05, "loss": 0.8892, "step": 5679 }, { "epoch": 0.7595613800481412, "grad_norm": 1.1106572151184082, "learning_rate": 1.749996486360835e-05, "loss": 0.8902, "step": 5680 }, { "epoch": 0.7596951056432201, "grad_norm": 0.9934551119804382, "learning_rate": 1.7499009831064127e-05, "loss": 0.9601, "step": 5681 }, { "epoch": 0.759828831238299, "grad_norm": 1.0583659410476685, "learning_rate": 1.7498054642210225e-05, "loss": 0.9447, "step": 5682 }, { "epoch": 0.7599625568333779, "grad_norm": 1.0365110635757446, "learning_rate": 1.7497099297066546e-05, "loss": 0.8503, "step": 5683 }, { "epoch": 0.7600962824284568, "grad_norm": 1.0689677000045776, "learning_rate": 1.749614379565301e-05, "loss": 0.9704, "step": 5684 }, { "epoch": 0.7602300080235357, "grad_norm": 1.05976402759552, "learning_rate": 1.7495188137989526e-05, "loss": 0.9119, "step": 5685 }, { "epoch": 0.7603637336186146, "grad_norm": 1.1034635305404663, "learning_rate": 1.749423232409602e-05, "loss": 0.9304, "step": 5686 }, { "epoch": 0.7604974592136935, "grad_norm": 1.0311964750289917, "learning_rate": 1.749327635399241e-05, "loss": 0.9736, "step": 5687 }, { "epoch": 0.7606311848087725, "grad_norm": 1.1243400573730469, "learning_rate": 1.7492320227698624e-05, "loss": 1.043, "step": 5688 }, { "epoch": 0.7607649104038513, "grad_norm": 1.0421708822250366, "learning_rate": 1.7491363945234595e-05, "loss": 0.9555, "step": 5689 }, { "epoch": 0.7608986359989302, "grad_norm": 1.1084234714508057, "learning_rate": 1.7490407506620252e-05, "loss": 0.9402, "step": 5690 }, { "epoch": 0.7610323615940091, "grad_norm": 0.9782710671424866, "learning_rate": 1.748945091187553e-05, "loss": 0.8836, "step": 5691 }, { "epoch": 0.761166087189088, "grad_norm": 1.0322253704071045, "learning_rate": 1.7488494161020374e-05, "loss": 0.9128, "step": 5692 }, { "epoch": 0.7612998127841669, "grad_norm": 1.0175551176071167, "learning_rate": 1.748753725407472e-05, "loss": 0.9546, "step": 5693 }, { "epoch": 0.7614335383792458, "grad_norm": 1.0329780578613281, "learning_rate": 1.748658019105852e-05, "loss": 0.9517, "step": 5694 }, { "epoch": 0.7615672639743247, "grad_norm": 1.0101404190063477, "learning_rate": 1.7485622971991718e-05, "loss": 0.9757, "step": 5695 }, { "epoch": 0.7617009895694036, "grad_norm": 1.1176928281784058, "learning_rate": 1.748466559689427e-05, "loss": 0.971, "step": 5696 }, { "epoch": 0.7618347151644825, "grad_norm": 1.085686445236206, "learning_rate": 1.7483708065786124e-05, "loss": 0.9593, "step": 5697 }, { "epoch": 0.7619684407595614, "grad_norm": 1.1791216135025024, "learning_rate": 1.748275037868725e-05, "loss": 0.8614, "step": 5698 }, { "epoch": 0.7621021663546402, "grad_norm": 1.1431652307510376, "learning_rate": 1.7481792535617602e-05, "loss": 0.972, "step": 5699 }, { "epoch": 0.7622358919497192, "grad_norm": 1.0990146398544312, "learning_rate": 1.748083453659715e-05, "loss": 0.9129, "step": 5700 }, { "epoch": 0.762369617544798, "grad_norm": 1.1180288791656494, "learning_rate": 1.747987638164586e-05, "loss": 0.8663, "step": 5701 }, { "epoch": 0.762503343139877, "grad_norm": 0.9956672191619873, "learning_rate": 1.7478918070783703e-05, "loss": 0.9314, "step": 5702 }, { "epoch": 0.7626370687349558, "grad_norm": 0.9825080633163452, "learning_rate": 1.7477959604030656e-05, "loss": 0.9435, "step": 5703 }, { "epoch": 0.7627707943300348, "grad_norm": 1.0081071853637695, "learning_rate": 1.7477000981406697e-05, "loss": 0.9414, "step": 5704 }, { "epoch": 0.7629045199251137, "grad_norm": 1.0427356958389282, "learning_rate": 1.7476042202931806e-05, "loss": 1.0138, "step": 5705 }, { "epoch": 0.7630382455201926, "grad_norm": 1.0891045331954956, "learning_rate": 1.747508326862597e-05, "loss": 0.9377, "step": 5706 }, { "epoch": 0.7631719711152715, "grad_norm": 1.2020474672317505, "learning_rate": 1.7474124178509176e-05, "loss": 1.0658, "step": 5707 }, { "epoch": 0.7633056967103503, "grad_norm": 1.0939958095550537, "learning_rate": 1.7473164932601414e-05, "loss": 0.914, "step": 5708 }, { "epoch": 0.7634394223054293, "grad_norm": 1.1803240776062012, "learning_rate": 1.7472205530922683e-05, "loss": 1.1071, "step": 5709 }, { "epoch": 0.7635731479005081, "grad_norm": 1.0756531953811646, "learning_rate": 1.7471245973492977e-05, "loss": 1.0456, "step": 5710 }, { "epoch": 0.7637068734955871, "grad_norm": 1.1000767946243286, "learning_rate": 1.7470286260332296e-05, "loss": 0.9322, "step": 5711 }, { "epoch": 0.7638405990906659, "grad_norm": 1.0814807415008545, "learning_rate": 1.7469326391460647e-05, "loss": 0.8867, "step": 5712 }, { "epoch": 0.7639743246857449, "grad_norm": 1.0714526176452637, "learning_rate": 1.7468366366898038e-05, "loss": 0.9926, "step": 5713 }, { "epoch": 0.7641080502808237, "grad_norm": 1.1460797786712646, "learning_rate": 1.7467406186664474e-05, "loss": 0.7909, "step": 5714 }, { "epoch": 0.7642417758759027, "grad_norm": 0.9759002923965454, "learning_rate": 1.746644585077998e-05, "loss": 0.9048, "step": 5715 }, { "epoch": 0.7643755014709815, "grad_norm": 0.9731238484382629, "learning_rate": 1.7465485359264565e-05, "loss": 0.9642, "step": 5716 }, { "epoch": 0.7645092270660604, "grad_norm": 0.9622951149940491, "learning_rate": 1.7464524712138252e-05, "loss": 0.8957, "step": 5717 }, { "epoch": 0.7646429526611394, "grad_norm": 1.0308570861816406, "learning_rate": 1.746356390942106e-05, "loss": 0.8636, "step": 5718 }, { "epoch": 0.7647766782562182, "grad_norm": 1.0122634172439575, "learning_rate": 1.7462602951133022e-05, "loss": 0.7879, "step": 5719 }, { "epoch": 0.7649104038512972, "grad_norm": 1.12986421585083, "learning_rate": 1.7461641837294167e-05, "loss": 0.9342, "step": 5720 }, { "epoch": 0.765044129446376, "grad_norm": 1.1417661905288696, "learning_rate": 1.7460680567924528e-05, "loss": 1.0302, "step": 5721 }, { "epoch": 0.765177855041455, "grad_norm": 1.1987031698226929, "learning_rate": 1.7459719143044146e-05, "loss": 1.0263, "step": 5722 }, { "epoch": 0.7653115806365338, "grad_norm": 1.044432282447815, "learning_rate": 1.745875756267305e-05, "loss": 0.8962, "step": 5723 }, { "epoch": 0.7654453062316128, "grad_norm": 1.0600156784057617, "learning_rate": 1.7457795826831293e-05, "loss": 0.9589, "step": 5724 }, { "epoch": 0.7655790318266916, "grad_norm": 1.1277058124542236, "learning_rate": 1.7456833935538917e-05, "loss": 1.0862, "step": 5725 }, { "epoch": 0.7657127574217705, "grad_norm": 1.094230055809021, "learning_rate": 1.7455871888815972e-05, "loss": 0.9279, "step": 5726 }, { "epoch": 0.7658464830168494, "grad_norm": 1.0901530981063843, "learning_rate": 1.7454909686682515e-05, "loss": 0.9446, "step": 5727 }, { "epoch": 0.7659802086119283, "grad_norm": 1.1245795488357544, "learning_rate": 1.7453947329158597e-05, "loss": 1.1089, "step": 5728 }, { "epoch": 0.7661139342070072, "grad_norm": 1.0885945558547974, "learning_rate": 1.7452984816264282e-05, "loss": 0.9002, "step": 5729 }, { "epoch": 0.7662476598020861, "grad_norm": 1.0388959646224976, "learning_rate": 1.7452022148019626e-05, "loss": 0.8455, "step": 5730 }, { "epoch": 0.766381385397165, "grad_norm": 1.0902312994003296, "learning_rate": 1.7451059324444702e-05, "loss": 0.9229, "step": 5731 }, { "epoch": 0.7665151109922439, "grad_norm": 1.0550434589385986, "learning_rate": 1.7450096345559576e-05, "loss": 0.954, "step": 5732 }, { "epoch": 0.7666488365873229, "grad_norm": 0.9747079014778137, "learning_rate": 1.7449133211384325e-05, "loss": 0.9638, "step": 5733 }, { "epoch": 0.7667825621824017, "grad_norm": 1.0863221883773804, "learning_rate": 1.7448169921939014e-05, "loss": 0.9623, "step": 5734 }, { "epoch": 0.7669162877774807, "grad_norm": 1.0640642642974854, "learning_rate": 1.744720647724373e-05, "loss": 0.7959, "step": 5735 }, { "epoch": 0.7670500133725595, "grad_norm": 0.9944091439247131, "learning_rate": 1.7446242877318553e-05, "loss": 0.9731, "step": 5736 }, { "epoch": 0.7671837389676384, "grad_norm": 0.9624443650245667, "learning_rate": 1.7445279122183567e-05, "loss": 0.8952, "step": 5737 }, { "epoch": 0.7673174645627173, "grad_norm": 1.149829387664795, "learning_rate": 1.7444315211858864e-05, "loss": 0.8447, "step": 5738 }, { "epoch": 0.7674511901577962, "grad_norm": 0.9767423272132874, "learning_rate": 1.7443351146364534e-05, "loss": 0.9548, "step": 5739 }, { "epoch": 0.7675849157528751, "grad_norm": 1.1116724014282227, "learning_rate": 1.744238692572067e-05, "loss": 1.0041, "step": 5740 }, { "epoch": 0.767718641347954, "grad_norm": 1.12540864944458, "learning_rate": 1.7441422549947375e-05, "loss": 0.955, "step": 5741 }, { "epoch": 0.7678523669430329, "grad_norm": 1.1024413108825684, "learning_rate": 1.7440458019064745e-05, "loss": 0.9544, "step": 5742 }, { "epoch": 0.7679860925381118, "grad_norm": 1.094484806060791, "learning_rate": 1.743949333309289e-05, "loss": 0.9678, "step": 5743 }, { "epoch": 0.7681198181331907, "grad_norm": 1.133272409439087, "learning_rate": 1.7438528492051914e-05, "loss": 0.9342, "step": 5744 }, { "epoch": 0.7682535437282696, "grad_norm": 1.1478476524353027, "learning_rate": 1.743756349596193e-05, "loss": 0.9971, "step": 5745 }, { "epoch": 0.7683872693233484, "grad_norm": 1.0720198154449463, "learning_rate": 1.743659834484305e-05, "loss": 0.9334, "step": 5746 }, { "epoch": 0.7685209949184274, "grad_norm": 1.0617471933364868, "learning_rate": 1.7435633038715396e-05, "loss": 0.8908, "step": 5747 }, { "epoch": 0.7686547205135063, "grad_norm": 1.0409166812896729, "learning_rate": 1.7434667577599086e-05, "loss": 1.0, "step": 5748 }, { "epoch": 0.7687884461085852, "grad_norm": 1.1328110694885254, "learning_rate": 1.7433701961514242e-05, "loss": 1.0408, "step": 5749 }, { "epoch": 0.7689221717036641, "grad_norm": 1.175031304359436, "learning_rate": 1.7432736190480995e-05, "loss": 0.9908, "step": 5750 }, { "epoch": 0.769055897298743, "grad_norm": 1.1278076171875, "learning_rate": 1.7431770264519478e-05, "loss": 1.0363, "step": 5751 }, { "epoch": 0.7691896228938219, "grad_norm": 1.2135567665100098, "learning_rate": 1.7430804183649818e-05, "loss": 0.8803, "step": 5752 }, { "epoch": 0.7693233484889008, "grad_norm": 1.0099236965179443, "learning_rate": 1.7429837947892154e-05, "loss": 0.8361, "step": 5753 }, { "epoch": 0.7694570740839797, "grad_norm": 1.0451719760894775, "learning_rate": 1.7428871557266628e-05, "loss": 0.9258, "step": 5754 }, { "epoch": 0.7695907996790585, "grad_norm": 1.1481705904006958, "learning_rate": 1.7427905011793385e-05, "loss": 0.9013, "step": 5755 }, { "epoch": 0.7697245252741375, "grad_norm": 1.0951621532440186, "learning_rate": 1.742693831149257e-05, "loss": 0.9217, "step": 5756 }, { "epoch": 0.7698582508692163, "grad_norm": 1.0907785892486572, "learning_rate": 1.7425971456384333e-05, "loss": 0.9773, "step": 5757 }, { "epoch": 0.7699919764642953, "grad_norm": 1.0733246803283691, "learning_rate": 1.7425004446488825e-05, "loss": 0.8736, "step": 5758 }, { "epoch": 0.7701257020593741, "grad_norm": 1.0340436697006226, "learning_rate": 1.7424037281826204e-05, "loss": 1.0529, "step": 5759 }, { "epoch": 0.7702594276544531, "grad_norm": 1.153602123260498, "learning_rate": 1.7423069962416634e-05, "loss": 0.995, "step": 5760 }, { "epoch": 0.770393153249532, "grad_norm": 0.9764849543571472, "learning_rate": 1.7422102488280266e-05, "loss": 0.9111, "step": 5761 }, { "epoch": 0.7705268788446109, "grad_norm": 1.1071991920471191, "learning_rate": 1.742113485943728e-05, "loss": 1.0711, "step": 5762 }, { "epoch": 0.7706606044396898, "grad_norm": 1.0592581033706665, "learning_rate": 1.742016707590784e-05, "loss": 0.8927, "step": 5763 }, { "epoch": 0.7707943300347686, "grad_norm": 1.1966403722763062, "learning_rate": 1.7419199137712112e-05, "loss": 1.1665, "step": 5764 }, { "epoch": 0.7709280556298476, "grad_norm": 1.1510671377182007, "learning_rate": 1.7418231044870283e-05, "loss": 1.0723, "step": 5765 }, { "epoch": 0.7710617812249264, "grad_norm": 0.9715163111686707, "learning_rate": 1.741726279740252e-05, "loss": 0.93, "step": 5766 }, { "epoch": 0.7711955068200054, "grad_norm": 1.0256233215332031, "learning_rate": 1.7416294395329018e-05, "loss": 0.8397, "step": 5767 }, { "epoch": 0.7713292324150842, "grad_norm": 1.136228084564209, "learning_rate": 1.741532583866995e-05, "loss": 0.9558, "step": 5768 }, { "epoch": 0.7714629580101632, "grad_norm": 1.1174087524414062, "learning_rate": 1.7414357127445515e-05, "loss": 0.9662, "step": 5769 }, { "epoch": 0.771596683605242, "grad_norm": 1.1421359777450562, "learning_rate": 1.74133882616759e-05, "loss": 1.0482, "step": 5770 }, { "epoch": 0.771730409200321, "grad_norm": 1.0439422130584717, "learning_rate": 1.74124192413813e-05, "loss": 0.9371, "step": 5771 }, { "epoch": 0.7718641347953998, "grad_norm": 1.0925662517547607, "learning_rate": 1.7411450066581913e-05, "loss": 0.9907, "step": 5772 }, { "epoch": 0.7719978603904787, "grad_norm": 1.1391443014144897, "learning_rate": 1.7410480737297942e-05, "loss": 0.9923, "step": 5773 }, { "epoch": 0.7721315859855576, "grad_norm": 1.1026073694229126, "learning_rate": 1.7409511253549592e-05, "loss": 0.8843, "step": 5774 }, { "epoch": 0.7722653115806365, "grad_norm": 1.0762516260147095, "learning_rate": 1.740854161535707e-05, "loss": 0.9102, "step": 5775 }, { "epoch": 0.7723990371757155, "grad_norm": 1.0368516445159912, "learning_rate": 1.7407571822740584e-05, "loss": 0.9529, "step": 5776 }, { "epoch": 0.7725327627707943, "grad_norm": 1.1312167644500732, "learning_rate": 1.7406601875720354e-05, "loss": 0.9205, "step": 5777 }, { "epoch": 0.7726664883658733, "grad_norm": 1.128832221031189, "learning_rate": 1.7405631774316595e-05, "loss": 0.943, "step": 5778 }, { "epoch": 0.7728002139609521, "grad_norm": 2.2299225330352783, "learning_rate": 1.740466151854953e-05, "loss": 0.8692, "step": 5779 }, { "epoch": 0.7729339395560311, "grad_norm": 1.0916657447814941, "learning_rate": 1.740369110843938e-05, "loss": 0.9667, "step": 5780 }, { "epoch": 0.7730676651511099, "grad_norm": 1.0875526666641235, "learning_rate": 1.740272054400637e-05, "loss": 0.9951, "step": 5781 }, { "epoch": 0.7732013907461888, "grad_norm": 1.1238740682601929, "learning_rate": 1.740174982527074e-05, "loss": 0.8968, "step": 5782 }, { "epoch": 0.7733351163412677, "grad_norm": 1.1533631086349487, "learning_rate": 1.7400778952252716e-05, "loss": 1.026, "step": 5783 }, { "epoch": 0.7734688419363466, "grad_norm": 1.141110897064209, "learning_rate": 1.7399807924972533e-05, "loss": 1.0415, "step": 5784 }, { "epoch": 0.7736025675314255, "grad_norm": 1.1428486108779907, "learning_rate": 1.739883674345044e-05, "loss": 0.9102, "step": 5785 }, { "epoch": 0.7737362931265044, "grad_norm": 1.0603721141815186, "learning_rate": 1.7397865407706667e-05, "loss": 0.7729, "step": 5786 }, { "epoch": 0.7738700187215833, "grad_norm": 1.0730334520339966, "learning_rate": 1.7396893917761476e-05, "loss": 1.0116, "step": 5787 }, { "epoch": 0.7740037443166622, "grad_norm": 1.0567317008972168, "learning_rate": 1.7395922273635106e-05, "loss": 0.8683, "step": 5788 }, { "epoch": 0.7741374699117411, "grad_norm": 1.1615196466445923, "learning_rate": 1.7394950475347814e-05, "loss": 0.9797, "step": 5789 }, { "epoch": 0.77427119550682, "grad_norm": 0.9932485222816467, "learning_rate": 1.7393978522919855e-05, "loss": 0.8486, "step": 5790 }, { "epoch": 0.774404921101899, "grad_norm": 1.0752326250076294, "learning_rate": 1.739300641637149e-05, "loss": 0.9237, "step": 5791 }, { "epoch": 0.7745386466969778, "grad_norm": 1.1332244873046875, "learning_rate": 1.7392034155722977e-05, "loss": 1.0123, "step": 5792 }, { "epoch": 0.7746723722920567, "grad_norm": 1.0429304838180542, "learning_rate": 1.739106174099459e-05, "loss": 0.9772, "step": 5793 }, { "epoch": 0.7748060978871356, "grad_norm": 1.0938130617141724, "learning_rate": 1.7390089172206594e-05, "loss": 0.9542, "step": 5794 }, { "epoch": 0.7749398234822145, "grad_norm": 1.0806126594543457, "learning_rate": 1.738911644937926e-05, "loss": 0.9704, "step": 5795 }, { "epoch": 0.7750735490772934, "grad_norm": 1.1858100891113281, "learning_rate": 1.738814357253286e-05, "loss": 1.0272, "step": 5796 }, { "epoch": 0.7752072746723723, "grad_norm": 1.1996965408325195, "learning_rate": 1.738717054168768e-05, "loss": 1.0184, "step": 5797 }, { "epoch": 0.7753410002674512, "grad_norm": 0.9996867775917053, "learning_rate": 1.7386197356863998e-05, "loss": 0.8248, "step": 5798 }, { "epoch": 0.7754747258625301, "grad_norm": 1.0730143785476685, "learning_rate": 1.73852240180821e-05, "loss": 0.9124, "step": 5799 }, { "epoch": 0.775608451457609, "grad_norm": 1.207648515701294, "learning_rate": 1.7384250525362277e-05, "loss": 0.9296, "step": 5800 }, { "epoch": 0.7757421770526879, "grad_norm": 1.144271969795227, "learning_rate": 1.738327687872481e-05, "loss": 0.9525, "step": 5801 }, { "epoch": 0.7758759026477667, "grad_norm": 1.1087696552276611, "learning_rate": 1.7382303078190014e-05, "loss": 0.9432, "step": 5802 }, { "epoch": 0.7760096282428457, "grad_norm": 1.032373309135437, "learning_rate": 1.7381329123778166e-05, "loss": 0.9219, "step": 5803 }, { "epoch": 0.7761433538379245, "grad_norm": 1.115530014038086, "learning_rate": 1.7380355015509577e-05, "loss": 1.0668, "step": 5804 }, { "epoch": 0.7762770794330035, "grad_norm": 1.0470343828201294, "learning_rate": 1.7379380753404548e-05, "loss": 0.9275, "step": 5805 }, { "epoch": 0.7764108050280824, "grad_norm": 1.125929832458496, "learning_rate": 1.737840633748339e-05, "loss": 0.9981, "step": 5806 }, { "epoch": 0.7765445306231613, "grad_norm": 0.9741032123565674, "learning_rate": 1.7377431767766414e-05, "loss": 0.9639, "step": 5807 }, { "epoch": 0.7766782562182402, "grad_norm": 1.324411392211914, "learning_rate": 1.7376457044273932e-05, "loss": 0.9877, "step": 5808 }, { "epoch": 0.7768119818133191, "grad_norm": 1.006172776222229, "learning_rate": 1.737548216702626e-05, "loss": 0.8731, "step": 5809 }, { "epoch": 0.776945707408398, "grad_norm": 1.1740729808807373, "learning_rate": 1.737450713604372e-05, "loss": 1.0281, "step": 5810 }, { "epoch": 0.7770794330034768, "grad_norm": 1.071735143661499, "learning_rate": 1.7373531951346634e-05, "loss": 0.8052, "step": 5811 }, { "epoch": 0.7772131585985558, "grad_norm": 1.148179292678833, "learning_rate": 1.7372556612955335e-05, "loss": 1.0308, "step": 5812 }, { "epoch": 0.7773468841936346, "grad_norm": 1.0332939624786377, "learning_rate": 1.737158112089014e-05, "loss": 0.9039, "step": 5813 }, { "epoch": 0.7774806097887136, "grad_norm": 1.237817406654358, "learning_rate": 1.73706054751714e-05, "loss": 0.9084, "step": 5814 }, { "epoch": 0.7776143353837924, "grad_norm": 1.0857131481170654, "learning_rate": 1.7369629675819436e-05, "loss": 0.9833, "step": 5815 }, { "epoch": 0.7777480609788714, "grad_norm": 1.030595302581787, "learning_rate": 1.7368653722854593e-05, "loss": 0.9321, "step": 5816 }, { "epoch": 0.7778817865739502, "grad_norm": 1.0658055543899536, "learning_rate": 1.7367677616297215e-05, "loss": 0.9836, "step": 5817 }, { "epoch": 0.7780155121690292, "grad_norm": 1.0333417654037476, "learning_rate": 1.7366701356167648e-05, "loss": 0.9866, "step": 5818 }, { "epoch": 0.778149237764108, "grad_norm": 1.0584616661071777, "learning_rate": 1.7365724942486243e-05, "loss": 0.833, "step": 5819 }, { "epoch": 0.7782829633591869, "grad_norm": 1.0248152017593384, "learning_rate": 1.7364748375273347e-05, "loss": 0.9557, "step": 5820 }, { "epoch": 0.7784166889542659, "grad_norm": 1.035446047782898, "learning_rate": 1.7363771654549317e-05, "loss": 1.0483, "step": 5821 }, { "epoch": 0.7785504145493447, "grad_norm": 1.056353211402893, "learning_rate": 1.7362794780334516e-05, "loss": 0.9852, "step": 5822 }, { "epoch": 0.7786841401444237, "grad_norm": 1.0895535945892334, "learning_rate": 1.73618177526493e-05, "loss": 0.8998, "step": 5823 }, { "epoch": 0.7788178657395025, "grad_norm": 1.0371977090835571, "learning_rate": 1.736084057151404e-05, "loss": 0.9623, "step": 5824 }, { "epoch": 0.7789515913345815, "grad_norm": 1.2121824026107788, "learning_rate": 1.73598632369491e-05, "loss": 1.062, "step": 5825 }, { "epoch": 0.7790853169296603, "grad_norm": 1.1948134899139404, "learning_rate": 1.7358885748974853e-05, "loss": 0.9181, "step": 5826 }, { "epoch": 0.7792190425247393, "grad_norm": 1.1606647968292236, "learning_rate": 1.7357908107611677e-05, "loss": 1.0609, "step": 5827 }, { "epoch": 0.7793527681198181, "grad_norm": 1.107747197151184, "learning_rate": 1.735693031287995e-05, "loss": 0.9753, "step": 5828 }, { "epoch": 0.779486493714897, "grad_norm": 1.1481611728668213, "learning_rate": 1.7355952364800045e-05, "loss": 1.0478, "step": 5829 }, { "epoch": 0.7796202193099759, "grad_norm": 1.1143673658370972, "learning_rate": 1.7354974263392353e-05, "loss": 0.9896, "step": 5830 }, { "epoch": 0.7797539449050548, "grad_norm": 1.1509370803833008, "learning_rate": 1.7353996008677262e-05, "loss": 1.0293, "step": 5831 }, { "epoch": 0.7798876705001337, "grad_norm": 0.9845685362815857, "learning_rate": 1.735301760067516e-05, "loss": 0.7959, "step": 5832 }, { "epoch": 0.7800213960952126, "grad_norm": 1.1169915199279785, "learning_rate": 1.7352039039406442e-05, "loss": 1.0104, "step": 5833 }, { "epoch": 0.7801551216902916, "grad_norm": 1.0956919193267822, "learning_rate": 1.7351060324891506e-05, "loss": 0.8499, "step": 5834 }, { "epoch": 0.7802888472853704, "grad_norm": 0.954009473323822, "learning_rate": 1.735008145715075e-05, "loss": 0.8643, "step": 5835 }, { "epoch": 0.7804225728804494, "grad_norm": 1.2194390296936035, "learning_rate": 1.734910243620458e-05, "loss": 1.0588, "step": 5836 }, { "epoch": 0.7805562984755282, "grad_norm": 1.0090768337249756, "learning_rate": 1.73481232620734e-05, "loss": 0.8894, "step": 5837 }, { "epoch": 0.7806900240706072, "grad_norm": 1.0626386404037476, "learning_rate": 1.734714393477763e-05, "loss": 0.9734, "step": 5838 }, { "epoch": 0.780823749665686, "grad_norm": 0.9648792147636414, "learning_rate": 1.734616445433767e-05, "loss": 0.9039, "step": 5839 }, { "epoch": 0.7809574752607649, "grad_norm": 1.12740957736969, "learning_rate": 1.734518482077394e-05, "loss": 1.0055, "step": 5840 }, { "epoch": 0.7810912008558438, "grad_norm": 1.0662246942520142, "learning_rate": 1.7344205034106862e-05, "loss": 0.9313, "step": 5841 }, { "epoch": 0.7812249264509227, "grad_norm": 1.106798768043518, "learning_rate": 1.7343225094356857e-05, "loss": 1.0032, "step": 5842 }, { "epoch": 0.7813586520460016, "grad_norm": 1.1787093877792358, "learning_rate": 1.7342245001544352e-05, "loss": 1.0699, "step": 5843 }, { "epoch": 0.7814923776410805, "grad_norm": 1.0218850374221802, "learning_rate": 1.7341264755689776e-05, "loss": 0.902, "step": 5844 }, { "epoch": 0.7816261032361594, "grad_norm": 1.0944106578826904, "learning_rate": 1.734028435681356e-05, "loss": 1.0364, "step": 5845 }, { "epoch": 0.7817598288312383, "grad_norm": 1.1498346328735352, "learning_rate": 1.7339303804936145e-05, "loss": 0.983, "step": 5846 }, { "epoch": 0.7818935544263172, "grad_norm": 0.9575804471969604, "learning_rate": 1.7338323100077962e-05, "loss": 0.8816, "step": 5847 }, { "epoch": 0.7820272800213961, "grad_norm": 1.039419412612915, "learning_rate": 1.7337342242259455e-05, "loss": 0.9654, "step": 5848 }, { "epoch": 0.782161005616475, "grad_norm": 1.0011546611785889, "learning_rate": 1.733636123150107e-05, "loss": 0.8725, "step": 5849 }, { "epoch": 0.7822947312115539, "grad_norm": 0.9742418527603149, "learning_rate": 1.7335380067823258e-05, "loss": 0.9797, "step": 5850 }, { "epoch": 0.7824284568066328, "grad_norm": 0.9383313059806824, "learning_rate": 1.7334398751246463e-05, "loss": 0.8143, "step": 5851 }, { "epoch": 0.7825621824017117, "grad_norm": 1.0585530996322632, "learning_rate": 1.733341728179115e-05, "loss": 0.8865, "step": 5852 }, { "epoch": 0.7826959079967906, "grad_norm": 1.0603220462799072, "learning_rate": 1.7332435659477765e-05, "loss": 0.9445, "step": 5853 }, { "epoch": 0.7828296335918695, "grad_norm": 0.9509584903717041, "learning_rate": 1.733145388432678e-05, "loss": 0.8455, "step": 5854 }, { "epoch": 0.7829633591869484, "grad_norm": 1.1102031469345093, "learning_rate": 1.7330471956358653e-05, "loss": 0.9293, "step": 5855 }, { "epoch": 0.7830970847820273, "grad_norm": 1.098401427268982, "learning_rate": 1.7329489875593852e-05, "loss": 0.8899, "step": 5856 }, { "epoch": 0.7832308103771062, "grad_norm": 1.0150678157806396, "learning_rate": 1.732850764205285e-05, "loss": 0.8922, "step": 5857 }, { "epoch": 0.783364535972185, "grad_norm": 0.9785661101341248, "learning_rate": 1.7327525255756118e-05, "loss": 0.9742, "step": 5858 }, { "epoch": 0.783498261567264, "grad_norm": 1.0655995607376099, "learning_rate": 1.7326542716724127e-05, "loss": 0.983, "step": 5859 }, { "epoch": 0.7836319871623428, "grad_norm": 0.9597586393356323, "learning_rate": 1.732556002497737e-05, "loss": 0.9121, "step": 5860 }, { "epoch": 0.7837657127574218, "grad_norm": 0.9849139451980591, "learning_rate": 1.7324577180536325e-05, "loss": 0.8767, "step": 5861 }, { "epoch": 0.7838994383525006, "grad_norm": 0.9647621512413025, "learning_rate": 1.7323594183421476e-05, "loss": 0.9009, "step": 5862 }, { "epoch": 0.7840331639475796, "grad_norm": 1.1644455194473267, "learning_rate": 1.7322611033653316e-05, "loss": 0.8827, "step": 5863 }, { "epoch": 0.7841668895426585, "grad_norm": 1.057141661643982, "learning_rate": 1.7321627731252336e-05, "loss": 1.0698, "step": 5864 }, { "epoch": 0.7843006151377374, "grad_norm": 1.129396677017212, "learning_rate": 1.732064427623903e-05, "loss": 1.0372, "step": 5865 }, { "epoch": 0.7844343407328163, "grad_norm": 1.0874342918395996, "learning_rate": 1.7319660668633897e-05, "loss": 0.9073, "step": 5866 }, { "epoch": 0.7845680663278951, "grad_norm": 1.1351569890975952, "learning_rate": 1.7318676908457447e-05, "loss": 1.076, "step": 5867 }, { "epoch": 0.7847017919229741, "grad_norm": 1.0553786754608154, "learning_rate": 1.7317692995730174e-05, "loss": 0.9703, "step": 5868 }, { "epoch": 0.7848355175180529, "grad_norm": 1.2016065120697021, "learning_rate": 1.7316708930472596e-05, "loss": 0.8443, "step": 5869 }, { "epoch": 0.7849692431131319, "grad_norm": 1.0746028423309326, "learning_rate": 1.731572471270522e-05, "loss": 1.0887, "step": 5870 }, { "epoch": 0.7851029687082107, "grad_norm": 0.981548547744751, "learning_rate": 1.7314740342448565e-05, "loss": 0.866, "step": 5871 }, { "epoch": 0.7852366943032897, "grad_norm": 1.1151477098464966, "learning_rate": 1.731375581972315e-05, "loss": 1.0798, "step": 5872 }, { "epoch": 0.7853704198983685, "grad_norm": 1.1292221546173096, "learning_rate": 1.7312771144549488e-05, "loss": 1.0079, "step": 5873 }, { "epoch": 0.7855041454934475, "grad_norm": 1.0944479703903198, "learning_rate": 1.7311786316948112e-05, "loss": 1.0172, "step": 5874 }, { "epoch": 0.7856378710885263, "grad_norm": 1.0610533952713013, "learning_rate": 1.7310801336939542e-05, "loss": 0.8997, "step": 5875 }, { "epoch": 0.7857715966836052, "grad_norm": 1.0645579099655151, "learning_rate": 1.730981620454432e-05, "loss": 0.8724, "step": 5876 }, { "epoch": 0.7859053222786841, "grad_norm": 1.1806964874267578, "learning_rate": 1.7308830919782972e-05, "loss": 0.9395, "step": 5877 }, { "epoch": 0.786039047873763, "grad_norm": 1.1036674976348877, "learning_rate": 1.7307845482676033e-05, "loss": 0.9602, "step": 5878 }, { "epoch": 0.786172773468842, "grad_norm": 1.0884637832641602, "learning_rate": 1.7306859893244056e-05, "loss": 0.9046, "step": 5879 }, { "epoch": 0.7863064990639208, "grad_norm": 1.0975658893585205, "learning_rate": 1.730587415150757e-05, "loss": 0.8987, "step": 5880 }, { "epoch": 0.7864402246589998, "grad_norm": 1.2087692022323608, "learning_rate": 1.7304888257487128e-05, "loss": 1.0424, "step": 5881 }, { "epoch": 0.7865739502540786, "grad_norm": 1.114935278892517, "learning_rate": 1.7303902211203282e-05, "loss": 1.0312, "step": 5882 }, { "epoch": 0.7867076758491576, "grad_norm": 1.0774348974227905, "learning_rate": 1.7302916012676587e-05, "loss": 1.014, "step": 5883 }, { "epoch": 0.7868414014442364, "grad_norm": 1.0701504945755005, "learning_rate": 1.730192966192759e-05, "loss": 0.9218, "step": 5884 }, { "epoch": 0.7869751270393153, "grad_norm": 1.119737982749939, "learning_rate": 1.7300943158976863e-05, "loss": 1.0027, "step": 5885 }, { "epoch": 0.7871088526343942, "grad_norm": 0.9682656526565552, "learning_rate": 1.7299956503844955e-05, "loss": 0.9071, "step": 5886 }, { "epoch": 0.7872425782294731, "grad_norm": 1.1441692113876343, "learning_rate": 1.7298969696552442e-05, "loss": 0.9025, "step": 5887 }, { "epoch": 0.787376303824552, "grad_norm": 1.169907808303833, "learning_rate": 1.729798273711989e-05, "loss": 0.973, "step": 5888 }, { "epoch": 0.7875100294196309, "grad_norm": 1.281720757484436, "learning_rate": 1.7296995625567872e-05, "loss": 0.9467, "step": 5889 }, { "epoch": 0.7876437550147098, "grad_norm": 1.0011168718338013, "learning_rate": 1.729600836191696e-05, "loss": 0.9103, "step": 5890 }, { "epoch": 0.7877774806097887, "grad_norm": 1.0064868927001953, "learning_rate": 1.729502094618774e-05, "loss": 0.915, "step": 5891 }, { "epoch": 0.7879112062048677, "grad_norm": 1.0504189729690552, "learning_rate": 1.7294033378400786e-05, "loss": 0.8785, "step": 5892 }, { "epoch": 0.7880449317999465, "grad_norm": 1.0779844522476196, "learning_rate": 1.7293045658576687e-05, "loss": 0.9442, "step": 5893 }, { "epoch": 0.7881786573950255, "grad_norm": 1.0728856325149536, "learning_rate": 1.729205778673603e-05, "loss": 0.869, "step": 5894 }, { "epoch": 0.7883123829901043, "grad_norm": 1.02186918258667, "learning_rate": 1.7291069762899404e-05, "loss": 0.8884, "step": 5895 }, { "epoch": 0.7884461085851832, "grad_norm": 1.074196219444275, "learning_rate": 1.7290081587087406e-05, "loss": 0.8941, "step": 5896 }, { "epoch": 0.7885798341802621, "grad_norm": 1.127129077911377, "learning_rate": 1.7289093259320635e-05, "loss": 0.926, "step": 5897 }, { "epoch": 0.788713559775341, "grad_norm": 1.024257779121399, "learning_rate": 1.7288104779619688e-05, "loss": 0.8504, "step": 5898 }, { "epoch": 0.7888472853704199, "grad_norm": 1.0059282779693604, "learning_rate": 1.7287116148005173e-05, "loss": 0.877, "step": 5899 }, { "epoch": 0.7889810109654988, "grad_norm": 1.1229854822158813, "learning_rate": 1.7286127364497692e-05, "loss": 0.9255, "step": 5900 }, { "epoch": 0.7891147365605777, "grad_norm": 1.1694836616516113, "learning_rate": 1.728513842911786e-05, "loss": 1.0296, "step": 5901 }, { "epoch": 0.7892484621556566, "grad_norm": 0.9748122692108154, "learning_rate": 1.7284149341886286e-05, "loss": 0.845, "step": 5902 }, { "epoch": 0.7893821877507355, "grad_norm": 1.0393608808517456, "learning_rate": 1.7283160102823594e-05, "loss": 1.0101, "step": 5903 }, { "epoch": 0.7895159133458144, "grad_norm": 1.0212371349334717, "learning_rate": 1.7282170711950396e-05, "loss": 0.8974, "step": 5904 }, { "epoch": 0.7896496389408932, "grad_norm": 1.131479263305664, "learning_rate": 1.7281181169287318e-05, "loss": 0.9799, "step": 5905 }, { "epoch": 0.7897833645359722, "grad_norm": 1.0069595575332642, "learning_rate": 1.7280191474854988e-05, "loss": 0.9808, "step": 5906 }, { "epoch": 0.789917090131051, "grad_norm": 1.0685888528823853, "learning_rate": 1.7279201628674028e-05, "loss": 1.0175, "step": 5907 }, { "epoch": 0.79005081572613, "grad_norm": 0.9918084144592285, "learning_rate": 1.727821163076508e-05, "loss": 0.9228, "step": 5908 }, { "epoch": 0.7901845413212089, "grad_norm": 0.9413108825683594, "learning_rate": 1.7277221481148774e-05, "loss": 0.9198, "step": 5909 }, { "epoch": 0.7903182669162878, "grad_norm": 1.0364792346954346, "learning_rate": 1.727623117984575e-05, "loss": 0.8837, "step": 5910 }, { "epoch": 0.7904519925113667, "grad_norm": 1.1601110696792603, "learning_rate": 1.727524072687665e-05, "loss": 1.0836, "step": 5911 }, { "epoch": 0.7905857181064456, "grad_norm": 1.0005912780761719, "learning_rate": 1.7274250122262116e-05, "loss": 0.9599, "step": 5912 }, { "epoch": 0.7907194437015245, "grad_norm": 1.0677276849746704, "learning_rate": 1.7273259366022802e-05, "loss": 0.8913, "step": 5913 }, { "epoch": 0.7908531692966033, "grad_norm": 1.0820367336273193, "learning_rate": 1.7272268458179352e-05, "loss": 0.9278, "step": 5914 }, { "epoch": 0.7909868948916823, "grad_norm": 1.1510486602783203, "learning_rate": 1.727127739875243e-05, "loss": 0.8708, "step": 5915 }, { "epoch": 0.7911206204867611, "grad_norm": 1.0579713582992554, "learning_rate": 1.7270286187762686e-05, "loss": 0.8709, "step": 5916 }, { "epoch": 0.7912543460818401, "grad_norm": 1.0919411182403564, "learning_rate": 1.7269294825230784e-05, "loss": 0.9742, "step": 5917 }, { "epoch": 0.7913880716769189, "grad_norm": 1.0626649856567383, "learning_rate": 1.7268303311177387e-05, "loss": 0.9494, "step": 5918 }, { "epoch": 0.7915217972719979, "grad_norm": 0.970781147480011, "learning_rate": 1.7267311645623163e-05, "loss": 1.0083, "step": 5919 }, { "epoch": 0.7916555228670767, "grad_norm": 1.118196725845337, "learning_rate": 1.726631982858878e-05, "loss": 0.9503, "step": 5920 }, { "epoch": 0.7917892484621557, "grad_norm": 1.153403401374817, "learning_rate": 1.7265327860094916e-05, "loss": 1.0777, "step": 5921 }, { "epoch": 0.7919229740572346, "grad_norm": 0.9938598871231079, "learning_rate": 1.7264335740162244e-05, "loss": 0.8602, "step": 5922 }, { "epoch": 0.7920566996523134, "grad_norm": 1.1479504108428955, "learning_rate": 1.7263343468811444e-05, "loss": 1.028, "step": 5923 }, { "epoch": 0.7921904252473924, "grad_norm": 1.1931774616241455, "learning_rate": 1.72623510460632e-05, "loss": 0.9505, "step": 5924 }, { "epoch": 0.7923241508424712, "grad_norm": 1.0811222791671753, "learning_rate": 1.7261358471938195e-05, "loss": 0.8999, "step": 5925 }, { "epoch": 0.7924578764375502, "grad_norm": 1.014931082725525, "learning_rate": 1.7260365746457125e-05, "loss": 0.892, "step": 5926 }, { "epoch": 0.792591602032629, "grad_norm": 0.9597230553627014, "learning_rate": 1.725937286964068e-05, "loss": 0.8746, "step": 5927 }, { "epoch": 0.792725327627708, "grad_norm": 0.9802173972129822, "learning_rate": 1.725837984150955e-05, "loss": 0.7494, "step": 5928 }, { "epoch": 0.7928590532227868, "grad_norm": 1.0733377933502197, "learning_rate": 1.7257386662084435e-05, "loss": 0.8316, "step": 5929 }, { "epoch": 0.7929927788178658, "grad_norm": 1.0939191579818726, "learning_rate": 1.7256393331386046e-05, "loss": 1.0157, "step": 5930 }, { "epoch": 0.7931265044129446, "grad_norm": 1.167578935623169, "learning_rate": 1.7255399849435077e-05, "loss": 1.0044, "step": 5931 }, { "epoch": 0.7932602300080235, "grad_norm": 0.9683929681777954, "learning_rate": 1.7254406216252243e-05, "loss": 0.927, "step": 5932 }, { "epoch": 0.7933939556031024, "grad_norm": 1.0881621837615967, "learning_rate": 1.7253412431858253e-05, "loss": 0.9656, "step": 5933 }, { "epoch": 0.7935276811981813, "grad_norm": 0.9965432286262512, "learning_rate": 1.7252418496273822e-05, "loss": 0.9237, "step": 5934 }, { "epoch": 0.7936614067932602, "grad_norm": 1.0255216360092163, "learning_rate": 1.7251424409519665e-05, "loss": 0.951, "step": 5935 }, { "epoch": 0.7937951323883391, "grad_norm": 0.9688674211502075, "learning_rate": 1.7250430171616507e-05, "loss": 0.9138, "step": 5936 }, { "epoch": 0.7939288579834181, "grad_norm": 1.1297768354415894, "learning_rate": 1.724943578258507e-05, "loss": 0.9318, "step": 5937 }, { "epoch": 0.7940625835784969, "grad_norm": 1.1526602506637573, "learning_rate": 1.7248441242446082e-05, "loss": 0.9276, "step": 5938 }, { "epoch": 0.7941963091735759, "grad_norm": 1.1144160032272339, "learning_rate": 1.7247446551220273e-05, "loss": 0.934, "step": 5939 }, { "epoch": 0.7943300347686547, "grad_norm": 1.1218068599700928, "learning_rate": 1.724645170892837e-05, "loss": 0.9859, "step": 5940 }, { "epoch": 0.7944637603637337, "grad_norm": 1.1022231578826904, "learning_rate": 1.7245456715591122e-05, "loss": 1.0143, "step": 5941 }, { "epoch": 0.7945974859588125, "grad_norm": 0.9646422863006592, "learning_rate": 1.724446157122926e-05, "loss": 0.9203, "step": 5942 }, { "epoch": 0.7947312115538914, "grad_norm": 0.9386504888534546, "learning_rate": 1.7243466275863525e-05, "loss": 0.8632, "step": 5943 }, { "epoch": 0.7948649371489703, "grad_norm": 1.1277166604995728, "learning_rate": 1.7242470829514674e-05, "loss": 0.9393, "step": 5944 }, { "epoch": 0.7949986627440492, "grad_norm": 1.03009831905365, "learning_rate": 1.724147523220344e-05, "loss": 1.0191, "step": 5945 }, { "epoch": 0.7951323883391281, "grad_norm": 1.011220932006836, "learning_rate": 1.724047948395059e-05, "loss": 0.96, "step": 5946 }, { "epoch": 0.795266113934207, "grad_norm": 1.137093186378479, "learning_rate": 1.7239483584776873e-05, "loss": 0.9475, "step": 5947 }, { "epoch": 0.795399839529286, "grad_norm": 1.0254755020141602, "learning_rate": 1.7238487534703045e-05, "loss": 0.9039, "step": 5948 }, { "epoch": 0.7955335651243648, "grad_norm": 1.081653356552124, "learning_rate": 1.7237491333749874e-05, "loss": 0.8243, "step": 5949 }, { "epoch": 0.7956672907194438, "grad_norm": 1.0846514701843262, "learning_rate": 1.723649498193812e-05, "loss": 0.9438, "step": 5950 }, { "epoch": 0.7958010163145226, "grad_norm": 1.1029421091079712, "learning_rate": 1.7235498479288554e-05, "loss": 0.949, "step": 5951 }, { "epoch": 0.7959347419096015, "grad_norm": 1.2071943283081055, "learning_rate": 1.7234501825821946e-05, "loss": 1.0229, "step": 5952 }, { "epoch": 0.7960684675046804, "grad_norm": 1.0350154638290405, "learning_rate": 1.7233505021559066e-05, "loss": 0.9488, "step": 5953 }, { "epoch": 0.7962021930997593, "grad_norm": 1.114148497581482, "learning_rate": 1.7232508066520702e-05, "loss": 0.9225, "step": 5954 }, { "epoch": 0.7963359186948382, "grad_norm": 1.0580759048461914, "learning_rate": 1.7231510960727625e-05, "loss": 1.0391, "step": 5955 }, { "epoch": 0.7964696442899171, "grad_norm": 1.0351217985153198, "learning_rate": 1.723051370420062e-05, "loss": 0.9178, "step": 5956 }, { "epoch": 0.796603369884996, "grad_norm": 1.1464687585830688, "learning_rate": 1.7229516296960477e-05, "loss": 1.0899, "step": 5957 }, { "epoch": 0.7967370954800749, "grad_norm": 1.1180436611175537, "learning_rate": 1.7228518739027985e-05, "loss": 1.0905, "step": 5958 }, { "epoch": 0.7968708210751538, "grad_norm": 1.0598148107528687, "learning_rate": 1.7227521030423938e-05, "loss": 0.9592, "step": 5959 }, { "epoch": 0.7970045466702327, "grad_norm": 1.0116569995880127, "learning_rate": 1.722652317116913e-05, "loss": 0.8654, "step": 5960 }, { "epoch": 0.7971382722653115, "grad_norm": 1.1499139070510864, "learning_rate": 1.722552516128436e-05, "loss": 0.9046, "step": 5961 }, { "epoch": 0.7972719978603905, "grad_norm": 1.0761595964431763, "learning_rate": 1.7224527000790436e-05, "loss": 0.8976, "step": 5962 }, { "epoch": 0.7974057234554693, "grad_norm": 1.2150306701660156, "learning_rate": 1.7223528689708157e-05, "loss": 1.0545, "step": 5963 }, { "epoch": 0.7975394490505483, "grad_norm": 0.9700686931610107, "learning_rate": 1.7222530228058338e-05, "loss": 0.9248, "step": 5964 }, { "epoch": 0.7976731746456271, "grad_norm": 1.1248748302459717, "learning_rate": 1.722153161586178e-05, "loss": 0.8833, "step": 5965 }, { "epoch": 0.7978069002407061, "grad_norm": 1.2003587484359741, "learning_rate": 1.7220532853139313e-05, "loss": 1.011, "step": 5966 }, { "epoch": 0.797940625835785, "grad_norm": 1.085605263710022, "learning_rate": 1.7219533939911743e-05, "loss": 0.8001, "step": 5967 }, { "epoch": 0.7980743514308639, "grad_norm": 1.1982121467590332, "learning_rate": 1.72185348761999e-05, "loss": 0.8284, "step": 5968 }, { "epoch": 0.7982080770259428, "grad_norm": 1.0838556289672852, "learning_rate": 1.7217535662024602e-05, "loss": 1.1263, "step": 5969 }, { "epoch": 0.7983418026210216, "grad_norm": 1.0332542657852173, "learning_rate": 1.721653629740668e-05, "loss": 0.9432, "step": 5970 }, { "epoch": 0.7984755282161006, "grad_norm": 1.08811616897583, "learning_rate": 1.721553678236697e-05, "loss": 0.9644, "step": 5971 }, { "epoch": 0.7986092538111794, "grad_norm": 1.099745750427246, "learning_rate": 1.7214537116926292e-05, "loss": 0.8914, "step": 5972 }, { "epoch": 0.7987429794062584, "grad_norm": 1.1409785747528076, "learning_rate": 1.7213537301105496e-05, "loss": 0.9315, "step": 5973 }, { "epoch": 0.7988767050013372, "grad_norm": 1.2062530517578125, "learning_rate": 1.7212537334925416e-05, "loss": 1.0215, "step": 5974 }, { "epoch": 0.7990104305964162, "grad_norm": 1.1689670085906982, "learning_rate": 1.7211537218406897e-05, "loss": 1.0395, "step": 5975 }, { "epoch": 0.799144156191495, "grad_norm": 1.2341601848602295, "learning_rate": 1.7210536951570788e-05, "loss": 0.9498, "step": 5976 }, { "epoch": 0.799277881786574, "grad_norm": 1.0076992511749268, "learning_rate": 1.7209536534437935e-05, "loss": 0.8595, "step": 5977 }, { "epoch": 0.7994116073816528, "grad_norm": 1.0309330224990845, "learning_rate": 1.720853596702919e-05, "loss": 0.9613, "step": 5978 }, { "epoch": 0.7995453329767317, "grad_norm": 1.03667151927948, "learning_rate": 1.7207535249365412e-05, "loss": 0.941, "step": 5979 }, { "epoch": 0.7996790585718107, "grad_norm": 1.2212883234024048, "learning_rate": 1.7206534381467456e-05, "loss": 0.9793, "step": 5980 }, { "epoch": 0.7998127841668895, "grad_norm": 1.0123236179351807, "learning_rate": 1.720553336335619e-05, "loss": 0.9442, "step": 5981 }, { "epoch": 0.7999465097619685, "grad_norm": 1.1629676818847656, "learning_rate": 1.7204532195052476e-05, "loss": 0.8257, "step": 5982 }, { "epoch": 0.8000802353570473, "grad_norm": 0.9287083148956299, "learning_rate": 1.720353087657718e-05, "loss": 0.9001, "step": 5983 }, { "epoch": 0.8002139609521263, "grad_norm": 1.1815904378890991, "learning_rate": 1.7202529407951175e-05, "loss": 0.9154, "step": 5984 }, { "epoch": 0.8003476865472051, "grad_norm": 1.0900535583496094, "learning_rate": 1.720152778919534e-05, "loss": 1.086, "step": 5985 }, { "epoch": 0.8004814121422841, "grad_norm": 1.1996012926101685, "learning_rate": 1.720052602033055e-05, "loss": 1.0278, "step": 5986 }, { "epoch": 0.8006151377373629, "grad_norm": 1.0817656517028809, "learning_rate": 1.719952410137768e-05, "loss": 0.9064, "step": 5987 }, { "epoch": 0.8007488633324419, "grad_norm": 1.1302690505981445, "learning_rate": 1.7198522032357622e-05, "loss": 1.0129, "step": 5988 }, { "epoch": 0.8008825889275207, "grad_norm": 1.0130740404129028, "learning_rate": 1.7197519813291262e-05, "loss": 0.8896, "step": 5989 }, { "epoch": 0.8010163145225996, "grad_norm": 1.072466254234314, "learning_rate": 1.7196517444199487e-05, "loss": 1.0032, "step": 5990 }, { "epoch": 0.8011500401176785, "grad_norm": 1.0459058284759521, "learning_rate": 1.7195514925103195e-05, "loss": 0.9505, "step": 5991 }, { "epoch": 0.8012837657127574, "grad_norm": 1.1594972610473633, "learning_rate": 1.7194512256023276e-05, "loss": 0.9115, "step": 5992 }, { "epoch": 0.8014174913078363, "grad_norm": 1.20310640335083, "learning_rate": 1.7193509436980633e-05, "loss": 0.913, "step": 5993 }, { "epoch": 0.8015512169029152, "grad_norm": 1.1311678886413574, "learning_rate": 1.7192506467996174e-05, "loss": 0.8977, "step": 5994 }, { "epoch": 0.8016849424979942, "grad_norm": 0.9222077131271362, "learning_rate": 1.7191503349090797e-05, "loss": 0.8419, "step": 5995 }, { "epoch": 0.801818668093073, "grad_norm": 1.1015582084655762, "learning_rate": 1.7190500080285412e-05, "loss": 0.8525, "step": 5996 }, { "epoch": 0.801952393688152, "grad_norm": 1.1134991645812988, "learning_rate": 1.7189496661600936e-05, "loss": 1.0288, "step": 5997 }, { "epoch": 0.8020861192832308, "grad_norm": 1.0536115169525146, "learning_rate": 1.7188493093058283e-05, "loss": 1.0164, "step": 5998 }, { "epoch": 0.8022198448783097, "grad_norm": 0.9787282943725586, "learning_rate": 1.718748937467837e-05, "loss": 0.9134, "step": 5999 }, { "epoch": 0.8023535704733886, "grad_norm": 1.1369825601577759, "learning_rate": 1.7186485506482115e-05, "loss": 0.9756, "step": 6000 }, { "epoch": 0.8024872960684675, "grad_norm": 1.1553720235824585, "learning_rate": 1.718548148849045e-05, "loss": 0.9445, "step": 6001 }, { "epoch": 0.8026210216635464, "grad_norm": 0.9981961846351624, "learning_rate": 1.7184477320724297e-05, "loss": 0.9742, "step": 6002 }, { "epoch": 0.8027547472586253, "grad_norm": 1.0971591472625732, "learning_rate": 1.718347300320459e-05, "loss": 0.89, "step": 6003 }, { "epoch": 0.8028884728537042, "grad_norm": 0.9448205232620239, "learning_rate": 1.7182468535952263e-05, "loss": 0.8237, "step": 6004 }, { "epoch": 0.8030221984487831, "grad_norm": 1.0414693355560303, "learning_rate": 1.718146391898825e-05, "loss": 0.8833, "step": 6005 }, { "epoch": 0.803155924043862, "grad_norm": 0.9588685035705566, "learning_rate": 1.71804591523335e-05, "loss": 0.835, "step": 6006 }, { "epoch": 0.8032896496389409, "grad_norm": 0.981637716293335, "learning_rate": 1.717945423600894e-05, "loss": 0.904, "step": 6007 }, { "epoch": 0.8034233752340197, "grad_norm": 1.0093623399734497, "learning_rate": 1.717844917003553e-05, "loss": 0.9563, "step": 6008 }, { "epoch": 0.8035571008290987, "grad_norm": 0.9742627143859863, "learning_rate": 1.7177443954434218e-05, "loss": 0.9329, "step": 6009 }, { "epoch": 0.8036908264241776, "grad_norm": 1.0158179998397827, "learning_rate": 1.7176438589225955e-05, "loss": 0.7878, "step": 6010 }, { "epoch": 0.8038245520192565, "grad_norm": 0.9885859489440918, "learning_rate": 1.7175433074431697e-05, "loss": 0.8924, "step": 6011 }, { "epoch": 0.8039582776143354, "grad_norm": 1.1555663347244263, "learning_rate": 1.7174427410072404e-05, "loss": 1.0202, "step": 6012 }, { "epoch": 0.8040920032094143, "grad_norm": 0.9582664966583252, "learning_rate": 1.717342159616903e-05, "loss": 0.8576, "step": 6013 }, { "epoch": 0.8042257288044932, "grad_norm": 1.136109471321106, "learning_rate": 1.7172415632742552e-05, "loss": 0.8963, "step": 6014 }, { "epoch": 0.8043594543995721, "grad_norm": 1.0619771480560303, "learning_rate": 1.7171409519813936e-05, "loss": 0.9359, "step": 6015 }, { "epoch": 0.804493179994651, "grad_norm": 1.134253978729248, "learning_rate": 1.7170403257404147e-05, "loss": 1.0642, "step": 6016 }, { "epoch": 0.8046269055897298, "grad_norm": 1.12119722366333, "learning_rate": 1.7169396845534164e-05, "loss": 0.8841, "step": 6017 }, { "epoch": 0.8047606311848088, "grad_norm": 1.0171111822128296, "learning_rate": 1.7168390284224964e-05, "loss": 1.0339, "step": 6018 }, { "epoch": 0.8048943567798876, "grad_norm": 1.0128767490386963, "learning_rate": 1.7167383573497526e-05, "loss": 1.0289, "step": 6019 }, { "epoch": 0.8050280823749666, "grad_norm": 1.2031018733978271, "learning_rate": 1.716637671337284e-05, "loss": 0.9209, "step": 6020 }, { "epoch": 0.8051618079700454, "grad_norm": 1.1009597778320312, "learning_rate": 1.7165369703871886e-05, "loss": 1.086, "step": 6021 }, { "epoch": 0.8052955335651244, "grad_norm": 1.144898772239685, "learning_rate": 1.7164362545015656e-05, "loss": 0.9716, "step": 6022 }, { "epoch": 0.8054292591602032, "grad_norm": 1.0333991050720215, "learning_rate": 1.7163355236825146e-05, "loss": 0.8193, "step": 6023 }, { "epoch": 0.8055629847552822, "grad_norm": 1.0955322980880737, "learning_rate": 1.7162347779321352e-05, "loss": 0.8673, "step": 6024 }, { "epoch": 0.8056967103503611, "grad_norm": 1.046897530555725, "learning_rate": 1.716134017252527e-05, "loss": 0.9881, "step": 6025 }, { "epoch": 0.8058304359454399, "grad_norm": 1.1322290897369385, "learning_rate": 1.7160332416457907e-05, "loss": 0.9068, "step": 6026 }, { "epoch": 0.8059641615405189, "grad_norm": 1.1079896688461304, "learning_rate": 1.7159324511140266e-05, "loss": 0.9603, "step": 6027 }, { "epoch": 0.8060978871355977, "grad_norm": 0.9854230284690857, "learning_rate": 1.7158316456593356e-05, "loss": 0.9239, "step": 6028 }, { "epoch": 0.8062316127306767, "grad_norm": 1.167246699333191, "learning_rate": 1.7157308252838187e-05, "loss": 0.9519, "step": 6029 }, { "epoch": 0.8063653383257555, "grad_norm": 1.0009126663208008, "learning_rate": 1.715629989989578e-05, "loss": 0.9555, "step": 6030 }, { "epoch": 0.8064990639208345, "grad_norm": 0.962867021560669, "learning_rate": 1.7155291397787147e-05, "loss": 0.9597, "step": 6031 }, { "epoch": 0.8066327895159133, "grad_norm": 1.0597095489501953, "learning_rate": 1.715428274653331e-05, "loss": 0.8535, "step": 6032 }, { "epoch": 0.8067665151109923, "grad_norm": 1.1344106197357178, "learning_rate": 1.71532739461553e-05, "loss": 0.9758, "step": 6033 }, { "epoch": 0.8069002407060711, "grad_norm": 1.1039469242095947, "learning_rate": 1.7152264996674138e-05, "loss": 0.9708, "step": 6034 }, { "epoch": 0.80703396630115, "grad_norm": 0.9794313907623291, "learning_rate": 1.7151255898110853e-05, "loss": 0.8675, "step": 6035 }, { "epoch": 0.807167691896229, "grad_norm": 1.0070325136184692, "learning_rate": 1.7150246650486483e-05, "loss": 0.9654, "step": 6036 }, { "epoch": 0.8073014174913078, "grad_norm": 1.0271183252334595, "learning_rate": 1.7149237253822065e-05, "loss": 0.8769, "step": 6037 }, { "epoch": 0.8074351430863868, "grad_norm": 1.057939052581787, "learning_rate": 1.714822770813864e-05, "loss": 0.9432, "step": 6038 }, { "epoch": 0.8075688686814656, "grad_norm": 1.1301624774932861, "learning_rate": 1.714721801345724e-05, "loss": 1.0501, "step": 6039 }, { "epoch": 0.8077025942765446, "grad_norm": 1.1286258697509766, "learning_rate": 1.714620816979893e-05, "loss": 0.9327, "step": 6040 }, { "epoch": 0.8078363198716234, "grad_norm": 0.9469525218009949, "learning_rate": 1.714519817718474e-05, "loss": 0.8419, "step": 6041 }, { "epoch": 0.8079700454667024, "grad_norm": 1.1028311252593994, "learning_rate": 1.7144188035635735e-05, "loss": 0.9878, "step": 6042 }, { "epoch": 0.8081037710617812, "grad_norm": 1.1041207313537598, "learning_rate": 1.714317774517297e-05, "loss": 1.0624, "step": 6043 }, { "epoch": 0.8082374966568602, "grad_norm": 1.0350028276443481, "learning_rate": 1.7142167305817495e-05, "loss": 0.9587, "step": 6044 }, { "epoch": 0.808371222251939, "grad_norm": 1.0243061780929565, "learning_rate": 1.714115671759038e-05, "loss": 0.8189, "step": 6045 }, { "epoch": 0.8085049478470179, "grad_norm": 1.1283940076828003, "learning_rate": 1.7140145980512684e-05, "loss": 0.9866, "step": 6046 }, { "epoch": 0.8086386734420968, "grad_norm": 1.0392546653747559, "learning_rate": 1.7139135094605478e-05, "loss": 0.9221, "step": 6047 }, { "epoch": 0.8087723990371757, "grad_norm": 1.103288173675537, "learning_rate": 1.7138124059889834e-05, "loss": 0.9427, "step": 6048 }, { "epoch": 0.8089061246322546, "grad_norm": 1.0742000341415405, "learning_rate": 1.713711287638682e-05, "loss": 0.8547, "step": 6049 }, { "epoch": 0.8090398502273335, "grad_norm": 1.0859650373458862, "learning_rate": 1.7136101544117526e-05, "loss": 0.8976, "step": 6050 }, { "epoch": 0.8091735758224124, "grad_norm": 1.0058294534683228, "learning_rate": 1.713509006310302e-05, "loss": 0.9624, "step": 6051 }, { "epoch": 0.8093073014174913, "grad_norm": 0.9886820912361145, "learning_rate": 1.7134078433364386e-05, "loss": 0.9371, "step": 6052 }, { "epoch": 0.8094410270125703, "grad_norm": 1.1034040451049805, "learning_rate": 1.7133066654922714e-05, "loss": 1.1178, "step": 6053 }, { "epoch": 0.8095747526076491, "grad_norm": 1.0523929595947266, "learning_rate": 1.7132054727799096e-05, "loss": 1.0018, "step": 6054 }, { "epoch": 0.809708478202728, "grad_norm": 0.9644655585289001, "learning_rate": 1.7131042652014623e-05, "loss": 1.0176, "step": 6055 }, { "epoch": 0.8098422037978069, "grad_norm": 1.1424295902252197, "learning_rate": 1.7130030427590386e-05, "loss": 0.9593, "step": 6056 }, { "epoch": 0.8099759293928858, "grad_norm": 1.0487345457077026, "learning_rate": 1.712901805454749e-05, "loss": 0.8514, "step": 6057 }, { "epoch": 0.8101096549879647, "grad_norm": 1.1162453889846802, "learning_rate": 1.712800553290703e-05, "loss": 0.893, "step": 6058 }, { "epoch": 0.8102433805830436, "grad_norm": 1.0783329010009766, "learning_rate": 1.712699286269012e-05, "loss": 0.9317, "step": 6059 }, { "epoch": 0.8103771061781225, "grad_norm": 0.9578342437744141, "learning_rate": 1.712598004391786e-05, "loss": 0.9456, "step": 6060 }, { "epoch": 0.8105108317732014, "grad_norm": 1.022254228591919, "learning_rate": 1.7124967076611368e-05, "loss": 0.8595, "step": 6061 }, { "epoch": 0.8106445573682803, "grad_norm": 1.091898798942566, "learning_rate": 1.7123953960791754e-05, "loss": 0.9, "step": 6062 }, { "epoch": 0.8107782829633592, "grad_norm": 1.0217387676239014, "learning_rate": 1.7122940696480137e-05, "loss": 0.8918, "step": 6063 }, { "epoch": 0.810912008558438, "grad_norm": 1.0604270696640015, "learning_rate": 1.7121927283697636e-05, "loss": 0.882, "step": 6064 }, { "epoch": 0.811045734153517, "grad_norm": 0.9987754225730896, "learning_rate": 1.7120913722465378e-05, "loss": 0.8589, "step": 6065 }, { "epoch": 0.8111794597485958, "grad_norm": 1.1152828931808472, "learning_rate": 1.7119900012804484e-05, "loss": 0.9458, "step": 6066 }, { "epoch": 0.8113131853436748, "grad_norm": 1.1335035562515259, "learning_rate": 1.7118886154736092e-05, "loss": 0.9186, "step": 6067 }, { "epoch": 0.8114469109387537, "grad_norm": 0.950318455696106, "learning_rate": 1.7117872148281324e-05, "loss": 0.8612, "step": 6068 }, { "epoch": 0.8115806365338326, "grad_norm": 1.0655595064163208, "learning_rate": 1.7116857993461326e-05, "loss": 0.9819, "step": 6069 }, { "epoch": 0.8117143621289115, "grad_norm": 0.924047589302063, "learning_rate": 1.7115843690297236e-05, "loss": 0.8233, "step": 6070 }, { "epoch": 0.8118480877239904, "grad_norm": 1.0580531358718872, "learning_rate": 1.711482923881019e-05, "loss": 0.9554, "step": 6071 }, { "epoch": 0.8119818133190693, "grad_norm": 0.9948450326919556, "learning_rate": 1.7113814639021334e-05, "loss": 0.891, "step": 6072 }, { "epoch": 0.8121155389141481, "grad_norm": 0.9294485449790955, "learning_rate": 1.7112799890951823e-05, "loss": 0.938, "step": 6073 }, { "epoch": 0.8122492645092271, "grad_norm": 1.0952844619750977, "learning_rate": 1.7111784994622804e-05, "loss": 0.986, "step": 6074 }, { "epoch": 0.8123829901043059, "grad_norm": 1.0463758707046509, "learning_rate": 1.711076995005543e-05, "loss": 0.9233, "step": 6075 }, { "epoch": 0.8125167156993849, "grad_norm": 1.1055735349655151, "learning_rate": 1.710975475727086e-05, "loss": 0.8834, "step": 6076 }, { "epoch": 0.8126504412944637, "grad_norm": 1.1485838890075684, "learning_rate": 1.7108739416290257e-05, "loss": 0.9209, "step": 6077 }, { "epoch": 0.8127841668895427, "grad_norm": 1.16169273853302, "learning_rate": 1.7107723927134788e-05, "loss": 1.0076, "step": 6078 }, { "epoch": 0.8129178924846215, "grad_norm": 1.0903571844100952, "learning_rate": 1.710670828982561e-05, "loss": 1.0211, "step": 6079 }, { "epoch": 0.8130516180797005, "grad_norm": 1.1035288572311401, "learning_rate": 1.7105692504383898e-05, "loss": 0.954, "step": 6080 }, { "epoch": 0.8131853436747793, "grad_norm": 1.0425844192504883, "learning_rate": 1.7104676570830824e-05, "loss": 0.9728, "step": 6081 }, { "epoch": 0.8133190692698582, "grad_norm": 1.0070650577545166, "learning_rate": 1.710366048918757e-05, "loss": 0.8758, "step": 6082 }, { "epoch": 0.8134527948649372, "grad_norm": 1.0774873495101929, "learning_rate": 1.7102644259475308e-05, "loss": 0.902, "step": 6083 }, { "epoch": 0.813586520460016, "grad_norm": 1.142493724822998, "learning_rate": 1.710162788171522e-05, "loss": 1.0043, "step": 6084 }, { "epoch": 0.813720246055095, "grad_norm": 0.8893013000488281, "learning_rate": 1.71006113559285e-05, "loss": 0.8673, "step": 6085 }, { "epoch": 0.8138539716501738, "grad_norm": 1.0045000314712524, "learning_rate": 1.7099594682136325e-05, "loss": 0.9643, "step": 6086 }, { "epoch": 0.8139876972452528, "grad_norm": 1.2097805738449097, "learning_rate": 1.7098577860359896e-05, "loss": 0.9119, "step": 6087 }, { "epoch": 0.8141214228403316, "grad_norm": 1.0805107355117798, "learning_rate": 1.7097560890620403e-05, "loss": 0.9811, "step": 6088 }, { "epoch": 0.8142551484354106, "grad_norm": 1.1926743984222412, "learning_rate": 1.7096543772939047e-05, "loss": 0.9281, "step": 6089 }, { "epoch": 0.8143888740304894, "grad_norm": 0.946707010269165, "learning_rate": 1.709552650733702e-05, "loss": 0.8964, "step": 6090 }, { "epoch": 0.8145225996255684, "grad_norm": 0.9843320250511169, "learning_rate": 1.709450909383554e-05, "loss": 0.9068, "step": 6091 }, { "epoch": 0.8146563252206472, "grad_norm": 1.0823416709899902, "learning_rate": 1.7093491532455804e-05, "loss": 0.906, "step": 6092 }, { "epoch": 0.8147900508157261, "grad_norm": 1.0088683366775513, "learning_rate": 1.7092473823219028e-05, "loss": 0.9362, "step": 6093 }, { "epoch": 0.814923776410805, "grad_norm": 0.9953064322471619, "learning_rate": 1.7091455966146418e-05, "loss": 0.9045, "step": 6094 }, { "epoch": 0.8150575020058839, "grad_norm": 1.0562125444412231, "learning_rate": 1.7090437961259195e-05, "loss": 0.9155, "step": 6095 }, { "epoch": 0.8151912276009629, "grad_norm": 1.160382628440857, "learning_rate": 1.7089419808578574e-05, "loss": 0.9863, "step": 6096 }, { "epoch": 0.8153249531960417, "grad_norm": 1.1183600425720215, "learning_rate": 1.7088401508125785e-05, "loss": 0.925, "step": 6097 }, { "epoch": 0.8154586787911207, "grad_norm": 1.0507615804672241, "learning_rate": 1.708738305992205e-05, "loss": 1.1005, "step": 6098 }, { "epoch": 0.8155924043861995, "grad_norm": 1.0413898229599, "learning_rate": 1.7086364463988597e-05, "loss": 0.9254, "step": 6099 }, { "epoch": 0.8157261299812785, "grad_norm": 1.0653586387634277, "learning_rate": 1.7085345720346655e-05, "loss": 1.0276, "step": 6100 }, { "epoch": 0.8158598555763573, "grad_norm": 1.0066090822219849, "learning_rate": 1.7084326829017464e-05, "loss": 0.9608, "step": 6101 }, { "epoch": 0.8159935811714362, "grad_norm": 1.0620393753051758, "learning_rate": 1.7083307790022255e-05, "loss": 0.8517, "step": 6102 }, { "epoch": 0.8161273067665151, "grad_norm": 1.108443021774292, "learning_rate": 1.708228860338228e-05, "loss": 0.9747, "step": 6103 }, { "epoch": 0.816261032361594, "grad_norm": 1.1763421297073364, "learning_rate": 1.7081269269118773e-05, "loss": 1.0128, "step": 6104 }, { "epoch": 0.8163947579566729, "grad_norm": 1.0500962734222412, "learning_rate": 1.7080249787252984e-05, "loss": 0.9683, "step": 6105 }, { "epoch": 0.8165284835517518, "grad_norm": 0.9833402633666992, "learning_rate": 1.707923015780616e-05, "loss": 0.853, "step": 6106 }, { "epoch": 0.8166622091468307, "grad_norm": 1.1283477544784546, "learning_rate": 1.707821038079956e-05, "loss": 0.9281, "step": 6107 }, { "epoch": 0.8167959347419096, "grad_norm": 0.9429518580436707, "learning_rate": 1.707719045625444e-05, "loss": 0.7806, "step": 6108 }, { "epoch": 0.8169296603369886, "grad_norm": 1.0016028881072998, "learning_rate": 1.7076170384192053e-05, "loss": 0.9516, "step": 6109 }, { "epoch": 0.8170633859320674, "grad_norm": 1.1430487632751465, "learning_rate": 1.7075150164633666e-05, "loss": 1.0045, "step": 6110 }, { "epoch": 0.8171971115271462, "grad_norm": 1.1011921167373657, "learning_rate": 1.7074129797600547e-05, "loss": 0.9519, "step": 6111 }, { "epoch": 0.8173308371222252, "grad_norm": 1.0478070974349976, "learning_rate": 1.707310928311396e-05, "loss": 0.8948, "step": 6112 }, { "epoch": 0.817464562717304, "grad_norm": 1.0234606266021729, "learning_rate": 1.707208862119518e-05, "loss": 0.969, "step": 6113 }, { "epoch": 0.817598288312383, "grad_norm": 1.094452977180481, "learning_rate": 1.7071067811865477e-05, "loss": 0.9826, "step": 6114 }, { "epoch": 0.8177320139074619, "grad_norm": 1.162048101425171, "learning_rate": 1.707004685514613e-05, "loss": 0.9562, "step": 6115 }, { "epoch": 0.8178657395025408, "grad_norm": 1.042914628982544, "learning_rate": 1.7069025751058426e-05, "loss": 0.9095, "step": 6116 }, { "epoch": 0.8179994650976197, "grad_norm": 1.0458208322525024, "learning_rate": 1.7068004499623645e-05, "loss": 0.8642, "step": 6117 }, { "epoch": 0.8181331906926986, "grad_norm": 1.1367179155349731, "learning_rate": 1.7066983100863072e-05, "loss": 0.9487, "step": 6118 }, { "epoch": 0.8182669162877775, "grad_norm": 1.0643582344055176, "learning_rate": 1.7065961554797997e-05, "loss": 0.8661, "step": 6119 }, { "epoch": 0.8184006418828563, "grad_norm": 1.0496025085449219, "learning_rate": 1.7064939861449716e-05, "loss": 0.9107, "step": 6120 }, { "epoch": 0.8185343674779353, "grad_norm": 1.0279496908187866, "learning_rate": 1.7063918020839525e-05, "loss": 0.9959, "step": 6121 }, { "epoch": 0.8186680930730141, "grad_norm": 1.1633539199829102, "learning_rate": 1.7062896032988723e-05, "loss": 1.0575, "step": 6122 }, { "epoch": 0.8188018186680931, "grad_norm": 0.9825596213340759, "learning_rate": 1.7061873897918607e-05, "loss": 0.8754, "step": 6123 }, { "epoch": 0.8189355442631719, "grad_norm": 1.1353669166564941, "learning_rate": 1.706085161565049e-05, "loss": 0.9121, "step": 6124 }, { "epoch": 0.8190692698582509, "grad_norm": 1.0410075187683105, "learning_rate": 1.705982918620568e-05, "loss": 1.0054, "step": 6125 }, { "epoch": 0.8192029954533298, "grad_norm": 1.1720629930496216, "learning_rate": 1.7058806609605482e-05, "loss": 0.9491, "step": 6126 }, { "epoch": 0.8193367210484087, "grad_norm": 1.0217541456222534, "learning_rate": 1.705778388587122e-05, "loss": 0.8616, "step": 6127 }, { "epoch": 0.8194704466434876, "grad_norm": 1.0271517038345337, "learning_rate": 1.70567610150242e-05, "loss": 0.8867, "step": 6128 }, { "epoch": 0.8196041722385664, "grad_norm": 1.1515856981277466, "learning_rate": 1.7055737997085753e-05, "loss": 1.0929, "step": 6129 }, { "epoch": 0.8197378978336454, "grad_norm": 1.0481700897216797, "learning_rate": 1.7054714832077198e-05, "loss": 0.9406, "step": 6130 }, { "epoch": 0.8198716234287242, "grad_norm": 1.2201708555221558, "learning_rate": 1.7053691520019863e-05, "loss": 0.9357, "step": 6131 }, { "epoch": 0.8200053490238032, "grad_norm": 0.9761015176773071, "learning_rate": 1.705266806093508e-05, "loss": 0.8883, "step": 6132 }, { "epoch": 0.820139074618882, "grad_norm": 1.061244249343872, "learning_rate": 1.7051644454844175e-05, "loss": 0.9383, "step": 6133 }, { "epoch": 0.820272800213961, "grad_norm": 1.055127739906311, "learning_rate": 1.705062070176849e-05, "loss": 0.8859, "step": 6134 }, { "epoch": 0.8204065258090398, "grad_norm": 1.154029369354248, "learning_rate": 1.704959680172937e-05, "loss": 1.0374, "step": 6135 }, { "epoch": 0.8205402514041188, "grad_norm": 1.0986170768737793, "learning_rate": 1.7048572754748143e-05, "loss": 0.9582, "step": 6136 }, { "epoch": 0.8206739769991976, "grad_norm": 1.1026197671890259, "learning_rate": 1.7047548560846166e-05, "loss": 0.89, "step": 6137 }, { "epoch": 0.8208077025942765, "grad_norm": 0.9968591332435608, "learning_rate": 1.7046524220044783e-05, "loss": 0.9244, "step": 6138 }, { "epoch": 0.8209414281893554, "grad_norm": 1.1902706623077393, "learning_rate": 1.7045499732365342e-05, "loss": 0.9832, "step": 6139 }, { "epoch": 0.8210751537844343, "grad_norm": 1.1944248676300049, "learning_rate": 1.7044475097829203e-05, "loss": 1.0832, "step": 6140 }, { "epoch": 0.8212088793795133, "grad_norm": 1.1179265975952148, "learning_rate": 1.704345031645772e-05, "loss": 1.0104, "step": 6141 }, { "epoch": 0.8213426049745921, "grad_norm": 1.0587571859359741, "learning_rate": 1.7042425388272256e-05, "loss": 0.9153, "step": 6142 }, { "epoch": 0.8214763305696711, "grad_norm": 1.060757040977478, "learning_rate": 1.7041400313294176e-05, "loss": 1.0031, "step": 6143 }, { "epoch": 0.8216100561647499, "grad_norm": 1.040330410003662, "learning_rate": 1.704037509154484e-05, "loss": 0.9761, "step": 6144 }, { "epoch": 0.8217437817598289, "grad_norm": 1.3250054121017456, "learning_rate": 1.7039349723045625e-05, "loss": 0.9934, "step": 6145 }, { "epoch": 0.8218775073549077, "grad_norm": 0.9038297533988953, "learning_rate": 1.7038324207817902e-05, "loss": 0.9361, "step": 6146 }, { "epoch": 0.8220112329499867, "grad_norm": 0.9741806983947754, "learning_rate": 1.7037298545883042e-05, "loss": 0.8073, "step": 6147 }, { "epoch": 0.8221449585450655, "grad_norm": 0.8642858862876892, "learning_rate": 1.7036272737262432e-05, "loss": 0.8329, "step": 6148 }, { "epoch": 0.8222786841401444, "grad_norm": 1.0173125267028809, "learning_rate": 1.7035246781977447e-05, "loss": 0.929, "step": 6149 }, { "epoch": 0.8224124097352233, "grad_norm": 1.0292012691497803, "learning_rate": 1.7034220680049477e-05, "loss": 0.9053, "step": 6150 }, { "epoch": 0.8225461353303022, "grad_norm": 1.065398097038269, "learning_rate": 1.7033194431499903e-05, "loss": 1.0302, "step": 6151 }, { "epoch": 0.8226798609253811, "grad_norm": 1.0922472476959229, "learning_rate": 1.7032168036350126e-05, "loss": 0.9388, "step": 6152 }, { "epoch": 0.82281358652046, "grad_norm": 1.0479071140289307, "learning_rate": 1.7031141494621534e-05, "loss": 0.8563, "step": 6153 }, { "epoch": 0.822947312115539, "grad_norm": 1.1110531091690063, "learning_rate": 1.7030114806335528e-05, "loss": 0.9729, "step": 6154 }, { "epoch": 0.8230810377106178, "grad_norm": 1.0562607049942017, "learning_rate": 1.70290879715135e-05, "loss": 0.8602, "step": 6155 }, { "epoch": 0.8232147633056968, "grad_norm": 1.1326544284820557, "learning_rate": 1.7028060990176865e-05, "loss": 0.8301, "step": 6156 }, { "epoch": 0.8233484889007756, "grad_norm": 1.1494784355163574, "learning_rate": 1.702703386234702e-05, "loss": 1.0559, "step": 6157 }, { "epoch": 0.8234822144958545, "grad_norm": 1.0292245149612427, "learning_rate": 1.7026006588045382e-05, "loss": 0.9313, "step": 6158 }, { "epoch": 0.8236159400909334, "grad_norm": 1.1391910314559937, "learning_rate": 1.7024979167293354e-05, "loss": 0.8746, "step": 6159 }, { "epoch": 0.8237496656860123, "grad_norm": 1.0820252895355225, "learning_rate": 1.702395160011236e-05, "loss": 1.071, "step": 6160 }, { "epoch": 0.8238833912810912, "grad_norm": 1.1025400161743164, "learning_rate": 1.7022923886523818e-05, "loss": 0.9617, "step": 6161 }, { "epoch": 0.8240171168761701, "grad_norm": 1.1073821783065796, "learning_rate": 1.702189602654915e-05, "loss": 1.0077, "step": 6162 }, { "epoch": 0.824150842471249, "grad_norm": 1.083636999130249, "learning_rate": 1.7020868020209773e-05, "loss": 1.0497, "step": 6163 }, { "epoch": 0.8242845680663279, "grad_norm": 1.0290521383285522, "learning_rate": 1.7019839867527122e-05, "loss": 0.8729, "step": 6164 }, { "epoch": 0.8244182936614068, "grad_norm": 1.0141433477401733, "learning_rate": 1.701881156852263e-05, "loss": 0.9581, "step": 6165 }, { "epoch": 0.8245520192564857, "grad_norm": 1.0510011911392212, "learning_rate": 1.7017783123217725e-05, "loss": 0.8941, "step": 6166 }, { "epoch": 0.8246857448515645, "grad_norm": 1.148488163948059, "learning_rate": 1.7016754531633846e-05, "loss": 0.9721, "step": 6167 }, { "epoch": 0.8248194704466435, "grad_norm": 0.9917287826538086, "learning_rate": 1.701572579379243e-05, "loss": 0.9532, "step": 6168 }, { "epoch": 0.8249531960417223, "grad_norm": 1.1366647481918335, "learning_rate": 1.7014696909714928e-05, "loss": 1.0275, "step": 6169 }, { "epoch": 0.8250869216368013, "grad_norm": 1.041864275932312, "learning_rate": 1.7013667879422778e-05, "loss": 0.9064, "step": 6170 }, { "epoch": 0.8252206472318802, "grad_norm": 1.248285174369812, "learning_rate": 1.701263870293743e-05, "loss": 0.9973, "step": 6171 }, { "epoch": 0.8253543728269591, "grad_norm": 1.0920511484146118, "learning_rate": 1.7011609380280344e-05, "loss": 0.9129, "step": 6172 }, { "epoch": 0.825488098422038, "grad_norm": 1.3310837745666504, "learning_rate": 1.701057991147297e-05, "loss": 0.9571, "step": 6173 }, { "epoch": 0.8256218240171169, "grad_norm": 1.1831388473510742, "learning_rate": 1.7009550296536762e-05, "loss": 0.9721, "step": 6174 }, { "epoch": 0.8257555496121958, "grad_norm": 1.0175886154174805, "learning_rate": 1.700852053549319e-05, "loss": 0.8446, "step": 6175 }, { "epoch": 0.8258892752072746, "grad_norm": 1.0355157852172852, "learning_rate": 1.7007490628363706e-05, "loss": 0.8424, "step": 6176 }, { "epoch": 0.8260230008023536, "grad_norm": 1.1303750276565552, "learning_rate": 1.7006460575169792e-05, "loss": 0.9823, "step": 6177 }, { "epoch": 0.8261567263974324, "grad_norm": 1.0448142290115356, "learning_rate": 1.700543037593291e-05, "loss": 1.0546, "step": 6178 }, { "epoch": 0.8262904519925114, "grad_norm": 1.0527616739273071, "learning_rate": 1.700440003067454e-05, "loss": 0.8833, "step": 6179 }, { "epoch": 0.8264241775875902, "grad_norm": 1.1139705181121826, "learning_rate": 1.7003369539416147e-05, "loss": 0.9029, "step": 6180 }, { "epoch": 0.8265579031826692, "grad_norm": 1.0564367771148682, "learning_rate": 1.700233890217922e-05, "loss": 0.8723, "step": 6181 }, { "epoch": 0.826691628777748, "grad_norm": 1.1202948093414307, "learning_rate": 1.7001308118985237e-05, "loss": 1.0272, "step": 6182 }, { "epoch": 0.826825354372827, "grad_norm": 1.1927080154418945, "learning_rate": 1.700027718985569e-05, "loss": 0.9133, "step": 6183 }, { "epoch": 0.8269590799679059, "grad_norm": 1.0962576866149902, "learning_rate": 1.699924611481206e-05, "loss": 0.8929, "step": 6184 }, { "epoch": 0.8270928055629847, "grad_norm": 1.0162962675094604, "learning_rate": 1.6998214893875845e-05, "loss": 0.901, "step": 6185 }, { "epoch": 0.8272265311580637, "grad_norm": 1.0443971157073975, "learning_rate": 1.6997183527068536e-05, "loss": 0.8625, "step": 6186 }, { "epoch": 0.8273602567531425, "grad_norm": 1.0037717819213867, "learning_rate": 1.699615201441163e-05, "loss": 0.9086, "step": 6187 }, { "epoch": 0.8274939823482215, "grad_norm": 1.1338119506835938, "learning_rate": 1.699512035592663e-05, "loss": 0.848, "step": 6188 }, { "epoch": 0.8276277079433003, "grad_norm": 1.0317057371139526, "learning_rate": 1.6994088551635043e-05, "loss": 0.9257, "step": 6189 }, { "epoch": 0.8277614335383793, "grad_norm": 1.0992035865783691, "learning_rate": 1.6993056601558372e-05, "loss": 0.8783, "step": 6190 }, { "epoch": 0.8278951591334581, "grad_norm": 1.0445293188095093, "learning_rate": 1.6992024505718126e-05, "loss": 0.9876, "step": 6191 }, { "epoch": 0.8280288847285371, "grad_norm": 1.0581703186035156, "learning_rate": 1.699099226413582e-05, "loss": 0.8892, "step": 6192 }, { "epoch": 0.8281626103236159, "grad_norm": 1.1670211553573608, "learning_rate": 1.6989959876832972e-05, "loss": 0.9715, "step": 6193 }, { "epoch": 0.8282963359186949, "grad_norm": 1.0369857549667358, "learning_rate": 1.6988927343831093e-05, "loss": 0.8635, "step": 6194 }, { "epoch": 0.8284300615137737, "grad_norm": 1.0399136543273926, "learning_rate": 1.6987894665151718e-05, "loss": 0.9588, "step": 6195 }, { "epoch": 0.8285637871088526, "grad_norm": 1.045790433883667, "learning_rate": 1.698686184081636e-05, "loss": 0.8707, "step": 6196 }, { "epoch": 0.8286975127039315, "grad_norm": 1.0708565711975098, "learning_rate": 1.698582887084656e-05, "loss": 0.9746, "step": 6197 }, { "epoch": 0.8288312382990104, "grad_norm": 1.1816719770431519, "learning_rate": 1.6984795755263836e-05, "loss": 0.9087, "step": 6198 }, { "epoch": 0.8289649638940894, "grad_norm": 1.0195719003677368, "learning_rate": 1.6983762494089732e-05, "loss": 0.8929, "step": 6199 }, { "epoch": 0.8290986894891682, "grad_norm": 0.986464262008667, "learning_rate": 1.698272908734578e-05, "loss": 0.868, "step": 6200 }, { "epoch": 0.8292324150842472, "grad_norm": 1.1000392436981201, "learning_rate": 1.6981695535053518e-05, "loss": 0.9668, "step": 6201 }, { "epoch": 0.829366140679326, "grad_norm": 0.9747217893600464, "learning_rate": 1.69806618372345e-05, "loss": 0.855, "step": 6202 }, { "epoch": 0.829499866274405, "grad_norm": 1.1245551109313965, "learning_rate": 1.697962799391026e-05, "loss": 0.9342, "step": 6203 }, { "epoch": 0.8296335918694838, "grad_norm": 1.0616766214370728, "learning_rate": 1.6978594005102354e-05, "loss": 0.9354, "step": 6204 }, { "epoch": 0.8297673174645627, "grad_norm": 1.0917917490005493, "learning_rate": 1.6977559870832336e-05, "loss": 0.9014, "step": 6205 }, { "epoch": 0.8299010430596416, "grad_norm": 1.1070598363876343, "learning_rate": 1.697652559112176e-05, "loss": 0.8093, "step": 6206 }, { "epoch": 0.8300347686547205, "grad_norm": 1.0546437501907349, "learning_rate": 1.6975491165992182e-05, "loss": 0.9038, "step": 6207 }, { "epoch": 0.8301684942497994, "grad_norm": 1.072019100189209, "learning_rate": 1.6974456595465166e-05, "loss": 0.9929, "step": 6208 }, { "epoch": 0.8303022198448783, "grad_norm": 1.1376469135284424, "learning_rate": 1.6973421879562275e-05, "loss": 0.8224, "step": 6209 }, { "epoch": 0.8304359454399572, "grad_norm": 0.9903003573417664, "learning_rate": 1.697238701830508e-05, "loss": 0.9159, "step": 6210 }, { "epoch": 0.8305696710350361, "grad_norm": 0.9316397309303284, "learning_rate": 1.697135201171515e-05, "loss": 0.872, "step": 6211 }, { "epoch": 0.830703396630115, "grad_norm": 1.0285007953643799, "learning_rate": 1.6970316859814054e-05, "loss": 0.978, "step": 6212 }, { "epoch": 0.8308371222251939, "grad_norm": 0.994144856929779, "learning_rate": 1.6969281562623375e-05, "loss": 0.9887, "step": 6213 }, { "epoch": 0.8309708478202728, "grad_norm": 1.176943063735962, "learning_rate": 1.6968246120164692e-05, "loss": 0.882, "step": 6214 }, { "epoch": 0.8311045734153517, "grad_norm": 1.0672295093536377, "learning_rate": 1.6967210532459584e-05, "loss": 0.9149, "step": 6215 }, { "epoch": 0.8312382990104306, "grad_norm": 1.1021041870117188, "learning_rate": 1.696617479952964e-05, "loss": 0.971, "step": 6216 }, { "epoch": 0.8313720246055095, "grad_norm": 1.0570067167282104, "learning_rate": 1.6965138921396452e-05, "loss": 0.9608, "step": 6217 }, { "epoch": 0.8315057502005884, "grad_norm": 0.9825366139411926, "learning_rate": 1.6964102898081608e-05, "loss": 0.9281, "step": 6218 }, { "epoch": 0.8316394757956673, "grad_norm": 1.0337327718734741, "learning_rate": 1.69630667296067e-05, "loss": 0.9509, "step": 6219 }, { "epoch": 0.8317732013907462, "grad_norm": 1.192141056060791, "learning_rate": 1.6962030415993327e-05, "loss": 1.0371, "step": 6220 }, { "epoch": 0.8319069269858251, "grad_norm": 1.1258766651153564, "learning_rate": 1.6960993957263094e-05, "loss": 0.9718, "step": 6221 }, { "epoch": 0.832040652580904, "grad_norm": 0.9789291024208069, "learning_rate": 1.6959957353437605e-05, "loss": 0.87, "step": 6222 }, { "epoch": 0.8321743781759828, "grad_norm": 1.0538341999053955, "learning_rate": 1.6958920604538462e-05, "loss": 0.9748, "step": 6223 }, { "epoch": 0.8323081037710618, "grad_norm": 1.275272011756897, "learning_rate": 1.695788371058728e-05, "loss": 0.96, "step": 6224 }, { "epoch": 0.8324418293661406, "grad_norm": 1.0702353715896606, "learning_rate": 1.6956846671605667e-05, "loss": 0.9858, "step": 6225 }, { "epoch": 0.8325755549612196, "grad_norm": 1.1408076286315918, "learning_rate": 1.6955809487615244e-05, "loss": 0.8968, "step": 6226 }, { "epoch": 0.8327092805562984, "grad_norm": 1.1220728158950806, "learning_rate": 1.695477215863763e-05, "loss": 1.0432, "step": 6227 }, { "epoch": 0.8328430061513774, "grad_norm": 1.0511724948883057, "learning_rate": 1.6953734684694444e-05, "loss": 1.0044, "step": 6228 }, { "epoch": 0.8329767317464563, "grad_norm": 1.1092078685760498, "learning_rate": 1.695269706580731e-05, "loss": 0.8889, "step": 6229 }, { "epoch": 0.8331104573415352, "grad_norm": 1.339896321296692, "learning_rate": 1.695165930199786e-05, "loss": 1.0218, "step": 6230 }, { "epoch": 0.8332441829366141, "grad_norm": 1.057202696800232, "learning_rate": 1.695062139328773e-05, "loss": 0.9761, "step": 6231 }, { "epoch": 0.8333779085316929, "grad_norm": 1.1081269979476929, "learning_rate": 1.694958333969854e-05, "loss": 0.949, "step": 6232 }, { "epoch": 0.8335116341267719, "grad_norm": 1.081121563911438, "learning_rate": 1.6948545141251934e-05, "loss": 0.9558, "step": 6233 }, { "epoch": 0.8336453597218507, "grad_norm": 1.0447009801864624, "learning_rate": 1.6947506797969563e-05, "loss": 0.9995, "step": 6234 }, { "epoch": 0.8337790853169297, "grad_norm": 1.0064798593521118, "learning_rate": 1.6946468309873055e-05, "loss": 1.0295, "step": 6235 }, { "epoch": 0.8339128109120085, "grad_norm": 0.9835310578346252, "learning_rate": 1.694542967698406e-05, "loss": 0.9852, "step": 6236 }, { "epoch": 0.8340465365070875, "grad_norm": 0.9826045036315918, "learning_rate": 1.6944390899324234e-05, "loss": 0.8355, "step": 6237 }, { "epoch": 0.8341802621021663, "grad_norm": 1.0677248239517212, "learning_rate": 1.694335197691522e-05, "loss": 0.8803, "step": 6238 }, { "epoch": 0.8343139876972453, "grad_norm": 1.047454595565796, "learning_rate": 1.6942312909778683e-05, "loss": 0.8361, "step": 6239 }, { "epoch": 0.8344477132923241, "grad_norm": 1.0687263011932373, "learning_rate": 1.6941273697936273e-05, "loss": 0.903, "step": 6240 }, { "epoch": 0.8345814388874031, "grad_norm": 1.0576106309890747, "learning_rate": 1.6940234341409657e-05, "loss": 0.7732, "step": 6241 }, { "epoch": 0.834715164482482, "grad_norm": 0.9619467854499817, "learning_rate": 1.6939194840220497e-05, "loss": 0.8796, "step": 6242 }, { "epoch": 0.8348488900775608, "grad_norm": 1.1115882396697998, "learning_rate": 1.693815519439046e-05, "loss": 0.8976, "step": 6243 }, { "epoch": 0.8349826156726398, "grad_norm": 0.9547367095947266, "learning_rate": 1.693711540394122e-05, "loss": 0.8988, "step": 6244 }, { "epoch": 0.8351163412677186, "grad_norm": 1.1374239921569824, "learning_rate": 1.693607546889444e-05, "loss": 0.9039, "step": 6245 }, { "epoch": 0.8352500668627976, "grad_norm": 1.0855188369750977, "learning_rate": 1.693503538927181e-05, "loss": 1.0201, "step": 6246 }, { "epoch": 0.8353837924578764, "grad_norm": 1.0434775352478027, "learning_rate": 1.6933995165095006e-05, "loss": 0.9262, "step": 6247 }, { "epoch": 0.8355175180529554, "grad_norm": 1.0397087335586548, "learning_rate": 1.6932954796385703e-05, "loss": 0.9966, "step": 6248 }, { "epoch": 0.8356512436480342, "grad_norm": 0.989005982875824, "learning_rate": 1.693191428316559e-05, "loss": 0.9304, "step": 6249 }, { "epoch": 0.8357849692431132, "grad_norm": 1.1155787706375122, "learning_rate": 1.6930873625456362e-05, "loss": 0.987, "step": 6250 }, { "epoch": 0.835918694838192, "grad_norm": 1.014721155166626, "learning_rate": 1.69298328232797e-05, "loss": 0.7934, "step": 6251 }, { "epoch": 0.8360524204332709, "grad_norm": 1.0660616159439087, "learning_rate": 1.6928791876657306e-05, "loss": 0.8197, "step": 6252 }, { "epoch": 0.8361861460283498, "grad_norm": 1.0063304901123047, "learning_rate": 1.6927750785610876e-05, "loss": 0.9246, "step": 6253 }, { "epoch": 0.8363198716234287, "grad_norm": 1.0346862077713013, "learning_rate": 1.6926709550162112e-05, "loss": 0.911, "step": 6254 }, { "epoch": 0.8364535972185076, "grad_norm": 1.2380086183547974, "learning_rate": 1.692566817033271e-05, "loss": 1.0264, "step": 6255 }, { "epoch": 0.8365873228135865, "grad_norm": 1.0647270679473877, "learning_rate": 1.692462664614439e-05, "loss": 0.8628, "step": 6256 }, { "epoch": 0.8367210484086655, "grad_norm": 1.0911678075790405, "learning_rate": 1.692358497761885e-05, "loss": 0.9609, "step": 6257 }, { "epoch": 0.8368547740037443, "grad_norm": 0.980737566947937, "learning_rate": 1.6922543164777805e-05, "loss": 0.9897, "step": 6258 }, { "epoch": 0.8369884995988233, "grad_norm": 1.0662826299667358, "learning_rate": 1.692150120764297e-05, "loss": 1.0212, "step": 6259 }, { "epoch": 0.8371222251939021, "grad_norm": 1.0151029825210571, "learning_rate": 1.692045910623607e-05, "loss": 0.9197, "step": 6260 }, { "epoch": 0.837255950788981, "grad_norm": 1.0873527526855469, "learning_rate": 1.691941686057882e-05, "loss": 0.9893, "step": 6261 }, { "epoch": 0.8373896763840599, "grad_norm": 1.0680855512619019, "learning_rate": 1.691837447069295e-05, "loss": 1.0698, "step": 6262 }, { "epoch": 0.8375234019791388, "grad_norm": 0.9014647603034973, "learning_rate": 1.6917331936600183e-05, "loss": 0.8106, "step": 6263 }, { "epoch": 0.8376571275742177, "grad_norm": 1.0312988758087158, "learning_rate": 1.6916289258322246e-05, "loss": 0.8819, "step": 6264 }, { "epoch": 0.8377908531692966, "grad_norm": 0.9442629814147949, "learning_rate": 1.691524643588088e-05, "loss": 0.9669, "step": 6265 }, { "epoch": 0.8379245787643755, "grad_norm": 1.1172345876693726, "learning_rate": 1.691420346929782e-05, "loss": 0.9529, "step": 6266 }, { "epoch": 0.8380583043594544, "grad_norm": 1.006263017654419, "learning_rate": 1.6913160358594803e-05, "loss": 0.894, "step": 6267 }, { "epoch": 0.8381920299545333, "grad_norm": 0.9992109537124634, "learning_rate": 1.6912117103793578e-05, "loss": 1.0314, "step": 6268 }, { "epoch": 0.8383257555496122, "grad_norm": 1.0451394319534302, "learning_rate": 1.6911073704915883e-05, "loss": 0.9283, "step": 6269 }, { "epoch": 0.838459481144691, "grad_norm": 1.1377421617507935, "learning_rate": 1.691003016198347e-05, "loss": 1.0524, "step": 6270 }, { "epoch": 0.83859320673977, "grad_norm": 0.9296470284461975, "learning_rate": 1.690898647501809e-05, "loss": 0.9054, "step": 6271 }, { "epoch": 0.8387269323348489, "grad_norm": 1.1319226026535034, "learning_rate": 1.69079426440415e-05, "loss": 0.9183, "step": 6272 }, { "epoch": 0.8388606579299278, "grad_norm": 1.005556583404541, "learning_rate": 1.6906898669075452e-05, "loss": 0.8667, "step": 6273 }, { "epoch": 0.8389943835250067, "grad_norm": 1.1296900510787964, "learning_rate": 1.6905854550141717e-05, "loss": 1.0613, "step": 6274 }, { "epoch": 0.8391281091200856, "grad_norm": 0.9757203459739685, "learning_rate": 1.6904810287262047e-05, "loss": 0.8075, "step": 6275 }, { "epoch": 0.8392618347151645, "grad_norm": 1.1405946016311646, "learning_rate": 1.6903765880458216e-05, "loss": 1.013, "step": 6276 }, { "epoch": 0.8393955603102434, "grad_norm": 0.9648895263671875, "learning_rate": 1.690272132975199e-05, "loss": 0.8188, "step": 6277 }, { "epoch": 0.8395292859053223, "grad_norm": 0.9771251678466797, "learning_rate": 1.6901676635165144e-05, "loss": 0.9642, "step": 6278 }, { "epoch": 0.8396630115004011, "grad_norm": 1.098215937614441, "learning_rate": 1.6900631796719455e-05, "loss": 0.9902, "step": 6279 }, { "epoch": 0.8397967370954801, "grad_norm": 0.9888482689857483, "learning_rate": 1.6899586814436692e-05, "loss": 1.0555, "step": 6280 }, { "epoch": 0.8399304626905589, "grad_norm": 1.0288373231887817, "learning_rate": 1.6898541688338648e-05, "loss": 0.9126, "step": 6281 }, { "epoch": 0.8400641882856379, "grad_norm": 1.0977911949157715, "learning_rate": 1.6897496418447108e-05, "loss": 1.0251, "step": 6282 }, { "epoch": 0.8401979138807167, "grad_norm": 0.9422560930252075, "learning_rate": 1.6896451004783848e-05, "loss": 0.8863, "step": 6283 }, { "epoch": 0.8403316394757957, "grad_norm": 1.0523384809494019, "learning_rate": 1.689540544737067e-05, "loss": 0.9248, "step": 6284 }, { "epoch": 0.8404653650708745, "grad_norm": 0.9838606119155884, "learning_rate": 1.6894359746229362e-05, "loss": 0.9582, "step": 6285 }, { "epoch": 0.8405990906659535, "grad_norm": 1.1502082347869873, "learning_rate": 1.6893313901381724e-05, "loss": 0.8797, "step": 6286 }, { "epoch": 0.8407328162610324, "grad_norm": 1.0644716024398804, "learning_rate": 1.6892267912849556e-05, "loss": 0.9738, "step": 6287 }, { "epoch": 0.8408665418561112, "grad_norm": 1.1231529712677002, "learning_rate": 1.6891221780654654e-05, "loss": 0.8603, "step": 6288 }, { "epoch": 0.8410002674511902, "grad_norm": 1.2128039598464966, "learning_rate": 1.689017550481883e-05, "loss": 0.9645, "step": 6289 }, { "epoch": 0.841133993046269, "grad_norm": 0.9433903098106384, "learning_rate": 1.6889129085363892e-05, "loss": 0.8889, "step": 6290 }, { "epoch": 0.841267718641348, "grad_norm": 1.2111896276474, "learning_rate": 1.6888082522311648e-05, "loss": 1.0538, "step": 6291 }, { "epoch": 0.8414014442364268, "grad_norm": 0.9870617985725403, "learning_rate": 1.6887035815683918e-05, "loss": 0.9643, "step": 6292 }, { "epoch": 0.8415351698315058, "grad_norm": 0.9630647301673889, "learning_rate": 1.6885988965502514e-05, "loss": 0.895, "step": 6293 }, { "epoch": 0.8416688954265846, "grad_norm": 1.0599976778030396, "learning_rate": 1.6884941971789263e-05, "loss": 0.9944, "step": 6294 }, { "epoch": 0.8418026210216636, "grad_norm": 1.0369551181793213, "learning_rate": 1.688389483456598e-05, "loss": 0.8877, "step": 6295 }, { "epoch": 0.8419363466167424, "grad_norm": 1.0309689044952393, "learning_rate": 1.6882847553854497e-05, "loss": 0.9182, "step": 6296 }, { "epoch": 0.8420700722118214, "grad_norm": 1.0261473655700684, "learning_rate": 1.6881800129676643e-05, "loss": 0.9038, "step": 6297 }, { "epoch": 0.8422037978069002, "grad_norm": 1.0375601053237915, "learning_rate": 1.6880752562054253e-05, "loss": 0.9059, "step": 6298 }, { "epoch": 0.8423375234019791, "grad_norm": 1.0322469472885132, "learning_rate": 1.687970485100916e-05, "loss": 0.8628, "step": 6299 }, { "epoch": 0.842471248997058, "grad_norm": 0.9662466645240784, "learning_rate": 1.68786569965632e-05, "loss": 0.8839, "step": 6300 }, { "epoch": 0.8426049745921369, "grad_norm": 1.0548816919326782, "learning_rate": 1.6877608998738216e-05, "loss": 0.9052, "step": 6301 }, { "epoch": 0.8427387001872159, "grad_norm": 1.0748306512832642, "learning_rate": 1.687656085755606e-05, "loss": 0.9428, "step": 6302 }, { "epoch": 0.8428724257822947, "grad_norm": 1.1008113622665405, "learning_rate": 1.687551257303857e-05, "loss": 0.9902, "step": 6303 }, { "epoch": 0.8430061513773737, "grad_norm": 0.990467369556427, "learning_rate": 1.6874464145207597e-05, "loss": 0.8826, "step": 6304 }, { "epoch": 0.8431398769724525, "grad_norm": 1.0164737701416016, "learning_rate": 1.6873415574085e-05, "loss": 0.9027, "step": 6305 }, { "epoch": 0.8432736025675315, "grad_norm": 0.9884905219078064, "learning_rate": 1.687236685969263e-05, "loss": 0.9582, "step": 6306 }, { "epoch": 0.8434073281626103, "grad_norm": 1.0693950653076172, "learning_rate": 1.687131800205235e-05, "loss": 1.0188, "step": 6307 }, { "epoch": 0.8435410537576892, "grad_norm": 1.2533334493637085, "learning_rate": 1.687026900118602e-05, "loss": 1.0249, "step": 6308 }, { "epoch": 0.8436747793527681, "grad_norm": 0.9755898118019104, "learning_rate": 1.686921985711551e-05, "loss": 0.9091, "step": 6309 }, { "epoch": 0.843808504947847, "grad_norm": 1.092630386352539, "learning_rate": 1.686817056986268e-05, "loss": 0.8727, "step": 6310 }, { "epoch": 0.8439422305429259, "grad_norm": 1.0801206827163696, "learning_rate": 1.6867121139449413e-05, "loss": 0.977, "step": 6311 }, { "epoch": 0.8440759561380048, "grad_norm": 1.1071114540100098, "learning_rate": 1.6866071565897574e-05, "loss": 0.9515, "step": 6312 }, { "epoch": 0.8442096817330837, "grad_norm": 1.0245574712753296, "learning_rate": 1.6865021849229042e-05, "loss": 0.9799, "step": 6313 }, { "epoch": 0.8443434073281626, "grad_norm": 0.9975886344909668, "learning_rate": 1.68639719894657e-05, "loss": 0.914, "step": 6314 }, { "epoch": 0.8444771329232416, "grad_norm": 1.087110161781311, "learning_rate": 1.686292198662943e-05, "loss": 1.0333, "step": 6315 }, { "epoch": 0.8446108585183204, "grad_norm": 1.081152081489563, "learning_rate": 1.6861871840742118e-05, "loss": 0.8577, "step": 6316 }, { "epoch": 0.8447445841133993, "grad_norm": 1.0627353191375732, "learning_rate": 1.6860821551825655e-05, "loss": 0.9317, "step": 6317 }, { "epoch": 0.8448783097084782, "grad_norm": 1.0807102918624878, "learning_rate": 1.685977111990193e-05, "loss": 0.9217, "step": 6318 }, { "epoch": 0.8450120353035571, "grad_norm": 1.1931391954421997, "learning_rate": 1.6858720544992843e-05, "loss": 0.9624, "step": 6319 }, { "epoch": 0.845145760898636, "grad_norm": 1.0161738395690918, "learning_rate": 1.6857669827120285e-05, "loss": 0.8237, "step": 6320 }, { "epoch": 0.8452794864937149, "grad_norm": 1.0203443765640259, "learning_rate": 1.6856618966306164e-05, "loss": 0.9922, "step": 6321 }, { "epoch": 0.8454132120887938, "grad_norm": 1.057619571685791, "learning_rate": 1.685556796257238e-05, "loss": 0.8714, "step": 6322 }, { "epoch": 0.8455469376838727, "grad_norm": 1.0800080299377441, "learning_rate": 1.6854516815940844e-05, "loss": 0.9564, "step": 6323 }, { "epoch": 0.8456806632789516, "grad_norm": 1.0452362298965454, "learning_rate": 1.6853465526433465e-05, "loss": 0.9349, "step": 6324 }, { "epoch": 0.8458143888740305, "grad_norm": 1.063637137413025, "learning_rate": 1.6852414094072153e-05, "loss": 1.0769, "step": 6325 }, { "epoch": 0.8459481144691093, "grad_norm": 1.0307679176330566, "learning_rate": 1.6851362518878823e-05, "loss": 1.0414, "step": 6326 }, { "epoch": 0.8460818400641883, "grad_norm": 1.0028204917907715, "learning_rate": 1.6850310800875402e-05, "loss": 0.975, "step": 6327 }, { "epoch": 0.8462155656592671, "grad_norm": 1.2184512615203857, "learning_rate": 1.6849258940083806e-05, "loss": 0.9348, "step": 6328 }, { "epoch": 0.8463492912543461, "grad_norm": 1.021688461303711, "learning_rate": 1.684820693652596e-05, "loss": 0.9101, "step": 6329 }, { "epoch": 0.846483016849425, "grad_norm": 1.1253647804260254, "learning_rate": 1.6847154790223797e-05, "loss": 0.8863, "step": 6330 }, { "epoch": 0.8466167424445039, "grad_norm": 1.1511632204055786, "learning_rate": 1.6846102501199244e-05, "loss": 0.9205, "step": 6331 }, { "epoch": 0.8467504680395828, "grad_norm": 1.0134265422821045, "learning_rate": 1.6845050069474234e-05, "loss": 0.9559, "step": 6332 }, { "epoch": 0.8468841936346617, "grad_norm": 1.1101819276809692, "learning_rate": 1.6843997495070702e-05, "loss": 1.001, "step": 6333 }, { "epoch": 0.8470179192297406, "grad_norm": 1.129840612411499, "learning_rate": 1.68429447780106e-05, "loss": 0.9079, "step": 6334 }, { "epoch": 0.8471516448248194, "grad_norm": 1.0620453357696533, "learning_rate": 1.6841891918315853e-05, "loss": 0.9264, "step": 6335 }, { "epoch": 0.8472853704198984, "grad_norm": 1.1281931400299072, "learning_rate": 1.684083891600842e-05, "loss": 0.8918, "step": 6336 }, { "epoch": 0.8474190960149772, "grad_norm": 1.1712507009506226, "learning_rate": 1.6839785771110247e-05, "loss": 0.8917, "step": 6337 }, { "epoch": 0.8475528216100562, "grad_norm": 1.0798373222351074, "learning_rate": 1.683873248364328e-05, "loss": 1.0495, "step": 6338 }, { "epoch": 0.847686547205135, "grad_norm": 1.0146881341934204, "learning_rate": 1.6837679053629483e-05, "loss": 0.9955, "step": 6339 }, { "epoch": 0.847820272800214, "grad_norm": 1.0500850677490234, "learning_rate": 1.683662548109081e-05, "loss": 0.9478, "step": 6340 }, { "epoch": 0.8479539983952928, "grad_norm": 1.0305777788162231, "learning_rate": 1.6835571766049214e-05, "loss": 0.8954, "step": 6341 }, { "epoch": 0.8480877239903718, "grad_norm": 0.9722110033035278, "learning_rate": 1.683451790852667e-05, "loss": 0.8995, "step": 6342 }, { "epoch": 0.8482214495854506, "grad_norm": 0.9783356189727783, "learning_rate": 1.683346390854514e-05, "loss": 0.9075, "step": 6343 }, { "epoch": 0.8483551751805296, "grad_norm": 1.064634084701538, "learning_rate": 1.6832409766126593e-05, "loss": 0.8733, "step": 6344 }, { "epoch": 0.8484889007756085, "grad_norm": 1.0619784593582153, "learning_rate": 1.6831355481293004e-05, "loss": 0.9727, "step": 6345 }, { "epoch": 0.8486226263706873, "grad_norm": 1.1045472621917725, "learning_rate": 1.6830301054066343e-05, "loss": 0.8665, "step": 6346 }, { "epoch": 0.8487563519657663, "grad_norm": 1.002352237701416, "learning_rate": 1.68292464844686e-05, "loss": 0.9916, "step": 6347 }, { "epoch": 0.8488900775608451, "grad_norm": 1.0003159046173096, "learning_rate": 1.6828191772521744e-05, "loss": 0.8624, "step": 6348 }, { "epoch": 0.8490238031559241, "grad_norm": 0.9276086091995239, "learning_rate": 1.6827136918247763e-05, "loss": 0.8647, "step": 6349 }, { "epoch": 0.8491575287510029, "grad_norm": 1.0791691541671753, "learning_rate": 1.6826081921668645e-05, "loss": 0.8793, "step": 6350 }, { "epoch": 0.8492912543460819, "grad_norm": 1.1185963153839111, "learning_rate": 1.6825026782806383e-05, "loss": 1.0109, "step": 6351 }, { "epoch": 0.8494249799411607, "grad_norm": 1.0141671895980835, "learning_rate": 1.682397150168297e-05, "loss": 0.8502, "step": 6352 }, { "epoch": 0.8495587055362397, "grad_norm": 1.0985190868377686, "learning_rate": 1.68229160783204e-05, "loss": 0.9827, "step": 6353 }, { "epoch": 0.8496924311313185, "grad_norm": 1.115431785583496, "learning_rate": 1.6821860512740674e-05, "loss": 0.899, "step": 6354 }, { "epoch": 0.8498261567263974, "grad_norm": 1.030537724494934, "learning_rate": 1.6820804804965792e-05, "loss": 0.9981, "step": 6355 }, { "epoch": 0.8499598823214763, "grad_norm": 1.0183442831039429, "learning_rate": 1.681974895501776e-05, "loss": 0.9282, "step": 6356 }, { "epoch": 0.8500936079165552, "grad_norm": 1.0021448135375977, "learning_rate": 1.681869296291859e-05, "loss": 0.9135, "step": 6357 }, { "epoch": 0.8502273335116342, "grad_norm": 1.019509196281433, "learning_rate": 1.6817636828690288e-05, "loss": 0.8565, "step": 6358 }, { "epoch": 0.850361059106713, "grad_norm": 1.062915563583374, "learning_rate": 1.681658055235487e-05, "loss": 1.0596, "step": 6359 }, { "epoch": 0.850494784701792, "grad_norm": 1.0293793678283691, "learning_rate": 1.681552413393435e-05, "loss": 0.9461, "step": 6360 }, { "epoch": 0.8506285102968708, "grad_norm": 1.0702258348464966, "learning_rate": 1.6814467573450754e-05, "loss": 1.0166, "step": 6361 }, { "epoch": 0.8507622358919498, "grad_norm": 1.1517055034637451, "learning_rate": 1.6813410870926105e-05, "loss": 0.9807, "step": 6362 }, { "epoch": 0.8508959614870286, "grad_norm": 1.0516215562820435, "learning_rate": 1.6812354026382426e-05, "loss": 0.9033, "step": 6363 }, { "epoch": 0.8510296870821075, "grad_norm": 1.0838863849639893, "learning_rate": 1.681129703984174e-05, "loss": 0.9396, "step": 6364 }, { "epoch": 0.8511634126771864, "grad_norm": 1.093553900718689, "learning_rate": 1.6810239911326086e-05, "loss": 1.0545, "step": 6365 }, { "epoch": 0.8512971382722653, "grad_norm": 1.0164642333984375, "learning_rate": 1.6809182640857504e-05, "loss": 1.0266, "step": 6366 }, { "epoch": 0.8514308638673442, "grad_norm": 1.0966217517852783, "learning_rate": 1.680812522845802e-05, "loss": 0.9495, "step": 6367 }, { "epoch": 0.8515645894624231, "grad_norm": 1.065967321395874, "learning_rate": 1.680706767414968e-05, "loss": 0.7678, "step": 6368 }, { "epoch": 0.851698315057502, "grad_norm": 1.1220910549163818, "learning_rate": 1.6806009977954533e-05, "loss": 0.8639, "step": 6369 }, { "epoch": 0.8518320406525809, "grad_norm": 1.0540400743484497, "learning_rate": 1.6804952139894618e-05, "loss": 0.9823, "step": 6370 }, { "epoch": 0.8519657662476599, "grad_norm": 1.0192756652832031, "learning_rate": 1.6803894159991985e-05, "loss": 0.9771, "step": 6371 }, { "epoch": 0.8520994918427387, "grad_norm": 0.9443618059158325, "learning_rate": 1.6802836038268694e-05, "loss": 0.8796, "step": 6372 }, { "epoch": 0.8522332174378175, "grad_norm": 1.0384531021118164, "learning_rate": 1.680177777474679e-05, "loss": 0.8645, "step": 6373 }, { "epoch": 0.8523669430328965, "grad_norm": 1.1033827066421509, "learning_rate": 1.6800719369448336e-05, "loss": 0.8759, "step": 6374 }, { "epoch": 0.8525006686279754, "grad_norm": 0.9726662635803223, "learning_rate": 1.67996608223954e-05, "loss": 0.9768, "step": 6375 }, { "epoch": 0.8526343942230543, "grad_norm": 1.1042805910110474, "learning_rate": 1.679860213361004e-05, "loss": 0.8964, "step": 6376 }, { "epoch": 0.8527681198181332, "grad_norm": 1.0877240896224976, "learning_rate": 1.6797543303114322e-05, "loss": 0.9579, "step": 6377 }, { "epoch": 0.8529018454132121, "grad_norm": 1.1410986185073853, "learning_rate": 1.6796484330930315e-05, "loss": 0.9157, "step": 6378 }, { "epoch": 0.853035571008291, "grad_norm": 1.003361701965332, "learning_rate": 1.6795425217080098e-05, "loss": 0.9624, "step": 6379 }, { "epoch": 0.8531692966033699, "grad_norm": 1.067478895187378, "learning_rate": 1.679436596158575e-05, "loss": 1.0772, "step": 6380 }, { "epoch": 0.8533030221984488, "grad_norm": 1.0158237218856812, "learning_rate": 1.679330656446934e-05, "loss": 0.978, "step": 6381 }, { "epoch": 0.8534367477935276, "grad_norm": 1.029374122619629, "learning_rate": 1.6792247025752956e-05, "loss": 0.924, "step": 6382 }, { "epoch": 0.8535704733886066, "grad_norm": 1.111932396888733, "learning_rate": 1.679118734545868e-05, "loss": 1.0051, "step": 6383 }, { "epoch": 0.8537041989836854, "grad_norm": 1.0799624919891357, "learning_rate": 1.679012752360861e-05, "loss": 0.9312, "step": 6384 }, { "epoch": 0.8538379245787644, "grad_norm": 1.0726861953735352, "learning_rate": 1.678906756022482e-05, "loss": 0.9636, "step": 6385 }, { "epoch": 0.8539716501738432, "grad_norm": 1.075973629951477, "learning_rate": 1.678800745532942e-05, "loss": 0.8986, "step": 6386 }, { "epoch": 0.8541053757689222, "grad_norm": 1.0156878232955933, "learning_rate": 1.6786947208944494e-05, "loss": 1.0191, "step": 6387 }, { "epoch": 0.854239101364001, "grad_norm": 0.9368893504142761, "learning_rate": 1.6785886821092153e-05, "loss": 0.8995, "step": 6388 }, { "epoch": 0.85437282695908, "grad_norm": 1.0493046045303345, "learning_rate": 1.6784826291794495e-05, "loss": 0.9437, "step": 6389 }, { "epoch": 0.8545065525541589, "grad_norm": 1.1224291324615479, "learning_rate": 1.678376562107362e-05, "loss": 0.7937, "step": 6390 }, { "epoch": 0.8546402781492377, "grad_norm": 0.9947245121002197, "learning_rate": 1.6782704808951646e-05, "loss": 0.936, "step": 6391 }, { "epoch": 0.8547740037443167, "grad_norm": 0.9639949798583984, "learning_rate": 1.678164385545068e-05, "loss": 0.9088, "step": 6392 }, { "epoch": 0.8549077293393955, "grad_norm": 1.0433982610702515, "learning_rate": 1.6780582760592836e-05, "loss": 1.0057, "step": 6393 }, { "epoch": 0.8550414549344745, "grad_norm": 1.0665639638900757, "learning_rate": 1.6779521524400234e-05, "loss": 1.0163, "step": 6394 }, { "epoch": 0.8551751805295533, "grad_norm": 1.0499364137649536, "learning_rate": 1.677846014689499e-05, "loss": 0.9395, "step": 6395 }, { "epoch": 0.8553089061246323, "grad_norm": 1.201156497001648, "learning_rate": 1.6777398628099234e-05, "loss": 0.941, "step": 6396 }, { "epoch": 0.8554426317197111, "grad_norm": 1.0105317831039429, "learning_rate": 1.677633696803509e-05, "loss": 0.8268, "step": 6397 }, { "epoch": 0.8555763573147901, "grad_norm": 0.9905195236206055, "learning_rate": 1.677527516672468e-05, "loss": 0.9659, "step": 6398 }, { "epoch": 0.8557100829098689, "grad_norm": 1.1213469505310059, "learning_rate": 1.6774213224190148e-05, "loss": 0.9894, "step": 6399 }, { "epoch": 0.8558438085049479, "grad_norm": 1.0489760637283325, "learning_rate": 1.6773151140453624e-05, "loss": 0.931, "step": 6400 }, { "epoch": 0.8559775341000267, "grad_norm": 1.0773919820785522, "learning_rate": 1.677208891553724e-05, "loss": 0.8668, "step": 6401 }, { "epoch": 0.8561112596951056, "grad_norm": 1.2183749675750732, "learning_rate": 1.6771026549463148e-05, "loss": 0.9943, "step": 6402 }, { "epoch": 0.8562449852901846, "grad_norm": 0.9685238003730774, "learning_rate": 1.6769964042253485e-05, "loss": 0.9971, "step": 6403 }, { "epoch": 0.8563787108852634, "grad_norm": 1.0275424718856812, "learning_rate": 1.6768901393930403e-05, "loss": 0.8311, "step": 6404 }, { "epoch": 0.8565124364803424, "grad_norm": 1.0167380571365356, "learning_rate": 1.6767838604516043e-05, "loss": 0.8401, "step": 6405 }, { "epoch": 0.8566461620754212, "grad_norm": 1.1026512384414673, "learning_rate": 1.6766775674032565e-05, "loss": 1.0043, "step": 6406 }, { "epoch": 0.8567798876705002, "grad_norm": 0.9721025824546814, "learning_rate": 1.6765712602502122e-05, "loss": 0.9988, "step": 6407 }, { "epoch": 0.856913613265579, "grad_norm": 0.9958188533782959, "learning_rate": 1.676464938994688e-05, "loss": 0.9182, "step": 6408 }, { "epoch": 0.857047338860658, "grad_norm": 1.0558589696884155, "learning_rate": 1.6763586036388988e-05, "loss": 1.0118, "step": 6409 }, { "epoch": 0.8571810644557368, "grad_norm": 1.0125571489334106, "learning_rate": 1.676252254185062e-05, "loss": 0.9108, "step": 6410 }, { "epoch": 0.8573147900508157, "grad_norm": 1.1763077974319458, "learning_rate": 1.676145890635394e-05, "loss": 0.9964, "step": 6411 }, { "epoch": 0.8574485156458946, "grad_norm": 1.1250919103622437, "learning_rate": 1.6760395129921118e-05, "loss": 0.9941, "step": 6412 }, { "epoch": 0.8575822412409735, "grad_norm": 1.0218565464019775, "learning_rate": 1.675933121257433e-05, "loss": 0.9758, "step": 6413 }, { "epoch": 0.8577159668360524, "grad_norm": 0.9700666666030884, "learning_rate": 1.675826715433575e-05, "loss": 0.9058, "step": 6414 }, { "epoch": 0.8578496924311313, "grad_norm": 0.958427906036377, "learning_rate": 1.6757202955227557e-05, "loss": 1.0698, "step": 6415 }, { "epoch": 0.8579834180262103, "grad_norm": 1.051458716392517, "learning_rate": 1.675613861527194e-05, "loss": 0.9571, "step": 6416 }, { "epoch": 0.8581171436212891, "grad_norm": 1.131280541419983, "learning_rate": 1.6755074134491075e-05, "loss": 0.9619, "step": 6417 }, { "epoch": 0.8582508692163681, "grad_norm": 1.127591609954834, "learning_rate": 1.675400951290715e-05, "loss": 0.9754, "step": 6418 }, { "epoch": 0.8583845948114469, "grad_norm": 1.1054295301437378, "learning_rate": 1.6752944750542366e-05, "loss": 0.8947, "step": 6419 }, { "epoch": 0.8585183204065258, "grad_norm": 1.2202069759368896, "learning_rate": 1.6751879847418907e-05, "loss": 1.0274, "step": 6420 }, { "epoch": 0.8586520460016047, "grad_norm": 1.0476248264312744, "learning_rate": 1.675081480355897e-05, "loss": 0.9219, "step": 6421 }, { "epoch": 0.8587857715966836, "grad_norm": 1.0108592510223389, "learning_rate": 1.6749749618984763e-05, "loss": 0.8629, "step": 6422 }, { "epoch": 0.8589194971917625, "grad_norm": 1.0441325902938843, "learning_rate": 1.6748684293718484e-05, "loss": 1.0049, "step": 6423 }, { "epoch": 0.8590532227868414, "grad_norm": 1.100607991218567, "learning_rate": 1.674761882778234e-05, "loss": 0.9337, "step": 6424 }, { "epoch": 0.8591869483819203, "grad_norm": 1.0563383102416992, "learning_rate": 1.6746553221198532e-05, "loss": 0.9714, "step": 6425 }, { "epoch": 0.8593206739769992, "grad_norm": 1.1651633977890015, "learning_rate": 1.6745487473989285e-05, "loss": 0.8994, "step": 6426 }, { "epoch": 0.8594543995720781, "grad_norm": 1.005658507347107, "learning_rate": 1.67444215861768e-05, "loss": 0.9515, "step": 6427 }, { "epoch": 0.859588125167157, "grad_norm": 1.0972975492477417, "learning_rate": 1.6743355557783308e-05, "loss": 0.9155, "step": 6428 }, { "epoch": 0.8597218507622358, "grad_norm": 1.1275793313980103, "learning_rate": 1.6742289388831014e-05, "loss": 0.9988, "step": 6429 }, { "epoch": 0.8598555763573148, "grad_norm": 1.0282682180404663, "learning_rate": 1.6741223079342153e-05, "loss": 0.9155, "step": 6430 }, { "epoch": 0.8599893019523936, "grad_norm": 1.0839102268218994, "learning_rate": 1.674015662933895e-05, "loss": 0.8919, "step": 6431 }, { "epoch": 0.8601230275474726, "grad_norm": 1.1187360286712646, "learning_rate": 1.673909003884363e-05, "loss": 0.9458, "step": 6432 }, { "epoch": 0.8602567531425515, "grad_norm": 0.9898458123207092, "learning_rate": 1.6738023307878425e-05, "loss": 0.8712, "step": 6433 }, { "epoch": 0.8603904787376304, "grad_norm": 1.0592583417892456, "learning_rate": 1.6736956436465573e-05, "loss": 0.9341, "step": 6434 }, { "epoch": 0.8605242043327093, "grad_norm": 1.1703660488128662, "learning_rate": 1.6735889424627313e-05, "loss": 1.0303, "step": 6435 }, { "epoch": 0.8606579299277882, "grad_norm": 0.9925939440727234, "learning_rate": 1.673482227238588e-05, "loss": 0.944, "step": 6436 }, { "epoch": 0.8607916555228671, "grad_norm": 1.0885568857192993, "learning_rate": 1.6733754979763525e-05, "loss": 0.9736, "step": 6437 }, { "epoch": 0.8609253811179459, "grad_norm": 1.0746959447860718, "learning_rate": 1.6732687546782486e-05, "loss": 0.9015, "step": 6438 }, { "epoch": 0.8610591067130249, "grad_norm": 1.0241910219192505, "learning_rate": 1.6731619973465018e-05, "loss": 0.9399, "step": 6439 }, { "epoch": 0.8611928323081037, "grad_norm": 1.1427667140960693, "learning_rate": 1.6730552259833378e-05, "loss": 1.0284, "step": 6440 }, { "epoch": 0.8613265579031827, "grad_norm": 1.1317977905273438, "learning_rate": 1.672948440590981e-05, "loss": 0.8813, "step": 6441 }, { "epoch": 0.8614602834982615, "grad_norm": 1.0913825035095215, "learning_rate": 1.6728416411716587e-05, "loss": 0.9214, "step": 6442 }, { "epoch": 0.8615940090933405, "grad_norm": 1.1184508800506592, "learning_rate": 1.6727348277275957e-05, "loss": 0.8926, "step": 6443 }, { "epoch": 0.8617277346884193, "grad_norm": 1.1488111019134521, "learning_rate": 1.6726280002610188e-05, "loss": 0.8701, "step": 6444 }, { "epoch": 0.8618614602834983, "grad_norm": 1.0850615501403809, "learning_rate": 1.6725211587741553e-05, "loss": 0.8452, "step": 6445 }, { "epoch": 0.8619951858785772, "grad_norm": 1.044378638267517, "learning_rate": 1.6724143032692316e-05, "loss": 0.8968, "step": 6446 }, { "epoch": 0.8621289114736561, "grad_norm": 0.9474478363990784, "learning_rate": 1.672307433748475e-05, "loss": 0.7783, "step": 6447 }, { "epoch": 0.862262637068735, "grad_norm": 1.2427572011947632, "learning_rate": 1.6722005502141135e-05, "loss": 0.9915, "step": 6448 }, { "epoch": 0.8623963626638138, "grad_norm": 1.0530056953430176, "learning_rate": 1.6720936526683748e-05, "loss": 1.0304, "step": 6449 }, { "epoch": 0.8625300882588928, "grad_norm": 1.0332579612731934, "learning_rate": 1.671986741113487e-05, "loss": 1.0461, "step": 6450 }, { "epoch": 0.8626638138539716, "grad_norm": 0.9718854427337646, "learning_rate": 1.6718798155516785e-05, "loss": 0.956, "step": 6451 }, { "epoch": 0.8627975394490506, "grad_norm": 0.8710107803344727, "learning_rate": 1.671772875985178e-05, "loss": 0.7821, "step": 6452 }, { "epoch": 0.8629312650441294, "grad_norm": 1.0515718460083008, "learning_rate": 1.671665922416215e-05, "loss": 0.9487, "step": 6453 }, { "epoch": 0.8630649906392084, "grad_norm": 1.097126841545105, "learning_rate": 1.6715589548470187e-05, "loss": 0.893, "step": 6454 }, { "epoch": 0.8631987162342872, "grad_norm": 1.0665756464004517, "learning_rate": 1.6714519732798184e-05, "loss": 1.008, "step": 6455 }, { "epoch": 0.8633324418293662, "grad_norm": 1.2057867050170898, "learning_rate": 1.671344977716844e-05, "loss": 0.9382, "step": 6456 }, { "epoch": 0.863466167424445, "grad_norm": 1.169060468673706, "learning_rate": 1.6712379681603264e-05, "loss": 1.0681, "step": 6457 }, { "epoch": 0.8635998930195239, "grad_norm": 1.212019920349121, "learning_rate": 1.6711309446124954e-05, "loss": 1.0063, "step": 6458 }, { "epoch": 0.8637336186146028, "grad_norm": 1.14297354221344, "learning_rate": 1.6710239070755818e-05, "loss": 0.9958, "step": 6459 }, { "epoch": 0.8638673442096817, "grad_norm": 1.1121227741241455, "learning_rate": 1.670916855551817e-05, "loss": 0.9495, "step": 6460 }, { "epoch": 0.8640010698047607, "grad_norm": 1.0511651039123535, "learning_rate": 1.6708097900434328e-05, "loss": 0.8992, "step": 6461 }, { "epoch": 0.8641347953998395, "grad_norm": 1.0957285165786743, "learning_rate": 1.6707027105526602e-05, "loss": 0.9639, "step": 6462 }, { "epoch": 0.8642685209949185, "grad_norm": 0.9509884715080261, "learning_rate": 1.6705956170817315e-05, "loss": 0.8177, "step": 6463 }, { "epoch": 0.8644022465899973, "grad_norm": 1.0080265998840332, "learning_rate": 1.6704885096328787e-05, "loss": 0.8999, "step": 6464 }, { "epoch": 0.8645359721850763, "grad_norm": 0.9609020948410034, "learning_rate": 1.6703813882083347e-05, "loss": 0.8572, "step": 6465 }, { "epoch": 0.8646696977801551, "grad_norm": 0.9913627505302429, "learning_rate": 1.6702742528103318e-05, "loss": 0.8913, "step": 6466 }, { "epoch": 0.864803423375234, "grad_norm": 0.9418418407440186, "learning_rate": 1.670167103441104e-05, "loss": 0.9404, "step": 6467 }, { "epoch": 0.8649371489703129, "grad_norm": 1.016886830329895, "learning_rate": 1.6700599401028834e-05, "loss": 0.9073, "step": 6468 }, { "epoch": 0.8650708745653918, "grad_norm": 1.114442229270935, "learning_rate": 1.6699527627979052e-05, "loss": 0.8685, "step": 6469 }, { "epoch": 0.8652046001604707, "grad_norm": 1.1099072694778442, "learning_rate": 1.6698455715284026e-05, "loss": 1.0016, "step": 6470 }, { "epoch": 0.8653383257555496, "grad_norm": 1.1658971309661865, "learning_rate": 1.66973836629661e-05, "loss": 0.9098, "step": 6471 }, { "epoch": 0.8654720513506285, "grad_norm": 0.9998052716255188, "learning_rate": 1.669631147104762e-05, "loss": 0.8715, "step": 6472 }, { "epoch": 0.8656057769457074, "grad_norm": 1.0649808645248413, "learning_rate": 1.6695239139550934e-05, "loss": 1.0347, "step": 6473 }, { "epoch": 0.8657395025407864, "grad_norm": 0.9812138676643372, "learning_rate": 1.6694166668498396e-05, "loss": 0.8371, "step": 6474 }, { "epoch": 0.8658732281358652, "grad_norm": 0.9409304261207581, "learning_rate": 1.669309405791236e-05, "loss": 0.9774, "step": 6475 }, { "epoch": 0.866006953730944, "grad_norm": 1.0984230041503906, "learning_rate": 1.669202130781518e-05, "loss": 0.8815, "step": 6476 }, { "epoch": 0.866140679326023, "grad_norm": 0.9248968362808228, "learning_rate": 1.6690948418229224e-05, "loss": 0.8473, "step": 6477 }, { "epoch": 0.8662744049211019, "grad_norm": 0.9722856879234314, "learning_rate": 1.668987538917685e-05, "loss": 0.9668, "step": 6478 }, { "epoch": 0.8664081305161808, "grad_norm": 1.1002607345581055, "learning_rate": 1.6688802220680422e-05, "loss": 1.1034, "step": 6479 }, { "epoch": 0.8665418561112597, "grad_norm": 1.1081945896148682, "learning_rate": 1.6687728912762314e-05, "loss": 0.8969, "step": 6480 }, { "epoch": 0.8666755817063386, "grad_norm": 1.0859794616699219, "learning_rate": 1.6686655465444897e-05, "loss": 0.8881, "step": 6481 }, { "epoch": 0.8668093073014175, "grad_norm": 0.9970587491989136, "learning_rate": 1.6685581878750543e-05, "loss": 0.9209, "step": 6482 }, { "epoch": 0.8669430328964964, "grad_norm": 1.078643560409546, "learning_rate": 1.6684508152701634e-05, "loss": 0.9579, "step": 6483 }, { "epoch": 0.8670767584915753, "grad_norm": 1.0877625942230225, "learning_rate": 1.668343428732055e-05, "loss": 1.0697, "step": 6484 }, { "epoch": 0.8672104840866541, "grad_norm": 1.102967381477356, "learning_rate": 1.6682360282629672e-05, "loss": 0.9681, "step": 6485 }, { "epoch": 0.8673442096817331, "grad_norm": 1.1853241920471191, "learning_rate": 1.6681286138651386e-05, "loss": 0.9703, "step": 6486 }, { "epoch": 0.8674779352768119, "grad_norm": 1.0619043111801147, "learning_rate": 1.6680211855408087e-05, "loss": 0.9474, "step": 6487 }, { "epoch": 0.8676116608718909, "grad_norm": 1.1336722373962402, "learning_rate": 1.6679137432922163e-05, "loss": 0.9418, "step": 6488 }, { "epoch": 0.8677453864669697, "grad_norm": 1.1401530504226685, "learning_rate": 1.667806287121601e-05, "loss": 1.0314, "step": 6489 }, { "epoch": 0.8678791120620487, "grad_norm": 1.0824079513549805, "learning_rate": 1.6676988170312027e-05, "loss": 0.8481, "step": 6490 }, { "epoch": 0.8680128376571276, "grad_norm": 1.1097157001495361, "learning_rate": 1.6675913330232613e-05, "loss": 0.9324, "step": 6491 }, { "epoch": 0.8681465632522065, "grad_norm": 1.1484395265579224, "learning_rate": 1.6674838351000176e-05, "loss": 0.8745, "step": 6492 }, { "epoch": 0.8682802888472854, "grad_norm": 0.9537686705589294, "learning_rate": 1.6673763232637123e-05, "loss": 0.9596, "step": 6493 }, { "epoch": 0.8684140144423643, "grad_norm": 1.1138883829116821, "learning_rate": 1.667268797516586e-05, "loss": 0.904, "step": 6494 }, { "epoch": 0.8685477400374432, "grad_norm": 1.2903140783309937, "learning_rate": 1.66716125786088e-05, "loss": 0.9347, "step": 6495 }, { "epoch": 0.868681465632522, "grad_norm": 1.0341150760650635, "learning_rate": 1.667053704298836e-05, "loss": 0.9555, "step": 6496 }, { "epoch": 0.868815191227601, "grad_norm": 1.029263973236084, "learning_rate": 1.6669461368326958e-05, "loss": 0.9997, "step": 6497 }, { "epoch": 0.8689489168226798, "grad_norm": 1.029625415802002, "learning_rate": 1.6668385554647017e-05, "loss": 0.8084, "step": 6498 }, { "epoch": 0.8690826424177588, "grad_norm": 1.074678897857666, "learning_rate": 1.6667309601970957e-05, "loss": 0.9658, "step": 6499 }, { "epoch": 0.8692163680128376, "grad_norm": 1.1187047958374023, "learning_rate": 1.666623351032121e-05, "loss": 0.9354, "step": 6500 }, { "epoch": 0.8693500936079166, "grad_norm": 1.012219786643982, "learning_rate": 1.6665157279720207e-05, "loss": 0.8596, "step": 6501 }, { "epoch": 0.8694838192029954, "grad_norm": 1.1061692237854004, "learning_rate": 1.6664080910190374e-05, "loss": 0.966, "step": 6502 }, { "epoch": 0.8696175447980744, "grad_norm": 1.1396405696868896, "learning_rate": 1.6663004401754155e-05, "loss": 1.0234, "step": 6503 }, { "epoch": 0.8697512703931533, "grad_norm": 1.1247122287750244, "learning_rate": 1.6661927754433982e-05, "loss": 0.9256, "step": 6504 }, { "epoch": 0.8698849959882321, "grad_norm": 1.0590485334396362, "learning_rate": 1.6660850968252305e-05, "loss": 0.9014, "step": 6505 }, { "epoch": 0.8700187215833111, "grad_norm": 1.4007304906845093, "learning_rate": 1.6659774043231557e-05, "loss": 0.9617, "step": 6506 }, { "epoch": 0.8701524471783899, "grad_norm": 1.2011232376098633, "learning_rate": 1.6658696979394194e-05, "loss": 1.0781, "step": 6507 }, { "epoch": 0.8702861727734689, "grad_norm": 1.0361733436584473, "learning_rate": 1.6657619776762667e-05, "loss": 0.8808, "step": 6508 }, { "epoch": 0.8704198983685477, "grad_norm": 0.9740707874298096, "learning_rate": 1.665654243535942e-05, "loss": 0.8449, "step": 6509 }, { "epoch": 0.8705536239636267, "grad_norm": 1.112112283706665, "learning_rate": 1.665546495520692e-05, "loss": 1.0124, "step": 6510 }, { "epoch": 0.8706873495587055, "grad_norm": 1.0324573516845703, "learning_rate": 1.665438733632762e-05, "loss": 0.9964, "step": 6511 }, { "epoch": 0.8708210751537845, "grad_norm": 1.034432053565979, "learning_rate": 1.6653309578743986e-05, "loss": 0.8778, "step": 6512 }, { "epoch": 0.8709548007488633, "grad_norm": 1.0609415769577026, "learning_rate": 1.665223168247848e-05, "loss": 1.0163, "step": 6513 }, { "epoch": 0.8710885263439422, "grad_norm": 1.0072652101516724, "learning_rate": 1.665115364755357e-05, "loss": 0.8497, "step": 6514 }, { "epoch": 0.8712222519390211, "grad_norm": 1.1178102493286133, "learning_rate": 1.6650075473991726e-05, "loss": 0.8636, "step": 6515 }, { "epoch": 0.8713559775341, "grad_norm": 0.9420791268348694, "learning_rate": 1.664899716181542e-05, "loss": 0.8286, "step": 6516 }, { "epoch": 0.871489703129179, "grad_norm": 1.0138992071151733, "learning_rate": 1.6647918711047133e-05, "loss": 0.8808, "step": 6517 }, { "epoch": 0.8716234287242578, "grad_norm": 1.022444486618042, "learning_rate": 1.664684012170934e-05, "loss": 0.9333, "step": 6518 }, { "epoch": 0.8717571543193368, "grad_norm": 1.0528024435043335, "learning_rate": 1.6645761393824526e-05, "loss": 0.9654, "step": 6519 }, { "epoch": 0.8718908799144156, "grad_norm": 1.107457160949707, "learning_rate": 1.6644682527415176e-05, "loss": 0.9726, "step": 6520 }, { "epoch": 0.8720246055094946, "grad_norm": 1.0602091550827026, "learning_rate": 1.664360352250378e-05, "loss": 0.8374, "step": 6521 }, { "epoch": 0.8721583311045734, "grad_norm": 1.1460821628570557, "learning_rate": 1.664252437911282e-05, "loss": 0.9698, "step": 6522 }, { "epoch": 0.8722920566996523, "grad_norm": 1.0244218111038208, "learning_rate": 1.6641445097264796e-05, "loss": 0.811, "step": 6523 }, { "epoch": 0.8724257822947312, "grad_norm": 1.1320558786392212, "learning_rate": 1.6640365676982208e-05, "loss": 0.9525, "step": 6524 }, { "epoch": 0.8725595078898101, "grad_norm": 1.0552382469177246, "learning_rate": 1.6639286118287548e-05, "loss": 1.0394, "step": 6525 }, { "epoch": 0.872693233484889, "grad_norm": 0.9924234747886658, "learning_rate": 1.6638206421203324e-05, "loss": 0.7836, "step": 6526 }, { "epoch": 0.8728269590799679, "grad_norm": 1.1913471221923828, "learning_rate": 1.6637126585752036e-05, "loss": 0.976, "step": 6527 }, { "epoch": 0.8729606846750468, "grad_norm": 1.0651968717575073, "learning_rate": 1.66360466119562e-05, "loss": 0.9332, "step": 6528 }, { "epoch": 0.8730944102701257, "grad_norm": 1.1777064800262451, "learning_rate": 1.6634966499838323e-05, "loss": 0.9653, "step": 6529 }, { "epoch": 0.8732281358652046, "grad_norm": 1.074629306793213, "learning_rate": 1.6633886249420915e-05, "loss": 0.9182, "step": 6530 }, { "epoch": 0.8733618614602835, "grad_norm": 1.214073896408081, "learning_rate": 1.6632805860726497e-05, "loss": 0.9795, "step": 6531 }, { "epoch": 0.8734955870553623, "grad_norm": 1.1148756742477417, "learning_rate": 1.6631725333777585e-05, "loss": 0.9912, "step": 6532 }, { "epoch": 0.8736293126504413, "grad_norm": 1.1715505123138428, "learning_rate": 1.663064466859671e-05, "loss": 1.0807, "step": 6533 }, { "epoch": 0.8737630382455202, "grad_norm": 1.1862242221832275, "learning_rate": 1.6629563865206388e-05, "loss": 1.0166, "step": 6534 }, { "epoch": 0.8738967638405991, "grad_norm": 1.0223350524902344, "learning_rate": 1.6628482923629147e-05, "loss": 0.9204, "step": 6535 }, { "epoch": 0.874030489435678, "grad_norm": 1.1405360698699951, "learning_rate": 1.6627401843887526e-05, "loss": 0.9605, "step": 6536 }, { "epoch": 0.8741642150307569, "grad_norm": 0.9497440457344055, "learning_rate": 1.662632062600406e-05, "loss": 0.842, "step": 6537 }, { "epoch": 0.8742979406258358, "grad_norm": 1.1066334247589111, "learning_rate": 1.6625239270001277e-05, "loss": 0.954, "step": 6538 }, { "epoch": 0.8744316662209147, "grad_norm": 0.9312584400177002, "learning_rate": 1.662415777590172e-05, "loss": 0.9546, "step": 6539 }, { "epoch": 0.8745653918159936, "grad_norm": 1.1156271696090698, "learning_rate": 1.6623076143727933e-05, "loss": 0.989, "step": 6540 }, { "epoch": 0.8746991174110724, "grad_norm": 1.104783296585083, "learning_rate": 1.6621994373502463e-05, "loss": 0.9606, "step": 6541 }, { "epoch": 0.8748328430061514, "grad_norm": 0.9976694583892822, "learning_rate": 1.6620912465247857e-05, "loss": 1.0008, "step": 6542 }, { "epoch": 0.8749665686012302, "grad_norm": 0.9949148297309875, "learning_rate": 1.6619830418986665e-05, "loss": 0.9588, "step": 6543 }, { "epoch": 0.8751002941963092, "grad_norm": 1.0580958127975464, "learning_rate": 1.661874823474144e-05, "loss": 0.9878, "step": 6544 }, { "epoch": 0.875234019791388, "grad_norm": 0.9579415917396545, "learning_rate": 1.6617665912534746e-05, "loss": 0.9061, "step": 6545 }, { "epoch": 0.875367745386467, "grad_norm": 1.2294880151748657, "learning_rate": 1.661658345238914e-05, "loss": 0.998, "step": 6546 }, { "epoch": 0.8755014709815458, "grad_norm": 1.0071120262145996, "learning_rate": 1.661550085432718e-05, "loss": 0.9151, "step": 6547 }, { "epoch": 0.8756351965766248, "grad_norm": 1.0598218441009521, "learning_rate": 1.6614418118371435e-05, "loss": 0.986, "step": 6548 }, { "epoch": 0.8757689221717037, "grad_norm": 1.0410268306732178, "learning_rate": 1.661333524454447e-05, "loss": 0.9356, "step": 6549 }, { "epoch": 0.8759026477667826, "grad_norm": 1.0316548347473145, "learning_rate": 1.6612252232868868e-05, "loss": 1.0216, "step": 6550 }, { "epoch": 0.8760363733618615, "grad_norm": 1.0013291835784912, "learning_rate": 1.6611169083367188e-05, "loss": 0.9016, "step": 6551 }, { "epoch": 0.8761700989569403, "grad_norm": 0.9989796280860901, "learning_rate": 1.6610085796062022e-05, "loss": 0.9127, "step": 6552 }, { "epoch": 0.8763038245520193, "grad_norm": 1.116627812385559, "learning_rate": 1.6609002370975937e-05, "loss": 0.8754, "step": 6553 }, { "epoch": 0.8764375501470981, "grad_norm": 1.0261844396591187, "learning_rate": 1.6607918808131526e-05, "loss": 0.957, "step": 6554 }, { "epoch": 0.8765712757421771, "grad_norm": 1.0615718364715576, "learning_rate": 1.6606835107551365e-05, "loss": 0.9624, "step": 6555 }, { "epoch": 0.8767050013372559, "grad_norm": 1.0716177225112915, "learning_rate": 1.6605751269258054e-05, "loss": 1.0075, "step": 6556 }, { "epoch": 0.8768387269323349, "grad_norm": 1.0088770389556885, "learning_rate": 1.6604667293274174e-05, "loss": 0.8836, "step": 6557 }, { "epoch": 0.8769724525274137, "grad_norm": 0.9648098349571228, "learning_rate": 1.6603583179622327e-05, "loss": 0.9297, "step": 6558 }, { "epoch": 0.8771061781224927, "grad_norm": 1.0674529075622559, "learning_rate": 1.6602498928325105e-05, "loss": 0.8852, "step": 6559 }, { "epoch": 0.8772399037175715, "grad_norm": 0.9591109156608582, "learning_rate": 1.6601414539405114e-05, "loss": 0.9268, "step": 6560 }, { "epoch": 0.8773736293126504, "grad_norm": 1.0160752534866333, "learning_rate": 1.660033001288495e-05, "loss": 0.9684, "step": 6561 }, { "epoch": 0.8775073549077294, "grad_norm": 1.109384298324585, "learning_rate": 1.659924534878723e-05, "loss": 1.0523, "step": 6562 }, { "epoch": 0.8776410805028082, "grad_norm": 0.9370120763778687, "learning_rate": 1.659816054713455e-05, "loss": 0.9346, "step": 6563 }, { "epoch": 0.8777748060978872, "grad_norm": 1.0478984117507935, "learning_rate": 1.6597075607949525e-05, "loss": 0.8721, "step": 6564 }, { "epoch": 0.877908531692966, "grad_norm": 0.9581248164176941, "learning_rate": 1.6595990531254776e-05, "loss": 0.9286, "step": 6565 }, { "epoch": 0.878042257288045, "grad_norm": 0.9890875220298767, "learning_rate": 1.6594905317072916e-05, "loss": 0.8923, "step": 6566 }, { "epoch": 0.8781759828831238, "grad_norm": 0.9938724040985107, "learning_rate": 1.6593819965426563e-05, "loss": 0.8655, "step": 6567 }, { "epoch": 0.8783097084782028, "grad_norm": 1.1764945983886719, "learning_rate": 1.6592734476338344e-05, "loss": 0.9498, "step": 6568 }, { "epoch": 0.8784434340732816, "grad_norm": 1.0022259950637817, "learning_rate": 1.659164884983088e-05, "loss": 0.8936, "step": 6569 }, { "epoch": 0.8785771596683605, "grad_norm": 1.003389835357666, "learning_rate": 1.659056308592681e-05, "loss": 0.9314, "step": 6570 }, { "epoch": 0.8787108852634394, "grad_norm": 0.9841601252555847, "learning_rate": 1.6589477184648752e-05, "loss": 1.0215, "step": 6571 }, { "epoch": 0.8788446108585183, "grad_norm": 1.0515648126602173, "learning_rate": 1.658839114601935e-05, "loss": 0.9147, "step": 6572 }, { "epoch": 0.8789783364535972, "grad_norm": 1.1183761358261108, "learning_rate": 1.658730497006124e-05, "loss": 0.9117, "step": 6573 }, { "epoch": 0.8791120620486761, "grad_norm": 1.11531400680542, "learning_rate": 1.658621865679706e-05, "loss": 1.0543, "step": 6574 }, { "epoch": 0.879245787643755, "grad_norm": 0.946401834487915, "learning_rate": 1.6585132206249455e-05, "loss": 0.9579, "step": 6575 }, { "epoch": 0.8793795132388339, "grad_norm": 1.1686761379241943, "learning_rate": 1.658404561844107e-05, "loss": 1.0309, "step": 6576 }, { "epoch": 0.8795132388339129, "grad_norm": 1.0884144306182861, "learning_rate": 1.6582958893394556e-05, "loss": 0.7868, "step": 6577 }, { "epoch": 0.8796469644289917, "grad_norm": 1.0776336193084717, "learning_rate": 1.6581872031132565e-05, "loss": 0.9171, "step": 6578 }, { "epoch": 0.8797806900240706, "grad_norm": 1.0471206903457642, "learning_rate": 1.6580785031677743e-05, "loss": 0.9102, "step": 6579 }, { "epoch": 0.8799144156191495, "grad_norm": 1.0015294551849365, "learning_rate": 1.6579697895052758e-05, "loss": 0.9495, "step": 6580 }, { "epoch": 0.8800481412142284, "grad_norm": 1.0121917724609375, "learning_rate": 1.6578610621280267e-05, "loss": 0.9573, "step": 6581 }, { "epoch": 0.8801818668093073, "grad_norm": 0.9298769235610962, "learning_rate": 1.6577523210382935e-05, "loss": 0.9368, "step": 6582 }, { "epoch": 0.8803155924043862, "grad_norm": 1.111396074295044, "learning_rate": 1.657643566238342e-05, "loss": 0.9755, "step": 6583 }, { "epoch": 0.8804493179994651, "grad_norm": 0.9899436235427856, "learning_rate": 1.6575347977304398e-05, "loss": 0.897, "step": 6584 }, { "epoch": 0.880583043594544, "grad_norm": 1.1105124950408936, "learning_rate": 1.657426015516854e-05, "loss": 0.8432, "step": 6585 }, { "epoch": 0.8807167691896229, "grad_norm": 1.1892081499099731, "learning_rate": 1.657317219599852e-05, "loss": 1.05, "step": 6586 }, { "epoch": 0.8808504947847018, "grad_norm": 0.9492790699005127, "learning_rate": 1.657208409981702e-05, "loss": 0.9101, "step": 6587 }, { "epoch": 0.8809842203797806, "grad_norm": 1.0121068954467773, "learning_rate": 1.6570995866646707e-05, "loss": 0.8026, "step": 6588 }, { "epoch": 0.8811179459748596, "grad_norm": 1.101181983947754, "learning_rate": 1.656990749651028e-05, "loss": 0.8982, "step": 6589 }, { "epoch": 0.8812516715699384, "grad_norm": 1.0665388107299805, "learning_rate": 1.6568818989430416e-05, "loss": 0.9951, "step": 6590 }, { "epoch": 0.8813853971650174, "grad_norm": 1.0103232860565186, "learning_rate": 1.6567730345429803e-05, "loss": 0.8333, "step": 6591 }, { "epoch": 0.8815191227600963, "grad_norm": 1.0503722429275513, "learning_rate": 1.656664156453114e-05, "loss": 0.8986, "step": 6592 }, { "epoch": 0.8816528483551752, "grad_norm": 1.1477293968200684, "learning_rate": 1.6565552646757114e-05, "loss": 0.9581, "step": 6593 }, { "epoch": 0.8817865739502541, "grad_norm": 0.9949456453323364, "learning_rate": 1.656446359213043e-05, "loss": 1.0245, "step": 6594 }, { "epoch": 0.881920299545333, "grad_norm": 1.1275508403778076, "learning_rate": 1.656337440067378e-05, "loss": 0.9538, "step": 6595 }, { "epoch": 0.8820540251404119, "grad_norm": 1.0195989608764648, "learning_rate": 1.656228507240987e-05, "loss": 0.9261, "step": 6596 }, { "epoch": 0.8821877507354908, "grad_norm": 1.0239284038543701, "learning_rate": 1.6561195607361407e-05, "loss": 0.9152, "step": 6597 }, { "epoch": 0.8823214763305697, "grad_norm": 0.939383327960968, "learning_rate": 1.6560106005551106e-05, "loss": 0.8932, "step": 6598 }, { "epoch": 0.8824552019256485, "grad_norm": 0.9758705496788025, "learning_rate": 1.6559016267001667e-05, "loss": 0.8921, "step": 6599 }, { "epoch": 0.8825889275207275, "grad_norm": 1.225216269493103, "learning_rate": 1.655792639173581e-05, "loss": 1.1565, "step": 6600 }, { "epoch": 0.8827226531158063, "grad_norm": 1.187839388847351, "learning_rate": 1.6556836379776254e-05, "loss": 0.8809, "step": 6601 }, { "epoch": 0.8828563787108853, "grad_norm": 1.1948819160461426, "learning_rate": 1.655574623114572e-05, "loss": 0.924, "step": 6602 }, { "epoch": 0.8829901043059641, "grad_norm": 1.0240799188613892, "learning_rate": 1.6554655945866926e-05, "loss": 0.9221, "step": 6603 }, { "epoch": 0.8831238299010431, "grad_norm": 1.087586760520935, "learning_rate": 1.6553565523962602e-05, "loss": 0.988, "step": 6604 }, { "epoch": 0.883257555496122, "grad_norm": 1.1838536262512207, "learning_rate": 1.6552474965455475e-05, "loss": 0.9485, "step": 6605 }, { "epoch": 0.8833912810912009, "grad_norm": 0.9590768218040466, "learning_rate": 1.6551384270368277e-05, "loss": 0.8552, "step": 6606 }, { "epoch": 0.8835250066862798, "grad_norm": 1.1438350677490234, "learning_rate": 1.6550293438723745e-05, "loss": 0.8386, "step": 6607 }, { "epoch": 0.8836587322813586, "grad_norm": 1.0866752862930298, "learning_rate": 1.6549202470544613e-05, "loss": 0.9223, "step": 6608 }, { "epoch": 0.8837924578764376, "grad_norm": 0.9984356760978699, "learning_rate": 1.6548111365853623e-05, "loss": 0.9169, "step": 6609 }, { "epoch": 0.8839261834715164, "grad_norm": 1.0741022825241089, "learning_rate": 1.654702012467352e-05, "loss": 0.831, "step": 6610 }, { "epoch": 0.8840599090665954, "grad_norm": 1.0321089029312134, "learning_rate": 1.6545928747027044e-05, "loss": 0.9227, "step": 6611 }, { "epoch": 0.8841936346616742, "grad_norm": 0.9451794624328613, "learning_rate": 1.6544837232936946e-05, "loss": 0.8206, "step": 6612 }, { "epoch": 0.8843273602567532, "grad_norm": 1.1911232471466064, "learning_rate": 1.654374558242598e-05, "loss": 1.1484, "step": 6613 }, { "epoch": 0.884461085851832, "grad_norm": 0.9948623776435852, "learning_rate": 1.65426537955169e-05, "loss": 0.8514, "step": 6614 }, { "epoch": 0.884594811446911, "grad_norm": 1.0557894706726074, "learning_rate": 1.654156187223246e-05, "loss": 0.997, "step": 6615 }, { "epoch": 0.8847285370419898, "grad_norm": 0.9317017197608948, "learning_rate": 1.6540469812595424e-05, "loss": 0.8912, "step": 6616 }, { "epoch": 0.8848622626370687, "grad_norm": 1.1089012622833252, "learning_rate": 1.6539377616628554e-05, "loss": 0.9976, "step": 6617 }, { "epoch": 0.8849959882321476, "grad_norm": 1.0861963033676147, "learning_rate": 1.6538285284354615e-05, "loss": 0.954, "step": 6618 }, { "epoch": 0.8851297138272265, "grad_norm": 1.0418671369552612, "learning_rate": 1.653719281579637e-05, "loss": 0.8918, "step": 6619 }, { "epoch": 0.8852634394223055, "grad_norm": 1.0051528215408325, "learning_rate": 1.6536100210976604e-05, "loss": 0.9519, "step": 6620 }, { "epoch": 0.8853971650173843, "grad_norm": 1.0714529752731323, "learning_rate": 1.653500746991808e-05, "loss": 1.0263, "step": 6621 }, { "epoch": 0.8855308906124633, "grad_norm": 1.13709557056427, "learning_rate": 1.6533914592643582e-05, "loss": 1.0005, "step": 6622 }, { "epoch": 0.8856646162075421, "grad_norm": 1.1949125528335571, "learning_rate": 1.6532821579175884e-05, "loss": 1.0461, "step": 6623 }, { "epoch": 0.8857983418026211, "grad_norm": 1.1206669807434082, "learning_rate": 1.6531728429537766e-05, "loss": 0.933, "step": 6624 }, { "epoch": 0.8859320673976999, "grad_norm": 1.0955919027328491, "learning_rate": 1.6530635143752028e-05, "loss": 0.8734, "step": 6625 }, { "epoch": 0.8860657929927788, "grad_norm": 1.0365923643112183, "learning_rate": 1.6529541721841444e-05, "loss": 0.9513, "step": 6626 }, { "epoch": 0.8861995185878577, "grad_norm": 1.0940231084823608, "learning_rate": 1.6528448163828814e-05, "loss": 0.9823, "step": 6627 }, { "epoch": 0.8863332441829366, "grad_norm": 1.0482330322265625, "learning_rate": 1.6527354469736928e-05, "loss": 0.9454, "step": 6628 }, { "epoch": 0.8864669697780155, "grad_norm": 1.0636597871780396, "learning_rate": 1.6526260639588583e-05, "loss": 0.9386, "step": 6629 }, { "epoch": 0.8866006953730944, "grad_norm": 1.0186216831207275, "learning_rate": 1.652516667340658e-05, "loss": 0.8274, "step": 6630 }, { "epoch": 0.8867344209681733, "grad_norm": 1.1662896871566772, "learning_rate": 1.6524072571213724e-05, "loss": 0.9656, "step": 6631 }, { "epoch": 0.8868681465632522, "grad_norm": 1.1851017475128174, "learning_rate": 1.6522978333032817e-05, "loss": 1.0751, "step": 6632 }, { "epoch": 0.8870018721583312, "grad_norm": 1.0155028104782104, "learning_rate": 1.6521883958886665e-05, "loss": 0.9998, "step": 6633 }, { "epoch": 0.88713559775341, "grad_norm": 1.0203315019607544, "learning_rate": 1.6520789448798086e-05, "loss": 0.9042, "step": 6634 }, { "epoch": 0.8872693233484888, "grad_norm": 1.0252208709716797, "learning_rate": 1.6519694802789893e-05, "loss": 0.987, "step": 6635 }, { "epoch": 0.8874030489435678, "grad_norm": 1.120632529258728, "learning_rate": 1.6518600020884896e-05, "loss": 0.9857, "step": 6636 }, { "epoch": 0.8875367745386467, "grad_norm": 0.9164204597473145, "learning_rate": 1.651750510310592e-05, "loss": 0.9361, "step": 6637 }, { "epoch": 0.8876705001337256, "grad_norm": 0.9940130710601807, "learning_rate": 1.6516410049475788e-05, "loss": 0.9198, "step": 6638 }, { "epoch": 0.8878042257288045, "grad_norm": 1.0626641511917114, "learning_rate": 1.6515314860017328e-05, "loss": 0.9058, "step": 6639 }, { "epoch": 0.8879379513238834, "grad_norm": 0.9900780916213989, "learning_rate": 1.6514219534753357e-05, "loss": 0.936, "step": 6640 }, { "epoch": 0.8880716769189623, "grad_norm": 1.0205928087234497, "learning_rate": 1.6513124073706715e-05, "loss": 0.9373, "step": 6641 }, { "epoch": 0.8882054025140412, "grad_norm": 1.1461232900619507, "learning_rate": 1.6512028476900234e-05, "loss": 1.0156, "step": 6642 }, { "epoch": 0.8883391281091201, "grad_norm": 1.0463027954101562, "learning_rate": 1.6510932744356754e-05, "loss": 0.7738, "step": 6643 }, { "epoch": 0.8884728537041989, "grad_norm": 1.181414246559143, "learning_rate": 1.650983687609911e-05, "loss": 0.8949, "step": 6644 }, { "epoch": 0.8886065792992779, "grad_norm": 1.1667274236679077, "learning_rate": 1.6508740872150143e-05, "loss": 1.0011, "step": 6645 }, { "epoch": 0.8887403048943567, "grad_norm": 1.177300214767456, "learning_rate": 1.6507644732532702e-05, "loss": 1.0411, "step": 6646 }, { "epoch": 0.8888740304894357, "grad_norm": 1.0851504802703857, "learning_rate": 1.6506548457269635e-05, "loss": 0.9778, "step": 6647 }, { "epoch": 0.8890077560845145, "grad_norm": 1.0519440174102783, "learning_rate": 1.650545204638379e-05, "loss": 0.8881, "step": 6648 }, { "epoch": 0.8891414816795935, "grad_norm": 1.1065679788589478, "learning_rate": 1.6504355499898023e-05, "loss": 0.9898, "step": 6649 }, { "epoch": 0.8892752072746724, "grad_norm": 1.0286918878555298, "learning_rate": 1.650325881783519e-05, "loss": 0.9253, "step": 6650 }, { "epoch": 0.8894089328697513, "grad_norm": 1.0029408931732178, "learning_rate": 1.650216200021815e-05, "loss": 0.9028, "step": 6651 }, { "epoch": 0.8895426584648302, "grad_norm": 1.0041744709014893, "learning_rate": 1.6501065047069764e-05, "loss": 0.9046, "step": 6652 }, { "epoch": 0.8896763840599091, "grad_norm": 0.9768277406692505, "learning_rate": 1.64999679584129e-05, "loss": 0.8084, "step": 6653 }, { "epoch": 0.889810109654988, "grad_norm": 1.1030744314193726, "learning_rate": 1.649887073427042e-05, "loss": 0.984, "step": 6654 }, { "epoch": 0.8899438352500668, "grad_norm": 0.9453567862510681, "learning_rate": 1.64977733746652e-05, "loss": 0.8216, "step": 6655 }, { "epoch": 0.8900775608451458, "grad_norm": 1.2263792753219604, "learning_rate": 1.6496675879620113e-05, "loss": 0.917, "step": 6656 }, { "epoch": 0.8902112864402246, "grad_norm": 0.9549890756607056, "learning_rate": 1.649557824915803e-05, "loss": 0.8498, "step": 6657 }, { "epoch": 0.8903450120353036, "grad_norm": 1.0324268341064453, "learning_rate": 1.6494480483301836e-05, "loss": 0.8399, "step": 6658 }, { "epoch": 0.8904787376303824, "grad_norm": 0.9723221659660339, "learning_rate": 1.6493382582074415e-05, "loss": 0.9927, "step": 6659 }, { "epoch": 0.8906124632254614, "grad_norm": 1.1457146406173706, "learning_rate": 1.6492284545498645e-05, "loss": 1.0311, "step": 6660 }, { "epoch": 0.8907461888205402, "grad_norm": 1.1672335863113403, "learning_rate": 1.649118637359741e-05, "loss": 0.9032, "step": 6661 }, { "epoch": 0.8908799144156192, "grad_norm": 1.0801018476486206, "learning_rate": 1.6490088066393614e-05, "loss": 0.8185, "step": 6662 }, { "epoch": 0.891013640010698, "grad_norm": 1.1729601621627808, "learning_rate": 1.648898962391014e-05, "loss": 0.9311, "step": 6663 }, { "epoch": 0.8911473656057769, "grad_norm": 0.871172308921814, "learning_rate": 1.648789104616989e-05, "loss": 0.8354, "step": 6664 }, { "epoch": 0.8912810912008559, "grad_norm": 0.9779297113418579, "learning_rate": 1.6486792333195752e-05, "loss": 0.7912, "step": 6665 }, { "epoch": 0.8914148167959347, "grad_norm": 1.0173784494400024, "learning_rate": 1.6485693485010643e-05, "loss": 0.9237, "step": 6666 }, { "epoch": 0.8915485423910137, "grad_norm": 1.0498394966125488, "learning_rate": 1.6484594501637453e-05, "loss": 0.9572, "step": 6667 }, { "epoch": 0.8916822679860925, "grad_norm": 1.0005152225494385, "learning_rate": 1.6483495383099103e-05, "loss": 0.9084, "step": 6668 }, { "epoch": 0.8918159935811715, "grad_norm": 1.0867047309875488, "learning_rate": 1.6482396129418488e-05, "loss": 0.9289, "step": 6669 }, { "epoch": 0.8919497191762503, "grad_norm": 1.1120163202285767, "learning_rate": 1.648129674061853e-05, "loss": 1.0947, "step": 6670 }, { "epoch": 0.8920834447713293, "grad_norm": 1.0160980224609375, "learning_rate": 1.648019721672215e-05, "loss": 0.8579, "step": 6671 }, { "epoch": 0.8922171703664081, "grad_norm": 1.051330804824829, "learning_rate": 1.6479097557752254e-05, "loss": 0.9057, "step": 6672 }, { "epoch": 0.892350895961487, "grad_norm": 1.267731785774231, "learning_rate": 1.647799776373177e-05, "loss": 1.0916, "step": 6673 }, { "epoch": 0.8924846215565659, "grad_norm": 1.0835604667663574, "learning_rate": 1.647689783468362e-05, "loss": 0.8878, "step": 6674 }, { "epoch": 0.8926183471516448, "grad_norm": 1.0329865217208862, "learning_rate": 1.6475797770630736e-05, "loss": 0.7677, "step": 6675 }, { "epoch": 0.8927520727467237, "grad_norm": 1.0527637004852295, "learning_rate": 1.6474697571596042e-05, "loss": 1.0187, "step": 6676 }, { "epoch": 0.8928857983418026, "grad_norm": 1.2311348915100098, "learning_rate": 1.6473597237602472e-05, "loss": 1.0094, "step": 6677 }, { "epoch": 0.8930195239368816, "grad_norm": 0.9658203125, "learning_rate": 1.6472496768672965e-05, "loss": 0.9126, "step": 6678 }, { "epoch": 0.8931532495319604, "grad_norm": 1.0320508480072021, "learning_rate": 1.6471396164830452e-05, "loss": 0.9129, "step": 6679 }, { "epoch": 0.8932869751270394, "grad_norm": 1.1232877969741821, "learning_rate": 1.647029542609788e-05, "loss": 0.9018, "step": 6680 }, { "epoch": 0.8934207007221182, "grad_norm": 1.0318970680236816, "learning_rate": 1.6469194552498194e-05, "loss": 0.8608, "step": 6681 }, { "epoch": 0.8935544263171971, "grad_norm": 1.0595561265945435, "learning_rate": 1.6468093544054334e-05, "loss": 1.0453, "step": 6682 }, { "epoch": 0.893688151912276, "grad_norm": 0.9870584011077881, "learning_rate": 1.6466992400789256e-05, "loss": 0.8809, "step": 6683 }, { "epoch": 0.8938218775073549, "grad_norm": 1.1204252243041992, "learning_rate": 1.646589112272591e-05, "loss": 1.0889, "step": 6684 }, { "epoch": 0.8939556031024338, "grad_norm": 1.1926586627960205, "learning_rate": 1.646478970988725e-05, "loss": 0.9022, "step": 6685 }, { "epoch": 0.8940893286975127, "grad_norm": 1.0428320169448853, "learning_rate": 1.6463688162296232e-05, "loss": 0.9375, "step": 6686 }, { "epoch": 0.8942230542925916, "grad_norm": 0.989416241645813, "learning_rate": 1.6462586479975823e-05, "loss": 1.1299, "step": 6687 }, { "epoch": 0.8943567798876705, "grad_norm": 1.232982873916626, "learning_rate": 1.6461484662948982e-05, "loss": 0.9408, "step": 6688 }, { "epoch": 0.8944905054827494, "grad_norm": 1.0534180402755737, "learning_rate": 1.6460382711238678e-05, "loss": 0.9389, "step": 6689 }, { "epoch": 0.8946242310778283, "grad_norm": 1.0252068042755127, "learning_rate": 1.6459280624867876e-05, "loss": 0.9771, "step": 6690 }, { "epoch": 0.8947579566729071, "grad_norm": 1.0314444303512573, "learning_rate": 1.6458178403859547e-05, "loss": 0.9464, "step": 6691 }, { "epoch": 0.8948916822679861, "grad_norm": 0.9803935885429382, "learning_rate": 1.6457076048236676e-05, "loss": 0.9805, "step": 6692 }, { "epoch": 0.895025407863065, "grad_norm": 1.0925337076187134, "learning_rate": 1.645597355802223e-05, "loss": 0.9957, "step": 6693 }, { "epoch": 0.8951591334581439, "grad_norm": 1.004028081893921, "learning_rate": 1.6454870933239192e-05, "loss": 0.926, "step": 6694 }, { "epoch": 0.8952928590532228, "grad_norm": 1.0104879140853882, "learning_rate": 1.6453768173910546e-05, "loss": 1.0194, "step": 6695 }, { "epoch": 0.8954265846483017, "grad_norm": 1.0822699069976807, "learning_rate": 1.6452665280059277e-05, "loss": 0.9793, "step": 6696 }, { "epoch": 0.8955603102433806, "grad_norm": 1.0720988512039185, "learning_rate": 1.6451562251708376e-05, "loss": 0.9781, "step": 6697 }, { "epoch": 0.8956940358384595, "grad_norm": 1.0022727251052856, "learning_rate": 1.6450459088880836e-05, "loss": 0.9052, "step": 6698 }, { "epoch": 0.8958277614335384, "grad_norm": 1.0359539985656738, "learning_rate": 1.6449355791599647e-05, "loss": 0.9257, "step": 6699 }, { "epoch": 0.8959614870286173, "grad_norm": 1.0689456462860107, "learning_rate": 1.6448252359887808e-05, "loss": 0.8847, "step": 6700 }, { "epoch": 0.8960952126236962, "grad_norm": 0.9872997999191284, "learning_rate": 1.6447148793768316e-05, "loss": 0.9318, "step": 6701 }, { "epoch": 0.896228938218775, "grad_norm": 0.9834489226341248, "learning_rate": 1.644604509326418e-05, "loss": 0.7495, "step": 6702 }, { "epoch": 0.896362663813854, "grad_norm": 1.0131887197494507, "learning_rate": 1.6444941258398403e-05, "loss": 0.9347, "step": 6703 }, { "epoch": 0.8964963894089328, "grad_norm": 1.0297667980194092, "learning_rate": 1.644383728919399e-05, "loss": 0.964, "step": 6704 }, { "epoch": 0.8966301150040118, "grad_norm": 1.0307282209396362, "learning_rate": 1.6442733185673953e-05, "loss": 0.8684, "step": 6705 }, { "epoch": 0.8967638405990906, "grad_norm": 0.9437198042869568, "learning_rate": 1.6441628947861312e-05, "loss": 0.939, "step": 6706 }, { "epoch": 0.8968975661941696, "grad_norm": 1.0671260356903076, "learning_rate": 1.644052457577908e-05, "loss": 0.9214, "step": 6707 }, { "epoch": 0.8970312917892485, "grad_norm": 1.0547828674316406, "learning_rate": 1.6439420069450273e-05, "loss": 0.8918, "step": 6708 }, { "epoch": 0.8971650173843274, "grad_norm": 1.0720034837722778, "learning_rate": 1.6438315428897914e-05, "loss": 1.0493, "step": 6709 }, { "epoch": 0.8972987429794063, "grad_norm": 0.9499634504318237, "learning_rate": 1.6437210654145036e-05, "loss": 0.9203, "step": 6710 }, { "epoch": 0.8974324685744851, "grad_norm": 1.0873655080795288, "learning_rate": 1.6436105745214658e-05, "loss": 0.8987, "step": 6711 }, { "epoch": 0.8975661941695641, "grad_norm": 1.091537594795227, "learning_rate": 1.6435000702129816e-05, "loss": 0.9886, "step": 6712 }, { "epoch": 0.8976999197646429, "grad_norm": 1.1032395362854004, "learning_rate": 1.6433895524913546e-05, "loss": 0.8877, "step": 6713 }, { "epoch": 0.8978336453597219, "grad_norm": 1.0616761445999146, "learning_rate": 1.6432790213588874e-05, "loss": 0.9119, "step": 6714 }, { "epoch": 0.8979673709548007, "grad_norm": 1.0024023056030273, "learning_rate": 1.643168476817885e-05, "loss": 0.8723, "step": 6715 }, { "epoch": 0.8981010965498797, "grad_norm": 1.068844199180603, "learning_rate": 1.643057918870651e-05, "loss": 0.8446, "step": 6716 }, { "epoch": 0.8982348221449585, "grad_norm": 1.0865434408187866, "learning_rate": 1.6429473475194898e-05, "loss": 0.9481, "step": 6717 }, { "epoch": 0.8983685477400375, "grad_norm": 1.0358482599258423, "learning_rate": 1.6428367627667067e-05, "loss": 0.9401, "step": 6718 }, { "epoch": 0.8985022733351163, "grad_norm": 1.0376105308532715, "learning_rate": 1.642726164614606e-05, "loss": 0.9841, "step": 6719 }, { "epoch": 0.8986359989301952, "grad_norm": 1.1672828197479248, "learning_rate": 1.6426155530654943e-05, "loss": 1.0423, "step": 6720 }, { "epoch": 0.8987697245252741, "grad_norm": 1.1026726961135864, "learning_rate": 1.6425049281216755e-05, "loss": 0.9826, "step": 6721 }, { "epoch": 0.898903450120353, "grad_norm": 1.1375296115875244, "learning_rate": 1.642394289785456e-05, "loss": 0.8839, "step": 6722 }, { "epoch": 0.899037175715432, "grad_norm": 1.045061707496643, "learning_rate": 1.642283638059143e-05, "loss": 0.9149, "step": 6723 }, { "epoch": 0.8991709013105108, "grad_norm": 0.9502860307693481, "learning_rate": 1.642172972945042e-05, "loss": 0.8937, "step": 6724 }, { "epoch": 0.8993046269055898, "grad_norm": 0.9991877675056458, "learning_rate": 1.6420622944454598e-05, "loss": 0.8316, "step": 6725 }, { "epoch": 0.8994383525006686, "grad_norm": 1.03340482711792, "learning_rate": 1.641951602562703e-05, "loss": 0.8907, "step": 6726 }, { "epoch": 0.8995720780957476, "grad_norm": 1.0601781606674194, "learning_rate": 1.64184089729908e-05, "loss": 0.9461, "step": 6727 }, { "epoch": 0.8997058036908264, "grad_norm": 1.0657267570495605, "learning_rate": 1.6417301786568973e-05, "loss": 1.0307, "step": 6728 }, { "epoch": 0.8998395292859053, "grad_norm": 0.9871540665626526, "learning_rate": 1.6416194466384632e-05, "loss": 0.9574, "step": 6729 }, { "epoch": 0.8999732548809842, "grad_norm": 0.9986724257469177, "learning_rate": 1.6415087012460857e-05, "loss": 0.8814, "step": 6730 }, { "epoch": 0.9001069804760631, "grad_norm": 1.0343241691589355, "learning_rate": 1.6413979424820733e-05, "loss": 0.8484, "step": 6731 }, { "epoch": 0.900240706071142, "grad_norm": 1.114450216293335, "learning_rate": 1.6412871703487345e-05, "loss": 0.8975, "step": 6732 }, { "epoch": 0.9003744316662209, "grad_norm": 1.2138824462890625, "learning_rate": 1.6411763848483782e-05, "loss": 0.9997, "step": 6733 }, { "epoch": 0.9005081572612998, "grad_norm": 1.0738543272018433, "learning_rate": 1.641065585983314e-05, "loss": 0.9995, "step": 6734 }, { "epoch": 0.9006418828563787, "grad_norm": 0.9797514081001282, "learning_rate": 1.6409547737558504e-05, "loss": 0.8487, "step": 6735 }, { "epoch": 0.9007756084514577, "grad_norm": 1.0873870849609375, "learning_rate": 1.6408439481682985e-05, "loss": 0.9828, "step": 6736 }, { "epoch": 0.9009093340465365, "grad_norm": 1.237776517868042, "learning_rate": 1.6407331092229673e-05, "loss": 1.0074, "step": 6737 }, { "epoch": 0.9010430596416154, "grad_norm": 1.0938637256622314, "learning_rate": 1.6406222569221678e-05, "loss": 0.949, "step": 6738 }, { "epoch": 0.9011767852366943, "grad_norm": 1.1377477645874023, "learning_rate": 1.64051139126821e-05, "loss": 0.9336, "step": 6739 }, { "epoch": 0.9013105108317732, "grad_norm": 1.1673563718795776, "learning_rate": 1.6404005122634058e-05, "loss": 0.9324, "step": 6740 }, { "epoch": 0.9014442364268521, "grad_norm": 1.0079574584960938, "learning_rate": 1.640289619910065e-05, "loss": 0.8181, "step": 6741 }, { "epoch": 0.901577962021931, "grad_norm": 1.0750503540039062, "learning_rate": 1.6401787142105004e-05, "loss": 1.0669, "step": 6742 }, { "epoch": 0.9017116876170099, "grad_norm": 1.024989128112793, "learning_rate": 1.6400677951670228e-05, "loss": 0.9234, "step": 6743 }, { "epoch": 0.9018454132120888, "grad_norm": 1.0257606506347656, "learning_rate": 1.6399568627819445e-05, "loss": 0.9521, "step": 6744 }, { "epoch": 0.9019791388071677, "grad_norm": 1.0672742128372192, "learning_rate": 1.6398459170575776e-05, "loss": 0.9358, "step": 6745 }, { "epoch": 0.9021128644022466, "grad_norm": 0.9566826820373535, "learning_rate": 1.639734957996235e-05, "loss": 0.9811, "step": 6746 }, { "epoch": 0.9022465899973255, "grad_norm": 1.0109277963638306, "learning_rate": 1.6396239856002295e-05, "loss": 0.9077, "step": 6747 }, { "epoch": 0.9023803155924044, "grad_norm": 1.2165746688842773, "learning_rate": 1.639512999871874e-05, "loss": 0.9854, "step": 6748 }, { "epoch": 0.9025140411874832, "grad_norm": 1.08646559715271, "learning_rate": 1.639402000813482e-05, "loss": 1.0142, "step": 6749 }, { "epoch": 0.9026477667825622, "grad_norm": 1.0451642274856567, "learning_rate": 1.639290988427367e-05, "loss": 1.005, "step": 6750 }, { "epoch": 0.902781492377641, "grad_norm": 1.022635579109192, "learning_rate": 1.6391799627158432e-05, "loss": 0.8274, "step": 6751 }, { "epoch": 0.90291521797272, "grad_norm": 1.0998194217681885, "learning_rate": 1.6390689236812244e-05, "loss": 1.0794, "step": 6752 }, { "epoch": 0.9030489435677989, "grad_norm": 1.0877984762191772, "learning_rate": 1.638957871325826e-05, "loss": 0.9725, "step": 6753 }, { "epoch": 0.9031826691628778, "grad_norm": 1.0117844343185425, "learning_rate": 1.638846805651961e-05, "loss": 0.8947, "step": 6754 }, { "epoch": 0.9033163947579567, "grad_norm": 0.939814567565918, "learning_rate": 1.6387357266619467e-05, "loss": 0.8768, "step": 6755 }, { "epoch": 0.9034501203530356, "grad_norm": 1.0196579694747925, "learning_rate": 1.6386246343580973e-05, "loss": 0.9017, "step": 6756 }, { "epoch": 0.9035838459481145, "grad_norm": 1.1136674880981445, "learning_rate": 1.6385135287427284e-05, "loss": 0.9509, "step": 6757 }, { "epoch": 0.9037175715431933, "grad_norm": 1.0302562713623047, "learning_rate": 1.6384024098181557e-05, "loss": 0.8894, "step": 6758 }, { "epoch": 0.9038512971382723, "grad_norm": 0.9795793890953064, "learning_rate": 1.638291277586696e-05, "loss": 0.8875, "step": 6759 }, { "epoch": 0.9039850227333511, "grad_norm": 1.160224199295044, "learning_rate": 1.6381801320506655e-05, "loss": 1.043, "step": 6760 }, { "epoch": 0.9041187483284301, "grad_norm": 1.0704209804534912, "learning_rate": 1.6380689732123804e-05, "loss": 0.9311, "step": 6761 }, { "epoch": 0.9042524739235089, "grad_norm": 0.9757776856422424, "learning_rate": 1.6379578010741582e-05, "loss": 0.8842, "step": 6762 }, { "epoch": 0.9043861995185879, "grad_norm": 1.0957953929901123, "learning_rate": 1.6378466156383163e-05, "loss": 0.9848, "step": 6763 }, { "epoch": 0.9045199251136667, "grad_norm": 0.9756842851638794, "learning_rate": 1.637735416907172e-05, "loss": 0.7924, "step": 6764 }, { "epoch": 0.9046536507087457, "grad_norm": 1.0808343887329102, "learning_rate": 1.6376242048830432e-05, "loss": 1.0144, "step": 6765 }, { "epoch": 0.9047873763038246, "grad_norm": 0.9551745653152466, "learning_rate": 1.637512979568248e-05, "loss": 0.8754, "step": 6766 }, { "epoch": 0.9049211018989034, "grad_norm": 1.1391195058822632, "learning_rate": 1.6374017409651045e-05, "loss": 1.0409, "step": 6767 }, { "epoch": 0.9050548274939824, "grad_norm": 1.0498212575912476, "learning_rate": 1.637290489075932e-05, "loss": 0.899, "step": 6768 }, { "epoch": 0.9051885530890612, "grad_norm": 0.9705497026443481, "learning_rate": 1.6371792239030488e-05, "loss": 0.8813, "step": 6769 }, { "epoch": 0.9053222786841402, "grad_norm": 0.9668666124343872, "learning_rate": 1.6370679454487747e-05, "loss": 0.9106, "step": 6770 }, { "epoch": 0.905456004279219, "grad_norm": 1.105089545249939, "learning_rate": 1.6369566537154285e-05, "loss": 1.0491, "step": 6771 }, { "epoch": 0.905589729874298, "grad_norm": 1.0066843032836914, "learning_rate": 1.6368453487053305e-05, "loss": 0.8109, "step": 6772 }, { "epoch": 0.9057234554693768, "grad_norm": 1.1398422718048096, "learning_rate": 1.6367340304208008e-05, "loss": 0.9952, "step": 6773 }, { "epoch": 0.9058571810644558, "grad_norm": 1.152174949645996, "learning_rate": 1.6366226988641593e-05, "loss": 0.997, "step": 6774 }, { "epoch": 0.9059909066595346, "grad_norm": 0.9366565942764282, "learning_rate": 1.6365113540377268e-05, "loss": 0.833, "step": 6775 }, { "epoch": 0.9061246322546135, "grad_norm": 0.9946486353874207, "learning_rate": 1.6363999959438243e-05, "loss": 0.9197, "step": 6776 }, { "epoch": 0.9062583578496924, "grad_norm": 1.0952951908111572, "learning_rate": 1.6362886245847732e-05, "loss": 0.9187, "step": 6777 }, { "epoch": 0.9063920834447713, "grad_norm": 1.068766474723816, "learning_rate": 1.636177239962894e-05, "loss": 1.0242, "step": 6778 }, { "epoch": 0.9065258090398502, "grad_norm": 1.1795401573181152, "learning_rate": 1.6360658420805093e-05, "loss": 0.9773, "step": 6779 }, { "epoch": 0.9066595346349291, "grad_norm": 1.0513519048690796, "learning_rate": 1.6359544309399406e-05, "loss": 0.8669, "step": 6780 }, { "epoch": 0.9067932602300081, "grad_norm": 1.0561473369598389, "learning_rate": 1.6358430065435106e-05, "loss": 1.0301, "step": 6781 }, { "epoch": 0.9069269858250869, "grad_norm": 1.0933724641799927, "learning_rate": 1.6357315688935414e-05, "loss": 0.999, "step": 6782 }, { "epoch": 0.9070607114201659, "grad_norm": 0.9801920652389526, "learning_rate": 1.6356201179923558e-05, "loss": 0.943, "step": 6783 }, { "epoch": 0.9071944370152447, "grad_norm": 1.1041316986083984, "learning_rate": 1.6355086538422775e-05, "loss": 1.0108, "step": 6784 }, { "epoch": 0.9073281626103236, "grad_norm": 1.0017673969268799, "learning_rate": 1.635397176445629e-05, "loss": 0.8917, "step": 6785 }, { "epoch": 0.9074618882054025, "grad_norm": 1.0964072942733765, "learning_rate": 1.6352856858047347e-05, "loss": 1.024, "step": 6786 }, { "epoch": 0.9075956138004814, "grad_norm": 1.1336349248886108, "learning_rate": 1.6351741819219177e-05, "loss": 0.9834, "step": 6787 }, { "epoch": 0.9077293393955603, "grad_norm": 1.040679693222046, "learning_rate": 1.635062664799503e-05, "loss": 0.8408, "step": 6788 }, { "epoch": 0.9078630649906392, "grad_norm": 1.1216731071472168, "learning_rate": 1.6349511344398148e-05, "loss": 1.0029, "step": 6789 }, { "epoch": 0.9079967905857181, "grad_norm": 1.0857746601104736, "learning_rate": 1.6348395908451778e-05, "loss": 0.8722, "step": 6790 }, { "epoch": 0.908130516180797, "grad_norm": 0.9849955439567566, "learning_rate": 1.634728034017917e-05, "loss": 0.9195, "step": 6791 }, { "epoch": 0.908264241775876, "grad_norm": 1.0231690406799316, "learning_rate": 1.6346164639603575e-05, "loss": 0.9708, "step": 6792 }, { "epoch": 0.9083979673709548, "grad_norm": 1.0137907266616821, "learning_rate": 1.6345048806748248e-05, "loss": 0.9653, "step": 6793 }, { "epoch": 0.9085316929660336, "grad_norm": 1.0158982276916504, "learning_rate": 1.6343932841636455e-05, "loss": 0.8228, "step": 6794 }, { "epoch": 0.9086654185611126, "grad_norm": 1.1140466928482056, "learning_rate": 1.634281674429145e-05, "loss": 0.9588, "step": 6795 }, { "epoch": 0.9087991441561915, "grad_norm": 1.0771057605743408, "learning_rate": 1.6341700514736504e-05, "loss": 0.9503, "step": 6796 }, { "epoch": 0.9089328697512704, "grad_norm": 0.9898585677146912, "learning_rate": 1.6340584152994876e-05, "loss": 0.8934, "step": 6797 }, { "epoch": 0.9090665953463493, "grad_norm": 0.9850866794586182, "learning_rate": 1.633946765908984e-05, "loss": 0.9223, "step": 6798 }, { "epoch": 0.9092003209414282, "grad_norm": 1.0196812152862549, "learning_rate": 1.6338351033044665e-05, "loss": 0.9486, "step": 6799 }, { "epoch": 0.9093340465365071, "grad_norm": 0.9686949849128723, "learning_rate": 1.6337234274882625e-05, "loss": 0.8834, "step": 6800 }, { "epoch": 0.909467772131586, "grad_norm": 0.9994778037071228, "learning_rate": 1.6336117384627007e-05, "loss": 0.8873, "step": 6801 }, { "epoch": 0.9096014977266649, "grad_norm": 1.0648632049560547, "learning_rate": 1.6335000362301083e-05, "loss": 0.9371, "step": 6802 }, { "epoch": 0.9097352233217438, "grad_norm": 1.0102609395980835, "learning_rate": 1.6333883207928133e-05, "loss": 0.9382, "step": 6803 }, { "epoch": 0.9098689489168227, "grad_norm": 1.0610584020614624, "learning_rate": 1.633276592153145e-05, "loss": 0.8768, "step": 6804 }, { "epoch": 0.9100026745119015, "grad_norm": 0.94576096534729, "learning_rate": 1.6331648503134327e-05, "loss": 0.9957, "step": 6805 }, { "epoch": 0.9101364001069805, "grad_norm": 1.1003552675247192, "learning_rate": 1.6330530952760048e-05, "loss": 1.1125, "step": 6806 }, { "epoch": 0.9102701257020593, "grad_norm": 1.0401118993759155, "learning_rate": 1.6329413270431906e-05, "loss": 0.8955, "step": 6807 }, { "epoch": 0.9104038512971383, "grad_norm": 1.008165717124939, "learning_rate": 1.6328295456173206e-05, "loss": 0.9357, "step": 6808 }, { "epoch": 0.9105375768922171, "grad_norm": 1.0534212589263916, "learning_rate": 1.6327177510007237e-05, "loss": 0.9088, "step": 6809 }, { "epoch": 0.9106713024872961, "grad_norm": 1.0965501070022583, "learning_rate": 1.632605943195731e-05, "loss": 0.9472, "step": 6810 }, { "epoch": 0.910805028082375, "grad_norm": 0.9653745889663696, "learning_rate": 1.6324941222046725e-05, "loss": 0.8664, "step": 6811 }, { "epoch": 0.9109387536774539, "grad_norm": 1.0409152507781982, "learning_rate": 1.6323822880298795e-05, "loss": 1.0884, "step": 6812 }, { "epoch": 0.9110724792725328, "grad_norm": 1.1187019348144531, "learning_rate": 1.632270440673683e-05, "loss": 0.9499, "step": 6813 }, { "epoch": 0.9112062048676116, "grad_norm": 1.0470764636993408, "learning_rate": 1.6321585801384138e-05, "loss": 1.0397, "step": 6814 }, { "epoch": 0.9113399304626906, "grad_norm": 1.034440517425537, "learning_rate": 1.632046706426404e-05, "loss": 0.8911, "step": 6815 }, { "epoch": 0.9114736560577694, "grad_norm": 1.0755183696746826, "learning_rate": 1.6319348195399855e-05, "loss": 0.9586, "step": 6816 }, { "epoch": 0.9116073816528484, "grad_norm": 1.0901182889938354, "learning_rate": 1.6318229194814906e-05, "loss": 0.9703, "step": 6817 }, { "epoch": 0.9117411072479272, "grad_norm": 1.0668545961380005, "learning_rate": 1.631711006253251e-05, "loss": 0.9826, "step": 6818 }, { "epoch": 0.9118748328430062, "grad_norm": 1.0639677047729492, "learning_rate": 1.6315990798576002e-05, "loss": 0.8764, "step": 6819 }, { "epoch": 0.912008558438085, "grad_norm": 1.1224595308303833, "learning_rate": 1.631487140296871e-05, "loss": 0.9688, "step": 6820 }, { "epoch": 0.912142284033164, "grad_norm": 1.0143686532974243, "learning_rate": 1.6313751875733966e-05, "loss": 0.8944, "step": 6821 }, { "epoch": 0.9122760096282428, "grad_norm": 1.1076228618621826, "learning_rate": 1.6312632216895107e-05, "loss": 1.0189, "step": 6822 }, { "epoch": 0.9124097352233217, "grad_norm": 1.1387327909469604, "learning_rate": 1.6311512426475472e-05, "loss": 0.971, "step": 6823 }, { "epoch": 0.9125434608184007, "grad_norm": 1.0415048599243164, "learning_rate": 1.6310392504498397e-05, "loss": 0.8719, "step": 6824 }, { "epoch": 0.9126771864134795, "grad_norm": 1.0837432146072388, "learning_rate": 1.6309272450987226e-05, "loss": 0.9144, "step": 6825 }, { "epoch": 0.9128109120085585, "grad_norm": 0.958026111125946, "learning_rate": 1.6308152265965313e-05, "loss": 0.8446, "step": 6826 }, { "epoch": 0.9129446376036373, "grad_norm": 1.0343043804168701, "learning_rate": 1.6307031949455998e-05, "loss": 0.928, "step": 6827 }, { "epoch": 0.9130783631987163, "grad_norm": 1.097752571105957, "learning_rate": 1.630591150148264e-05, "loss": 1.0244, "step": 6828 }, { "epoch": 0.9132120887937951, "grad_norm": 1.1021761894226074, "learning_rate": 1.630479092206859e-05, "loss": 1.0021, "step": 6829 }, { "epoch": 0.9133458143888741, "grad_norm": 1.1466758251190186, "learning_rate": 1.6303670211237206e-05, "loss": 0.9519, "step": 6830 }, { "epoch": 0.9134795399839529, "grad_norm": 1.0269718170166016, "learning_rate": 1.6302549369011847e-05, "loss": 0.9521, "step": 6831 }, { "epoch": 0.9136132655790318, "grad_norm": 1.0369553565979004, "learning_rate": 1.630142839541588e-05, "loss": 0.8876, "step": 6832 }, { "epoch": 0.9137469911741107, "grad_norm": 1.0761327743530273, "learning_rate": 1.630030729047267e-05, "loss": 0.9824, "step": 6833 }, { "epoch": 0.9138807167691896, "grad_norm": 1.0410864353179932, "learning_rate": 1.629918605420558e-05, "loss": 0.9437, "step": 6834 }, { "epoch": 0.9140144423642685, "grad_norm": 1.0458101034164429, "learning_rate": 1.6298064686637983e-05, "loss": 0.8177, "step": 6835 }, { "epoch": 0.9141481679593474, "grad_norm": 1.0597580671310425, "learning_rate": 1.6296943187793256e-05, "loss": 0.8724, "step": 6836 }, { "epoch": 0.9142818935544264, "grad_norm": 0.9621270895004272, "learning_rate": 1.629582155769477e-05, "loss": 0.8305, "step": 6837 }, { "epoch": 0.9144156191495052, "grad_norm": 1.1643141508102417, "learning_rate": 1.6294699796365912e-05, "loss": 1.131, "step": 6838 }, { "epoch": 0.9145493447445842, "grad_norm": 1.004451036453247, "learning_rate": 1.629357790383006e-05, "loss": 0.9581, "step": 6839 }, { "epoch": 0.914683070339663, "grad_norm": 0.9370132684707642, "learning_rate": 1.62924558801106e-05, "loss": 0.8862, "step": 6840 }, { "epoch": 0.9148167959347419, "grad_norm": 1.0726017951965332, "learning_rate": 1.629133372523092e-05, "loss": 0.9228, "step": 6841 }, { "epoch": 0.9149505215298208, "grad_norm": 1.070617437362671, "learning_rate": 1.6290211439214402e-05, "loss": 0.9753, "step": 6842 }, { "epoch": 0.9150842471248997, "grad_norm": 0.9160057306289673, "learning_rate": 1.628908902208445e-05, "loss": 0.9042, "step": 6843 }, { "epoch": 0.9152179727199786, "grad_norm": 0.9815974235534668, "learning_rate": 1.6287966473864455e-05, "loss": 0.9692, "step": 6844 }, { "epoch": 0.9153516983150575, "grad_norm": 1.0848268270492554, "learning_rate": 1.6286843794577815e-05, "loss": 1.0521, "step": 6845 }, { "epoch": 0.9154854239101364, "grad_norm": 1.1050935983657837, "learning_rate": 1.628572098424793e-05, "loss": 0.9754, "step": 6846 }, { "epoch": 0.9156191495052153, "grad_norm": 1.190428614616394, "learning_rate": 1.628459804289821e-05, "loss": 0.9364, "step": 6847 }, { "epoch": 0.9157528751002942, "grad_norm": 0.9780930280685425, "learning_rate": 1.6283474970552055e-05, "loss": 0.8962, "step": 6848 }, { "epoch": 0.9158866006953731, "grad_norm": 1.1244094371795654, "learning_rate": 1.628235176723288e-05, "loss": 1.0553, "step": 6849 }, { "epoch": 0.916020326290452, "grad_norm": 1.0004079341888428, "learning_rate": 1.6281228432964092e-05, "loss": 0.9232, "step": 6850 }, { "epoch": 0.9161540518855309, "grad_norm": 1.1048442125320435, "learning_rate": 1.6280104967769106e-05, "loss": 0.7968, "step": 6851 }, { "epoch": 0.9162877774806097, "grad_norm": 1.1042070388793945, "learning_rate": 1.6278981371671345e-05, "loss": 0.9654, "step": 6852 }, { "epoch": 0.9164215030756887, "grad_norm": 1.08100163936615, "learning_rate": 1.6277857644694223e-05, "loss": 0.9049, "step": 6853 }, { "epoch": 0.9165552286707676, "grad_norm": 1.057588815689087, "learning_rate": 1.6276733786861166e-05, "loss": 0.919, "step": 6854 }, { "epoch": 0.9166889542658465, "grad_norm": 1.114198088645935, "learning_rate": 1.6275609798195598e-05, "loss": 0.9968, "step": 6855 }, { "epoch": 0.9168226798609254, "grad_norm": 1.0222796201705933, "learning_rate": 1.6274485678720952e-05, "loss": 0.8945, "step": 6856 }, { "epoch": 0.9169564054560043, "grad_norm": 1.0690758228302002, "learning_rate": 1.627336142846065e-05, "loss": 0.992, "step": 6857 }, { "epoch": 0.9170901310510832, "grad_norm": 1.101946234703064, "learning_rate": 1.627223704743814e-05, "loss": 0.9204, "step": 6858 }, { "epoch": 0.9172238566461621, "grad_norm": 1.08036208152771, "learning_rate": 1.6271112535676846e-05, "loss": 1.0989, "step": 6859 }, { "epoch": 0.917357582241241, "grad_norm": 1.1058663129806519, "learning_rate": 1.6269987893200213e-05, "loss": 0.9127, "step": 6860 }, { "epoch": 0.9174913078363198, "grad_norm": 1.2037876844406128, "learning_rate": 1.6268863120031682e-05, "loss": 0.9866, "step": 6861 }, { "epoch": 0.9176250334313988, "grad_norm": 1.056054949760437, "learning_rate": 1.6267738216194698e-05, "loss": 0.8618, "step": 6862 }, { "epoch": 0.9177587590264776, "grad_norm": 1.081805944442749, "learning_rate": 1.6266613181712708e-05, "loss": 0.9243, "step": 6863 }, { "epoch": 0.9178924846215566, "grad_norm": 1.0295435190200806, "learning_rate": 1.626548801660916e-05, "loss": 0.9538, "step": 6864 }, { "epoch": 0.9180262102166354, "grad_norm": 1.0153684616088867, "learning_rate": 1.6264362720907514e-05, "loss": 0.9541, "step": 6865 }, { "epoch": 0.9181599358117144, "grad_norm": 0.9508002400398254, "learning_rate": 1.6263237294631224e-05, "loss": 0.7294, "step": 6866 }, { "epoch": 0.9182936614067932, "grad_norm": 1.086427927017212, "learning_rate": 1.6262111737803737e-05, "loss": 0.9688, "step": 6867 }, { "epoch": 0.9184273870018722, "grad_norm": 1.1007853746414185, "learning_rate": 1.626098605044853e-05, "loss": 0.9601, "step": 6868 }, { "epoch": 0.9185611125969511, "grad_norm": 1.064684510231018, "learning_rate": 1.625986023258906e-05, "loss": 0.9213, "step": 6869 }, { "epoch": 0.9186948381920299, "grad_norm": 0.9863426089286804, "learning_rate": 1.625873428424879e-05, "loss": 0.806, "step": 6870 }, { "epoch": 0.9188285637871089, "grad_norm": 1.1444082260131836, "learning_rate": 1.6257608205451192e-05, "loss": 0.9432, "step": 6871 }, { "epoch": 0.9189622893821877, "grad_norm": 1.0987794399261475, "learning_rate": 1.6256481996219743e-05, "loss": 0.9893, "step": 6872 }, { "epoch": 0.9190960149772667, "grad_norm": 1.0105737447738647, "learning_rate": 1.6255355656577915e-05, "loss": 0.9716, "step": 6873 }, { "epoch": 0.9192297405723455, "grad_norm": 1.0987651348114014, "learning_rate": 1.625422918654918e-05, "loss": 0.8949, "step": 6874 }, { "epoch": 0.9193634661674245, "grad_norm": 1.0292030572891235, "learning_rate": 1.6253102586157022e-05, "loss": 0.9582, "step": 6875 }, { "epoch": 0.9194971917625033, "grad_norm": 0.965427577495575, "learning_rate": 1.6251975855424924e-05, "loss": 0.8976, "step": 6876 }, { "epoch": 0.9196309173575823, "grad_norm": 1.1971684694290161, "learning_rate": 1.6250848994376377e-05, "loss": 0.9965, "step": 6877 }, { "epoch": 0.9197646429526611, "grad_norm": 1.0184681415557861, "learning_rate": 1.624972200303486e-05, "loss": 0.9054, "step": 6878 }, { "epoch": 0.91989836854774, "grad_norm": 1.0956999063491821, "learning_rate": 1.6248594881423866e-05, "loss": 1.0516, "step": 6879 }, { "epoch": 0.920032094142819, "grad_norm": 1.0508352518081665, "learning_rate": 1.624746762956689e-05, "loss": 0.8213, "step": 6880 }, { "epoch": 0.9201658197378978, "grad_norm": 0.9868884682655334, "learning_rate": 1.6246340247487435e-05, "loss": 0.8521, "step": 6881 }, { "epoch": 0.9202995453329768, "grad_norm": 1.1002509593963623, "learning_rate": 1.6245212735208994e-05, "loss": 1.0102, "step": 6882 }, { "epoch": 0.9204332709280556, "grad_norm": 0.9806067943572998, "learning_rate": 1.6244085092755066e-05, "loss": 0.8298, "step": 6883 }, { "epoch": 0.9205669965231346, "grad_norm": 1.1113206148147583, "learning_rate": 1.624295732014916e-05, "loss": 0.9031, "step": 6884 }, { "epoch": 0.9207007221182134, "grad_norm": 1.0912349224090576, "learning_rate": 1.6241829417414784e-05, "loss": 0.9522, "step": 6885 }, { "epoch": 0.9208344477132924, "grad_norm": 1.0708122253417969, "learning_rate": 1.6240701384575446e-05, "loss": 0.99, "step": 6886 }, { "epoch": 0.9209681733083712, "grad_norm": 1.0628043413162231, "learning_rate": 1.623957322165466e-05, "loss": 0.9201, "step": 6887 }, { "epoch": 0.9211018989034501, "grad_norm": 1.1664705276489258, "learning_rate": 1.623844492867594e-05, "loss": 0.945, "step": 6888 }, { "epoch": 0.921235624498529, "grad_norm": 0.9236838221549988, "learning_rate": 1.6237316505662808e-05, "loss": 0.9173, "step": 6889 }, { "epoch": 0.9213693500936079, "grad_norm": 1.0149219036102295, "learning_rate": 1.623618795263878e-05, "loss": 0.8982, "step": 6890 }, { "epoch": 0.9215030756886868, "grad_norm": 1.0694936513900757, "learning_rate": 1.623505926962738e-05, "loss": 0.9214, "step": 6891 }, { "epoch": 0.9216368012837657, "grad_norm": 1.076536774635315, "learning_rate": 1.6233930456652138e-05, "loss": 0.9253, "step": 6892 }, { "epoch": 0.9217705268788446, "grad_norm": 1.0527029037475586, "learning_rate": 1.6232801513736576e-05, "loss": 0.9864, "step": 6893 }, { "epoch": 0.9219042524739235, "grad_norm": 1.0681196451187134, "learning_rate": 1.6231672440904236e-05, "loss": 0.9317, "step": 6894 }, { "epoch": 0.9220379780690025, "grad_norm": 1.0799440145492554, "learning_rate": 1.6230543238178645e-05, "loss": 1.0109, "step": 6895 }, { "epoch": 0.9221717036640813, "grad_norm": 1.3043550252914429, "learning_rate": 1.622941390558334e-05, "loss": 0.8347, "step": 6896 }, { "epoch": 0.9223054292591603, "grad_norm": 1.0095030069351196, "learning_rate": 1.6228284443141866e-05, "loss": 0.8994, "step": 6897 }, { "epoch": 0.9224391548542391, "grad_norm": 1.1030194759368896, "learning_rate": 1.6227154850877762e-05, "loss": 1.0404, "step": 6898 }, { "epoch": 0.922572880449318, "grad_norm": 1.1107786893844604, "learning_rate": 1.6226025128814577e-05, "loss": 1.0034, "step": 6899 }, { "epoch": 0.9227066060443969, "grad_norm": 1.0816348791122437, "learning_rate": 1.622489527697585e-05, "loss": 0.8769, "step": 6900 }, { "epoch": 0.9228403316394758, "grad_norm": 1.0478155612945557, "learning_rate": 1.6223765295385142e-05, "loss": 0.9268, "step": 6901 }, { "epoch": 0.9229740572345547, "grad_norm": 1.0490728616714478, "learning_rate": 1.6222635184065997e-05, "loss": 0.8867, "step": 6902 }, { "epoch": 0.9231077828296336, "grad_norm": 1.1856682300567627, "learning_rate": 1.6221504943041982e-05, "loss": 0.99, "step": 6903 }, { "epoch": 0.9232415084247125, "grad_norm": 1.033455729484558, "learning_rate": 1.6220374572336646e-05, "loss": 0.8888, "step": 6904 }, { "epoch": 0.9233752340197914, "grad_norm": 1.088620901107788, "learning_rate": 1.6219244071973554e-05, "loss": 0.8729, "step": 6905 }, { "epoch": 0.9235089596148703, "grad_norm": 1.163955807685852, "learning_rate": 1.6218113441976275e-05, "loss": 0.9952, "step": 6906 }, { "epoch": 0.9236426852099492, "grad_norm": 1.0452533960342407, "learning_rate": 1.6216982682368365e-05, "loss": 0.8816, "step": 6907 }, { "epoch": 0.923776410805028, "grad_norm": 0.9951573014259338, "learning_rate": 1.6215851793173403e-05, "loss": 0.9032, "step": 6908 }, { "epoch": 0.923910136400107, "grad_norm": 1.0714987516403198, "learning_rate": 1.6214720774414956e-05, "loss": 1.0529, "step": 6909 }, { "epoch": 0.9240438619951858, "grad_norm": 1.290366530418396, "learning_rate": 1.6213589626116607e-05, "loss": 0.9136, "step": 6910 }, { "epoch": 0.9241775875902648, "grad_norm": 0.992388904094696, "learning_rate": 1.6212458348301926e-05, "loss": 0.8399, "step": 6911 }, { "epoch": 0.9243113131853437, "grad_norm": 1.137168288230896, "learning_rate": 1.621132694099449e-05, "loss": 0.9279, "step": 6912 }, { "epoch": 0.9244450387804226, "grad_norm": 1.2961491346359253, "learning_rate": 1.621019540421789e-05, "loss": 1.0161, "step": 6913 }, { "epoch": 0.9245787643755015, "grad_norm": 1.0697441101074219, "learning_rate": 1.6209063737995716e-05, "loss": 0.9842, "step": 6914 }, { "epoch": 0.9247124899705804, "grad_norm": 1.1107462644577026, "learning_rate": 1.6207931942351543e-05, "loss": 0.8655, "step": 6915 }, { "epoch": 0.9248462155656593, "grad_norm": 0.9772646427154541, "learning_rate": 1.620680001730897e-05, "loss": 0.8821, "step": 6916 }, { "epoch": 0.9249799411607381, "grad_norm": 1.0757473707199097, "learning_rate": 1.620566796289159e-05, "loss": 0.9021, "step": 6917 }, { "epoch": 0.9251136667558171, "grad_norm": 1.1896097660064697, "learning_rate": 1.6204535779123002e-05, "loss": 0.9483, "step": 6918 }, { "epoch": 0.9252473923508959, "grad_norm": 1.0323418378829956, "learning_rate": 1.62034034660268e-05, "loss": 0.8985, "step": 6919 }, { "epoch": 0.9253811179459749, "grad_norm": 1.0475860834121704, "learning_rate": 1.620227102362659e-05, "loss": 1.0187, "step": 6920 }, { "epoch": 0.9255148435410537, "grad_norm": 1.021731972694397, "learning_rate": 1.6201138451945976e-05, "loss": 0.9402, "step": 6921 }, { "epoch": 0.9256485691361327, "grad_norm": 1.11515474319458, "learning_rate": 1.6200005751008564e-05, "loss": 0.9872, "step": 6922 }, { "epoch": 0.9257822947312115, "grad_norm": 1.0412979125976562, "learning_rate": 1.6198872920837966e-05, "loss": 0.9056, "step": 6923 }, { "epoch": 0.9259160203262905, "grad_norm": 1.0476435422897339, "learning_rate": 1.619773996145779e-05, "loss": 0.9424, "step": 6924 }, { "epoch": 0.9260497459213693, "grad_norm": 1.0711697340011597, "learning_rate": 1.6196606872891657e-05, "loss": 0.8298, "step": 6925 }, { "epoch": 0.9261834715164482, "grad_norm": 1.055830955505371, "learning_rate": 1.6195473655163187e-05, "loss": 1.0201, "step": 6926 }, { "epoch": 0.9263171971115272, "grad_norm": 1.1842652559280396, "learning_rate": 1.619434030829599e-05, "loss": 1.0027, "step": 6927 }, { "epoch": 0.926450922706606, "grad_norm": 0.9343481659889221, "learning_rate": 1.6193206832313702e-05, "loss": 0.8421, "step": 6928 }, { "epoch": 0.926584648301685, "grad_norm": 1.092033863067627, "learning_rate": 1.6192073227239942e-05, "loss": 0.9271, "step": 6929 }, { "epoch": 0.9267183738967638, "grad_norm": 1.0830357074737549, "learning_rate": 1.6190939493098344e-05, "loss": 0.9572, "step": 6930 }, { "epoch": 0.9268520994918428, "grad_norm": 0.9988926649093628, "learning_rate": 1.618980562991253e-05, "loss": 0.8107, "step": 6931 }, { "epoch": 0.9269858250869216, "grad_norm": 1.1210498809814453, "learning_rate": 1.6188671637706143e-05, "loss": 1.0304, "step": 6932 }, { "epoch": 0.9271195506820006, "grad_norm": 1.0100246667861938, "learning_rate": 1.618753751650282e-05, "loss": 0.8508, "step": 6933 }, { "epoch": 0.9272532762770794, "grad_norm": 0.915416955947876, "learning_rate": 1.61864032663262e-05, "loss": 0.9, "step": 6934 }, { "epoch": 0.9273870018721583, "grad_norm": 1.163590431213379, "learning_rate": 1.618526888719992e-05, "loss": 1.0662, "step": 6935 }, { "epoch": 0.9275207274672372, "grad_norm": 1.1036838293075562, "learning_rate": 1.6184134379147627e-05, "loss": 0.9838, "step": 6936 }, { "epoch": 0.9276544530623161, "grad_norm": 1.1418052911758423, "learning_rate": 1.6182999742192974e-05, "loss": 0.7533, "step": 6937 }, { "epoch": 0.927788178657395, "grad_norm": 1.008998155593872, "learning_rate": 1.6181864976359608e-05, "loss": 0.8454, "step": 6938 }, { "epoch": 0.9279219042524739, "grad_norm": 1.0258378982543945, "learning_rate": 1.618073008167118e-05, "loss": 0.9303, "step": 6939 }, { "epoch": 0.9280556298475529, "grad_norm": 1.0755735635757446, "learning_rate": 1.6179595058151346e-05, "loss": 0.9665, "step": 6940 }, { "epoch": 0.9281893554426317, "grad_norm": 1.157312273979187, "learning_rate": 1.617845990582377e-05, "loss": 0.9464, "step": 6941 }, { "epoch": 0.9283230810377107, "grad_norm": 1.0503900051116943, "learning_rate": 1.617732462471211e-05, "loss": 0.8372, "step": 6942 }, { "epoch": 0.9284568066327895, "grad_norm": 0.9213406443595886, "learning_rate": 1.6176189214840027e-05, "loss": 0.8771, "step": 6943 }, { "epoch": 0.9285905322278684, "grad_norm": 0.9571143984794617, "learning_rate": 1.6175053676231188e-05, "loss": 0.7725, "step": 6944 }, { "epoch": 0.9287242578229473, "grad_norm": 1.1020632982254028, "learning_rate": 1.6173918008909266e-05, "loss": 1.011, "step": 6945 }, { "epoch": 0.9288579834180262, "grad_norm": 0.9676728248596191, "learning_rate": 1.617278221289793e-05, "loss": 0.802, "step": 6946 }, { "epoch": 0.9289917090131051, "grad_norm": 1.1829897165298462, "learning_rate": 1.617164628822086e-05, "loss": 0.9476, "step": 6947 }, { "epoch": 0.929125434608184, "grad_norm": 1.079222321510315, "learning_rate": 1.6170510234901723e-05, "loss": 0.9338, "step": 6948 }, { "epoch": 0.9292591602032629, "grad_norm": 1.049131155014038, "learning_rate": 1.6169374052964205e-05, "loss": 0.8555, "step": 6949 }, { "epoch": 0.9293928857983418, "grad_norm": 1.0093390941619873, "learning_rate": 1.616823774243199e-05, "loss": 0.8914, "step": 6950 }, { "epoch": 0.9295266113934207, "grad_norm": 1.0331645011901855, "learning_rate": 1.6167101303328766e-05, "loss": 0.9178, "step": 6951 }, { "epoch": 0.9296603369884996, "grad_norm": 0.9970361590385437, "learning_rate": 1.616596473567821e-05, "loss": 0.8872, "step": 6952 }, { "epoch": 0.9297940625835786, "grad_norm": 0.9104267954826355, "learning_rate": 1.6164828039504022e-05, "loss": 0.9486, "step": 6953 }, { "epoch": 0.9299277881786574, "grad_norm": 1.0969213247299194, "learning_rate": 1.6163691214829895e-05, "loss": 1.0143, "step": 6954 }, { "epoch": 0.9300615137737362, "grad_norm": 1.0657401084899902, "learning_rate": 1.6162554261679517e-05, "loss": 0.9617, "step": 6955 }, { "epoch": 0.9301952393688152, "grad_norm": 1.1671828031539917, "learning_rate": 1.6161417180076596e-05, "loss": 0.8382, "step": 6956 }, { "epoch": 0.9303289649638941, "grad_norm": 1.0025434494018555, "learning_rate": 1.616027997004483e-05, "loss": 0.9213, "step": 6957 }, { "epoch": 0.930462690558973, "grad_norm": 1.061132788658142, "learning_rate": 1.615914263160792e-05, "loss": 0.9992, "step": 6958 }, { "epoch": 0.9305964161540519, "grad_norm": 0.9592460989952087, "learning_rate": 1.615800516478958e-05, "loss": 0.9026, "step": 6959 }, { "epoch": 0.9307301417491308, "grad_norm": 1.0587468147277832, "learning_rate": 1.615686756961351e-05, "loss": 0.9834, "step": 6960 }, { "epoch": 0.9308638673442097, "grad_norm": 1.0437768697738647, "learning_rate": 1.6155729846103428e-05, "loss": 1.0631, "step": 6961 }, { "epoch": 0.9309975929392886, "grad_norm": 0.9286686778068542, "learning_rate": 1.615459199428305e-05, "loss": 0.8268, "step": 6962 }, { "epoch": 0.9311313185343675, "grad_norm": 1.083432912826538, "learning_rate": 1.615345401417609e-05, "loss": 1.0214, "step": 6963 }, { "epoch": 0.9312650441294463, "grad_norm": 0.9935000538825989, "learning_rate": 1.615231590580627e-05, "loss": 0.8595, "step": 6964 }, { "epoch": 0.9313987697245253, "grad_norm": 1.0275914669036865, "learning_rate": 1.6151177669197312e-05, "loss": 0.8787, "step": 6965 }, { "epoch": 0.9315324953196041, "grad_norm": 1.0694726705551147, "learning_rate": 1.615003930437294e-05, "loss": 0.9003, "step": 6966 }, { "epoch": 0.9316662209146831, "grad_norm": 1.1867496967315674, "learning_rate": 1.6148900811356886e-05, "loss": 0.934, "step": 6967 }, { "epoch": 0.931799946509762, "grad_norm": 1.1108530759811401, "learning_rate": 1.6147762190172877e-05, "loss": 0.9237, "step": 6968 }, { "epoch": 0.9319336721048409, "grad_norm": 1.0014153718948364, "learning_rate": 1.6146623440844645e-05, "loss": 0.9495, "step": 6969 }, { "epoch": 0.9320673976999198, "grad_norm": 1.0120370388031006, "learning_rate": 1.6145484563395934e-05, "loss": 0.8602, "step": 6970 }, { "epoch": 0.9322011232949987, "grad_norm": 1.0823014974594116, "learning_rate": 1.6144345557850475e-05, "loss": 0.9375, "step": 6971 }, { "epoch": 0.9323348488900776, "grad_norm": 1.0309419631958008, "learning_rate": 1.6143206424232018e-05, "loss": 0.9405, "step": 6972 }, { "epoch": 0.9324685744851564, "grad_norm": 1.0053772926330566, "learning_rate": 1.6142067162564293e-05, "loss": 0.8849, "step": 6973 }, { "epoch": 0.9326023000802354, "grad_norm": 1.0059148073196411, "learning_rate": 1.614092777287106e-05, "loss": 0.8845, "step": 6974 }, { "epoch": 0.9327360256753142, "grad_norm": 1.1131207942962646, "learning_rate": 1.6139788255176063e-05, "loss": 1.0046, "step": 6975 }, { "epoch": 0.9328697512703932, "grad_norm": 1.1017849445343018, "learning_rate": 1.6138648609503055e-05, "loss": 0.9255, "step": 6976 }, { "epoch": 0.933003476865472, "grad_norm": 1.1533608436584473, "learning_rate": 1.613750883587579e-05, "loss": 1.0285, "step": 6977 }, { "epoch": 0.933137202460551, "grad_norm": 1.0690585374832153, "learning_rate": 1.6136368934318028e-05, "loss": 0.9821, "step": 6978 }, { "epoch": 0.9332709280556298, "grad_norm": 1.077472448348999, "learning_rate": 1.6135228904853525e-05, "loss": 0.8647, "step": 6979 }, { "epoch": 0.9334046536507088, "grad_norm": 1.1467127799987793, "learning_rate": 1.6134088747506046e-05, "loss": 0.9894, "step": 6980 }, { "epoch": 0.9335383792457876, "grad_norm": 1.0875422954559326, "learning_rate": 1.6132948462299362e-05, "loss": 0.9001, "step": 6981 }, { "epoch": 0.9336721048408665, "grad_norm": 1.076904296875, "learning_rate": 1.6131808049257228e-05, "loss": 1.0571, "step": 6982 }, { "epoch": 0.9338058304359454, "grad_norm": 0.9852768778800964, "learning_rate": 1.613066750840343e-05, "loss": 0.8687, "step": 6983 }, { "epoch": 0.9339395560310243, "grad_norm": 1.1950201988220215, "learning_rate": 1.612952683976173e-05, "loss": 0.9328, "step": 6984 }, { "epoch": 0.9340732816261033, "grad_norm": 0.9338102340698242, "learning_rate": 1.612838604335591e-05, "loss": 0.8697, "step": 6985 }, { "epoch": 0.9342070072211821, "grad_norm": 1.0609676837921143, "learning_rate": 1.6127245119209747e-05, "loss": 0.8812, "step": 6986 }, { "epoch": 0.9343407328162611, "grad_norm": 1.0481910705566406, "learning_rate": 1.6126104067347023e-05, "loss": 0.889, "step": 6987 }, { "epoch": 0.9344744584113399, "grad_norm": 1.1022799015045166, "learning_rate": 1.612496288779152e-05, "loss": 1.1037, "step": 6988 }, { "epoch": 0.9346081840064189, "grad_norm": 1.087249994277954, "learning_rate": 1.6123821580567028e-05, "loss": 0.9815, "step": 6989 }, { "epoch": 0.9347419096014977, "grad_norm": 0.9721426963806152, "learning_rate": 1.6122680145697334e-05, "loss": 0.861, "step": 6990 }, { "epoch": 0.9348756351965766, "grad_norm": 0.9519912600517273, "learning_rate": 1.6121538583206232e-05, "loss": 0.8518, "step": 6991 }, { "epoch": 0.9350093607916555, "grad_norm": 0.9744101166725159, "learning_rate": 1.6120396893117518e-05, "loss": 0.85, "step": 6992 }, { "epoch": 0.9351430863867344, "grad_norm": 0.9318773746490479, "learning_rate": 1.6119255075454986e-05, "loss": 0.8196, "step": 6993 }, { "epoch": 0.9352768119818133, "grad_norm": 1.2241122722625732, "learning_rate": 1.6118113130242435e-05, "loss": 0.94, "step": 6994 }, { "epoch": 0.9354105375768922, "grad_norm": 0.9897013902664185, "learning_rate": 1.6116971057503673e-05, "loss": 0.9284, "step": 6995 }, { "epoch": 0.9355442631719711, "grad_norm": 1.1496587991714478, "learning_rate": 1.6115828857262502e-05, "loss": 0.9902, "step": 6996 }, { "epoch": 0.93567798876705, "grad_norm": 1.0751930475234985, "learning_rate": 1.611468652954273e-05, "loss": 0.9193, "step": 6997 }, { "epoch": 0.935811714362129, "grad_norm": 1.0770121812820435, "learning_rate": 1.6113544074368166e-05, "loss": 1.0087, "step": 6998 }, { "epoch": 0.9359454399572078, "grad_norm": 1.0164508819580078, "learning_rate": 1.611240149176263e-05, "loss": 0.9297, "step": 6999 }, { "epoch": 0.9360791655522868, "grad_norm": 1.1183191537857056, "learning_rate": 1.6111258781749934e-05, "loss": 0.9097, "step": 7000 }, { "epoch": 0.9362128911473656, "grad_norm": 1.0159372091293335, "learning_rate": 1.611011594435389e-05, "loss": 0.894, "step": 7001 }, { "epoch": 0.9363466167424445, "grad_norm": 1.1424487829208374, "learning_rate": 1.610897297959833e-05, "loss": 0.9422, "step": 7002 }, { "epoch": 0.9364803423375234, "grad_norm": 1.208791732788086, "learning_rate": 1.6107829887507076e-05, "loss": 0.8775, "step": 7003 }, { "epoch": 0.9366140679326023, "grad_norm": 1.1572887897491455, "learning_rate": 1.610668666810395e-05, "loss": 1.0455, "step": 7004 }, { "epoch": 0.9367477935276812, "grad_norm": 1.0640041828155518, "learning_rate": 1.6105543321412786e-05, "loss": 1.041, "step": 7005 }, { "epoch": 0.9368815191227601, "grad_norm": 1.091064214706421, "learning_rate": 1.610439984745741e-05, "loss": 0.9387, "step": 7006 }, { "epoch": 0.937015244717839, "grad_norm": 1.1536128520965576, "learning_rate": 1.6103256246261665e-05, "loss": 0.9314, "step": 7007 }, { "epoch": 0.9371489703129179, "grad_norm": 1.0467454195022583, "learning_rate": 1.6102112517849383e-05, "loss": 0.9944, "step": 7008 }, { "epoch": 0.9372826959079968, "grad_norm": 1.4069218635559082, "learning_rate": 1.6100968662244402e-05, "loss": 0.9626, "step": 7009 }, { "epoch": 0.9374164215030757, "grad_norm": 0.951005220413208, "learning_rate": 1.609982467947057e-05, "loss": 0.9785, "step": 7010 }, { "epoch": 0.9375501470981545, "grad_norm": 0.9597281813621521, "learning_rate": 1.6098680569551727e-05, "loss": 0.849, "step": 7011 }, { "epoch": 0.9376838726932335, "grad_norm": 1.0189837217330933, "learning_rate": 1.6097536332511726e-05, "loss": 0.8753, "step": 7012 }, { "epoch": 0.9378175982883123, "grad_norm": 1.0029723644256592, "learning_rate": 1.609639196837441e-05, "loss": 0.9114, "step": 7013 }, { "epoch": 0.9379513238833913, "grad_norm": 1.0561949014663696, "learning_rate": 1.6095247477163644e-05, "loss": 0.9898, "step": 7014 }, { "epoch": 0.9380850494784702, "grad_norm": 1.015450119972229, "learning_rate": 1.6094102858903275e-05, "loss": 0.9567, "step": 7015 }, { "epoch": 0.9382187750735491, "grad_norm": 1.1168749332427979, "learning_rate": 1.609295811361716e-05, "loss": 0.8879, "step": 7016 }, { "epoch": 0.938352500668628, "grad_norm": 1.0824220180511475, "learning_rate": 1.6091813241329163e-05, "loss": 1.0041, "step": 7017 }, { "epoch": 0.9384862262637069, "grad_norm": 1.029380202293396, "learning_rate": 1.6090668242063152e-05, "loss": 0.8724, "step": 7018 }, { "epoch": 0.9386199518587858, "grad_norm": 1.1161150932312012, "learning_rate": 1.608952311584299e-05, "loss": 0.9717, "step": 7019 }, { "epoch": 0.9387536774538646, "grad_norm": 1.097906231880188, "learning_rate": 1.608837786269254e-05, "loss": 1.0158, "step": 7020 }, { "epoch": 0.9388874030489436, "grad_norm": 1.023992657661438, "learning_rate": 1.6087232482635685e-05, "loss": 0.824, "step": 7021 }, { "epoch": 0.9390211286440224, "grad_norm": 1.0950909852981567, "learning_rate": 1.608608697569629e-05, "loss": 0.8724, "step": 7022 }, { "epoch": 0.9391548542391014, "grad_norm": 1.0345914363861084, "learning_rate": 1.608494134189824e-05, "loss": 0.9444, "step": 7023 }, { "epoch": 0.9392885798341802, "grad_norm": 1.0029183626174927, "learning_rate": 1.6083795581265406e-05, "loss": 0.9527, "step": 7024 }, { "epoch": 0.9394223054292592, "grad_norm": 1.0698575973510742, "learning_rate": 1.6082649693821677e-05, "loss": 0.9311, "step": 7025 }, { "epoch": 0.939556031024338, "grad_norm": 0.9933726787567139, "learning_rate": 1.6081503679590932e-05, "loss": 0.8298, "step": 7026 }, { "epoch": 0.939689756619417, "grad_norm": 1.07218599319458, "learning_rate": 1.608035753859707e-05, "loss": 0.9439, "step": 7027 }, { "epoch": 0.9398234822144959, "grad_norm": 1.0953887701034546, "learning_rate": 1.6079211270863966e-05, "loss": 0.91, "step": 7028 }, { "epoch": 0.9399572078095747, "grad_norm": 1.1669597625732422, "learning_rate": 1.6078064876415523e-05, "loss": 0.9468, "step": 7029 }, { "epoch": 0.9400909334046537, "grad_norm": 1.11777663230896, "learning_rate": 1.607691835527563e-05, "loss": 0.9716, "step": 7030 }, { "epoch": 0.9402246589997325, "grad_norm": 1.0105535984039307, "learning_rate": 1.6075771707468196e-05, "loss": 0.8458, "step": 7031 }, { "epoch": 0.9403583845948115, "grad_norm": 1.125118374824524, "learning_rate": 1.607462493301711e-05, "loss": 0.9395, "step": 7032 }, { "epoch": 0.9404921101898903, "grad_norm": 0.9611837863922119, "learning_rate": 1.6073478031946282e-05, "loss": 0.787, "step": 7033 }, { "epoch": 0.9406258357849693, "grad_norm": 0.9582514762878418, "learning_rate": 1.6072331004279617e-05, "loss": 0.9639, "step": 7034 }, { "epoch": 0.9407595613800481, "grad_norm": 1.057563066482544, "learning_rate": 1.6071183850041022e-05, "loss": 0.8635, "step": 7035 }, { "epoch": 0.9408932869751271, "grad_norm": 1.1197491884231567, "learning_rate": 1.6070036569254407e-05, "loss": 1.0785, "step": 7036 }, { "epoch": 0.9410270125702059, "grad_norm": 1.102464199066162, "learning_rate": 1.606888916194369e-05, "loss": 0.884, "step": 7037 }, { "epoch": 0.9411607381652848, "grad_norm": 1.087418794631958, "learning_rate": 1.6067741628132784e-05, "loss": 0.9721, "step": 7038 }, { "epoch": 0.9412944637603637, "grad_norm": 1.0805827379226685, "learning_rate": 1.6066593967845613e-05, "loss": 0.9675, "step": 7039 }, { "epoch": 0.9414281893554426, "grad_norm": 1.090860366821289, "learning_rate": 1.6065446181106093e-05, "loss": 0.9597, "step": 7040 }, { "epoch": 0.9415619149505216, "grad_norm": 0.9238436222076416, "learning_rate": 1.606429826793815e-05, "loss": 0.9327, "step": 7041 }, { "epoch": 0.9416956405456004, "grad_norm": 0.9414849877357483, "learning_rate": 1.6063150228365712e-05, "loss": 0.8568, "step": 7042 }, { "epoch": 0.9418293661406794, "grad_norm": 1.206019401550293, "learning_rate": 1.6062002062412717e-05, "loss": 1.0164, "step": 7043 }, { "epoch": 0.9419630917357582, "grad_norm": 0.9971834421157837, "learning_rate": 1.6060853770103083e-05, "loss": 0.8606, "step": 7044 }, { "epoch": 0.9420968173308372, "grad_norm": 1.0779533386230469, "learning_rate": 1.605970535146075e-05, "loss": 1.0597, "step": 7045 }, { "epoch": 0.942230542925916, "grad_norm": 1.0883381366729736, "learning_rate": 1.6058556806509663e-05, "loss": 0.9305, "step": 7046 }, { "epoch": 0.9423642685209949, "grad_norm": 1.0482873916625977, "learning_rate": 1.605740813527376e-05, "loss": 0.9097, "step": 7047 }, { "epoch": 0.9424979941160738, "grad_norm": 1.0759990215301514, "learning_rate": 1.6056259337776975e-05, "loss": 0.9647, "step": 7048 }, { "epoch": 0.9426317197111527, "grad_norm": 1.1531344652175903, "learning_rate": 1.605511041404326e-05, "loss": 0.9418, "step": 7049 }, { "epoch": 0.9427654453062316, "grad_norm": 1.044476866722107, "learning_rate": 1.605396136409656e-05, "loss": 0.866, "step": 7050 }, { "epoch": 0.9428991709013105, "grad_norm": 1.0457857847213745, "learning_rate": 1.605281218796083e-05, "loss": 0.9232, "step": 7051 }, { "epoch": 0.9430328964963894, "grad_norm": 1.0963647365570068, "learning_rate": 1.6051662885660025e-05, "loss": 0.8869, "step": 7052 }, { "epoch": 0.9431666220914683, "grad_norm": 1.1349354982376099, "learning_rate": 1.6050513457218092e-05, "loss": 0.9139, "step": 7053 }, { "epoch": 0.9433003476865472, "grad_norm": 1.0369625091552734, "learning_rate": 1.6049363902659e-05, "loss": 0.8333, "step": 7054 }, { "epoch": 0.9434340732816261, "grad_norm": 0.931880533695221, "learning_rate": 1.6048214222006703e-05, "loss": 0.8946, "step": 7055 }, { "epoch": 0.943567798876705, "grad_norm": 1.0355690717697144, "learning_rate": 1.6047064415285173e-05, "loss": 0.8142, "step": 7056 }, { "epoch": 0.9437015244717839, "grad_norm": 1.1400094032287598, "learning_rate": 1.6045914482518366e-05, "loss": 1.0132, "step": 7057 }, { "epoch": 0.9438352500668628, "grad_norm": 1.0273704528808594, "learning_rate": 1.6044764423730262e-05, "loss": 0.9492, "step": 7058 }, { "epoch": 0.9439689756619417, "grad_norm": 1.0778993368148804, "learning_rate": 1.6043614238944828e-05, "loss": 0.9291, "step": 7059 }, { "epoch": 0.9441027012570206, "grad_norm": 1.110110878944397, "learning_rate": 1.6042463928186035e-05, "loss": 1.0442, "step": 7060 }, { "epoch": 0.9442364268520995, "grad_norm": 1.065592885017395, "learning_rate": 1.6041313491477865e-05, "loss": 0.9682, "step": 7061 }, { "epoch": 0.9443701524471784, "grad_norm": 0.9232655167579651, "learning_rate": 1.6040162928844294e-05, "loss": 0.8607, "step": 7062 }, { "epoch": 0.9445038780422573, "grad_norm": 1.0336666107177734, "learning_rate": 1.6039012240309308e-05, "loss": 0.9665, "step": 7063 }, { "epoch": 0.9446376036373362, "grad_norm": 1.0749419927597046, "learning_rate": 1.603786142589689e-05, "loss": 0.9714, "step": 7064 }, { "epoch": 0.9447713292324151, "grad_norm": 1.2030086517333984, "learning_rate": 1.6036710485631032e-05, "loss": 0.9622, "step": 7065 }, { "epoch": 0.944905054827494, "grad_norm": 1.2007665634155273, "learning_rate": 1.6035559419535714e-05, "loss": 1.0341, "step": 7066 }, { "epoch": 0.9450387804225728, "grad_norm": 1.0877426862716675, "learning_rate": 1.603440822763494e-05, "loss": 1.0252, "step": 7067 }, { "epoch": 0.9451725060176518, "grad_norm": 1.021519422531128, "learning_rate": 1.60332569099527e-05, "loss": 0.8859, "step": 7068 }, { "epoch": 0.9453062316127306, "grad_norm": 1.0105737447738647, "learning_rate": 1.6032105466512993e-05, "loss": 0.9081, "step": 7069 }, { "epoch": 0.9454399572078096, "grad_norm": 1.068971037864685, "learning_rate": 1.6030953897339817e-05, "loss": 0.9759, "step": 7070 }, { "epoch": 0.9455736828028884, "grad_norm": 1.0634922981262207, "learning_rate": 1.602980220245718e-05, "loss": 0.9358, "step": 7071 }, { "epoch": 0.9457074083979674, "grad_norm": 1.0173265933990479, "learning_rate": 1.6028650381889088e-05, "loss": 0.839, "step": 7072 }, { "epoch": 0.9458411339930463, "grad_norm": 1.1603729724884033, "learning_rate": 1.6027498435659545e-05, "loss": 0.9718, "step": 7073 }, { "epoch": 0.9459748595881252, "grad_norm": 0.8913689255714417, "learning_rate": 1.6026346363792565e-05, "loss": 0.7897, "step": 7074 }, { "epoch": 0.9461085851832041, "grad_norm": 1.1767996549606323, "learning_rate": 1.6025194166312162e-05, "loss": 0.9082, "step": 7075 }, { "epoch": 0.9462423107782829, "grad_norm": 1.076306939125061, "learning_rate": 1.6024041843242353e-05, "loss": 1.0019, "step": 7076 }, { "epoch": 0.9463760363733619, "grad_norm": 0.9961170554161072, "learning_rate": 1.6022889394607156e-05, "loss": 0.9902, "step": 7077 }, { "epoch": 0.9465097619684407, "grad_norm": 0.9746331572532654, "learning_rate": 1.602173682043059e-05, "loss": 0.9623, "step": 7078 }, { "epoch": 0.9466434875635197, "grad_norm": 1.0101170539855957, "learning_rate": 1.6020584120736686e-05, "loss": 0.9123, "step": 7079 }, { "epoch": 0.9467772131585985, "grad_norm": 1.105758786201477, "learning_rate": 1.6019431295549463e-05, "loss": 1.0562, "step": 7080 }, { "epoch": 0.9469109387536775, "grad_norm": 1.127420425415039, "learning_rate": 1.601827834489296e-05, "loss": 1.1044, "step": 7081 }, { "epoch": 0.9470446643487563, "grad_norm": 1.0131900310516357, "learning_rate": 1.60171252687912e-05, "loss": 0.9414, "step": 7082 }, { "epoch": 0.9471783899438353, "grad_norm": 1.0410038232803345, "learning_rate": 1.601597206726822e-05, "loss": 0.8778, "step": 7083 }, { "epoch": 0.9473121155389141, "grad_norm": 1.093634843826294, "learning_rate": 1.6014818740348064e-05, "loss": 0.9463, "step": 7084 }, { "epoch": 0.947445841133993, "grad_norm": 1.015401005744934, "learning_rate": 1.6013665288054767e-05, "loss": 0.8959, "step": 7085 }, { "epoch": 0.947579566729072, "grad_norm": 0.9743746519088745, "learning_rate": 1.6012511710412364e-05, "loss": 0.879, "step": 7086 }, { "epoch": 0.9477132923241508, "grad_norm": 1.092085361480713, "learning_rate": 1.6011358007444914e-05, "loss": 0.906, "step": 7087 }, { "epoch": 0.9478470179192298, "grad_norm": 1.0641460418701172, "learning_rate": 1.6010204179176456e-05, "loss": 0.9441, "step": 7088 }, { "epoch": 0.9479807435143086, "grad_norm": 1.1013720035552979, "learning_rate": 1.6009050225631043e-05, "loss": 1.0118, "step": 7089 }, { "epoch": 0.9481144691093876, "grad_norm": 1.0126711130142212, "learning_rate": 1.600789614683273e-05, "loss": 0.9325, "step": 7090 }, { "epoch": 0.9482481947044664, "grad_norm": 1.091216802597046, "learning_rate": 1.600674194280557e-05, "loss": 0.9372, "step": 7091 }, { "epoch": 0.9483819202995454, "grad_norm": 1.2878303527832031, "learning_rate": 1.600558761357362e-05, "loss": 0.9828, "step": 7092 }, { "epoch": 0.9485156458946242, "grad_norm": 1.0509196519851685, "learning_rate": 1.6004433159160946e-05, "loss": 0.8716, "step": 7093 }, { "epoch": 0.9486493714897031, "grad_norm": 1.1290314197540283, "learning_rate": 1.6003278579591608e-05, "loss": 1.0432, "step": 7094 }, { "epoch": 0.948783097084782, "grad_norm": 1.0398333072662354, "learning_rate": 1.6002123874889672e-05, "loss": 0.866, "step": 7095 }, { "epoch": 0.9489168226798609, "grad_norm": 1.1429569721221924, "learning_rate": 1.600096904507921e-05, "loss": 0.978, "step": 7096 }, { "epoch": 0.9490505482749398, "grad_norm": 1.0715259313583374, "learning_rate": 1.5999814090184286e-05, "loss": 0.9275, "step": 7097 }, { "epoch": 0.9491842738700187, "grad_norm": 0.9913287162780762, "learning_rate": 1.5998659010228978e-05, "loss": 0.8453, "step": 7098 }, { "epoch": 0.9493179994650977, "grad_norm": 1.1056674718856812, "learning_rate": 1.5997503805237366e-05, "loss": 0.9428, "step": 7099 }, { "epoch": 0.9494517250601765, "grad_norm": 0.990154504776001, "learning_rate": 1.5996348475233526e-05, "loss": 0.8141, "step": 7100 }, { "epoch": 0.9495854506552555, "grad_norm": 0.9788251519203186, "learning_rate": 1.5995193020241536e-05, "loss": 0.9946, "step": 7101 }, { "epoch": 0.9497191762503343, "grad_norm": 1.018373966217041, "learning_rate": 1.5994037440285487e-05, "loss": 0.9282, "step": 7102 }, { "epoch": 0.9498529018454133, "grad_norm": 1.0913234949111938, "learning_rate": 1.5992881735389463e-05, "loss": 0.9456, "step": 7103 }, { "epoch": 0.9499866274404921, "grad_norm": 1.19563889503479, "learning_rate": 1.5991725905577557e-05, "loss": 0.9816, "step": 7104 }, { "epoch": 0.950120353035571, "grad_norm": 0.9748798608779907, "learning_rate": 1.5990569950873855e-05, "loss": 1.0286, "step": 7105 }, { "epoch": 0.9502540786306499, "grad_norm": 0.9589357376098633, "learning_rate": 1.5989413871302456e-05, "loss": 0.7902, "step": 7106 }, { "epoch": 0.9503878042257288, "grad_norm": 1.031050205230713, "learning_rate": 1.5988257666887454e-05, "loss": 0.8276, "step": 7107 }, { "epoch": 0.9505215298208077, "grad_norm": 1.0419827699661255, "learning_rate": 1.5987101337652955e-05, "loss": 0.9675, "step": 7108 }, { "epoch": 0.9506552554158866, "grad_norm": 1.0728776454925537, "learning_rate": 1.5985944883623052e-05, "loss": 0.9662, "step": 7109 }, { "epoch": 0.9507889810109655, "grad_norm": 1.013154149055481, "learning_rate": 1.598478830482186e-05, "loss": 0.9446, "step": 7110 }, { "epoch": 0.9509227066060444, "grad_norm": 1.0370818376541138, "learning_rate": 1.598363160127348e-05, "loss": 0.9762, "step": 7111 }, { "epoch": 0.9510564322011233, "grad_norm": 1.0818289518356323, "learning_rate": 1.5982474773002028e-05, "loss": 0.9293, "step": 7112 }, { "epoch": 0.9511901577962022, "grad_norm": 1.0101479291915894, "learning_rate": 1.5981317820031613e-05, "loss": 0.9021, "step": 7113 }, { "epoch": 0.951323883391281, "grad_norm": 1.0467801094055176, "learning_rate": 1.598016074238635e-05, "loss": 0.9594, "step": 7114 }, { "epoch": 0.95145760898636, "grad_norm": 1.0949842929840088, "learning_rate": 1.597900354009036e-05, "loss": 0.9634, "step": 7115 }, { "epoch": 0.9515913345814389, "grad_norm": 1.0705264806747437, "learning_rate": 1.597784621316776e-05, "loss": 0.9533, "step": 7116 }, { "epoch": 0.9517250601765178, "grad_norm": 1.1463476419448853, "learning_rate": 1.597668876164268e-05, "loss": 1.0464, "step": 7117 }, { "epoch": 0.9518587857715967, "grad_norm": 1.0410542488098145, "learning_rate": 1.5975531185539238e-05, "loss": 1.0321, "step": 7118 }, { "epoch": 0.9519925113666756, "grad_norm": 0.991000771522522, "learning_rate": 1.5974373484881568e-05, "loss": 0.9515, "step": 7119 }, { "epoch": 0.9521262369617545, "grad_norm": 0.9156233072280884, "learning_rate": 1.5973215659693802e-05, "loss": 0.9305, "step": 7120 }, { "epoch": 0.9522599625568334, "grad_norm": 1.0865553617477417, "learning_rate": 1.5972057710000067e-05, "loss": 0.928, "step": 7121 }, { "epoch": 0.9523936881519123, "grad_norm": 1.1766663789749146, "learning_rate": 1.5970899635824506e-05, "loss": 1.0142, "step": 7122 }, { "epoch": 0.9525274137469911, "grad_norm": 1.0699774026870728, "learning_rate": 1.5969741437191254e-05, "loss": 0.889, "step": 7123 }, { "epoch": 0.9526611393420701, "grad_norm": 1.1796070337295532, "learning_rate": 1.5968583114124457e-05, "loss": 0.9452, "step": 7124 }, { "epoch": 0.9527948649371489, "grad_norm": 1.1045987606048584, "learning_rate": 1.5967424666648253e-05, "loss": 0.8774, "step": 7125 }, { "epoch": 0.9529285905322279, "grad_norm": 0.9412549734115601, "learning_rate": 1.59662660947868e-05, "loss": 0.7952, "step": 7126 }, { "epoch": 0.9530623161273067, "grad_norm": 1.1132361888885498, "learning_rate": 1.5965107398564228e-05, "loss": 0.9673, "step": 7127 }, { "epoch": 0.9531960417223857, "grad_norm": 1.2136839628219604, "learning_rate": 1.5963948578004708e-05, "loss": 1.0005, "step": 7128 }, { "epoch": 0.9533297673174645, "grad_norm": 0.9749768376350403, "learning_rate": 1.5962789633132383e-05, "loss": 0.874, "step": 7129 }, { "epoch": 0.9534634929125435, "grad_norm": 1.067405104637146, "learning_rate": 1.5961630563971414e-05, "loss": 0.9864, "step": 7130 }, { "epoch": 0.9535972185076224, "grad_norm": 1.035555362701416, "learning_rate": 1.5960471370545962e-05, "loss": 0.8438, "step": 7131 }, { "epoch": 0.9537309441027012, "grad_norm": 1.10111665725708, "learning_rate": 1.595931205288019e-05, "loss": 0.9101, "step": 7132 }, { "epoch": 0.9538646696977802, "grad_norm": 1.0182299613952637, "learning_rate": 1.595815261099826e-05, "loss": 0.8642, "step": 7133 }, { "epoch": 0.953998395292859, "grad_norm": 0.957973837852478, "learning_rate": 1.5956993044924334e-05, "loss": 0.7498, "step": 7134 }, { "epoch": 0.954132120887938, "grad_norm": 0.9944035410881042, "learning_rate": 1.5955833354682593e-05, "loss": 0.888, "step": 7135 }, { "epoch": 0.9542658464830168, "grad_norm": 1.026961088180542, "learning_rate": 1.5954673540297205e-05, "loss": 0.8378, "step": 7136 }, { "epoch": 0.9543995720780958, "grad_norm": 1.0202935934066772, "learning_rate": 1.5953513601792346e-05, "loss": 0.8807, "step": 7137 }, { "epoch": 0.9545332976731746, "grad_norm": 1.1004679203033447, "learning_rate": 1.595235353919219e-05, "loss": 0.9564, "step": 7138 }, { "epoch": 0.9546670232682536, "grad_norm": 0.9983121156692505, "learning_rate": 1.5951193352520918e-05, "loss": 1.0089, "step": 7139 }, { "epoch": 0.9548007488633324, "grad_norm": 1.105841040611267, "learning_rate": 1.595003304180272e-05, "loss": 0.944, "step": 7140 }, { "epoch": 0.9549344744584113, "grad_norm": 0.9592905044555664, "learning_rate": 1.5948872607061777e-05, "loss": 0.8909, "step": 7141 }, { "epoch": 0.9550682000534902, "grad_norm": 1.1284663677215576, "learning_rate": 1.5947712048322273e-05, "loss": 1.0262, "step": 7142 }, { "epoch": 0.9552019256485691, "grad_norm": 1.0334285497665405, "learning_rate": 1.594655136560841e-05, "loss": 0.8187, "step": 7143 }, { "epoch": 0.955335651243648, "grad_norm": 1.04921555519104, "learning_rate": 1.5945390558944368e-05, "loss": 0.965, "step": 7144 }, { "epoch": 0.9554693768387269, "grad_norm": 1.1667208671569824, "learning_rate": 1.594422962835435e-05, "loss": 0.9663, "step": 7145 }, { "epoch": 0.9556031024338059, "grad_norm": 1.188725471496582, "learning_rate": 1.5943068573862554e-05, "loss": 1.0056, "step": 7146 }, { "epoch": 0.9557368280288847, "grad_norm": 1.0738738775253296, "learning_rate": 1.594190739549318e-05, "loss": 0.8296, "step": 7147 }, { "epoch": 0.9558705536239637, "grad_norm": 1.0802658796310425, "learning_rate": 1.594074609327043e-05, "loss": 0.9642, "step": 7148 }, { "epoch": 0.9560042792190425, "grad_norm": 1.1569119691848755, "learning_rate": 1.5939584667218517e-05, "loss": 0.9603, "step": 7149 }, { "epoch": 0.9561380048141215, "grad_norm": 0.9737552404403687, "learning_rate": 1.5938423117361642e-05, "loss": 0.8927, "step": 7150 }, { "epoch": 0.9562717304092003, "grad_norm": 1.00571870803833, "learning_rate": 1.593726144372402e-05, "loss": 0.9489, "step": 7151 }, { "epoch": 0.9564054560042792, "grad_norm": 1.0208015441894531, "learning_rate": 1.5936099646329865e-05, "loss": 0.8536, "step": 7152 }, { "epoch": 0.9565391815993581, "grad_norm": 1.0271661281585693, "learning_rate": 1.5934937725203396e-05, "loss": 1.0031, "step": 7153 }, { "epoch": 0.956672907194437, "grad_norm": 1.0311570167541504, "learning_rate": 1.5933775680368825e-05, "loss": 1.0142, "step": 7154 }, { "epoch": 0.9568066327895159, "grad_norm": 1.034283995628357, "learning_rate": 1.5932613511850378e-05, "loss": 0.9385, "step": 7155 }, { "epoch": 0.9569403583845948, "grad_norm": 1.1296002864837646, "learning_rate": 1.593145121967228e-05, "loss": 0.9521, "step": 7156 }, { "epoch": 0.9570740839796738, "grad_norm": 1.0069224834442139, "learning_rate": 1.593028880385876e-05, "loss": 0.8716, "step": 7157 }, { "epoch": 0.9572078095747526, "grad_norm": 1.0381200313568115, "learning_rate": 1.592912626443404e-05, "loss": 0.947, "step": 7158 }, { "epoch": 0.9573415351698316, "grad_norm": 0.9566755890846252, "learning_rate": 1.5927963601422357e-05, "loss": 0.8566, "step": 7159 }, { "epoch": 0.9574752607649104, "grad_norm": 1.0272274017333984, "learning_rate": 1.5926800814847946e-05, "loss": 0.9518, "step": 7160 }, { "epoch": 0.9576089863599893, "grad_norm": 0.9850673675537109, "learning_rate": 1.5925637904735047e-05, "loss": 0.8041, "step": 7161 }, { "epoch": 0.9577427119550682, "grad_norm": 1.0681962966918945, "learning_rate": 1.5924474871107892e-05, "loss": 0.8249, "step": 7162 }, { "epoch": 0.9578764375501471, "grad_norm": 1.2337106466293335, "learning_rate": 1.592331171399073e-05, "loss": 0.9851, "step": 7163 }, { "epoch": 0.958010163145226, "grad_norm": 1.004093885421753, "learning_rate": 1.5922148433407802e-05, "loss": 0.9539, "step": 7164 }, { "epoch": 0.9581438887403049, "grad_norm": 1.1471190452575684, "learning_rate": 1.5920985029383357e-05, "loss": 0.9826, "step": 7165 }, { "epoch": 0.9582776143353838, "grad_norm": 1.1019096374511719, "learning_rate": 1.5919821501941645e-05, "loss": 0.915, "step": 7166 }, { "epoch": 0.9584113399304627, "grad_norm": 1.000279188156128, "learning_rate": 1.5918657851106914e-05, "loss": 0.8721, "step": 7167 }, { "epoch": 0.9585450655255416, "grad_norm": 1.0223945379257202, "learning_rate": 1.591749407690343e-05, "loss": 0.8033, "step": 7168 }, { "epoch": 0.9586787911206205, "grad_norm": 1.02645742893219, "learning_rate": 1.5916330179355443e-05, "loss": 0.9105, "step": 7169 }, { "epoch": 0.9588125167156993, "grad_norm": 1.0516108274459839, "learning_rate": 1.5915166158487213e-05, "loss": 0.9061, "step": 7170 }, { "epoch": 0.9589462423107783, "grad_norm": 0.9885880947113037, "learning_rate": 1.5914002014323004e-05, "loss": 0.8048, "step": 7171 }, { "epoch": 0.9590799679058571, "grad_norm": 1.195559024810791, "learning_rate": 1.5912837746887086e-05, "loss": 0.8807, "step": 7172 }, { "epoch": 0.9592136935009361, "grad_norm": 0.9748232364654541, "learning_rate": 1.591167335620372e-05, "loss": 0.9439, "step": 7173 }, { "epoch": 0.959347419096015, "grad_norm": 1.0711393356323242, "learning_rate": 1.591050884229718e-05, "loss": 0.984, "step": 7174 }, { "epoch": 0.9594811446910939, "grad_norm": 1.0485951900482178, "learning_rate": 1.590934420519174e-05, "loss": 0.8776, "step": 7175 }, { "epoch": 0.9596148702861728, "grad_norm": 1.1060349941253662, "learning_rate": 1.5908179444911676e-05, "loss": 0.9949, "step": 7176 }, { "epoch": 0.9597485958812517, "grad_norm": 1.109278917312622, "learning_rate": 1.590701456148126e-05, "loss": 0.8684, "step": 7177 }, { "epoch": 0.9598823214763306, "grad_norm": 1.2605009078979492, "learning_rate": 1.5905849554924782e-05, "loss": 0.9653, "step": 7178 }, { "epoch": 0.9600160470714094, "grad_norm": 0.9390795230865479, "learning_rate": 1.590468442526652e-05, "loss": 0.9289, "step": 7179 }, { "epoch": 0.9601497726664884, "grad_norm": 1.0866782665252686, "learning_rate": 1.5903519172530762e-05, "loss": 0.9498, "step": 7180 }, { "epoch": 0.9602834982615672, "grad_norm": 1.0394997596740723, "learning_rate": 1.5902353796741796e-05, "loss": 0.9013, "step": 7181 }, { "epoch": 0.9604172238566462, "grad_norm": 1.0646930932998657, "learning_rate": 1.5901188297923914e-05, "loss": 0.918, "step": 7182 }, { "epoch": 0.960550949451725, "grad_norm": 1.0803992748260498, "learning_rate": 1.5900022676101404e-05, "loss": 0.9694, "step": 7183 }, { "epoch": 0.960684675046804, "grad_norm": 0.9850744009017944, "learning_rate": 1.589885693129857e-05, "loss": 0.8975, "step": 7184 }, { "epoch": 0.9608184006418828, "grad_norm": 1.1196023225784302, "learning_rate": 1.589769106353971e-05, "loss": 0.8876, "step": 7185 }, { "epoch": 0.9609521262369618, "grad_norm": 1.1130235195159912, "learning_rate": 1.589652507284912e-05, "loss": 0.8198, "step": 7186 }, { "epoch": 0.9610858518320406, "grad_norm": 1.2156789302825928, "learning_rate": 1.5895358959251107e-05, "loss": 1.0711, "step": 7187 }, { "epoch": 0.9612195774271195, "grad_norm": 1.0984864234924316, "learning_rate": 1.5894192722769984e-05, "loss": 0.9077, "step": 7188 }, { "epoch": 0.9613533030221985, "grad_norm": 1.096029281616211, "learning_rate": 1.5893026363430046e-05, "loss": 0.9039, "step": 7189 }, { "epoch": 0.9614870286172773, "grad_norm": 1.1097468137741089, "learning_rate": 1.5891859881255617e-05, "loss": 0.9623, "step": 7190 }, { "epoch": 0.9616207542123563, "grad_norm": 1.152674674987793, "learning_rate": 1.5890693276271005e-05, "loss": 1.0019, "step": 7191 }, { "epoch": 0.9617544798074351, "grad_norm": 1.0698479413986206, "learning_rate": 1.588952654850053e-05, "loss": 0.9085, "step": 7192 }, { "epoch": 0.9618882054025141, "grad_norm": 1.0701878070831299, "learning_rate": 1.588835969796851e-05, "loss": 0.9388, "step": 7193 }, { "epoch": 0.9620219309975929, "grad_norm": 1.1567819118499756, "learning_rate": 1.5887192724699263e-05, "loss": 0.9001, "step": 7194 }, { "epoch": 0.9621556565926719, "grad_norm": 1.0278853178024292, "learning_rate": 1.588602562871712e-05, "loss": 0.9151, "step": 7195 }, { "epoch": 0.9622893821877507, "grad_norm": 0.9728804230690002, "learning_rate": 1.5884858410046403e-05, "loss": 0.8306, "step": 7196 }, { "epoch": 0.9624231077828296, "grad_norm": 0.9693159461021423, "learning_rate": 1.5883691068711445e-05, "loss": 0.8942, "step": 7197 }, { "epoch": 0.9625568333779085, "grad_norm": 1.1105037927627563, "learning_rate": 1.5882523604736576e-05, "loss": 1.1167, "step": 7198 }, { "epoch": 0.9626905589729874, "grad_norm": 1.2085965871810913, "learning_rate": 1.5881356018146132e-05, "loss": 0.9998, "step": 7199 }, { "epoch": 0.9628242845680663, "grad_norm": 0.9997827410697937, "learning_rate": 1.588018830896445e-05, "loss": 0.9018, "step": 7200 }, { "epoch": 0.9629580101631452, "grad_norm": 0.8537271022796631, "learning_rate": 1.587902047721587e-05, "loss": 0.9466, "step": 7201 }, { "epoch": 0.9630917357582242, "grad_norm": 0.9956924319267273, "learning_rate": 1.5877852522924733e-05, "loss": 0.9128, "step": 7202 }, { "epoch": 0.963225461353303, "grad_norm": 1.1981338262557983, "learning_rate": 1.5876684446115383e-05, "loss": 1.0041, "step": 7203 }, { "epoch": 0.963359186948382, "grad_norm": 1.0202155113220215, "learning_rate": 1.587551624681217e-05, "loss": 0.8941, "step": 7204 }, { "epoch": 0.9634929125434608, "grad_norm": 1.1028200387954712, "learning_rate": 1.5874347925039447e-05, "loss": 0.9363, "step": 7205 }, { "epoch": 0.9636266381385398, "grad_norm": 1.1392103433609009, "learning_rate": 1.5873179480821558e-05, "loss": 0.8347, "step": 7206 }, { "epoch": 0.9637603637336186, "grad_norm": 1.1301209926605225, "learning_rate": 1.5872010914182864e-05, "loss": 0.9658, "step": 7207 }, { "epoch": 0.9638940893286975, "grad_norm": 0.950319766998291, "learning_rate": 1.5870842225147722e-05, "loss": 0.8847, "step": 7208 }, { "epoch": 0.9640278149237764, "grad_norm": 1.048004150390625, "learning_rate": 1.586967341374049e-05, "loss": 0.8838, "step": 7209 }, { "epoch": 0.9641615405188553, "grad_norm": 0.9903548955917358, "learning_rate": 1.5868504479985534e-05, "loss": 0.9453, "step": 7210 }, { "epoch": 0.9642952661139342, "grad_norm": 1.0336371660232544, "learning_rate": 1.586733542390722e-05, "loss": 0.8521, "step": 7211 }, { "epoch": 0.9644289917090131, "grad_norm": 1.1012523174285889, "learning_rate": 1.586616624552991e-05, "loss": 0.8786, "step": 7212 }, { "epoch": 0.964562717304092, "grad_norm": 1.0312836170196533, "learning_rate": 1.586499694487798e-05, "loss": 0.9128, "step": 7213 }, { "epoch": 0.9646964428991709, "grad_norm": 1.0870500802993774, "learning_rate": 1.58638275219758e-05, "loss": 0.8969, "step": 7214 }, { "epoch": 0.9648301684942499, "grad_norm": 1.109235167503357, "learning_rate": 1.5862657976847745e-05, "loss": 0.9953, "step": 7215 }, { "epoch": 0.9649638940893287, "grad_norm": 1.0422286987304688, "learning_rate": 1.5861488309518193e-05, "loss": 0.9205, "step": 7216 }, { "epoch": 0.9650976196844075, "grad_norm": 1.1091305017471313, "learning_rate": 1.586031852001153e-05, "loss": 1.0459, "step": 7217 }, { "epoch": 0.9652313452794865, "grad_norm": 0.9934311509132385, "learning_rate": 1.5859148608352134e-05, "loss": 0.8989, "step": 7218 }, { "epoch": 0.9653650708745654, "grad_norm": 0.9826712608337402, "learning_rate": 1.585797857456439e-05, "loss": 0.8234, "step": 7219 }, { "epoch": 0.9654987964696443, "grad_norm": 0.9722713828086853, "learning_rate": 1.5856808418672688e-05, "loss": 0.8972, "step": 7220 }, { "epoch": 0.9656325220647232, "grad_norm": 1.0086407661437988, "learning_rate": 1.585563814070142e-05, "loss": 0.8801, "step": 7221 }, { "epoch": 0.9657662476598021, "grad_norm": 1.0919053554534912, "learning_rate": 1.5854467740674983e-05, "loss": 0.8855, "step": 7222 }, { "epoch": 0.965899973254881, "grad_norm": 1.0863865613937378, "learning_rate": 1.585329721861776e-05, "loss": 0.8437, "step": 7223 }, { "epoch": 0.9660336988499599, "grad_norm": 1.0866085290908813, "learning_rate": 1.5852126574554162e-05, "loss": 0.8242, "step": 7224 }, { "epoch": 0.9661674244450388, "grad_norm": 0.9943642616271973, "learning_rate": 1.5850955808508582e-05, "loss": 0.8387, "step": 7225 }, { "epoch": 0.9663011500401176, "grad_norm": 0.9850141406059265, "learning_rate": 1.5849784920505434e-05, "loss": 0.9243, "step": 7226 }, { "epoch": 0.9664348756351966, "grad_norm": 1.0083836317062378, "learning_rate": 1.584861391056911e-05, "loss": 0.9777, "step": 7227 }, { "epoch": 0.9665686012302754, "grad_norm": 1.0428203344345093, "learning_rate": 1.5847442778724028e-05, "loss": 0.956, "step": 7228 }, { "epoch": 0.9667023268253544, "grad_norm": 1.1508619785308838, "learning_rate": 1.5846271524994597e-05, "loss": 0.9521, "step": 7229 }, { "epoch": 0.9668360524204332, "grad_norm": 1.158668041229248, "learning_rate": 1.584510014940523e-05, "loss": 1.0887, "step": 7230 }, { "epoch": 0.9669697780155122, "grad_norm": 1.0060038566589355, "learning_rate": 1.5843928651980344e-05, "loss": 0.9524, "step": 7231 }, { "epoch": 0.967103503610591, "grad_norm": 1.1059223413467407, "learning_rate": 1.5842757032744355e-05, "loss": 1.0009, "step": 7232 }, { "epoch": 0.96723722920567, "grad_norm": 1.037505865097046, "learning_rate": 1.5841585291721688e-05, "loss": 0.9598, "step": 7233 }, { "epoch": 0.9673709548007489, "grad_norm": 1.0458277463912964, "learning_rate": 1.5840413428936767e-05, "loss": 1.0886, "step": 7234 }, { "epoch": 0.9675046803958277, "grad_norm": 1.0625215768814087, "learning_rate": 1.5839241444414018e-05, "loss": 1.03, "step": 7235 }, { "epoch": 0.9676384059909067, "grad_norm": 0.8698956966400146, "learning_rate": 1.5838069338177865e-05, "loss": 0.8138, "step": 7236 }, { "epoch": 0.9677721315859855, "grad_norm": 1.1134084463119507, "learning_rate": 1.5836897110252745e-05, "loss": 1.0748, "step": 7237 }, { "epoch": 0.9679058571810645, "grad_norm": 1.0385980606079102, "learning_rate": 1.583572476066309e-05, "loss": 0.9196, "step": 7238 }, { "epoch": 0.9680395827761433, "grad_norm": 1.126681923866272, "learning_rate": 1.5834552289433334e-05, "loss": 0.9359, "step": 7239 }, { "epoch": 0.9681733083712223, "grad_norm": 1.0145320892333984, "learning_rate": 1.583337969658792e-05, "loss": 0.9498, "step": 7240 }, { "epoch": 0.9683070339663011, "grad_norm": 1.014756679534912, "learning_rate": 1.5832206982151288e-05, "loss": 0.9218, "step": 7241 }, { "epoch": 0.9684407595613801, "grad_norm": 1.1135059595108032, "learning_rate": 1.5831034146147882e-05, "loss": 0.9866, "step": 7242 }, { "epoch": 0.9685744851564589, "grad_norm": 1.0222879648208618, "learning_rate": 1.582986118860215e-05, "loss": 0.8613, "step": 7243 }, { "epoch": 0.9687082107515378, "grad_norm": 1.0654881000518799, "learning_rate": 1.582868810953854e-05, "loss": 0.9161, "step": 7244 }, { "epoch": 0.9688419363466167, "grad_norm": 1.0502616167068481, "learning_rate": 1.5827514908981504e-05, "loss": 0.9286, "step": 7245 }, { "epoch": 0.9689756619416956, "grad_norm": 1.0014979839324951, "learning_rate": 1.58263415869555e-05, "loss": 0.9352, "step": 7246 }, { "epoch": 0.9691093875367746, "grad_norm": 1.0085316896438599, "learning_rate": 1.5825168143484974e-05, "loss": 1.0052, "step": 7247 }, { "epoch": 0.9692431131318534, "grad_norm": 1.0513637065887451, "learning_rate": 1.5823994578594396e-05, "loss": 0.9846, "step": 7248 }, { "epoch": 0.9693768387269324, "grad_norm": 1.007477879524231, "learning_rate": 1.5822820892308222e-05, "loss": 0.9268, "step": 7249 }, { "epoch": 0.9695105643220112, "grad_norm": 1.0830000638961792, "learning_rate": 1.5821647084650917e-05, "loss": 0.9143, "step": 7250 }, { "epoch": 0.9696442899170902, "grad_norm": 1.082414150238037, "learning_rate": 1.582047315564695e-05, "loss": 0.9255, "step": 7251 }, { "epoch": 0.969778015512169, "grad_norm": 1.0586217641830444, "learning_rate": 1.5819299105320795e-05, "loss": 0.9145, "step": 7252 }, { "epoch": 0.969911741107248, "grad_norm": 1.059692621231079, "learning_rate": 1.5818124933696912e-05, "loss": 0.9215, "step": 7253 }, { "epoch": 0.9700454667023268, "grad_norm": 0.9990198016166687, "learning_rate": 1.5816950640799785e-05, "loss": 0.9542, "step": 7254 }, { "epoch": 0.9701791922974057, "grad_norm": 1.0688501596450806, "learning_rate": 1.581577622665389e-05, "loss": 0.9605, "step": 7255 }, { "epoch": 0.9703129178924846, "grad_norm": 1.0125993490219116, "learning_rate": 1.58146016912837e-05, "loss": 0.9259, "step": 7256 }, { "epoch": 0.9704466434875635, "grad_norm": 1.0894843339920044, "learning_rate": 1.5813427034713705e-05, "loss": 0.8929, "step": 7257 }, { "epoch": 0.9705803690826424, "grad_norm": 1.1687116622924805, "learning_rate": 1.5812252256968386e-05, "loss": 0.9441, "step": 7258 }, { "epoch": 0.9707140946777213, "grad_norm": 0.9743062853813171, "learning_rate": 1.581107735807223e-05, "loss": 0.9526, "step": 7259 }, { "epoch": 0.9708478202728003, "grad_norm": 1.0323352813720703, "learning_rate": 1.5809902338049722e-05, "loss": 0.9257, "step": 7260 }, { "epoch": 0.9709815458678791, "grad_norm": 1.093103051185608, "learning_rate": 1.5808727196925366e-05, "loss": 0.9672, "step": 7261 }, { "epoch": 0.9711152714629581, "grad_norm": 1.0001981258392334, "learning_rate": 1.580755193472365e-05, "loss": 0.9416, "step": 7262 }, { "epoch": 0.9712489970580369, "grad_norm": 0.978725790977478, "learning_rate": 1.580637655146907e-05, "loss": 0.8561, "step": 7263 }, { "epoch": 0.9713827226531158, "grad_norm": 0.9973233938217163, "learning_rate": 1.5805201047186124e-05, "loss": 0.8071, "step": 7264 }, { "epoch": 0.9715164482481947, "grad_norm": 1.031728982925415, "learning_rate": 1.580402542189932e-05, "loss": 0.9183, "step": 7265 }, { "epoch": 0.9716501738432736, "grad_norm": 0.9891021847724915, "learning_rate": 1.580284967563316e-05, "loss": 0.9383, "step": 7266 }, { "epoch": 0.9717838994383525, "grad_norm": 1.0382641553878784, "learning_rate": 1.580167380841215e-05, "loss": 0.8823, "step": 7267 }, { "epoch": 0.9719176250334314, "grad_norm": 1.1642438173294067, "learning_rate": 1.58004978202608e-05, "loss": 0.8949, "step": 7268 }, { "epoch": 0.9720513506285103, "grad_norm": 1.1263982057571411, "learning_rate": 1.5799321711203622e-05, "loss": 1.1033, "step": 7269 }, { "epoch": 0.9721850762235892, "grad_norm": 0.9878901243209839, "learning_rate": 1.579814548126514e-05, "loss": 0.8936, "step": 7270 }, { "epoch": 0.9723188018186681, "grad_norm": 0.9581366181373596, "learning_rate": 1.5796969130469857e-05, "loss": 0.8792, "step": 7271 }, { "epoch": 0.972452527413747, "grad_norm": 0.9988245368003845, "learning_rate": 1.57957926588423e-05, "loss": 1.0183, "step": 7272 }, { "epoch": 0.9725862530088258, "grad_norm": 1.1319457292556763, "learning_rate": 1.5794616066406993e-05, "loss": 0.9338, "step": 7273 }, { "epoch": 0.9727199786039048, "grad_norm": 1.068511724472046, "learning_rate": 1.579343935318846e-05, "loss": 0.9566, "step": 7274 }, { "epoch": 0.9728537041989836, "grad_norm": 1.039093017578125, "learning_rate": 1.5792262519211224e-05, "loss": 0.9718, "step": 7275 }, { "epoch": 0.9729874297940626, "grad_norm": 1.0397595167160034, "learning_rate": 1.579108556449982e-05, "loss": 0.9138, "step": 7276 }, { "epoch": 0.9731211553891415, "grad_norm": 1.037088394165039, "learning_rate": 1.578990848907878e-05, "loss": 0.9724, "step": 7277 }, { "epoch": 0.9732548809842204, "grad_norm": 0.9322558045387268, "learning_rate": 1.578873129297264e-05, "loss": 0.8115, "step": 7278 }, { "epoch": 0.9733886065792993, "grad_norm": 1.0813533067703247, "learning_rate": 1.5787553976205928e-05, "loss": 0.9492, "step": 7279 }, { "epoch": 0.9735223321743782, "grad_norm": 0.8945992588996887, "learning_rate": 1.5786376538803197e-05, "loss": 0.7536, "step": 7280 }, { "epoch": 0.9736560577694571, "grad_norm": 1.068833589553833, "learning_rate": 1.578519898078898e-05, "loss": 0.9726, "step": 7281 }, { "epoch": 0.9737897833645359, "grad_norm": 1.0371111631393433, "learning_rate": 1.578402130218783e-05, "loss": 0.971, "step": 7282 }, { "epoch": 0.9739235089596149, "grad_norm": 1.0172487497329712, "learning_rate": 1.578284350302429e-05, "loss": 0.9343, "step": 7283 }, { "epoch": 0.9740572345546937, "grad_norm": 0.942463219165802, "learning_rate": 1.5781665583322913e-05, "loss": 0.8904, "step": 7284 }, { "epoch": 0.9741909601497727, "grad_norm": 0.9291203022003174, "learning_rate": 1.5780487543108246e-05, "loss": 0.8689, "step": 7285 }, { "epoch": 0.9743246857448515, "grad_norm": 1.0542680025100708, "learning_rate": 1.577930938240485e-05, "loss": 0.8416, "step": 7286 }, { "epoch": 0.9744584113399305, "grad_norm": 1.0547653436660767, "learning_rate": 1.5778131101237275e-05, "loss": 0.8744, "step": 7287 }, { "epoch": 0.9745921369350093, "grad_norm": 1.0307928323745728, "learning_rate": 1.577695269963009e-05, "loss": 0.9741, "step": 7288 }, { "epoch": 0.9747258625300883, "grad_norm": 0.9972244501113892, "learning_rate": 1.577577417760785e-05, "loss": 0.9564, "step": 7289 }, { "epoch": 0.9748595881251672, "grad_norm": 1.143341064453125, "learning_rate": 1.577459553519513e-05, "loss": 0.9124, "step": 7290 }, { "epoch": 0.974993313720246, "grad_norm": 1.1272506713867188, "learning_rate": 1.577341677241649e-05, "loss": 0.873, "step": 7291 }, { "epoch": 0.975127039315325, "grad_norm": 0.9385217428207397, "learning_rate": 1.57722378892965e-05, "loss": 0.8888, "step": 7292 }, { "epoch": 0.9752607649104038, "grad_norm": 1.1405953168869019, "learning_rate": 1.5771058885859735e-05, "loss": 0.8609, "step": 7293 }, { "epoch": 0.9753944905054828, "grad_norm": 0.9752020835876465, "learning_rate": 1.5769879762130775e-05, "loss": 0.8865, "step": 7294 }, { "epoch": 0.9755282161005616, "grad_norm": 1.0191373825073242, "learning_rate": 1.5768700518134184e-05, "loss": 0.868, "step": 7295 }, { "epoch": 0.9756619416956406, "grad_norm": 1.1318840980529785, "learning_rate": 1.5767521153894555e-05, "loss": 0.9369, "step": 7296 }, { "epoch": 0.9757956672907194, "grad_norm": 1.0205382108688354, "learning_rate": 1.5766341669436468e-05, "loss": 0.9231, "step": 7297 }, { "epoch": 0.9759293928857984, "grad_norm": 1.1478955745697021, "learning_rate": 1.5765162064784504e-05, "loss": 0.972, "step": 7298 }, { "epoch": 0.9760631184808772, "grad_norm": 1.1083965301513672, "learning_rate": 1.5763982339963254e-05, "loss": 0.957, "step": 7299 }, { "epoch": 0.9761968440759561, "grad_norm": 1.1210089921951294, "learning_rate": 1.576280249499731e-05, "loss": 0.8837, "step": 7300 }, { "epoch": 0.976330569671035, "grad_norm": 1.0822051763534546, "learning_rate": 1.576162252991126e-05, "loss": 0.984, "step": 7301 }, { "epoch": 0.9764642952661139, "grad_norm": 1.034590244293213, "learning_rate": 1.5760442444729703e-05, "loss": 0.8801, "step": 7302 }, { "epoch": 0.9765980208611929, "grad_norm": 1.1002167463302612, "learning_rate": 1.5759262239477237e-05, "loss": 1.0588, "step": 7303 }, { "epoch": 0.9767317464562717, "grad_norm": 0.9665080308914185, "learning_rate": 1.5758081914178457e-05, "loss": 0.8592, "step": 7304 }, { "epoch": 0.9768654720513507, "grad_norm": 1.0083622932434082, "learning_rate": 1.575690146885797e-05, "loss": 0.9497, "step": 7305 }, { "epoch": 0.9769991976464295, "grad_norm": 1.1475253105163574, "learning_rate": 1.575572090354038e-05, "loss": 0.9841, "step": 7306 }, { "epoch": 0.9771329232415085, "grad_norm": 1.1957204341888428, "learning_rate": 1.5754540218250296e-05, "loss": 1.0083, "step": 7307 }, { "epoch": 0.9772666488365873, "grad_norm": 1.1623096466064453, "learning_rate": 1.5753359413012332e-05, "loss": 0.9365, "step": 7308 }, { "epoch": 0.9774003744316663, "grad_norm": 0.9830026030540466, "learning_rate": 1.5752178487851087e-05, "loss": 0.7962, "step": 7309 }, { "epoch": 0.9775341000267451, "grad_norm": 0.9934231042861938, "learning_rate": 1.575099744279119e-05, "loss": 0.8736, "step": 7310 }, { "epoch": 0.977667825621824, "grad_norm": 0.9317435026168823, "learning_rate": 1.574981627785726e-05, "loss": 0.9831, "step": 7311 }, { "epoch": 0.9778015512169029, "grad_norm": 1.062959909439087, "learning_rate": 1.5748634993073906e-05, "loss": 0.9071, "step": 7312 }, { "epoch": 0.9779352768119818, "grad_norm": 1.0343540906906128, "learning_rate": 1.5747453588465758e-05, "loss": 0.8875, "step": 7313 }, { "epoch": 0.9780690024070607, "grad_norm": 1.0103228092193604, "learning_rate": 1.5746272064057438e-05, "loss": 0.8804, "step": 7314 }, { "epoch": 0.9782027280021396, "grad_norm": 1.0440034866333008, "learning_rate": 1.574509041987358e-05, "loss": 1.019, "step": 7315 }, { "epoch": 0.9783364535972185, "grad_norm": 1.0099412202835083, "learning_rate": 1.5743908655938803e-05, "loss": 0.9038, "step": 7316 }, { "epoch": 0.9784701791922974, "grad_norm": 1.095073938369751, "learning_rate": 1.574272677227775e-05, "loss": 0.9544, "step": 7317 }, { "epoch": 0.9786039047873764, "grad_norm": 0.9910484552383423, "learning_rate": 1.5741544768915055e-05, "loss": 0.8614, "step": 7318 }, { "epoch": 0.9787376303824552, "grad_norm": 1.0039806365966797, "learning_rate": 1.574036264587535e-05, "loss": 0.908, "step": 7319 }, { "epoch": 0.978871355977534, "grad_norm": 1.0282633304595947, "learning_rate": 1.573918040318328e-05, "loss": 1.0054, "step": 7320 }, { "epoch": 0.979005081572613, "grad_norm": 1.112988829612732, "learning_rate": 1.5737998040863484e-05, "loss": 0.9732, "step": 7321 }, { "epoch": 0.9791388071676919, "grad_norm": 0.9976562857627869, "learning_rate": 1.5736815558940612e-05, "loss": 0.9111, "step": 7322 }, { "epoch": 0.9792725327627708, "grad_norm": 1.100832462310791, "learning_rate": 1.573563295743931e-05, "loss": 1.033, "step": 7323 }, { "epoch": 0.9794062583578497, "grad_norm": 0.9580290913581848, "learning_rate": 1.5734450236384225e-05, "loss": 0.8979, "step": 7324 }, { "epoch": 0.9795399839529286, "grad_norm": 1.113832950592041, "learning_rate": 1.5733267395800014e-05, "loss": 0.9109, "step": 7325 }, { "epoch": 0.9796737095480075, "grad_norm": 1.127493977546692, "learning_rate": 1.5732084435711326e-05, "loss": 0.9898, "step": 7326 }, { "epoch": 0.9798074351430864, "grad_norm": 0.9688475728034973, "learning_rate": 1.573090135614283e-05, "loss": 0.8576, "step": 7327 }, { "epoch": 0.9799411607381653, "grad_norm": 1.1061797142028809, "learning_rate": 1.5729718157119176e-05, "loss": 0.9895, "step": 7328 }, { "epoch": 0.9800748863332441, "grad_norm": 1.051244854927063, "learning_rate": 1.5728534838665027e-05, "loss": 0.9773, "step": 7329 }, { "epoch": 0.9802086119283231, "grad_norm": 1.1054096221923828, "learning_rate": 1.5727351400805054e-05, "loss": 0.9508, "step": 7330 }, { "epoch": 0.9803423375234019, "grad_norm": 1.0104281902313232, "learning_rate": 1.572616784356392e-05, "loss": 0.8988, "step": 7331 }, { "epoch": 0.9804760631184809, "grad_norm": 1.1442021131515503, "learning_rate": 1.5724984166966297e-05, "loss": 1.0162, "step": 7332 }, { "epoch": 0.9806097887135597, "grad_norm": 1.1502915620803833, "learning_rate": 1.572380037103686e-05, "loss": 0.9163, "step": 7333 }, { "epoch": 0.9807435143086387, "grad_norm": 0.9938676357269287, "learning_rate": 1.572261645580028e-05, "loss": 0.9383, "step": 7334 }, { "epoch": 0.9808772399037176, "grad_norm": 1.1184587478637695, "learning_rate": 1.572143242128123e-05, "loss": 0.9633, "step": 7335 }, { "epoch": 0.9810109654987965, "grad_norm": 0.9877696633338928, "learning_rate": 1.57202482675044e-05, "loss": 0.8644, "step": 7336 }, { "epoch": 0.9811446910938754, "grad_norm": 1.213194727897644, "learning_rate": 1.5719063994494474e-05, "loss": 0.8408, "step": 7337 }, { "epoch": 0.9812784166889542, "grad_norm": 0.9884855151176453, "learning_rate": 1.5717879602276123e-05, "loss": 0.9135, "step": 7338 }, { "epoch": 0.9814121422840332, "grad_norm": 1.1477363109588623, "learning_rate": 1.571669509087405e-05, "loss": 0.9605, "step": 7339 }, { "epoch": 0.981545867879112, "grad_norm": 1.0498074293136597, "learning_rate": 1.5715510460312936e-05, "loss": 0.9601, "step": 7340 }, { "epoch": 0.981679593474191, "grad_norm": 1.1118855476379395, "learning_rate": 1.571432571061747e-05, "loss": 1.0461, "step": 7341 }, { "epoch": 0.9818133190692698, "grad_norm": 1.0796371698379517, "learning_rate": 1.571314084181236e-05, "loss": 0.9027, "step": 7342 }, { "epoch": 0.9819470446643488, "grad_norm": 1.021366834640503, "learning_rate": 1.5711955853922295e-05, "loss": 0.882, "step": 7343 }, { "epoch": 0.9820807702594276, "grad_norm": 1.1015697717666626, "learning_rate": 1.5710770746971973e-05, "loss": 1.147, "step": 7344 }, { "epoch": 0.9822144958545066, "grad_norm": 1.0094363689422607, "learning_rate": 1.5709585520986098e-05, "loss": 0.896, "step": 7345 }, { "epoch": 0.9823482214495854, "grad_norm": 1.0328314304351807, "learning_rate": 1.570840017598938e-05, "loss": 0.8975, "step": 7346 }, { "epoch": 0.9824819470446643, "grad_norm": 1.023192286491394, "learning_rate": 1.5707214712006523e-05, "loss": 0.9552, "step": 7347 }, { "epoch": 0.9826156726397433, "grad_norm": 1.0753324031829834, "learning_rate": 1.5706029129062235e-05, "loss": 0.9155, "step": 7348 }, { "epoch": 0.9827493982348221, "grad_norm": 0.9969714283943176, "learning_rate": 1.570484342718123e-05, "loss": 0.9115, "step": 7349 }, { "epoch": 0.9828831238299011, "grad_norm": 0.9892032146453857, "learning_rate": 1.570365760638822e-05, "loss": 0.8759, "step": 7350 }, { "epoch": 0.9830168494249799, "grad_norm": 0.9327731728553772, "learning_rate": 1.5702471666707932e-05, "loss": 0.8997, "step": 7351 }, { "epoch": 0.9831505750200589, "grad_norm": 1.0576577186584473, "learning_rate": 1.5701285608165073e-05, "loss": 1.0087, "step": 7352 }, { "epoch": 0.9832843006151377, "grad_norm": 0.9899141788482666, "learning_rate": 1.570009943078437e-05, "loss": 0.9294, "step": 7353 }, { "epoch": 0.9834180262102167, "grad_norm": 1.059346079826355, "learning_rate": 1.5698913134590552e-05, "loss": 0.9147, "step": 7354 }, { "epoch": 0.9835517518052955, "grad_norm": 1.059155821800232, "learning_rate": 1.5697726719608345e-05, "loss": 1.0502, "step": 7355 }, { "epoch": 0.9836854774003745, "grad_norm": 0.9704837203025818, "learning_rate": 1.5696540185862472e-05, "loss": 0.8843, "step": 7356 }, { "epoch": 0.9838192029954533, "grad_norm": 0.9326636791229248, "learning_rate": 1.5695353533377674e-05, "loss": 0.8452, "step": 7357 }, { "epoch": 0.9839529285905322, "grad_norm": 1.3575596809387207, "learning_rate": 1.5694166762178677e-05, "loss": 1.0068, "step": 7358 }, { "epoch": 0.9840866541856111, "grad_norm": 0.9865586757659912, "learning_rate": 1.569297987229023e-05, "loss": 0.909, "step": 7359 }, { "epoch": 0.98422037978069, "grad_norm": 1.0550199747085571, "learning_rate": 1.5691792863737053e-05, "loss": 0.8825, "step": 7360 }, { "epoch": 0.984354105375769, "grad_norm": 1.013679027557373, "learning_rate": 1.569060573654391e-05, "loss": 0.9704, "step": 7361 }, { "epoch": 0.9844878309708478, "grad_norm": 0.8777495622634888, "learning_rate": 1.5689418490735533e-05, "loss": 0.8687, "step": 7362 }, { "epoch": 0.9846215565659268, "grad_norm": 1.0988759994506836, "learning_rate": 1.568823112633667e-05, "loss": 0.9328, "step": 7363 }, { "epoch": 0.9847552821610056, "grad_norm": 1.0776951313018799, "learning_rate": 1.5687043643372076e-05, "loss": 0.9867, "step": 7364 }, { "epoch": 0.9848890077560846, "grad_norm": 1.1265208721160889, "learning_rate": 1.5685856041866495e-05, "loss": 1.013, "step": 7365 }, { "epoch": 0.9850227333511634, "grad_norm": 1.0599254369735718, "learning_rate": 1.5684668321844688e-05, "loss": 0.8655, "step": 7366 }, { "epoch": 0.9851564589462423, "grad_norm": 0.9927284121513367, "learning_rate": 1.568348048333141e-05, "loss": 0.8722, "step": 7367 }, { "epoch": 0.9852901845413212, "grad_norm": 1.0686157941818237, "learning_rate": 1.568229252635142e-05, "loss": 0.8717, "step": 7368 }, { "epoch": 0.9854239101364001, "grad_norm": 1.0806455612182617, "learning_rate": 1.5681104450929478e-05, "loss": 0.9112, "step": 7369 }, { "epoch": 0.985557635731479, "grad_norm": 1.0151512622833252, "learning_rate": 1.5679916257090352e-05, "loss": 1.0006, "step": 7370 }, { "epoch": 0.9856913613265579, "grad_norm": 1.1096863746643066, "learning_rate": 1.5678727944858805e-05, "loss": 0.9463, "step": 7371 }, { "epoch": 0.9858250869216368, "grad_norm": 0.9373346567153931, "learning_rate": 1.5677539514259608e-05, "loss": 1.045, "step": 7372 }, { "epoch": 0.9859588125167157, "grad_norm": 1.094415307044983, "learning_rate": 1.5676350965317532e-05, "loss": 0.9842, "step": 7373 }, { "epoch": 0.9860925381117946, "grad_norm": 1.0701595544815063, "learning_rate": 1.5675162298057353e-05, "loss": 1.0295, "step": 7374 }, { "epoch": 0.9862262637068735, "grad_norm": 1.0825715065002441, "learning_rate": 1.5673973512503846e-05, "loss": 0.9807, "step": 7375 }, { "epoch": 0.9863599893019523, "grad_norm": 1.0876411199569702, "learning_rate": 1.567278460868179e-05, "loss": 0.9936, "step": 7376 }, { "epoch": 0.9864937148970313, "grad_norm": 0.9611084461212158, "learning_rate": 1.5671595586615968e-05, "loss": 0.9066, "step": 7377 }, { "epoch": 0.9866274404921102, "grad_norm": 0.972186803817749, "learning_rate": 1.5670406446331162e-05, "loss": 0.9281, "step": 7378 }, { "epoch": 0.9867611660871891, "grad_norm": 0.9542217254638672, "learning_rate": 1.566921718785216e-05, "loss": 0.9863, "step": 7379 }, { "epoch": 0.986894891682268, "grad_norm": 0.9953468441963196, "learning_rate": 1.5668027811203752e-05, "loss": 0.8012, "step": 7380 }, { "epoch": 0.9870286172773469, "grad_norm": 1.2139370441436768, "learning_rate": 1.5666838316410727e-05, "loss": 0.8389, "step": 7381 }, { "epoch": 0.9871623428724258, "grad_norm": 0.953430712223053, "learning_rate": 1.566564870349788e-05, "loss": 0.8267, "step": 7382 }, { "epoch": 0.9872960684675047, "grad_norm": 1.0456918478012085, "learning_rate": 1.566445897249001e-05, "loss": 0.841, "step": 7383 }, { "epoch": 0.9874297940625836, "grad_norm": 1.1001205444335938, "learning_rate": 1.566326912341191e-05, "loss": 0.9885, "step": 7384 }, { "epoch": 0.9875635196576624, "grad_norm": 0.989668071269989, "learning_rate": 1.566207915628838e-05, "loss": 0.8151, "step": 7385 }, { "epoch": 0.9876972452527414, "grad_norm": 1.3655250072479248, "learning_rate": 1.5660889071144233e-05, "loss": 0.9882, "step": 7386 }, { "epoch": 0.9878309708478202, "grad_norm": 1.140180230140686, "learning_rate": 1.5659698868004273e-05, "loss": 0.912, "step": 7387 }, { "epoch": 0.9879646964428992, "grad_norm": 1.0537627935409546, "learning_rate": 1.56585085468933e-05, "loss": 0.9008, "step": 7388 }, { "epoch": 0.988098422037978, "grad_norm": 1.3320255279541016, "learning_rate": 1.5657318107836133e-05, "loss": 0.977, "step": 7389 }, { "epoch": 0.988232147633057, "grad_norm": 1.1631561517715454, "learning_rate": 1.5656127550857582e-05, "loss": 1.0036, "step": 7390 }, { "epoch": 0.9883658732281358, "grad_norm": 0.962174117565155, "learning_rate": 1.565493687598247e-05, "loss": 0.9014, "step": 7391 }, { "epoch": 0.9884995988232148, "grad_norm": 1.0408951044082642, "learning_rate": 1.5653746083235605e-05, "loss": 0.9889, "step": 7392 }, { "epoch": 0.9886333244182937, "grad_norm": 1.0736782550811768, "learning_rate": 1.5652555172641815e-05, "loss": 0.8964, "step": 7393 }, { "epoch": 0.9887670500133725, "grad_norm": 0.9619301557540894, "learning_rate": 1.565136414422592e-05, "loss": 0.8366, "step": 7394 }, { "epoch": 0.9889007756084515, "grad_norm": 0.990788996219635, "learning_rate": 1.5650172998012746e-05, "loss": 0.8359, "step": 7395 }, { "epoch": 0.9890345012035303, "grad_norm": 1.0370807647705078, "learning_rate": 1.5648981734027128e-05, "loss": 0.8799, "step": 7396 }, { "epoch": 0.9891682267986093, "grad_norm": 0.9267067313194275, "learning_rate": 1.5647790352293887e-05, "loss": 0.943, "step": 7397 }, { "epoch": 0.9893019523936881, "grad_norm": 0.9224997758865356, "learning_rate": 1.5646598852837862e-05, "loss": 0.9372, "step": 7398 }, { "epoch": 0.9894356779887671, "grad_norm": 1.0839706659317017, "learning_rate": 1.5645407235683885e-05, "loss": 0.9052, "step": 7399 }, { "epoch": 0.9895694035838459, "grad_norm": 1.0069739818572998, "learning_rate": 1.5644215500856795e-05, "loss": 0.8346, "step": 7400 }, { "epoch": 0.9897031291789249, "grad_norm": 0.9451998472213745, "learning_rate": 1.564302364838144e-05, "loss": 0.8905, "step": 7401 }, { "epoch": 0.9898368547740037, "grad_norm": 0.9637553095817566, "learning_rate": 1.564183167828265e-05, "loss": 0.7804, "step": 7402 }, { "epoch": 0.9899705803690827, "grad_norm": 1.0743519067764282, "learning_rate": 1.5640639590585283e-05, "loss": 0.8407, "step": 7403 }, { "epoch": 0.9901043059641615, "grad_norm": 1.0005062818527222, "learning_rate": 1.5639447385314176e-05, "loss": 0.9274, "step": 7404 }, { "epoch": 0.9902380315592404, "grad_norm": 1.0159330368041992, "learning_rate": 1.563825506249419e-05, "loss": 0.889, "step": 7405 }, { "epoch": 0.9903717571543194, "grad_norm": 1.1005687713623047, "learning_rate": 1.5637062622150168e-05, "loss": 0.9576, "step": 7406 }, { "epoch": 0.9905054827493982, "grad_norm": 1.0260437726974487, "learning_rate": 1.563587006430697e-05, "loss": 1.0048, "step": 7407 }, { "epoch": 0.9906392083444772, "grad_norm": 1.042299509048462, "learning_rate": 1.5634677388989457e-05, "loss": 0.9219, "step": 7408 }, { "epoch": 0.990772933939556, "grad_norm": 1.1027491092681885, "learning_rate": 1.5633484596222485e-05, "loss": 0.8893, "step": 7409 }, { "epoch": 0.990906659534635, "grad_norm": 1.0449978113174438, "learning_rate": 1.5632291686030915e-05, "loss": 0.9282, "step": 7410 }, { "epoch": 0.9910403851297138, "grad_norm": 1.004031777381897, "learning_rate": 1.5631098658439613e-05, "loss": 0.834, "step": 7411 }, { "epoch": 0.9911741107247928, "grad_norm": 0.9347333908081055, "learning_rate": 1.562990551347345e-05, "loss": 0.9026, "step": 7412 }, { "epoch": 0.9913078363198716, "grad_norm": 1.027057409286499, "learning_rate": 1.5628712251157298e-05, "loss": 1.1266, "step": 7413 }, { "epoch": 0.9914415619149505, "grad_norm": 1.117738127708435, "learning_rate": 1.562751887151602e-05, "loss": 0.9656, "step": 7414 }, { "epoch": 0.9915752875100294, "grad_norm": 1.0061217546463013, "learning_rate": 1.5626325374574495e-05, "loss": 0.8059, "step": 7415 }, { "epoch": 0.9917090131051083, "grad_norm": 1.0674973726272583, "learning_rate": 1.5625131760357603e-05, "loss": 0.9177, "step": 7416 }, { "epoch": 0.9918427387001872, "grad_norm": 0.9088889360427856, "learning_rate": 1.5623938028890222e-05, "loss": 0.9132, "step": 7417 }, { "epoch": 0.9919764642952661, "grad_norm": 1.1138849258422852, "learning_rate": 1.5622744180197236e-05, "loss": 0.8085, "step": 7418 }, { "epoch": 0.992110189890345, "grad_norm": 0.9512990713119507, "learning_rate": 1.5621550214303526e-05, "loss": 0.8653, "step": 7419 }, { "epoch": 0.9922439154854239, "grad_norm": 1.0697312355041504, "learning_rate": 1.5620356131233982e-05, "loss": 0.9056, "step": 7420 }, { "epoch": 0.9923776410805029, "grad_norm": 1.0702040195465088, "learning_rate": 1.5619161931013494e-05, "loss": 0.954, "step": 7421 }, { "epoch": 0.9925113666755817, "grad_norm": 0.9819034337997437, "learning_rate": 1.561796761366695e-05, "loss": 0.8585, "step": 7422 }, { "epoch": 0.9926450922706606, "grad_norm": 0.9655911326408386, "learning_rate": 1.5616773179219248e-05, "loss": 1.0147, "step": 7423 }, { "epoch": 0.9927788178657395, "grad_norm": 1.0780658721923828, "learning_rate": 1.5615578627695283e-05, "loss": 0.8669, "step": 7424 }, { "epoch": 0.9929125434608184, "grad_norm": 0.9833524227142334, "learning_rate": 1.5614383959119958e-05, "loss": 0.8619, "step": 7425 }, { "epoch": 0.9930462690558973, "grad_norm": 0.9126155376434326, "learning_rate": 1.5613189173518167e-05, "loss": 0.84, "step": 7426 }, { "epoch": 0.9931799946509762, "grad_norm": 1.0242676734924316, "learning_rate": 1.561199427091482e-05, "loss": 0.9353, "step": 7427 }, { "epoch": 0.9933137202460551, "grad_norm": 1.106041669845581, "learning_rate": 1.5610799251334825e-05, "loss": 0.9128, "step": 7428 }, { "epoch": 0.993447445841134, "grad_norm": 1.106690526008606, "learning_rate": 1.5609604114803086e-05, "loss": 0.9987, "step": 7429 }, { "epoch": 0.9935811714362129, "grad_norm": 1.1168925762176514, "learning_rate": 1.560840886134452e-05, "loss": 0.9451, "step": 7430 }, { "epoch": 0.9937148970312918, "grad_norm": 1.088492512702942, "learning_rate": 1.5607213490984038e-05, "loss": 0.8905, "step": 7431 }, { "epoch": 0.9938486226263706, "grad_norm": 1.0752133131027222, "learning_rate": 1.5606018003746554e-05, "loss": 0.8631, "step": 7432 }, { "epoch": 0.9939823482214496, "grad_norm": 1.1848264932632446, "learning_rate": 1.560482239965699e-05, "loss": 1.0006, "step": 7433 }, { "epoch": 0.9941160738165284, "grad_norm": 1.0940881967544556, "learning_rate": 1.5603626678740266e-05, "loss": 0.7483, "step": 7434 }, { "epoch": 0.9942497994116074, "grad_norm": 1.1784415245056152, "learning_rate": 1.5602430841021304e-05, "loss": 1.0622, "step": 7435 }, { "epoch": 0.9943835250066863, "grad_norm": 1.0477439165115356, "learning_rate": 1.5601234886525034e-05, "loss": 1.0632, "step": 7436 }, { "epoch": 0.9945172506017652, "grad_norm": 0.9544627666473389, "learning_rate": 1.560003881527638e-05, "loss": 0.869, "step": 7437 }, { "epoch": 0.9946509761968441, "grad_norm": 0.9303823709487915, "learning_rate": 1.559884262730028e-05, "loss": 0.8926, "step": 7438 }, { "epoch": 0.994784701791923, "grad_norm": 1.0375386476516724, "learning_rate": 1.5597646322621663e-05, "loss": 0.8717, "step": 7439 }, { "epoch": 0.9949184273870019, "grad_norm": 1.0868362188339233, "learning_rate": 1.559644990126546e-05, "loss": 0.9887, "step": 7440 }, { "epoch": 0.9950521529820807, "grad_norm": 1.0401692390441895, "learning_rate": 1.559525336325662e-05, "loss": 0.845, "step": 7441 }, { "epoch": 0.9951858785771597, "grad_norm": 1.1592814922332764, "learning_rate": 1.5594056708620073e-05, "loss": 0.9772, "step": 7442 }, { "epoch": 0.9953196041722385, "grad_norm": 1.0790382623672485, "learning_rate": 1.559285993738077e-05, "loss": 0.9896, "step": 7443 }, { "epoch": 0.9954533297673175, "grad_norm": 0.9497143030166626, "learning_rate": 1.559166304956365e-05, "loss": 0.9018, "step": 7444 }, { "epoch": 0.9955870553623963, "grad_norm": 0.9990165829658508, "learning_rate": 1.5590466045193666e-05, "loss": 0.831, "step": 7445 }, { "epoch": 0.9957207809574753, "grad_norm": 1.0630600452423096, "learning_rate": 1.5589268924295768e-05, "loss": 0.9651, "step": 7446 }, { "epoch": 0.9958545065525541, "grad_norm": 1.0353327989578247, "learning_rate": 1.558807168689491e-05, "loss": 0.8993, "step": 7447 }, { "epoch": 0.9959882321476331, "grad_norm": 1.175265908241272, "learning_rate": 1.558687433301604e-05, "loss": 1.0157, "step": 7448 }, { "epoch": 0.996121957742712, "grad_norm": 1.012931227684021, "learning_rate": 1.558567686268412e-05, "loss": 0.9541, "step": 7449 }, { "epoch": 0.9962556833377908, "grad_norm": 0.9576447010040283, "learning_rate": 1.5584479275924112e-05, "loss": 0.9133, "step": 7450 }, { "epoch": 0.9963894089328698, "grad_norm": 1.0208418369293213, "learning_rate": 1.558328157276098e-05, "loss": 0.9846, "step": 7451 }, { "epoch": 0.9965231345279486, "grad_norm": 1.1926788091659546, "learning_rate": 1.5582083753219682e-05, "loss": 0.991, "step": 7452 }, { "epoch": 0.9966568601230276, "grad_norm": 1.2596707344055176, "learning_rate": 1.5580885817325192e-05, "loss": 1.0054, "step": 7453 }, { "epoch": 0.9967905857181064, "grad_norm": 1.0863382816314697, "learning_rate": 1.557968776510248e-05, "loss": 0.9511, "step": 7454 }, { "epoch": 0.9969243113131854, "grad_norm": 1.040247917175293, "learning_rate": 1.5578489596576513e-05, "loss": 0.9496, "step": 7455 }, { "epoch": 0.9970580369082642, "grad_norm": 0.9456450343132019, "learning_rate": 1.5577291311772268e-05, "loss": 0.8919, "step": 7456 }, { "epoch": 0.9971917625033432, "grad_norm": 1.0574474334716797, "learning_rate": 1.557609291071472e-05, "loss": 0.9092, "step": 7457 }, { "epoch": 0.997325488098422, "grad_norm": 0.9996885061264038, "learning_rate": 1.5574894393428856e-05, "loss": 0.9425, "step": 7458 }, { "epoch": 0.997459213693501, "grad_norm": 0.9460115432739258, "learning_rate": 1.557369575993965e-05, "loss": 0.8576, "step": 7459 }, { "epoch": 0.9975929392885798, "grad_norm": 1.1186572313308716, "learning_rate": 1.5572497010272093e-05, "loss": 0.9487, "step": 7460 }, { "epoch": 0.9977266648836587, "grad_norm": 1.111188530921936, "learning_rate": 1.5571298144451165e-05, "loss": 1.0133, "step": 7461 }, { "epoch": 0.9978603904787376, "grad_norm": 1.029048204421997, "learning_rate": 1.557009916250186e-05, "loss": 0.8549, "step": 7462 }, { "epoch": 0.9979941160738165, "grad_norm": 0.9296038150787354, "learning_rate": 1.5568900064449164e-05, "loss": 0.8791, "step": 7463 }, { "epoch": 0.9981278416688955, "grad_norm": 1.2395427227020264, "learning_rate": 1.556770085031808e-05, "loss": 0.9524, "step": 7464 }, { "epoch": 0.9982615672639743, "grad_norm": 0.9644502997398376, "learning_rate": 1.5566501520133595e-05, "loss": 0.8514, "step": 7465 }, { "epoch": 0.9983952928590533, "grad_norm": 1.0432003736495972, "learning_rate": 1.5565302073920715e-05, "loss": 0.8675, "step": 7466 }, { "epoch": 0.9985290184541321, "grad_norm": 0.9974961280822754, "learning_rate": 1.5564102511704436e-05, "loss": 0.8368, "step": 7467 }, { "epoch": 0.9986627440492111, "grad_norm": 1.2333546876907349, "learning_rate": 1.5562902833509773e-05, "loss": 1.0585, "step": 7468 }, { "epoch": 0.9987964696442899, "grad_norm": 1.0616283416748047, "learning_rate": 1.5561703039361715e-05, "loss": 0.8662, "step": 7469 }, { "epoch": 0.9989301952393688, "grad_norm": 1.1124180555343628, "learning_rate": 1.556050312928528e-05, "loss": 0.9639, "step": 7470 }, { "epoch": 0.9990639208344477, "grad_norm": 1.0908548831939697, "learning_rate": 1.555930310330548e-05, "loss": 0.9877, "step": 7471 }, { "epoch": 0.9991976464295266, "grad_norm": 0.9785193204879761, "learning_rate": 1.5558102961447327e-05, "loss": 0.9318, "step": 7472 }, { "epoch": 0.9993313720246055, "grad_norm": 1.0105390548706055, "learning_rate": 1.5556902703735836e-05, "loss": 0.8848, "step": 7473 }, { "epoch": 0.9994650976196844, "grad_norm": 1.0255123376846313, "learning_rate": 1.5555702330196024e-05, "loss": 0.9047, "step": 7474 }, { "epoch": 0.9995988232147633, "grad_norm": 1.240639090538025, "learning_rate": 1.5554501840852915e-05, "loss": 1.1178, "step": 7475 }, { "epoch": 0.9997325488098422, "grad_norm": 1.174062967300415, "learning_rate": 1.5553301235731527e-05, "loss": 0.9829, "step": 7476 }, { "epoch": 0.9998662744049212, "grad_norm": 1.141680121421814, "learning_rate": 1.5552100514856895e-05, "loss": 1.0286, "step": 7477 }, { "epoch": 1.0, "grad_norm": 1.0114703178405762, "learning_rate": 1.555089967825403e-05, "loss": 0.967, "step": 7478 }, { "epoch": 1.0001337255950788, "grad_norm": 1.0181385278701782, "learning_rate": 1.554969872594798e-05, "loss": 0.6743, "step": 7479 }, { "epoch": 1.0002674511901577, "grad_norm": 1.0982142686843872, "learning_rate": 1.554849765796377e-05, "loss": 0.8714, "step": 7480 }, { "epoch": 1.0004011767852368, "grad_norm": 1.0393790006637573, "learning_rate": 1.5547296474326438e-05, "loss": 0.7681, "step": 7481 }, { "epoch": 1.0005349023803156, "grad_norm": 0.9464859366416931, "learning_rate": 1.554609517506102e-05, "loss": 0.7087, "step": 7482 }, { "epoch": 1.0006686279753945, "grad_norm": 0.9220442175865173, "learning_rate": 1.5544893760192546e-05, "loss": 0.768, "step": 7483 }, { "epoch": 1.0008023535704733, "grad_norm": 0.919678270816803, "learning_rate": 1.5543692229746076e-05, "loss": 0.7483, "step": 7484 }, { "epoch": 1.0009360791655524, "grad_norm": 0.9600428938865662, "learning_rate": 1.5542490583746642e-05, "loss": 0.811, "step": 7485 }, { "epoch": 1.0010698047606312, "grad_norm": 0.8796353340148926, "learning_rate": 1.5541288822219297e-05, "loss": 0.6771, "step": 7486 }, { "epoch": 1.00120353035571, "grad_norm": 0.9270517230033875, "learning_rate": 1.554008694518909e-05, "loss": 0.7446, "step": 7487 }, { "epoch": 1.001337255950789, "grad_norm": 1.0358089208602905, "learning_rate": 1.5538884952681067e-05, "loss": 0.7442, "step": 7488 }, { "epoch": 1.0014709815458678, "grad_norm": 1.012755274772644, "learning_rate": 1.5537682844720296e-05, "loss": 0.7974, "step": 7489 }, { "epoch": 1.0016047071409468, "grad_norm": 1.0328119993209839, "learning_rate": 1.5536480621331818e-05, "loss": 0.7532, "step": 7490 }, { "epoch": 1.0017384327360257, "grad_norm": 0.9924728870391846, "learning_rate": 1.55352782825407e-05, "loss": 0.7145, "step": 7491 }, { "epoch": 1.0018721583311045, "grad_norm": 1.0950642824172974, "learning_rate": 1.5534075828372004e-05, "loss": 0.7319, "step": 7492 }, { "epoch": 1.0020058839261834, "grad_norm": 1.0122344493865967, "learning_rate": 1.5532873258850796e-05, "loss": 0.7377, "step": 7493 }, { "epoch": 1.0021396095212625, "grad_norm": 1.0589524507522583, "learning_rate": 1.5531670574002136e-05, "loss": 0.7842, "step": 7494 }, { "epoch": 1.0022733351163413, "grad_norm": 1.0826622247695923, "learning_rate": 1.5530467773851096e-05, "loss": 0.706, "step": 7495 }, { "epoch": 1.0024070607114202, "grad_norm": 1.1649103164672852, "learning_rate": 1.5529264858422747e-05, "loss": 0.7697, "step": 7496 }, { "epoch": 1.002540786306499, "grad_norm": 1.091210126876831, "learning_rate": 1.5528061827742166e-05, "loss": 0.797, "step": 7497 }, { "epoch": 1.0026745119015779, "grad_norm": 1.0944057703018188, "learning_rate": 1.552685868183442e-05, "loss": 0.6992, "step": 7498 }, { "epoch": 1.002808237496657, "grad_norm": 1.148113489151001, "learning_rate": 1.55256554207246e-05, "loss": 0.78, "step": 7499 }, { "epoch": 1.0029419630917358, "grad_norm": 0.9884711503982544, "learning_rate": 1.5524452044437777e-05, "loss": 0.6956, "step": 7500 }, { "epoch": 1.0030756886868146, "grad_norm": 1.2058829069137573, "learning_rate": 1.5523248552999038e-05, "loss": 0.6796, "step": 7501 }, { "epoch": 1.0032094142818935, "grad_norm": 0.9653275012969971, "learning_rate": 1.5522044946433468e-05, "loss": 0.6804, "step": 7502 }, { "epoch": 1.0033431398769725, "grad_norm": 1.1390634775161743, "learning_rate": 1.5520841224766153e-05, "loss": 0.759, "step": 7503 }, { "epoch": 1.0034768654720514, "grad_norm": 1.2696011066436768, "learning_rate": 1.551963738802219e-05, "loss": 0.8251, "step": 7504 }, { "epoch": 1.0036105910671302, "grad_norm": 1.0664371252059937, "learning_rate": 1.5518433436226664e-05, "loss": 0.7601, "step": 7505 }, { "epoch": 1.003744316662209, "grad_norm": 1.079526424407959, "learning_rate": 1.5517229369404675e-05, "loss": 0.7216, "step": 7506 }, { "epoch": 1.003878042257288, "grad_norm": 1.3189359903335571, "learning_rate": 1.5516025187581318e-05, "loss": 0.8131, "step": 7507 }, { "epoch": 1.004011767852367, "grad_norm": 1.0641002655029297, "learning_rate": 1.5514820890781695e-05, "loss": 0.6483, "step": 7508 }, { "epoch": 1.0041454934474459, "grad_norm": 1.0159856081008911, "learning_rate": 1.551361647903091e-05, "loss": 0.6727, "step": 7509 }, { "epoch": 1.0042792190425247, "grad_norm": 1.1783243417739868, "learning_rate": 1.551241195235406e-05, "loss": 0.748, "step": 7510 }, { "epoch": 1.0044129446376036, "grad_norm": 1.3051390647888184, "learning_rate": 1.551120731077626e-05, "loss": 0.79, "step": 7511 }, { "epoch": 1.0045466702326826, "grad_norm": 1.114362359046936, "learning_rate": 1.5510002554322617e-05, "loss": 0.7885, "step": 7512 }, { "epoch": 1.0046803958277615, "grad_norm": 1.0880696773529053, "learning_rate": 1.550879768301825e-05, "loss": 0.8691, "step": 7513 }, { "epoch": 1.0048141214228403, "grad_norm": 1.113642930984497, "learning_rate": 1.5507592696888258e-05, "loss": 0.7344, "step": 7514 }, { "epoch": 1.0049478470179192, "grad_norm": 1.1645268201828003, "learning_rate": 1.550638759595777e-05, "loss": 0.788, "step": 7515 }, { "epoch": 1.005081572612998, "grad_norm": 1.0820420980453491, "learning_rate": 1.55051823802519e-05, "loss": 0.7616, "step": 7516 }, { "epoch": 1.005215298208077, "grad_norm": 1.120851755142212, "learning_rate": 1.5503977049795772e-05, "loss": 0.774, "step": 7517 }, { "epoch": 1.005349023803156, "grad_norm": 1.2607370615005493, "learning_rate": 1.550277160461451e-05, "loss": 0.7642, "step": 7518 }, { "epoch": 1.0054827493982348, "grad_norm": 1.1562846899032593, "learning_rate": 1.5501566044733237e-05, "loss": 0.8203, "step": 7519 }, { "epoch": 1.0056164749933136, "grad_norm": 1.116916537284851, "learning_rate": 1.5500360370177087e-05, "loss": 0.762, "step": 7520 }, { "epoch": 1.0057502005883927, "grad_norm": 1.0174331665039062, "learning_rate": 1.549915458097119e-05, "loss": 0.6953, "step": 7521 }, { "epoch": 1.0058839261834716, "grad_norm": 1.155356764793396, "learning_rate": 1.5497948677140673e-05, "loss": 0.667, "step": 7522 }, { "epoch": 1.0060176517785504, "grad_norm": 1.143683910369873, "learning_rate": 1.549674265871068e-05, "loss": 0.823, "step": 7523 }, { "epoch": 1.0061513773736293, "grad_norm": 1.0673437118530273, "learning_rate": 1.5495536525706346e-05, "loss": 0.7503, "step": 7524 }, { "epoch": 1.006285102968708, "grad_norm": 1.1236730813980103, "learning_rate": 1.549433027815281e-05, "loss": 0.7651, "step": 7525 }, { "epoch": 1.0064188285637872, "grad_norm": 1.1302212476730347, "learning_rate": 1.5493123916075218e-05, "loss": 0.7805, "step": 7526 }, { "epoch": 1.006552554158866, "grad_norm": 1.105208158493042, "learning_rate": 1.5491917439498714e-05, "loss": 0.7663, "step": 7527 }, { "epoch": 1.0066862797539449, "grad_norm": 0.9848840832710266, "learning_rate": 1.5490710848448446e-05, "loss": 0.7009, "step": 7528 }, { "epoch": 1.0068200053490237, "grad_norm": 1.0820873975753784, "learning_rate": 1.548950414294957e-05, "loss": 0.6751, "step": 7529 }, { "epoch": 1.0069537309441028, "grad_norm": 1.0202966928482056, "learning_rate": 1.5488297323027223e-05, "loss": 0.7154, "step": 7530 }, { "epoch": 1.0070874565391816, "grad_norm": 1.034687876701355, "learning_rate": 1.5487090388706573e-05, "loss": 0.7451, "step": 7531 }, { "epoch": 1.0072211821342605, "grad_norm": 1.3755745887756348, "learning_rate": 1.5485883340012778e-05, "loss": 0.8128, "step": 7532 }, { "epoch": 1.0073549077293393, "grad_norm": 1.0626695156097412, "learning_rate": 1.5484676176970996e-05, "loss": 0.6603, "step": 7533 }, { "epoch": 1.0074886333244184, "grad_norm": 1.0270764827728271, "learning_rate": 1.548346889960638e-05, "loss": 0.6209, "step": 7534 }, { "epoch": 1.0076223589194973, "grad_norm": 1.161803960800171, "learning_rate": 1.5482261507944106e-05, "loss": 0.8197, "step": 7535 }, { "epoch": 1.007756084514576, "grad_norm": 0.9051011204719543, "learning_rate": 1.5481054002009336e-05, "loss": 0.6343, "step": 7536 }, { "epoch": 1.007889810109655, "grad_norm": 1.278534173965454, "learning_rate": 1.5479846381827243e-05, "loss": 0.8111, "step": 7537 }, { "epoch": 1.0080235357047338, "grad_norm": 1.13411283493042, "learning_rate": 1.547863864742299e-05, "loss": 0.7163, "step": 7538 }, { "epoch": 1.0081572612998129, "grad_norm": 1.230907678604126, "learning_rate": 1.547743079882176e-05, "loss": 0.8011, "step": 7539 }, { "epoch": 1.0082909868948917, "grad_norm": 1.104331612586975, "learning_rate": 1.5476222836048725e-05, "loss": 0.7482, "step": 7540 }, { "epoch": 1.0084247124899706, "grad_norm": 1.0554298162460327, "learning_rate": 1.547501475912907e-05, "loss": 0.7563, "step": 7541 }, { "epoch": 1.0085584380850494, "grad_norm": 1.1173374652862549, "learning_rate": 1.547380656808797e-05, "loss": 0.7814, "step": 7542 }, { "epoch": 1.0086921636801285, "grad_norm": 0.9938890337944031, "learning_rate": 1.5472598262950604e-05, "loss": 0.7358, "step": 7543 }, { "epoch": 1.0088258892752073, "grad_norm": 1.2468020915985107, "learning_rate": 1.547138984374217e-05, "loss": 0.828, "step": 7544 }, { "epoch": 1.0089596148702862, "grad_norm": 1.267331838607788, "learning_rate": 1.547018131048785e-05, "loss": 0.7864, "step": 7545 }, { "epoch": 1.009093340465365, "grad_norm": 1.1960279941558838, "learning_rate": 1.5468972663212832e-05, "loss": 0.7876, "step": 7546 }, { "epoch": 1.0092270660604439, "grad_norm": 1.0529667139053345, "learning_rate": 1.5467763901942312e-05, "loss": 0.7413, "step": 7547 }, { "epoch": 1.009360791655523, "grad_norm": 0.991306483745575, "learning_rate": 1.5466555026701486e-05, "loss": 0.6867, "step": 7548 }, { "epoch": 1.0094945172506018, "grad_norm": 1.0380163192749023, "learning_rate": 1.5465346037515555e-05, "loss": 0.768, "step": 7549 }, { "epoch": 1.0096282428456806, "grad_norm": 1.1175163984298706, "learning_rate": 1.546413693440971e-05, "loss": 0.8219, "step": 7550 }, { "epoch": 1.0097619684407595, "grad_norm": 1.0123742818832397, "learning_rate": 1.5462927717409165e-05, "loss": 0.688, "step": 7551 }, { "epoch": 1.0098956940358386, "grad_norm": 1.1075096130371094, "learning_rate": 1.5461718386539115e-05, "loss": 0.6891, "step": 7552 }, { "epoch": 1.0100294196309174, "grad_norm": 1.1292492151260376, "learning_rate": 1.546050894182477e-05, "loss": 0.7164, "step": 7553 }, { "epoch": 1.0101631452259963, "grad_norm": 1.0311020612716675, "learning_rate": 1.5459299383291347e-05, "loss": 0.7824, "step": 7554 }, { "epoch": 1.010296870821075, "grad_norm": 1.192772626876831, "learning_rate": 1.5458089710964047e-05, "loss": 0.6792, "step": 7555 }, { "epoch": 1.010430596416154, "grad_norm": 1.165932536125183, "learning_rate": 1.5456879924868093e-05, "loss": 0.7361, "step": 7556 }, { "epoch": 1.010564322011233, "grad_norm": 0.9414857029914856, "learning_rate": 1.54556700250287e-05, "loss": 0.596, "step": 7557 }, { "epoch": 1.0106980476063119, "grad_norm": 1.0853465795516968, "learning_rate": 1.5454460011471082e-05, "loss": 0.6773, "step": 7558 }, { "epoch": 1.0108317732013907, "grad_norm": 1.103758454322815, "learning_rate": 1.5453249884220466e-05, "loss": 0.715, "step": 7559 }, { "epoch": 1.0109654987964696, "grad_norm": 1.095453143119812, "learning_rate": 1.5452039643302073e-05, "loss": 0.8082, "step": 7560 }, { "epoch": 1.0110992243915486, "grad_norm": 1.1548391580581665, "learning_rate": 1.545082928874113e-05, "loss": 0.7326, "step": 7561 }, { "epoch": 1.0112329499866275, "grad_norm": 1.137392282485962, "learning_rate": 1.5449618820562874e-05, "loss": 0.7373, "step": 7562 }, { "epoch": 1.0113666755817063, "grad_norm": 1.1104589700698853, "learning_rate": 1.544840823879252e-05, "loss": 0.715, "step": 7563 }, { "epoch": 1.0115004011767852, "grad_norm": 1.2356964349746704, "learning_rate": 1.544719754345531e-05, "loss": 0.8533, "step": 7564 }, { "epoch": 1.011634126771864, "grad_norm": 1.0686579942703247, "learning_rate": 1.5445986734576485e-05, "loss": 0.7558, "step": 7565 }, { "epoch": 1.011767852366943, "grad_norm": 1.0532584190368652, "learning_rate": 1.5444775812181275e-05, "loss": 0.7313, "step": 7566 }, { "epoch": 1.011901577962022, "grad_norm": 1.0541143417358398, "learning_rate": 1.5443564776294922e-05, "loss": 0.7965, "step": 7567 }, { "epoch": 1.0120353035571008, "grad_norm": 1.0703556537628174, "learning_rate": 1.5442353626942672e-05, "loss": 0.8002, "step": 7568 }, { "epoch": 1.0121690291521797, "grad_norm": 1.0438132286071777, "learning_rate": 1.544114236414977e-05, "loss": 0.7316, "step": 7569 }, { "epoch": 1.0123027547472587, "grad_norm": 1.0834113359451294, "learning_rate": 1.543993098794146e-05, "loss": 0.7714, "step": 7570 }, { "epoch": 1.0124364803423376, "grad_norm": 1.1129871606826782, "learning_rate": 1.5438719498342992e-05, "loss": 0.7676, "step": 7571 }, { "epoch": 1.0125702059374164, "grad_norm": 1.1400810480117798, "learning_rate": 1.5437507895379624e-05, "loss": 0.7749, "step": 7572 }, { "epoch": 1.0127039315324953, "grad_norm": 1.0076345205307007, "learning_rate": 1.5436296179076605e-05, "loss": 0.7155, "step": 7573 }, { "epoch": 1.0128376571275741, "grad_norm": 1.1146738529205322, "learning_rate": 1.5435084349459194e-05, "loss": 0.752, "step": 7574 }, { "epoch": 1.0129713827226532, "grad_norm": 1.04277503490448, "learning_rate": 1.543387240655265e-05, "loss": 0.7397, "step": 7575 }, { "epoch": 1.013105108317732, "grad_norm": 1.0737124681472778, "learning_rate": 1.5432660350382235e-05, "loss": 0.7451, "step": 7576 }, { "epoch": 1.0132388339128109, "grad_norm": 1.223185658454895, "learning_rate": 1.5431448180973218e-05, "loss": 0.7692, "step": 7577 }, { "epoch": 1.0133725595078897, "grad_norm": 1.0702157020568848, "learning_rate": 1.5430235898350858e-05, "loss": 0.7072, "step": 7578 }, { "epoch": 1.0135062851029688, "grad_norm": 1.0938825607299805, "learning_rate": 1.5429023502540426e-05, "loss": 0.7642, "step": 7579 }, { "epoch": 1.0136400106980477, "grad_norm": 1.2363417148590088, "learning_rate": 1.5427810993567193e-05, "loss": 0.7874, "step": 7580 }, { "epoch": 1.0137737362931265, "grad_norm": 1.1881234645843506, "learning_rate": 1.5426598371456436e-05, "loss": 0.8263, "step": 7581 }, { "epoch": 1.0139074618882054, "grad_norm": 0.9601733088493347, "learning_rate": 1.542538563623343e-05, "loss": 0.6876, "step": 7582 }, { "epoch": 1.0140411874832842, "grad_norm": 1.1035581827163696, "learning_rate": 1.5424172787923448e-05, "loss": 0.7318, "step": 7583 }, { "epoch": 1.0141749130783633, "grad_norm": 1.0428141355514526, "learning_rate": 1.5422959826551778e-05, "loss": 0.6184, "step": 7584 }, { "epoch": 1.0143086386734421, "grad_norm": 1.1401928663253784, "learning_rate": 1.5421746752143696e-05, "loss": 0.8123, "step": 7585 }, { "epoch": 1.014442364268521, "grad_norm": 1.137660264968872, "learning_rate": 1.5420533564724495e-05, "loss": 0.749, "step": 7586 }, { "epoch": 1.0145760898635998, "grad_norm": 1.0200623273849487, "learning_rate": 1.5419320264319458e-05, "loss": 0.7213, "step": 7587 }, { "epoch": 1.0147098154586789, "grad_norm": 1.086226224899292, "learning_rate": 1.5418106850953877e-05, "loss": 0.6638, "step": 7588 }, { "epoch": 1.0148435410537577, "grad_norm": 1.1296967267990112, "learning_rate": 1.5416893324653037e-05, "loss": 0.7647, "step": 7589 }, { "epoch": 1.0149772666488366, "grad_norm": 1.1705378293991089, "learning_rate": 1.5415679685442247e-05, "loss": 0.7331, "step": 7590 }, { "epoch": 1.0151109922439154, "grad_norm": 1.1013715267181396, "learning_rate": 1.541446593334679e-05, "loss": 0.7277, "step": 7591 }, { "epoch": 1.0152447178389943, "grad_norm": 1.140773892402649, "learning_rate": 1.5413252068391973e-05, "loss": 0.7372, "step": 7592 }, { "epoch": 1.0153784434340734, "grad_norm": 1.0598537921905518, "learning_rate": 1.5412038090603098e-05, "loss": 0.631, "step": 7593 }, { "epoch": 1.0155121690291522, "grad_norm": 1.075382947921753, "learning_rate": 1.541082400000547e-05, "loss": 0.7277, "step": 7594 }, { "epoch": 1.015645894624231, "grad_norm": 1.1742466688156128, "learning_rate": 1.5409609796624387e-05, "loss": 0.6812, "step": 7595 }, { "epoch": 1.01577962021931, "grad_norm": 1.1625468730926514, "learning_rate": 1.540839548048517e-05, "loss": 0.7777, "step": 7596 }, { "epoch": 1.015913345814389, "grad_norm": 1.2257791757583618, "learning_rate": 1.540718105161312e-05, "loss": 0.7278, "step": 7597 }, { "epoch": 1.0160470714094678, "grad_norm": 1.2902549505233765, "learning_rate": 1.540596651003356e-05, "loss": 0.8606, "step": 7598 }, { "epoch": 1.0161807970045467, "grad_norm": 1.1143171787261963, "learning_rate": 1.5404751855771798e-05, "loss": 0.7524, "step": 7599 }, { "epoch": 1.0163145225996255, "grad_norm": 1.1104305982589722, "learning_rate": 1.5403537088853157e-05, "loss": 0.7269, "step": 7600 }, { "epoch": 1.0164482481947044, "grad_norm": 1.1554456949234009, "learning_rate": 1.5402322209302953e-05, "loss": 0.8392, "step": 7601 }, { "epoch": 1.0165819737897834, "grad_norm": 1.096725344657898, "learning_rate": 1.5401107217146515e-05, "loss": 0.6679, "step": 7602 }, { "epoch": 1.0167156993848623, "grad_norm": 1.0478761196136475, "learning_rate": 1.5399892112409163e-05, "loss": 0.6948, "step": 7603 }, { "epoch": 1.0168494249799411, "grad_norm": 1.0522668361663818, "learning_rate": 1.539867689511623e-05, "loss": 0.6586, "step": 7604 }, { "epoch": 1.01698315057502, "grad_norm": 1.0767230987548828, "learning_rate": 1.5397461565293038e-05, "loss": 0.7066, "step": 7605 }, { "epoch": 1.017116876170099, "grad_norm": 1.323459506034851, "learning_rate": 1.539624612296493e-05, "loss": 0.7615, "step": 7606 }, { "epoch": 1.017250601765178, "grad_norm": 1.0911426544189453, "learning_rate": 1.5395030568157232e-05, "loss": 0.7037, "step": 7607 }, { "epoch": 1.0173843273602567, "grad_norm": 1.0339018106460571, "learning_rate": 1.5393814900895284e-05, "loss": 0.6774, "step": 7608 }, { "epoch": 1.0175180529553356, "grad_norm": 1.1799553632736206, "learning_rate": 1.5392599121204427e-05, "loss": 0.7507, "step": 7609 }, { "epoch": 1.0176517785504144, "grad_norm": 1.0164538621902466, "learning_rate": 1.5391383229110005e-05, "loss": 0.6394, "step": 7610 }, { "epoch": 1.0177855041454935, "grad_norm": 1.103979229927063, "learning_rate": 1.5390167224637353e-05, "loss": 0.7171, "step": 7611 }, { "epoch": 1.0179192297405724, "grad_norm": 1.038739800453186, "learning_rate": 1.5388951107811828e-05, "loss": 0.6823, "step": 7612 }, { "epoch": 1.0180529553356512, "grad_norm": 1.2042981386184692, "learning_rate": 1.538773487865877e-05, "loss": 0.8267, "step": 7613 }, { "epoch": 1.01818668093073, "grad_norm": 1.1565076112747192, "learning_rate": 1.5386518537203533e-05, "loss": 0.8447, "step": 7614 }, { "epoch": 1.0183204065258091, "grad_norm": 1.2337570190429688, "learning_rate": 1.5385302083471474e-05, "loss": 0.736, "step": 7615 }, { "epoch": 1.018454132120888, "grad_norm": 1.091556191444397, "learning_rate": 1.5384085517487948e-05, "loss": 0.7016, "step": 7616 }, { "epoch": 1.0185878577159668, "grad_norm": 1.1633453369140625, "learning_rate": 1.5382868839278307e-05, "loss": 0.7836, "step": 7617 }, { "epoch": 1.0187215833110457, "grad_norm": 1.1030980348587036, "learning_rate": 1.538165204886792e-05, "loss": 0.6738, "step": 7618 }, { "epoch": 1.0188553089061245, "grad_norm": 1.052276372909546, "learning_rate": 1.538043514628214e-05, "loss": 0.6532, "step": 7619 }, { "epoch": 1.0189890345012036, "grad_norm": 1.1536833047866821, "learning_rate": 1.5379218131546344e-05, "loss": 0.7828, "step": 7620 }, { "epoch": 1.0191227600962824, "grad_norm": 1.1853820085525513, "learning_rate": 1.5378001004685888e-05, "loss": 0.7369, "step": 7621 }, { "epoch": 1.0192564856913613, "grad_norm": 1.1158623695373535, "learning_rate": 1.5376783765726155e-05, "loss": 0.7696, "step": 7622 }, { "epoch": 1.0193902112864401, "grad_norm": 1.1290605068206787, "learning_rate": 1.5375566414692504e-05, "loss": 0.7385, "step": 7623 }, { "epoch": 1.0195239368815192, "grad_norm": 1.227765679359436, "learning_rate": 1.5374348951610312e-05, "loss": 0.7714, "step": 7624 }, { "epoch": 1.019657662476598, "grad_norm": 1.2493822574615479, "learning_rate": 1.5373131376504964e-05, "loss": 0.8793, "step": 7625 }, { "epoch": 1.019791388071677, "grad_norm": 1.0335562229156494, "learning_rate": 1.5371913689401833e-05, "loss": 0.7588, "step": 7626 }, { "epoch": 1.0199251136667558, "grad_norm": 1.1311569213867188, "learning_rate": 1.53706958903263e-05, "loss": 0.7408, "step": 7627 }, { "epoch": 1.0200588392618348, "grad_norm": 1.2213736772537231, "learning_rate": 1.5369477979303752e-05, "loss": 0.7611, "step": 7628 }, { "epoch": 1.0201925648569137, "grad_norm": 1.1411844491958618, "learning_rate": 1.5368259956359572e-05, "loss": 0.7809, "step": 7629 }, { "epoch": 1.0203262904519925, "grad_norm": 1.0908231735229492, "learning_rate": 1.5367041821519152e-05, "loss": 0.7484, "step": 7630 }, { "epoch": 1.0204600160470714, "grad_norm": 1.0617804527282715, "learning_rate": 1.536582357480788e-05, "loss": 0.6735, "step": 7631 }, { "epoch": 1.0205937416421502, "grad_norm": 1.186063289642334, "learning_rate": 1.5364605216251146e-05, "loss": 0.6922, "step": 7632 }, { "epoch": 1.0207274672372293, "grad_norm": 1.1263413429260254, "learning_rate": 1.5363386745874355e-05, "loss": 0.8077, "step": 7633 }, { "epoch": 1.0208611928323081, "grad_norm": 1.1766207218170166, "learning_rate": 1.53621681637029e-05, "loss": 0.7518, "step": 7634 }, { "epoch": 1.020994918427387, "grad_norm": 1.2635776996612549, "learning_rate": 1.536094946976218e-05, "loss": 0.7889, "step": 7635 }, { "epoch": 1.0211286440224658, "grad_norm": 1.2026598453521729, "learning_rate": 1.53597306640776e-05, "loss": 0.7264, "step": 7636 }, { "epoch": 1.021262369617545, "grad_norm": 1.0593008995056152, "learning_rate": 1.5358511746674555e-05, "loss": 0.6773, "step": 7637 }, { "epoch": 1.0213960952126238, "grad_norm": 1.080504298210144, "learning_rate": 1.5357292717578463e-05, "loss": 0.6721, "step": 7638 }, { "epoch": 1.0215298208077026, "grad_norm": 1.1393852233886719, "learning_rate": 1.5356073576814732e-05, "loss": 0.724, "step": 7639 }, { "epoch": 1.0216635464027815, "grad_norm": 1.1669518947601318, "learning_rate": 1.5354854324408776e-05, "loss": 0.7754, "step": 7640 }, { "epoch": 1.0217972719978603, "grad_norm": 1.0751136541366577, "learning_rate": 1.5353634960386004e-05, "loss": 0.7728, "step": 7641 }, { "epoch": 1.0219309975929394, "grad_norm": 1.187941312789917, "learning_rate": 1.5352415484771833e-05, "loss": 0.7812, "step": 7642 }, { "epoch": 1.0220647231880182, "grad_norm": 1.2098135948181152, "learning_rate": 1.5351195897591683e-05, "loss": 0.7483, "step": 7643 }, { "epoch": 1.022198448783097, "grad_norm": 1.0966928005218506, "learning_rate": 1.5349976198870974e-05, "loss": 0.7814, "step": 7644 }, { "epoch": 1.022332174378176, "grad_norm": 1.2695571184158325, "learning_rate": 1.5348756388635133e-05, "loss": 0.7782, "step": 7645 }, { "epoch": 1.022465899973255, "grad_norm": 1.1409188508987427, "learning_rate": 1.534753646690958e-05, "loss": 0.6869, "step": 7646 }, { "epoch": 1.0225996255683338, "grad_norm": 1.007348895072937, "learning_rate": 1.5346316433719747e-05, "loss": 0.6972, "step": 7647 }, { "epoch": 1.0227333511634127, "grad_norm": 1.023591160774231, "learning_rate": 1.5345096289091066e-05, "loss": 0.6395, "step": 7648 }, { "epoch": 1.0228670767584915, "grad_norm": 1.0743900537490845, "learning_rate": 1.5343876033048964e-05, "loss": 0.706, "step": 7649 }, { "epoch": 1.0230008023535704, "grad_norm": 1.2269963026046753, "learning_rate": 1.5342655665618885e-05, "loss": 0.7874, "step": 7650 }, { "epoch": 1.0231345279486495, "grad_norm": 1.0078877210617065, "learning_rate": 1.5341435186826257e-05, "loss": 0.7155, "step": 7651 }, { "epoch": 1.0232682535437283, "grad_norm": 1.1455148458480835, "learning_rate": 1.5340214596696525e-05, "loss": 0.7526, "step": 7652 }, { "epoch": 1.0234019791388071, "grad_norm": 1.3359558582305908, "learning_rate": 1.533899389525513e-05, "loss": 0.8031, "step": 7653 }, { "epoch": 1.023535704733886, "grad_norm": 1.1383705139160156, "learning_rate": 1.5337773082527515e-05, "loss": 0.7555, "step": 7654 }, { "epoch": 1.023669430328965, "grad_norm": 1.149391531944275, "learning_rate": 1.533655215853913e-05, "loss": 0.7358, "step": 7655 }, { "epoch": 1.023803155924044, "grad_norm": 1.0689034461975098, "learning_rate": 1.5335331123315424e-05, "loss": 0.7288, "step": 7656 }, { "epoch": 1.0239368815191228, "grad_norm": 1.1047368049621582, "learning_rate": 1.533410997688184e-05, "loss": 0.7397, "step": 7657 }, { "epoch": 1.0240706071142016, "grad_norm": 1.2034196853637695, "learning_rate": 1.533288871926384e-05, "loss": 0.7762, "step": 7658 }, { "epoch": 1.0242043327092805, "grad_norm": 1.112215518951416, "learning_rate": 1.5331667350486876e-05, "loss": 0.7732, "step": 7659 }, { "epoch": 1.0243380583043595, "grad_norm": 1.161926507949829, "learning_rate": 1.5330445870576412e-05, "loss": 0.7539, "step": 7660 }, { "epoch": 1.0244717838994384, "grad_norm": 1.1224719285964966, "learning_rate": 1.5329224279557903e-05, "loss": 0.6833, "step": 7661 }, { "epoch": 1.0246055094945172, "grad_norm": 1.143916130065918, "learning_rate": 1.532800257745681e-05, "loss": 0.7793, "step": 7662 }, { "epoch": 1.024739235089596, "grad_norm": 1.106979489326477, "learning_rate": 1.5326780764298607e-05, "loss": 0.7608, "step": 7663 }, { "epoch": 1.0248729606846751, "grad_norm": 1.0660679340362549, "learning_rate": 1.532555884010875e-05, "loss": 0.7445, "step": 7664 }, { "epoch": 1.025006686279754, "grad_norm": 1.19538414478302, "learning_rate": 1.532433680491272e-05, "loss": 0.6868, "step": 7665 }, { "epoch": 1.0251404118748328, "grad_norm": 1.1730625629425049, "learning_rate": 1.532311465873598e-05, "loss": 0.7196, "step": 7666 }, { "epoch": 1.0252741374699117, "grad_norm": 1.1953115463256836, "learning_rate": 1.5321892401604014e-05, "loss": 0.7392, "step": 7667 }, { "epoch": 1.0254078630649905, "grad_norm": 1.1381534337997437, "learning_rate": 1.532067003354229e-05, "loss": 0.7688, "step": 7668 }, { "epoch": 1.0255415886600696, "grad_norm": 1.0532985925674438, "learning_rate": 1.5319447554576292e-05, "loss": 0.6737, "step": 7669 }, { "epoch": 1.0256753142551485, "grad_norm": 1.024971842765808, "learning_rate": 1.53182249647315e-05, "loss": 0.7649, "step": 7670 }, { "epoch": 1.0258090398502273, "grad_norm": 1.0704389810562134, "learning_rate": 1.5317002264033395e-05, "loss": 0.681, "step": 7671 }, { "epoch": 1.0259427654453062, "grad_norm": 0.9812400341033936, "learning_rate": 1.5315779452507466e-05, "loss": 0.6796, "step": 7672 }, { "epoch": 1.0260764910403852, "grad_norm": 1.1281324625015259, "learning_rate": 1.53145565301792e-05, "loss": 0.7788, "step": 7673 }, { "epoch": 1.026210216635464, "grad_norm": 1.0692516565322876, "learning_rate": 1.5313333497074094e-05, "loss": 0.7108, "step": 7674 }, { "epoch": 1.026343942230543, "grad_norm": 1.0706753730773926, "learning_rate": 1.5312110353217634e-05, "loss": 0.8069, "step": 7675 }, { "epoch": 1.0264776678256218, "grad_norm": 1.0035525560379028, "learning_rate": 1.5310887098635313e-05, "loss": 0.7323, "step": 7676 }, { "epoch": 1.0266113934207006, "grad_norm": 1.1789295673370361, "learning_rate": 1.5309663733352634e-05, "loss": 0.709, "step": 7677 }, { "epoch": 1.0267451190157797, "grad_norm": 1.0701169967651367, "learning_rate": 1.5308440257395095e-05, "loss": 0.7297, "step": 7678 }, { "epoch": 1.0268788446108585, "grad_norm": 1.2434269189834595, "learning_rate": 1.5307216670788202e-05, "loss": 0.8357, "step": 7679 }, { "epoch": 1.0270125702059374, "grad_norm": 1.068922519683838, "learning_rate": 1.530599297355745e-05, "loss": 0.6805, "step": 7680 }, { "epoch": 1.0271462958010162, "grad_norm": 1.0845290422439575, "learning_rate": 1.5304769165728357e-05, "loss": 0.7124, "step": 7681 }, { "epoch": 1.0272800213960953, "grad_norm": 1.2109038829803467, "learning_rate": 1.5303545247326424e-05, "loss": 0.7761, "step": 7682 }, { "epoch": 1.0274137469911742, "grad_norm": 1.1997913122177124, "learning_rate": 1.5302321218377167e-05, "loss": 0.7426, "step": 7683 }, { "epoch": 1.027547472586253, "grad_norm": 1.1841999292373657, "learning_rate": 1.5301097078906096e-05, "loss": 0.7871, "step": 7684 }, { "epoch": 1.0276811981813319, "grad_norm": 1.0841727256774902, "learning_rate": 1.529987282893873e-05, "loss": 0.7479, "step": 7685 }, { "epoch": 1.0278149237764107, "grad_norm": 1.135014533996582, "learning_rate": 1.5298648468500585e-05, "loss": 0.7296, "step": 7686 }, { "epoch": 1.0279486493714898, "grad_norm": 1.0995436906814575, "learning_rate": 1.5297423997617187e-05, "loss": 0.7498, "step": 7687 }, { "epoch": 1.0280823749665686, "grad_norm": 1.1187154054641724, "learning_rate": 1.5296199416314052e-05, "loss": 0.7266, "step": 7688 }, { "epoch": 1.0282161005616475, "grad_norm": 1.1194250583648682, "learning_rate": 1.529497472461671e-05, "loss": 0.692, "step": 7689 }, { "epoch": 1.0283498261567263, "grad_norm": 1.1201798915863037, "learning_rate": 1.529374992255068e-05, "loss": 0.7743, "step": 7690 }, { "epoch": 1.0284835517518054, "grad_norm": 1.2152721881866455, "learning_rate": 1.5292525010141507e-05, "loss": 0.7895, "step": 7691 }, { "epoch": 1.0286172773468842, "grad_norm": 1.1908994913101196, "learning_rate": 1.529129998741471e-05, "loss": 0.7759, "step": 7692 }, { "epoch": 1.028751002941963, "grad_norm": 1.0199247598648071, "learning_rate": 1.529007485439583e-05, "loss": 0.7094, "step": 7693 }, { "epoch": 1.028884728537042, "grad_norm": 1.1857454776763916, "learning_rate": 1.5288849611110398e-05, "loss": 0.7611, "step": 7694 }, { "epoch": 1.0290184541321208, "grad_norm": 1.0865466594696045, "learning_rate": 1.528762425758396e-05, "loss": 0.6766, "step": 7695 }, { "epoch": 1.0291521797271999, "grad_norm": 1.1344951391220093, "learning_rate": 1.5286398793842054e-05, "loss": 0.7611, "step": 7696 }, { "epoch": 1.0292859053222787, "grad_norm": 1.2493953704833984, "learning_rate": 1.528517321991022e-05, "loss": 0.7794, "step": 7697 }, { "epoch": 1.0294196309173576, "grad_norm": 1.1999006271362305, "learning_rate": 1.528394753581401e-05, "loss": 0.7271, "step": 7698 }, { "epoch": 1.0295533565124364, "grad_norm": 0.9770349860191345, "learning_rate": 1.5282721741578974e-05, "loss": 0.7138, "step": 7699 }, { "epoch": 1.0296870821075155, "grad_norm": 1.1624467372894287, "learning_rate": 1.5281495837230654e-05, "loss": 0.7632, "step": 7700 }, { "epoch": 1.0298208077025943, "grad_norm": 1.1058036088943481, "learning_rate": 1.5280269822794607e-05, "loss": 0.6974, "step": 7701 }, { "epoch": 1.0299545332976732, "grad_norm": 1.0494451522827148, "learning_rate": 1.527904369829639e-05, "loss": 0.6077, "step": 7702 }, { "epoch": 1.030088258892752, "grad_norm": 1.0553343296051025, "learning_rate": 1.5277817463761558e-05, "loss": 0.6866, "step": 7703 }, { "epoch": 1.0302219844878309, "grad_norm": 1.1266316175460815, "learning_rate": 1.527659111921567e-05, "loss": 0.8065, "step": 7704 }, { "epoch": 1.03035571008291, "grad_norm": 1.2385667562484741, "learning_rate": 1.527536466468429e-05, "loss": 0.7601, "step": 7705 }, { "epoch": 1.0304894356779888, "grad_norm": 1.1131343841552734, "learning_rate": 1.527413810019298e-05, "loss": 0.7167, "step": 7706 }, { "epoch": 1.0306231612730676, "grad_norm": 1.1246212720870972, "learning_rate": 1.5272911425767315e-05, "loss": 0.7108, "step": 7707 }, { "epoch": 1.0307568868681465, "grad_norm": 1.1344150304794312, "learning_rate": 1.5271684641432848e-05, "loss": 0.7182, "step": 7708 }, { "epoch": 1.0308906124632256, "grad_norm": 1.1381834745407104, "learning_rate": 1.5270457747215164e-05, "loss": 0.7624, "step": 7709 }, { "epoch": 1.0310243380583044, "grad_norm": 1.2702410221099854, "learning_rate": 1.5269230743139828e-05, "loss": 0.7766, "step": 7710 }, { "epoch": 1.0311580636533833, "grad_norm": 1.3508851528167725, "learning_rate": 1.5268003629232423e-05, "loss": 0.705, "step": 7711 }, { "epoch": 1.031291789248462, "grad_norm": 1.0799938440322876, "learning_rate": 1.5266776405518523e-05, "loss": 0.6907, "step": 7712 }, { "epoch": 1.031425514843541, "grad_norm": 1.0558677911758423, "learning_rate": 1.5265549072023705e-05, "loss": 0.6693, "step": 7713 }, { "epoch": 1.03155924043862, "grad_norm": 1.340999722480774, "learning_rate": 1.526432162877356e-05, "loss": 0.8498, "step": 7714 }, { "epoch": 1.0316929660336989, "grad_norm": 1.1497763395309448, "learning_rate": 1.5263094075793667e-05, "loss": 0.7542, "step": 7715 }, { "epoch": 1.0318266916287777, "grad_norm": 1.188692569732666, "learning_rate": 1.526186641310961e-05, "loss": 0.7285, "step": 7716 }, { "epoch": 1.0319604172238566, "grad_norm": 1.1685937643051147, "learning_rate": 1.526063864074699e-05, "loss": 0.7877, "step": 7717 }, { "epoch": 1.0320941428189356, "grad_norm": 0.9850782155990601, "learning_rate": 1.5259410758731384e-05, "loss": 0.6436, "step": 7718 }, { "epoch": 1.0322278684140145, "grad_norm": 1.1593655347824097, "learning_rate": 1.5258182767088397e-05, "loss": 0.7555, "step": 7719 }, { "epoch": 1.0323615940090933, "grad_norm": 1.1131411790847778, "learning_rate": 1.5256954665843622e-05, "loss": 0.6778, "step": 7720 }, { "epoch": 1.0324953196041722, "grad_norm": 1.0048941373825073, "learning_rate": 1.5255726455022655e-05, "loss": 0.7417, "step": 7721 }, { "epoch": 1.032629045199251, "grad_norm": 1.0125313997268677, "learning_rate": 1.5254498134651102e-05, "loss": 0.7162, "step": 7722 }, { "epoch": 1.03276277079433, "grad_norm": 1.1664342880249023, "learning_rate": 1.5253269704754564e-05, "loss": 0.775, "step": 7723 }, { "epoch": 1.032896496389409, "grad_norm": 1.1665126085281372, "learning_rate": 1.5252041165358642e-05, "loss": 0.7525, "step": 7724 }, { "epoch": 1.0330302219844878, "grad_norm": 1.1431002616882324, "learning_rate": 1.5250812516488949e-05, "loss": 0.8173, "step": 7725 }, { "epoch": 1.0331639475795666, "grad_norm": 1.1822288036346436, "learning_rate": 1.5249583758171094e-05, "loss": 0.7813, "step": 7726 }, { "epoch": 1.0332976731746457, "grad_norm": 1.2135124206542969, "learning_rate": 1.5248354890430693e-05, "loss": 0.8046, "step": 7727 }, { "epoch": 1.0334313987697246, "grad_norm": 1.020592212677002, "learning_rate": 1.524712591329335e-05, "loss": 0.7193, "step": 7728 }, { "epoch": 1.0335651243648034, "grad_norm": 1.0275360345840454, "learning_rate": 1.5245896826784689e-05, "loss": 0.7367, "step": 7729 }, { "epoch": 1.0336988499598823, "grad_norm": 1.2109256982803345, "learning_rate": 1.5244667630930332e-05, "loss": 0.7077, "step": 7730 }, { "epoch": 1.0338325755549613, "grad_norm": 1.0550034046173096, "learning_rate": 1.5243438325755894e-05, "loss": 0.7654, "step": 7731 }, { "epoch": 1.0339663011500402, "grad_norm": 1.1888256072998047, "learning_rate": 1.5242208911287005e-05, "loss": 0.7312, "step": 7732 }, { "epoch": 1.034100026745119, "grad_norm": 1.3184868097305298, "learning_rate": 1.5240979387549284e-05, "loss": 0.9253, "step": 7733 }, { "epoch": 1.0342337523401979, "grad_norm": 1.164965033531189, "learning_rate": 1.5239749754568362e-05, "loss": 0.7321, "step": 7734 }, { "epoch": 1.0343674779352767, "grad_norm": 1.0995858907699585, "learning_rate": 1.5238520012369872e-05, "loss": 0.7279, "step": 7735 }, { "epoch": 1.0345012035303558, "grad_norm": 1.1932474374771118, "learning_rate": 1.5237290160979448e-05, "loss": 0.8036, "step": 7736 }, { "epoch": 1.0346349291254346, "grad_norm": 1.1842924356460571, "learning_rate": 1.523606020042272e-05, "loss": 0.7739, "step": 7737 }, { "epoch": 1.0347686547205135, "grad_norm": 1.1720792055130005, "learning_rate": 1.5234830130725325e-05, "loss": 0.823, "step": 7738 }, { "epoch": 1.0349023803155923, "grad_norm": 1.2058141231536865, "learning_rate": 1.5233599951912905e-05, "loss": 0.7501, "step": 7739 }, { "epoch": 1.0350361059106712, "grad_norm": 1.0340498685836792, "learning_rate": 1.5232369664011106e-05, "loss": 0.7775, "step": 7740 }, { "epoch": 1.0351698315057503, "grad_norm": 1.2109304666519165, "learning_rate": 1.5231139267045567e-05, "loss": 0.7607, "step": 7741 }, { "epoch": 1.035303557100829, "grad_norm": 1.3134193420410156, "learning_rate": 1.5229908761041934e-05, "loss": 0.8178, "step": 7742 }, { "epoch": 1.035437282695908, "grad_norm": 1.170034408569336, "learning_rate": 1.5228678146025856e-05, "loss": 0.7065, "step": 7743 }, { "epoch": 1.0355710082909868, "grad_norm": 1.1636693477630615, "learning_rate": 1.5227447422022991e-05, "loss": 0.6384, "step": 7744 }, { "epoch": 1.0357047338860659, "grad_norm": 1.1211869716644287, "learning_rate": 1.5226216589058982e-05, "loss": 0.7299, "step": 7745 }, { "epoch": 1.0358384594811447, "grad_norm": 1.2007683515548706, "learning_rate": 1.5224985647159489e-05, "loss": 0.826, "step": 7746 }, { "epoch": 1.0359721850762236, "grad_norm": 1.2099323272705078, "learning_rate": 1.5223754596350171e-05, "loss": 0.8739, "step": 7747 }, { "epoch": 1.0361059106713024, "grad_norm": 1.0444656610488892, "learning_rate": 1.5222523436656689e-05, "loss": 0.6837, "step": 7748 }, { "epoch": 1.0362396362663815, "grad_norm": 1.2492817640304565, "learning_rate": 1.5221292168104702e-05, "loss": 0.7844, "step": 7749 }, { "epoch": 1.0363733618614603, "grad_norm": 1.225478172302246, "learning_rate": 1.5220060790719875e-05, "loss": 0.7288, "step": 7750 }, { "epoch": 1.0365070874565392, "grad_norm": 1.169121265411377, "learning_rate": 1.5218829304527875e-05, "loss": 0.7512, "step": 7751 }, { "epoch": 1.036640813051618, "grad_norm": 1.0508670806884766, "learning_rate": 1.5217597709554377e-05, "loss": 0.7318, "step": 7752 }, { "epoch": 1.0367745386466969, "grad_norm": 1.0857781171798706, "learning_rate": 1.5216366005825043e-05, "loss": 0.7097, "step": 7753 }, { "epoch": 1.036908264241776, "grad_norm": 1.1035232543945312, "learning_rate": 1.521513419336555e-05, "loss": 0.6957, "step": 7754 }, { "epoch": 1.0370419898368548, "grad_norm": 1.1231297254562378, "learning_rate": 1.5213902272201577e-05, "loss": 0.7067, "step": 7755 }, { "epoch": 1.0371757154319337, "grad_norm": 1.0912600755691528, "learning_rate": 1.52126702423588e-05, "loss": 0.7353, "step": 7756 }, { "epoch": 1.0373094410270125, "grad_norm": 1.0572762489318848, "learning_rate": 1.52114381038629e-05, "loss": 0.6715, "step": 7757 }, { "epoch": 1.0374431666220916, "grad_norm": 1.1576606035232544, "learning_rate": 1.5210205856739561e-05, "loss": 0.6999, "step": 7758 }, { "epoch": 1.0375768922171704, "grad_norm": 1.1035009622573853, "learning_rate": 1.5208973501014466e-05, "loss": 0.7705, "step": 7759 }, { "epoch": 1.0377106178122493, "grad_norm": 1.160325050354004, "learning_rate": 1.5207741036713304e-05, "loss": 0.728, "step": 7760 }, { "epoch": 1.0378443434073281, "grad_norm": 1.1255453824996948, "learning_rate": 1.5206508463861759e-05, "loss": 0.7592, "step": 7761 }, { "epoch": 1.037978069002407, "grad_norm": 1.2259798049926758, "learning_rate": 1.520527578248553e-05, "loss": 0.7424, "step": 7762 }, { "epoch": 1.038111794597486, "grad_norm": 1.0097345113754272, "learning_rate": 1.5204042992610308e-05, "loss": 0.6897, "step": 7763 }, { "epoch": 1.0382455201925649, "grad_norm": 1.0380107164382935, "learning_rate": 1.520281009426179e-05, "loss": 0.6248, "step": 7764 }, { "epoch": 1.0383792457876437, "grad_norm": 1.164419174194336, "learning_rate": 1.5201577087465673e-05, "loss": 0.713, "step": 7765 }, { "epoch": 1.0385129713827226, "grad_norm": 1.202943205833435, "learning_rate": 1.520034397224766e-05, "loss": 0.6972, "step": 7766 }, { "epoch": 1.0386466969778017, "grad_norm": 1.2013190984725952, "learning_rate": 1.5199110748633452e-05, "loss": 0.7052, "step": 7767 }, { "epoch": 1.0387804225728805, "grad_norm": 1.022189974784851, "learning_rate": 1.5197877416648757e-05, "loss": 0.6821, "step": 7768 }, { "epoch": 1.0389141481679594, "grad_norm": 1.1417109966278076, "learning_rate": 1.5196643976319281e-05, "loss": 0.7354, "step": 7769 }, { "epoch": 1.0390478737630382, "grad_norm": 1.1212451457977295, "learning_rate": 1.519541042767073e-05, "loss": 0.7125, "step": 7770 }, { "epoch": 1.039181599358117, "grad_norm": 1.1309548616409302, "learning_rate": 1.5194176770728826e-05, "loss": 0.7927, "step": 7771 }, { "epoch": 1.0393153249531961, "grad_norm": 1.1652313470840454, "learning_rate": 1.5192943005519274e-05, "loss": 0.7471, "step": 7772 }, { "epoch": 1.039449050548275, "grad_norm": 1.1374988555908203, "learning_rate": 1.5191709132067795e-05, "loss": 0.7548, "step": 7773 }, { "epoch": 1.0395827761433538, "grad_norm": 1.2150187492370605, "learning_rate": 1.5190475150400107e-05, "loss": 0.7564, "step": 7774 }, { "epoch": 1.0397165017384327, "grad_norm": 1.0521843433380127, "learning_rate": 1.5189241060541928e-05, "loss": 0.6141, "step": 7775 }, { "epoch": 1.0398502273335117, "grad_norm": 1.165489912033081, "learning_rate": 1.5188006862518992e-05, "loss": 0.7199, "step": 7776 }, { "epoch": 1.0399839529285906, "grad_norm": 1.1734412908554077, "learning_rate": 1.5186772556357012e-05, "loss": 0.7287, "step": 7777 }, { "epoch": 1.0401176785236694, "grad_norm": 1.1646074056625366, "learning_rate": 1.5185538142081721e-05, "loss": 0.77, "step": 7778 }, { "epoch": 1.0402514041187483, "grad_norm": 1.2943426370620728, "learning_rate": 1.5184303619718852e-05, "loss": 0.9134, "step": 7779 }, { "epoch": 1.0403851297138271, "grad_norm": 1.1749294996261597, "learning_rate": 1.5183068989294133e-05, "loss": 0.767, "step": 7780 }, { "epoch": 1.0405188553089062, "grad_norm": 1.081661581993103, "learning_rate": 1.51818342508333e-05, "loss": 0.7161, "step": 7781 }, { "epoch": 1.040652580903985, "grad_norm": 1.343120813369751, "learning_rate": 1.5180599404362093e-05, "loss": 0.7882, "step": 7782 }, { "epoch": 1.040786306499064, "grad_norm": 1.3312262296676636, "learning_rate": 1.5179364449906246e-05, "loss": 0.794, "step": 7783 }, { "epoch": 1.0409200320941427, "grad_norm": 1.0863099098205566, "learning_rate": 1.5178129387491507e-05, "loss": 0.7671, "step": 7784 }, { "epoch": 1.0410537576892218, "grad_norm": 1.0764286518096924, "learning_rate": 1.5176894217143617e-05, "loss": 0.6912, "step": 7785 }, { "epoch": 1.0411874832843007, "grad_norm": 1.2228187322616577, "learning_rate": 1.5175658938888313e-05, "loss": 0.8214, "step": 7786 }, { "epoch": 1.0413212088793795, "grad_norm": 1.062856674194336, "learning_rate": 1.5174423552751356e-05, "loss": 0.7335, "step": 7787 }, { "epoch": 1.0414549344744584, "grad_norm": 1.2317777872085571, "learning_rate": 1.5173188058758492e-05, "loss": 0.8234, "step": 7788 }, { "epoch": 1.0415886600695372, "grad_norm": 1.2182952165603638, "learning_rate": 1.5171952456935471e-05, "loss": 0.6809, "step": 7789 }, { "epoch": 1.0417223856646163, "grad_norm": 1.1359412670135498, "learning_rate": 1.5170716747308052e-05, "loss": 0.7095, "step": 7790 }, { "epoch": 1.0418561112596951, "grad_norm": 1.0818493366241455, "learning_rate": 1.516948092990199e-05, "loss": 0.7312, "step": 7791 }, { "epoch": 1.041989836854774, "grad_norm": 1.005029559135437, "learning_rate": 1.5168245004743045e-05, "loss": 0.8068, "step": 7792 }, { "epoch": 1.0421235624498528, "grad_norm": 1.084871768951416, "learning_rate": 1.5167008971856977e-05, "loss": 0.7285, "step": 7793 }, { "epoch": 1.042257288044932, "grad_norm": 1.1375077962875366, "learning_rate": 1.5165772831269547e-05, "loss": 0.7741, "step": 7794 }, { "epoch": 1.0423910136400107, "grad_norm": 1.1234840154647827, "learning_rate": 1.516453658300653e-05, "loss": 0.8046, "step": 7795 }, { "epoch": 1.0425247392350896, "grad_norm": 1.2515041828155518, "learning_rate": 1.5163300227093691e-05, "loss": 0.846, "step": 7796 }, { "epoch": 1.0426584648301684, "grad_norm": 1.2193487882614136, "learning_rate": 1.51620637635568e-05, "loss": 0.7578, "step": 7797 }, { "epoch": 1.0427921904252473, "grad_norm": 1.0596857070922852, "learning_rate": 1.5160827192421628e-05, "loss": 0.6683, "step": 7798 }, { "epoch": 1.0429259160203264, "grad_norm": 1.1342637538909912, "learning_rate": 1.5159590513713952e-05, "loss": 0.6882, "step": 7799 }, { "epoch": 1.0430596416154052, "grad_norm": 1.1775076389312744, "learning_rate": 1.5158353727459548e-05, "loss": 0.838, "step": 7800 }, { "epoch": 1.043193367210484, "grad_norm": 1.1239255666732788, "learning_rate": 1.5157116833684196e-05, "loss": 0.7315, "step": 7801 }, { "epoch": 1.043327092805563, "grad_norm": 1.1853537559509277, "learning_rate": 1.5155879832413678e-05, "loss": 0.7447, "step": 7802 }, { "epoch": 1.043460818400642, "grad_norm": 1.15453040599823, "learning_rate": 1.515464272367378e-05, "loss": 0.7479, "step": 7803 }, { "epoch": 1.0435945439957208, "grad_norm": 1.0728952884674072, "learning_rate": 1.5153405507490288e-05, "loss": 0.722, "step": 7804 }, { "epoch": 1.0437282695907997, "grad_norm": 1.1377720832824707, "learning_rate": 1.5152168183888987e-05, "loss": 0.6686, "step": 7805 }, { "epoch": 1.0438619951858785, "grad_norm": 1.210998296737671, "learning_rate": 1.515093075289567e-05, "loss": 0.6818, "step": 7806 }, { "epoch": 1.0439957207809574, "grad_norm": 1.1565691232681274, "learning_rate": 1.5149693214536131e-05, "loss": 0.748, "step": 7807 }, { "epoch": 1.0441294463760364, "grad_norm": 0.973939061164856, "learning_rate": 1.514845556883617e-05, "loss": 0.6781, "step": 7808 }, { "epoch": 1.0442631719711153, "grad_norm": 1.0522398948669434, "learning_rate": 1.5147217815821571e-05, "loss": 0.6567, "step": 7809 }, { "epoch": 1.0443968975661941, "grad_norm": 1.0849666595458984, "learning_rate": 1.5145979955518147e-05, "loss": 0.6557, "step": 7810 }, { "epoch": 1.044530623161273, "grad_norm": 1.2479661703109741, "learning_rate": 1.5144741987951692e-05, "loss": 0.7564, "step": 7811 }, { "epoch": 1.044664348756352, "grad_norm": 1.0706498622894287, "learning_rate": 1.5143503913148017e-05, "loss": 0.7227, "step": 7812 }, { "epoch": 1.044798074351431, "grad_norm": 1.3072758913040161, "learning_rate": 1.514226573113292e-05, "loss": 0.7438, "step": 7813 }, { "epoch": 1.0449317999465098, "grad_norm": 1.2006278038024902, "learning_rate": 1.5141027441932217e-05, "loss": 0.7254, "step": 7814 }, { "epoch": 1.0450655255415886, "grad_norm": 1.2566536664962769, "learning_rate": 1.5139789045571718e-05, "loss": 0.7583, "step": 7815 }, { "epoch": 1.0451992511366675, "grad_norm": 1.1450823545455933, "learning_rate": 1.5138550542077233e-05, "loss": 0.7164, "step": 7816 }, { "epoch": 1.0453329767317465, "grad_norm": 1.298461675643921, "learning_rate": 1.5137311931474582e-05, "loss": 0.663, "step": 7817 }, { "epoch": 1.0454667023268254, "grad_norm": 1.2287315130233765, "learning_rate": 1.5136073213789574e-05, "loss": 0.6659, "step": 7818 }, { "epoch": 1.0456004279219042, "grad_norm": 1.0969955921173096, "learning_rate": 1.5134834389048036e-05, "loss": 0.6467, "step": 7819 }, { "epoch": 1.045734153516983, "grad_norm": 1.0878665447235107, "learning_rate": 1.513359545727579e-05, "loss": 0.7212, "step": 7820 }, { "epoch": 1.0458678791120621, "grad_norm": 1.1976404190063477, "learning_rate": 1.5132356418498661e-05, "loss": 0.7916, "step": 7821 }, { "epoch": 1.046001604707141, "grad_norm": 1.1810678243637085, "learning_rate": 1.513111727274247e-05, "loss": 0.7156, "step": 7822 }, { "epoch": 1.0461353303022198, "grad_norm": 1.1508187055587769, "learning_rate": 1.5129878020033051e-05, "loss": 0.7306, "step": 7823 }, { "epoch": 1.0462690558972987, "grad_norm": 1.2293113470077515, "learning_rate": 1.5128638660396234e-05, "loss": 0.7661, "step": 7824 }, { "epoch": 1.0464027814923775, "grad_norm": 1.0373139381408691, "learning_rate": 1.512739919385785e-05, "loss": 0.6897, "step": 7825 }, { "epoch": 1.0465365070874566, "grad_norm": 1.0706820487976074, "learning_rate": 1.5126159620443738e-05, "loss": 0.7035, "step": 7826 }, { "epoch": 1.0466702326825355, "grad_norm": 1.0648683309555054, "learning_rate": 1.5124919940179732e-05, "loss": 0.6626, "step": 7827 }, { "epoch": 1.0468039582776143, "grad_norm": 1.1726741790771484, "learning_rate": 1.5123680153091675e-05, "loss": 0.7532, "step": 7828 }, { "epoch": 1.0469376838726931, "grad_norm": 1.1182183027267456, "learning_rate": 1.5122440259205408e-05, "loss": 0.6436, "step": 7829 }, { "epoch": 1.0470714094677722, "grad_norm": 1.0258846282958984, "learning_rate": 1.5121200258546778e-05, "loss": 0.6762, "step": 7830 }, { "epoch": 1.047205135062851, "grad_norm": 1.1166654825210571, "learning_rate": 1.5119960151141627e-05, "loss": 0.7016, "step": 7831 }, { "epoch": 1.04733886065793, "grad_norm": 1.0509123802185059, "learning_rate": 1.5118719937015805e-05, "loss": 0.7406, "step": 7832 }, { "epoch": 1.0474725862530088, "grad_norm": 1.086255431175232, "learning_rate": 1.5117479616195163e-05, "loss": 0.7557, "step": 7833 }, { "epoch": 1.0476063118480878, "grad_norm": 1.1505647897720337, "learning_rate": 1.5116239188705557e-05, "loss": 0.716, "step": 7834 }, { "epoch": 1.0477400374431667, "grad_norm": 1.1314702033996582, "learning_rate": 1.511499865457284e-05, "loss": 0.7549, "step": 7835 }, { "epoch": 1.0478737630382455, "grad_norm": 1.0411655902862549, "learning_rate": 1.511375801382287e-05, "loss": 0.7928, "step": 7836 }, { "epoch": 1.0480074886333244, "grad_norm": 1.027052402496338, "learning_rate": 1.5112517266481513e-05, "loss": 0.6924, "step": 7837 }, { "epoch": 1.0481412142284032, "grad_norm": 1.249130368232727, "learning_rate": 1.511127641257462e-05, "loss": 0.7465, "step": 7838 }, { "epoch": 1.0482749398234823, "grad_norm": 1.0493065118789673, "learning_rate": 1.511003545212806e-05, "loss": 0.7807, "step": 7839 }, { "epoch": 1.0484086654185611, "grad_norm": 1.100597858428955, "learning_rate": 1.5108794385167703e-05, "loss": 0.6785, "step": 7840 }, { "epoch": 1.04854239101364, "grad_norm": 1.1343402862548828, "learning_rate": 1.5107553211719416e-05, "loss": 0.7264, "step": 7841 }, { "epoch": 1.0486761166087188, "grad_norm": 1.166481852531433, "learning_rate": 1.510631193180907e-05, "loss": 0.707, "step": 7842 }, { "epoch": 1.0488098422037977, "grad_norm": 1.2873793840408325, "learning_rate": 1.5105070545462538e-05, "loss": 0.7043, "step": 7843 }, { "epoch": 1.0489435677988768, "grad_norm": 1.241848111152649, "learning_rate": 1.5103829052705697e-05, "loss": 0.7241, "step": 7844 }, { "epoch": 1.0490772933939556, "grad_norm": 1.2145333290100098, "learning_rate": 1.510258745356442e-05, "loss": 0.7788, "step": 7845 }, { "epoch": 1.0492110189890345, "grad_norm": 1.0562639236450195, "learning_rate": 1.5101345748064593e-05, "loss": 0.6481, "step": 7846 }, { "epoch": 1.0493447445841133, "grad_norm": 1.022594928741455, "learning_rate": 1.510010393623209e-05, "loss": 0.7426, "step": 7847 }, { "epoch": 1.0494784701791924, "grad_norm": 1.0815496444702148, "learning_rate": 1.5098862018092808e-05, "loss": 0.761, "step": 7848 }, { "epoch": 1.0496121957742712, "grad_norm": 1.113710880279541, "learning_rate": 1.5097619993672624e-05, "loss": 0.8225, "step": 7849 }, { "epoch": 1.04974592136935, "grad_norm": 1.1431163549423218, "learning_rate": 1.5096377862997428e-05, "loss": 0.7019, "step": 7850 }, { "epoch": 1.049879646964429, "grad_norm": 1.0646350383758545, "learning_rate": 1.5095135626093112e-05, "loss": 0.7537, "step": 7851 }, { "epoch": 1.050013372559508, "grad_norm": 1.1504433155059814, "learning_rate": 1.5093893282985565e-05, "loss": 0.696, "step": 7852 }, { "epoch": 1.0501470981545868, "grad_norm": 1.3994109630584717, "learning_rate": 1.5092650833700695e-05, "loss": 0.735, "step": 7853 }, { "epoch": 1.0502808237496657, "grad_norm": 1.2312335968017578, "learning_rate": 1.5091408278264388e-05, "loss": 0.7855, "step": 7854 }, { "epoch": 1.0504145493447445, "grad_norm": 1.2383403778076172, "learning_rate": 1.5090165616702548e-05, "loss": 0.7748, "step": 7855 }, { "epoch": 1.0505482749398234, "grad_norm": 1.2579381465911865, "learning_rate": 1.5088922849041075e-05, "loss": 0.7354, "step": 7856 }, { "epoch": 1.0506820005349025, "grad_norm": 1.356278419494629, "learning_rate": 1.5087679975305876e-05, "loss": 0.8059, "step": 7857 }, { "epoch": 1.0508157261299813, "grad_norm": 1.2162450551986694, "learning_rate": 1.5086436995522855e-05, "loss": 0.7977, "step": 7858 }, { "epoch": 1.0509494517250602, "grad_norm": 1.0081366300582886, "learning_rate": 1.508519390971792e-05, "loss": 0.6465, "step": 7859 }, { "epoch": 1.051083177320139, "grad_norm": 1.260623574256897, "learning_rate": 1.5083950717916991e-05, "loss": 0.7264, "step": 7860 }, { "epoch": 1.051216902915218, "grad_norm": 1.275819182395935, "learning_rate": 1.508270742014597e-05, "loss": 0.7399, "step": 7861 }, { "epoch": 1.051350628510297, "grad_norm": 1.0182470083236694, "learning_rate": 1.5081464016430775e-05, "loss": 0.6674, "step": 7862 }, { "epoch": 1.0514843541053758, "grad_norm": 1.3782334327697754, "learning_rate": 1.5080220506797327e-05, "loss": 0.7498, "step": 7863 }, { "epoch": 1.0516180797004546, "grad_norm": 1.2058110237121582, "learning_rate": 1.5078976891271544e-05, "loss": 0.7779, "step": 7864 }, { "epoch": 1.0517518052955335, "grad_norm": 1.1041940450668335, "learning_rate": 1.5077733169879346e-05, "loss": 0.8611, "step": 7865 }, { "epoch": 1.0518855308906125, "grad_norm": 1.1350985765457153, "learning_rate": 1.5076489342646659e-05, "loss": 0.8007, "step": 7866 }, { "epoch": 1.0520192564856914, "grad_norm": 1.2510885000228882, "learning_rate": 1.5075245409599411e-05, "loss": 0.7812, "step": 7867 }, { "epoch": 1.0521529820807702, "grad_norm": 1.117118000984192, "learning_rate": 1.5074001370763527e-05, "loss": 0.7637, "step": 7868 }, { "epoch": 1.052286707675849, "grad_norm": 1.0593831539154053, "learning_rate": 1.5072757226164942e-05, "loss": 0.6722, "step": 7869 }, { "epoch": 1.0524204332709282, "grad_norm": 1.0556825399398804, "learning_rate": 1.5071512975829588e-05, "loss": 0.6929, "step": 7870 }, { "epoch": 1.052554158866007, "grad_norm": 1.0788562297821045, "learning_rate": 1.5070268619783392e-05, "loss": 0.7828, "step": 7871 }, { "epoch": 1.0526878844610859, "grad_norm": 1.1442049741744995, "learning_rate": 1.5069024158052306e-05, "loss": 0.7238, "step": 7872 }, { "epoch": 1.0528216100561647, "grad_norm": 1.1112186908721924, "learning_rate": 1.5067779590662258e-05, "loss": 0.6995, "step": 7873 }, { "epoch": 1.0529553356512436, "grad_norm": 1.0672895908355713, "learning_rate": 1.5066534917639195e-05, "loss": 0.724, "step": 7874 }, { "epoch": 1.0530890612463226, "grad_norm": 0.9480273723602295, "learning_rate": 1.506529013900906e-05, "loss": 0.706, "step": 7875 }, { "epoch": 1.0532227868414015, "grad_norm": 1.1259511709213257, "learning_rate": 1.5064045254797797e-05, "loss": 0.8082, "step": 7876 }, { "epoch": 1.0533565124364803, "grad_norm": 1.2826861143112183, "learning_rate": 1.5062800265031358e-05, "loss": 0.7976, "step": 7877 }, { "epoch": 1.0534902380315592, "grad_norm": 1.0068809986114502, "learning_rate": 1.506155516973569e-05, "loss": 0.6445, "step": 7878 }, { "epoch": 1.0536239636266382, "grad_norm": 1.1160383224487305, "learning_rate": 1.5060309968936753e-05, "loss": 0.6528, "step": 7879 }, { "epoch": 1.053757689221717, "grad_norm": 1.177682638168335, "learning_rate": 1.5059064662660491e-05, "loss": 0.7247, "step": 7880 }, { "epoch": 1.053891414816796, "grad_norm": 1.1462388038635254, "learning_rate": 1.5057819250932872e-05, "loss": 0.7115, "step": 7881 }, { "epoch": 1.0540251404118748, "grad_norm": 1.2165483236312866, "learning_rate": 1.5056573733779848e-05, "loss": 0.82, "step": 7882 }, { "epoch": 1.0541588660069536, "grad_norm": 1.0892783403396606, "learning_rate": 1.5055328111227386e-05, "loss": 0.7073, "step": 7883 }, { "epoch": 1.0542925916020327, "grad_norm": 1.057525396347046, "learning_rate": 1.5054082383301441e-05, "loss": 0.7064, "step": 7884 }, { "epoch": 1.0544263171971116, "grad_norm": 1.0444414615631104, "learning_rate": 1.505283655002799e-05, "loss": 0.7184, "step": 7885 }, { "epoch": 1.0545600427921904, "grad_norm": 1.2299485206604004, "learning_rate": 1.5051590611432994e-05, "loss": 0.666, "step": 7886 }, { "epoch": 1.0546937683872692, "grad_norm": 1.2642645835876465, "learning_rate": 1.5050344567542425e-05, "loss": 0.8481, "step": 7887 }, { "epoch": 1.0548274939823483, "grad_norm": 1.0813515186309814, "learning_rate": 1.5049098418382257e-05, "loss": 0.6449, "step": 7888 }, { "epoch": 1.0549612195774272, "grad_norm": 1.2013095617294312, "learning_rate": 1.5047852163978464e-05, "loss": 0.7718, "step": 7889 }, { "epoch": 1.055094945172506, "grad_norm": 1.0395716428756714, "learning_rate": 1.5046605804357021e-05, "loss": 0.6732, "step": 7890 }, { "epoch": 1.0552286707675849, "grad_norm": 1.2394373416900635, "learning_rate": 1.5045359339543912e-05, "loss": 0.7364, "step": 7891 }, { "epoch": 1.0553623963626637, "grad_norm": 1.1406208276748657, "learning_rate": 1.5044112769565113e-05, "loss": 0.8007, "step": 7892 }, { "epoch": 1.0554961219577428, "grad_norm": 1.0284279584884644, "learning_rate": 1.5042866094446615e-05, "loss": 0.7638, "step": 7893 }, { "epoch": 1.0556298475528216, "grad_norm": 1.0506573915481567, "learning_rate": 1.5041619314214396e-05, "loss": 0.692, "step": 7894 }, { "epoch": 1.0557635731479005, "grad_norm": 1.2404394149780273, "learning_rate": 1.5040372428894446e-05, "loss": 0.6897, "step": 7895 }, { "epoch": 1.0558972987429793, "grad_norm": 1.128794550895691, "learning_rate": 1.5039125438512755e-05, "loss": 0.7774, "step": 7896 }, { "epoch": 1.0560310243380584, "grad_norm": 1.0648314952850342, "learning_rate": 1.5037878343095319e-05, "loss": 0.6927, "step": 7897 }, { "epoch": 1.0561647499331372, "grad_norm": 1.1216869354248047, "learning_rate": 1.5036631142668125e-05, "loss": 0.7425, "step": 7898 }, { "epoch": 1.056298475528216, "grad_norm": 0.9986578226089478, "learning_rate": 1.5035383837257178e-05, "loss": 0.6363, "step": 7899 }, { "epoch": 1.056432201123295, "grad_norm": 1.1233991384506226, "learning_rate": 1.5034136426888472e-05, "loss": 0.739, "step": 7900 }, { "epoch": 1.0565659267183738, "grad_norm": 1.2780122756958008, "learning_rate": 1.5032888911588008e-05, "loss": 0.7462, "step": 7901 }, { "epoch": 1.0566996523134529, "grad_norm": 1.181603193283081, "learning_rate": 1.5031641291381793e-05, "loss": 0.7972, "step": 7902 }, { "epoch": 1.0568333779085317, "grad_norm": 1.107285737991333, "learning_rate": 1.5030393566295829e-05, "loss": 0.6787, "step": 7903 }, { "epoch": 1.0569671035036106, "grad_norm": 1.1563758850097656, "learning_rate": 1.5029145736356125e-05, "loss": 0.7457, "step": 7904 }, { "epoch": 1.0571008290986894, "grad_norm": 1.2809792757034302, "learning_rate": 1.5027897801588692e-05, "loss": 0.8484, "step": 7905 }, { "epoch": 1.0572345546937685, "grad_norm": 1.0872395038604736, "learning_rate": 1.5026649762019539e-05, "loss": 0.6895, "step": 7906 }, { "epoch": 1.0573682802888473, "grad_norm": 1.1085779666900635, "learning_rate": 1.5025401617674682e-05, "loss": 0.7008, "step": 7907 }, { "epoch": 1.0575020058839262, "grad_norm": 1.1455177068710327, "learning_rate": 1.5024153368580137e-05, "loss": 0.7664, "step": 7908 }, { "epoch": 1.057635731479005, "grad_norm": 1.0578618049621582, "learning_rate": 1.5022905014761921e-05, "loss": 0.6531, "step": 7909 }, { "epoch": 1.0577694570740839, "grad_norm": 1.330992341041565, "learning_rate": 1.5021656556246056e-05, "loss": 0.7683, "step": 7910 }, { "epoch": 1.057903182669163, "grad_norm": 1.0405502319335938, "learning_rate": 1.5020407993058568e-05, "loss": 0.6146, "step": 7911 }, { "epoch": 1.0580369082642418, "grad_norm": 1.152622938156128, "learning_rate": 1.5019159325225476e-05, "loss": 0.7217, "step": 7912 }, { "epoch": 1.0581706338593206, "grad_norm": 1.235860824584961, "learning_rate": 1.5017910552772813e-05, "loss": 0.665, "step": 7913 }, { "epoch": 1.0583043594543995, "grad_norm": 1.179604172706604, "learning_rate": 1.501666167572661e-05, "loss": 0.7759, "step": 7914 }, { "epoch": 1.0584380850494786, "grad_norm": 1.1920925378799438, "learning_rate": 1.501541269411289e-05, "loss": 0.7098, "step": 7915 }, { "epoch": 1.0585718106445574, "grad_norm": 1.165855050086975, "learning_rate": 1.5014163607957691e-05, "loss": 0.7255, "step": 7916 }, { "epoch": 1.0587055362396363, "grad_norm": 1.313413381576538, "learning_rate": 1.501291441728705e-05, "loss": 0.7119, "step": 7917 }, { "epoch": 1.058839261834715, "grad_norm": 1.2340948581695557, "learning_rate": 1.5011665122127008e-05, "loss": 0.7101, "step": 7918 }, { "epoch": 1.0589729874297942, "grad_norm": 1.2666867971420288, "learning_rate": 1.5010415722503599e-05, "loss": 0.7976, "step": 7919 }, { "epoch": 1.059106713024873, "grad_norm": 1.1065138578414917, "learning_rate": 1.500916621844287e-05, "loss": 0.7337, "step": 7920 }, { "epoch": 1.0592404386199519, "grad_norm": 1.1073086261749268, "learning_rate": 1.5007916609970864e-05, "loss": 0.7428, "step": 7921 }, { "epoch": 1.0593741642150307, "grad_norm": 1.1413471698760986, "learning_rate": 1.5006666897113632e-05, "loss": 0.7266, "step": 7922 }, { "epoch": 1.0595078898101096, "grad_norm": 1.0360164642333984, "learning_rate": 1.5005417079897213e-05, "loss": 0.7063, "step": 7923 }, { "epoch": 1.0596416154051886, "grad_norm": 1.0270274877548218, "learning_rate": 1.5004167158347667e-05, "loss": 0.6272, "step": 7924 }, { "epoch": 1.0597753410002675, "grad_norm": 1.1838332414627075, "learning_rate": 1.5002917132491047e-05, "loss": 0.7247, "step": 7925 }, { "epoch": 1.0599090665953463, "grad_norm": 1.1557414531707764, "learning_rate": 1.5001667002353407e-05, "loss": 0.8115, "step": 7926 }, { "epoch": 1.0600427921904252, "grad_norm": 1.0948350429534912, "learning_rate": 1.5000416767960802e-05, "loss": 0.6706, "step": 7927 }, { "epoch": 1.060176517785504, "grad_norm": 1.0772641897201538, "learning_rate": 1.4999166429339296e-05, "loss": 0.7422, "step": 7928 }, { "epoch": 1.060310243380583, "grad_norm": 1.1993083953857422, "learning_rate": 1.4997915986514945e-05, "loss": 0.7016, "step": 7929 }, { "epoch": 1.060443968975662, "grad_norm": 1.3113874197006226, "learning_rate": 1.4996665439513825e-05, "loss": 0.8047, "step": 7930 }, { "epoch": 1.0605776945707408, "grad_norm": 0.9327312707901001, "learning_rate": 1.4995414788361991e-05, "loss": 0.6063, "step": 7931 }, { "epoch": 1.0607114201658197, "grad_norm": 1.0853824615478516, "learning_rate": 1.4994164033085516e-05, "loss": 0.7428, "step": 7932 }, { "epoch": 1.0608451457608987, "grad_norm": 1.1677602529525757, "learning_rate": 1.4992913173710471e-05, "loss": 0.7379, "step": 7933 }, { "epoch": 1.0609788713559776, "grad_norm": 1.313225507736206, "learning_rate": 1.4991662210262929e-05, "loss": 0.7372, "step": 7934 }, { "epoch": 1.0611125969510564, "grad_norm": 1.1834877729415894, "learning_rate": 1.4990411142768963e-05, "loss": 0.6436, "step": 7935 }, { "epoch": 1.0612463225461353, "grad_norm": 1.192625880241394, "learning_rate": 1.4989159971254652e-05, "loss": 0.7257, "step": 7936 }, { "epoch": 1.0613800481412143, "grad_norm": 1.0600441694259644, "learning_rate": 1.4987908695746078e-05, "loss": 0.7332, "step": 7937 }, { "epoch": 1.0615137737362932, "grad_norm": 1.1197410821914673, "learning_rate": 1.498665731626932e-05, "loss": 0.6785, "step": 7938 }, { "epoch": 1.061647499331372, "grad_norm": 1.2340052127838135, "learning_rate": 1.4985405832850462e-05, "loss": 0.6885, "step": 7939 }, { "epoch": 1.0617812249264509, "grad_norm": 1.1823999881744385, "learning_rate": 1.4984154245515587e-05, "loss": 0.7914, "step": 7940 }, { "epoch": 1.0619149505215297, "grad_norm": 1.3604391813278198, "learning_rate": 1.4982902554290787e-05, "loss": 0.7062, "step": 7941 }, { "epoch": 1.0620486761166088, "grad_norm": 1.12320876121521, "learning_rate": 1.4981650759202154e-05, "loss": 0.703, "step": 7942 }, { "epoch": 1.0621824017116877, "grad_norm": 1.0019127130508423, "learning_rate": 1.4980398860275775e-05, "loss": 0.6569, "step": 7943 }, { "epoch": 1.0623161273067665, "grad_norm": 1.2252477407455444, "learning_rate": 1.497914685753775e-05, "loss": 0.7438, "step": 7944 }, { "epoch": 1.0624498529018453, "grad_norm": 1.2669659852981567, "learning_rate": 1.4977894751014171e-05, "loss": 0.8165, "step": 7945 }, { "epoch": 1.0625835784969242, "grad_norm": 1.1917272806167603, "learning_rate": 1.497664254073114e-05, "loss": 0.7416, "step": 7946 }, { "epoch": 1.0627173040920033, "grad_norm": 1.1570379734039307, "learning_rate": 1.4975390226714762e-05, "loss": 0.7177, "step": 7947 }, { "epoch": 1.0628510296870821, "grad_norm": 1.1439061164855957, "learning_rate": 1.4974137808991128e-05, "loss": 0.6859, "step": 7948 }, { "epoch": 1.062984755282161, "grad_norm": 1.3195852041244507, "learning_rate": 1.4972885287586353e-05, "loss": 0.8295, "step": 7949 }, { "epoch": 1.0631184808772398, "grad_norm": 1.20064115524292, "learning_rate": 1.4971632662526545e-05, "loss": 0.7172, "step": 7950 }, { "epoch": 1.0632522064723189, "grad_norm": 1.0977063179016113, "learning_rate": 1.4970379933837811e-05, "loss": 0.7198, "step": 7951 }, { "epoch": 1.0633859320673977, "grad_norm": 1.128766417503357, "learning_rate": 1.4969127101546263e-05, "loss": 0.7118, "step": 7952 }, { "epoch": 1.0635196576624766, "grad_norm": 1.0214426517486572, "learning_rate": 1.4967874165678016e-05, "loss": 0.6831, "step": 7953 }, { "epoch": 1.0636533832575554, "grad_norm": 1.2549512386322021, "learning_rate": 1.4966621126259184e-05, "loss": 0.7759, "step": 7954 }, { "epoch": 1.0637871088526345, "grad_norm": 1.146721363067627, "learning_rate": 1.4965367983315889e-05, "loss": 0.7533, "step": 7955 }, { "epoch": 1.0639208344477133, "grad_norm": 1.2159944772720337, "learning_rate": 1.4964114736874249e-05, "loss": 0.7325, "step": 7956 }, { "epoch": 1.0640545600427922, "grad_norm": 1.1576440334320068, "learning_rate": 1.4962861386960389e-05, "loss": 0.7394, "step": 7957 }, { "epoch": 1.064188285637871, "grad_norm": 1.2468523979187012, "learning_rate": 1.4961607933600431e-05, "loss": 0.7831, "step": 7958 }, { "epoch": 1.06432201123295, "grad_norm": 1.1124165058135986, "learning_rate": 1.4960354376820503e-05, "loss": 0.6631, "step": 7959 }, { "epoch": 1.064455736828029, "grad_norm": 1.00448477268219, "learning_rate": 1.4959100716646733e-05, "loss": 0.695, "step": 7960 }, { "epoch": 1.0645894624231078, "grad_norm": 0.9752576351165771, "learning_rate": 1.4957846953105257e-05, "loss": 0.6546, "step": 7961 }, { "epoch": 1.0647231880181867, "grad_norm": 1.0262619256973267, "learning_rate": 1.4956593086222204e-05, "loss": 0.7418, "step": 7962 }, { "epoch": 1.0648569136132655, "grad_norm": 1.0726574659347534, "learning_rate": 1.495533911602371e-05, "loss": 0.6765, "step": 7963 }, { "epoch": 1.0649906392083446, "grad_norm": 1.15571928024292, "learning_rate": 1.4954085042535915e-05, "loss": 0.7203, "step": 7964 }, { "epoch": 1.0651243648034234, "grad_norm": 1.2726913690567017, "learning_rate": 1.4952830865784958e-05, "loss": 0.7127, "step": 7965 }, { "epoch": 1.0652580903985023, "grad_norm": 1.2412673234939575, "learning_rate": 1.4951576585796984e-05, "loss": 0.7437, "step": 7966 }, { "epoch": 1.0653918159935811, "grad_norm": 1.3505381345748901, "learning_rate": 1.495032220259813e-05, "loss": 0.816, "step": 7967 }, { "epoch": 1.06552554158866, "grad_norm": 1.26339852809906, "learning_rate": 1.4949067716214545e-05, "loss": 0.8216, "step": 7968 }, { "epoch": 1.065659267183739, "grad_norm": 1.1283643245697021, "learning_rate": 1.4947813126672381e-05, "loss": 0.736, "step": 7969 }, { "epoch": 1.065792992778818, "grad_norm": 1.1084765195846558, "learning_rate": 1.4946558433997792e-05, "loss": 0.7229, "step": 7970 }, { "epoch": 1.0659267183738967, "grad_norm": 1.316051721572876, "learning_rate": 1.494530363821692e-05, "loss": 0.8169, "step": 7971 }, { "epoch": 1.0660604439689756, "grad_norm": 1.1914138793945312, "learning_rate": 1.4944048739355928e-05, "loss": 0.7305, "step": 7972 }, { "epoch": 1.0661941695640547, "grad_norm": 1.071157693862915, "learning_rate": 1.4942793737440968e-05, "loss": 0.7483, "step": 7973 }, { "epoch": 1.0663278951591335, "grad_norm": 1.2871873378753662, "learning_rate": 1.4941538632498204e-05, "loss": 0.7611, "step": 7974 }, { "epoch": 1.0664616207542124, "grad_norm": 1.0527194738388062, "learning_rate": 1.49402834245538e-05, "loss": 0.7416, "step": 7975 }, { "epoch": 1.0665953463492912, "grad_norm": 1.2433918714523315, "learning_rate": 1.493902811363391e-05, "loss": 0.726, "step": 7976 }, { "epoch": 1.06672907194437, "grad_norm": 1.064510464668274, "learning_rate": 1.4937772699764707e-05, "loss": 0.6955, "step": 7977 }, { "epoch": 1.0668627975394491, "grad_norm": 1.1654877662658691, "learning_rate": 1.4936517182972359e-05, "loss": 0.8231, "step": 7978 }, { "epoch": 1.066996523134528, "grad_norm": 1.1321035623550415, "learning_rate": 1.493526156328303e-05, "loss": 0.7221, "step": 7979 }, { "epoch": 1.0671302487296068, "grad_norm": 1.1151471138000488, "learning_rate": 1.4934005840722896e-05, "loss": 0.6988, "step": 7980 }, { "epoch": 1.0672639743246857, "grad_norm": 1.085904836654663, "learning_rate": 1.4932750015318134e-05, "loss": 0.6893, "step": 7981 }, { "epoch": 1.0673976999197647, "grad_norm": 1.0934933423995972, "learning_rate": 1.493149408709492e-05, "loss": 0.7431, "step": 7982 }, { "epoch": 1.0675314255148436, "grad_norm": 1.1888419389724731, "learning_rate": 1.493023805607943e-05, "loss": 0.8035, "step": 7983 }, { "epoch": 1.0676651511099224, "grad_norm": 1.2238980531692505, "learning_rate": 1.4928981922297842e-05, "loss": 0.6947, "step": 7984 }, { "epoch": 1.0677988767050013, "grad_norm": 1.1276289224624634, "learning_rate": 1.4927725685776344e-05, "loss": 0.6843, "step": 7985 }, { "epoch": 1.0679326023000801, "grad_norm": 1.111707091331482, "learning_rate": 1.492646934654112e-05, "loss": 0.6745, "step": 7986 }, { "epoch": 1.0680663278951592, "grad_norm": 1.0623356103897095, "learning_rate": 1.4925212904618355e-05, "loss": 0.6081, "step": 7987 }, { "epoch": 1.068200053490238, "grad_norm": 1.073350191116333, "learning_rate": 1.4923956360034242e-05, "loss": 0.5927, "step": 7988 }, { "epoch": 1.068333779085317, "grad_norm": 1.1215358972549438, "learning_rate": 1.492269971281497e-05, "loss": 0.6867, "step": 7989 }, { "epoch": 1.0684675046803958, "grad_norm": 1.220745325088501, "learning_rate": 1.4921442962986732e-05, "loss": 0.7181, "step": 7990 }, { "epoch": 1.0686012302754748, "grad_norm": 1.0404924154281616, "learning_rate": 1.4920186110575728e-05, "loss": 0.7144, "step": 7991 }, { "epoch": 1.0687349558705537, "grad_norm": 1.3028619289398193, "learning_rate": 1.4918929155608148e-05, "loss": 0.726, "step": 7992 }, { "epoch": 1.0688686814656325, "grad_norm": 1.1932357549667358, "learning_rate": 1.4917672098110198e-05, "loss": 0.7601, "step": 7993 }, { "epoch": 1.0690024070607114, "grad_norm": 1.099233627319336, "learning_rate": 1.491641493810808e-05, "loss": 0.6799, "step": 7994 }, { "epoch": 1.0691361326557902, "grad_norm": 1.0862386226654053, "learning_rate": 1.4915157675627999e-05, "loss": 0.686, "step": 7995 }, { "epoch": 1.0692698582508693, "grad_norm": 1.113158941268921, "learning_rate": 1.4913900310696154e-05, "loss": 0.6449, "step": 7996 }, { "epoch": 1.0694035838459481, "grad_norm": 1.1517256498336792, "learning_rate": 1.4912642843338762e-05, "loss": 0.703, "step": 7997 }, { "epoch": 1.069537309441027, "grad_norm": 1.1889827251434326, "learning_rate": 1.4911385273582033e-05, "loss": 0.7619, "step": 7998 }, { "epoch": 1.0696710350361058, "grad_norm": 1.0371898412704468, "learning_rate": 1.4910127601452175e-05, "loss": 0.6199, "step": 7999 }, { "epoch": 1.069804760631185, "grad_norm": 1.2458781003952026, "learning_rate": 1.4908869826975404e-05, "loss": 0.7884, "step": 8000 }, { "epoch": 1.0699384862262638, "grad_norm": 1.1779608726501465, "learning_rate": 1.4907611950177943e-05, "loss": 0.7183, "step": 8001 }, { "epoch": 1.0700722118213426, "grad_norm": 1.0234830379486084, "learning_rate": 1.4906353971086004e-05, "loss": 0.6898, "step": 8002 }, { "epoch": 1.0702059374164214, "grad_norm": 1.1388604640960693, "learning_rate": 1.4905095889725814e-05, "loss": 0.6996, "step": 8003 }, { "epoch": 1.0703396630115003, "grad_norm": 1.0985702276229858, "learning_rate": 1.4903837706123591e-05, "loss": 0.7371, "step": 8004 }, { "epoch": 1.0704733886065794, "grad_norm": 1.0438344478607178, "learning_rate": 1.4902579420305564e-05, "loss": 0.6736, "step": 8005 }, { "epoch": 1.0706071142016582, "grad_norm": 1.1711899042129517, "learning_rate": 1.4901321032297964e-05, "loss": 0.7527, "step": 8006 }, { "epoch": 1.070740839796737, "grad_norm": 1.2331076860427856, "learning_rate": 1.4900062542127013e-05, "loss": 0.7944, "step": 8007 }, { "epoch": 1.070874565391816, "grad_norm": 1.193842887878418, "learning_rate": 1.4898803949818947e-05, "loss": 0.8149, "step": 8008 }, { "epoch": 1.071008290986895, "grad_norm": 1.1356539726257324, "learning_rate": 1.48975452554e-05, "loss": 0.7538, "step": 8009 }, { "epoch": 1.0711420165819738, "grad_norm": 1.2428690195083618, "learning_rate": 1.4896286458896411e-05, "loss": 0.8581, "step": 8010 }, { "epoch": 1.0712757421770527, "grad_norm": 1.129564642906189, "learning_rate": 1.4895027560334418e-05, "loss": 0.7538, "step": 8011 }, { "epoch": 1.0714094677721315, "grad_norm": 1.2056195735931396, "learning_rate": 1.4893768559740256e-05, "loss": 0.7199, "step": 8012 }, { "epoch": 1.0715431933672104, "grad_norm": 1.0560389757156372, "learning_rate": 1.4892509457140171e-05, "loss": 0.743, "step": 8013 }, { "epoch": 1.0716769189622894, "grad_norm": 1.0318557024002075, "learning_rate": 1.4891250252560408e-05, "loss": 0.6776, "step": 8014 }, { "epoch": 1.0718106445573683, "grad_norm": 1.140726089477539, "learning_rate": 1.4889990946027217e-05, "loss": 0.7679, "step": 8015 }, { "epoch": 1.0719443701524471, "grad_norm": 1.1712989807128906, "learning_rate": 1.4888731537566841e-05, "loss": 0.827, "step": 8016 }, { "epoch": 1.072078095747526, "grad_norm": 1.0172204971313477, "learning_rate": 1.4887472027205534e-05, "loss": 0.709, "step": 8017 }, { "epoch": 1.072211821342605, "grad_norm": 1.14127779006958, "learning_rate": 1.4886212414969551e-05, "loss": 0.7133, "step": 8018 }, { "epoch": 1.072345546937684, "grad_norm": 1.075136423110962, "learning_rate": 1.4884952700885145e-05, "loss": 0.7841, "step": 8019 }, { "epoch": 1.0724792725327628, "grad_norm": 1.0378270149230957, "learning_rate": 1.4883692884978574e-05, "loss": 0.6831, "step": 8020 }, { "epoch": 1.0726129981278416, "grad_norm": 1.1945972442626953, "learning_rate": 1.4882432967276099e-05, "loss": 0.7693, "step": 8021 }, { "epoch": 1.0727467237229207, "grad_norm": 1.317657232284546, "learning_rate": 1.4881172947803978e-05, "loss": 0.8187, "step": 8022 }, { "epoch": 1.0728804493179995, "grad_norm": 1.1680006980895996, "learning_rate": 1.4879912826588483e-05, "loss": 0.8914, "step": 8023 }, { "epoch": 1.0730141749130784, "grad_norm": 1.0466971397399902, "learning_rate": 1.4878652603655873e-05, "loss": 0.6712, "step": 8024 }, { "epoch": 1.0731479005081572, "grad_norm": 1.1777764558792114, "learning_rate": 1.4877392279032415e-05, "loss": 0.7747, "step": 8025 }, { "epoch": 1.073281626103236, "grad_norm": 1.130785346031189, "learning_rate": 1.4876131852744382e-05, "loss": 0.7119, "step": 8026 }, { "epoch": 1.0734153516983151, "grad_norm": 1.1580387353897095, "learning_rate": 1.487487132481805e-05, "loss": 0.7873, "step": 8027 }, { "epoch": 1.073549077293394, "grad_norm": 1.142564296722412, "learning_rate": 1.4873610695279688e-05, "loss": 0.7553, "step": 8028 }, { "epoch": 1.0736828028884728, "grad_norm": 1.1370848417282104, "learning_rate": 1.4872349964155573e-05, "loss": 0.7562, "step": 8029 }, { "epoch": 1.0738165284835517, "grad_norm": 1.124147653579712, "learning_rate": 1.4871089131471987e-05, "loss": 0.7632, "step": 8030 }, { "epoch": 1.0739502540786305, "grad_norm": 1.145578145980835, "learning_rate": 1.4869828197255208e-05, "loss": 0.7407, "step": 8031 }, { "epoch": 1.0740839796737096, "grad_norm": 1.069765567779541, "learning_rate": 1.4868567161531523e-05, "loss": 0.6926, "step": 8032 }, { "epoch": 1.0742177052687885, "grad_norm": 1.128211259841919, "learning_rate": 1.486730602432721e-05, "loss": 0.7774, "step": 8033 }, { "epoch": 1.0743514308638673, "grad_norm": 1.1276506185531616, "learning_rate": 1.4866044785668563e-05, "loss": 0.7169, "step": 8034 }, { "epoch": 1.0744851564589462, "grad_norm": 1.181768536567688, "learning_rate": 1.4864783445581869e-05, "loss": 0.7352, "step": 8035 }, { "epoch": 1.0746188820540252, "grad_norm": 1.1667119264602661, "learning_rate": 1.486352200409342e-05, "loss": 0.7638, "step": 8036 }, { "epoch": 1.074752607649104, "grad_norm": 1.1565440893173218, "learning_rate": 1.4862260461229507e-05, "loss": 0.7129, "step": 8037 }, { "epoch": 1.074886333244183, "grad_norm": 1.0787787437438965, "learning_rate": 1.4860998817016427e-05, "loss": 0.7316, "step": 8038 }, { "epoch": 1.0750200588392618, "grad_norm": 1.3282588720321655, "learning_rate": 1.485973707148048e-05, "loss": 0.7993, "step": 8039 }, { "epoch": 1.0751537844343408, "grad_norm": 1.2888188362121582, "learning_rate": 1.4858475224647964e-05, "loss": 0.7518, "step": 8040 }, { "epoch": 1.0752875100294197, "grad_norm": 1.0674335956573486, "learning_rate": 1.485721327654518e-05, "loss": 0.664, "step": 8041 }, { "epoch": 1.0754212356244985, "grad_norm": 1.2739207744598389, "learning_rate": 1.4855951227198433e-05, "loss": 0.7982, "step": 8042 }, { "epoch": 1.0755549612195774, "grad_norm": 1.1310818195343018, "learning_rate": 1.485468907663403e-05, "loss": 0.7283, "step": 8043 }, { "epoch": 1.0756886868146562, "grad_norm": 1.0344246625900269, "learning_rate": 1.4853426824878279e-05, "loss": 0.7179, "step": 8044 }, { "epoch": 1.0758224124097353, "grad_norm": 1.0455644130706787, "learning_rate": 1.4852164471957486e-05, "loss": 0.6618, "step": 8045 }, { "epoch": 1.0759561380048142, "grad_norm": 1.2927911281585693, "learning_rate": 1.485090201789797e-05, "loss": 0.7691, "step": 8046 }, { "epoch": 1.076089863599893, "grad_norm": 1.1126906871795654, "learning_rate": 1.4849639462726046e-05, "loss": 0.6866, "step": 8047 }, { "epoch": 1.0762235891949719, "grad_norm": 1.204754114151001, "learning_rate": 1.4848376806468025e-05, "loss": 0.7977, "step": 8048 }, { "epoch": 1.0763573147900507, "grad_norm": 0.9937276244163513, "learning_rate": 1.484711404915023e-05, "loss": 0.6615, "step": 8049 }, { "epoch": 1.0764910403851298, "grad_norm": 1.0892422199249268, "learning_rate": 1.4845851190798981e-05, "loss": 0.752, "step": 8050 }, { "epoch": 1.0766247659802086, "grad_norm": 1.1484980583190918, "learning_rate": 1.48445882314406e-05, "loss": 0.7497, "step": 8051 }, { "epoch": 1.0767584915752875, "grad_norm": 1.1781418323516846, "learning_rate": 1.4843325171101413e-05, "loss": 0.8083, "step": 8052 }, { "epoch": 1.0768922171703663, "grad_norm": 1.185939908027649, "learning_rate": 1.484206200980775e-05, "loss": 0.7941, "step": 8053 }, { "epoch": 1.0770259427654454, "grad_norm": 1.1410280466079712, "learning_rate": 1.4840798747585934e-05, "loss": 0.7479, "step": 8054 }, { "epoch": 1.0771596683605242, "grad_norm": 1.0887832641601562, "learning_rate": 1.4839535384462305e-05, "loss": 0.6584, "step": 8055 }, { "epoch": 1.077293393955603, "grad_norm": 1.120153784751892, "learning_rate": 1.4838271920463188e-05, "loss": 0.6967, "step": 8056 }, { "epoch": 1.077427119550682, "grad_norm": 1.2411237955093384, "learning_rate": 1.4837008355614923e-05, "loss": 0.7097, "step": 8057 }, { "epoch": 1.077560845145761, "grad_norm": 1.2029176950454712, "learning_rate": 1.4835744689943844e-05, "loss": 0.73, "step": 8058 }, { "epoch": 1.0776945707408399, "grad_norm": 1.1196104288101196, "learning_rate": 1.4834480923476302e-05, "loss": 0.7253, "step": 8059 }, { "epoch": 1.0778282963359187, "grad_norm": 1.145012617111206, "learning_rate": 1.4833217056238628e-05, "loss": 0.7388, "step": 8060 }, { "epoch": 1.0779620219309975, "grad_norm": 1.3130682706832886, "learning_rate": 1.4831953088257167e-05, "loss": 0.7989, "step": 8061 }, { "epoch": 1.0780957475260764, "grad_norm": 1.0504564046859741, "learning_rate": 1.4830689019558269e-05, "loss": 0.7565, "step": 8062 }, { "epoch": 1.0782294731211555, "grad_norm": 1.3305295705795288, "learning_rate": 1.4829424850168282e-05, "loss": 0.7663, "step": 8063 }, { "epoch": 1.0783631987162343, "grad_norm": 1.146509051322937, "learning_rate": 1.4828160580113554e-05, "loss": 0.748, "step": 8064 }, { "epoch": 1.0784969243113132, "grad_norm": 1.2264225482940674, "learning_rate": 1.4826896209420439e-05, "loss": 0.767, "step": 8065 }, { "epoch": 1.078630649906392, "grad_norm": 1.201645016670227, "learning_rate": 1.4825631738115289e-05, "loss": 0.7268, "step": 8066 }, { "epoch": 1.078764375501471, "grad_norm": 1.1194539070129395, "learning_rate": 1.4824367166224468e-05, "loss": 0.7099, "step": 8067 }, { "epoch": 1.07889810109655, "grad_norm": 1.1586989164352417, "learning_rate": 1.4823102493774325e-05, "loss": 0.7231, "step": 8068 }, { "epoch": 1.0790318266916288, "grad_norm": 1.1162248849868774, "learning_rate": 1.482183772079123e-05, "loss": 0.731, "step": 8069 }, { "epoch": 1.0791655522867076, "grad_norm": 1.174980878829956, "learning_rate": 1.482057284730154e-05, "loss": 0.7975, "step": 8070 }, { "epoch": 1.0792992778817865, "grad_norm": 1.1753500699996948, "learning_rate": 1.4819307873331619e-05, "loss": 0.6958, "step": 8071 }, { "epoch": 1.0794330034768655, "grad_norm": 1.130003809928894, "learning_rate": 1.4818042798907841e-05, "loss": 0.7167, "step": 8072 }, { "epoch": 1.0795667290719444, "grad_norm": 1.296520471572876, "learning_rate": 1.481677762405657e-05, "loss": 0.7385, "step": 8073 }, { "epoch": 1.0797004546670232, "grad_norm": 1.1674833297729492, "learning_rate": 1.4815512348804177e-05, "loss": 0.7438, "step": 8074 }, { "epoch": 1.079834180262102, "grad_norm": 1.1946903467178345, "learning_rate": 1.4814246973177038e-05, "loss": 0.6894, "step": 8075 }, { "epoch": 1.0799679058571812, "grad_norm": 1.1635011434555054, "learning_rate": 1.481298149720153e-05, "loss": 0.7705, "step": 8076 }, { "epoch": 1.08010163145226, "grad_norm": 1.1581525802612305, "learning_rate": 1.4811715920904024e-05, "loss": 0.6686, "step": 8077 }, { "epoch": 1.0802353570473389, "grad_norm": 1.2597030401229858, "learning_rate": 1.4810450244310905e-05, "loss": 0.8075, "step": 8078 }, { "epoch": 1.0803690826424177, "grad_norm": 1.0820128917694092, "learning_rate": 1.4809184467448554e-05, "loss": 0.7388, "step": 8079 }, { "epoch": 1.0805028082374966, "grad_norm": 1.1963951587677002, "learning_rate": 1.4807918590343358e-05, "loss": 0.7591, "step": 8080 }, { "epoch": 1.0806365338325756, "grad_norm": 1.13186776638031, "learning_rate": 1.4806652613021697e-05, "loss": 0.7629, "step": 8081 }, { "epoch": 1.0807702594276545, "grad_norm": 1.1139552593231201, "learning_rate": 1.4805386535509963e-05, "loss": 0.6952, "step": 8082 }, { "epoch": 1.0809039850227333, "grad_norm": 1.1565749645233154, "learning_rate": 1.4804120357834545e-05, "loss": 0.6826, "step": 8083 }, { "epoch": 1.0810377106178122, "grad_norm": 1.1168111562728882, "learning_rate": 1.4802854080021831e-05, "loss": 0.8055, "step": 8084 }, { "epoch": 1.0811714362128912, "grad_norm": 1.1635884046554565, "learning_rate": 1.480158770209822e-05, "loss": 0.7753, "step": 8085 }, { "epoch": 1.08130516180797, "grad_norm": 1.1757169961929321, "learning_rate": 1.4800321224090114e-05, "loss": 0.7215, "step": 8086 }, { "epoch": 1.081438887403049, "grad_norm": 1.0689467191696167, "learning_rate": 1.47990546460239e-05, "loss": 0.6968, "step": 8087 }, { "epoch": 1.0815726129981278, "grad_norm": 1.1055799722671509, "learning_rate": 1.4797787967925988e-05, "loss": 0.7091, "step": 8088 }, { "epoch": 1.0817063385932066, "grad_norm": 1.1361255645751953, "learning_rate": 1.4796521189822774e-05, "loss": 0.6615, "step": 8089 }, { "epoch": 1.0818400641882857, "grad_norm": 1.2079881429672241, "learning_rate": 1.4795254311740666e-05, "loss": 0.7231, "step": 8090 }, { "epoch": 1.0819737897833646, "grad_norm": 1.0947825908660889, "learning_rate": 1.479398733370607e-05, "loss": 0.7818, "step": 8091 }, { "epoch": 1.0821075153784434, "grad_norm": 1.1490260362625122, "learning_rate": 1.47927202557454e-05, "loss": 0.7583, "step": 8092 }, { "epoch": 1.0822412409735223, "grad_norm": 1.160922884941101, "learning_rate": 1.4791453077885056e-05, "loss": 0.7376, "step": 8093 }, { "epoch": 1.0823749665686013, "grad_norm": 1.2203446626663208, "learning_rate": 1.479018580015146e-05, "loss": 0.8051, "step": 8094 }, { "epoch": 1.0825086921636802, "grad_norm": 1.2284289598464966, "learning_rate": 1.4788918422571023e-05, "loss": 0.8003, "step": 8095 }, { "epoch": 1.082642417758759, "grad_norm": 1.1419718265533447, "learning_rate": 1.4787650945170167e-05, "loss": 0.7464, "step": 8096 }, { "epoch": 1.0827761433538379, "grad_norm": 1.1263338327407837, "learning_rate": 1.4786383367975308e-05, "loss": 0.7531, "step": 8097 }, { "epoch": 1.0829098689489167, "grad_norm": 1.1092720031738281, "learning_rate": 1.4785115691012866e-05, "loss": 0.74, "step": 8098 }, { "epoch": 1.0830435945439958, "grad_norm": 1.1124712228775024, "learning_rate": 1.4783847914309268e-05, "loss": 0.7782, "step": 8099 }, { "epoch": 1.0831773201390746, "grad_norm": 1.1575204133987427, "learning_rate": 1.478258003789094e-05, "loss": 0.7335, "step": 8100 }, { "epoch": 1.0833110457341535, "grad_norm": 1.0983413457870483, "learning_rate": 1.4781312061784302e-05, "loss": 0.6824, "step": 8101 }, { "epoch": 1.0834447713292323, "grad_norm": 1.0680220127105713, "learning_rate": 1.4780043986015792e-05, "loss": 0.6741, "step": 8102 }, { "epoch": 1.0835784969243114, "grad_norm": 0.989000678062439, "learning_rate": 1.4778775810611836e-05, "loss": 0.6589, "step": 8103 }, { "epoch": 1.0837122225193903, "grad_norm": 1.1988558769226074, "learning_rate": 1.4777507535598878e-05, "loss": 0.7461, "step": 8104 }, { "epoch": 1.083845948114469, "grad_norm": 1.2828068733215332, "learning_rate": 1.4776239161003343e-05, "loss": 0.7594, "step": 8105 }, { "epoch": 1.083979673709548, "grad_norm": 1.1348973512649536, "learning_rate": 1.4774970686851671e-05, "loss": 0.706, "step": 8106 }, { "epoch": 1.084113399304627, "grad_norm": 1.1082526445388794, "learning_rate": 1.4773702113170308e-05, "loss": 0.704, "step": 8107 }, { "epoch": 1.0842471248997059, "grad_norm": 1.1594127416610718, "learning_rate": 1.4772433439985692e-05, "loss": 0.7319, "step": 8108 }, { "epoch": 1.0843808504947847, "grad_norm": 1.0741583108901978, "learning_rate": 1.4771164667324262e-05, "loss": 0.7362, "step": 8109 }, { "epoch": 1.0845145760898636, "grad_norm": 1.0652552843093872, "learning_rate": 1.4769895795212476e-05, "loss": 0.7232, "step": 8110 }, { "epoch": 1.0846483016849424, "grad_norm": 1.0738643407821655, "learning_rate": 1.4768626823676775e-05, "loss": 0.7287, "step": 8111 }, { "epoch": 1.0847820272800215, "grad_norm": 0.9982830286026001, "learning_rate": 1.4767357752743612e-05, "loss": 0.7438, "step": 8112 }, { "epoch": 1.0849157528751003, "grad_norm": 1.0571751594543457, "learning_rate": 1.4766088582439438e-05, "loss": 0.7127, "step": 8113 }, { "epoch": 1.0850494784701792, "grad_norm": 1.2180893421173096, "learning_rate": 1.4764819312790706e-05, "loss": 0.7474, "step": 8114 }, { "epoch": 1.085183204065258, "grad_norm": 1.1505693197250366, "learning_rate": 1.4763549943823876e-05, "loss": 0.7408, "step": 8115 }, { "epoch": 1.0853169296603369, "grad_norm": 1.306916356086731, "learning_rate": 1.4762280475565404e-05, "loss": 0.7685, "step": 8116 }, { "epoch": 1.085450655255416, "grad_norm": 1.18074631690979, "learning_rate": 1.4761010908041758e-05, "loss": 0.8804, "step": 8117 }, { "epoch": 1.0855843808504948, "grad_norm": 1.1360831260681152, "learning_rate": 1.475974124127939e-05, "loss": 0.7791, "step": 8118 }, { "epoch": 1.0857181064455736, "grad_norm": 1.148830771446228, "learning_rate": 1.4758471475304773e-05, "loss": 0.719, "step": 8119 }, { "epoch": 1.0858518320406525, "grad_norm": 1.187147855758667, "learning_rate": 1.4757201610144372e-05, "loss": 0.7466, "step": 8120 }, { "epoch": 1.0859855576357316, "grad_norm": 1.1305124759674072, "learning_rate": 1.4755931645824653e-05, "loss": 0.7766, "step": 8121 }, { "epoch": 1.0861192832308104, "grad_norm": 1.1587939262390137, "learning_rate": 1.475466158237209e-05, "loss": 0.7616, "step": 8122 }, { "epoch": 1.0862530088258893, "grad_norm": 1.1204460859298706, "learning_rate": 1.4753391419813156e-05, "loss": 0.7082, "step": 8123 }, { "epoch": 1.0863867344209681, "grad_norm": 1.2031095027923584, "learning_rate": 1.4752121158174331e-05, "loss": 0.7621, "step": 8124 }, { "epoch": 1.0865204600160472, "grad_norm": 1.2159233093261719, "learning_rate": 1.4750850797482082e-05, "loss": 0.8265, "step": 8125 }, { "epoch": 1.086654185611126, "grad_norm": 1.2648773193359375, "learning_rate": 1.4749580337762896e-05, "loss": 0.7418, "step": 8126 }, { "epoch": 1.0867879112062049, "grad_norm": 1.0170738697052002, "learning_rate": 1.4748309779043253e-05, "loss": 0.712, "step": 8127 }, { "epoch": 1.0869216368012837, "grad_norm": 1.3066020011901855, "learning_rate": 1.4747039121349636e-05, "loss": 0.7049, "step": 8128 }, { "epoch": 1.0870553623963626, "grad_norm": 1.2325260639190674, "learning_rate": 1.4745768364708532e-05, "loss": 0.7926, "step": 8129 }, { "epoch": 1.0871890879914416, "grad_norm": 1.176430106163025, "learning_rate": 1.4744497509146427e-05, "loss": 0.6746, "step": 8130 }, { "epoch": 1.0873228135865205, "grad_norm": 1.1593271493911743, "learning_rate": 1.4743226554689811e-05, "loss": 0.7296, "step": 8131 }, { "epoch": 1.0874565391815993, "grad_norm": 1.1588596105575562, "learning_rate": 1.4741955501365177e-05, "loss": 0.7083, "step": 8132 }, { "epoch": 1.0875902647766782, "grad_norm": 1.0420947074890137, "learning_rate": 1.474068434919902e-05, "loss": 0.6524, "step": 8133 }, { "epoch": 1.087723990371757, "grad_norm": 1.1558109521865845, "learning_rate": 1.473941309821783e-05, "loss": 0.7209, "step": 8134 }, { "epoch": 1.0878577159668361, "grad_norm": 1.224700927734375, "learning_rate": 1.4738141748448112e-05, "loss": 0.7218, "step": 8135 }, { "epoch": 1.087991441561915, "grad_norm": 1.0838958024978638, "learning_rate": 1.4736870299916361e-05, "loss": 0.7305, "step": 8136 }, { "epoch": 1.0881251671569938, "grad_norm": 1.0932518243789673, "learning_rate": 1.4735598752649084e-05, "loss": 0.632, "step": 8137 }, { "epoch": 1.0882588927520727, "grad_norm": 1.052201747894287, "learning_rate": 1.473432710667278e-05, "loss": 0.6958, "step": 8138 }, { "epoch": 1.0883926183471517, "grad_norm": 1.2122379541397095, "learning_rate": 1.4733055362013957e-05, "loss": 0.7268, "step": 8139 }, { "epoch": 1.0885263439422306, "grad_norm": 1.193186640739441, "learning_rate": 1.4731783518699128e-05, "loss": 0.7608, "step": 8140 }, { "epoch": 1.0886600695373094, "grad_norm": 1.11224365234375, "learning_rate": 1.4730511576754794e-05, "loss": 0.7338, "step": 8141 }, { "epoch": 1.0887937951323883, "grad_norm": 1.2209076881408691, "learning_rate": 1.4729239536207476e-05, "loss": 0.7144, "step": 8142 }, { "epoch": 1.0889275207274673, "grad_norm": 1.338446021080017, "learning_rate": 1.4727967397083684e-05, "loss": 0.7481, "step": 8143 }, { "epoch": 1.0890612463225462, "grad_norm": 1.1219849586486816, "learning_rate": 1.4726695159409938e-05, "loss": 0.6898, "step": 8144 }, { "epoch": 1.089194971917625, "grad_norm": 1.0940457582473755, "learning_rate": 1.4725422823212754e-05, "loss": 0.6844, "step": 8145 }, { "epoch": 1.089328697512704, "grad_norm": 1.1348212957382202, "learning_rate": 1.4724150388518651e-05, "loss": 0.6175, "step": 8146 }, { "epoch": 1.0894624231077827, "grad_norm": 1.2818306684494019, "learning_rate": 1.4722877855354156e-05, "loss": 0.7989, "step": 8147 }, { "epoch": 1.0895961487028618, "grad_norm": 1.2465180158615112, "learning_rate": 1.472160522374579e-05, "loss": 0.8142, "step": 8148 }, { "epoch": 1.0897298742979407, "grad_norm": 1.0677372217178345, "learning_rate": 1.4720332493720082e-05, "loss": 0.7122, "step": 8149 }, { "epoch": 1.0898635998930195, "grad_norm": 1.032468318939209, "learning_rate": 1.4719059665303559e-05, "loss": 0.682, "step": 8150 }, { "epoch": 1.0899973254880984, "grad_norm": 1.2742773294448853, "learning_rate": 1.4717786738522753e-05, "loss": 0.7498, "step": 8151 }, { "epoch": 1.0901310510831772, "grad_norm": 1.2955206632614136, "learning_rate": 1.4716513713404199e-05, "loss": 0.7706, "step": 8152 }, { "epoch": 1.0902647766782563, "grad_norm": 1.1426101922988892, "learning_rate": 1.4715240589974428e-05, "loss": 0.7016, "step": 8153 }, { "epoch": 1.0903985022733351, "grad_norm": 1.1886787414550781, "learning_rate": 1.4713967368259981e-05, "loss": 0.6999, "step": 8154 }, { "epoch": 1.090532227868414, "grad_norm": 1.1136610507965088, "learning_rate": 1.4712694048287387e-05, "loss": 0.7448, "step": 8155 }, { "epoch": 1.0906659534634928, "grad_norm": 1.1471967697143555, "learning_rate": 1.4711420630083204e-05, "loss": 0.7783, "step": 8156 }, { "epoch": 1.090799679058572, "grad_norm": 1.2274174690246582, "learning_rate": 1.4710147113673965e-05, "loss": 0.7361, "step": 8157 }, { "epoch": 1.0909334046536507, "grad_norm": 0.9566587209701538, "learning_rate": 1.4708873499086214e-05, "loss": 0.6595, "step": 8158 }, { "epoch": 1.0910671302487296, "grad_norm": 1.1610045433044434, "learning_rate": 1.4707599786346501e-05, "loss": 0.7283, "step": 8159 }, { "epoch": 1.0912008558438084, "grad_norm": 1.1392569541931152, "learning_rate": 1.4706325975481377e-05, "loss": 0.7235, "step": 8160 }, { "epoch": 1.0913345814388875, "grad_norm": 1.1950937509536743, "learning_rate": 1.4705052066517388e-05, "loss": 0.7693, "step": 8161 }, { "epoch": 1.0914683070339664, "grad_norm": 1.1389201879501343, "learning_rate": 1.4703778059481096e-05, "loss": 0.7151, "step": 8162 }, { "epoch": 1.0916020326290452, "grad_norm": 1.4349377155303955, "learning_rate": 1.4702503954399047e-05, "loss": 0.8371, "step": 8163 }, { "epoch": 1.091735758224124, "grad_norm": 1.0885009765625, "learning_rate": 1.4701229751297806e-05, "loss": 0.7257, "step": 8164 }, { "epoch": 1.091869483819203, "grad_norm": 1.1161704063415527, "learning_rate": 1.4699955450203929e-05, "loss": 0.7088, "step": 8165 }, { "epoch": 1.092003209414282, "grad_norm": 1.15769624710083, "learning_rate": 1.4698681051143976e-05, "loss": 0.7665, "step": 8166 }, { "epoch": 1.0921369350093608, "grad_norm": 1.1866463422775269, "learning_rate": 1.4697406554144513e-05, "loss": 0.763, "step": 8167 }, { "epoch": 1.0922706606044397, "grad_norm": 1.27335786819458, "learning_rate": 1.4696131959232105e-05, "loss": 0.7819, "step": 8168 }, { "epoch": 1.0924043861995185, "grad_norm": 1.2271827459335327, "learning_rate": 1.4694857266433322e-05, "loss": 0.7255, "step": 8169 }, { "epoch": 1.0925381117945976, "grad_norm": 1.1655311584472656, "learning_rate": 1.469358247577473e-05, "loss": 0.7435, "step": 8170 }, { "epoch": 1.0926718373896764, "grad_norm": 1.06745183467865, "learning_rate": 1.4692307587282905e-05, "loss": 0.6415, "step": 8171 }, { "epoch": 1.0928055629847553, "grad_norm": 1.1530661582946777, "learning_rate": 1.4691032600984416e-05, "loss": 0.7624, "step": 8172 }, { "epoch": 1.0929392885798341, "grad_norm": 1.2113919258117676, "learning_rate": 1.4689757516905842e-05, "loss": 0.7125, "step": 8173 }, { "epoch": 1.093073014174913, "grad_norm": 1.3119593858718872, "learning_rate": 1.468848233507376e-05, "loss": 0.7488, "step": 8174 }, { "epoch": 1.093206739769992, "grad_norm": 1.217664361000061, "learning_rate": 1.468720705551475e-05, "loss": 0.7328, "step": 8175 }, { "epoch": 1.093340465365071, "grad_norm": 1.1345393657684326, "learning_rate": 1.4685931678255394e-05, "loss": 0.7015, "step": 8176 }, { "epoch": 1.0934741909601498, "grad_norm": 1.1055500507354736, "learning_rate": 1.4684656203322278e-05, "loss": 0.7373, "step": 8177 }, { "epoch": 1.0936079165552286, "grad_norm": 1.232519268989563, "learning_rate": 1.4683380630741986e-05, "loss": 0.689, "step": 8178 }, { "epoch": 1.0937416421503077, "grad_norm": 1.2854101657867432, "learning_rate": 1.4682104960541104e-05, "loss": 0.7696, "step": 8179 }, { "epoch": 1.0938753677453865, "grad_norm": 1.1046152114868164, "learning_rate": 1.4680829192746224e-05, "loss": 0.7974, "step": 8180 }, { "epoch": 1.0940090933404654, "grad_norm": 1.0964359045028687, "learning_rate": 1.4679553327383942e-05, "loss": 0.7228, "step": 8181 }, { "epoch": 1.0941428189355442, "grad_norm": 1.1129666566848755, "learning_rate": 1.4678277364480846e-05, "loss": 0.762, "step": 8182 }, { "epoch": 1.094276544530623, "grad_norm": 1.1247106790542603, "learning_rate": 1.4677001304063533e-05, "loss": 0.7522, "step": 8183 }, { "epoch": 1.0944102701257021, "grad_norm": 1.1707801818847656, "learning_rate": 1.4675725146158609e-05, "loss": 0.7204, "step": 8184 }, { "epoch": 1.094543995720781, "grad_norm": 1.1898560523986816, "learning_rate": 1.4674448890792666e-05, "loss": 0.7254, "step": 8185 }, { "epoch": 1.0946777213158598, "grad_norm": 1.277867078781128, "learning_rate": 1.4673172537992306e-05, "loss": 0.7744, "step": 8186 }, { "epoch": 1.0948114469109387, "grad_norm": 1.2944467067718506, "learning_rate": 1.4671896087784136e-05, "loss": 0.7232, "step": 8187 }, { "epoch": 1.0949451725060177, "grad_norm": 1.2902640104293823, "learning_rate": 1.4670619540194766e-05, "loss": 0.7709, "step": 8188 }, { "epoch": 1.0950788981010966, "grad_norm": 1.0623537302017212, "learning_rate": 1.4669342895250803e-05, "loss": 0.7152, "step": 8189 }, { "epoch": 1.0952126236961754, "grad_norm": 1.0780636072158813, "learning_rate": 1.4668066152978851e-05, "loss": 0.6926, "step": 8190 }, { "epoch": 1.0953463492912543, "grad_norm": 1.3469547033309937, "learning_rate": 1.4666789313405528e-05, "loss": 0.793, "step": 8191 }, { "epoch": 1.0954800748863331, "grad_norm": 1.2358331680297852, "learning_rate": 1.4665512376557446e-05, "loss": 0.6815, "step": 8192 }, { "epoch": 1.0956138004814122, "grad_norm": 1.0827410221099854, "learning_rate": 1.4664235342461226e-05, "loss": 0.6968, "step": 8193 }, { "epoch": 1.095747526076491, "grad_norm": 1.1867256164550781, "learning_rate": 1.466295821114348e-05, "loss": 0.6783, "step": 8194 }, { "epoch": 1.09588125167157, "grad_norm": 1.1024630069732666, "learning_rate": 1.4661680982630834e-05, "loss": 0.7491, "step": 8195 }, { "epoch": 1.0960149772666488, "grad_norm": 1.1474690437316895, "learning_rate": 1.4660403656949908e-05, "loss": 0.76, "step": 8196 }, { "epoch": 1.0961487028617278, "grad_norm": 1.1353682279586792, "learning_rate": 1.4659126234127333e-05, "loss": 0.7312, "step": 8197 }, { "epoch": 1.0962824284568067, "grad_norm": 1.1524615287780762, "learning_rate": 1.4657848714189724e-05, "loss": 0.7491, "step": 8198 }, { "epoch": 1.0964161540518855, "grad_norm": 1.2165710926055908, "learning_rate": 1.4656571097163717e-05, "loss": 0.7293, "step": 8199 }, { "epoch": 1.0965498796469644, "grad_norm": 1.2023200988769531, "learning_rate": 1.4655293383075937e-05, "loss": 0.7938, "step": 8200 }, { "epoch": 1.0966836052420432, "grad_norm": 1.2271883487701416, "learning_rate": 1.465401557195303e-05, "loss": 0.7369, "step": 8201 }, { "epoch": 1.0968173308371223, "grad_norm": 1.1714974641799927, "learning_rate": 1.4652737663821614e-05, "loss": 0.7004, "step": 8202 }, { "epoch": 1.0969510564322011, "grad_norm": 1.1790149211883545, "learning_rate": 1.4651459658708336e-05, "loss": 0.7297, "step": 8203 }, { "epoch": 1.09708478202728, "grad_norm": 1.1862670183181763, "learning_rate": 1.4650181556639833e-05, "loss": 0.746, "step": 8204 }, { "epoch": 1.0972185076223588, "grad_norm": 1.123826503753662, "learning_rate": 1.4648903357642748e-05, "loss": 0.7171, "step": 8205 }, { "epoch": 1.097352233217438, "grad_norm": 1.2132987976074219, "learning_rate": 1.4647625061743713e-05, "loss": 0.6997, "step": 8206 }, { "epoch": 1.0974859588125168, "grad_norm": 1.106748104095459, "learning_rate": 1.4646346668969386e-05, "loss": 0.6823, "step": 8207 }, { "epoch": 1.0976196844075956, "grad_norm": 1.1480863094329834, "learning_rate": 1.4645068179346408e-05, "loss": 0.7117, "step": 8208 }, { "epoch": 1.0977534100026745, "grad_norm": 1.254892110824585, "learning_rate": 1.4643789592901433e-05, "loss": 0.773, "step": 8209 }, { "epoch": 1.0978871355977535, "grad_norm": 1.1178590059280396, "learning_rate": 1.4642510909661103e-05, "loss": 0.7485, "step": 8210 }, { "epoch": 1.0980208611928324, "grad_norm": 1.2524044513702393, "learning_rate": 1.4641232129652076e-05, "loss": 0.8698, "step": 8211 }, { "epoch": 1.0981545867879112, "grad_norm": 1.1347885131835938, "learning_rate": 1.4639953252901007e-05, "loss": 0.7251, "step": 8212 }, { "epoch": 1.09828831238299, "grad_norm": 1.1306626796722412, "learning_rate": 1.4638674279434553e-05, "loss": 0.6958, "step": 8213 }, { "epoch": 1.098422037978069, "grad_norm": 1.3701748847961426, "learning_rate": 1.463739520927937e-05, "loss": 0.8246, "step": 8214 }, { "epoch": 1.098555763573148, "grad_norm": 1.06728994846344, "learning_rate": 1.4636116042462123e-05, "loss": 0.6576, "step": 8215 }, { "epoch": 1.0986894891682268, "grad_norm": 1.1241544485092163, "learning_rate": 1.4634836779009474e-05, "loss": 0.6693, "step": 8216 }, { "epoch": 1.0988232147633057, "grad_norm": 1.1395597457885742, "learning_rate": 1.4633557418948089e-05, "loss": 0.6872, "step": 8217 }, { "epoch": 1.0989569403583845, "grad_norm": 1.0658247470855713, "learning_rate": 1.4632277962304629e-05, "loss": 0.6689, "step": 8218 }, { "epoch": 1.0990906659534634, "grad_norm": 1.2273157835006714, "learning_rate": 1.4630998409105767e-05, "loss": 0.6485, "step": 8219 }, { "epoch": 1.0992243915485425, "grad_norm": 1.1881983280181885, "learning_rate": 1.4629718759378177e-05, "loss": 0.7606, "step": 8220 }, { "epoch": 1.0993581171436213, "grad_norm": 1.2353265285491943, "learning_rate": 1.4628439013148532e-05, "loss": 0.7571, "step": 8221 }, { "epoch": 1.0994918427387002, "grad_norm": 1.1384950876235962, "learning_rate": 1.4627159170443504e-05, "loss": 0.6894, "step": 8222 }, { "epoch": 1.099625568333779, "grad_norm": 1.159988284111023, "learning_rate": 1.4625879231289767e-05, "loss": 0.7109, "step": 8223 }, { "epoch": 1.099759293928858, "grad_norm": 1.1748900413513184, "learning_rate": 1.4624599195714006e-05, "loss": 0.7693, "step": 8224 }, { "epoch": 1.099893019523937, "grad_norm": 1.2397748231887817, "learning_rate": 1.4623319063742902e-05, "loss": 0.7272, "step": 8225 }, { "epoch": 1.1000267451190158, "grad_norm": 1.3209439516067505, "learning_rate": 1.4622038835403135e-05, "loss": 0.7664, "step": 8226 }, { "epoch": 1.1001604707140946, "grad_norm": 1.043557047843933, "learning_rate": 1.462075851072139e-05, "loss": 0.7401, "step": 8227 }, { "epoch": 1.1002941963091737, "grad_norm": 1.1530739068984985, "learning_rate": 1.4619478089724355e-05, "loss": 0.674, "step": 8228 }, { "epoch": 1.1004279219042525, "grad_norm": 1.008626937866211, "learning_rate": 1.4618197572438722e-05, "loss": 0.6545, "step": 8229 }, { "epoch": 1.1005616474993314, "grad_norm": 1.319429874420166, "learning_rate": 1.4616916958891179e-05, "loss": 0.7613, "step": 8230 }, { "epoch": 1.1006953730944102, "grad_norm": 1.1527820825576782, "learning_rate": 1.4615636249108418e-05, "loss": 0.7826, "step": 8231 }, { "epoch": 1.100829098689489, "grad_norm": 1.0154234170913696, "learning_rate": 1.4614355443117137e-05, "loss": 0.6993, "step": 8232 }, { "epoch": 1.1009628242845682, "grad_norm": 1.0166356563568115, "learning_rate": 1.4613074540944032e-05, "loss": 0.7573, "step": 8233 }, { "epoch": 1.101096549879647, "grad_norm": 1.1730951070785522, "learning_rate": 1.4611793542615805e-05, "loss": 0.6703, "step": 8234 }, { "epoch": 1.1012302754747259, "grad_norm": 1.1418660879135132, "learning_rate": 1.461051244815915e-05, "loss": 0.6953, "step": 8235 }, { "epoch": 1.1013640010698047, "grad_norm": 1.1512385606765747, "learning_rate": 1.4609231257600778e-05, "loss": 0.6838, "step": 8236 }, { "epoch": 1.1014977266648835, "grad_norm": 1.1949220895767212, "learning_rate": 1.4607949970967391e-05, "loss": 0.7357, "step": 8237 }, { "epoch": 1.1016314522599626, "grad_norm": 1.188032627105713, "learning_rate": 1.4606668588285694e-05, "loss": 0.8132, "step": 8238 }, { "epoch": 1.1017651778550415, "grad_norm": 1.0157471895217896, "learning_rate": 1.4605387109582401e-05, "loss": 0.6925, "step": 8239 }, { "epoch": 1.1018989034501203, "grad_norm": 1.0028070211410522, "learning_rate": 1.4604105534884218e-05, "loss": 0.6577, "step": 8240 }, { "epoch": 1.1020326290451992, "grad_norm": 1.1497137546539307, "learning_rate": 1.4602823864217863e-05, "loss": 0.7842, "step": 8241 }, { "epoch": 1.1021663546402782, "grad_norm": 1.1665130853652954, "learning_rate": 1.4601542097610051e-05, "loss": 0.7361, "step": 8242 }, { "epoch": 1.102300080235357, "grad_norm": 1.2151650190353394, "learning_rate": 1.4600260235087493e-05, "loss": 0.7859, "step": 8243 }, { "epoch": 1.102433805830436, "grad_norm": 1.1079157590866089, "learning_rate": 1.4598978276676916e-05, "loss": 0.7165, "step": 8244 }, { "epoch": 1.1025675314255148, "grad_norm": 1.131780743598938, "learning_rate": 1.4597696222405033e-05, "loss": 0.7482, "step": 8245 }, { "epoch": 1.1027012570205939, "grad_norm": 0.9700050354003906, "learning_rate": 1.4596414072298575e-05, "loss": 0.6226, "step": 8246 }, { "epoch": 1.1028349826156727, "grad_norm": 1.5398085117340088, "learning_rate": 1.4595131826384263e-05, "loss": 0.7452, "step": 8247 }, { "epoch": 1.1029687082107515, "grad_norm": 1.0944463014602661, "learning_rate": 1.4593849484688827e-05, "loss": 0.7169, "step": 8248 }, { "epoch": 1.1031024338058304, "grad_norm": 1.1606014966964722, "learning_rate": 1.459256704723899e-05, "loss": 0.7613, "step": 8249 }, { "epoch": 1.1032361594009092, "grad_norm": 1.2230435609817505, "learning_rate": 1.4591284514061492e-05, "loss": 0.7593, "step": 8250 }, { "epoch": 1.1033698849959883, "grad_norm": 1.139914631843567, "learning_rate": 1.4590001885183059e-05, "loss": 0.6618, "step": 8251 }, { "epoch": 1.1035036105910672, "grad_norm": 1.1277605295181274, "learning_rate": 1.4588719160630429e-05, "loss": 0.7045, "step": 8252 }, { "epoch": 1.103637336186146, "grad_norm": 1.1307988166809082, "learning_rate": 1.4587436340430338e-05, "loss": 0.7168, "step": 8253 }, { "epoch": 1.1037710617812249, "grad_norm": 1.2383873462677002, "learning_rate": 1.458615342460953e-05, "loss": 0.8023, "step": 8254 }, { "epoch": 1.1039047873763037, "grad_norm": 1.1057050228118896, "learning_rate": 1.458487041319474e-05, "loss": 0.6998, "step": 8255 }, { "epoch": 1.1040385129713828, "grad_norm": 1.3270267248153687, "learning_rate": 1.4583587306212714e-05, "loss": 0.7997, "step": 8256 }, { "epoch": 1.1041722385664616, "grad_norm": 1.100528359413147, "learning_rate": 1.4582304103690197e-05, "loss": 0.7124, "step": 8257 }, { "epoch": 1.1043059641615405, "grad_norm": 1.4334498643875122, "learning_rate": 1.4581020805653934e-05, "loss": 0.737, "step": 8258 }, { "epoch": 1.1044396897566193, "grad_norm": 1.4105724096298218, "learning_rate": 1.4579737412130679e-05, "loss": 0.7779, "step": 8259 }, { "epoch": 1.1045734153516984, "grad_norm": 1.3646095991134644, "learning_rate": 1.4578453923147176e-05, "loss": 0.8912, "step": 8260 }, { "epoch": 1.1047071409467772, "grad_norm": 1.2176775932312012, "learning_rate": 1.4577170338730184e-05, "loss": 0.7781, "step": 8261 }, { "epoch": 1.104840866541856, "grad_norm": 1.172537088394165, "learning_rate": 1.4575886658906458e-05, "loss": 0.7208, "step": 8262 }, { "epoch": 1.104974592136935, "grad_norm": 1.0549992322921753, "learning_rate": 1.4574602883702752e-05, "loss": 0.6205, "step": 8263 }, { "epoch": 1.105108317732014, "grad_norm": 1.1880916357040405, "learning_rate": 1.4573319013145823e-05, "loss": 0.6961, "step": 8264 }, { "epoch": 1.1052420433270929, "grad_norm": 1.0007102489471436, "learning_rate": 1.4572035047262439e-05, "loss": 0.7052, "step": 8265 }, { "epoch": 1.1053757689221717, "grad_norm": 1.376042366027832, "learning_rate": 1.4570750986079358e-05, "loss": 0.7653, "step": 8266 }, { "epoch": 1.1055094945172506, "grad_norm": 1.09882652759552, "learning_rate": 1.456946682962335e-05, "loss": 0.7688, "step": 8267 }, { "epoch": 1.1056432201123294, "grad_norm": 1.2237251996994019, "learning_rate": 1.4568182577921172e-05, "loss": 0.7083, "step": 8268 }, { "epoch": 1.1057769457074085, "grad_norm": 1.0586533546447754, "learning_rate": 1.4566898230999604e-05, "loss": 0.7236, "step": 8269 }, { "epoch": 1.1059106713024873, "grad_norm": 1.1571077108383179, "learning_rate": 1.4565613788885412e-05, "loss": 0.6302, "step": 8270 }, { "epoch": 1.1060443968975662, "grad_norm": 1.2033671140670776, "learning_rate": 1.4564329251605367e-05, "loss": 0.7421, "step": 8271 }, { "epoch": 1.106178122492645, "grad_norm": 1.1124781370162964, "learning_rate": 1.4563044619186248e-05, "loss": 0.7263, "step": 8272 }, { "epoch": 1.106311848087724, "grad_norm": 1.1049301624298096, "learning_rate": 1.456175989165483e-05, "loss": 0.7322, "step": 8273 }, { "epoch": 1.106445573682803, "grad_norm": 1.2442961931228638, "learning_rate": 1.4560475069037895e-05, "loss": 0.7968, "step": 8274 }, { "epoch": 1.1065792992778818, "grad_norm": 1.1149489879608154, "learning_rate": 1.455919015136222e-05, "loss": 0.7821, "step": 8275 }, { "epoch": 1.1067130248729606, "grad_norm": 1.1606643199920654, "learning_rate": 1.4557905138654586e-05, "loss": 0.7163, "step": 8276 }, { "epoch": 1.1068467504680395, "grad_norm": 1.1662418842315674, "learning_rate": 1.4556620030941782e-05, "loss": 0.8299, "step": 8277 }, { "epoch": 1.1069804760631186, "grad_norm": 1.170881748199463, "learning_rate": 1.4555334828250594e-05, "loss": 0.6927, "step": 8278 }, { "epoch": 1.1071142016581974, "grad_norm": 1.1598966121673584, "learning_rate": 1.455404953060781e-05, "loss": 0.6625, "step": 8279 }, { "epoch": 1.1072479272532763, "grad_norm": 1.1514116525650024, "learning_rate": 1.4552764138040221e-05, "loss": 0.7153, "step": 8280 }, { "epoch": 1.107381652848355, "grad_norm": 1.0893383026123047, "learning_rate": 1.455147865057462e-05, "loss": 0.7577, "step": 8281 }, { "epoch": 1.1075153784434342, "grad_norm": 1.0285717248916626, "learning_rate": 1.4550193068237805e-05, "loss": 0.6728, "step": 8282 }, { "epoch": 1.107649104038513, "grad_norm": 1.1683778762817383, "learning_rate": 1.4548907391056567e-05, "loss": 0.7945, "step": 8283 }, { "epoch": 1.1077828296335919, "grad_norm": 1.0316197872161865, "learning_rate": 1.4547621619057706e-05, "loss": 0.6435, "step": 8284 }, { "epoch": 1.1079165552286707, "grad_norm": 1.1161266565322876, "learning_rate": 1.4546335752268027e-05, "loss": 0.7115, "step": 8285 }, { "epoch": 1.1080502808237496, "grad_norm": 1.156480073928833, "learning_rate": 1.4545049790714328e-05, "loss": 0.6633, "step": 8286 }, { "epoch": 1.1081840064188286, "grad_norm": 1.200325846672058, "learning_rate": 1.4543763734423415e-05, "loss": 0.775, "step": 8287 }, { "epoch": 1.1083177320139075, "grad_norm": 1.1742380857467651, "learning_rate": 1.4542477583422095e-05, "loss": 0.7337, "step": 8288 }, { "epoch": 1.1084514576089863, "grad_norm": 1.2142648696899414, "learning_rate": 1.4541191337737175e-05, "loss": 0.7902, "step": 8289 }, { "epoch": 1.1085851832040652, "grad_norm": 1.0733712911605835, "learning_rate": 1.4539904997395468e-05, "loss": 0.7547, "step": 8290 }, { "epoch": 1.1087189087991443, "grad_norm": 1.1593356132507324, "learning_rate": 1.4538618562423788e-05, "loss": 0.6661, "step": 8291 }, { "epoch": 1.108852634394223, "grad_norm": 1.0731009244918823, "learning_rate": 1.4537332032848945e-05, "loss": 0.6722, "step": 8292 }, { "epoch": 1.108986359989302, "grad_norm": 1.2825267314910889, "learning_rate": 1.4536045408697757e-05, "loss": 0.7378, "step": 8293 }, { "epoch": 1.1091200855843808, "grad_norm": 1.0678948163986206, "learning_rate": 1.4534758689997046e-05, "loss": 0.6775, "step": 8294 }, { "epoch": 1.1092538111794596, "grad_norm": 1.2530276775360107, "learning_rate": 1.4533471876773626e-05, "loss": 0.7898, "step": 8295 }, { "epoch": 1.1093875367745387, "grad_norm": 1.1837270259857178, "learning_rate": 1.4532184969054322e-05, "loss": 0.6566, "step": 8296 }, { "epoch": 1.1095212623696176, "grad_norm": 1.2918622493743896, "learning_rate": 1.4530897966865963e-05, "loss": 0.6923, "step": 8297 }, { "epoch": 1.1096549879646964, "grad_norm": 1.0833202600479126, "learning_rate": 1.4529610870235368e-05, "loss": 0.7413, "step": 8298 }, { "epoch": 1.1097887135597753, "grad_norm": 1.0432302951812744, "learning_rate": 1.4528323679189371e-05, "loss": 0.6814, "step": 8299 }, { "epoch": 1.1099224391548543, "grad_norm": 1.0792577266693115, "learning_rate": 1.4527036393754799e-05, "loss": 0.7264, "step": 8300 }, { "epoch": 1.1100561647499332, "grad_norm": 1.1741794347763062, "learning_rate": 1.4525749013958486e-05, "loss": 0.7297, "step": 8301 }, { "epoch": 1.110189890345012, "grad_norm": 1.230660319328308, "learning_rate": 1.4524461539827267e-05, "loss": 0.8102, "step": 8302 }, { "epoch": 1.1103236159400909, "grad_norm": 1.0732176303863525, "learning_rate": 1.4523173971387973e-05, "loss": 0.7796, "step": 8303 }, { "epoch": 1.1104573415351697, "grad_norm": 1.2005423307418823, "learning_rate": 1.4521886308667448e-05, "loss": 0.7563, "step": 8304 }, { "epoch": 1.1105910671302488, "grad_norm": 1.11322021484375, "learning_rate": 1.4520598551692529e-05, "loss": 0.7337, "step": 8305 }, { "epoch": 1.1107247927253276, "grad_norm": 1.1368488073349, "learning_rate": 1.4519310700490061e-05, "loss": 0.7094, "step": 8306 }, { "epoch": 1.1108585183204065, "grad_norm": 1.2020680904388428, "learning_rate": 1.4518022755086883e-05, "loss": 0.7403, "step": 8307 }, { "epoch": 1.1109922439154853, "grad_norm": 1.1665252447128296, "learning_rate": 1.4516734715509846e-05, "loss": 0.6498, "step": 8308 }, { "epoch": 1.1111259695105644, "grad_norm": 1.1584941148757935, "learning_rate": 1.4515446581785795e-05, "loss": 0.6541, "step": 8309 }, { "epoch": 1.1112596951056433, "grad_norm": 1.1055808067321777, "learning_rate": 1.4514158353941581e-05, "loss": 0.6904, "step": 8310 }, { "epoch": 1.1113934207007221, "grad_norm": 1.1115529537200928, "learning_rate": 1.4512870032004057e-05, "loss": 0.7516, "step": 8311 }, { "epoch": 1.111527146295801, "grad_norm": 1.1111924648284912, "learning_rate": 1.4511581616000072e-05, "loss": 0.675, "step": 8312 }, { "epoch": 1.11166087189088, "grad_norm": 1.2660021781921387, "learning_rate": 1.4510293105956488e-05, "loss": 0.797, "step": 8313 }, { "epoch": 1.1117945974859589, "grad_norm": 1.1217706203460693, "learning_rate": 1.4509004501900161e-05, "loss": 0.6675, "step": 8314 }, { "epoch": 1.1119283230810377, "grad_norm": 1.1810978651046753, "learning_rate": 1.4507715803857948e-05, "loss": 0.7535, "step": 8315 }, { "epoch": 1.1120620486761166, "grad_norm": 1.2078564167022705, "learning_rate": 1.4506427011856712e-05, "loss": 0.6926, "step": 8316 }, { "epoch": 1.1121957742711954, "grad_norm": 1.1513147354125977, "learning_rate": 1.4505138125923316e-05, "loss": 0.7591, "step": 8317 }, { "epoch": 1.1123294998662745, "grad_norm": 1.1595839262008667, "learning_rate": 1.450384914608463e-05, "loss": 0.6512, "step": 8318 }, { "epoch": 1.1124632254613533, "grad_norm": 1.2681446075439453, "learning_rate": 1.4502560072367518e-05, "loss": 0.8134, "step": 8319 }, { "epoch": 1.1125969510564322, "grad_norm": 1.0839310884475708, "learning_rate": 1.4501270904798847e-05, "loss": 0.7156, "step": 8320 }, { "epoch": 1.112730676651511, "grad_norm": 1.0616414546966553, "learning_rate": 1.4499981643405495e-05, "loss": 0.7017, "step": 8321 }, { "epoch": 1.11286440224659, "grad_norm": 1.2882949113845825, "learning_rate": 1.449869228821433e-05, "loss": 0.7014, "step": 8322 }, { "epoch": 1.112998127841669, "grad_norm": 1.213078260421753, "learning_rate": 1.4497402839252228e-05, "loss": 0.7571, "step": 8323 }, { "epoch": 1.1131318534367478, "grad_norm": 1.1187180280685425, "learning_rate": 1.4496113296546068e-05, "loss": 0.727, "step": 8324 }, { "epoch": 1.1132655790318267, "grad_norm": 1.1193311214447021, "learning_rate": 1.4494823660122727e-05, "loss": 0.6898, "step": 8325 }, { "epoch": 1.1133993046269055, "grad_norm": 1.1341887712478638, "learning_rate": 1.4493533930009092e-05, "loss": 0.751, "step": 8326 }, { "epoch": 1.1135330302219846, "grad_norm": 1.1902804374694824, "learning_rate": 1.449224410623204e-05, "loss": 0.7927, "step": 8327 }, { "epoch": 1.1136667558170634, "grad_norm": 1.1972242593765259, "learning_rate": 1.4490954188818458e-05, "loss": 0.8212, "step": 8328 }, { "epoch": 1.1138004814121423, "grad_norm": 1.1225979328155518, "learning_rate": 1.448966417779523e-05, "loss": 0.7361, "step": 8329 }, { "epoch": 1.1139342070072211, "grad_norm": 1.1491978168487549, "learning_rate": 1.4488374073189251e-05, "loss": 0.8213, "step": 8330 }, { "epoch": 1.1140679326023002, "grad_norm": 1.0951247215270996, "learning_rate": 1.4487083875027412e-05, "loss": 0.7643, "step": 8331 }, { "epoch": 1.114201658197379, "grad_norm": 1.0306428670883179, "learning_rate": 1.4485793583336602e-05, "loss": 0.6175, "step": 8332 }, { "epoch": 1.114335383792458, "grad_norm": 1.1568228006362915, "learning_rate": 1.4484503198143715e-05, "loss": 0.723, "step": 8333 }, { "epoch": 1.1144691093875367, "grad_norm": 1.059244990348816, "learning_rate": 1.4483212719475652e-05, "loss": 0.7117, "step": 8334 }, { "epoch": 1.1146028349826156, "grad_norm": 1.1356573104858398, "learning_rate": 1.4481922147359309e-05, "loss": 0.8079, "step": 8335 }, { "epoch": 1.1147365605776947, "grad_norm": 1.1072382926940918, "learning_rate": 1.4480631481821588e-05, "loss": 0.7734, "step": 8336 }, { "epoch": 1.1148702861727735, "grad_norm": 1.1229808330535889, "learning_rate": 1.447934072288939e-05, "loss": 0.765, "step": 8337 }, { "epoch": 1.1150040117678524, "grad_norm": 1.1347295045852661, "learning_rate": 1.4478049870589623e-05, "loss": 0.7218, "step": 8338 }, { "epoch": 1.1151377373629312, "grad_norm": 1.1059898138046265, "learning_rate": 1.4476758924949192e-05, "loss": 0.7806, "step": 8339 }, { "epoch": 1.11527146295801, "grad_norm": 1.1295607089996338, "learning_rate": 1.4475467885995003e-05, "loss": 0.7562, "step": 8340 }, { "epoch": 1.1154051885530891, "grad_norm": 1.2315341234207153, "learning_rate": 1.4474176753753968e-05, "loss": 0.8163, "step": 8341 }, { "epoch": 1.115538914148168, "grad_norm": 1.1378015279769897, "learning_rate": 1.4472885528253e-05, "loss": 0.6845, "step": 8342 }, { "epoch": 1.1156726397432468, "grad_norm": 1.080583930015564, "learning_rate": 1.4471594209519016e-05, "loss": 0.7279, "step": 8343 }, { "epoch": 1.1158063653383257, "grad_norm": 1.1425526142120361, "learning_rate": 1.4470302797578928e-05, "loss": 0.7383, "step": 8344 }, { "epoch": 1.1159400909334047, "grad_norm": 1.086901068687439, "learning_rate": 1.4469011292459653e-05, "loss": 0.6816, "step": 8345 }, { "epoch": 1.1160738165284836, "grad_norm": 1.2208452224731445, "learning_rate": 1.4467719694188118e-05, "loss": 0.7934, "step": 8346 }, { "epoch": 1.1162075421235624, "grad_norm": 1.1572463512420654, "learning_rate": 1.446642800279124e-05, "loss": 0.7203, "step": 8347 }, { "epoch": 1.1163412677186413, "grad_norm": 1.181697964668274, "learning_rate": 1.4465136218295944e-05, "loss": 0.7683, "step": 8348 }, { "epoch": 1.1164749933137204, "grad_norm": 1.0777674913406372, "learning_rate": 1.4463844340729155e-05, "loss": 0.6594, "step": 8349 }, { "epoch": 1.1166087189087992, "grad_norm": 1.0535820722579956, "learning_rate": 1.4462552370117802e-05, "loss": 0.64, "step": 8350 }, { "epoch": 1.116742444503878, "grad_norm": 1.0885950326919556, "learning_rate": 1.4461260306488818e-05, "loss": 0.7339, "step": 8351 }, { "epoch": 1.116876170098957, "grad_norm": 1.2883414030075073, "learning_rate": 1.445996814986913e-05, "loss": 0.7829, "step": 8352 }, { "epoch": 1.1170098956940357, "grad_norm": 1.0322016477584839, "learning_rate": 1.4458675900285672e-05, "loss": 0.6413, "step": 8353 }, { "epoch": 1.1171436212891148, "grad_norm": 1.219429612159729, "learning_rate": 1.4457383557765385e-05, "loss": 0.7373, "step": 8354 }, { "epoch": 1.1172773468841937, "grad_norm": 1.2747584581375122, "learning_rate": 1.44560911223352e-05, "loss": 0.7141, "step": 8355 }, { "epoch": 1.1174110724792725, "grad_norm": 1.1081844568252563, "learning_rate": 1.4454798594022062e-05, "loss": 0.6584, "step": 8356 }, { "epoch": 1.1175447980743514, "grad_norm": 1.1719917058944702, "learning_rate": 1.4453505972852905e-05, "loss": 0.7685, "step": 8357 }, { "epoch": 1.1176785236694302, "grad_norm": 1.1642959117889404, "learning_rate": 1.4452213258854684e-05, "loss": 0.7295, "step": 8358 }, { "epoch": 1.1178122492645093, "grad_norm": 1.1818652153015137, "learning_rate": 1.4450920452054336e-05, "loss": 0.7802, "step": 8359 }, { "epoch": 1.1179459748595881, "grad_norm": 1.1717503070831299, "learning_rate": 1.4449627552478809e-05, "loss": 0.7147, "step": 8360 }, { "epoch": 1.118079700454667, "grad_norm": 1.0776591300964355, "learning_rate": 1.4448334560155053e-05, "loss": 0.7007, "step": 8361 }, { "epoch": 1.1182134260497458, "grad_norm": 1.359649658203125, "learning_rate": 1.4447041475110019e-05, "loss": 0.7576, "step": 8362 }, { "epoch": 1.118347151644825, "grad_norm": 1.134954810142517, "learning_rate": 1.4445748297370665e-05, "loss": 0.672, "step": 8363 }, { "epoch": 1.1184808772399037, "grad_norm": 1.1859557628631592, "learning_rate": 1.444445502696394e-05, "loss": 0.8165, "step": 8364 }, { "epoch": 1.1186146028349826, "grad_norm": 1.228947401046753, "learning_rate": 1.44431616639168e-05, "loss": 0.7425, "step": 8365 }, { "epoch": 1.1187483284300614, "grad_norm": 1.187402606010437, "learning_rate": 1.4441868208256208e-05, "loss": 0.6699, "step": 8366 }, { "epoch": 1.1188820540251405, "grad_norm": 1.0500737428665161, "learning_rate": 1.4440574660009125e-05, "loss": 0.6688, "step": 8367 }, { "epoch": 1.1190157796202194, "grad_norm": 1.0190798044204712, "learning_rate": 1.4439281019202512e-05, "loss": 0.6732, "step": 8368 }, { "epoch": 1.1191495052152982, "grad_norm": 1.1765137910842896, "learning_rate": 1.4437987285863332e-05, "loss": 0.6954, "step": 8369 }, { "epoch": 1.119283230810377, "grad_norm": 1.012528896331787, "learning_rate": 1.4436693460018558e-05, "loss": 0.6741, "step": 8370 }, { "epoch": 1.119416956405456, "grad_norm": 1.071714162826538, "learning_rate": 1.4435399541695154e-05, "loss": 0.7568, "step": 8371 }, { "epoch": 1.119550682000535, "grad_norm": 1.0907952785491943, "learning_rate": 1.4434105530920089e-05, "loss": 0.6664, "step": 8372 }, { "epoch": 1.1196844075956138, "grad_norm": 1.081235408782959, "learning_rate": 1.4432811427720334e-05, "loss": 0.7514, "step": 8373 }, { "epoch": 1.1198181331906927, "grad_norm": 1.1514976024627686, "learning_rate": 1.443151723212287e-05, "loss": 0.6899, "step": 8374 }, { "epoch": 1.1199518587857715, "grad_norm": 1.0804319381713867, "learning_rate": 1.4430222944154668e-05, "loss": 0.7072, "step": 8375 }, { "epoch": 1.1200855843808506, "grad_norm": 1.145990014076233, "learning_rate": 1.4428928563842711e-05, "loss": 0.7858, "step": 8376 }, { "epoch": 1.1202193099759294, "grad_norm": 1.2893422842025757, "learning_rate": 1.4427634091213973e-05, "loss": 0.7551, "step": 8377 }, { "epoch": 1.1203530355710083, "grad_norm": 1.1194885969161987, "learning_rate": 1.442633952629544e-05, "loss": 0.6657, "step": 8378 }, { "epoch": 1.1204867611660871, "grad_norm": 1.1008917093276978, "learning_rate": 1.4425044869114097e-05, "loss": 0.711, "step": 8379 }, { "epoch": 1.120620486761166, "grad_norm": 1.0503158569335938, "learning_rate": 1.4423750119696927e-05, "loss": 0.6324, "step": 8380 }, { "epoch": 1.120754212356245, "grad_norm": 1.1039241552352905, "learning_rate": 1.4422455278070916e-05, "loss": 0.703, "step": 8381 }, { "epoch": 1.120887937951324, "grad_norm": 1.1159162521362305, "learning_rate": 1.4421160344263059e-05, "loss": 0.7403, "step": 8382 }, { "epoch": 1.1210216635464028, "grad_norm": 1.3382800817489624, "learning_rate": 1.4419865318300348e-05, "loss": 0.8027, "step": 8383 }, { "epoch": 1.1211553891414816, "grad_norm": 1.2750722169876099, "learning_rate": 1.4418570200209772e-05, "loss": 0.7523, "step": 8384 }, { "epoch": 1.1212891147365607, "grad_norm": 1.199256420135498, "learning_rate": 1.4417274990018327e-05, "loss": 0.7078, "step": 8385 }, { "epoch": 1.1214228403316395, "grad_norm": 1.1698222160339355, "learning_rate": 1.441597968775301e-05, "loss": 0.7445, "step": 8386 }, { "epoch": 1.1215565659267184, "grad_norm": 1.2105101346969604, "learning_rate": 1.4414684293440823e-05, "loss": 0.7751, "step": 8387 }, { "epoch": 1.1216902915217972, "grad_norm": 1.0842067003250122, "learning_rate": 1.4413388807108768e-05, "loss": 0.7006, "step": 8388 }, { "epoch": 1.121824017116876, "grad_norm": 1.197487711906433, "learning_rate": 1.4412093228783846e-05, "loss": 0.676, "step": 8389 }, { "epoch": 1.1219577427119551, "grad_norm": 1.181753396987915, "learning_rate": 1.4410797558493062e-05, "loss": 0.7219, "step": 8390 }, { "epoch": 1.122091468307034, "grad_norm": 1.3178125619888306, "learning_rate": 1.4409501796263425e-05, "loss": 0.8266, "step": 8391 }, { "epoch": 1.1222251939021128, "grad_norm": 1.3144235610961914, "learning_rate": 1.4408205942121942e-05, "loss": 0.8319, "step": 8392 }, { "epoch": 1.1223589194971917, "grad_norm": 1.3660727739334106, "learning_rate": 1.4406909996095622e-05, "loss": 0.7775, "step": 8393 }, { "epoch": 1.1224926450922708, "grad_norm": 1.1368908882141113, "learning_rate": 1.4405613958211482e-05, "loss": 0.7697, "step": 8394 }, { "epoch": 1.1226263706873496, "grad_norm": 1.2406312227249146, "learning_rate": 1.4404317828496534e-05, "loss": 0.6978, "step": 8395 }, { "epoch": 1.1227600962824285, "grad_norm": 1.209076166152954, "learning_rate": 1.4403021606977798e-05, "loss": 0.7224, "step": 8396 }, { "epoch": 1.1228938218775073, "grad_norm": 1.1883964538574219, "learning_rate": 1.4401725293682287e-05, "loss": 0.7328, "step": 8397 }, { "epoch": 1.1230275474725862, "grad_norm": 1.0854756832122803, "learning_rate": 1.4400428888637026e-05, "loss": 0.7264, "step": 8398 }, { "epoch": 1.1231612730676652, "grad_norm": 1.1856213808059692, "learning_rate": 1.4399132391869032e-05, "loss": 0.7885, "step": 8399 }, { "epoch": 1.123294998662744, "grad_norm": 1.1723554134368896, "learning_rate": 1.4397835803405338e-05, "loss": 0.7398, "step": 8400 }, { "epoch": 1.123428724257823, "grad_norm": 1.0858168601989746, "learning_rate": 1.439653912327296e-05, "loss": 0.7272, "step": 8401 }, { "epoch": 1.1235624498529018, "grad_norm": 1.1664848327636719, "learning_rate": 1.4395242351498934e-05, "loss": 0.7228, "step": 8402 }, { "epoch": 1.1236961754479808, "grad_norm": 1.0693929195404053, "learning_rate": 1.4393945488110287e-05, "loss": 0.7125, "step": 8403 }, { "epoch": 1.1238299010430597, "grad_norm": 0.954888105392456, "learning_rate": 1.4392648533134051e-05, "loss": 0.6363, "step": 8404 }, { "epoch": 1.1239636266381385, "grad_norm": 1.2978016138076782, "learning_rate": 1.4391351486597259e-05, "loss": 0.7303, "step": 8405 }, { "epoch": 1.1240973522332174, "grad_norm": 1.1401885747909546, "learning_rate": 1.4390054348526945e-05, "loss": 0.6367, "step": 8406 }, { "epoch": 1.1242310778282962, "grad_norm": 1.0839036703109741, "learning_rate": 1.4388757118950152e-05, "loss": 0.6753, "step": 8407 }, { "epoch": 1.1243648034233753, "grad_norm": 1.1497244834899902, "learning_rate": 1.4387459797893915e-05, "loss": 0.707, "step": 8408 }, { "epoch": 1.1244985290184542, "grad_norm": 1.0623220205307007, "learning_rate": 1.4386162385385279e-05, "loss": 0.7331, "step": 8409 }, { "epoch": 1.124632254613533, "grad_norm": 1.031921148300171, "learning_rate": 1.438486488145128e-05, "loss": 0.6237, "step": 8410 }, { "epoch": 1.1247659802086118, "grad_norm": 1.1798107624053955, "learning_rate": 1.4383567286118973e-05, "loss": 0.8239, "step": 8411 }, { "epoch": 1.124899705803691, "grad_norm": 1.1528325080871582, "learning_rate": 1.43822695994154e-05, "loss": 0.7122, "step": 8412 }, { "epoch": 1.1250334313987698, "grad_norm": 1.156791090965271, "learning_rate": 1.438097182136761e-05, "loss": 0.6808, "step": 8413 }, { "epoch": 1.1251671569938486, "grad_norm": 1.048363208770752, "learning_rate": 1.4379673952002656e-05, "loss": 0.6498, "step": 8414 }, { "epoch": 1.1253008825889275, "grad_norm": 1.2286697626113892, "learning_rate": 1.4378375991347586e-05, "loss": 0.7571, "step": 8415 }, { "epoch": 1.1254346081840065, "grad_norm": 1.14577054977417, "learning_rate": 1.4377077939429463e-05, "loss": 0.708, "step": 8416 }, { "epoch": 1.1255683337790854, "grad_norm": 1.0847517251968384, "learning_rate": 1.4375779796275336e-05, "loss": 0.7086, "step": 8417 }, { "epoch": 1.1257020593741642, "grad_norm": 1.1408618688583374, "learning_rate": 1.4374481561912266e-05, "loss": 0.7373, "step": 8418 }, { "epoch": 1.125835784969243, "grad_norm": 1.1123744249343872, "learning_rate": 1.4373183236367312e-05, "loss": 0.6288, "step": 8419 }, { "epoch": 1.125969510564322, "grad_norm": 1.0524399280548096, "learning_rate": 1.437188481966754e-05, "loss": 0.743, "step": 8420 }, { "epoch": 1.126103236159401, "grad_norm": 1.3016549348831177, "learning_rate": 1.4370586311840014e-05, "loss": 0.7414, "step": 8421 }, { "epoch": 1.1262369617544798, "grad_norm": 1.1124123334884644, "learning_rate": 1.4369287712911795e-05, "loss": 0.6367, "step": 8422 }, { "epoch": 1.1263706873495587, "grad_norm": 1.0690258741378784, "learning_rate": 1.4367989022909956e-05, "loss": 0.7009, "step": 8423 }, { "epoch": 1.1265044129446375, "grad_norm": 1.1233340501785278, "learning_rate": 1.436669024186157e-05, "loss": 0.7075, "step": 8424 }, { "epoch": 1.1266381385397164, "grad_norm": 1.3396233320236206, "learning_rate": 1.4365391369793697e-05, "loss": 0.7889, "step": 8425 }, { "epoch": 1.1267718641347955, "grad_norm": 1.0610556602478027, "learning_rate": 1.436409240673342e-05, "loss": 0.7138, "step": 8426 }, { "epoch": 1.1269055897298743, "grad_norm": 1.215868353843689, "learning_rate": 1.4362793352707816e-05, "loss": 0.799, "step": 8427 }, { "epoch": 1.1270393153249532, "grad_norm": 0.9768369197845459, "learning_rate": 1.4361494207743958e-05, "loss": 0.5782, "step": 8428 }, { "epoch": 1.127173040920032, "grad_norm": 1.1867334842681885, "learning_rate": 1.4360194971868926e-05, "loss": 0.7511, "step": 8429 }, { "epoch": 1.127306766515111, "grad_norm": 1.1280359029769897, "learning_rate": 1.4358895645109803e-05, "loss": 0.7167, "step": 8430 }, { "epoch": 1.12744049211019, "grad_norm": 1.242020845413208, "learning_rate": 1.4357596227493672e-05, "loss": 0.7527, "step": 8431 }, { "epoch": 1.1275742177052688, "grad_norm": 1.1730339527130127, "learning_rate": 1.4356296719047615e-05, "loss": 0.7791, "step": 8432 }, { "epoch": 1.1277079433003476, "grad_norm": 1.1614855527877808, "learning_rate": 1.4354997119798722e-05, "loss": 0.7372, "step": 8433 }, { "epoch": 1.1278416688954267, "grad_norm": 1.2613730430603027, "learning_rate": 1.4353697429774083e-05, "loss": 0.8149, "step": 8434 }, { "epoch": 1.1279753944905055, "grad_norm": 1.1981089115142822, "learning_rate": 1.4352397649000785e-05, "loss": 0.7142, "step": 8435 }, { "epoch": 1.1281091200855844, "grad_norm": 1.134832739830017, "learning_rate": 1.4351097777505924e-05, "loss": 0.7167, "step": 8436 }, { "epoch": 1.1282428456806632, "grad_norm": 1.1421397924423218, "learning_rate": 1.4349797815316593e-05, "loss": 0.7638, "step": 8437 }, { "epoch": 1.128376571275742, "grad_norm": 1.1641876697540283, "learning_rate": 1.4348497762459887e-05, "loss": 0.7985, "step": 8438 }, { "epoch": 1.1285102968708212, "grad_norm": 1.3237162828445435, "learning_rate": 1.434719761896291e-05, "loss": 0.8014, "step": 8439 }, { "epoch": 1.1286440224659, "grad_norm": 1.1083369255065918, "learning_rate": 1.4345897384852756e-05, "loss": 0.7299, "step": 8440 }, { "epoch": 1.1287777480609789, "grad_norm": 1.1464701890945435, "learning_rate": 1.434459706015653e-05, "loss": 0.6939, "step": 8441 }, { "epoch": 1.1289114736560577, "grad_norm": 1.221779704093933, "learning_rate": 1.4343296644901336e-05, "loss": 0.7446, "step": 8442 }, { "epoch": 1.1290451992511366, "grad_norm": 1.1120717525482178, "learning_rate": 1.434199613911428e-05, "loss": 0.7296, "step": 8443 }, { "epoch": 1.1291789248462156, "grad_norm": 1.1548521518707275, "learning_rate": 1.434069554282247e-05, "loss": 0.7336, "step": 8444 }, { "epoch": 1.1293126504412945, "grad_norm": 1.1772726774215698, "learning_rate": 1.433939485605301e-05, "loss": 0.7568, "step": 8445 }, { "epoch": 1.1294463760363733, "grad_norm": 1.1283254623413086, "learning_rate": 1.4338094078833022e-05, "loss": 0.746, "step": 8446 }, { "epoch": 1.1295801016314522, "grad_norm": 1.069089651107788, "learning_rate": 1.4336793211189612e-05, "loss": 0.6375, "step": 8447 }, { "epoch": 1.1297138272265312, "grad_norm": 1.1044714450836182, "learning_rate": 1.4335492253149901e-05, "loss": 0.7728, "step": 8448 }, { "epoch": 1.12984755282161, "grad_norm": 1.101341724395752, "learning_rate": 1.4334191204740997e-05, "loss": 0.7272, "step": 8449 }, { "epoch": 1.129981278416689, "grad_norm": 1.1498056650161743, "learning_rate": 1.4332890065990027e-05, "loss": 0.8056, "step": 8450 }, { "epoch": 1.1301150040117678, "grad_norm": 1.107475996017456, "learning_rate": 1.4331588836924111e-05, "loss": 0.7414, "step": 8451 }, { "epoch": 1.1302487296068469, "grad_norm": 1.1508893966674805, "learning_rate": 1.4330287517570367e-05, "loss": 0.7202, "step": 8452 }, { "epoch": 1.1303824552019257, "grad_norm": 1.3827824592590332, "learning_rate": 1.4328986107955926e-05, "loss": 0.8509, "step": 8453 }, { "epoch": 1.1305161807970046, "grad_norm": 1.0723426342010498, "learning_rate": 1.4327684608107912e-05, "loss": 0.7052, "step": 8454 }, { "epoch": 1.1306499063920834, "grad_norm": 1.161230206489563, "learning_rate": 1.4326383018053451e-05, "loss": 0.7492, "step": 8455 }, { "epoch": 1.1307836319871623, "grad_norm": 1.2608108520507812, "learning_rate": 1.4325081337819681e-05, "loss": 0.6881, "step": 8456 }, { "epoch": 1.1309173575822413, "grad_norm": 1.0458427667617798, "learning_rate": 1.4323779567433725e-05, "loss": 0.7075, "step": 8457 }, { "epoch": 1.1310510831773202, "grad_norm": 1.1560204029083252, "learning_rate": 1.4322477706922721e-05, "loss": 0.7117, "step": 8458 }, { "epoch": 1.131184808772399, "grad_norm": 1.1166564226150513, "learning_rate": 1.4321175756313807e-05, "loss": 0.6497, "step": 8459 }, { "epoch": 1.1313185343674779, "grad_norm": 1.0914586782455444, "learning_rate": 1.431987371563412e-05, "loss": 0.7421, "step": 8460 }, { "epoch": 1.1314522599625567, "grad_norm": 1.208411693572998, "learning_rate": 1.4318571584910798e-05, "loss": 0.6725, "step": 8461 }, { "epoch": 1.1315859855576358, "grad_norm": 1.0694564580917358, "learning_rate": 1.4317269364170985e-05, "loss": 0.6785, "step": 8462 }, { "epoch": 1.1317197111527146, "grad_norm": 1.507333517074585, "learning_rate": 1.4315967053441822e-05, "loss": 0.7433, "step": 8463 }, { "epoch": 1.1318534367477935, "grad_norm": 1.075852394104004, "learning_rate": 1.4314664652750454e-05, "loss": 0.7372, "step": 8464 }, { "epoch": 1.1319871623428723, "grad_norm": 1.0763435363769531, "learning_rate": 1.431336216212403e-05, "loss": 0.7077, "step": 8465 }, { "epoch": 1.1321208879379514, "grad_norm": 1.2975422143936157, "learning_rate": 1.4312059581589704e-05, "loss": 0.7248, "step": 8466 }, { "epoch": 1.1322546135330303, "grad_norm": 1.3175022602081299, "learning_rate": 1.4310756911174619e-05, "loss": 0.7831, "step": 8467 }, { "epoch": 1.132388339128109, "grad_norm": 1.2464368343353271, "learning_rate": 1.4309454150905933e-05, "loss": 0.8005, "step": 8468 }, { "epoch": 1.132522064723188, "grad_norm": 1.1143320798873901, "learning_rate": 1.4308151300810797e-05, "loss": 0.7271, "step": 8469 }, { "epoch": 1.132655790318267, "grad_norm": 1.313994288444519, "learning_rate": 1.4306848360916368e-05, "loss": 0.7555, "step": 8470 }, { "epoch": 1.1327895159133459, "grad_norm": 1.2605969905853271, "learning_rate": 1.4305545331249807e-05, "loss": 0.7659, "step": 8471 }, { "epoch": 1.1329232415084247, "grad_norm": 1.1556403636932373, "learning_rate": 1.4304242211838277e-05, "loss": 0.7244, "step": 8472 }, { "epoch": 1.1330569671035036, "grad_norm": 1.0986402034759521, "learning_rate": 1.4302939002708933e-05, "loss": 0.7212, "step": 8473 }, { "epoch": 1.1331906926985824, "grad_norm": 1.2066212892532349, "learning_rate": 1.4301635703888946e-05, "loss": 0.7807, "step": 8474 }, { "epoch": 1.1333244182936615, "grad_norm": 1.251274824142456, "learning_rate": 1.4300332315405476e-05, "loss": 0.7074, "step": 8475 }, { "epoch": 1.1334581438887403, "grad_norm": 1.191537618637085, "learning_rate": 1.4299028837285693e-05, "loss": 0.7749, "step": 8476 }, { "epoch": 1.1335918694838192, "grad_norm": 1.2016936540603638, "learning_rate": 1.429772526955677e-05, "loss": 0.7906, "step": 8477 }, { "epoch": 1.133725595078898, "grad_norm": 1.271976351737976, "learning_rate": 1.4296421612245877e-05, "loss": 0.7332, "step": 8478 }, { "epoch": 1.1338593206739769, "grad_norm": 1.0917885303497314, "learning_rate": 1.4295117865380185e-05, "loss": 0.6792, "step": 8479 }, { "epoch": 1.133993046269056, "grad_norm": 1.1485202312469482, "learning_rate": 1.4293814028986874e-05, "loss": 0.8026, "step": 8480 }, { "epoch": 1.1341267718641348, "grad_norm": 1.1456812620162964, "learning_rate": 1.4292510103093115e-05, "loss": 0.7674, "step": 8481 }, { "epoch": 1.1342604974592136, "grad_norm": 1.106204867362976, "learning_rate": 1.429120608772609e-05, "loss": 0.7397, "step": 8482 }, { "epoch": 1.1343942230542927, "grad_norm": 1.2597233057022095, "learning_rate": 1.4289901982912983e-05, "loss": 0.7568, "step": 8483 }, { "epoch": 1.1345279486493716, "grad_norm": 1.0477244853973389, "learning_rate": 1.4288597788680974e-05, "loss": 0.6722, "step": 8484 }, { "epoch": 1.1346616742444504, "grad_norm": 1.1919598579406738, "learning_rate": 1.4287293505057248e-05, "loss": 0.8004, "step": 8485 }, { "epoch": 1.1347953998395293, "grad_norm": 1.2121471166610718, "learning_rate": 1.4285989132068988e-05, "loss": 0.7372, "step": 8486 }, { "epoch": 1.134929125434608, "grad_norm": 1.2106009721755981, "learning_rate": 1.4284684669743387e-05, "loss": 0.7802, "step": 8487 }, { "epoch": 1.1350628510296872, "grad_norm": 1.272891879081726, "learning_rate": 1.4283380118107636e-05, "loss": 0.75, "step": 8488 }, { "epoch": 1.135196576624766, "grad_norm": 1.025378704071045, "learning_rate": 1.4282075477188923e-05, "loss": 0.625, "step": 8489 }, { "epoch": 1.1353303022198449, "grad_norm": 1.1192607879638672, "learning_rate": 1.4280770747014445e-05, "loss": 0.6792, "step": 8490 }, { "epoch": 1.1354640278149237, "grad_norm": 1.1058380603790283, "learning_rate": 1.4279465927611399e-05, "loss": 0.7806, "step": 8491 }, { "epoch": 1.1355977534100026, "grad_norm": 1.272824764251709, "learning_rate": 1.427816101900698e-05, "loss": 0.7497, "step": 8492 }, { "epoch": 1.1357314790050816, "grad_norm": 1.1337007284164429, "learning_rate": 1.4276856021228387e-05, "loss": 0.7275, "step": 8493 }, { "epoch": 1.1358652046001605, "grad_norm": 1.1484978199005127, "learning_rate": 1.4275550934302822e-05, "loss": 0.7108, "step": 8494 }, { "epoch": 1.1359989301952393, "grad_norm": 1.343007206916809, "learning_rate": 1.4274245758257492e-05, "loss": 0.7246, "step": 8495 }, { "epoch": 1.1361326557903182, "grad_norm": 1.1766449213027954, "learning_rate": 1.4272940493119596e-05, "loss": 0.6656, "step": 8496 }, { "epoch": 1.1362663813853973, "grad_norm": 1.1537761688232422, "learning_rate": 1.4271635138916344e-05, "loss": 0.7072, "step": 8497 }, { "epoch": 1.136400106980476, "grad_norm": 1.1074657440185547, "learning_rate": 1.427032969567495e-05, "loss": 0.7172, "step": 8498 }, { "epoch": 1.136533832575555, "grad_norm": 1.1687147617340088, "learning_rate": 1.4269024163422614e-05, "loss": 0.7512, "step": 8499 }, { "epoch": 1.1366675581706338, "grad_norm": 1.1800237894058228, "learning_rate": 1.4267718542186557e-05, "loss": 0.7302, "step": 8500 }, { "epoch": 1.1368012837657129, "grad_norm": 1.1873095035552979, "learning_rate": 1.4266412831993991e-05, "loss": 0.7327, "step": 8501 }, { "epoch": 1.1369350093607917, "grad_norm": 1.192323088645935, "learning_rate": 1.4265107032872131e-05, "loss": 0.7678, "step": 8502 }, { "epoch": 1.1370687349558706, "grad_norm": 1.2861554622650146, "learning_rate": 1.4263801144848196e-05, "loss": 0.7472, "step": 8503 }, { "epoch": 1.1372024605509494, "grad_norm": 1.296046495437622, "learning_rate": 1.4262495167949406e-05, "loss": 0.7541, "step": 8504 }, { "epoch": 1.1373361861460283, "grad_norm": 1.3652756214141846, "learning_rate": 1.4261189102202985e-05, "loss": 0.7721, "step": 8505 }, { "epoch": 1.1374699117411073, "grad_norm": 1.1960608959197998, "learning_rate": 1.4259882947636154e-05, "loss": 0.7946, "step": 8506 }, { "epoch": 1.1376036373361862, "grad_norm": 1.2483481168746948, "learning_rate": 1.4258576704276139e-05, "loss": 0.7292, "step": 8507 }, { "epoch": 1.137737362931265, "grad_norm": 1.0186744928359985, "learning_rate": 1.4257270372150167e-05, "loss": 0.6636, "step": 8508 }, { "epoch": 1.1378710885263439, "grad_norm": 1.134162187576294, "learning_rate": 1.4255963951285467e-05, "loss": 0.73, "step": 8509 }, { "epoch": 1.1380048141214227, "grad_norm": 1.1401047706604004, "learning_rate": 1.4254657441709273e-05, "loss": 0.7301, "step": 8510 }, { "epoch": 1.1381385397165018, "grad_norm": 1.195168375968933, "learning_rate": 1.4253350843448815e-05, "loss": 0.6777, "step": 8511 }, { "epoch": 1.1382722653115807, "grad_norm": 1.2958393096923828, "learning_rate": 1.4252044156531328e-05, "loss": 0.7897, "step": 8512 }, { "epoch": 1.1384059909066595, "grad_norm": 1.1841095685958862, "learning_rate": 1.4250737380984053e-05, "loss": 0.7024, "step": 8513 }, { "epoch": 1.1385397165017384, "grad_norm": 1.1955180168151855, "learning_rate": 1.4249430516834222e-05, "loss": 0.7659, "step": 8514 }, { "epoch": 1.1386734420968174, "grad_norm": 1.1328061819076538, "learning_rate": 1.4248123564109077e-05, "loss": 0.6739, "step": 8515 }, { "epoch": 1.1388071676918963, "grad_norm": 1.1669927835464478, "learning_rate": 1.424681652283586e-05, "loss": 0.6403, "step": 8516 }, { "epoch": 1.1389408932869751, "grad_norm": 1.1896038055419922, "learning_rate": 1.4245509393041821e-05, "loss": 0.7513, "step": 8517 }, { "epoch": 1.139074618882054, "grad_norm": 1.1753462553024292, "learning_rate": 1.4244202174754199e-05, "loss": 0.6886, "step": 8518 }, { "epoch": 1.139208344477133, "grad_norm": 1.0358129739761353, "learning_rate": 1.4242894868000244e-05, "loss": 0.7288, "step": 8519 }, { "epoch": 1.1393420700722119, "grad_norm": 1.273347020149231, "learning_rate": 1.4241587472807203e-05, "loss": 0.7129, "step": 8520 }, { "epoch": 1.1394757956672907, "grad_norm": 1.164568305015564, "learning_rate": 1.4240279989202332e-05, "loss": 0.7281, "step": 8521 }, { "epoch": 1.1396095212623696, "grad_norm": 1.291204810142517, "learning_rate": 1.4238972417212882e-05, "loss": 0.8326, "step": 8522 }, { "epoch": 1.1397432468574484, "grad_norm": 1.2569732666015625, "learning_rate": 1.423766475686611e-05, "loss": 0.7998, "step": 8523 }, { "epoch": 1.1398769724525275, "grad_norm": 1.1659945249557495, "learning_rate": 1.423635700818927e-05, "loss": 0.7371, "step": 8524 }, { "epoch": 1.1400106980476064, "grad_norm": 1.0587611198425293, "learning_rate": 1.4235049171209624e-05, "loss": 0.7012, "step": 8525 }, { "epoch": 1.1401444236426852, "grad_norm": 1.0336602926254272, "learning_rate": 1.4233741245954427e-05, "loss": 0.6941, "step": 8526 }, { "epoch": 1.140278149237764, "grad_norm": 0.9703884124755859, "learning_rate": 1.4232433232450945e-05, "loss": 0.6497, "step": 8527 }, { "epoch": 1.140411874832843, "grad_norm": 1.0044950246810913, "learning_rate": 1.4231125130726442e-05, "loss": 0.662, "step": 8528 }, { "epoch": 1.140545600427922, "grad_norm": 1.0813864469528198, "learning_rate": 1.4229816940808188e-05, "loss": 0.7495, "step": 8529 }, { "epoch": 1.1406793260230008, "grad_norm": 1.0305730104446411, "learning_rate": 1.4228508662723443e-05, "loss": 0.7073, "step": 8530 }, { "epoch": 1.1408130516180797, "grad_norm": 1.1615597009658813, "learning_rate": 1.4227200296499484e-05, "loss": 0.743, "step": 8531 }, { "epoch": 1.1409467772131585, "grad_norm": 1.1701550483703613, "learning_rate": 1.4225891842163578e-05, "loss": 0.7721, "step": 8532 }, { "epoch": 1.1410805028082376, "grad_norm": 1.1151748895645142, "learning_rate": 1.4224583299743004e-05, "loss": 0.704, "step": 8533 }, { "epoch": 1.1412142284033164, "grad_norm": 1.1730639934539795, "learning_rate": 1.422327466926503e-05, "loss": 0.7313, "step": 8534 }, { "epoch": 1.1413479539983953, "grad_norm": 1.0634881258010864, "learning_rate": 1.4221965950756937e-05, "loss": 0.6938, "step": 8535 }, { "epoch": 1.1414816795934741, "grad_norm": 1.0702561140060425, "learning_rate": 1.4220657144246004e-05, "loss": 0.7519, "step": 8536 }, { "epoch": 1.1416154051885532, "grad_norm": 0.9902053475379944, "learning_rate": 1.4219348249759512e-05, "loss": 0.6488, "step": 8537 }, { "epoch": 1.141749130783632, "grad_norm": 1.128320574760437, "learning_rate": 1.4218039267324743e-05, "loss": 0.7919, "step": 8538 }, { "epoch": 1.141882856378711, "grad_norm": 1.1763845682144165, "learning_rate": 1.4216730196968982e-05, "loss": 0.7355, "step": 8539 }, { "epoch": 1.1420165819737897, "grad_norm": 1.1773360967636108, "learning_rate": 1.4215421038719516e-05, "loss": 0.7291, "step": 8540 }, { "epoch": 1.1421503075688686, "grad_norm": 1.1140855550765991, "learning_rate": 1.4214111792603632e-05, "loss": 0.7341, "step": 8541 }, { "epoch": 1.1422840331639477, "grad_norm": 1.2221038341522217, "learning_rate": 1.4212802458648618e-05, "loss": 0.814, "step": 8542 }, { "epoch": 1.1424177587590265, "grad_norm": 1.1990512609481812, "learning_rate": 1.421149303688177e-05, "loss": 0.7601, "step": 8543 }, { "epoch": 1.1425514843541054, "grad_norm": 1.2618746757507324, "learning_rate": 1.4210183527330377e-05, "loss": 0.79, "step": 8544 }, { "epoch": 1.1426852099491842, "grad_norm": 0.9865466952323914, "learning_rate": 1.420887393002174e-05, "loss": 0.6646, "step": 8545 }, { "epoch": 1.142818935544263, "grad_norm": 1.2747303247451782, "learning_rate": 1.4207564244983154e-05, "loss": 0.7054, "step": 8546 }, { "epoch": 1.1429526611393421, "grad_norm": 1.0761216878890991, "learning_rate": 1.4206254472241916e-05, "loss": 0.6914, "step": 8547 }, { "epoch": 1.143086386734421, "grad_norm": 1.1071749925613403, "learning_rate": 1.4204944611825324e-05, "loss": 0.7068, "step": 8548 }, { "epoch": 1.1432201123294998, "grad_norm": 1.145937204360962, "learning_rate": 1.4203634663760693e-05, "loss": 0.7523, "step": 8549 }, { "epoch": 1.1433538379245787, "grad_norm": 1.0613558292388916, "learning_rate": 1.4202324628075317e-05, "loss": 0.7225, "step": 8550 }, { "epoch": 1.1434875635196577, "grad_norm": 1.1577098369598389, "learning_rate": 1.4201014504796505e-05, "loss": 0.7011, "step": 8551 }, { "epoch": 1.1436212891147366, "grad_norm": 1.2799396514892578, "learning_rate": 1.4199704293951564e-05, "loss": 0.7621, "step": 8552 }, { "epoch": 1.1437550147098154, "grad_norm": 1.19056236743927, "learning_rate": 1.4198393995567807e-05, "loss": 0.6998, "step": 8553 }, { "epoch": 1.1438887403048943, "grad_norm": 1.2797596454620361, "learning_rate": 1.4197083609672543e-05, "loss": 0.7416, "step": 8554 }, { "epoch": 1.1440224658999734, "grad_norm": 1.1415300369262695, "learning_rate": 1.419577313629309e-05, "loss": 0.6601, "step": 8555 }, { "epoch": 1.1441561914950522, "grad_norm": 1.204128623008728, "learning_rate": 1.419446257545676e-05, "loss": 0.6756, "step": 8556 }, { "epoch": 1.144289917090131, "grad_norm": 1.330568790435791, "learning_rate": 1.4193151927190871e-05, "loss": 0.7287, "step": 8557 }, { "epoch": 1.14442364268521, "grad_norm": 1.2889591455459595, "learning_rate": 1.4191841191522744e-05, "loss": 0.7485, "step": 8558 }, { "epoch": 1.1445573682802888, "grad_norm": 1.069575548171997, "learning_rate": 1.4190530368479696e-05, "loss": 0.7496, "step": 8559 }, { "epoch": 1.1446910938753678, "grad_norm": 1.1083292961120605, "learning_rate": 1.4189219458089053e-05, "loss": 0.6813, "step": 8560 }, { "epoch": 1.1448248194704467, "grad_norm": 1.1319963932037354, "learning_rate": 1.4187908460378142e-05, "loss": 0.7337, "step": 8561 }, { "epoch": 1.1449585450655255, "grad_norm": 1.1391440629959106, "learning_rate": 1.4186597375374283e-05, "loss": 0.6399, "step": 8562 }, { "epoch": 1.1450922706606044, "grad_norm": 1.2806825637817383, "learning_rate": 1.4185286203104809e-05, "loss": 0.7703, "step": 8563 }, { "epoch": 1.1452259962556832, "grad_norm": 1.1351758241653442, "learning_rate": 1.4183974943597047e-05, "loss": 0.689, "step": 8564 }, { "epoch": 1.1453597218507623, "grad_norm": 1.1527087688446045, "learning_rate": 1.4182663596878334e-05, "loss": 0.6758, "step": 8565 }, { "epoch": 1.1454934474458411, "grad_norm": 1.2319458723068237, "learning_rate": 1.4181352162976002e-05, "loss": 0.8237, "step": 8566 }, { "epoch": 1.14562717304092, "grad_norm": 1.25574791431427, "learning_rate": 1.4180040641917381e-05, "loss": 0.7409, "step": 8567 }, { "epoch": 1.1457608986359988, "grad_norm": 1.024193525314331, "learning_rate": 1.4178729033729812e-05, "loss": 0.6227, "step": 8568 }, { "epoch": 1.145894624231078, "grad_norm": 1.2052526473999023, "learning_rate": 1.417741733844064e-05, "loss": 0.7083, "step": 8569 }, { "epoch": 1.1460283498261568, "grad_norm": 1.0336365699768066, "learning_rate": 1.4176105556077198e-05, "loss": 0.6438, "step": 8570 }, { "epoch": 1.1461620754212356, "grad_norm": 1.300750732421875, "learning_rate": 1.4174793686666833e-05, "loss": 0.7706, "step": 8571 }, { "epoch": 1.1462958010163145, "grad_norm": 1.1572023630142212, "learning_rate": 1.4173481730236886e-05, "loss": 0.7218, "step": 8572 }, { "epoch": 1.1464295266113935, "grad_norm": 1.131834626197815, "learning_rate": 1.4172169686814707e-05, "loss": 0.6434, "step": 8573 }, { "epoch": 1.1465632522064724, "grad_norm": 1.2664762735366821, "learning_rate": 1.4170857556427645e-05, "loss": 0.857, "step": 8574 }, { "epoch": 1.1466969778015512, "grad_norm": 1.1545804738998413, "learning_rate": 1.4169545339103046e-05, "loss": 0.7402, "step": 8575 }, { "epoch": 1.14683070339663, "grad_norm": 1.1405287981033325, "learning_rate": 1.4168233034868267e-05, "loss": 0.7207, "step": 8576 }, { "epoch": 1.146964428991709, "grad_norm": 1.0656049251556396, "learning_rate": 1.4166920643750657e-05, "loss": 0.6793, "step": 8577 }, { "epoch": 1.147098154586788, "grad_norm": 1.1234911680221558, "learning_rate": 1.4165608165777574e-05, "loss": 0.7566, "step": 8578 }, { "epoch": 1.1472318801818668, "grad_norm": 1.0855439901351929, "learning_rate": 1.4164295600976375e-05, "loss": 0.7283, "step": 8579 }, { "epoch": 1.1473656057769457, "grad_norm": 1.0276319980621338, "learning_rate": 1.4162982949374416e-05, "loss": 0.677, "step": 8580 }, { "epoch": 1.1474993313720245, "grad_norm": 1.1346845626831055, "learning_rate": 1.4161670210999063e-05, "loss": 0.738, "step": 8581 }, { "epoch": 1.1476330569671034, "grad_norm": 1.270289421081543, "learning_rate": 1.4160357385877678e-05, "loss": 0.8415, "step": 8582 }, { "epoch": 1.1477667825621825, "grad_norm": 1.16777765750885, "learning_rate": 1.4159044474037625e-05, "loss": 0.7379, "step": 8583 }, { "epoch": 1.1479005081572613, "grad_norm": 1.1192541122436523, "learning_rate": 1.4157731475506266e-05, "loss": 0.7636, "step": 8584 }, { "epoch": 1.1480342337523401, "grad_norm": 1.1054061651229858, "learning_rate": 1.4156418390310976e-05, "loss": 0.6861, "step": 8585 }, { "epoch": 1.1481679593474192, "grad_norm": 1.2601593732833862, "learning_rate": 1.4155105218479121e-05, "loss": 0.6597, "step": 8586 }, { "epoch": 1.148301684942498, "grad_norm": 1.1186531782150269, "learning_rate": 1.4153791960038075e-05, "loss": 0.708, "step": 8587 }, { "epoch": 1.148435410537577, "grad_norm": 1.1941864490509033, "learning_rate": 1.4152478615015209e-05, "loss": 0.8372, "step": 8588 }, { "epoch": 1.1485691361326558, "grad_norm": 1.214064359664917, "learning_rate": 1.4151165183437899e-05, "loss": 0.7483, "step": 8589 }, { "epoch": 1.1487028617277346, "grad_norm": 1.2008904218673706, "learning_rate": 1.4149851665333525e-05, "loss": 0.6955, "step": 8590 }, { "epoch": 1.1488365873228137, "grad_norm": 1.094260573387146, "learning_rate": 1.4148538060729463e-05, "loss": 0.6934, "step": 8591 }, { "epoch": 1.1489703129178925, "grad_norm": 1.1319067478179932, "learning_rate": 1.4147224369653094e-05, "loss": 0.7467, "step": 8592 }, { "epoch": 1.1491040385129714, "grad_norm": 1.1795001029968262, "learning_rate": 1.4145910592131799e-05, "loss": 0.715, "step": 8593 }, { "epoch": 1.1492377641080502, "grad_norm": 1.0578622817993164, "learning_rate": 1.4144596728192972e-05, "loss": 0.6834, "step": 8594 }, { "epoch": 1.149371489703129, "grad_norm": 1.2457751035690308, "learning_rate": 1.4143282777863987e-05, "loss": 0.6848, "step": 8595 }, { "epoch": 1.1495052152982081, "grad_norm": 1.2480480670928955, "learning_rate": 1.4141968741172239e-05, "loss": 0.7924, "step": 8596 }, { "epoch": 1.149638940893287, "grad_norm": 1.327481746673584, "learning_rate": 1.4140654618145115e-05, "loss": 0.7697, "step": 8597 }, { "epoch": 1.1497726664883658, "grad_norm": 1.156846284866333, "learning_rate": 1.4139340408810011e-05, "loss": 0.7421, "step": 8598 }, { "epoch": 1.1499063920834447, "grad_norm": 1.1139090061187744, "learning_rate": 1.4138026113194312e-05, "loss": 0.6874, "step": 8599 }, { "epoch": 1.1500401176785238, "grad_norm": 1.0461763143539429, "learning_rate": 1.413671173132542e-05, "loss": 0.755, "step": 8600 }, { "epoch": 1.1501738432736026, "grad_norm": 1.1975128650665283, "learning_rate": 1.413539726323073e-05, "loss": 0.8022, "step": 8601 }, { "epoch": 1.1503075688686815, "grad_norm": 1.1955822706222534, "learning_rate": 1.4134082708937644e-05, "loss": 0.6982, "step": 8602 }, { "epoch": 1.1504412944637603, "grad_norm": 1.3038724660873413, "learning_rate": 1.413276806847356e-05, "loss": 0.8329, "step": 8603 }, { "epoch": 1.1505750200588394, "grad_norm": 1.2546051740646362, "learning_rate": 1.4131453341865877e-05, "loss": 0.8053, "step": 8604 }, { "epoch": 1.1507087456539182, "grad_norm": 1.122104287147522, "learning_rate": 1.4130138529142003e-05, "loss": 0.7438, "step": 8605 }, { "epoch": 1.150842471248997, "grad_norm": 1.0660064220428467, "learning_rate": 1.4128823630329345e-05, "loss": 0.7037, "step": 8606 }, { "epoch": 1.150976196844076, "grad_norm": 1.1916389465332031, "learning_rate": 1.4127508645455308e-05, "loss": 0.8063, "step": 8607 }, { "epoch": 1.1511099224391548, "grad_norm": 1.2864009141921997, "learning_rate": 1.4126193574547303e-05, "loss": 0.785, "step": 8608 }, { "epoch": 1.1512436480342338, "grad_norm": 1.157697081565857, "learning_rate": 1.4124878417632741e-05, "loss": 0.7277, "step": 8609 }, { "epoch": 1.1513773736293127, "grad_norm": 1.3003860712051392, "learning_rate": 1.4123563174739036e-05, "loss": 0.7306, "step": 8610 }, { "epoch": 1.1515110992243915, "grad_norm": 1.0955166816711426, "learning_rate": 1.4122247845893604e-05, "loss": 0.6919, "step": 8611 }, { "epoch": 1.1516448248194704, "grad_norm": 0.992414653301239, "learning_rate": 1.4120932431123858e-05, "loss": 0.6377, "step": 8612 }, { "epoch": 1.1517785504145492, "grad_norm": 1.1841528415679932, "learning_rate": 1.4119616930457219e-05, "loss": 0.6997, "step": 8613 }, { "epoch": 1.1519122760096283, "grad_norm": 1.1276887655258179, "learning_rate": 1.4118301343921109e-05, "loss": 0.7506, "step": 8614 }, { "epoch": 1.1520460016047072, "grad_norm": 1.2013367414474487, "learning_rate": 1.4116985671542946e-05, "loss": 0.7302, "step": 8615 }, { "epoch": 1.152179727199786, "grad_norm": 1.2439512014389038, "learning_rate": 1.4115669913350156e-05, "loss": 0.7535, "step": 8616 }, { "epoch": 1.1523134527948649, "grad_norm": 1.1796938180923462, "learning_rate": 1.4114354069370166e-05, "loss": 0.7438, "step": 8617 }, { "epoch": 1.152447178389944, "grad_norm": 1.0779612064361572, "learning_rate": 1.4113038139630404e-05, "loss": 0.696, "step": 8618 }, { "epoch": 1.1525809039850228, "grad_norm": 1.2216672897338867, "learning_rate": 1.4111722124158295e-05, "loss": 0.7136, "step": 8619 }, { "epoch": 1.1527146295801016, "grad_norm": 1.1894123554229736, "learning_rate": 1.4110406022981274e-05, "loss": 0.6657, "step": 8620 }, { "epoch": 1.1528483551751805, "grad_norm": 1.1297942399978638, "learning_rate": 1.4109089836126773e-05, "loss": 0.6761, "step": 8621 }, { "epoch": 1.1529820807702595, "grad_norm": 1.3036282062530518, "learning_rate": 1.4107773563622227e-05, "loss": 0.8332, "step": 8622 }, { "epoch": 1.1531158063653384, "grad_norm": 1.208046317100525, "learning_rate": 1.410645720549507e-05, "loss": 0.8326, "step": 8623 }, { "epoch": 1.1532495319604172, "grad_norm": 1.1304584741592407, "learning_rate": 1.4105140761772745e-05, "loss": 0.6624, "step": 8624 }, { "epoch": 1.153383257555496, "grad_norm": 1.1081061363220215, "learning_rate": 1.4103824232482686e-05, "loss": 0.7277, "step": 8625 }, { "epoch": 1.153516983150575, "grad_norm": 1.1539075374603271, "learning_rate": 1.4102507617652337e-05, "loss": 0.6668, "step": 8626 }, { "epoch": 1.153650708745654, "grad_norm": 1.263707160949707, "learning_rate": 1.4101190917309144e-05, "loss": 0.7189, "step": 8627 }, { "epoch": 1.1537844343407329, "grad_norm": 1.2170530557632446, "learning_rate": 1.4099874131480551e-05, "loss": 0.7739, "step": 8628 }, { "epoch": 1.1539181599358117, "grad_norm": 1.2079498767852783, "learning_rate": 1.4098557260194007e-05, "loss": 0.7092, "step": 8629 }, { "epoch": 1.1540518855308906, "grad_norm": 1.214830756187439, "learning_rate": 1.4097240303476955e-05, "loss": 0.6964, "step": 8630 }, { "epoch": 1.1541856111259694, "grad_norm": 1.134867787361145, "learning_rate": 1.409592326135685e-05, "loss": 0.7115, "step": 8631 }, { "epoch": 1.1543193367210485, "grad_norm": 1.0751886367797852, "learning_rate": 1.4094606133861143e-05, "loss": 0.7004, "step": 8632 }, { "epoch": 1.1544530623161273, "grad_norm": 1.2977218627929688, "learning_rate": 1.4093288921017292e-05, "loss": 0.8383, "step": 8633 }, { "epoch": 1.1545867879112062, "grad_norm": 1.180029273033142, "learning_rate": 1.4091971622852751e-05, "loss": 0.7448, "step": 8634 }, { "epoch": 1.154720513506285, "grad_norm": 1.1232386827468872, "learning_rate": 1.4090654239394977e-05, "loss": 0.7136, "step": 8635 }, { "epoch": 1.154854239101364, "grad_norm": 1.1235896348953247, "learning_rate": 1.4089336770671427e-05, "loss": 0.6868, "step": 8636 }, { "epoch": 1.154987964696443, "grad_norm": 1.216508388519287, "learning_rate": 1.4088019216709568e-05, "loss": 0.8026, "step": 8637 }, { "epoch": 1.1551216902915218, "grad_norm": 1.2002564668655396, "learning_rate": 1.4086701577536857e-05, "loss": 0.6738, "step": 8638 }, { "epoch": 1.1552554158866006, "grad_norm": 1.101381540298462, "learning_rate": 1.4085383853180762e-05, "loss": 0.7282, "step": 8639 }, { "epoch": 1.1553891414816797, "grad_norm": 1.0521728992462158, "learning_rate": 1.4084066043668753e-05, "loss": 0.6756, "step": 8640 }, { "epoch": 1.1555228670767586, "grad_norm": 1.0231339931488037, "learning_rate": 1.4082748149028294e-05, "loss": 0.6464, "step": 8641 }, { "epoch": 1.1556565926718374, "grad_norm": 1.219417929649353, "learning_rate": 1.4081430169286859e-05, "loss": 0.7474, "step": 8642 }, { "epoch": 1.1557903182669163, "grad_norm": 1.141446590423584, "learning_rate": 1.4080112104471914e-05, "loss": 0.6851, "step": 8643 }, { "epoch": 1.155924043861995, "grad_norm": 1.0480694770812988, "learning_rate": 1.4078793954610937e-05, "loss": 0.6654, "step": 8644 }, { "epoch": 1.1560577694570742, "grad_norm": 1.254658818244934, "learning_rate": 1.4077475719731402e-05, "loss": 0.833, "step": 8645 }, { "epoch": 1.156191495052153, "grad_norm": 1.148121953010559, "learning_rate": 1.407615739986079e-05, "loss": 0.6883, "step": 8646 }, { "epoch": 1.1563252206472319, "grad_norm": 1.070624589920044, "learning_rate": 1.4074838995026578e-05, "loss": 0.6765, "step": 8647 }, { "epoch": 1.1564589462423107, "grad_norm": 1.2298760414123535, "learning_rate": 1.4073520505256244e-05, "loss": 0.7207, "step": 8648 }, { "epoch": 1.1565926718373896, "grad_norm": 1.193013310432434, "learning_rate": 1.4072201930577274e-05, "loss": 0.7162, "step": 8649 }, { "epoch": 1.1567263974324686, "grad_norm": 1.144661545753479, "learning_rate": 1.4070883271017151e-05, "loss": 0.7471, "step": 8650 }, { "epoch": 1.1568601230275475, "grad_norm": 1.1560523509979248, "learning_rate": 1.4069564526603361e-05, "loss": 0.7606, "step": 8651 }, { "epoch": 1.1569938486226263, "grad_norm": 1.1969174146652222, "learning_rate": 1.4068245697363394e-05, "loss": 0.6867, "step": 8652 }, { "epoch": 1.1571275742177052, "grad_norm": 1.0731309652328491, "learning_rate": 1.406692678332474e-05, "loss": 0.6963, "step": 8653 }, { "epoch": 1.1572612998127842, "grad_norm": 1.1388859748840332, "learning_rate": 1.4065607784514886e-05, "loss": 0.7123, "step": 8654 }, { "epoch": 1.157395025407863, "grad_norm": 1.0731945037841797, "learning_rate": 1.4064288700961328e-05, "loss": 0.706, "step": 8655 }, { "epoch": 1.157528751002942, "grad_norm": 1.2505344152450562, "learning_rate": 1.4062969532691564e-05, "loss": 0.7287, "step": 8656 }, { "epoch": 1.1576624765980208, "grad_norm": 1.2416878938674927, "learning_rate": 1.4061650279733083e-05, "loss": 0.6929, "step": 8657 }, { "epoch": 1.1577962021930999, "grad_norm": 1.1526620388031006, "learning_rate": 1.4060330942113392e-05, "loss": 0.6635, "step": 8658 }, { "epoch": 1.1579299277881787, "grad_norm": 1.119500756263733, "learning_rate": 1.4059011519859987e-05, "loss": 0.7361, "step": 8659 }, { "epoch": 1.1580636533832576, "grad_norm": 1.2432773113250732, "learning_rate": 1.405769201300037e-05, "loss": 0.7927, "step": 8660 }, { "epoch": 1.1581973789783364, "grad_norm": 1.1736907958984375, "learning_rate": 1.4056372421562048e-05, "loss": 0.6717, "step": 8661 }, { "epoch": 1.1583311045734153, "grad_norm": 1.2128117084503174, "learning_rate": 1.4055052745572524e-05, "loss": 0.7063, "step": 8662 }, { "epoch": 1.1584648301684943, "grad_norm": 1.1113380193710327, "learning_rate": 1.4053732985059304e-05, "loss": 0.6941, "step": 8663 }, { "epoch": 1.1585985557635732, "grad_norm": 1.0834118127822876, "learning_rate": 1.4052413140049898e-05, "loss": 0.6819, "step": 8664 }, { "epoch": 1.158732281358652, "grad_norm": 0.9484673738479614, "learning_rate": 1.4051093210571822e-05, "loss": 0.5555, "step": 8665 }, { "epoch": 1.1588660069537309, "grad_norm": 1.2186510562896729, "learning_rate": 1.4049773196652582e-05, "loss": 0.7367, "step": 8666 }, { "epoch": 1.1589997325488097, "grad_norm": 1.1992262601852417, "learning_rate": 1.4048453098319696e-05, "loss": 0.7132, "step": 8667 }, { "epoch": 1.1591334581438888, "grad_norm": 1.2447351217269897, "learning_rate": 1.4047132915600678e-05, "loss": 0.7855, "step": 8668 }, { "epoch": 1.1592671837389676, "grad_norm": 1.0706075429916382, "learning_rate": 1.4045812648523047e-05, "loss": 0.6872, "step": 8669 }, { "epoch": 1.1594009093340465, "grad_norm": 1.1923249959945679, "learning_rate": 1.4044492297114323e-05, "loss": 0.7446, "step": 8670 }, { "epoch": 1.1595346349291253, "grad_norm": 1.0739163160324097, "learning_rate": 1.4043171861402028e-05, "loss": 0.7262, "step": 8671 }, { "epoch": 1.1596683605242044, "grad_norm": 1.2027435302734375, "learning_rate": 1.4041851341413683e-05, "loss": 0.7452, "step": 8672 }, { "epoch": 1.1598020861192833, "grad_norm": 1.1391794681549072, "learning_rate": 1.4040530737176817e-05, "loss": 0.6828, "step": 8673 }, { "epoch": 1.159935811714362, "grad_norm": 1.1397111415863037, "learning_rate": 1.403921004871895e-05, "loss": 0.7224, "step": 8674 }, { "epoch": 1.160069537309441, "grad_norm": 1.0840952396392822, "learning_rate": 1.403788927606762e-05, "loss": 0.7314, "step": 8675 }, { "epoch": 1.16020326290452, "grad_norm": 1.2008872032165527, "learning_rate": 1.403656841925035e-05, "loss": 0.734, "step": 8676 }, { "epoch": 1.1603369884995989, "grad_norm": 1.2115564346313477, "learning_rate": 1.403524747829467e-05, "loss": 0.7213, "step": 8677 }, { "epoch": 1.1604707140946777, "grad_norm": 1.1864066123962402, "learning_rate": 1.403392645322812e-05, "loss": 0.7151, "step": 8678 }, { "epoch": 1.1606044396897566, "grad_norm": 0.9672351479530334, "learning_rate": 1.4032605344078235e-05, "loss": 0.6875, "step": 8679 }, { "epoch": 1.1607381652848354, "grad_norm": 1.1373335123062134, "learning_rate": 1.4031284150872548e-05, "loss": 0.8084, "step": 8680 }, { "epoch": 1.1608718908799145, "grad_norm": 1.0992237329483032, "learning_rate": 1.40299628736386e-05, "loss": 0.6975, "step": 8681 }, { "epoch": 1.1610056164749933, "grad_norm": 1.0596797466278076, "learning_rate": 1.4028641512403934e-05, "loss": 0.6568, "step": 8682 }, { "epoch": 1.1611393420700722, "grad_norm": 1.263583779335022, "learning_rate": 1.4027320067196091e-05, "loss": 0.7983, "step": 8683 }, { "epoch": 1.161273067665151, "grad_norm": 1.2121177911758423, "learning_rate": 1.4025998538042613e-05, "loss": 0.6975, "step": 8684 }, { "epoch": 1.1614067932602299, "grad_norm": 1.1451551914215088, "learning_rate": 1.4024676924971048e-05, "loss": 0.7743, "step": 8685 }, { "epoch": 1.161540518855309, "grad_norm": 1.0530674457550049, "learning_rate": 1.4023355228008946e-05, "loss": 0.6739, "step": 8686 }, { "epoch": 1.1616742444503878, "grad_norm": 1.1487675905227661, "learning_rate": 1.4022033447183854e-05, "loss": 0.7105, "step": 8687 }, { "epoch": 1.1618079700454667, "grad_norm": 1.1700356006622314, "learning_rate": 1.4020711582523323e-05, "loss": 0.8047, "step": 8688 }, { "epoch": 1.1619416956405457, "grad_norm": 1.0820037126541138, "learning_rate": 1.4019389634054905e-05, "loss": 0.7741, "step": 8689 }, { "epoch": 1.1620754212356246, "grad_norm": 1.2940515279769897, "learning_rate": 1.4018067601806155e-05, "loss": 0.7896, "step": 8690 }, { "epoch": 1.1622091468307034, "grad_norm": 1.2776328325271606, "learning_rate": 1.4016745485804634e-05, "loss": 0.7523, "step": 8691 }, { "epoch": 1.1623428724257823, "grad_norm": 1.1768579483032227, "learning_rate": 1.4015423286077896e-05, "loss": 0.7907, "step": 8692 }, { "epoch": 1.1624765980208611, "grad_norm": 1.1278249025344849, "learning_rate": 1.4014101002653501e-05, "loss": 0.7546, "step": 8693 }, { "epoch": 1.1626103236159402, "grad_norm": 1.07382333278656, "learning_rate": 1.4012778635559013e-05, "loss": 0.6894, "step": 8694 }, { "epoch": 1.162744049211019, "grad_norm": 1.120728850364685, "learning_rate": 1.4011456184821994e-05, "loss": 0.7577, "step": 8695 }, { "epoch": 1.1628777748060979, "grad_norm": 1.1440843343734741, "learning_rate": 1.4010133650470007e-05, "loss": 0.7175, "step": 8696 }, { "epoch": 1.1630115004011767, "grad_norm": 1.0723897218704224, "learning_rate": 1.4008811032530624e-05, "loss": 0.6454, "step": 8697 }, { "epoch": 1.1631452259962556, "grad_norm": 1.1582127809524536, "learning_rate": 1.4007488331031409e-05, "loss": 0.7125, "step": 8698 }, { "epoch": 1.1632789515913347, "grad_norm": 1.0734220743179321, "learning_rate": 1.4006165545999939e-05, "loss": 0.7638, "step": 8699 }, { "epoch": 1.1634126771864135, "grad_norm": 1.1487008333206177, "learning_rate": 1.4004842677463777e-05, "loss": 0.6616, "step": 8700 }, { "epoch": 1.1635464027814924, "grad_norm": 1.1554994583129883, "learning_rate": 1.4003519725450505e-05, "loss": 0.6921, "step": 8701 }, { "epoch": 1.1636801283765712, "grad_norm": 1.2108056545257568, "learning_rate": 1.4002196689987693e-05, "loss": 0.7351, "step": 8702 }, { "epoch": 1.1638138539716503, "grad_norm": 1.1940504312515259, "learning_rate": 1.400087357110292e-05, "loss": 0.7992, "step": 8703 }, { "epoch": 1.1639475795667291, "grad_norm": 1.223440170288086, "learning_rate": 1.3999550368823767e-05, "loss": 0.7041, "step": 8704 }, { "epoch": 1.164081305161808, "grad_norm": 1.0549988746643066, "learning_rate": 1.3998227083177814e-05, "loss": 0.6334, "step": 8705 }, { "epoch": 1.1642150307568868, "grad_norm": 1.1634438037872314, "learning_rate": 1.3996903714192643e-05, "loss": 0.7877, "step": 8706 }, { "epoch": 1.1643487563519659, "grad_norm": 1.215009093284607, "learning_rate": 1.3995580261895839e-05, "loss": 0.7276, "step": 8707 }, { "epoch": 1.1644824819470447, "grad_norm": 1.0690982341766357, "learning_rate": 1.3994256726314988e-05, "loss": 0.7073, "step": 8708 }, { "epoch": 1.1646162075421236, "grad_norm": 1.0872148275375366, "learning_rate": 1.3992933107477673e-05, "loss": 0.7298, "step": 8709 }, { "epoch": 1.1647499331372024, "grad_norm": 1.160008192062378, "learning_rate": 1.3991609405411493e-05, "loss": 0.7087, "step": 8710 }, { "epoch": 1.1648836587322813, "grad_norm": 1.1520310640335083, "learning_rate": 1.3990285620144035e-05, "loss": 0.7353, "step": 8711 }, { "epoch": 1.1650173843273604, "grad_norm": 1.1476534605026245, "learning_rate": 1.398896175170289e-05, "loss": 0.7309, "step": 8712 }, { "epoch": 1.1651511099224392, "grad_norm": 1.276505470275879, "learning_rate": 1.3987637800115654e-05, "loss": 0.7307, "step": 8713 }, { "epoch": 1.165284835517518, "grad_norm": 1.2760090827941895, "learning_rate": 1.3986313765409924e-05, "loss": 0.7366, "step": 8714 }, { "epoch": 1.165418561112597, "grad_norm": 1.2144317626953125, "learning_rate": 1.3984989647613301e-05, "loss": 0.701, "step": 8715 }, { "epoch": 1.1655522867076757, "grad_norm": 1.1092779636383057, "learning_rate": 1.3983665446753378e-05, "loss": 0.6982, "step": 8716 }, { "epoch": 1.1656860123027548, "grad_norm": 1.281235933303833, "learning_rate": 1.3982341162857761e-05, "loss": 0.728, "step": 8717 }, { "epoch": 1.1658197378978337, "grad_norm": 1.2920124530792236, "learning_rate": 1.3981016795954054e-05, "loss": 0.802, "step": 8718 }, { "epoch": 1.1659534634929125, "grad_norm": 1.1635991334915161, "learning_rate": 1.3979692346069863e-05, "loss": 0.6693, "step": 8719 }, { "epoch": 1.1660871890879914, "grad_norm": 1.0509663820266724, "learning_rate": 1.3978367813232793e-05, "loss": 0.6353, "step": 8720 }, { "epoch": 1.1662209146830704, "grad_norm": 1.1317791938781738, "learning_rate": 1.397704319747045e-05, "loss": 0.6493, "step": 8721 }, { "epoch": 1.1663546402781493, "grad_norm": 0.9960140585899353, "learning_rate": 1.3975718498810449e-05, "loss": 0.6789, "step": 8722 }, { "epoch": 1.1664883658732281, "grad_norm": 1.185448169708252, "learning_rate": 1.39743937172804e-05, "loss": 0.7647, "step": 8723 }, { "epoch": 1.166622091468307, "grad_norm": 1.1212080717086792, "learning_rate": 1.3973068852907918e-05, "loss": 0.7471, "step": 8724 }, { "epoch": 1.166755817063386, "grad_norm": 1.0912054777145386, "learning_rate": 1.3971743905720616e-05, "loss": 0.6986, "step": 8725 }, { "epoch": 1.166889542658465, "grad_norm": 1.0776363611221313, "learning_rate": 1.3970418875746114e-05, "loss": 0.7313, "step": 8726 }, { "epoch": 1.1670232682535437, "grad_norm": 1.2628676891326904, "learning_rate": 1.3969093763012031e-05, "loss": 0.7524, "step": 8727 }, { "epoch": 1.1671569938486226, "grad_norm": 1.319196105003357, "learning_rate": 1.396776856754598e-05, "loss": 0.7586, "step": 8728 }, { "epoch": 1.1672907194437014, "grad_norm": 1.131996512413025, "learning_rate": 1.3966443289375598e-05, "loss": 0.7214, "step": 8729 }, { "epoch": 1.1674244450387805, "grad_norm": 1.1569792032241821, "learning_rate": 1.3965117928528495e-05, "loss": 0.7005, "step": 8730 }, { "epoch": 1.1675581706338594, "grad_norm": 1.1557785272598267, "learning_rate": 1.396379248503231e-05, "loss": 0.7837, "step": 8731 }, { "epoch": 1.1676918962289382, "grad_norm": 1.1009104251861572, "learning_rate": 1.3962466958914657e-05, "loss": 0.7104, "step": 8732 }, { "epoch": 1.167825621824017, "grad_norm": 1.1699339151382446, "learning_rate": 1.3961141350203176e-05, "loss": 0.7004, "step": 8733 }, { "epoch": 1.167959347419096, "grad_norm": 1.296496033668518, "learning_rate": 1.395981565892549e-05, "loss": 0.6736, "step": 8734 }, { "epoch": 1.168093073014175, "grad_norm": 1.052441120147705, "learning_rate": 1.3958489885109238e-05, "loss": 0.6537, "step": 8735 }, { "epoch": 1.1682267986092538, "grad_norm": 1.2211965322494507, "learning_rate": 1.3957164028782053e-05, "loss": 0.7055, "step": 8736 }, { "epoch": 1.1683605242043327, "grad_norm": 1.1071503162384033, "learning_rate": 1.395583808997157e-05, "loss": 0.7142, "step": 8737 }, { "epoch": 1.1684942497994115, "grad_norm": 1.1510775089263916, "learning_rate": 1.3954512068705425e-05, "loss": 0.7937, "step": 8738 }, { "epoch": 1.1686279753944906, "grad_norm": 1.0915801525115967, "learning_rate": 1.3953185965011265e-05, "loss": 0.7228, "step": 8739 }, { "epoch": 1.1687617009895694, "grad_norm": 1.116228461265564, "learning_rate": 1.3951859778916723e-05, "loss": 0.7526, "step": 8740 }, { "epoch": 1.1688954265846483, "grad_norm": 1.1896651983261108, "learning_rate": 1.3950533510449444e-05, "loss": 0.7557, "step": 8741 }, { "epoch": 1.1690291521797271, "grad_norm": 1.1185009479522705, "learning_rate": 1.3949207159637075e-05, "loss": 0.7198, "step": 8742 }, { "epoch": 1.1691628777748062, "grad_norm": 1.0864053964614868, "learning_rate": 1.3947880726507267e-05, "loss": 0.6541, "step": 8743 }, { "epoch": 1.169296603369885, "grad_norm": 1.2050734758377075, "learning_rate": 1.3946554211087657e-05, "loss": 0.6605, "step": 8744 }, { "epoch": 1.169430328964964, "grad_norm": 1.1356236934661865, "learning_rate": 1.3945227613405902e-05, "loss": 0.7107, "step": 8745 }, { "epoch": 1.1695640545600428, "grad_norm": 1.0730347633361816, "learning_rate": 1.3943900933489653e-05, "loss": 0.687, "step": 8746 }, { "epoch": 1.1696977801551216, "grad_norm": 1.0903875827789307, "learning_rate": 1.3942574171366563e-05, "loss": 0.6412, "step": 8747 }, { "epoch": 1.1698315057502007, "grad_norm": 1.2559584379196167, "learning_rate": 1.3941247327064286e-05, "loss": 0.706, "step": 8748 }, { "epoch": 1.1699652313452795, "grad_norm": 1.115257740020752, "learning_rate": 1.3939920400610483e-05, "loss": 0.6505, "step": 8749 }, { "epoch": 1.1700989569403584, "grad_norm": 1.0530403852462769, "learning_rate": 1.3938593392032806e-05, "loss": 0.7214, "step": 8750 }, { "epoch": 1.1702326825354372, "grad_norm": 1.0763143301010132, "learning_rate": 1.393726630135892e-05, "loss": 0.7458, "step": 8751 }, { "epoch": 1.170366408130516, "grad_norm": 1.0420470237731934, "learning_rate": 1.3935939128616486e-05, "loss": 0.6818, "step": 8752 }, { "epoch": 1.1705001337255951, "grad_norm": 1.3923213481903076, "learning_rate": 1.3934611873833168e-05, "loss": 0.8303, "step": 8753 }, { "epoch": 1.170633859320674, "grad_norm": 1.255861520767212, "learning_rate": 1.3933284537036626e-05, "loss": 0.6855, "step": 8754 }, { "epoch": 1.1707675849157528, "grad_norm": 1.1773043870925903, "learning_rate": 1.3931957118254536e-05, "loss": 0.6539, "step": 8755 }, { "epoch": 1.1709013105108317, "grad_norm": 1.0987216234207153, "learning_rate": 1.3930629617514562e-05, "loss": 0.6991, "step": 8756 }, { "epoch": 1.1710350361059108, "grad_norm": 1.2814117670059204, "learning_rate": 1.3929302034844373e-05, "loss": 0.7427, "step": 8757 }, { "epoch": 1.1711687617009896, "grad_norm": 1.229577660560608, "learning_rate": 1.3927974370271644e-05, "loss": 0.7938, "step": 8758 }, { "epoch": 1.1713024872960685, "grad_norm": 1.1454765796661377, "learning_rate": 1.3926646623824047e-05, "loss": 0.7589, "step": 8759 }, { "epoch": 1.1714362128911473, "grad_norm": 1.0660130977630615, "learning_rate": 1.392531879552926e-05, "loss": 0.7225, "step": 8760 }, { "epoch": 1.1715699384862264, "grad_norm": 1.0706727504730225, "learning_rate": 1.3923990885414958e-05, "loss": 0.6179, "step": 8761 }, { "epoch": 1.1717036640813052, "grad_norm": 1.1329622268676758, "learning_rate": 1.392266289350882e-05, "loss": 0.7456, "step": 8762 }, { "epoch": 1.171837389676384, "grad_norm": 1.2155379056930542, "learning_rate": 1.3921334819838527e-05, "loss": 0.7453, "step": 8763 }, { "epoch": 1.171971115271463, "grad_norm": 1.1920222043991089, "learning_rate": 1.3920006664431767e-05, "loss": 0.7464, "step": 8764 }, { "epoch": 1.1721048408665418, "grad_norm": 1.190022587776184, "learning_rate": 1.3918678427316215e-05, "loss": 0.6758, "step": 8765 }, { "epoch": 1.1722385664616208, "grad_norm": 1.1319959163665771, "learning_rate": 1.391735010851956e-05, "loss": 0.8224, "step": 8766 }, { "epoch": 1.1723722920566997, "grad_norm": 1.1790599822998047, "learning_rate": 1.3916021708069492e-05, "loss": 0.7751, "step": 8767 }, { "epoch": 1.1725060176517785, "grad_norm": 1.3080675601959229, "learning_rate": 1.3914693225993701e-05, "loss": 0.761, "step": 8768 }, { "epoch": 1.1726397432468574, "grad_norm": 1.1109412908554077, "learning_rate": 1.3913364662319872e-05, "loss": 0.748, "step": 8769 }, { "epoch": 1.1727734688419362, "grad_norm": 1.0980364084243774, "learning_rate": 1.3912036017075703e-05, "loss": 0.7127, "step": 8770 }, { "epoch": 1.1729071944370153, "grad_norm": 1.385385513305664, "learning_rate": 1.3910707290288885e-05, "loss": 0.6909, "step": 8771 }, { "epoch": 1.1730409200320941, "grad_norm": 1.1751184463500977, "learning_rate": 1.390937848198712e-05, "loss": 0.6841, "step": 8772 }, { "epoch": 1.173174645627173, "grad_norm": 1.1100140810012817, "learning_rate": 1.3908049592198096e-05, "loss": 0.7163, "step": 8773 }, { "epoch": 1.1733083712222518, "grad_norm": 1.091443419456482, "learning_rate": 1.3906720620949521e-05, "loss": 0.7079, "step": 8774 }, { "epoch": 1.173442096817331, "grad_norm": 1.1175602674484253, "learning_rate": 1.3905391568269091e-05, "loss": 0.6854, "step": 8775 }, { "epoch": 1.1735758224124098, "grad_norm": 1.3727306127548218, "learning_rate": 1.3904062434184514e-05, "loss": 0.7469, "step": 8776 }, { "epoch": 1.1737095480074886, "grad_norm": 1.154465913772583, "learning_rate": 1.390273321872349e-05, "loss": 0.7488, "step": 8777 }, { "epoch": 1.1738432736025675, "grad_norm": 1.2908077239990234, "learning_rate": 1.3901403921913725e-05, "loss": 0.7196, "step": 8778 }, { "epoch": 1.1739769991976465, "grad_norm": 1.1978435516357422, "learning_rate": 1.3900074543782931e-05, "loss": 0.7052, "step": 8779 }, { "epoch": 1.1741107247927254, "grad_norm": 1.1991652250289917, "learning_rate": 1.3898745084358814e-05, "loss": 0.7444, "step": 8780 }, { "epoch": 1.1742444503878042, "grad_norm": 1.1526405811309814, "learning_rate": 1.3897415543669084e-05, "loss": 0.7453, "step": 8781 }, { "epoch": 1.174378175982883, "grad_norm": 1.0260189771652222, "learning_rate": 1.3896085921741458e-05, "loss": 0.641, "step": 8782 }, { "epoch": 1.174511901577962, "grad_norm": 1.1377290487289429, "learning_rate": 1.389475621860365e-05, "loss": 0.7085, "step": 8783 }, { "epoch": 1.174645627173041, "grad_norm": 1.068386435508728, "learning_rate": 1.3893426434283376e-05, "loss": 0.6937, "step": 8784 }, { "epoch": 1.1747793527681198, "grad_norm": 1.2375776767730713, "learning_rate": 1.3892096568808353e-05, "loss": 0.7389, "step": 8785 }, { "epoch": 1.1749130783631987, "grad_norm": 1.1360684633255005, "learning_rate": 1.3890766622206298e-05, "loss": 0.6776, "step": 8786 }, { "epoch": 1.1750468039582775, "grad_norm": 1.1640294790267944, "learning_rate": 1.3889436594504939e-05, "loss": 0.7427, "step": 8787 }, { "epoch": 1.1751805295533564, "grad_norm": 1.2284187078475952, "learning_rate": 1.3888106485731999e-05, "loss": 0.7745, "step": 8788 }, { "epoch": 1.1753142551484355, "grad_norm": 1.2578758001327515, "learning_rate": 1.3886776295915194e-05, "loss": 0.7296, "step": 8789 }, { "epoch": 1.1754479807435143, "grad_norm": 1.1694920063018799, "learning_rate": 1.388544602508226e-05, "loss": 0.65, "step": 8790 }, { "epoch": 1.1755817063385932, "grad_norm": 1.0403350591659546, "learning_rate": 1.388411567326092e-05, "loss": 0.6418, "step": 8791 }, { "epoch": 1.1757154319336722, "grad_norm": 1.112365961074829, "learning_rate": 1.3882785240478906e-05, "loss": 0.7248, "step": 8792 }, { "epoch": 1.175849157528751, "grad_norm": 1.2182716131210327, "learning_rate": 1.3881454726763947e-05, "loss": 0.7454, "step": 8793 }, { "epoch": 1.17598288312383, "grad_norm": 1.1608079671859741, "learning_rate": 1.3880124132143782e-05, "loss": 0.8102, "step": 8794 }, { "epoch": 1.1761166087189088, "grad_norm": 1.1517155170440674, "learning_rate": 1.387879345664614e-05, "loss": 0.7546, "step": 8795 }, { "epoch": 1.1762503343139876, "grad_norm": 1.1888848543167114, "learning_rate": 1.3877462700298763e-05, "loss": 0.777, "step": 8796 }, { "epoch": 1.1763840599090667, "grad_norm": 1.1409714221954346, "learning_rate": 1.3876131863129384e-05, "loss": 0.7521, "step": 8797 }, { "epoch": 1.1765177855041455, "grad_norm": 1.151750087738037, "learning_rate": 1.3874800945165746e-05, "loss": 0.6729, "step": 8798 }, { "epoch": 1.1766515110992244, "grad_norm": 1.1697924137115479, "learning_rate": 1.387346994643559e-05, "loss": 0.6946, "step": 8799 }, { "epoch": 1.1767852366943032, "grad_norm": 1.2502949237823486, "learning_rate": 1.3872138866966658e-05, "loss": 0.738, "step": 8800 }, { "epoch": 1.176918962289382, "grad_norm": 1.328881859779358, "learning_rate": 1.3870807706786697e-05, "loss": 0.8431, "step": 8801 }, { "epoch": 1.1770526878844612, "grad_norm": 1.194585919380188, "learning_rate": 1.3869476465923455e-05, "loss": 0.7061, "step": 8802 }, { "epoch": 1.17718641347954, "grad_norm": 1.397708773612976, "learning_rate": 1.3868145144404677e-05, "loss": 0.791, "step": 8803 }, { "epoch": 1.1773201390746189, "grad_norm": 1.1802654266357422, "learning_rate": 1.3866813742258116e-05, "loss": 0.7434, "step": 8804 }, { "epoch": 1.1774538646696977, "grad_norm": 0.9789415001869202, "learning_rate": 1.386548225951152e-05, "loss": 0.6187, "step": 8805 }, { "epoch": 1.1775875902647768, "grad_norm": 1.1203033924102783, "learning_rate": 1.386415069619265e-05, "loss": 0.7305, "step": 8806 }, { "epoch": 1.1777213158598556, "grad_norm": 1.1714390516281128, "learning_rate": 1.386281905232925e-05, "loss": 0.777, "step": 8807 }, { "epoch": 1.1778550414549345, "grad_norm": 1.0305331945419312, "learning_rate": 1.386148732794909e-05, "loss": 0.6869, "step": 8808 }, { "epoch": 1.1779887670500133, "grad_norm": 1.1210215091705322, "learning_rate": 1.386015552307992e-05, "loss": 0.657, "step": 8809 }, { "epoch": 1.1781224926450924, "grad_norm": 1.1202268600463867, "learning_rate": 1.3858823637749498e-05, "loss": 0.6542, "step": 8810 }, { "epoch": 1.1782562182401712, "grad_norm": 1.1044254302978516, "learning_rate": 1.3857491671985592e-05, "loss": 0.7153, "step": 8811 }, { "epoch": 1.17838994383525, "grad_norm": 1.404664158821106, "learning_rate": 1.3856159625815964e-05, "loss": 0.7749, "step": 8812 }, { "epoch": 1.178523669430329, "grad_norm": 1.0921473503112793, "learning_rate": 1.3854827499268377e-05, "loss": 0.7063, "step": 8813 }, { "epoch": 1.1786573950254078, "grad_norm": 1.0977472066879272, "learning_rate": 1.3853495292370603e-05, "loss": 0.6862, "step": 8814 }, { "epoch": 1.1787911206204869, "grad_norm": 1.143293857574463, "learning_rate": 1.3852163005150402e-05, "loss": 0.7437, "step": 8815 }, { "epoch": 1.1789248462155657, "grad_norm": 1.1174218654632568, "learning_rate": 1.3850830637635556e-05, "loss": 0.698, "step": 8816 }, { "epoch": 1.1790585718106446, "grad_norm": 1.2731053829193115, "learning_rate": 1.3849498189853826e-05, "loss": 0.7469, "step": 8817 }, { "epoch": 1.1791922974057234, "grad_norm": 1.2739965915679932, "learning_rate": 1.3848165661832986e-05, "loss": 0.7478, "step": 8818 }, { "epoch": 1.1793260230008022, "grad_norm": 1.0636004209518433, "learning_rate": 1.3846833053600819e-05, "loss": 0.7355, "step": 8819 }, { "epoch": 1.1794597485958813, "grad_norm": 1.1283109188079834, "learning_rate": 1.38455003651851e-05, "loss": 0.7518, "step": 8820 }, { "epoch": 1.1795934741909602, "grad_norm": 1.2054831981658936, "learning_rate": 1.3844167596613604e-05, "loss": 0.7001, "step": 8821 }, { "epoch": 1.179727199786039, "grad_norm": 1.2663980722427368, "learning_rate": 1.3842834747914111e-05, "loss": 0.7324, "step": 8822 }, { "epoch": 1.1798609253811179, "grad_norm": 1.1540831327438354, "learning_rate": 1.3841501819114407e-05, "loss": 0.7871, "step": 8823 }, { "epoch": 1.179994650976197, "grad_norm": 1.0857833623886108, "learning_rate": 1.3840168810242274e-05, "loss": 0.6513, "step": 8824 }, { "epoch": 1.1801283765712758, "grad_norm": 1.1277796030044556, "learning_rate": 1.3838835721325493e-05, "loss": 0.7216, "step": 8825 }, { "epoch": 1.1802621021663546, "grad_norm": 1.3190909624099731, "learning_rate": 1.3837502552391859e-05, "loss": 0.7467, "step": 8826 }, { "epoch": 1.1803958277614335, "grad_norm": 1.152365803718567, "learning_rate": 1.3836169303469154e-05, "loss": 0.6277, "step": 8827 }, { "epoch": 1.1805295533565126, "grad_norm": 1.2193844318389893, "learning_rate": 1.3834835974585175e-05, "loss": 0.7012, "step": 8828 }, { "epoch": 1.1806632789515914, "grad_norm": 1.150195837020874, "learning_rate": 1.3833502565767705e-05, "loss": 0.7062, "step": 8829 }, { "epoch": 1.1807970045466702, "grad_norm": 1.2230052947998047, "learning_rate": 1.3832169077044544e-05, "loss": 0.6639, "step": 8830 }, { "epoch": 1.180930730141749, "grad_norm": 1.1692253351211548, "learning_rate": 1.3830835508443484e-05, "loss": 0.7204, "step": 8831 }, { "epoch": 1.181064455736828, "grad_norm": 1.3504698276519775, "learning_rate": 1.3829501859992322e-05, "loss": 0.7721, "step": 8832 }, { "epoch": 1.181198181331907, "grad_norm": 1.1660438776016235, "learning_rate": 1.3828168131718861e-05, "loss": 0.7388, "step": 8833 }, { "epoch": 1.1813319069269859, "grad_norm": 1.2020562887191772, "learning_rate": 1.3826834323650899e-05, "loss": 0.7192, "step": 8834 }, { "epoch": 1.1814656325220647, "grad_norm": 1.1986483335494995, "learning_rate": 1.3825500435816237e-05, "loss": 0.7328, "step": 8835 }, { "epoch": 1.1815993581171436, "grad_norm": 1.162795901298523, "learning_rate": 1.3824166468242677e-05, "loss": 0.7271, "step": 8836 }, { "epoch": 1.1817330837122224, "grad_norm": 0.9895807504653931, "learning_rate": 1.3822832420958028e-05, "loss": 0.6605, "step": 8837 }, { "epoch": 1.1818668093073015, "grad_norm": 1.1690711975097656, "learning_rate": 1.3821498293990097e-05, "loss": 0.6813, "step": 8838 }, { "epoch": 1.1820005349023803, "grad_norm": 1.1499677896499634, "learning_rate": 1.3820164087366688e-05, "loss": 0.7283, "step": 8839 }, { "epoch": 1.1821342604974592, "grad_norm": 1.1874727010726929, "learning_rate": 1.3818829801115615e-05, "loss": 0.7385, "step": 8840 }, { "epoch": 1.182267986092538, "grad_norm": 1.2177965641021729, "learning_rate": 1.381749543526469e-05, "loss": 0.8257, "step": 8841 }, { "epoch": 1.182401711687617, "grad_norm": 1.1914085149765015, "learning_rate": 1.3816160989841725e-05, "loss": 0.7723, "step": 8842 }, { "epoch": 1.182535437282696, "grad_norm": 1.1730551719665527, "learning_rate": 1.3814826464874536e-05, "loss": 0.7329, "step": 8843 }, { "epoch": 1.1826691628777748, "grad_norm": 1.1350566148757935, "learning_rate": 1.3813491860390938e-05, "loss": 0.7369, "step": 8844 }, { "epoch": 1.1828028884728536, "grad_norm": 1.006437063217163, "learning_rate": 1.3812157176418752e-05, "loss": 0.6378, "step": 8845 }, { "epoch": 1.1829366140679327, "grad_norm": 1.1954995393753052, "learning_rate": 1.3810822412985798e-05, "loss": 0.751, "step": 8846 }, { "epoch": 1.1830703396630116, "grad_norm": 1.178267478942871, "learning_rate": 1.3809487570119898e-05, "loss": 0.7354, "step": 8847 }, { "epoch": 1.1832040652580904, "grad_norm": 1.1975408792495728, "learning_rate": 1.3808152647848874e-05, "loss": 0.787, "step": 8848 }, { "epoch": 1.1833377908531693, "grad_norm": 1.2115012407302856, "learning_rate": 1.3806817646200554e-05, "loss": 0.7524, "step": 8849 }, { "epoch": 1.183471516448248, "grad_norm": 1.059596061706543, "learning_rate": 1.380548256520276e-05, "loss": 0.6741, "step": 8850 }, { "epoch": 1.1836052420433272, "grad_norm": 1.1211072206497192, "learning_rate": 1.3804147404883323e-05, "loss": 0.6924, "step": 8851 }, { "epoch": 1.183738967638406, "grad_norm": 1.1713043451309204, "learning_rate": 1.3802812165270076e-05, "loss": 0.7666, "step": 8852 }, { "epoch": 1.1838726932334849, "grad_norm": 1.132416844367981, "learning_rate": 1.3801476846390848e-05, "loss": 0.6866, "step": 8853 }, { "epoch": 1.1840064188285637, "grad_norm": 1.156435489654541, "learning_rate": 1.3800141448273472e-05, "loss": 0.7049, "step": 8854 }, { "epoch": 1.1841401444236426, "grad_norm": 1.0945433378219604, "learning_rate": 1.3798805970945783e-05, "loss": 0.7547, "step": 8855 }, { "epoch": 1.1842738700187216, "grad_norm": 1.171900749206543, "learning_rate": 1.379747041443562e-05, "loss": 0.7394, "step": 8856 }, { "epoch": 1.1844075956138005, "grad_norm": 1.1396409273147583, "learning_rate": 1.3796134778770819e-05, "loss": 0.6695, "step": 8857 }, { "epoch": 1.1845413212088793, "grad_norm": 1.2525348663330078, "learning_rate": 1.3794799063979224e-05, "loss": 0.7491, "step": 8858 }, { "epoch": 1.1846750468039582, "grad_norm": 0.999947726726532, "learning_rate": 1.379346327008867e-05, "loss": 0.6549, "step": 8859 }, { "epoch": 1.1848087723990373, "grad_norm": 1.0630241632461548, "learning_rate": 1.3792127397127006e-05, "loss": 0.7409, "step": 8860 }, { "epoch": 1.184942497994116, "grad_norm": 1.2196308374404907, "learning_rate": 1.3790791445122076e-05, "loss": 0.737, "step": 8861 }, { "epoch": 1.185076223589195, "grad_norm": 1.088104009628296, "learning_rate": 1.3789455414101724e-05, "loss": 0.6795, "step": 8862 }, { "epoch": 1.1852099491842738, "grad_norm": 1.2695367336273193, "learning_rate": 1.3788119304093801e-05, "loss": 0.7401, "step": 8863 }, { "epoch": 1.1853436747793529, "grad_norm": 1.137142539024353, "learning_rate": 1.3786783115126152e-05, "loss": 0.6798, "step": 8864 }, { "epoch": 1.1854774003744317, "grad_norm": 0.982507050037384, "learning_rate": 1.3785446847226638e-05, "loss": 0.6312, "step": 8865 }, { "epoch": 1.1856111259695106, "grad_norm": 1.1282432079315186, "learning_rate": 1.3784110500423104e-05, "loss": 0.6972, "step": 8866 }, { "epoch": 1.1857448515645894, "grad_norm": 1.0812016725540161, "learning_rate": 1.3782774074743409e-05, "loss": 0.7179, "step": 8867 }, { "epoch": 1.1858785771596683, "grad_norm": 1.2406963109970093, "learning_rate": 1.3781437570215405e-05, "loss": 0.7363, "step": 8868 }, { "epoch": 1.1860123027547473, "grad_norm": 1.6887593269348145, "learning_rate": 1.3780100986866957e-05, "loss": 0.705, "step": 8869 }, { "epoch": 1.1861460283498262, "grad_norm": 1.120638132095337, "learning_rate": 1.3778764324725919e-05, "loss": 0.7388, "step": 8870 }, { "epoch": 1.186279753944905, "grad_norm": 1.109403371810913, "learning_rate": 1.3777427583820156e-05, "loss": 0.6708, "step": 8871 }, { "epoch": 1.1864134795399839, "grad_norm": 1.0980756282806396, "learning_rate": 1.3776090764177527e-05, "loss": 0.6973, "step": 8872 }, { "epoch": 1.1865472051350627, "grad_norm": 1.2522748708724976, "learning_rate": 1.3774753865825905e-05, "loss": 0.7233, "step": 8873 }, { "epoch": 1.1866809307301418, "grad_norm": 1.0618726015090942, "learning_rate": 1.3773416888793145e-05, "loss": 0.6981, "step": 8874 }, { "epoch": 1.1868146563252207, "grad_norm": 1.1258240938186646, "learning_rate": 1.3772079833107123e-05, "loss": 0.7874, "step": 8875 }, { "epoch": 1.1869483819202995, "grad_norm": 1.2461109161376953, "learning_rate": 1.3770742698795707e-05, "loss": 0.7259, "step": 8876 }, { "epoch": 1.1870821075153783, "grad_norm": 1.091899037361145, "learning_rate": 1.3769405485886767e-05, "loss": 0.6422, "step": 8877 }, { "epoch": 1.1872158331104574, "grad_norm": 1.3187233209609985, "learning_rate": 1.3768068194408175e-05, "loss": 0.7763, "step": 8878 }, { "epoch": 1.1873495587055363, "grad_norm": 1.1897106170654297, "learning_rate": 1.3766730824387808e-05, "loss": 0.7885, "step": 8879 }, { "epoch": 1.1874832843006151, "grad_norm": 1.1752712726593018, "learning_rate": 1.3765393375853541e-05, "loss": 0.7061, "step": 8880 }, { "epoch": 1.187617009895694, "grad_norm": 1.2943899631500244, "learning_rate": 1.3764055848833256e-05, "loss": 0.686, "step": 8881 }, { "epoch": 1.187750735490773, "grad_norm": 1.155847430229187, "learning_rate": 1.3762718243354824e-05, "loss": 0.6728, "step": 8882 }, { "epoch": 1.1878844610858519, "grad_norm": 1.12455415725708, "learning_rate": 1.3761380559446131e-05, "loss": 0.7771, "step": 8883 }, { "epoch": 1.1880181866809307, "grad_norm": 1.1706955432891846, "learning_rate": 1.376004279713506e-05, "loss": 0.7042, "step": 8884 }, { "epoch": 1.1881519122760096, "grad_norm": 1.286660075187683, "learning_rate": 1.3758704956449497e-05, "loss": 0.8019, "step": 8885 }, { "epoch": 1.1882856378710884, "grad_norm": 1.1379283666610718, "learning_rate": 1.3757367037417324e-05, "loss": 0.7099, "step": 8886 }, { "epoch": 1.1884193634661675, "grad_norm": 1.2692679166793823, "learning_rate": 1.3756029040066432e-05, "loss": 0.729, "step": 8887 }, { "epoch": 1.1885530890612463, "grad_norm": 1.0977730751037598, "learning_rate": 1.3754690964424709e-05, "loss": 0.7416, "step": 8888 }, { "epoch": 1.1886868146563252, "grad_norm": 1.1407732963562012, "learning_rate": 1.3753352810520042e-05, "loss": 0.7673, "step": 8889 }, { "epoch": 1.188820540251404, "grad_norm": 1.2526460886001587, "learning_rate": 1.375201457838033e-05, "loss": 0.7578, "step": 8890 }, { "epoch": 1.188954265846483, "grad_norm": 1.1501094102859497, "learning_rate": 1.3750676268033462e-05, "loss": 0.6452, "step": 8891 }, { "epoch": 1.189087991441562, "grad_norm": 1.1355444192886353, "learning_rate": 1.374933787950734e-05, "loss": 0.7067, "step": 8892 }, { "epoch": 1.1892217170366408, "grad_norm": 1.235040307044983, "learning_rate": 1.3747999412829857e-05, "loss": 0.745, "step": 8893 }, { "epoch": 1.1893554426317197, "grad_norm": 1.1710262298583984, "learning_rate": 1.3746660868028911e-05, "loss": 0.7526, "step": 8894 }, { "epoch": 1.1894891682267987, "grad_norm": 1.306667685508728, "learning_rate": 1.3745322245132406e-05, "loss": 0.7782, "step": 8895 }, { "epoch": 1.1896228938218776, "grad_norm": 1.0348923206329346, "learning_rate": 1.374398354416824e-05, "loss": 0.6467, "step": 8896 }, { "epoch": 1.1897566194169564, "grad_norm": 1.1548538208007812, "learning_rate": 1.3742644765164324e-05, "loss": 0.6854, "step": 8897 }, { "epoch": 1.1898903450120353, "grad_norm": 1.0818729400634766, "learning_rate": 1.3741305908148555e-05, "loss": 0.6799, "step": 8898 }, { "epoch": 1.1900240706071141, "grad_norm": 1.2441802024841309, "learning_rate": 1.3739966973148846e-05, "loss": 0.8208, "step": 8899 }, { "epoch": 1.1901577962021932, "grad_norm": 1.2848570346832275, "learning_rate": 1.3738627960193105e-05, "loss": 0.7952, "step": 8900 }, { "epoch": 1.190291521797272, "grad_norm": 1.1952954530715942, "learning_rate": 1.3737288869309241e-05, "loss": 0.7042, "step": 8901 }, { "epoch": 1.190425247392351, "grad_norm": 1.1041487455368042, "learning_rate": 1.3735949700525164e-05, "loss": 0.7044, "step": 8902 }, { "epoch": 1.1905589729874297, "grad_norm": 1.1584585905075073, "learning_rate": 1.3734610453868793e-05, "loss": 0.7313, "step": 8903 }, { "epoch": 1.1906926985825086, "grad_norm": 1.2655977010726929, "learning_rate": 1.3733271129368042e-05, "loss": 0.728, "step": 8904 }, { "epoch": 1.1908264241775877, "grad_norm": 1.1579521894454956, "learning_rate": 1.3731931727050826e-05, "loss": 0.746, "step": 8905 }, { "epoch": 1.1909601497726665, "grad_norm": 1.2433384656906128, "learning_rate": 1.3730592246945063e-05, "loss": 0.7423, "step": 8906 }, { "epoch": 1.1910938753677454, "grad_norm": 1.284486174583435, "learning_rate": 1.3729252689078676e-05, "loss": 0.7611, "step": 8907 }, { "epoch": 1.1912276009628242, "grad_norm": 1.1267811059951782, "learning_rate": 1.3727913053479582e-05, "loss": 0.7506, "step": 8908 }, { "epoch": 1.1913613265579033, "grad_norm": 1.261964201927185, "learning_rate": 1.372657334017571e-05, "loss": 0.7413, "step": 8909 }, { "epoch": 1.1914950521529821, "grad_norm": 1.1986085176467896, "learning_rate": 1.3725233549194983e-05, "loss": 0.7656, "step": 8910 }, { "epoch": 1.191628777748061, "grad_norm": 1.2177395820617676, "learning_rate": 1.3723893680565325e-05, "loss": 0.7527, "step": 8911 }, { "epoch": 1.1917625033431398, "grad_norm": 1.1732949018478394, "learning_rate": 1.3722553734314669e-05, "loss": 0.7769, "step": 8912 }, { "epoch": 1.191896228938219, "grad_norm": 1.258568525314331, "learning_rate": 1.3721213710470944e-05, "loss": 0.7731, "step": 8913 }, { "epoch": 1.1920299545332977, "grad_norm": 1.1091668605804443, "learning_rate": 1.3719873609062078e-05, "loss": 0.657, "step": 8914 }, { "epoch": 1.1921636801283766, "grad_norm": 1.11782968044281, "learning_rate": 1.3718533430116003e-05, "loss": 0.6743, "step": 8915 }, { "epoch": 1.1922974057234554, "grad_norm": 1.2586084604263306, "learning_rate": 1.371719317366066e-05, "loss": 0.8165, "step": 8916 }, { "epoch": 1.1924311313185343, "grad_norm": 1.2471113204956055, "learning_rate": 1.3715852839723984e-05, "loss": 0.709, "step": 8917 }, { "epoch": 1.1925648569136134, "grad_norm": 1.0984491109848022, "learning_rate": 1.3714512428333908e-05, "loss": 0.6364, "step": 8918 }, { "epoch": 1.1926985825086922, "grad_norm": 1.070490837097168, "learning_rate": 1.3713171939518378e-05, "loss": 0.658, "step": 8919 }, { "epoch": 1.192832308103771, "grad_norm": 1.169248342514038, "learning_rate": 1.3711831373305329e-05, "loss": 0.74, "step": 8920 }, { "epoch": 1.19296603369885, "grad_norm": 1.1993176937103271, "learning_rate": 1.3710490729722707e-05, "loss": 0.7456, "step": 8921 }, { "epoch": 1.1930997592939288, "grad_norm": 1.0816614627838135, "learning_rate": 1.3709150008798457e-05, "loss": 0.6742, "step": 8922 }, { "epoch": 1.1932334848890078, "grad_norm": 1.1516257524490356, "learning_rate": 1.3707809210560528e-05, "loss": 0.7109, "step": 8923 }, { "epoch": 1.1933672104840867, "grad_norm": 1.61143159866333, "learning_rate": 1.370646833503686e-05, "loss": 0.6871, "step": 8924 }, { "epoch": 1.1935009360791655, "grad_norm": 1.145888090133667, "learning_rate": 1.3705127382255406e-05, "loss": 0.7097, "step": 8925 }, { "epoch": 1.1936346616742444, "grad_norm": 1.2164641618728638, "learning_rate": 1.3703786352244119e-05, "loss": 0.7333, "step": 8926 }, { "epoch": 1.1937683872693234, "grad_norm": 1.1248600482940674, "learning_rate": 1.3702445245030949e-05, "loss": 0.7242, "step": 8927 }, { "epoch": 1.1939021128644023, "grad_norm": 1.1660642623901367, "learning_rate": 1.3701104060643848e-05, "loss": 0.7204, "step": 8928 }, { "epoch": 1.1940358384594811, "grad_norm": 1.250727891921997, "learning_rate": 1.3699762799110779e-05, "loss": 0.7675, "step": 8929 }, { "epoch": 1.19416956405456, "grad_norm": 1.1870901584625244, "learning_rate": 1.3698421460459692e-05, "loss": 0.687, "step": 8930 }, { "epoch": 1.194303289649639, "grad_norm": 1.1625667810440063, "learning_rate": 1.3697080044718549e-05, "loss": 0.7632, "step": 8931 }, { "epoch": 1.194437015244718, "grad_norm": 1.2416410446166992, "learning_rate": 1.3695738551915312e-05, "loss": 0.7545, "step": 8932 }, { "epoch": 1.1945707408397968, "grad_norm": 1.2610715627670288, "learning_rate": 1.369439698207794e-05, "loss": 0.7099, "step": 8933 }, { "epoch": 1.1947044664348756, "grad_norm": 1.1958414316177368, "learning_rate": 1.3693055335234398e-05, "loss": 0.795, "step": 8934 }, { "epoch": 1.1948381920299544, "grad_norm": 1.1745306253433228, "learning_rate": 1.3691713611412649e-05, "loss": 0.7491, "step": 8935 }, { "epoch": 1.1949719176250335, "grad_norm": 0.9663350582122803, "learning_rate": 1.3690371810640665e-05, "loss": 0.7107, "step": 8936 }, { "epoch": 1.1951056432201124, "grad_norm": 1.056146264076233, "learning_rate": 1.3689029932946411e-05, "loss": 0.6118, "step": 8937 }, { "epoch": 1.1952393688151912, "grad_norm": 1.1266562938690186, "learning_rate": 1.3687687978357863e-05, "loss": 0.7755, "step": 8938 }, { "epoch": 1.19537309441027, "grad_norm": 1.261659026145935, "learning_rate": 1.3686345946902981e-05, "loss": 0.6906, "step": 8939 }, { "epoch": 1.195506820005349, "grad_norm": 1.150908350944519, "learning_rate": 1.3685003838609747e-05, "loss": 0.7062, "step": 8940 }, { "epoch": 1.195640545600428, "grad_norm": 1.129273533821106, "learning_rate": 1.3683661653506133e-05, "loss": 0.6716, "step": 8941 }, { "epoch": 1.1957742711955068, "grad_norm": 1.2441515922546387, "learning_rate": 1.368231939162012e-05, "loss": 0.7274, "step": 8942 }, { "epoch": 1.1959079967905857, "grad_norm": 1.1343867778778076, "learning_rate": 1.3680977052979682e-05, "loss": 0.6808, "step": 8943 }, { "epoch": 1.1960417223856645, "grad_norm": 1.164981722831726, "learning_rate": 1.3679634637612799e-05, "loss": 0.7791, "step": 8944 }, { "epoch": 1.1961754479807436, "grad_norm": 1.155806064605713, "learning_rate": 1.3678292145547454e-05, "loss": 0.7834, "step": 8945 }, { "epoch": 1.1963091735758224, "grad_norm": 1.058268666267395, "learning_rate": 1.367694957681163e-05, "loss": 0.6326, "step": 8946 }, { "epoch": 1.1964428991709013, "grad_norm": 1.2116761207580566, "learning_rate": 1.3675606931433305e-05, "loss": 0.7254, "step": 8947 }, { "epoch": 1.1965766247659801, "grad_norm": 1.1323367357254028, "learning_rate": 1.3674264209440474e-05, "loss": 0.769, "step": 8948 }, { "epoch": 1.1967103503610592, "grad_norm": 1.143675446510315, "learning_rate": 1.3672921410861122e-05, "loss": 0.7264, "step": 8949 }, { "epoch": 1.196844075956138, "grad_norm": 1.0681509971618652, "learning_rate": 1.367157853572324e-05, "loss": 0.7323, "step": 8950 }, { "epoch": 1.196977801551217, "grad_norm": 1.111977219581604, "learning_rate": 1.3670235584054814e-05, "loss": 0.729, "step": 8951 }, { "epoch": 1.1971115271462958, "grad_norm": 1.2315517663955688, "learning_rate": 1.3668892555883839e-05, "loss": 0.7327, "step": 8952 }, { "epoch": 1.1972452527413746, "grad_norm": 1.2540357112884521, "learning_rate": 1.3667549451238308e-05, "loss": 0.6144, "step": 8953 }, { "epoch": 1.1973789783364537, "grad_norm": 1.1160366535186768, "learning_rate": 1.3666206270146223e-05, "loss": 0.755, "step": 8954 }, { "epoch": 1.1975127039315325, "grad_norm": 1.1704756021499634, "learning_rate": 1.3664863012635572e-05, "loss": 0.802, "step": 8955 }, { "epoch": 1.1976464295266114, "grad_norm": 1.2126598358154297, "learning_rate": 1.366351967873436e-05, "loss": 0.7207, "step": 8956 }, { "epoch": 1.1977801551216902, "grad_norm": 1.2041130065917969, "learning_rate": 1.3662176268470586e-05, "loss": 0.7634, "step": 8957 }, { "epoch": 1.197913880716769, "grad_norm": 1.1671525239944458, "learning_rate": 1.3660832781872253e-05, "loss": 0.7265, "step": 8958 }, { "epoch": 1.1980476063118481, "grad_norm": 1.162858247756958, "learning_rate": 1.3659489218967363e-05, "loss": 0.7515, "step": 8959 }, { "epoch": 1.198181331906927, "grad_norm": 1.2248237133026123, "learning_rate": 1.3658145579783919e-05, "loss": 0.7519, "step": 8960 }, { "epoch": 1.1983150575020058, "grad_norm": 1.0587517023086548, "learning_rate": 1.3656801864349933e-05, "loss": 0.6069, "step": 8961 }, { "epoch": 1.1984487830970847, "grad_norm": 1.4005359411239624, "learning_rate": 1.3655458072693413e-05, "loss": 0.7751, "step": 8962 }, { "epoch": 1.1985825086921638, "grad_norm": 1.2449924945831299, "learning_rate": 1.3654114204842369e-05, "loss": 0.802, "step": 8963 }, { "epoch": 1.1987162342872426, "grad_norm": 1.1139699220657349, "learning_rate": 1.3652770260824806e-05, "loss": 0.7239, "step": 8964 }, { "epoch": 1.1988499598823215, "grad_norm": 1.2092784643173218, "learning_rate": 1.3651426240668744e-05, "loss": 0.7572, "step": 8965 }, { "epoch": 1.1989836854774003, "grad_norm": 1.0910764932632446, "learning_rate": 1.3650082144402195e-05, "loss": 0.7116, "step": 8966 }, { "epoch": 1.1991174110724794, "grad_norm": 1.0360510349273682, "learning_rate": 1.3648737972053179e-05, "loss": 0.7197, "step": 8967 }, { "epoch": 1.1992511366675582, "grad_norm": 1.0435012578964233, "learning_rate": 1.3647393723649708e-05, "loss": 0.6281, "step": 8968 }, { "epoch": 1.199384862262637, "grad_norm": 1.2182564735412598, "learning_rate": 1.364604939921981e-05, "loss": 0.7659, "step": 8969 }, { "epoch": 1.199518587857716, "grad_norm": 1.2867982387542725, "learning_rate": 1.3644704998791501e-05, "loss": 0.7183, "step": 8970 }, { "epoch": 1.1996523134527948, "grad_norm": 1.1223868131637573, "learning_rate": 1.3643360522392799e-05, "loss": 0.7158, "step": 8971 }, { "epoch": 1.1997860390478738, "grad_norm": 1.2870337963104248, "learning_rate": 1.3642015970051737e-05, "loss": 0.6995, "step": 8972 }, { "epoch": 1.1999197646429527, "grad_norm": 1.2040070295333862, "learning_rate": 1.3640671341796334e-05, "loss": 0.7711, "step": 8973 }, { "epoch": 1.2000534902380315, "grad_norm": 1.2238504886627197, "learning_rate": 1.3639326637654622e-05, "loss": 0.7578, "step": 8974 }, { "epoch": 1.2001872158331104, "grad_norm": 1.4510940313339233, "learning_rate": 1.3637981857654629e-05, "loss": 0.7657, "step": 8975 }, { "epoch": 1.2003209414281892, "grad_norm": 1.2163002490997314, "learning_rate": 1.3636637001824386e-05, "loss": 0.7903, "step": 8976 }, { "epoch": 1.2004546670232683, "grad_norm": 1.2259807586669922, "learning_rate": 1.3635292070191924e-05, "loss": 0.738, "step": 8977 }, { "epoch": 1.2005883926183472, "grad_norm": 1.1371212005615234, "learning_rate": 1.3633947062785277e-05, "loss": 0.7273, "step": 8978 }, { "epoch": 1.200722118213426, "grad_norm": 1.1110987663269043, "learning_rate": 1.363260197963248e-05, "loss": 0.749, "step": 8979 }, { "epoch": 1.2008558438085049, "grad_norm": 1.1727619171142578, "learning_rate": 1.363125682076157e-05, "loss": 0.7306, "step": 8980 }, { "epoch": 1.200989569403584, "grad_norm": 1.148292899131775, "learning_rate": 1.3629911586200591e-05, "loss": 0.7505, "step": 8981 }, { "epoch": 1.2011232949986628, "grad_norm": 1.1893748044967651, "learning_rate": 1.3628566275977577e-05, "loss": 0.7412, "step": 8982 }, { "epoch": 1.2012570205937416, "grad_norm": 1.1427544355392456, "learning_rate": 1.362722089012057e-05, "loss": 0.7256, "step": 8983 }, { "epoch": 1.2013907461888205, "grad_norm": 1.1609103679656982, "learning_rate": 1.3625875428657614e-05, "loss": 0.7795, "step": 8984 }, { "epoch": 1.2015244717838995, "grad_norm": 1.2255394458770752, "learning_rate": 1.3624529891616754e-05, "loss": 0.7574, "step": 8985 }, { "epoch": 1.2016581973789784, "grad_norm": 1.2172746658325195, "learning_rate": 1.3623184279026036e-05, "loss": 0.7157, "step": 8986 }, { "epoch": 1.2017919229740572, "grad_norm": 1.1932650804519653, "learning_rate": 1.3621838590913509e-05, "loss": 0.7531, "step": 8987 }, { "epoch": 1.201925648569136, "grad_norm": 1.0475637912750244, "learning_rate": 1.3620492827307223e-05, "loss": 0.6543, "step": 8988 }, { "epoch": 1.202059374164215, "grad_norm": 1.1967012882232666, "learning_rate": 1.361914698823523e-05, "loss": 0.7041, "step": 8989 }, { "epoch": 1.202193099759294, "grad_norm": 1.1847583055496216, "learning_rate": 1.3617801073725581e-05, "loss": 0.733, "step": 8990 }, { "epoch": 1.2023268253543729, "grad_norm": 1.0628321170806885, "learning_rate": 1.361645508380633e-05, "loss": 0.6143, "step": 8991 }, { "epoch": 1.2024605509494517, "grad_norm": 1.2126635313034058, "learning_rate": 1.361510901850553e-05, "loss": 0.7869, "step": 8992 }, { "epoch": 1.2025942765445305, "grad_norm": 0.9837111830711365, "learning_rate": 1.3613762877851244e-05, "loss": 0.5897, "step": 8993 }, { "epoch": 1.2027280021396094, "grad_norm": 1.1172072887420654, "learning_rate": 1.3612416661871532e-05, "loss": 0.6758, "step": 8994 }, { "epoch": 1.2028617277346885, "grad_norm": 1.1416600942611694, "learning_rate": 1.3611070370594448e-05, "loss": 0.7022, "step": 8995 }, { "epoch": 1.2029954533297673, "grad_norm": 1.2693506479263306, "learning_rate": 1.3609724004048057e-05, "loss": 0.784, "step": 8996 }, { "epoch": 1.2031291789248462, "grad_norm": 1.1978055238723755, "learning_rate": 1.3608377562260423e-05, "loss": 0.7454, "step": 8997 }, { "epoch": 1.2032629045199252, "grad_norm": 1.13164484500885, "learning_rate": 1.3607031045259615e-05, "loss": 0.6797, "step": 8998 }, { "epoch": 1.203396630115004, "grad_norm": 1.2723455429077148, "learning_rate": 1.3605684453073696e-05, "loss": 0.7519, "step": 8999 }, { "epoch": 1.203530355710083, "grad_norm": 1.1814661026000977, "learning_rate": 1.3604337785730732e-05, "loss": 0.6732, "step": 9000 }, { "epoch": 1.2036640813051618, "grad_norm": 1.364017367362976, "learning_rate": 1.3602991043258795e-05, "loss": 0.8782, "step": 9001 }, { "epoch": 1.2037978069002406, "grad_norm": 1.2103763818740845, "learning_rate": 1.3601644225685963e-05, "loss": 0.6955, "step": 9002 }, { "epoch": 1.2039315324953197, "grad_norm": 1.098138451576233, "learning_rate": 1.36002973330403e-05, "loss": 0.6409, "step": 9003 }, { "epoch": 1.2040652580903985, "grad_norm": 1.1424403190612793, "learning_rate": 1.3598950365349884e-05, "loss": 0.7057, "step": 9004 }, { "epoch": 1.2041989836854774, "grad_norm": 1.1962815523147583, "learning_rate": 1.3597603322642791e-05, "loss": 0.6874, "step": 9005 }, { "epoch": 1.2043327092805562, "grad_norm": 1.0450505018234253, "learning_rate": 1.3596256204947098e-05, "loss": 0.7199, "step": 9006 }, { "epoch": 1.204466434875635, "grad_norm": 1.111109972000122, "learning_rate": 1.3594909012290889e-05, "loss": 0.6696, "step": 9007 }, { "epoch": 1.2046001604707142, "grad_norm": 1.2168430089950562, "learning_rate": 1.3593561744702241e-05, "loss": 0.7348, "step": 9008 }, { "epoch": 1.204733886065793, "grad_norm": 1.294054388999939, "learning_rate": 1.3592214402209236e-05, "loss": 0.6228, "step": 9009 }, { "epoch": 1.2048676116608719, "grad_norm": 0.956149160861969, "learning_rate": 1.3590866984839959e-05, "loss": 0.6602, "step": 9010 }, { "epoch": 1.2050013372559507, "grad_norm": 1.1104375123977661, "learning_rate": 1.3589519492622496e-05, "loss": 0.6943, "step": 9011 }, { "epoch": 1.2051350628510298, "grad_norm": 1.0774385929107666, "learning_rate": 1.3588171925584935e-05, "loss": 0.6798, "step": 9012 }, { "epoch": 1.2052687884461086, "grad_norm": 1.214463233947754, "learning_rate": 1.3586824283755362e-05, "loss": 0.7376, "step": 9013 }, { "epoch": 1.2054025140411875, "grad_norm": 1.1363368034362793, "learning_rate": 1.358547656716187e-05, "loss": 0.7964, "step": 9014 }, { "epoch": 1.2055362396362663, "grad_norm": 1.2589733600616455, "learning_rate": 1.358412877583255e-05, "loss": 0.7599, "step": 9015 }, { "epoch": 1.2056699652313454, "grad_norm": 1.1610214710235596, "learning_rate": 1.3582780909795497e-05, "loss": 0.6943, "step": 9016 }, { "epoch": 1.2058036908264242, "grad_norm": 1.2416789531707764, "learning_rate": 1.3581432969078803e-05, "loss": 0.7636, "step": 9017 }, { "epoch": 1.205937416421503, "grad_norm": 1.1754951477050781, "learning_rate": 1.3580084953710564e-05, "loss": 0.6766, "step": 9018 }, { "epoch": 1.206071142016582, "grad_norm": 1.2357126474380493, "learning_rate": 1.3578736863718879e-05, "loss": 0.7567, "step": 9019 }, { "epoch": 1.2062048676116608, "grad_norm": 1.049296498298645, "learning_rate": 1.3577388699131852e-05, "loss": 0.6947, "step": 9020 }, { "epoch": 1.2063385932067399, "grad_norm": 1.3287372589111328, "learning_rate": 1.3576040459977579e-05, "loss": 0.8127, "step": 9021 }, { "epoch": 1.2064723188018187, "grad_norm": 1.2081409692764282, "learning_rate": 1.3574692146284166e-05, "loss": 0.814, "step": 9022 }, { "epoch": 1.2066060443968976, "grad_norm": 1.1873949766159058, "learning_rate": 1.3573343758079716e-05, "loss": 0.8072, "step": 9023 }, { "epoch": 1.2067397699919764, "grad_norm": 1.3341482877731323, "learning_rate": 1.3571995295392333e-05, "loss": 0.8152, "step": 9024 }, { "epoch": 1.2068734955870553, "grad_norm": 1.1328319311141968, "learning_rate": 1.3570646758250123e-05, "loss": 0.6571, "step": 9025 }, { "epoch": 1.2070072211821343, "grad_norm": 1.2272077798843384, "learning_rate": 1.3569298146681202e-05, "loss": 0.737, "step": 9026 }, { "epoch": 1.2071409467772132, "grad_norm": 1.1021692752838135, "learning_rate": 1.3567949460713678e-05, "loss": 0.7397, "step": 9027 }, { "epoch": 1.207274672372292, "grad_norm": 1.2565913200378418, "learning_rate": 1.356660070037566e-05, "loss": 0.7179, "step": 9028 }, { "epoch": 1.2074083979673709, "grad_norm": 1.0973520278930664, "learning_rate": 1.3565251865695263e-05, "loss": 0.6684, "step": 9029 }, { "epoch": 1.20754212356245, "grad_norm": 1.0415103435516357, "learning_rate": 1.3563902956700603e-05, "loss": 0.7182, "step": 9030 }, { "epoch": 1.2076758491575288, "grad_norm": 1.2298235893249512, "learning_rate": 1.3562553973419796e-05, "loss": 0.7067, "step": 9031 }, { "epoch": 1.2078095747526076, "grad_norm": 1.2295136451721191, "learning_rate": 1.3561204915880958e-05, "loss": 0.713, "step": 9032 }, { "epoch": 1.2079433003476865, "grad_norm": 1.1624665260314941, "learning_rate": 1.3559855784112215e-05, "loss": 0.6372, "step": 9033 }, { "epoch": 1.2080770259427656, "grad_norm": 1.2671111822128296, "learning_rate": 1.3558506578141683e-05, "loss": 0.7635, "step": 9034 }, { "epoch": 1.2082107515378444, "grad_norm": 1.1694306135177612, "learning_rate": 1.3557157297997487e-05, "loss": 0.7431, "step": 9035 }, { "epoch": 1.2083444771329233, "grad_norm": 1.1366825103759766, "learning_rate": 1.3555807943707752e-05, "loss": 0.6743, "step": 9036 }, { "epoch": 1.208478202728002, "grad_norm": 1.1026197671890259, "learning_rate": 1.3554458515300602e-05, "loss": 0.6857, "step": 9037 }, { "epoch": 1.208611928323081, "grad_norm": 1.066704511642456, "learning_rate": 1.3553109012804162e-05, "loss": 0.6505, "step": 9038 }, { "epoch": 1.20874565391816, "grad_norm": 1.0710009336471558, "learning_rate": 1.3551759436246568e-05, "loss": 0.7318, "step": 9039 }, { "epoch": 1.2088793795132389, "grad_norm": 1.268571138381958, "learning_rate": 1.3550409785655947e-05, "loss": 0.7892, "step": 9040 }, { "epoch": 1.2090131051083177, "grad_norm": 1.2657946348190308, "learning_rate": 1.3549060061060431e-05, "loss": 0.7881, "step": 9041 }, { "epoch": 1.2091468307033966, "grad_norm": 1.124334454536438, "learning_rate": 1.3547710262488154e-05, "loss": 0.7144, "step": 9042 }, { "epoch": 1.2092805562984754, "grad_norm": 1.1629618406295776, "learning_rate": 1.3546360389967252e-05, "loss": 0.6834, "step": 9043 }, { "epoch": 1.2094142818935545, "grad_norm": 1.185330867767334, "learning_rate": 1.354501044352586e-05, "loss": 0.7259, "step": 9044 }, { "epoch": 1.2095480074886333, "grad_norm": 1.1155431270599365, "learning_rate": 1.3543660423192117e-05, "loss": 0.6733, "step": 9045 }, { "epoch": 1.2096817330837122, "grad_norm": 1.1219710111618042, "learning_rate": 1.3542310328994166e-05, "loss": 0.6995, "step": 9046 }, { "epoch": 1.209815458678791, "grad_norm": 1.2864099740982056, "learning_rate": 1.3540960160960147e-05, "loss": 0.7953, "step": 9047 }, { "epoch": 1.20994918427387, "grad_norm": 1.0919420719146729, "learning_rate": 1.3539609919118197e-05, "loss": 0.6712, "step": 9048 }, { "epoch": 1.210082909868949, "grad_norm": 1.3224000930786133, "learning_rate": 1.3538259603496469e-05, "loss": 0.7563, "step": 9049 }, { "epoch": 1.2102166354640278, "grad_norm": 1.1616071462631226, "learning_rate": 1.3536909214123104e-05, "loss": 0.7004, "step": 9050 }, { "epoch": 1.2103503610591066, "grad_norm": 1.0560840368270874, "learning_rate": 1.353555875102625e-05, "loss": 0.7057, "step": 9051 }, { "epoch": 1.2104840866541857, "grad_norm": 1.0857654809951782, "learning_rate": 1.3534208214234057e-05, "loss": 0.7171, "step": 9052 }, { "epoch": 1.2106178122492646, "grad_norm": 1.1566907167434692, "learning_rate": 1.3532857603774676e-05, "loss": 0.7329, "step": 9053 }, { "epoch": 1.2107515378443434, "grad_norm": 1.2094645500183105, "learning_rate": 1.3531506919676259e-05, "loss": 0.7174, "step": 9054 }, { "epoch": 1.2108852634394223, "grad_norm": 1.1702481508255005, "learning_rate": 1.3530156161966961e-05, "loss": 0.7195, "step": 9055 }, { "epoch": 1.2110189890345011, "grad_norm": 1.174026608467102, "learning_rate": 1.3528805330674934e-05, "loss": 0.6701, "step": 9056 }, { "epoch": 1.2111527146295802, "grad_norm": 1.3334025144577026, "learning_rate": 1.3527454425828336e-05, "loss": 0.909, "step": 9057 }, { "epoch": 1.211286440224659, "grad_norm": 1.1262354850769043, "learning_rate": 1.3526103447455326e-05, "loss": 0.6869, "step": 9058 }, { "epoch": 1.2114201658197379, "grad_norm": 1.2559438943862915, "learning_rate": 1.3524752395584066e-05, "loss": 0.7785, "step": 9059 }, { "epoch": 1.2115538914148167, "grad_norm": 1.1379504203796387, "learning_rate": 1.3523401270242715e-05, "loss": 0.7159, "step": 9060 }, { "epoch": 1.2116876170098956, "grad_norm": 1.147474765777588, "learning_rate": 1.3522050071459434e-05, "loss": 0.652, "step": 9061 }, { "epoch": 1.2118213426049746, "grad_norm": 1.308051347732544, "learning_rate": 1.352069879926239e-05, "loss": 0.7718, "step": 9062 }, { "epoch": 1.2119550682000535, "grad_norm": 1.209902048110962, "learning_rate": 1.351934745367975e-05, "loss": 0.6957, "step": 9063 }, { "epoch": 1.2120887937951323, "grad_norm": 1.087220549583435, "learning_rate": 1.3517996034739678e-05, "loss": 0.7052, "step": 9064 }, { "epoch": 1.2122225193902112, "grad_norm": 1.1273982524871826, "learning_rate": 1.3516644542470346e-05, "loss": 0.7367, "step": 9065 }, { "epoch": 1.2123562449852903, "grad_norm": 1.0759533643722534, "learning_rate": 1.3515292976899922e-05, "loss": 0.6827, "step": 9066 }, { "epoch": 1.2124899705803691, "grad_norm": 1.2487528324127197, "learning_rate": 1.3513941338056584e-05, "loss": 0.6992, "step": 9067 }, { "epoch": 1.212623696175448, "grad_norm": 1.2670923471450806, "learning_rate": 1.35125896259685e-05, "loss": 0.7265, "step": 9068 }, { "epoch": 1.2127574217705268, "grad_norm": 1.2367582321166992, "learning_rate": 1.3511237840663842e-05, "loss": 0.7012, "step": 9069 }, { "epoch": 1.2128911473656059, "grad_norm": 1.2065536975860596, "learning_rate": 1.3509885982170793e-05, "loss": 0.7631, "step": 9070 }, { "epoch": 1.2130248729606847, "grad_norm": 1.2679996490478516, "learning_rate": 1.3508534050517532e-05, "loss": 0.7024, "step": 9071 }, { "epoch": 1.2131585985557636, "grad_norm": 1.0177645683288574, "learning_rate": 1.3507182045732235e-05, "loss": 0.6703, "step": 9072 }, { "epoch": 1.2132923241508424, "grad_norm": 1.1901986598968506, "learning_rate": 1.3505829967843083e-05, "loss": 0.6614, "step": 9073 }, { "epoch": 1.2134260497459213, "grad_norm": 1.222050666809082, "learning_rate": 1.350447781687826e-05, "loss": 0.7073, "step": 9074 }, { "epoch": 1.2135597753410003, "grad_norm": 1.0645278692245483, "learning_rate": 1.3503125592865954e-05, "loss": 0.6836, "step": 9075 }, { "epoch": 1.2136935009360792, "grad_norm": 1.238612174987793, "learning_rate": 1.3501773295834339e-05, "loss": 0.7145, "step": 9076 }, { "epoch": 1.213827226531158, "grad_norm": 1.1346899271011353, "learning_rate": 1.3500420925811618e-05, "loss": 0.699, "step": 9077 }, { "epoch": 1.213960952126237, "grad_norm": 1.2261466979980469, "learning_rate": 1.3499068482825968e-05, "loss": 0.8005, "step": 9078 }, { "epoch": 1.2140946777213157, "grad_norm": 1.122787356376648, "learning_rate": 1.349771596690559e-05, "loss": 0.6888, "step": 9079 }, { "epoch": 1.2142284033163948, "grad_norm": 1.140896201133728, "learning_rate": 1.3496363378078662e-05, "loss": 0.7441, "step": 9080 }, { "epoch": 1.2143621289114737, "grad_norm": 1.0653181076049805, "learning_rate": 1.349501071637339e-05, "loss": 0.7188, "step": 9081 }, { "epoch": 1.2144958545065525, "grad_norm": 1.1413154602050781, "learning_rate": 1.3493657981817961e-05, "loss": 0.689, "step": 9082 }, { "epoch": 1.2146295801016314, "grad_norm": 1.17963707447052, "learning_rate": 1.3492305174440574e-05, "loss": 0.6383, "step": 9083 }, { "epoch": 1.2147633056967104, "grad_norm": 1.1084611415863037, "learning_rate": 1.3490952294269431e-05, "loss": 0.6496, "step": 9084 }, { "epoch": 1.2148970312917893, "grad_norm": 1.0990934371948242, "learning_rate": 1.3489599341332723e-05, "loss": 0.6105, "step": 9085 }, { "epoch": 1.2150307568868681, "grad_norm": 1.055248498916626, "learning_rate": 1.3488246315658659e-05, "loss": 0.6539, "step": 9086 }, { "epoch": 1.215164482481947, "grad_norm": 1.2267736196517944, "learning_rate": 1.348689321727544e-05, "loss": 0.6938, "step": 9087 }, { "epoch": 1.215298208077026, "grad_norm": 1.0088655948638916, "learning_rate": 1.348554004621127e-05, "loss": 0.5853, "step": 9088 }, { "epoch": 1.215431933672105, "grad_norm": 1.2697254419326782, "learning_rate": 1.3484186802494346e-05, "loss": 0.7804, "step": 9089 }, { "epoch": 1.2155656592671837, "grad_norm": 1.2124444246292114, "learning_rate": 1.3482833486152886e-05, "loss": 0.7508, "step": 9090 }, { "epoch": 1.2156993848622626, "grad_norm": 1.224016547203064, "learning_rate": 1.3481480097215094e-05, "loss": 0.687, "step": 9091 }, { "epoch": 1.2158331104573414, "grad_norm": 1.0906602144241333, "learning_rate": 1.3480126635709183e-05, "loss": 0.6957, "step": 9092 }, { "epoch": 1.2159668360524205, "grad_norm": 1.2782318592071533, "learning_rate": 1.3478773101663362e-05, "loss": 0.6999, "step": 9093 }, { "epoch": 1.2161005616474994, "grad_norm": 1.2403485774993896, "learning_rate": 1.3477419495105843e-05, "loss": 0.7243, "step": 9094 }, { "epoch": 1.2162342872425782, "grad_norm": 1.317039132118225, "learning_rate": 1.3476065816064842e-05, "loss": 0.7872, "step": 9095 }, { "epoch": 1.216368012837657, "grad_norm": 1.1456726789474487, "learning_rate": 1.3474712064568576e-05, "loss": 0.6878, "step": 9096 }, { "epoch": 1.216501738432736, "grad_norm": 1.1739799976348877, "learning_rate": 1.3473358240645263e-05, "loss": 0.746, "step": 9097 }, { "epoch": 1.216635464027815, "grad_norm": 1.1599738597869873, "learning_rate": 1.3472004344323118e-05, "loss": 0.7282, "step": 9098 }, { "epoch": 1.2167691896228938, "grad_norm": 1.1250706911087036, "learning_rate": 1.3470650375630365e-05, "loss": 0.6601, "step": 9099 }, { "epoch": 1.2169029152179727, "grad_norm": 1.1711138486862183, "learning_rate": 1.346929633459523e-05, "loss": 0.6367, "step": 9100 }, { "epoch": 1.2170366408130517, "grad_norm": 1.2530628442764282, "learning_rate": 1.3467942221245931e-05, "loss": 0.8214, "step": 9101 }, { "epoch": 1.2171703664081306, "grad_norm": 1.1551121473312378, "learning_rate": 1.3466588035610693e-05, "loss": 0.732, "step": 9102 }, { "epoch": 1.2173040920032094, "grad_norm": 1.2006908655166626, "learning_rate": 1.3465233777717744e-05, "loss": 0.7339, "step": 9103 }, { "epoch": 1.2174378175982883, "grad_norm": 1.1822844743728638, "learning_rate": 1.3463879447595316e-05, "loss": 0.6394, "step": 9104 }, { "epoch": 1.2175715431933671, "grad_norm": 1.252432942390442, "learning_rate": 1.3462525045271635e-05, "loss": 0.7074, "step": 9105 }, { "epoch": 1.2177052687884462, "grad_norm": 1.1828771829605103, "learning_rate": 1.346117057077493e-05, "loss": 0.7078, "step": 9106 }, { "epoch": 1.217838994383525, "grad_norm": 1.2033642530441284, "learning_rate": 1.3459816024133439e-05, "loss": 0.6621, "step": 9107 }, { "epoch": 1.217972719978604, "grad_norm": 1.2897650003433228, "learning_rate": 1.3458461405375394e-05, "loss": 0.726, "step": 9108 }, { "epoch": 1.2181064455736828, "grad_norm": 1.0923179388046265, "learning_rate": 1.3457106714529027e-05, "loss": 0.7295, "step": 9109 }, { "epoch": 1.2182401711687616, "grad_norm": 1.2467091083526611, "learning_rate": 1.3455751951622582e-05, "loss": 0.7442, "step": 9110 }, { "epoch": 1.2183738967638407, "grad_norm": 1.2819690704345703, "learning_rate": 1.3454397116684292e-05, "loss": 0.7542, "step": 9111 }, { "epoch": 1.2185076223589195, "grad_norm": 1.0074762105941772, "learning_rate": 1.3453042209742405e-05, "loss": 0.6177, "step": 9112 }, { "epoch": 1.2186413479539984, "grad_norm": 1.2283340692520142, "learning_rate": 1.345168723082515e-05, "loss": 0.7797, "step": 9113 }, { "epoch": 1.2187750735490772, "grad_norm": 1.2426378726959229, "learning_rate": 1.345033217996078e-05, "loss": 0.715, "step": 9114 }, { "epoch": 1.2189087991441563, "grad_norm": 1.1323283910751343, "learning_rate": 1.3448977057177538e-05, "loss": 0.7304, "step": 9115 }, { "epoch": 1.2190425247392351, "grad_norm": 1.1827670335769653, "learning_rate": 1.3447621862503671e-05, "loss": 0.7563, "step": 9116 }, { "epoch": 1.219176250334314, "grad_norm": 1.2890276908874512, "learning_rate": 1.3446266595967424e-05, "loss": 0.7837, "step": 9117 }, { "epoch": 1.2193099759293928, "grad_norm": 1.2116713523864746, "learning_rate": 1.3444911257597047e-05, "loss": 0.7578, "step": 9118 }, { "epoch": 1.219443701524472, "grad_norm": 1.3103309869766235, "learning_rate": 1.344355584742079e-05, "loss": 0.7385, "step": 9119 }, { "epoch": 1.2195774271195507, "grad_norm": 1.0817703008651733, "learning_rate": 1.344220036546691e-05, "loss": 0.7058, "step": 9120 }, { "epoch": 1.2197111527146296, "grad_norm": 1.1764171123504639, "learning_rate": 1.3440844811763653e-05, "loss": 0.7341, "step": 9121 }, { "epoch": 1.2198448783097084, "grad_norm": 1.31882905960083, "learning_rate": 1.3439489186339283e-05, "loss": 0.7853, "step": 9122 }, { "epoch": 1.2199786039047873, "grad_norm": 1.2082699537277222, "learning_rate": 1.3438133489222049e-05, "loss": 0.6659, "step": 9123 }, { "epoch": 1.2201123294998664, "grad_norm": 1.1116266250610352, "learning_rate": 1.3436777720440214e-05, "loss": 0.6724, "step": 9124 }, { "epoch": 1.2202460550949452, "grad_norm": 1.1878135204315186, "learning_rate": 1.3435421880022035e-05, "loss": 0.6575, "step": 9125 }, { "epoch": 1.220379780690024, "grad_norm": 0.9940921664237976, "learning_rate": 1.3434065967995776e-05, "loss": 0.6926, "step": 9126 }, { "epoch": 1.220513506285103, "grad_norm": 1.1991914510726929, "learning_rate": 1.3432709984389696e-05, "loss": 0.7586, "step": 9127 }, { "epoch": 1.2206472318801818, "grad_norm": 1.0793712139129639, "learning_rate": 1.343135392923206e-05, "loss": 0.6719, "step": 9128 }, { "epoch": 1.2207809574752608, "grad_norm": 1.337990164756775, "learning_rate": 1.3429997802551138e-05, "loss": 0.8594, "step": 9129 }, { "epoch": 1.2209146830703397, "grad_norm": 1.0496231317520142, "learning_rate": 1.3428641604375192e-05, "loss": 0.7154, "step": 9130 }, { "epoch": 1.2210484086654185, "grad_norm": 1.3032883405685425, "learning_rate": 1.3427285334732494e-05, "loss": 0.7992, "step": 9131 }, { "epoch": 1.2211821342604974, "grad_norm": 1.2036288976669312, "learning_rate": 1.342592899365131e-05, "loss": 0.7823, "step": 9132 }, { "epoch": 1.2213158598555764, "grad_norm": 1.2072639465332031, "learning_rate": 1.3424572581159919e-05, "loss": 0.7215, "step": 9133 }, { "epoch": 1.2214495854506553, "grad_norm": 1.1556113958358765, "learning_rate": 1.3423216097286585e-05, "loss": 0.705, "step": 9134 }, { "epoch": 1.2215833110457341, "grad_norm": 1.1503335237503052, "learning_rate": 1.3421859542059587e-05, "loss": 0.7115, "step": 9135 }, { "epoch": 1.221717036640813, "grad_norm": 1.197332501411438, "learning_rate": 1.3420502915507206e-05, "loss": 0.7228, "step": 9136 }, { "epoch": 1.221850762235892, "grad_norm": 1.1593574285507202, "learning_rate": 1.341914621765771e-05, "loss": 0.7537, "step": 9137 }, { "epoch": 1.221984487830971, "grad_norm": 1.1598589420318604, "learning_rate": 1.3417789448539384e-05, "loss": 0.6726, "step": 9138 }, { "epoch": 1.2221182134260498, "grad_norm": 1.1910388469696045, "learning_rate": 1.341643260818051e-05, "loss": 0.6937, "step": 9139 }, { "epoch": 1.2222519390211286, "grad_norm": 1.2916746139526367, "learning_rate": 1.3415075696609364e-05, "loss": 0.7449, "step": 9140 }, { "epoch": 1.2223856646162075, "grad_norm": 1.1323336362838745, "learning_rate": 1.3413718713854236e-05, "loss": 0.7664, "step": 9141 }, { "epoch": 1.2225193902112865, "grad_norm": 1.0582225322723389, "learning_rate": 1.3412361659943405e-05, "loss": 0.6398, "step": 9142 }, { "epoch": 1.2226531158063654, "grad_norm": 1.2325347661972046, "learning_rate": 1.341100453490516e-05, "loss": 0.7474, "step": 9143 }, { "epoch": 1.2227868414014442, "grad_norm": 1.207645058631897, "learning_rate": 1.3409647338767795e-05, "loss": 0.8142, "step": 9144 }, { "epoch": 1.222920566996523, "grad_norm": 1.185514211654663, "learning_rate": 1.3408290071559589e-05, "loss": 0.741, "step": 9145 }, { "epoch": 1.223054292591602, "grad_norm": 1.2109448909759521, "learning_rate": 1.340693273330884e-05, "loss": 0.6867, "step": 9146 }, { "epoch": 1.223188018186681, "grad_norm": 1.2844654321670532, "learning_rate": 1.3405575324043837e-05, "loss": 0.7042, "step": 9147 }, { "epoch": 1.2233217437817598, "grad_norm": 1.2015427350997925, "learning_rate": 1.3404217843792874e-05, "loss": 0.7876, "step": 9148 }, { "epoch": 1.2234554693768387, "grad_norm": 1.1601043939590454, "learning_rate": 1.340286029258425e-05, "loss": 0.7641, "step": 9149 }, { "epoch": 1.2235891949719175, "grad_norm": 1.2381569147109985, "learning_rate": 1.3401502670446259e-05, "loss": 0.7631, "step": 9150 }, { "epoch": 1.2237229205669966, "grad_norm": 1.28840970993042, "learning_rate": 1.3400144977407199e-05, "loss": 0.7778, "step": 9151 }, { "epoch": 1.2238566461620755, "grad_norm": 1.1501911878585815, "learning_rate": 1.3398787213495372e-05, "loss": 0.656, "step": 9152 }, { "epoch": 1.2239903717571543, "grad_norm": 1.1074639558792114, "learning_rate": 1.3397429378739076e-05, "loss": 0.6894, "step": 9153 }, { "epoch": 1.2241240973522332, "grad_norm": 1.1977347135543823, "learning_rate": 1.3396071473166614e-05, "loss": 0.7477, "step": 9154 }, { "epoch": 1.2242578229473122, "grad_norm": 1.2164485454559326, "learning_rate": 1.3394713496806295e-05, "loss": 0.7884, "step": 9155 }, { "epoch": 1.224391548542391, "grad_norm": 1.2040317058563232, "learning_rate": 1.339335544968642e-05, "loss": 0.772, "step": 9156 }, { "epoch": 1.22452527413747, "grad_norm": 1.065250277519226, "learning_rate": 1.33919973318353e-05, "loss": 0.6443, "step": 9157 }, { "epoch": 1.2246589997325488, "grad_norm": 1.2347534894943237, "learning_rate": 1.3390639143281239e-05, "loss": 0.6742, "step": 9158 }, { "epoch": 1.2247927253276276, "grad_norm": 1.0989460945129395, "learning_rate": 1.3389280884052549e-05, "loss": 0.6806, "step": 9159 }, { "epoch": 1.2249264509227067, "grad_norm": 1.1263281106948853, "learning_rate": 1.3387922554177545e-05, "loss": 0.6876, "step": 9160 }, { "epoch": 1.2250601765177855, "grad_norm": 1.2382155656814575, "learning_rate": 1.3386564153684533e-05, "loss": 0.7451, "step": 9161 }, { "epoch": 1.2251939021128644, "grad_norm": 1.1751782894134521, "learning_rate": 1.3385205682601837e-05, "loss": 0.7174, "step": 9162 }, { "epoch": 1.2253276277079432, "grad_norm": 1.2271381616592407, "learning_rate": 1.3383847140957764e-05, "loss": 0.7191, "step": 9163 }, { "epoch": 1.225461353303022, "grad_norm": 1.199062466621399, "learning_rate": 1.338248852878064e-05, "loss": 0.7538, "step": 9164 }, { "epoch": 1.2255950788981012, "grad_norm": 1.2072817087173462, "learning_rate": 1.3381129846098776e-05, "loss": 0.6909, "step": 9165 }, { "epoch": 1.22572880449318, "grad_norm": 1.1895650625228882, "learning_rate": 1.3379771092940493e-05, "loss": 0.6783, "step": 9166 }, { "epoch": 1.2258625300882589, "grad_norm": 1.0856274366378784, "learning_rate": 1.3378412269334117e-05, "loss": 0.6686, "step": 9167 }, { "epoch": 1.2259962556833377, "grad_norm": 1.1868494749069214, "learning_rate": 1.3377053375307974e-05, "loss": 0.7101, "step": 9168 }, { "epoch": 1.2261299812784168, "grad_norm": 1.0710448026657104, "learning_rate": 1.337569441089038e-05, "loss": 0.7359, "step": 9169 }, { "epoch": 1.2262637068734956, "grad_norm": 1.1362768411636353, "learning_rate": 1.3374335376109668e-05, "loss": 0.6601, "step": 9170 }, { "epoch": 1.2263974324685745, "grad_norm": 1.091895580291748, "learning_rate": 1.3372976270994164e-05, "loss": 0.6771, "step": 9171 }, { "epoch": 1.2265311580636533, "grad_norm": 1.042287826538086, "learning_rate": 1.3371617095572199e-05, "loss": 0.6542, "step": 9172 }, { "epoch": 1.2266648836587324, "grad_norm": 1.0992852449417114, "learning_rate": 1.3370257849872102e-05, "loss": 0.6042, "step": 9173 }, { "epoch": 1.2267986092538112, "grad_norm": 1.1706446409225464, "learning_rate": 1.3368898533922202e-05, "loss": 0.7921, "step": 9174 }, { "epoch": 1.22693233484889, "grad_norm": 1.0392574071884155, "learning_rate": 1.3367539147750837e-05, "loss": 0.6841, "step": 9175 }, { "epoch": 1.227066060443969, "grad_norm": 1.2831294536590576, "learning_rate": 1.336617969138634e-05, "loss": 0.7552, "step": 9176 }, { "epoch": 1.2271997860390478, "grad_norm": 1.11100435256958, "learning_rate": 1.3364820164857053e-05, "loss": 0.6336, "step": 9177 }, { "epoch": 1.2273335116341269, "grad_norm": 1.1974517107009888, "learning_rate": 1.3363460568191306e-05, "loss": 0.6763, "step": 9178 }, { "epoch": 1.2274672372292057, "grad_norm": 1.1916143894195557, "learning_rate": 1.336210090141744e-05, "loss": 0.7089, "step": 9179 }, { "epoch": 1.2276009628242845, "grad_norm": 1.3191468715667725, "learning_rate": 1.3360741164563797e-05, "loss": 0.7639, "step": 9180 }, { "epoch": 1.2277346884193634, "grad_norm": 1.2226437330245972, "learning_rate": 1.3359381357658728e-05, "loss": 0.7305, "step": 9181 }, { "epoch": 1.2278684140144422, "grad_norm": 1.098572015762329, "learning_rate": 1.3358021480730563e-05, "loss": 0.7693, "step": 9182 }, { "epoch": 1.2280021396095213, "grad_norm": 1.2061750888824463, "learning_rate": 1.3356661533807655e-05, "loss": 0.7106, "step": 9183 }, { "epoch": 1.2281358652046002, "grad_norm": 1.1974519491195679, "learning_rate": 1.3355301516918348e-05, "loss": 0.7862, "step": 9184 }, { "epoch": 1.228269590799679, "grad_norm": 1.2106926441192627, "learning_rate": 1.3353941430090992e-05, "loss": 0.7608, "step": 9185 }, { "epoch": 1.2284033163947579, "grad_norm": 1.2067160606384277, "learning_rate": 1.335258127335394e-05, "loss": 0.7511, "step": 9186 }, { "epoch": 1.228537041989837, "grad_norm": 1.0389653444290161, "learning_rate": 1.3351221046735533e-05, "loss": 0.6509, "step": 9187 }, { "epoch": 1.2286707675849158, "grad_norm": 1.1797090768814087, "learning_rate": 1.3349860750264134e-05, "loss": 0.708, "step": 9188 }, { "epoch": 1.2288044931799946, "grad_norm": 1.164821982383728, "learning_rate": 1.3348500383968095e-05, "loss": 0.7475, "step": 9189 }, { "epoch": 1.2289382187750735, "grad_norm": 1.190679669380188, "learning_rate": 1.3347139947875767e-05, "loss": 0.7659, "step": 9190 }, { "epoch": 1.2290719443701525, "grad_norm": 1.1989288330078125, "learning_rate": 1.3345779442015512e-05, "loss": 0.7308, "step": 9191 }, { "epoch": 1.2292056699652314, "grad_norm": 1.3162899017333984, "learning_rate": 1.3344418866415683e-05, "loss": 0.7444, "step": 9192 }, { "epoch": 1.2293393955603102, "grad_norm": 1.1718229055404663, "learning_rate": 1.3343058221104643e-05, "loss": 0.7566, "step": 9193 }, { "epoch": 1.229473121155389, "grad_norm": 1.1812809705734253, "learning_rate": 1.3341697506110753e-05, "loss": 0.7782, "step": 9194 }, { "epoch": 1.229606846750468, "grad_norm": 0.9140693545341492, "learning_rate": 1.334033672146238e-05, "loss": 0.6426, "step": 9195 }, { "epoch": 1.229740572345547, "grad_norm": 1.1095671653747559, "learning_rate": 1.333897586718788e-05, "loss": 0.6872, "step": 9196 }, { "epoch": 1.2298742979406259, "grad_norm": 0.9914871454238892, "learning_rate": 1.3337614943315629e-05, "loss": 0.6941, "step": 9197 }, { "epoch": 1.2300080235357047, "grad_norm": 1.3530595302581787, "learning_rate": 1.3336253949873983e-05, "loss": 0.7923, "step": 9198 }, { "epoch": 1.2301417491307836, "grad_norm": 1.1554523706436157, "learning_rate": 1.3334892886891316e-05, "loss": 0.7561, "step": 9199 }, { "epoch": 1.2302754747258624, "grad_norm": 1.1662774085998535, "learning_rate": 1.3333531754395996e-05, "loss": 0.715, "step": 9200 }, { "epoch": 1.2304092003209415, "grad_norm": 1.3221659660339355, "learning_rate": 1.3332170552416403e-05, "loss": 0.786, "step": 9201 }, { "epoch": 1.2305429259160203, "grad_norm": 1.153968334197998, "learning_rate": 1.3330809280980899e-05, "loss": 0.7722, "step": 9202 }, { "epoch": 1.2306766515110992, "grad_norm": 1.2599009275436401, "learning_rate": 1.3329447940117863e-05, "loss": 0.7505, "step": 9203 }, { "epoch": 1.2308103771061782, "grad_norm": 1.263197898864746, "learning_rate": 1.3328086529855672e-05, "loss": 0.7665, "step": 9204 }, { "epoch": 1.230944102701257, "grad_norm": 1.2896510362625122, "learning_rate": 1.33267250502227e-05, "loss": 0.6569, "step": 9205 }, { "epoch": 1.231077828296336, "grad_norm": 1.1006722450256348, "learning_rate": 1.332536350124733e-05, "loss": 0.6312, "step": 9206 }, { "epoch": 1.2312115538914148, "grad_norm": 1.158721923828125, "learning_rate": 1.3324001882957938e-05, "loss": 0.7693, "step": 9207 }, { "epoch": 1.2313452794864936, "grad_norm": 1.136100172996521, "learning_rate": 1.3322640195382908e-05, "loss": 0.7002, "step": 9208 }, { "epoch": 1.2314790050815727, "grad_norm": 1.2908204793930054, "learning_rate": 1.3321278438550625e-05, "loss": 0.8045, "step": 9209 }, { "epoch": 1.2316127306766516, "grad_norm": 1.2968108654022217, "learning_rate": 1.3319916612489468e-05, "loss": 0.7081, "step": 9210 }, { "epoch": 1.2317464562717304, "grad_norm": 1.1575469970703125, "learning_rate": 1.3318554717227827e-05, "loss": 0.7008, "step": 9211 }, { "epoch": 1.2318801818668093, "grad_norm": 1.1811562776565552, "learning_rate": 1.3317192752794086e-05, "loss": 0.7049, "step": 9212 }, { "epoch": 1.232013907461888, "grad_norm": 1.0972161293029785, "learning_rate": 1.331583071921664e-05, "loss": 0.7106, "step": 9213 }, { "epoch": 1.2321476330569672, "grad_norm": 1.245848298072815, "learning_rate": 1.3314468616523874e-05, "loss": 0.7063, "step": 9214 }, { "epoch": 1.232281358652046, "grad_norm": 1.20819890499115, "learning_rate": 1.3313106444744181e-05, "loss": 0.718, "step": 9215 }, { "epoch": 1.2324150842471249, "grad_norm": 1.063406229019165, "learning_rate": 1.3311744203905957e-05, "loss": 0.671, "step": 9216 }, { "epoch": 1.2325488098422037, "grad_norm": 1.1256377696990967, "learning_rate": 1.3310381894037589e-05, "loss": 0.6896, "step": 9217 }, { "epoch": 1.2326825354372828, "grad_norm": 1.3882501125335693, "learning_rate": 1.3309019515167481e-05, "loss": 0.8054, "step": 9218 }, { "epoch": 1.2328162610323616, "grad_norm": 1.1527929306030273, "learning_rate": 1.3307657067324029e-05, "loss": 0.7419, "step": 9219 }, { "epoch": 1.2329499866274405, "grad_norm": 1.3470364809036255, "learning_rate": 1.3306294550535627e-05, "loss": 0.8162, "step": 9220 }, { "epoch": 1.2330837122225193, "grad_norm": 1.2529911994934082, "learning_rate": 1.3304931964830683e-05, "loss": 0.7286, "step": 9221 }, { "epoch": 1.2332174378175984, "grad_norm": 1.2772401571273804, "learning_rate": 1.3303569310237593e-05, "loss": 0.7296, "step": 9222 }, { "epoch": 1.2333511634126773, "grad_norm": 1.30802321434021, "learning_rate": 1.3302206586784762e-05, "loss": 0.7089, "step": 9223 }, { "epoch": 1.233484889007756, "grad_norm": 1.2973982095718384, "learning_rate": 1.3300843794500593e-05, "loss": 0.7568, "step": 9224 }, { "epoch": 1.233618614602835, "grad_norm": 1.3099365234375, "learning_rate": 1.3299480933413495e-05, "loss": 0.8263, "step": 9225 }, { "epoch": 1.2337523401979138, "grad_norm": 1.1525856256484985, "learning_rate": 1.3298118003551875e-05, "loss": 0.7334, "step": 9226 }, { "epoch": 1.2338860657929929, "grad_norm": 1.1708300113677979, "learning_rate": 1.329675500494414e-05, "loss": 0.7283, "step": 9227 }, { "epoch": 1.2340197913880717, "grad_norm": 1.1763737201690674, "learning_rate": 1.32953919376187e-05, "loss": 0.7685, "step": 9228 }, { "epoch": 1.2341535169831506, "grad_norm": 1.0706772804260254, "learning_rate": 1.3294028801603973e-05, "loss": 0.6866, "step": 9229 }, { "epoch": 1.2342872425782294, "grad_norm": 1.225495457649231, "learning_rate": 1.3292665596928365e-05, "loss": 0.7378, "step": 9230 }, { "epoch": 1.2344209681733083, "grad_norm": 1.171221137046814, "learning_rate": 1.329130232362029e-05, "loss": 0.6813, "step": 9231 }, { "epoch": 1.2345546937683873, "grad_norm": 1.1999200582504272, "learning_rate": 1.328993898170817e-05, "loss": 0.6696, "step": 9232 }, { "epoch": 1.2346884193634662, "grad_norm": 1.2723852396011353, "learning_rate": 1.3288575571220424e-05, "loss": 0.6923, "step": 9233 }, { "epoch": 1.234822144958545, "grad_norm": 1.3295897245407104, "learning_rate": 1.3287212092185464e-05, "loss": 0.7733, "step": 9234 }, { "epoch": 1.2349558705536239, "grad_norm": 0.9353153109550476, "learning_rate": 1.3285848544631713e-05, "loss": 0.6253, "step": 9235 }, { "epoch": 1.235089596148703, "grad_norm": 1.2121052742004395, "learning_rate": 1.3284484928587593e-05, "loss": 0.7198, "step": 9236 }, { "epoch": 1.2352233217437818, "grad_norm": 1.2026607990264893, "learning_rate": 1.3283121244081526e-05, "loss": 0.6829, "step": 9237 }, { "epoch": 1.2353570473388606, "grad_norm": 1.1836035251617432, "learning_rate": 1.3281757491141942e-05, "loss": 0.7276, "step": 9238 }, { "epoch": 1.2354907729339395, "grad_norm": 1.0486087799072266, "learning_rate": 1.3280393669797263e-05, "loss": 0.7099, "step": 9239 }, { "epoch": 1.2356244985290186, "grad_norm": 1.0635490417480469, "learning_rate": 1.3279029780075913e-05, "loss": 0.679, "step": 9240 }, { "epoch": 1.2357582241240974, "grad_norm": 1.1884044408798218, "learning_rate": 1.3277665822006331e-05, "loss": 0.7168, "step": 9241 }, { "epoch": 1.2358919497191763, "grad_norm": 1.0989524126052856, "learning_rate": 1.3276301795616937e-05, "loss": 0.7674, "step": 9242 }, { "epoch": 1.2360256753142551, "grad_norm": 1.1169859170913696, "learning_rate": 1.3274937700936168e-05, "loss": 0.7421, "step": 9243 }, { "epoch": 1.236159400909334, "grad_norm": 1.234826683998108, "learning_rate": 1.3273573537992455e-05, "loss": 0.697, "step": 9244 }, { "epoch": 1.236293126504413, "grad_norm": 1.1430209875106812, "learning_rate": 1.3272209306814237e-05, "loss": 0.7121, "step": 9245 }, { "epoch": 1.2364268520994919, "grad_norm": 1.1017210483551025, "learning_rate": 1.3270845007429946e-05, "loss": 0.7298, "step": 9246 }, { "epoch": 1.2365605776945707, "grad_norm": 1.278051495552063, "learning_rate": 1.326948063986802e-05, "loss": 0.7704, "step": 9247 }, { "epoch": 1.2366943032896496, "grad_norm": 1.1119502782821655, "learning_rate": 1.32681162041569e-05, "loss": 0.7175, "step": 9248 }, { "epoch": 1.2368280288847284, "grad_norm": 1.1583329439163208, "learning_rate": 1.3266751700325027e-05, "loss": 0.7816, "step": 9249 }, { "epoch": 1.2369617544798075, "grad_norm": 1.142851710319519, "learning_rate": 1.3265387128400833e-05, "loss": 0.714, "step": 9250 }, { "epoch": 1.2370954800748863, "grad_norm": 1.2522541284561157, "learning_rate": 1.3264022488412773e-05, "loss": 0.7698, "step": 9251 }, { "epoch": 1.2372292056699652, "grad_norm": 1.1853128671646118, "learning_rate": 1.326265778038929e-05, "loss": 0.7105, "step": 9252 }, { "epoch": 1.237362931265044, "grad_norm": 1.306074857711792, "learning_rate": 1.3261293004358829e-05, "loss": 0.7971, "step": 9253 }, { "epoch": 1.2374966568601231, "grad_norm": 1.2058424949645996, "learning_rate": 1.325992816034983e-05, "loss": 0.7788, "step": 9254 }, { "epoch": 1.237630382455202, "grad_norm": 1.2013771533966064, "learning_rate": 1.3258563248390752e-05, "loss": 0.8256, "step": 9255 }, { "epoch": 1.2377641080502808, "grad_norm": 1.1267105340957642, "learning_rate": 1.3257198268510041e-05, "loss": 0.6362, "step": 9256 }, { "epoch": 1.2378978336453597, "grad_norm": 1.1598784923553467, "learning_rate": 1.3255833220736147e-05, "loss": 0.7099, "step": 9257 }, { "epoch": 1.2380315592404387, "grad_norm": 1.3934515714645386, "learning_rate": 1.3254468105097526e-05, "loss": 0.7537, "step": 9258 }, { "epoch": 1.2381652848355176, "grad_norm": 1.1538817882537842, "learning_rate": 1.3253102921622632e-05, "loss": 0.6936, "step": 9259 }, { "epoch": 1.2382990104305964, "grad_norm": 1.1210070848464966, "learning_rate": 1.325173767033992e-05, "loss": 0.6696, "step": 9260 }, { "epoch": 1.2384327360256753, "grad_norm": 1.0543426275253296, "learning_rate": 1.3250372351277844e-05, "loss": 0.6525, "step": 9261 }, { "epoch": 1.2385664616207541, "grad_norm": 1.1687966585159302, "learning_rate": 1.3249006964464875e-05, "loss": 0.7233, "step": 9262 }, { "epoch": 1.2387001872158332, "grad_norm": 1.2662804126739502, "learning_rate": 1.3247641509929459e-05, "loss": 0.7454, "step": 9263 }, { "epoch": 1.238833912810912, "grad_norm": 1.197374701499939, "learning_rate": 1.3246275987700063e-05, "loss": 0.7152, "step": 9264 }, { "epoch": 1.238967638405991, "grad_norm": 1.321302056312561, "learning_rate": 1.3244910397805151e-05, "loss": 0.7913, "step": 9265 }, { "epoch": 1.2391013640010697, "grad_norm": 1.1644649505615234, "learning_rate": 1.324354474027319e-05, "loss": 0.6837, "step": 9266 }, { "epoch": 1.2392350895961486, "grad_norm": 1.0506736040115356, "learning_rate": 1.3242179015132641e-05, "loss": 0.7244, "step": 9267 }, { "epoch": 1.2393688151912277, "grad_norm": 1.1492433547973633, "learning_rate": 1.3240813222411973e-05, "loss": 0.6767, "step": 9268 }, { "epoch": 1.2395025407863065, "grad_norm": 1.0844646692276, "learning_rate": 1.3239447362139652e-05, "loss": 0.6765, "step": 9269 }, { "epoch": 1.2396362663813854, "grad_norm": 1.3227527141571045, "learning_rate": 1.3238081434344153e-05, "loss": 0.7226, "step": 9270 }, { "epoch": 1.2397699919764642, "grad_norm": 1.3782129287719727, "learning_rate": 1.3236715439053944e-05, "loss": 0.7885, "step": 9271 }, { "epoch": 1.2399037175715433, "grad_norm": 1.1319572925567627, "learning_rate": 1.32353493762975e-05, "loss": 0.7089, "step": 9272 }, { "epoch": 1.2400374431666221, "grad_norm": 1.1389429569244385, "learning_rate": 1.3233983246103293e-05, "loss": 0.6998, "step": 9273 }, { "epoch": 1.240171168761701, "grad_norm": 1.2254799604415894, "learning_rate": 1.3232617048499801e-05, "loss": 0.7485, "step": 9274 }, { "epoch": 1.2403048943567798, "grad_norm": 1.0978336334228516, "learning_rate": 1.32312507835155e-05, "loss": 0.6911, "step": 9275 }, { "epoch": 1.240438619951859, "grad_norm": 1.2059299945831299, "learning_rate": 1.3229884451178863e-05, "loss": 0.6381, "step": 9276 }, { "epoch": 1.2405723455469377, "grad_norm": 1.1060365438461304, "learning_rate": 1.322851805151838e-05, "loss": 0.6559, "step": 9277 }, { "epoch": 1.2407060711420166, "grad_norm": 1.1741812229156494, "learning_rate": 1.322715158456253e-05, "loss": 0.6968, "step": 9278 }, { "epoch": 1.2408397967370954, "grad_norm": 1.3146891593933105, "learning_rate": 1.322578505033979e-05, "loss": 0.7588, "step": 9279 }, { "epoch": 1.2409735223321743, "grad_norm": 1.289953589439392, "learning_rate": 1.3224418448878648e-05, "loss": 0.7669, "step": 9280 }, { "epoch": 1.2411072479272534, "grad_norm": 1.1532399654388428, "learning_rate": 1.3223051780207587e-05, "loss": 0.656, "step": 9281 }, { "epoch": 1.2412409735223322, "grad_norm": 1.1366627216339111, "learning_rate": 1.3221685044355099e-05, "loss": 0.658, "step": 9282 }, { "epoch": 1.241374699117411, "grad_norm": 1.22013521194458, "learning_rate": 1.3220318241349669e-05, "loss": 0.7605, "step": 9283 }, { "epoch": 1.24150842471249, "grad_norm": 1.179509162902832, "learning_rate": 1.3218951371219783e-05, "loss": 0.795, "step": 9284 }, { "epoch": 1.2416421503075687, "grad_norm": 1.107421875, "learning_rate": 1.3217584433993937e-05, "loss": 0.66, "step": 9285 }, { "epoch": 1.2417758759026478, "grad_norm": 1.2991619110107422, "learning_rate": 1.3216217429700628e-05, "loss": 0.7543, "step": 9286 }, { "epoch": 1.2419096014977267, "grad_norm": 1.1613503694534302, "learning_rate": 1.3214850358368338e-05, "loss": 0.7119, "step": 9287 }, { "epoch": 1.2420433270928055, "grad_norm": 1.139907956123352, "learning_rate": 1.3213483220025571e-05, "loss": 0.6948, "step": 9288 }, { "epoch": 1.2421770526878844, "grad_norm": 1.1426881551742554, "learning_rate": 1.3212116014700818e-05, "loss": 0.725, "step": 9289 }, { "epoch": 1.2423107782829634, "grad_norm": 1.3799493312835693, "learning_rate": 1.3210748742422586e-05, "loss": 0.7925, "step": 9290 }, { "epoch": 1.2424445038780423, "grad_norm": 1.1253262758255005, "learning_rate": 1.3209381403219366e-05, "loss": 0.7153, "step": 9291 }, { "epoch": 1.2425782294731211, "grad_norm": 1.1465344429016113, "learning_rate": 1.3208013997119662e-05, "loss": 0.6129, "step": 9292 }, { "epoch": 1.2427119550682, "grad_norm": 1.139330267906189, "learning_rate": 1.3206646524151974e-05, "loss": 0.7359, "step": 9293 }, { "epoch": 1.242845680663279, "grad_norm": 1.1804602146148682, "learning_rate": 1.3205278984344811e-05, "loss": 0.7501, "step": 9294 }, { "epoch": 1.242979406258358, "grad_norm": 1.1529501676559448, "learning_rate": 1.320391137772667e-05, "loss": 0.7217, "step": 9295 }, { "epoch": 1.2431131318534367, "grad_norm": 1.2482224702835083, "learning_rate": 1.3202543704326065e-05, "loss": 0.7963, "step": 9296 }, { "epoch": 1.2432468574485156, "grad_norm": 1.4613351821899414, "learning_rate": 1.3201175964171502e-05, "loss": 0.7683, "step": 9297 }, { "epoch": 1.2433805830435944, "grad_norm": 1.223037838935852, "learning_rate": 1.319980815729149e-05, "loss": 0.7234, "step": 9298 }, { "epoch": 1.2435143086386735, "grad_norm": 1.12281334400177, "learning_rate": 1.3198440283714536e-05, "loss": 0.7322, "step": 9299 }, { "epoch": 1.2436480342337524, "grad_norm": 1.2032722234725952, "learning_rate": 1.3197072343469154e-05, "loss": 0.7941, "step": 9300 }, { "epoch": 1.2437817598288312, "grad_norm": 1.3838211297988892, "learning_rate": 1.3195704336583863e-05, "loss": 0.7415, "step": 9301 }, { "epoch": 1.24391548542391, "grad_norm": 1.3007405996322632, "learning_rate": 1.3194336263087168e-05, "loss": 0.7552, "step": 9302 }, { "epoch": 1.244049211018989, "grad_norm": 1.3206770420074463, "learning_rate": 1.3192968123007593e-05, "loss": 0.7305, "step": 9303 }, { "epoch": 1.244182936614068, "grad_norm": 1.13156259059906, "learning_rate": 1.3191599916373653e-05, "loss": 0.7301, "step": 9304 }, { "epoch": 1.2443166622091468, "grad_norm": 1.1350882053375244, "learning_rate": 1.3190231643213865e-05, "loss": 0.6582, "step": 9305 }, { "epoch": 1.2444503878042257, "grad_norm": 1.3048738241195679, "learning_rate": 1.3188863303556754e-05, "loss": 0.6799, "step": 9306 }, { "epoch": 1.2445841133993047, "grad_norm": 1.1378577947616577, "learning_rate": 1.3187494897430837e-05, "loss": 0.6845, "step": 9307 }, { "epoch": 1.2447178389943836, "grad_norm": 1.1637569665908813, "learning_rate": 1.3186126424864639e-05, "loss": 0.7878, "step": 9308 }, { "epoch": 1.2448515645894624, "grad_norm": 1.2447539567947388, "learning_rate": 1.3184757885886683e-05, "loss": 0.7848, "step": 9309 }, { "epoch": 1.2449852901845413, "grad_norm": 1.377752423286438, "learning_rate": 1.3183389280525497e-05, "loss": 0.8088, "step": 9310 }, { "epoch": 1.2451190157796201, "grad_norm": 1.1519241333007812, "learning_rate": 1.3182020608809611e-05, "loss": 0.6947, "step": 9311 }, { "epoch": 1.2452527413746992, "grad_norm": 1.2067630290985107, "learning_rate": 1.3180651870767547e-05, "loss": 0.7188, "step": 9312 }, { "epoch": 1.245386466969778, "grad_norm": 1.1547857522964478, "learning_rate": 1.317928306642784e-05, "loss": 0.6723, "step": 9313 }, { "epoch": 1.245520192564857, "grad_norm": 1.290090799331665, "learning_rate": 1.3177914195819018e-05, "loss": 0.7964, "step": 9314 }, { "epoch": 1.2456539181599358, "grad_norm": 1.139615535736084, "learning_rate": 1.3176545258969615e-05, "loss": 0.7114, "step": 9315 }, { "epoch": 1.2457876437550146, "grad_norm": 1.134974718093872, "learning_rate": 1.3175176255908167e-05, "loss": 0.6651, "step": 9316 }, { "epoch": 1.2459213693500937, "grad_norm": 1.0861785411834717, "learning_rate": 1.3173807186663209e-05, "loss": 0.708, "step": 9317 }, { "epoch": 1.2460550949451725, "grad_norm": 1.2182151079177856, "learning_rate": 1.317243805126328e-05, "loss": 0.6808, "step": 9318 }, { "epoch": 1.2461888205402514, "grad_norm": 1.0341705083847046, "learning_rate": 1.317106884973691e-05, "loss": 0.6521, "step": 9319 }, { "epoch": 1.2463225461353302, "grad_norm": 1.2140824794769287, "learning_rate": 1.3169699582112645e-05, "loss": 0.8218, "step": 9320 }, { "epoch": 1.2464562717304093, "grad_norm": 1.1120494604110718, "learning_rate": 1.3168330248419028e-05, "loss": 0.6771, "step": 9321 }, { "epoch": 1.2465899973254881, "grad_norm": 1.0463011264801025, "learning_rate": 1.3166960848684595e-05, "loss": 0.6359, "step": 9322 }, { "epoch": 1.246723722920567, "grad_norm": 1.2577452659606934, "learning_rate": 1.3165591382937897e-05, "loss": 0.7699, "step": 9323 }, { "epoch": 1.2468574485156458, "grad_norm": 1.156151533126831, "learning_rate": 1.3164221851207475e-05, "loss": 0.7183, "step": 9324 }, { "epoch": 1.246991174110725, "grad_norm": 1.1722558736801147, "learning_rate": 1.3162852253521873e-05, "loss": 0.7294, "step": 9325 }, { "epoch": 1.2471248997058038, "grad_norm": 1.297115445137024, "learning_rate": 1.3161482589909649e-05, "loss": 0.8322, "step": 9326 }, { "epoch": 1.2472586253008826, "grad_norm": 1.2446820735931396, "learning_rate": 1.316011286039934e-05, "loss": 0.7529, "step": 9327 }, { "epoch": 1.2473923508959615, "grad_norm": 1.1087275743484497, "learning_rate": 1.3158743065019504e-05, "loss": 0.7114, "step": 9328 }, { "epoch": 1.2475260764910403, "grad_norm": 1.2273108959197998, "learning_rate": 1.3157373203798688e-05, "loss": 0.7263, "step": 9329 }, { "epoch": 1.2476598020861194, "grad_norm": 1.1508930921554565, "learning_rate": 1.3156003276765456e-05, "loss": 0.6974, "step": 9330 }, { "epoch": 1.2477935276811982, "grad_norm": 1.2422845363616943, "learning_rate": 1.3154633283948352e-05, "loss": 0.6989, "step": 9331 }, { "epoch": 1.247927253276277, "grad_norm": 1.0530104637145996, "learning_rate": 1.3153263225375937e-05, "loss": 0.753, "step": 9332 }, { "epoch": 1.248060978871356, "grad_norm": 1.3269013166427612, "learning_rate": 1.3151893101076765e-05, "loss": 0.8212, "step": 9333 }, { "epoch": 1.2481947044664348, "grad_norm": 1.2658700942993164, "learning_rate": 1.3150522911079398e-05, "loss": 0.7829, "step": 9334 }, { "epoch": 1.2483284300615138, "grad_norm": 1.2770941257476807, "learning_rate": 1.3149152655412397e-05, "loss": 0.7149, "step": 9335 }, { "epoch": 1.2484621556565927, "grad_norm": 1.3781917095184326, "learning_rate": 1.314778233410432e-05, "loss": 0.7872, "step": 9336 }, { "epoch": 1.2485958812516715, "grad_norm": 1.1750023365020752, "learning_rate": 1.3146411947183734e-05, "loss": 0.6849, "step": 9337 }, { "epoch": 1.2487296068467504, "grad_norm": 1.1494070291519165, "learning_rate": 1.3145041494679206e-05, "loss": 0.7026, "step": 9338 }, { "epoch": 1.2488633324418295, "grad_norm": 1.070863127708435, "learning_rate": 1.3143670976619292e-05, "loss": 0.6995, "step": 9339 }, { "epoch": 1.2489970580369083, "grad_norm": 1.1500813961029053, "learning_rate": 1.3142300393032564e-05, "loss": 0.6478, "step": 9340 }, { "epoch": 1.2491307836319872, "grad_norm": 1.1829400062561035, "learning_rate": 1.3140929743947592e-05, "loss": 0.7334, "step": 9341 }, { "epoch": 1.249264509227066, "grad_norm": 1.088805913925171, "learning_rate": 1.3139559029392948e-05, "loss": 0.7043, "step": 9342 }, { "epoch": 1.249398234822145, "grad_norm": 1.1109436750411987, "learning_rate": 1.3138188249397197e-05, "loss": 0.6951, "step": 9343 }, { "epoch": 1.249531960417224, "grad_norm": 1.2694209814071655, "learning_rate": 1.3136817403988918e-05, "loss": 0.7784, "step": 9344 }, { "epoch": 1.2496656860123028, "grad_norm": 1.1016857624053955, "learning_rate": 1.3135446493196677e-05, "loss": 0.7354, "step": 9345 }, { "epoch": 1.2497994116073816, "grad_norm": 1.153218150138855, "learning_rate": 1.3134075517049059e-05, "loss": 0.7319, "step": 9346 }, { "epoch": 1.2499331372024605, "grad_norm": 1.300937294960022, "learning_rate": 1.3132704475574634e-05, "loss": 0.7702, "step": 9347 }, { "epoch": 1.2500668627975395, "grad_norm": 1.02394700050354, "learning_rate": 1.3131333368801982e-05, "loss": 0.7152, "step": 9348 }, { "epoch": 1.2502005883926184, "grad_norm": 1.262675166130066, "learning_rate": 1.312996219675968e-05, "loss": 0.7381, "step": 9349 }, { "epoch": 1.2503343139876972, "grad_norm": 1.123143196105957, "learning_rate": 1.3128590959476313e-05, "loss": 0.7902, "step": 9350 }, { "epoch": 1.250468039582776, "grad_norm": 1.2047656774520874, "learning_rate": 1.3127219656980464e-05, "loss": 0.7393, "step": 9351 }, { "epoch": 1.250601765177855, "grad_norm": 1.1314489841461182, "learning_rate": 1.3125848289300712e-05, "loss": 0.7254, "step": 9352 }, { "epoch": 1.250735490772934, "grad_norm": 0.9788809418678284, "learning_rate": 1.3124476856465642e-05, "loss": 0.6892, "step": 9353 }, { "epoch": 1.2508692163680128, "grad_norm": 1.252990484237671, "learning_rate": 1.3123105358503839e-05, "loss": 0.6575, "step": 9354 }, { "epoch": 1.2510029419630917, "grad_norm": 1.2373435497283936, "learning_rate": 1.3121733795443898e-05, "loss": 0.7507, "step": 9355 }, { "epoch": 1.2511366675581708, "grad_norm": 1.158713936805725, "learning_rate": 1.3120362167314403e-05, "loss": 0.6792, "step": 9356 }, { "epoch": 1.2512703931532494, "grad_norm": 1.293854832649231, "learning_rate": 1.3118990474143941e-05, "loss": 0.8392, "step": 9357 }, { "epoch": 1.2514041187483285, "grad_norm": 1.1044549942016602, "learning_rate": 1.3117618715961111e-05, "loss": 0.6534, "step": 9358 }, { "epoch": 1.2515378443434073, "grad_norm": 1.1490275859832764, "learning_rate": 1.31162468927945e-05, "loss": 0.6731, "step": 9359 }, { "epoch": 1.2516715699384862, "grad_norm": 1.2425954341888428, "learning_rate": 1.3114875004672705e-05, "loss": 0.8295, "step": 9360 }, { "epoch": 1.2518052955335652, "grad_norm": 1.1567577123641968, "learning_rate": 1.3113503051624321e-05, "loss": 0.6408, "step": 9361 }, { "epoch": 1.251939021128644, "grad_norm": 1.1190743446350098, "learning_rate": 1.3112131033677944e-05, "loss": 0.7306, "step": 9362 }, { "epoch": 1.252072746723723, "grad_norm": 1.057592749595642, "learning_rate": 1.3110758950862176e-05, "loss": 0.6627, "step": 9363 }, { "epoch": 1.2522064723188018, "grad_norm": 1.2360124588012695, "learning_rate": 1.3109386803205615e-05, "loss": 0.73, "step": 9364 }, { "epoch": 1.2523401979138806, "grad_norm": 1.266740083694458, "learning_rate": 1.310801459073686e-05, "loss": 0.7904, "step": 9365 }, { "epoch": 1.2524739235089597, "grad_norm": 1.1983699798583984, "learning_rate": 1.3106642313484513e-05, "loss": 0.6869, "step": 9366 }, { "epoch": 1.2526076491040385, "grad_norm": 1.2528026103973389, "learning_rate": 1.3105269971477181e-05, "loss": 0.8036, "step": 9367 }, { "epoch": 1.2527413746991174, "grad_norm": 1.3098112344741821, "learning_rate": 1.3103897564743468e-05, "loss": 0.6797, "step": 9368 }, { "epoch": 1.2528751002941962, "grad_norm": 1.141838788986206, "learning_rate": 1.3102525093311979e-05, "loss": 0.7617, "step": 9369 }, { "epoch": 1.253008825889275, "grad_norm": 1.2870361804962158, "learning_rate": 1.3101152557211325e-05, "loss": 0.627, "step": 9370 }, { "epoch": 1.2531425514843542, "grad_norm": 1.0638781785964966, "learning_rate": 1.3099779956470116e-05, "loss": 0.7245, "step": 9371 }, { "epoch": 1.253276277079433, "grad_norm": 1.3090872764587402, "learning_rate": 1.3098407291116958e-05, "loss": 0.677, "step": 9372 }, { "epoch": 1.2534100026745119, "grad_norm": 1.3152028322219849, "learning_rate": 1.3097034561180463e-05, "loss": 0.7655, "step": 9373 }, { "epoch": 1.253543728269591, "grad_norm": 1.2338777780532837, "learning_rate": 1.3095661766689245e-05, "loss": 0.767, "step": 9374 }, { "epoch": 1.2536774538646698, "grad_norm": 1.2580115795135498, "learning_rate": 1.3094288907671924e-05, "loss": 0.7378, "step": 9375 }, { "epoch": 1.2538111794597486, "grad_norm": 1.122197151184082, "learning_rate": 1.3092915984157108e-05, "loss": 0.7224, "step": 9376 }, { "epoch": 1.2539449050548275, "grad_norm": 1.1007460355758667, "learning_rate": 1.3091542996173421e-05, "loss": 0.6683, "step": 9377 }, { "epoch": 1.2540786306499063, "grad_norm": 1.2023651599884033, "learning_rate": 1.3090169943749475e-05, "loss": 0.6999, "step": 9378 }, { "epoch": 1.2542123562449854, "grad_norm": 1.0603456497192383, "learning_rate": 1.3088796826913897e-05, "loss": 0.6954, "step": 9379 }, { "epoch": 1.2543460818400642, "grad_norm": 1.1769858598709106, "learning_rate": 1.3087423645695303e-05, "loss": 0.7247, "step": 9380 }, { "epoch": 1.254479807435143, "grad_norm": 1.1224919557571411, "learning_rate": 1.3086050400122316e-05, "loss": 0.6792, "step": 9381 }, { "epoch": 1.254613533030222, "grad_norm": 1.1778157949447632, "learning_rate": 1.3084677090223563e-05, "loss": 0.7262, "step": 9382 }, { "epoch": 1.2547472586253008, "grad_norm": 1.1295745372772217, "learning_rate": 1.3083303716027671e-05, "loss": 0.7012, "step": 9383 }, { "epoch": 1.2548809842203799, "grad_norm": 1.119362473487854, "learning_rate": 1.3081930277563259e-05, "loss": 0.6946, "step": 9384 }, { "epoch": 1.2550147098154587, "grad_norm": 1.1715532541275024, "learning_rate": 1.3080556774858962e-05, "loss": 0.722, "step": 9385 }, { "epoch": 1.2551484354105376, "grad_norm": 1.231411099433899, "learning_rate": 1.3079183207943402e-05, "loss": 0.7933, "step": 9386 }, { "epoch": 1.2552821610056164, "grad_norm": 1.2970679998397827, "learning_rate": 1.3077809576845219e-05, "loss": 0.7002, "step": 9387 }, { "epoch": 1.2554158866006953, "grad_norm": 1.1691569089889526, "learning_rate": 1.3076435881593042e-05, "loss": 0.7611, "step": 9388 }, { "epoch": 1.2555496121957743, "grad_norm": 1.243012547492981, "learning_rate": 1.3075062122215498e-05, "loss": 0.7141, "step": 9389 }, { "epoch": 1.2556833377908532, "grad_norm": 1.1296825408935547, "learning_rate": 1.307368829874123e-05, "loss": 0.7417, "step": 9390 }, { "epoch": 1.255817063385932, "grad_norm": 1.1206094026565552, "learning_rate": 1.3072314411198868e-05, "loss": 0.7318, "step": 9391 }, { "epoch": 1.255950788981011, "grad_norm": 1.0847147703170776, "learning_rate": 1.3070940459617053e-05, "loss": 0.6675, "step": 9392 }, { "epoch": 1.25608451457609, "grad_norm": 1.01374351978302, "learning_rate": 1.3069566444024423e-05, "loss": 0.6377, "step": 9393 }, { "epoch": 1.2562182401711688, "grad_norm": 1.1284648180007935, "learning_rate": 1.3068192364449618e-05, "loss": 0.6765, "step": 9394 }, { "epoch": 1.2563519657662476, "grad_norm": 1.135588526725769, "learning_rate": 1.3066818220921283e-05, "loss": 0.6768, "step": 9395 }, { "epoch": 1.2564856913613265, "grad_norm": 1.1649090051651, "learning_rate": 1.3065444013468052e-05, "loss": 0.7227, "step": 9396 }, { "epoch": 1.2566194169564056, "grad_norm": 1.1343060731887817, "learning_rate": 1.3064069742118575e-05, "loss": 0.7394, "step": 9397 }, { "epoch": 1.2567531425514844, "grad_norm": 1.199958086013794, "learning_rate": 1.3062695406901496e-05, "loss": 0.7554, "step": 9398 }, { "epoch": 1.2568868681465633, "grad_norm": 1.285776138305664, "learning_rate": 1.306132100784546e-05, "loss": 0.7025, "step": 9399 }, { "epoch": 1.257020593741642, "grad_norm": 1.2102628946304321, "learning_rate": 1.305994654497912e-05, "loss": 0.644, "step": 9400 }, { "epoch": 1.257154319336721, "grad_norm": 1.2030905485153198, "learning_rate": 1.3058572018331122e-05, "loss": 0.6759, "step": 9401 }, { "epoch": 1.2572880449318, "grad_norm": 1.270838737487793, "learning_rate": 1.3057197427930114e-05, "loss": 0.7913, "step": 9402 }, { "epoch": 1.2574217705268789, "grad_norm": 1.2724796533584595, "learning_rate": 1.3055822773804757e-05, "loss": 0.7765, "step": 9403 }, { "epoch": 1.2575554961219577, "grad_norm": 0.9970998167991638, "learning_rate": 1.3054448055983694e-05, "loss": 0.6444, "step": 9404 }, { "epoch": 1.2576892217170366, "grad_norm": 1.1529046297073364, "learning_rate": 1.3053073274495582e-05, "loss": 0.7297, "step": 9405 }, { "epoch": 1.2578229473121154, "grad_norm": 1.4148932695388794, "learning_rate": 1.3051698429369082e-05, "loss": 0.8698, "step": 9406 }, { "epoch": 1.2579566729071945, "grad_norm": 1.2366430759429932, "learning_rate": 1.305032352063285e-05, "loss": 0.7856, "step": 9407 }, { "epoch": 1.2580903985022733, "grad_norm": 1.1664479970932007, "learning_rate": 1.3048948548315541e-05, "loss": 0.6646, "step": 9408 }, { "epoch": 1.2582241240973522, "grad_norm": 1.0505180358886719, "learning_rate": 1.3047573512445817e-05, "loss": 0.7002, "step": 9409 }, { "epoch": 1.2583578496924313, "grad_norm": 1.2695668935775757, "learning_rate": 1.3046198413052337e-05, "loss": 0.755, "step": 9410 }, { "epoch": 1.25849157528751, "grad_norm": 1.1096209287643433, "learning_rate": 1.3044823250163772e-05, "loss": 0.636, "step": 9411 }, { "epoch": 1.258625300882589, "grad_norm": 1.2991843223571777, "learning_rate": 1.3043448023808774e-05, "loss": 0.8366, "step": 9412 }, { "epoch": 1.2587590264776678, "grad_norm": 1.1352345943450928, "learning_rate": 1.3042072734016018e-05, "loss": 0.7478, "step": 9413 }, { "epoch": 1.2588927520727466, "grad_norm": 1.119526743888855, "learning_rate": 1.3040697380814165e-05, "loss": 0.6572, "step": 9414 }, { "epoch": 1.2590264776678257, "grad_norm": 1.1450073719024658, "learning_rate": 1.3039321964231887e-05, "loss": 0.7258, "step": 9415 }, { "epoch": 1.2591602032629046, "grad_norm": 1.1701388359069824, "learning_rate": 1.303794648429785e-05, "loss": 0.6861, "step": 9416 }, { "epoch": 1.2592939288579834, "grad_norm": 1.1093292236328125, "learning_rate": 1.3036570941040722e-05, "loss": 0.65, "step": 9417 }, { "epoch": 1.2594276544530623, "grad_norm": 1.1471846103668213, "learning_rate": 1.303519533448918e-05, "loss": 0.7536, "step": 9418 }, { "epoch": 1.259561380048141, "grad_norm": 1.1413859128952026, "learning_rate": 1.3033819664671898e-05, "loss": 0.7343, "step": 9419 }, { "epoch": 1.2596951056432202, "grad_norm": 1.2191188335418701, "learning_rate": 1.3032443931617547e-05, "loss": 0.6884, "step": 9420 }, { "epoch": 1.259828831238299, "grad_norm": 1.1447863578796387, "learning_rate": 1.3031068135354805e-05, "loss": 0.6976, "step": 9421 }, { "epoch": 1.2599625568333779, "grad_norm": 1.0656601190567017, "learning_rate": 1.3029692275912346e-05, "loss": 0.6479, "step": 9422 }, { "epoch": 1.2600962824284567, "grad_norm": 1.2403727769851685, "learning_rate": 1.3028316353318853e-05, "loss": 0.7593, "step": 9423 }, { "epoch": 1.2602300080235356, "grad_norm": 1.1008222103118896, "learning_rate": 1.3026940367603e-05, "loss": 0.7513, "step": 9424 }, { "epoch": 1.2603637336186146, "grad_norm": 1.1596697568893433, "learning_rate": 1.3025564318793473e-05, "loss": 0.7907, "step": 9425 }, { "epoch": 1.2604974592136935, "grad_norm": 1.0629615783691406, "learning_rate": 1.3024188206918955e-05, "loss": 0.6599, "step": 9426 }, { "epoch": 1.2606311848087723, "grad_norm": 1.0667093992233276, "learning_rate": 1.3022812032008128e-05, "loss": 0.6271, "step": 9427 }, { "epoch": 1.2607649104038514, "grad_norm": 1.111937403678894, "learning_rate": 1.3021435794089674e-05, "loss": 0.7216, "step": 9428 }, { "epoch": 1.2608986359989303, "grad_norm": 1.231778860092163, "learning_rate": 1.3020059493192283e-05, "loss": 0.718, "step": 9429 }, { "epoch": 1.261032361594009, "grad_norm": 1.161985993385315, "learning_rate": 1.301868312934464e-05, "loss": 0.7428, "step": 9430 }, { "epoch": 1.261166087189088, "grad_norm": 1.14393949508667, "learning_rate": 1.3017306702575437e-05, "loss": 0.703, "step": 9431 }, { "epoch": 1.2612998127841668, "grad_norm": 1.1766678094863892, "learning_rate": 1.3015930212913363e-05, "loss": 0.7935, "step": 9432 }, { "epoch": 1.2614335383792459, "grad_norm": 1.0971862077713013, "learning_rate": 1.3014553660387112e-05, "loss": 0.6116, "step": 9433 }, { "epoch": 1.2615672639743247, "grad_norm": 1.2852641344070435, "learning_rate": 1.3013177045025374e-05, "loss": 0.8033, "step": 9434 }, { "epoch": 1.2617009895694036, "grad_norm": 1.2458208799362183, "learning_rate": 1.3011800366856839e-05, "loss": 0.76, "step": 9435 }, { "epoch": 1.2618347151644824, "grad_norm": 1.1096546649932861, "learning_rate": 1.3010423625910214e-05, "loss": 0.6344, "step": 9436 }, { "epoch": 1.2619684407595613, "grad_norm": 1.2686545848846436, "learning_rate": 1.3009046822214183e-05, "loss": 0.7995, "step": 9437 }, { "epoch": 1.2621021663546403, "grad_norm": 1.2088152170181274, "learning_rate": 1.3007669955797452e-05, "loss": 0.7495, "step": 9438 }, { "epoch": 1.2622358919497192, "grad_norm": 1.2632966041564941, "learning_rate": 1.3006293026688721e-05, "loss": 0.7417, "step": 9439 }, { "epoch": 1.262369617544798, "grad_norm": 1.1630315780639648, "learning_rate": 1.300491603491669e-05, "loss": 0.7325, "step": 9440 }, { "epoch": 1.262503343139877, "grad_norm": 1.141762614250183, "learning_rate": 1.3003538980510058e-05, "loss": 0.6239, "step": 9441 }, { "epoch": 1.2626370687349557, "grad_norm": 1.2121704816818237, "learning_rate": 1.3002161863497529e-05, "loss": 0.8626, "step": 9442 }, { "epoch": 1.2627707943300348, "grad_norm": 1.3186142444610596, "learning_rate": 1.300078468390781e-05, "loss": 0.7921, "step": 9443 }, { "epoch": 1.2629045199251137, "grad_norm": 1.0591927766799927, "learning_rate": 1.2999407441769602e-05, "loss": 0.7051, "step": 9444 }, { "epoch": 1.2630382455201925, "grad_norm": 1.2458487749099731, "learning_rate": 1.2998030137111619e-05, "loss": 0.7587, "step": 9445 }, { "epoch": 1.2631719711152716, "grad_norm": 1.0992887020111084, "learning_rate": 1.2996652769962567e-05, "loss": 0.7183, "step": 9446 }, { "epoch": 1.2633056967103504, "grad_norm": 1.2170807123184204, "learning_rate": 1.2995275340351154e-05, "loss": 0.6569, "step": 9447 }, { "epoch": 1.2634394223054293, "grad_norm": 1.279603362083435, "learning_rate": 1.2993897848306097e-05, "loss": 0.7209, "step": 9448 }, { "epoch": 1.2635731479005081, "grad_norm": 1.243025541305542, "learning_rate": 1.2992520293856098e-05, "loss": 0.7441, "step": 9449 }, { "epoch": 1.263706873495587, "grad_norm": 1.14859938621521, "learning_rate": 1.299114267702988e-05, "loss": 0.7127, "step": 9450 }, { "epoch": 1.263840599090666, "grad_norm": 1.1952840089797974, "learning_rate": 1.2989764997856154e-05, "loss": 0.6994, "step": 9451 }, { "epoch": 1.2639743246857449, "grad_norm": 1.1644119024276733, "learning_rate": 1.298838725636364e-05, "loss": 0.746, "step": 9452 }, { "epoch": 1.2641080502808237, "grad_norm": 1.1415284872055054, "learning_rate": 1.2987009452581051e-05, "loss": 0.716, "step": 9453 }, { "epoch": 1.2642417758759026, "grad_norm": 1.2003577947616577, "learning_rate": 1.2985631586537109e-05, "loss": 0.7858, "step": 9454 }, { "epoch": 1.2643755014709814, "grad_norm": 1.0465545654296875, "learning_rate": 1.2984253658260534e-05, "loss": 0.6908, "step": 9455 }, { "epoch": 1.2645092270660605, "grad_norm": 0.940658450126648, "learning_rate": 1.2982875667780046e-05, "loss": 0.613, "step": 9456 }, { "epoch": 1.2646429526611394, "grad_norm": 1.227908968925476, "learning_rate": 1.2981497615124367e-05, "loss": 0.6837, "step": 9457 }, { "epoch": 1.2647766782562182, "grad_norm": 1.1605809926986694, "learning_rate": 1.2980119500322228e-05, "loss": 0.6837, "step": 9458 }, { "epoch": 1.2649104038512973, "grad_norm": 1.1619807481765747, "learning_rate": 1.2978741323402347e-05, "loss": 0.7774, "step": 9459 }, { "epoch": 1.265044129446376, "grad_norm": 1.2946507930755615, "learning_rate": 1.2977363084393454e-05, "loss": 0.7192, "step": 9460 }, { "epoch": 1.265177855041455, "grad_norm": 1.0990961790084839, "learning_rate": 1.2975984783324278e-05, "loss": 0.7853, "step": 9461 }, { "epoch": 1.2653115806365338, "grad_norm": 1.3292585611343384, "learning_rate": 1.2974606420223546e-05, "loss": 0.7914, "step": 9462 }, { "epoch": 1.2654453062316127, "grad_norm": 1.1760728359222412, "learning_rate": 1.2973227995119985e-05, "loss": 0.7069, "step": 9463 }, { "epoch": 1.2655790318266917, "grad_norm": 1.3283166885375977, "learning_rate": 1.2971849508042338e-05, "loss": 0.7772, "step": 9464 }, { "epoch": 1.2657127574217706, "grad_norm": 1.1950656175613403, "learning_rate": 1.2970470959019328e-05, "loss": 0.7292, "step": 9465 }, { "epoch": 1.2658464830168494, "grad_norm": 1.1041823625564575, "learning_rate": 1.2969092348079695e-05, "loss": 0.6858, "step": 9466 }, { "epoch": 1.2659802086119283, "grad_norm": 1.1594486236572266, "learning_rate": 1.2967713675252172e-05, "loss": 0.7002, "step": 9467 }, { "epoch": 1.2661139342070071, "grad_norm": 1.1467301845550537, "learning_rate": 1.29663349405655e-05, "loss": 0.7321, "step": 9468 }, { "epoch": 1.2662476598020862, "grad_norm": 1.1792594194412231, "learning_rate": 1.2964956144048408e-05, "loss": 0.7266, "step": 9469 }, { "epoch": 1.266381385397165, "grad_norm": 1.096909999847412, "learning_rate": 1.2963577285729647e-05, "loss": 0.691, "step": 9470 }, { "epoch": 1.266515110992244, "grad_norm": 1.116920828819275, "learning_rate": 1.2962198365637954e-05, "loss": 0.7072, "step": 9471 }, { "epoch": 1.2666488365873227, "grad_norm": 1.0858980417251587, "learning_rate": 1.296081938380207e-05, "loss": 0.6529, "step": 9472 }, { "epoch": 1.2667825621824016, "grad_norm": 1.162699818611145, "learning_rate": 1.2959440340250739e-05, "loss": 0.6703, "step": 9473 }, { "epoch": 1.2669162877774807, "grad_norm": 1.2131328582763672, "learning_rate": 1.2958061235012707e-05, "loss": 0.7424, "step": 9474 }, { "epoch": 1.2670500133725595, "grad_norm": 1.0403214693069458, "learning_rate": 1.2956682068116717e-05, "loss": 0.6202, "step": 9475 }, { "epoch": 1.2671837389676384, "grad_norm": 1.0294089317321777, "learning_rate": 1.2955302839591519e-05, "loss": 0.6178, "step": 9476 }, { "epoch": 1.2673174645627174, "grad_norm": 1.049277663230896, "learning_rate": 1.2953923549465861e-05, "loss": 0.6804, "step": 9477 }, { "epoch": 1.2674511901577963, "grad_norm": 1.1760743856430054, "learning_rate": 1.2952544197768494e-05, "loss": 0.7672, "step": 9478 }, { "epoch": 1.2675849157528751, "grad_norm": 1.0840741395950317, "learning_rate": 1.2951164784528167e-05, "loss": 0.6307, "step": 9479 }, { "epoch": 1.267718641347954, "grad_norm": 1.1807917356491089, "learning_rate": 1.2949785309773638e-05, "loss": 0.7465, "step": 9480 }, { "epoch": 1.2678523669430328, "grad_norm": 1.3061667680740356, "learning_rate": 1.2948405773533654e-05, "loss": 0.7694, "step": 9481 }, { "epoch": 1.267986092538112, "grad_norm": 1.0992333889007568, "learning_rate": 1.2947026175836972e-05, "loss": 0.6318, "step": 9482 }, { "epoch": 1.2681198181331907, "grad_norm": 1.0805842876434326, "learning_rate": 1.2945646516712349e-05, "loss": 0.6638, "step": 9483 }, { "epoch": 1.2682535437282696, "grad_norm": 1.3306224346160889, "learning_rate": 1.2944266796188547e-05, "loss": 0.765, "step": 9484 }, { "epoch": 1.2683872693233484, "grad_norm": 1.0997247695922852, "learning_rate": 1.2942887014294318e-05, "loss": 0.6429, "step": 9485 }, { "epoch": 1.2685209949184273, "grad_norm": 1.0569506883621216, "learning_rate": 1.2941507171058424e-05, "loss": 0.6815, "step": 9486 }, { "epoch": 1.2686547205135064, "grad_norm": 1.0984492301940918, "learning_rate": 1.294012726650963e-05, "loss": 0.6801, "step": 9487 }, { "epoch": 1.2687884461085852, "grad_norm": 1.1420230865478516, "learning_rate": 1.2938747300676697e-05, "loss": 0.6958, "step": 9488 }, { "epoch": 1.268922171703664, "grad_norm": 1.1520344018936157, "learning_rate": 1.2937367273588387e-05, "loss": 0.7575, "step": 9489 }, { "epoch": 1.269055897298743, "grad_norm": 1.2222524881362915, "learning_rate": 1.2935987185273467e-05, "loss": 0.6952, "step": 9490 }, { "epoch": 1.2691896228938218, "grad_norm": 1.303871989250183, "learning_rate": 1.2934607035760705e-05, "loss": 0.6638, "step": 9491 }, { "epoch": 1.2693233484889008, "grad_norm": 1.1603704690933228, "learning_rate": 1.2933226825078866e-05, "loss": 0.7282, "step": 9492 }, { "epoch": 1.2694570740839797, "grad_norm": 1.253767490386963, "learning_rate": 1.2931846553256721e-05, "loss": 0.8046, "step": 9493 }, { "epoch": 1.2695907996790585, "grad_norm": 1.229962706565857, "learning_rate": 1.293046622032304e-05, "loss": 0.8114, "step": 9494 }, { "epoch": 1.2697245252741376, "grad_norm": 1.3764369487762451, "learning_rate": 1.2929085826306595e-05, "loss": 0.8298, "step": 9495 }, { "epoch": 1.2698582508692164, "grad_norm": 1.35489821434021, "learning_rate": 1.2927705371236159e-05, "loss": 0.7739, "step": 9496 }, { "epoch": 1.2699919764642953, "grad_norm": 1.1609470844268799, "learning_rate": 1.2926324855140507e-05, "loss": 0.6751, "step": 9497 }, { "epoch": 1.2701257020593741, "grad_norm": 1.0934573411941528, "learning_rate": 1.2924944278048412e-05, "loss": 0.691, "step": 9498 }, { "epoch": 1.270259427654453, "grad_norm": 1.052471399307251, "learning_rate": 1.2923563639988652e-05, "loss": 0.7398, "step": 9499 }, { "epoch": 1.270393153249532, "grad_norm": 1.1274904012680054, "learning_rate": 1.292218294099001e-05, "loss": 0.6857, "step": 9500 }, { "epoch": 1.270526878844611, "grad_norm": 1.258703351020813, "learning_rate": 1.2920802181081254e-05, "loss": 0.7205, "step": 9501 }, { "epoch": 1.2706606044396898, "grad_norm": 1.197588562965393, "learning_rate": 1.2919421360291173e-05, "loss": 0.622, "step": 9502 }, { "epoch": 1.2707943300347686, "grad_norm": 1.1211081743240356, "learning_rate": 1.2918040478648549e-05, "loss": 0.729, "step": 9503 }, { "epoch": 1.2709280556298475, "grad_norm": 1.2332813739776611, "learning_rate": 1.2916659536182166e-05, "loss": 0.7598, "step": 9504 }, { "epoch": 1.2710617812249265, "grad_norm": 1.0994535684585571, "learning_rate": 1.2915278532920802e-05, "loss": 0.6957, "step": 9505 }, { "epoch": 1.2711955068200054, "grad_norm": 1.1468127965927124, "learning_rate": 1.2913897468893249e-05, "loss": 0.6937, "step": 9506 }, { "epoch": 1.2713292324150842, "grad_norm": 1.0861032009124756, "learning_rate": 1.291251634412829e-05, "loss": 0.6953, "step": 9507 }, { "epoch": 1.271462958010163, "grad_norm": 1.2880622148513794, "learning_rate": 1.2911135158654716e-05, "loss": 0.7665, "step": 9508 }, { "epoch": 1.271596683605242, "grad_norm": 1.1230720281600952, "learning_rate": 1.2909753912501312e-05, "loss": 0.696, "step": 9509 }, { "epoch": 1.271730409200321, "grad_norm": 1.2851399183273315, "learning_rate": 1.2908372605696876e-05, "loss": 0.6976, "step": 9510 }, { "epoch": 1.2718641347953998, "grad_norm": 1.1694047451019287, "learning_rate": 1.2906991238270194e-05, "loss": 0.7441, "step": 9511 }, { "epoch": 1.2719978603904787, "grad_norm": 1.053795576095581, "learning_rate": 1.2905609810250064e-05, "loss": 0.6924, "step": 9512 }, { "epoch": 1.2721315859855578, "grad_norm": 1.2144666910171509, "learning_rate": 1.2904228321665276e-05, "loss": 0.6966, "step": 9513 }, { "epoch": 1.2722653115806366, "grad_norm": 1.15086829662323, "learning_rate": 1.2902846772544625e-05, "loss": 0.7114, "step": 9514 }, { "epoch": 1.2723990371757155, "grad_norm": 1.0648542642593384, "learning_rate": 1.2901465162916914e-05, "loss": 0.6516, "step": 9515 }, { "epoch": 1.2725327627707943, "grad_norm": 1.17129385471344, "learning_rate": 1.2900083492810935e-05, "loss": 0.6738, "step": 9516 }, { "epoch": 1.2726664883658731, "grad_norm": 1.227160096168518, "learning_rate": 1.2898701762255495e-05, "loss": 0.7766, "step": 9517 }, { "epoch": 1.2728002139609522, "grad_norm": 1.0876532793045044, "learning_rate": 1.2897319971279387e-05, "loss": 0.723, "step": 9518 }, { "epoch": 1.272933939556031, "grad_norm": 1.1414062976837158, "learning_rate": 1.289593811991142e-05, "loss": 0.7423, "step": 9519 }, { "epoch": 1.27306766515111, "grad_norm": 1.1948473453521729, "learning_rate": 1.2894556208180391e-05, "loss": 0.6736, "step": 9520 }, { "epoch": 1.2732013907461888, "grad_norm": 1.2639120817184448, "learning_rate": 1.2893174236115109e-05, "loss": 0.7121, "step": 9521 }, { "epoch": 1.2733351163412676, "grad_norm": 1.4424244165420532, "learning_rate": 1.2891792203744377e-05, "loss": 0.8544, "step": 9522 }, { "epoch": 1.2734688419363467, "grad_norm": 1.1673781871795654, "learning_rate": 1.2890410111097004e-05, "loss": 0.7861, "step": 9523 }, { "epoch": 1.2736025675314255, "grad_norm": 1.125425100326538, "learning_rate": 1.28890279582018e-05, "loss": 0.6936, "step": 9524 }, { "epoch": 1.2737362931265044, "grad_norm": 1.1461714506149292, "learning_rate": 1.2887645745087573e-05, "loss": 0.7225, "step": 9525 }, { "epoch": 1.2738700187215832, "grad_norm": 1.1426209211349487, "learning_rate": 1.2886263471783134e-05, "loss": 0.7165, "step": 9526 }, { "epoch": 1.274003744316662, "grad_norm": 1.1742935180664062, "learning_rate": 1.2884881138317291e-05, "loss": 0.7322, "step": 9527 }, { "epoch": 1.2741374699117411, "grad_norm": 1.1461148262023926, "learning_rate": 1.2883498744718861e-05, "loss": 0.7656, "step": 9528 }, { "epoch": 1.27427119550682, "grad_norm": 1.1775208711624146, "learning_rate": 1.2882116291016663e-05, "loss": 0.7402, "step": 9529 }, { "epoch": 1.2744049211018988, "grad_norm": 1.1097184419631958, "learning_rate": 1.2880733777239506e-05, "loss": 0.6718, "step": 9530 }, { "epoch": 1.274538646696978, "grad_norm": 1.1424474716186523, "learning_rate": 1.2879351203416213e-05, "loss": 0.7459, "step": 9531 }, { "epoch": 1.2746723722920568, "grad_norm": 1.224245309829712, "learning_rate": 1.2877968569575596e-05, "loss": 0.7285, "step": 9532 }, { "epoch": 1.2748060978871356, "grad_norm": 1.0536460876464844, "learning_rate": 1.2876585875746478e-05, "loss": 0.7081, "step": 9533 }, { "epoch": 1.2749398234822145, "grad_norm": 1.1879181861877441, "learning_rate": 1.2875203121957682e-05, "loss": 0.7127, "step": 9534 }, { "epoch": 1.2750735490772933, "grad_norm": 1.0664161443710327, "learning_rate": 1.2873820308238027e-05, "loss": 0.7394, "step": 9535 }, { "epoch": 1.2752072746723724, "grad_norm": 1.194688320159912, "learning_rate": 1.2872437434616339e-05, "loss": 0.7448, "step": 9536 }, { "epoch": 1.2753410002674512, "grad_norm": 1.3280386924743652, "learning_rate": 1.2871054501121443e-05, "loss": 0.8864, "step": 9537 }, { "epoch": 1.27547472586253, "grad_norm": 1.2681570053100586, "learning_rate": 1.286967150778216e-05, "loss": 0.686, "step": 9538 }, { "epoch": 1.275608451457609, "grad_norm": 1.118194818496704, "learning_rate": 1.2868288454627322e-05, "loss": 0.6948, "step": 9539 }, { "epoch": 1.2757421770526878, "grad_norm": 1.2404524087905884, "learning_rate": 1.2866905341685753e-05, "loss": 0.702, "step": 9540 }, { "epoch": 1.2758759026477668, "grad_norm": 1.1704304218292236, "learning_rate": 1.286552216898629e-05, "loss": 0.6935, "step": 9541 }, { "epoch": 1.2760096282428457, "grad_norm": 1.0573474168777466, "learning_rate": 1.2864138936557755e-05, "loss": 0.735, "step": 9542 }, { "epoch": 1.2761433538379245, "grad_norm": 1.009398102760315, "learning_rate": 1.2862755644428985e-05, "loss": 0.6517, "step": 9543 }, { "epoch": 1.2762770794330036, "grad_norm": 1.145957112312317, "learning_rate": 1.2861372292628816e-05, "loss": 0.6886, "step": 9544 }, { "epoch": 1.2764108050280822, "grad_norm": 1.1343228816986084, "learning_rate": 1.2859988881186079e-05, "loss": 0.7945, "step": 9545 }, { "epoch": 1.2765445306231613, "grad_norm": 1.103018045425415, "learning_rate": 1.285860541012961e-05, "loss": 0.6899, "step": 9546 }, { "epoch": 1.2766782562182402, "grad_norm": 1.0970325469970703, "learning_rate": 1.2857221879488245e-05, "loss": 0.6099, "step": 9547 }, { "epoch": 1.276811981813319, "grad_norm": 1.2206907272338867, "learning_rate": 1.2855838289290822e-05, "loss": 0.7649, "step": 9548 }, { "epoch": 1.276945707408398, "grad_norm": 1.1753188371658325, "learning_rate": 1.2854454639566189e-05, "loss": 0.8172, "step": 9549 }, { "epoch": 1.277079433003477, "grad_norm": 1.236785650253296, "learning_rate": 1.2853070930343176e-05, "loss": 0.7382, "step": 9550 }, { "epoch": 1.2772131585985558, "grad_norm": 1.0794576406478882, "learning_rate": 1.285168716165063e-05, "loss": 0.7146, "step": 9551 }, { "epoch": 1.2773468841936346, "grad_norm": 1.1871787309646606, "learning_rate": 1.2850303333517396e-05, "loss": 0.6988, "step": 9552 }, { "epoch": 1.2774806097887135, "grad_norm": 1.1199374198913574, "learning_rate": 1.2848919445972315e-05, "loss": 0.6563, "step": 9553 }, { "epoch": 1.2776143353837925, "grad_norm": 1.0298348665237427, "learning_rate": 1.2847535499044232e-05, "loss": 0.6022, "step": 9554 }, { "epoch": 1.2777480609788714, "grad_norm": 1.2932682037353516, "learning_rate": 1.2846151492762e-05, "loss": 0.6475, "step": 9555 }, { "epoch": 1.2778817865739502, "grad_norm": 1.27214515209198, "learning_rate": 1.2844767427154462e-05, "loss": 0.6811, "step": 9556 }, { "epoch": 1.278015512169029, "grad_norm": 1.1253668069839478, "learning_rate": 1.2843383302250471e-05, "loss": 0.7056, "step": 9557 }, { "epoch": 1.278149237764108, "grad_norm": 1.2528895139694214, "learning_rate": 1.2841999118078874e-05, "loss": 0.6427, "step": 9558 }, { "epoch": 1.278282963359187, "grad_norm": 1.3314491510391235, "learning_rate": 1.2840614874668524e-05, "loss": 0.7606, "step": 9559 }, { "epoch": 1.2784166889542659, "grad_norm": 1.0440125465393066, "learning_rate": 1.2839230572048274e-05, "loss": 0.6223, "step": 9560 }, { "epoch": 1.2785504145493447, "grad_norm": 1.2579035758972168, "learning_rate": 1.2837846210246984e-05, "loss": 0.7396, "step": 9561 }, { "epoch": 1.2786841401444238, "grad_norm": 1.0947684049606323, "learning_rate": 1.2836461789293505e-05, "loss": 0.7188, "step": 9562 }, { "epoch": 1.2788178657395026, "grad_norm": 1.0589631795883179, "learning_rate": 1.283507730921669e-05, "loss": 0.6997, "step": 9563 }, { "epoch": 1.2789515913345815, "grad_norm": 1.1962766647338867, "learning_rate": 1.2833692770045403e-05, "loss": 0.6717, "step": 9564 }, { "epoch": 1.2790853169296603, "grad_norm": 1.074391484260559, "learning_rate": 1.2832308171808505e-05, "loss": 0.6489, "step": 9565 }, { "epoch": 1.2792190425247392, "grad_norm": 1.2596200704574585, "learning_rate": 1.283092351453485e-05, "loss": 0.7279, "step": 9566 }, { "epoch": 1.2793527681198182, "grad_norm": 1.2978917360305786, "learning_rate": 1.2829538798253303e-05, "loss": 0.7113, "step": 9567 }, { "epoch": 1.279486493714897, "grad_norm": 1.2259624004364014, "learning_rate": 1.2828154022992727e-05, "loss": 0.6672, "step": 9568 }, { "epoch": 1.279620219309976, "grad_norm": 1.1117417812347412, "learning_rate": 1.2826769188781991e-05, "loss": 0.6618, "step": 9569 }, { "epoch": 1.2797539449050548, "grad_norm": 1.1610409021377563, "learning_rate": 1.2825384295649952e-05, "loss": 0.7347, "step": 9570 }, { "epoch": 1.2798876705001336, "grad_norm": 1.3092174530029297, "learning_rate": 1.2823999343625482e-05, "loss": 0.748, "step": 9571 }, { "epoch": 1.2800213960952127, "grad_norm": 1.452265977859497, "learning_rate": 1.2822614332737449e-05, "loss": 0.7977, "step": 9572 }, { "epoch": 1.2801551216902916, "grad_norm": 1.2148412466049194, "learning_rate": 1.2821229263014719e-05, "loss": 0.7278, "step": 9573 }, { "epoch": 1.2802888472853704, "grad_norm": 1.2608327865600586, "learning_rate": 1.2819844134486166e-05, "loss": 0.7936, "step": 9574 }, { "epoch": 1.2804225728804493, "grad_norm": 1.228591799736023, "learning_rate": 1.281845894718066e-05, "loss": 0.7199, "step": 9575 }, { "epoch": 1.280556298475528, "grad_norm": 1.1814494132995605, "learning_rate": 1.2817073701127074e-05, "loss": 0.7539, "step": 9576 }, { "epoch": 1.2806900240706072, "grad_norm": 1.0683388710021973, "learning_rate": 1.2815688396354284e-05, "loss": 0.7229, "step": 9577 }, { "epoch": 1.280823749665686, "grad_norm": 1.244971752166748, "learning_rate": 1.2814303032891162e-05, "loss": 0.7938, "step": 9578 }, { "epoch": 1.2809574752607649, "grad_norm": 1.1436121463775635, "learning_rate": 1.2812917610766587e-05, "loss": 0.7233, "step": 9579 }, { "epoch": 1.281091200855844, "grad_norm": 1.2176181077957153, "learning_rate": 1.2811532130009434e-05, "loss": 0.8012, "step": 9580 }, { "epoch": 1.2812249264509228, "grad_norm": 1.1085972785949707, "learning_rate": 1.2810146590648587e-05, "loss": 0.6931, "step": 9581 }, { "epoch": 1.2813586520460016, "grad_norm": 1.1544116735458374, "learning_rate": 1.2808760992712923e-05, "loss": 0.7264, "step": 9582 }, { "epoch": 1.2814923776410805, "grad_norm": 1.2515429258346558, "learning_rate": 1.2807375336231323e-05, "loss": 0.794, "step": 9583 }, { "epoch": 1.2816261032361593, "grad_norm": 1.2227753400802612, "learning_rate": 1.280598962123267e-05, "loss": 0.8488, "step": 9584 }, { "epoch": 1.2817598288312384, "grad_norm": 1.1010059118270874, "learning_rate": 1.2804603847745848e-05, "loss": 0.6915, "step": 9585 }, { "epoch": 1.2818935544263172, "grad_norm": 1.2636703252792358, "learning_rate": 1.2803218015799743e-05, "loss": 0.7874, "step": 9586 }, { "epoch": 1.282027280021396, "grad_norm": 1.1862380504608154, "learning_rate": 1.280183212542324e-05, "loss": 0.7702, "step": 9587 }, { "epoch": 1.282161005616475, "grad_norm": 1.0793447494506836, "learning_rate": 1.2800446176645229e-05, "loss": 0.7599, "step": 9588 }, { "epoch": 1.2822947312115538, "grad_norm": 1.1803622245788574, "learning_rate": 1.2799060169494601e-05, "loss": 0.6553, "step": 9589 }, { "epoch": 1.2824284568066329, "grad_norm": 1.168172836303711, "learning_rate": 1.2797674104000237e-05, "loss": 0.7146, "step": 9590 }, { "epoch": 1.2825621824017117, "grad_norm": 1.053512692451477, "learning_rate": 1.2796287980191035e-05, "loss": 0.6724, "step": 9591 }, { "epoch": 1.2826959079967906, "grad_norm": 1.1048084497451782, "learning_rate": 1.2794901798095882e-05, "loss": 0.7409, "step": 9592 }, { "epoch": 1.2828296335918694, "grad_norm": 1.09452486038208, "learning_rate": 1.279351555774368e-05, "loss": 0.6656, "step": 9593 }, { "epoch": 1.2829633591869483, "grad_norm": 1.139316201210022, "learning_rate": 1.279212925916332e-05, "loss": 0.6764, "step": 9594 }, { "epoch": 1.2830970847820273, "grad_norm": 1.1722959280014038, "learning_rate": 1.2790742902383695e-05, "loss": 0.6981, "step": 9595 }, { "epoch": 1.2832308103771062, "grad_norm": 1.354641079902649, "learning_rate": 1.2789356487433705e-05, "loss": 0.6704, "step": 9596 }, { "epoch": 1.283364535972185, "grad_norm": 1.436334490776062, "learning_rate": 1.2787970014342248e-05, "loss": 0.8785, "step": 9597 }, { "epoch": 1.283498261567264, "grad_norm": 1.1137406826019287, "learning_rate": 1.2786583483138222e-05, "loss": 0.6304, "step": 9598 }, { "epoch": 1.283631987162343, "grad_norm": 1.0410254001617432, "learning_rate": 1.2785196893850532e-05, "loss": 0.651, "step": 9599 }, { "epoch": 1.2837657127574218, "grad_norm": 1.3171939849853516, "learning_rate": 1.2783810246508077e-05, "loss": 0.7489, "step": 9600 }, { "epoch": 1.2838994383525006, "grad_norm": 1.2629326581954956, "learning_rate": 1.278242354113976e-05, "loss": 0.7332, "step": 9601 }, { "epoch": 1.2840331639475795, "grad_norm": 1.0753047466278076, "learning_rate": 1.2781036777774492e-05, "loss": 0.6598, "step": 9602 }, { "epoch": 1.2841668895426586, "grad_norm": 1.0636957883834839, "learning_rate": 1.2779649956441172e-05, "loss": 0.6693, "step": 9603 }, { "epoch": 1.2843006151377374, "grad_norm": 1.2351280450820923, "learning_rate": 1.2778263077168704e-05, "loss": 0.7663, "step": 9604 }, { "epoch": 1.2844343407328163, "grad_norm": 1.1845741271972656, "learning_rate": 1.2776876139986003e-05, "loss": 0.6487, "step": 9605 }, { "epoch": 1.284568066327895, "grad_norm": 1.307610034942627, "learning_rate": 1.2775489144921977e-05, "loss": 0.7546, "step": 9606 }, { "epoch": 1.284701791922974, "grad_norm": 1.0826590061187744, "learning_rate": 1.2774102092005536e-05, "loss": 0.7264, "step": 9607 }, { "epoch": 1.284835517518053, "grad_norm": 1.311576008796692, "learning_rate": 1.2772714981265591e-05, "loss": 0.721, "step": 9608 }, { "epoch": 1.2849692431131319, "grad_norm": 1.1081477403640747, "learning_rate": 1.2771327812731053e-05, "loss": 0.7731, "step": 9609 }, { "epoch": 1.2851029687082107, "grad_norm": 1.2137136459350586, "learning_rate": 1.2769940586430842e-05, "loss": 0.7516, "step": 9610 }, { "epoch": 1.2852366943032896, "grad_norm": 1.127432942390442, "learning_rate": 1.2768553302393867e-05, "loss": 0.6851, "step": 9611 }, { "epoch": 1.2853704198983684, "grad_norm": 1.0564912557601929, "learning_rate": 1.2767165960649049e-05, "loss": 0.6641, "step": 9612 }, { "epoch": 1.2855041454934475, "grad_norm": 1.1290284395217896, "learning_rate": 1.2765778561225303e-05, "loss": 0.6638, "step": 9613 }, { "epoch": 1.2856378710885263, "grad_norm": 1.1460851430892944, "learning_rate": 1.2764391104151554e-05, "loss": 0.684, "step": 9614 }, { "epoch": 1.2857715966836052, "grad_norm": 1.251197338104248, "learning_rate": 1.2763003589456716e-05, "loss": 0.6796, "step": 9615 }, { "epoch": 1.2859053222786843, "grad_norm": 1.1249823570251465, "learning_rate": 1.2761616017169709e-05, "loss": 0.7374, "step": 9616 }, { "epoch": 1.286039047873763, "grad_norm": 1.4155443906784058, "learning_rate": 1.276022838731946e-05, "loss": 0.7883, "step": 9617 }, { "epoch": 1.286172773468842, "grad_norm": 1.1178494691848755, "learning_rate": 1.2758840699934893e-05, "loss": 0.6404, "step": 9618 }, { "epoch": 1.2863064990639208, "grad_norm": 1.2892897129058838, "learning_rate": 1.2757452955044928e-05, "loss": 0.8131, "step": 9619 }, { "epoch": 1.2864402246589997, "grad_norm": 1.1258512735366821, "learning_rate": 1.27560651526785e-05, "loss": 0.6935, "step": 9620 }, { "epoch": 1.2865739502540787, "grad_norm": 1.181854009628296, "learning_rate": 1.2754677292864525e-05, "loss": 0.7623, "step": 9621 }, { "epoch": 1.2867076758491576, "grad_norm": 1.1584722995758057, "learning_rate": 1.2753289375631945e-05, "loss": 0.7519, "step": 9622 }, { "epoch": 1.2868414014442364, "grad_norm": 1.2618217468261719, "learning_rate": 1.2751901401009678e-05, "loss": 0.8311, "step": 9623 }, { "epoch": 1.2869751270393153, "grad_norm": 1.0831125974655151, "learning_rate": 1.2750513369026658e-05, "loss": 0.8387, "step": 9624 }, { "epoch": 1.2871088526343941, "grad_norm": 1.1415061950683594, "learning_rate": 1.274912527971182e-05, "loss": 0.6753, "step": 9625 }, { "epoch": 1.2872425782294732, "grad_norm": 1.0522689819335938, "learning_rate": 1.27477371330941e-05, "loss": 0.7007, "step": 9626 }, { "epoch": 1.287376303824552, "grad_norm": 1.0634231567382812, "learning_rate": 1.2746348929202426e-05, "loss": 0.7308, "step": 9627 }, { "epoch": 1.2875100294196309, "grad_norm": 1.123031735420227, "learning_rate": 1.2744960668065737e-05, "loss": 0.6801, "step": 9628 }, { "epoch": 1.2876437550147097, "grad_norm": 1.2298609018325806, "learning_rate": 1.274357234971297e-05, "loss": 0.7098, "step": 9629 }, { "epoch": 1.2877774806097886, "grad_norm": 1.1345815658569336, "learning_rate": 1.2742183974173062e-05, "loss": 0.7296, "step": 9630 }, { "epoch": 1.2879112062048677, "grad_norm": 1.254126787185669, "learning_rate": 1.274079554147495e-05, "loss": 0.7089, "step": 9631 }, { "epoch": 1.2880449317999465, "grad_norm": 1.1598751544952393, "learning_rate": 1.2739407051647581e-05, "loss": 0.6983, "step": 9632 }, { "epoch": 1.2881786573950254, "grad_norm": 1.1939847469329834, "learning_rate": 1.2738018504719894e-05, "loss": 0.6925, "step": 9633 }, { "epoch": 1.2883123829901044, "grad_norm": 1.2101213932037354, "learning_rate": 1.2736629900720832e-05, "loss": 0.7636, "step": 9634 }, { "epoch": 1.2884461085851833, "grad_norm": 1.0956484079360962, "learning_rate": 1.2735241239679335e-05, "loss": 0.7592, "step": 9635 }, { "epoch": 1.2885798341802621, "grad_norm": 1.2897831201553345, "learning_rate": 1.2733852521624353e-05, "loss": 0.8213, "step": 9636 }, { "epoch": 1.288713559775341, "grad_norm": 1.3319885730743408, "learning_rate": 1.273246374658483e-05, "loss": 0.7514, "step": 9637 }, { "epoch": 1.2888472853704198, "grad_norm": 1.1608487367630005, "learning_rate": 1.2731074914589718e-05, "loss": 0.7175, "step": 9638 }, { "epoch": 1.2889810109654989, "grad_norm": 1.121476173400879, "learning_rate": 1.272968602566796e-05, "loss": 0.6822, "step": 9639 }, { "epoch": 1.2891147365605777, "grad_norm": 1.1441428661346436, "learning_rate": 1.272829707984851e-05, "loss": 0.7443, "step": 9640 }, { "epoch": 1.2892484621556566, "grad_norm": 1.1241437196731567, "learning_rate": 1.2726908077160318e-05, "loss": 0.7289, "step": 9641 }, { "epoch": 1.2893821877507354, "grad_norm": 1.216529130935669, "learning_rate": 1.2725519017632337e-05, "loss": 0.6968, "step": 9642 }, { "epoch": 1.2895159133458143, "grad_norm": 1.1122649908065796, "learning_rate": 1.2724129901293519e-05, "loss": 0.7527, "step": 9643 }, { "epoch": 1.2896496389408934, "grad_norm": 1.1345137357711792, "learning_rate": 1.272274072817282e-05, "loss": 0.7642, "step": 9644 }, { "epoch": 1.2897833645359722, "grad_norm": 1.126294493675232, "learning_rate": 1.2721351498299194e-05, "loss": 0.6494, "step": 9645 }, { "epoch": 1.289917090131051, "grad_norm": 1.163415789604187, "learning_rate": 1.2719962211701607e-05, "loss": 0.7854, "step": 9646 }, { "epoch": 1.2900508157261301, "grad_norm": 1.1121639013290405, "learning_rate": 1.2718572868409005e-05, "loss": 0.7057, "step": 9647 }, { "epoch": 1.2901845413212087, "grad_norm": 1.1295735836029053, "learning_rate": 1.2717183468450354e-05, "loss": 0.688, "step": 9648 }, { "epoch": 1.2903182669162878, "grad_norm": 1.1596299409866333, "learning_rate": 1.2715794011854612e-05, "loss": 0.6512, "step": 9649 }, { "epoch": 1.2904519925113667, "grad_norm": 1.045924425125122, "learning_rate": 1.2714404498650743e-05, "loss": 0.7101, "step": 9650 }, { "epoch": 1.2905857181064455, "grad_norm": 1.0680131912231445, "learning_rate": 1.271301492886771e-05, "loss": 0.6668, "step": 9651 }, { "epoch": 1.2907194437015246, "grad_norm": 1.1398775577545166, "learning_rate": 1.2711625302534479e-05, "loss": 0.6902, "step": 9652 }, { "epoch": 1.2908531692966034, "grad_norm": 1.0867266654968262, "learning_rate": 1.2710235619680012e-05, "loss": 0.683, "step": 9653 }, { "epoch": 1.2909868948916823, "grad_norm": 1.0483216047286987, "learning_rate": 1.2708845880333278e-05, "loss": 0.6894, "step": 9654 }, { "epoch": 1.2911206204867611, "grad_norm": 1.255963921546936, "learning_rate": 1.2707456084523242e-05, "loss": 0.6983, "step": 9655 }, { "epoch": 1.29125434608184, "grad_norm": 1.2016394138336182, "learning_rate": 1.2706066232278873e-05, "loss": 0.7786, "step": 9656 }, { "epoch": 1.291388071676919, "grad_norm": 1.3941806554794312, "learning_rate": 1.2704676323629146e-05, "loss": 0.7597, "step": 9657 }, { "epoch": 1.291521797271998, "grad_norm": 1.060990571975708, "learning_rate": 1.2703286358603029e-05, "loss": 0.7022, "step": 9658 }, { "epoch": 1.2916555228670767, "grad_norm": 1.0121026039123535, "learning_rate": 1.2701896337229493e-05, "loss": 0.657, "step": 9659 }, { "epoch": 1.2917892484621556, "grad_norm": 1.1033008098602295, "learning_rate": 1.2700506259537515e-05, "loss": 0.7061, "step": 9660 }, { "epoch": 1.2919229740572344, "grad_norm": 1.2020244598388672, "learning_rate": 1.2699116125556065e-05, "loss": 0.7438, "step": 9661 }, { "epoch": 1.2920566996523135, "grad_norm": 1.0644489526748657, "learning_rate": 1.2697725935314125e-05, "loss": 0.6942, "step": 9662 }, { "epoch": 1.2921904252473924, "grad_norm": 1.1937388181686401, "learning_rate": 1.2696335688840669e-05, "loss": 0.7063, "step": 9663 }, { "epoch": 1.2923241508424712, "grad_norm": 1.2846183776855469, "learning_rate": 1.2694945386164675e-05, "loss": 0.7222, "step": 9664 }, { "epoch": 1.2924578764375503, "grad_norm": 1.1278648376464844, "learning_rate": 1.2693555027315124e-05, "loss": 0.7392, "step": 9665 }, { "epoch": 1.2925916020326291, "grad_norm": 1.3033103942871094, "learning_rate": 1.2692164612320997e-05, "loss": 0.7626, "step": 9666 }, { "epoch": 1.292725327627708, "grad_norm": 1.0802661180496216, "learning_rate": 1.2690774141211271e-05, "loss": 0.7298, "step": 9667 }, { "epoch": 1.2928590532227868, "grad_norm": 1.0877751111984253, "learning_rate": 1.2689383614014937e-05, "loss": 0.7406, "step": 9668 }, { "epoch": 1.2929927788178657, "grad_norm": 1.339814305305481, "learning_rate": 1.2687993030760973e-05, "loss": 0.7958, "step": 9669 }, { "epoch": 1.2931265044129447, "grad_norm": 1.0392295122146606, "learning_rate": 1.2686602391478364e-05, "loss": 0.6492, "step": 9670 }, { "epoch": 1.2932602300080236, "grad_norm": 1.2615996599197388, "learning_rate": 1.2685211696196102e-05, "loss": 0.7585, "step": 9671 }, { "epoch": 1.2933939556031024, "grad_norm": 1.1853387355804443, "learning_rate": 1.268382094494317e-05, "loss": 0.7118, "step": 9672 }, { "epoch": 1.2935276811981813, "grad_norm": 1.2066714763641357, "learning_rate": 1.268243013774856e-05, "loss": 0.76, "step": 9673 }, { "epoch": 1.2936614067932601, "grad_norm": 1.2250553369522095, "learning_rate": 1.2681039274641261e-05, "loss": 0.7646, "step": 9674 }, { "epoch": 1.2937951323883392, "grad_norm": 1.318450927734375, "learning_rate": 1.267964835565026e-05, "loss": 0.767, "step": 9675 }, { "epoch": 1.293928857983418, "grad_norm": 1.198045253753662, "learning_rate": 1.2678257380804557e-05, "loss": 0.6883, "step": 9676 }, { "epoch": 1.294062583578497, "grad_norm": 1.1542186737060547, "learning_rate": 1.2676866350133142e-05, "loss": 0.7543, "step": 9677 }, { "epoch": 1.2941963091735758, "grad_norm": 1.2106938362121582, "learning_rate": 1.267547526366501e-05, "loss": 0.7494, "step": 9678 }, { "epoch": 1.2943300347686546, "grad_norm": 1.0944880247116089, "learning_rate": 1.2674084121429153e-05, "loss": 0.744, "step": 9679 }, { "epoch": 1.2944637603637337, "grad_norm": 1.1708085536956787, "learning_rate": 1.2672692923454572e-05, "loss": 0.734, "step": 9680 }, { "epoch": 1.2945974859588125, "grad_norm": 1.069841980934143, "learning_rate": 1.2671301669770266e-05, "loss": 0.6945, "step": 9681 }, { "epoch": 1.2947312115538914, "grad_norm": 1.1405390501022339, "learning_rate": 1.266991036040523e-05, "loss": 0.6381, "step": 9682 }, { "epoch": 1.2948649371489704, "grad_norm": 1.1604965925216675, "learning_rate": 1.266851899538847e-05, "loss": 0.6849, "step": 9683 }, { "epoch": 1.2949986627440493, "grad_norm": 1.1917328834533691, "learning_rate": 1.2667127574748985e-05, "loss": 0.7366, "step": 9684 }, { "epoch": 1.2951323883391281, "grad_norm": 1.2936955690383911, "learning_rate": 1.2665736098515778e-05, "loss": 0.7704, "step": 9685 }, { "epoch": 1.295266113934207, "grad_norm": 1.1507152318954468, "learning_rate": 1.2664344566717853e-05, "loss": 0.8212, "step": 9686 }, { "epoch": 1.2953998395292858, "grad_norm": 1.3287267684936523, "learning_rate": 1.2662952979384216e-05, "loss": 0.732, "step": 9687 }, { "epoch": 1.295533565124365, "grad_norm": 1.199951410293579, "learning_rate": 1.2661561336543868e-05, "loss": 0.779, "step": 9688 }, { "epoch": 1.2956672907194438, "grad_norm": 1.3043510913848877, "learning_rate": 1.2660169638225824e-05, "loss": 0.7696, "step": 9689 }, { "epoch": 1.2958010163145226, "grad_norm": 1.227268099784851, "learning_rate": 1.2658777884459086e-05, "loss": 0.7398, "step": 9690 }, { "epoch": 1.2959347419096015, "grad_norm": 1.3260715007781982, "learning_rate": 1.2657386075272672e-05, "loss": 0.7371, "step": 9691 }, { "epoch": 1.2960684675046803, "grad_norm": 1.0832947492599487, "learning_rate": 1.2655994210695586e-05, "loss": 0.6553, "step": 9692 }, { "epoch": 1.2962021930997594, "grad_norm": 1.1742264032363892, "learning_rate": 1.2654602290756844e-05, "loss": 0.7061, "step": 9693 }, { "epoch": 1.2963359186948382, "grad_norm": 1.2155097723007202, "learning_rate": 1.2653210315485453e-05, "loss": 0.757, "step": 9694 }, { "epoch": 1.296469644289917, "grad_norm": 1.244540810585022, "learning_rate": 1.2651818284910435e-05, "loss": 0.7112, "step": 9695 }, { "epoch": 1.296603369884996, "grad_norm": 1.0440651178359985, "learning_rate": 1.26504261990608e-05, "loss": 0.7048, "step": 9696 }, { "epoch": 1.2967370954800748, "grad_norm": 1.126236081123352, "learning_rate": 1.264903405796557e-05, "loss": 0.6747, "step": 9697 }, { "epoch": 1.2968708210751538, "grad_norm": 1.1871329545974731, "learning_rate": 1.2647641861653759e-05, "loss": 0.7553, "step": 9698 }, { "epoch": 1.2970045466702327, "grad_norm": 1.2088154554367065, "learning_rate": 1.2646249610154388e-05, "loss": 0.6326, "step": 9699 }, { "epoch": 1.2971382722653115, "grad_norm": 1.2028765678405762, "learning_rate": 1.2644857303496476e-05, "loss": 0.7422, "step": 9700 }, { "epoch": 1.2972719978603906, "grad_norm": 1.2238471508026123, "learning_rate": 1.2643464941709042e-05, "loss": 0.703, "step": 9701 }, { "epoch": 1.2974057234554695, "grad_norm": 1.0195839405059814, "learning_rate": 1.264207252482111e-05, "loss": 0.653, "step": 9702 }, { "epoch": 1.2975394490505483, "grad_norm": 1.0800600051879883, "learning_rate": 1.2640680052861706e-05, "loss": 0.7205, "step": 9703 }, { "epoch": 1.2976731746456271, "grad_norm": 1.1276752948760986, "learning_rate": 1.2639287525859855e-05, "loss": 0.7158, "step": 9704 }, { "epoch": 1.297806900240706, "grad_norm": 1.1186134815216064, "learning_rate": 1.263789494384458e-05, "loss": 0.6955, "step": 9705 }, { "epoch": 1.297940625835785, "grad_norm": 1.180485486984253, "learning_rate": 1.263650230684491e-05, "loss": 0.6979, "step": 9706 }, { "epoch": 1.298074351430864, "grad_norm": 1.137648344039917, "learning_rate": 1.2635109614889868e-05, "loss": 0.6352, "step": 9707 }, { "epoch": 1.2982080770259428, "grad_norm": 1.2170674800872803, "learning_rate": 1.2633716868008493e-05, "loss": 0.7115, "step": 9708 }, { "epoch": 1.2983418026210216, "grad_norm": 1.2233610153198242, "learning_rate": 1.2632324066229806e-05, "loss": 0.7699, "step": 9709 }, { "epoch": 1.2984755282161005, "grad_norm": 1.1107714176177979, "learning_rate": 1.2630931209582844e-05, "loss": 0.7058, "step": 9710 }, { "epoch": 1.2986092538111795, "grad_norm": 1.1467139720916748, "learning_rate": 1.2629538298096641e-05, "loss": 0.7793, "step": 9711 }, { "epoch": 1.2987429794062584, "grad_norm": 1.1824299097061157, "learning_rate": 1.2628145331800226e-05, "loss": 0.7095, "step": 9712 }, { "epoch": 1.2988767050013372, "grad_norm": 1.2506284713745117, "learning_rate": 1.2626752310722637e-05, "loss": 0.7512, "step": 9713 }, { "epoch": 1.299010430596416, "grad_norm": 1.1447522640228271, "learning_rate": 1.2625359234892906e-05, "loss": 0.7968, "step": 9714 }, { "epoch": 1.299144156191495, "grad_norm": 1.2814137935638428, "learning_rate": 1.262396610434008e-05, "loss": 0.7642, "step": 9715 }, { "epoch": 1.299277881786574, "grad_norm": 1.371208667755127, "learning_rate": 1.2622572919093188e-05, "loss": 0.7714, "step": 9716 }, { "epoch": 1.2994116073816528, "grad_norm": 1.1869853734970093, "learning_rate": 1.2621179679181273e-05, "loss": 0.6613, "step": 9717 }, { "epoch": 1.2995453329767317, "grad_norm": 1.1784124374389648, "learning_rate": 1.2619786384633374e-05, "loss": 0.6795, "step": 9718 }, { "epoch": 1.2996790585718108, "grad_norm": 1.1134071350097656, "learning_rate": 1.261839303547854e-05, "loss": 0.6611, "step": 9719 }, { "epoch": 1.2998127841668896, "grad_norm": 1.1958562135696411, "learning_rate": 1.2616999631745807e-05, "loss": 0.7883, "step": 9720 }, { "epoch": 1.2999465097619685, "grad_norm": 1.264787197113037, "learning_rate": 1.2615606173464216e-05, "loss": 0.7567, "step": 9721 }, { "epoch": 1.3000802353570473, "grad_norm": 1.2790603637695312, "learning_rate": 1.2614212660662822e-05, "loss": 0.8026, "step": 9722 }, { "epoch": 1.3002139609521262, "grad_norm": 1.1667567491531372, "learning_rate": 1.2612819093370667e-05, "loss": 0.626, "step": 9723 }, { "epoch": 1.3003476865472052, "grad_norm": 1.2687195539474487, "learning_rate": 1.2611425471616796e-05, "loss": 0.7583, "step": 9724 }, { "epoch": 1.300481412142284, "grad_norm": 1.2207969427108765, "learning_rate": 1.261003179543026e-05, "loss": 0.7527, "step": 9725 }, { "epoch": 1.300615137737363, "grad_norm": 1.2442119121551514, "learning_rate": 1.2608638064840108e-05, "loss": 0.7364, "step": 9726 }, { "epoch": 1.3007488633324418, "grad_norm": 1.244952917098999, "learning_rate": 1.2607244279875395e-05, "loss": 0.7548, "step": 9727 }, { "epoch": 1.3008825889275206, "grad_norm": 1.164206624031067, "learning_rate": 1.2605850440565165e-05, "loss": 0.6898, "step": 9728 }, { "epoch": 1.3010163145225997, "grad_norm": 1.3708223104476929, "learning_rate": 1.260445654693848e-05, "loss": 0.8108, "step": 9729 }, { "epoch": 1.3011500401176785, "grad_norm": 1.189103364944458, "learning_rate": 1.260306259902439e-05, "loss": 0.6961, "step": 9730 }, { "epoch": 1.3012837657127574, "grad_norm": 1.233698844909668, "learning_rate": 1.2601668596851953e-05, "loss": 0.7406, "step": 9731 }, { "epoch": 1.3014174913078362, "grad_norm": 1.2103241682052612, "learning_rate": 1.2600274540450222e-05, "loss": 0.7634, "step": 9732 }, { "epoch": 1.301551216902915, "grad_norm": 1.1698404550552368, "learning_rate": 1.2598880429848252e-05, "loss": 0.765, "step": 9733 }, { "epoch": 1.3016849424979942, "grad_norm": 1.324569821357727, "learning_rate": 1.259748626507511e-05, "loss": 0.7123, "step": 9734 }, { "epoch": 1.301818668093073, "grad_norm": 1.2169783115386963, "learning_rate": 1.2596092046159854e-05, "loss": 0.6729, "step": 9735 }, { "epoch": 1.3019523936881519, "grad_norm": 1.226935625076294, "learning_rate": 1.2594697773131542e-05, "loss": 0.7708, "step": 9736 }, { "epoch": 1.302086119283231, "grad_norm": 1.1610592603683472, "learning_rate": 1.2593303446019234e-05, "loss": 0.7841, "step": 9737 }, { "epoch": 1.3022198448783098, "grad_norm": 1.2631546258926392, "learning_rate": 1.2591909064852002e-05, "loss": 0.7388, "step": 9738 }, { "epoch": 1.3023535704733886, "grad_norm": 1.096208095550537, "learning_rate": 1.2590514629658905e-05, "loss": 0.7302, "step": 9739 }, { "epoch": 1.3024872960684675, "grad_norm": 1.2070401906967163, "learning_rate": 1.2589120140469007e-05, "loss": 0.7587, "step": 9740 }, { "epoch": 1.3026210216635463, "grad_norm": 1.188755989074707, "learning_rate": 1.258772559731138e-05, "loss": 0.8041, "step": 9741 }, { "epoch": 1.3027547472586254, "grad_norm": 1.168511152267456, "learning_rate": 1.2586331000215087e-05, "loss": 0.6858, "step": 9742 }, { "epoch": 1.3028884728537042, "grad_norm": 1.1929455995559692, "learning_rate": 1.2584936349209201e-05, "loss": 0.6667, "step": 9743 }, { "epoch": 1.303022198448783, "grad_norm": 1.147537350654602, "learning_rate": 1.258354164432279e-05, "loss": 0.6325, "step": 9744 }, { "epoch": 1.303155924043862, "grad_norm": 1.1671128273010254, "learning_rate": 1.2582146885584925e-05, "loss": 0.6657, "step": 9745 }, { "epoch": 1.3032896496389408, "grad_norm": 1.2589455842971802, "learning_rate": 1.2580752073024677e-05, "loss": 0.7265, "step": 9746 }, { "epoch": 1.3034233752340199, "grad_norm": 1.1517754793167114, "learning_rate": 1.2579357206671126e-05, "loss": 0.7022, "step": 9747 }, { "epoch": 1.3035571008290987, "grad_norm": 1.2010449171066284, "learning_rate": 1.2577962286553338e-05, "loss": 0.7386, "step": 9748 }, { "epoch": 1.3036908264241776, "grad_norm": 1.0198960304260254, "learning_rate": 1.2576567312700394e-05, "loss": 0.644, "step": 9749 }, { "epoch": 1.3038245520192566, "grad_norm": 1.1896487474441528, "learning_rate": 1.2575172285141371e-05, "loss": 0.7121, "step": 9750 }, { "epoch": 1.3039582776143352, "grad_norm": 1.1681534051895142, "learning_rate": 1.2573777203905349e-05, "loss": 0.7076, "step": 9751 }, { "epoch": 1.3040920032094143, "grad_norm": 1.150976300239563, "learning_rate": 1.25723820690214e-05, "loss": 0.7556, "step": 9752 }, { "epoch": 1.3042257288044932, "grad_norm": 1.247734546661377, "learning_rate": 1.2570986880518605e-05, "loss": 0.7323, "step": 9753 }, { "epoch": 1.304359454399572, "grad_norm": 1.0402568578720093, "learning_rate": 1.2569591638426054e-05, "loss": 0.6584, "step": 9754 }, { "epoch": 1.304493179994651, "grad_norm": 1.172666311264038, "learning_rate": 1.2568196342772823e-05, "loss": 0.7317, "step": 9755 }, { "epoch": 1.30462690558973, "grad_norm": 1.2080496549606323, "learning_rate": 1.2566800993587997e-05, "loss": 0.674, "step": 9756 }, { "epoch": 1.3047606311848088, "grad_norm": 1.1687921285629272, "learning_rate": 1.2565405590900659e-05, "loss": 0.7222, "step": 9757 }, { "epoch": 1.3048943567798876, "grad_norm": 1.1584309339523315, "learning_rate": 1.2564010134739897e-05, "loss": 0.7102, "step": 9758 }, { "epoch": 1.3050280823749665, "grad_norm": 1.1695126295089722, "learning_rate": 1.2562614625134797e-05, "loss": 0.7514, "step": 9759 }, { "epoch": 1.3051618079700456, "grad_norm": 1.1632521152496338, "learning_rate": 1.2561219062114447e-05, "loss": 0.6675, "step": 9760 }, { "epoch": 1.3052955335651244, "grad_norm": 1.182298183441162, "learning_rate": 1.2559823445707936e-05, "loss": 0.7198, "step": 9761 }, { "epoch": 1.3054292591602032, "grad_norm": 1.1981295347213745, "learning_rate": 1.2558427775944357e-05, "loss": 0.6733, "step": 9762 }, { "epoch": 1.305562984755282, "grad_norm": 1.0663963556289673, "learning_rate": 1.25570320528528e-05, "loss": 0.7103, "step": 9763 }, { "epoch": 1.305696710350361, "grad_norm": 1.2121855020523071, "learning_rate": 1.2555636276462356e-05, "loss": 0.7443, "step": 9764 }, { "epoch": 1.30583043594544, "grad_norm": 1.295791506767273, "learning_rate": 1.2554240446802118e-05, "loss": 0.7468, "step": 9765 }, { "epoch": 1.3059641615405189, "grad_norm": 1.2298610210418701, "learning_rate": 1.2552844563901178e-05, "loss": 0.7321, "step": 9766 }, { "epoch": 1.3060978871355977, "grad_norm": 1.0248095989227295, "learning_rate": 1.2551448627788641e-05, "loss": 0.6218, "step": 9767 }, { "epoch": 1.3062316127306768, "grad_norm": 1.1197268962860107, "learning_rate": 1.2550052638493597e-05, "loss": 0.7273, "step": 9768 }, { "epoch": 1.3063653383257556, "grad_norm": 1.0497872829437256, "learning_rate": 1.2548656596045147e-05, "loss": 0.6861, "step": 9769 }, { "epoch": 1.3064990639208345, "grad_norm": 1.1599812507629395, "learning_rate": 1.254726050047239e-05, "loss": 0.6677, "step": 9770 }, { "epoch": 1.3066327895159133, "grad_norm": 1.0988116264343262, "learning_rate": 1.2545864351804423e-05, "loss": 0.7195, "step": 9771 }, { "epoch": 1.3067665151109922, "grad_norm": 1.0831265449523926, "learning_rate": 1.2544468150070351e-05, "loss": 0.7278, "step": 9772 }, { "epoch": 1.3069002407060712, "grad_norm": 1.0655025243759155, "learning_rate": 1.2543071895299272e-05, "loss": 0.6905, "step": 9773 }, { "epoch": 1.30703396630115, "grad_norm": 1.089412808418274, "learning_rate": 1.2541675587520296e-05, "loss": 0.7488, "step": 9774 }, { "epoch": 1.307167691896229, "grad_norm": 1.0837054252624512, "learning_rate": 1.2540279226762526e-05, "loss": 0.6901, "step": 9775 }, { "epoch": 1.3073014174913078, "grad_norm": 1.1585474014282227, "learning_rate": 1.2538882813055064e-05, "loss": 0.6739, "step": 9776 }, { "epoch": 1.3074351430863866, "grad_norm": 1.3716362714767456, "learning_rate": 1.253748634642702e-05, "loss": 0.7617, "step": 9777 }, { "epoch": 1.3075688686814657, "grad_norm": 1.1717309951782227, "learning_rate": 1.25360898269075e-05, "loss": 0.72, "step": 9778 }, { "epoch": 1.3077025942765446, "grad_norm": 1.1438543796539307, "learning_rate": 1.2534693254525614e-05, "loss": 0.6551, "step": 9779 }, { "epoch": 1.3078363198716234, "grad_norm": 1.1696605682373047, "learning_rate": 1.2533296629310477e-05, "loss": 0.7289, "step": 9780 }, { "epoch": 1.3079700454667023, "grad_norm": 1.108705997467041, "learning_rate": 1.253189995129119e-05, "loss": 0.7452, "step": 9781 }, { "epoch": 1.308103771061781, "grad_norm": 1.2557648420333862, "learning_rate": 1.2530503220496875e-05, "loss": 0.6944, "step": 9782 }, { "epoch": 1.3082374966568602, "grad_norm": 1.1566818952560425, "learning_rate": 1.2529106436956642e-05, "loss": 0.8241, "step": 9783 }, { "epoch": 1.308371222251939, "grad_norm": 1.4061435461044312, "learning_rate": 1.2527709600699605e-05, "loss": 0.7291, "step": 9784 }, { "epoch": 1.3085049478470179, "grad_norm": 1.1407550573349, "learning_rate": 1.2526312711754877e-05, "loss": 0.7503, "step": 9785 }, { "epoch": 1.308638673442097, "grad_norm": 1.1367970705032349, "learning_rate": 1.252491577015158e-05, "loss": 0.7478, "step": 9786 }, { "epoch": 1.3087723990371758, "grad_norm": 1.2011934518814087, "learning_rate": 1.252351877591883e-05, "loss": 0.7855, "step": 9787 }, { "epoch": 1.3089061246322546, "grad_norm": 1.316261887550354, "learning_rate": 1.2522121729085748e-05, "loss": 0.714, "step": 9788 }, { "epoch": 1.3090398502273335, "grad_norm": 1.2697792053222656, "learning_rate": 1.252072462968145e-05, "loss": 0.7108, "step": 9789 }, { "epoch": 1.3091735758224123, "grad_norm": 1.1292520761489868, "learning_rate": 1.2519327477735059e-05, "loss": 0.6427, "step": 9790 }, { "epoch": 1.3093073014174914, "grad_norm": 1.155401349067688, "learning_rate": 1.2517930273275698e-05, "loss": 0.6429, "step": 9791 }, { "epoch": 1.3094410270125703, "grad_norm": 1.116864800453186, "learning_rate": 1.2516533016332489e-05, "loss": 0.7268, "step": 9792 }, { "epoch": 1.309574752607649, "grad_norm": 1.2376480102539062, "learning_rate": 1.2515135706934556e-05, "loss": 0.6833, "step": 9793 }, { "epoch": 1.309708478202728, "grad_norm": 1.1512469053268433, "learning_rate": 1.2513738345111029e-05, "loss": 0.7223, "step": 9794 }, { "epoch": 1.3098422037978068, "grad_norm": 1.230810284614563, "learning_rate": 1.251234093089103e-05, "loss": 0.7514, "step": 9795 }, { "epoch": 1.3099759293928859, "grad_norm": 1.206926703453064, "learning_rate": 1.2510943464303688e-05, "loss": 0.7205, "step": 9796 }, { "epoch": 1.3101096549879647, "grad_norm": 1.2250057458877563, "learning_rate": 1.2509545945378134e-05, "loss": 0.8289, "step": 9797 }, { "epoch": 1.3102433805830436, "grad_norm": 1.0991623401641846, "learning_rate": 1.2508148374143492e-05, "loss": 0.6943, "step": 9798 }, { "epoch": 1.3103771061781224, "grad_norm": 1.1039295196533203, "learning_rate": 1.25067507506289e-05, "loss": 0.6736, "step": 9799 }, { "epoch": 1.3105108317732013, "grad_norm": 1.2919847965240479, "learning_rate": 1.250535307486349e-05, "loss": 0.7658, "step": 9800 }, { "epoch": 1.3106445573682803, "grad_norm": 1.1397085189819336, "learning_rate": 1.2503955346876388e-05, "loss": 0.6968, "step": 9801 }, { "epoch": 1.3107782829633592, "grad_norm": 1.3238701820373535, "learning_rate": 1.2502557566696736e-05, "loss": 0.8343, "step": 9802 }, { "epoch": 1.310912008558438, "grad_norm": 1.286534309387207, "learning_rate": 1.2501159734353665e-05, "loss": 0.7589, "step": 9803 }, { "epoch": 1.311045734153517, "grad_norm": 1.209022045135498, "learning_rate": 1.2499761849876313e-05, "loss": 0.7938, "step": 9804 }, { "epoch": 1.311179459748596, "grad_norm": 1.2577706575393677, "learning_rate": 1.2498363913293817e-05, "loss": 0.7335, "step": 9805 }, { "epoch": 1.3113131853436748, "grad_norm": 1.0877881050109863, "learning_rate": 1.2496965924635314e-05, "loss": 0.6639, "step": 9806 }, { "epoch": 1.3114469109387537, "grad_norm": 1.244131326675415, "learning_rate": 1.2495567883929947e-05, "loss": 0.6619, "step": 9807 }, { "epoch": 1.3115806365338325, "grad_norm": 1.164082407951355, "learning_rate": 1.2494169791206859e-05, "loss": 0.6612, "step": 9808 }, { "epoch": 1.3117143621289116, "grad_norm": 1.091600775718689, "learning_rate": 1.2492771646495184e-05, "loss": 0.6682, "step": 9809 }, { "epoch": 1.3118480877239904, "grad_norm": 1.1382920742034912, "learning_rate": 1.2491373449824072e-05, "loss": 0.6888, "step": 9810 }, { "epoch": 1.3119818133190693, "grad_norm": 1.0887612104415894, "learning_rate": 1.2489975201222662e-05, "loss": 0.6971, "step": 9811 }, { "epoch": 1.3121155389141481, "grad_norm": 1.125855565071106, "learning_rate": 1.2488576900720101e-05, "loss": 0.6909, "step": 9812 }, { "epoch": 1.312249264509227, "grad_norm": 1.3042161464691162, "learning_rate": 1.2487178548345538e-05, "loss": 0.7382, "step": 9813 }, { "epoch": 1.312382990104306, "grad_norm": 0.9865109920501709, "learning_rate": 1.2485780144128116e-05, "loss": 0.5926, "step": 9814 }, { "epoch": 1.3125167156993849, "grad_norm": 1.1443166732788086, "learning_rate": 1.2484381688096988e-05, "loss": 0.6421, "step": 9815 }, { "epoch": 1.3126504412944637, "grad_norm": 1.1826109886169434, "learning_rate": 1.2482983180281302e-05, "loss": 0.7637, "step": 9816 }, { "epoch": 1.3127841668895426, "grad_norm": 1.1959513425827026, "learning_rate": 1.2481584620710203e-05, "loss": 0.7438, "step": 9817 }, { "epoch": 1.3129178924846214, "grad_norm": 1.32578444480896, "learning_rate": 1.248018600941285e-05, "loss": 0.8757, "step": 9818 }, { "epoch": 1.3130516180797005, "grad_norm": 1.0088437795639038, "learning_rate": 1.2478787346418392e-05, "loss": 0.6241, "step": 9819 }, { "epoch": 1.3131853436747793, "grad_norm": 1.1620514392852783, "learning_rate": 1.2477388631755987e-05, "loss": 0.6507, "step": 9820 }, { "epoch": 1.3133190692698582, "grad_norm": 1.1568121910095215, "learning_rate": 1.2475989865454783e-05, "loss": 0.6384, "step": 9821 }, { "epoch": 1.3134527948649373, "grad_norm": 1.160117268562317, "learning_rate": 1.247459104754394e-05, "loss": 0.7374, "step": 9822 }, { "epoch": 1.3135865204600161, "grad_norm": 1.2202023267745972, "learning_rate": 1.2473192178052615e-05, "loss": 0.7731, "step": 9823 }, { "epoch": 1.313720246055095, "grad_norm": 1.1822270154953003, "learning_rate": 1.2471793257009965e-05, "loss": 0.7048, "step": 9824 }, { "epoch": 1.3138539716501738, "grad_norm": 1.2593294382095337, "learning_rate": 1.2470394284445151e-05, "loss": 0.7624, "step": 9825 }, { "epoch": 1.3139876972452527, "grad_norm": 1.3090794086456299, "learning_rate": 1.2468995260387332e-05, "loss": 0.8416, "step": 9826 }, { "epoch": 1.3141214228403317, "grad_norm": 1.183261513710022, "learning_rate": 1.2467596184865669e-05, "loss": 0.721, "step": 9827 }, { "epoch": 1.3142551484354106, "grad_norm": 1.3361262083053589, "learning_rate": 1.2466197057909326e-05, "loss": 0.7182, "step": 9828 }, { "epoch": 1.3143888740304894, "grad_norm": 1.190143346786499, "learning_rate": 1.2464797879547464e-05, "loss": 0.7378, "step": 9829 }, { "epoch": 1.3145225996255683, "grad_norm": 1.142507553100586, "learning_rate": 1.2463398649809246e-05, "loss": 0.7069, "step": 9830 }, { "epoch": 1.3146563252206471, "grad_norm": 1.1049679517745972, "learning_rate": 1.2461999368723843e-05, "loss": 0.7262, "step": 9831 }, { "epoch": 1.3147900508157262, "grad_norm": 1.120949149131775, "learning_rate": 1.2460600036320421e-05, "loss": 0.6741, "step": 9832 }, { "epoch": 1.314923776410805, "grad_norm": 1.3308773040771484, "learning_rate": 1.2459200652628143e-05, "loss": 0.7437, "step": 9833 }, { "epoch": 1.315057502005884, "grad_norm": 1.2362589836120605, "learning_rate": 1.2457801217676182e-05, "loss": 0.6741, "step": 9834 }, { "epoch": 1.3151912276009627, "grad_norm": 1.239372968673706, "learning_rate": 1.2456401731493705e-05, "loss": 0.798, "step": 9835 }, { "epoch": 1.3153249531960416, "grad_norm": 1.1307982206344604, "learning_rate": 1.2455002194109886e-05, "loss": 0.6919, "step": 9836 }, { "epoch": 1.3154586787911207, "grad_norm": 1.173709511756897, "learning_rate": 1.2453602605553894e-05, "loss": 0.7402, "step": 9837 }, { "epoch": 1.3155924043861995, "grad_norm": 1.1248339414596558, "learning_rate": 1.2452202965854905e-05, "loss": 0.7754, "step": 9838 }, { "epoch": 1.3157261299812784, "grad_norm": 1.1756579875946045, "learning_rate": 1.2450803275042092e-05, "loss": 0.7174, "step": 9839 }, { "epoch": 1.3158598555763574, "grad_norm": 1.192704439163208, "learning_rate": 1.2449403533144629e-05, "loss": 0.7065, "step": 9840 }, { "epoch": 1.3159935811714363, "grad_norm": 1.1554477214813232, "learning_rate": 1.2448003740191694e-05, "loss": 0.6468, "step": 9841 }, { "epoch": 1.3161273067665151, "grad_norm": 1.3115088939666748, "learning_rate": 1.2446603896212461e-05, "loss": 0.7548, "step": 9842 }, { "epoch": 1.316261032361594, "grad_norm": 1.1918281316757202, "learning_rate": 1.2445204001236112e-05, "loss": 0.6631, "step": 9843 }, { "epoch": 1.3163947579566728, "grad_norm": 1.0965884923934937, "learning_rate": 1.2443804055291826e-05, "loss": 0.6651, "step": 9844 }, { "epoch": 1.316528483551752, "grad_norm": 1.3044089078903198, "learning_rate": 1.2442404058408784e-05, "loss": 0.7715, "step": 9845 }, { "epoch": 1.3166622091468307, "grad_norm": 1.1114118099212646, "learning_rate": 1.2441004010616165e-05, "loss": 0.6628, "step": 9846 }, { "epoch": 1.3167959347419096, "grad_norm": 1.1898798942565918, "learning_rate": 1.2439603911943152e-05, "loss": 0.6393, "step": 9847 }, { "epoch": 1.3169296603369884, "grad_norm": 1.1954336166381836, "learning_rate": 1.2438203762418934e-05, "loss": 0.6895, "step": 9848 }, { "epoch": 1.3170633859320673, "grad_norm": 1.3290241956710815, "learning_rate": 1.2436803562072687e-05, "loss": 0.7728, "step": 9849 }, { "epoch": 1.3171971115271464, "grad_norm": 1.2889747619628906, "learning_rate": 1.2435403310933606e-05, "loss": 0.7267, "step": 9850 }, { "epoch": 1.3173308371222252, "grad_norm": 1.4065557718276978, "learning_rate": 1.2434003009030869e-05, "loss": 0.7662, "step": 9851 }, { "epoch": 1.317464562717304, "grad_norm": 1.2182893753051758, "learning_rate": 1.2432602656393673e-05, "loss": 0.7468, "step": 9852 }, { "epoch": 1.3175982883123831, "grad_norm": 1.262793779373169, "learning_rate": 1.2431202253051197e-05, "loss": 0.7328, "step": 9853 }, { "epoch": 1.3177320139074618, "grad_norm": 1.2878621816635132, "learning_rate": 1.242980179903264e-05, "loss": 0.7342, "step": 9854 }, { "epoch": 1.3178657395025408, "grad_norm": 1.4076778888702393, "learning_rate": 1.2428401294367189e-05, "loss": 0.691, "step": 9855 }, { "epoch": 1.3179994650976197, "grad_norm": 1.144184947013855, "learning_rate": 1.2427000739084036e-05, "loss": 0.6708, "step": 9856 }, { "epoch": 1.3181331906926985, "grad_norm": 1.481675386428833, "learning_rate": 1.2425600133212377e-05, "loss": 0.7349, "step": 9857 }, { "epoch": 1.3182669162877776, "grad_norm": 1.1425468921661377, "learning_rate": 1.2424199476781403e-05, "loss": 0.635, "step": 9858 }, { "epoch": 1.3184006418828564, "grad_norm": 1.2624248266220093, "learning_rate": 1.242279876982031e-05, "loss": 0.7103, "step": 9859 }, { "epoch": 1.3185343674779353, "grad_norm": 1.2202231884002686, "learning_rate": 1.2421398012358294e-05, "loss": 0.7427, "step": 9860 }, { "epoch": 1.3186680930730141, "grad_norm": 1.3206868171691895, "learning_rate": 1.241999720442456e-05, "loss": 0.7292, "step": 9861 }, { "epoch": 1.318801818668093, "grad_norm": 1.1631275415420532, "learning_rate": 1.2418596346048293e-05, "loss": 0.7164, "step": 9862 }, { "epoch": 1.318935544263172, "grad_norm": 1.165019154548645, "learning_rate": 1.2417195437258697e-05, "loss": 0.7227, "step": 9863 }, { "epoch": 1.319069269858251, "grad_norm": 1.1370221376419067, "learning_rate": 1.2415794478084981e-05, "loss": 0.6549, "step": 9864 }, { "epoch": 1.3192029954533298, "grad_norm": 1.2088977098464966, "learning_rate": 1.2414393468556341e-05, "loss": 0.7154, "step": 9865 }, { "epoch": 1.3193367210484086, "grad_norm": 1.113718867301941, "learning_rate": 1.2412992408701979e-05, "loss": 0.7219, "step": 9866 }, { "epoch": 1.3194704466434874, "grad_norm": 1.08150053024292, "learning_rate": 1.2411591298551096e-05, "loss": 0.6798, "step": 9867 }, { "epoch": 1.3196041722385665, "grad_norm": 1.22684907913208, "learning_rate": 1.2410190138132903e-05, "loss": 0.7019, "step": 9868 }, { "epoch": 1.3197378978336454, "grad_norm": 1.1325994729995728, "learning_rate": 1.24087889274766e-05, "loss": 0.6658, "step": 9869 }, { "epoch": 1.3198716234287242, "grad_norm": 1.0684702396392822, "learning_rate": 1.24073876666114e-05, "loss": 0.7082, "step": 9870 }, { "epoch": 1.3200053490238033, "grad_norm": 1.250662922859192, "learning_rate": 1.2405986355566506e-05, "loss": 0.7856, "step": 9871 }, { "epoch": 1.3201390746188821, "grad_norm": 1.1444483995437622, "learning_rate": 1.2404584994371128e-05, "loss": 0.6737, "step": 9872 }, { "epoch": 1.320272800213961, "grad_norm": 1.1788280010223389, "learning_rate": 1.2403183583054479e-05, "loss": 0.7029, "step": 9873 }, { "epoch": 1.3204065258090398, "grad_norm": 1.2111122608184814, "learning_rate": 1.2401782121645767e-05, "loss": 0.7459, "step": 9874 }, { "epoch": 1.3205402514041187, "grad_norm": 1.0028976202011108, "learning_rate": 1.2400380610174205e-05, "loss": 0.6452, "step": 9875 }, { "epoch": 1.3206739769991978, "grad_norm": 1.227378487586975, "learning_rate": 1.2398979048669002e-05, "loss": 0.7348, "step": 9876 }, { "epoch": 1.3208077025942766, "grad_norm": 1.2527941465377808, "learning_rate": 1.2397577437159383e-05, "loss": 0.781, "step": 9877 }, { "epoch": 1.3209414281893554, "grad_norm": 1.2534083127975464, "learning_rate": 1.2396175775674553e-05, "loss": 0.7482, "step": 9878 }, { "epoch": 1.3210751537844343, "grad_norm": 1.2838736772537231, "learning_rate": 1.2394774064243733e-05, "loss": 0.6907, "step": 9879 }, { "epoch": 1.3212088793795131, "grad_norm": 1.3349031209945679, "learning_rate": 1.2393372302896138e-05, "loss": 0.7969, "step": 9880 }, { "epoch": 1.3213426049745922, "grad_norm": 1.2055402994155884, "learning_rate": 1.2391970491660988e-05, "loss": 0.7507, "step": 9881 }, { "epoch": 1.321476330569671, "grad_norm": 1.1701058149337769, "learning_rate": 1.2390568630567501e-05, "loss": 0.701, "step": 9882 }, { "epoch": 1.32161005616475, "grad_norm": 1.0695701837539673, "learning_rate": 1.2389166719644901e-05, "loss": 0.7477, "step": 9883 }, { "epoch": 1.3217437817598288, "grad_norm": 1.1811188459396362, "learning_rate": 1.2387764758922405e-05, "loss": 0.7071, "step": 9884 }, { "epoch": 1.3218775073549076, "grad_norm": 1.0748162269592285, "learning_rate": 1.2386362748429239e-05, "loss": 0.6892, "step": 9885 }, { "epoch": 1.3220112329499867, "grad_norm": 1.3554185628890991, "learning_rate": 1.2384960688194623e-05, "loss": 0.7351, "step": 9886 }, { "epoch": 1.3221449585450655, "grad_norm": 1.2020564079284668, "learning_rate": 1.2383558578247785e-05, "loss": 0.7176, "step": 9887 }, { "epoch": 1.3222786841401444, "grad_norm": 1.2024348974227905, "learning_rate": 1.2382156418617948e-05, "loss": 0.6737, "step": 9888 }, { "epoch": 1.3224124097352234, "grad_norm": 1.1332571506500244, "learning_rate": 1.238075420933434e-05, "loss": 0.6789, "step": 9889 }, { "epoch": 1.3225461353303023, "grad_norm": 1.180061936378479, "learning_rate": 1.2379351950426188e-05, "loss": 0.744, "step": 9890 }, { "epoch": 1.3226798609253811, "grad_norm": 1.1906039714813232, "learning_rate": 1.2377949641922724e-05, "loss": 0.7487, "step": 9891 }, { "epoch": 1.32281358652046, "grad_norm": 1.2536375522613525, "learning_rate": 1.2376547283853173e-05, "loss": 0.7147, "step": 9892 }, { "epoch": 1.3229473121155388, "grad_norm": 1.2329235076904297, "learning_rate": 1.2375144876246771e-05, "loss": 0.7792, "step": 9893 }, { "epoch": 1.323081037710618, "grad_norm": 1.1096115112304688, "learning_rate": 1.2373742419132744e-05, "loss": 0.6915, "step": 9894 }, { "epoch": 1.3232147633056968, "grad_norm": 1.2646775245666504, "learning_rate": 1.2372339912540326e-05, "loss": 0.7412, "step": 9895 }, { "epoch": 1.3233484889007756, "grad_norm": 1.1520344018936157, "learning_rate": 1.2370937356498756e-05, "loss": 0.7046, "step": 9896 }, { "epoch": 1.3234822144958545, "grad_norm": 1.079518437385559, "learning_rate": 1.2369534751037267e-05, "loss": 0.7025, "step": 9897 }, { "epoch": 1.3236159400909333, "grad_norm": 1.090664267539978, "learning_rate": 1.2368132096185091e-05, "loss": 0.7132, "step": 9898 }, { "epoch": 1.3237496656860124, "grad_norm": 1.2134525775909424, "learning_rate": 1.2366729391971466e-05, "loss": 0.7124, "step": 9899 }, { "epoch": 1.3238833912810912, "grad_norm": 1.0870977640151978, "learning_rate": 1.2365326638425632e-05, "loss": 0.6939, "step": 9900 }, { "epoch": 1.32401711687617, "grad_norm": 1.1053849458694458, "learning_rate": 1.236392383557683e-05, "loss": 0.6478, "step": 9901 }, { "epoch": 1.324150842471249, "grad_norm": 1.1693007946014404, "learning_rate": 1.2362520983454295e-05, "loss": 0.706, "step": 9902 }, { "epoch": 1.3242845680663278, "grad_norm": 1.253507375717163, "learning_rate": 1.2361118082087271e-05, "loss": 0.7167, "step": 9903 }, { "epoch": 1.3244182936614068, "grad_norm": 1.2455089092254639, "learning_rate": 1.2359715131505001e-05, "loss": 0.7083, "step": 9904 }, { "epoch": 1.3245520192564857, "grad_norm": 1.2626858949661255, "learning_rate": 1.235831213173673e-05, "loss": 0.5995, "step": 9905 }, { "epoch": 1.3246857448515645, "grad_norm": 1.2876940965652466, "learning_rate": 1.2356909082811697e-05, "loss": 0.8383, "step": 9906 }, { "epoch": 1.3248194704466436, "grad_norm": 1.0716735124588013, "learning_rate": 1.2355505984759148e-05, "loss": 0.6272, "step": 9907 }, { "epoch": 1.3249531960417225, "grad_norm": 1.3178489208221436, "learning_rate": 1.2354102837608328e-05, "loss": 0.7381, "step": 9908 }, { "epoch": 1.3250869216368013, "grad_norm": 1.1800323724746704, "learning_rate": 1.2352699641388493e-05, "loss": 0.6908, "step": 9909 }, { "epoch": 1.3252206472318802, "grad_norm": 1.3301548957824707, "learning_rate": 1.2351296396128882e-05, "loss": 0.7214, "step": 9910 }, { "epoch": 1.325354372826959, "grad_norm": 1.2469210624694824, "learning_rate": 1.234989310185875e-05, "loss": 0.7731, "step": 9911 }, { "epoch": 1.325488098422038, "grad_norm": 1.2375925779342651, "learning_rate": 1.2348489758607343e-05, "loss": 0.7548, "step": 9912 }, { "epoch": 1.325621824017117, "grad_norm": 1.3275505304336548, "learning_rate": 1.2347086366403916e-05, "loss": 0.7618, "step": 9913 }, { "epoch": 1.3257555496121958, "grad_norm": 1.0136315822601318, "learning_rate": 1.2345682925277716e-05, "loss": 0.6206, "step": 9914 }, { "epoch": 1.3258892752072746, "grad_norm": 1.0719951391220093, "learning_rate": 1.2344279435258003e-05, "loss": 0.6523, "step": 9915 }, { "epoch": 1.3260230008023535, "grad_norm": 1.3816823959350586, "learning_rate": 1.2342875896374028e-05, "loss": 0.7175, "step": 9916 }, { "epoch": 1.3261567263974325, "grad_norm": 1.1184625625610352, "learning_rate": 1.2341472308655047e-05, "loss": 0.6774, "step": 9917 }, { "epoch": 1.3262904519925114, "grad_norm": 1.2849780321121216, "learning_rate": 1.2340068672130315e-05, "loss": 0.7036, "step": 9918 }, { "epoch": 1.3264241775875902, "grad_norm": 1.1214709281921387, "learning_rate": 1.2338664986829092e-05, "loss": 0.6618, "step": 9919 }, { "epoch": 1.326557903182669, "grad_norm": 1.1513007879257202, "learning_rate": 1.2337261252780632e-05, "loss": 0.6743, "step": 9920 }, { "epoch": 1.326691628777748, "grad_norm": 1.2045342922210693, "learning_rate": 1.23358574700142e-05, "loss": 0.6844, "step": 9921 }, { "epoch": 1.326825354372827, "grad_norm": 1.112687110900879, "learning_rate": 1.2334453638559057e-05, "loss": 0.7418, "step": 9922 }, { "epoch": 1.3269590799679059, "grad_norm": 1.3229628801345825, "learning_rate": 1.2333049758444457e-05, "loss": 0.7144, "step": 9923 }, { "epoch": 1.3270928055629847, "grad_norm": 1.1406444311141968, "learning_rate": 1.233164582969967e-05, "loss": 0.6736, "step": 9924 }, { "epoch": 1.3272265311580638, "grad_norm": 1.2382177114486694, "learning_rate": 1.2330241852353959e-05, "loss": 0.702, "step": 9925 }, { "epoch": 1.3273602567531426, "grad_norm": 1.1028498411178589, "learning_rate": 1.2328837826436581e-05, "loss": 0.7194, "step": 9926 }, { "epoch": 1.3274939823482215, "grad_norm": 1.2120846509933472, "learning_rate": 1.232743375197681e-05, "loss": 0.7987, "step": 9927 }, { "epoch": 1.3276277079433003, "grad_norm": 1.3482658863067627, "learning_rate": 1.2326029629003908e-05, "loss": 0.812, "step": 9928 }, { "epoch": 1.3277614335383792, "grad_norm": 1.215316653251648, "learning_rate": 1.2324625457547148e-05, "loss": 0.7148, "step": 9929 }, { "epoch": 1.3278951591334582, "grad_norm": 1.3712847232818604, "learning_rate": 1.2323221237635791e-05, "loss": 0.7102, "step": 9930 }, { "epoch": 1.328028884728537, "grad_norm": 1.1967591047286987, "learning_rate": 1.2321816969299112e-05, "loss": 0.6588, "step": 9931 }, { "epoch": 1.328162610323616, "grad_norm": 1.1134616136550903, "learning_rate": 1.2320412652566377e-05, "loss": 0.6579, "step": 9932 }, { "epoch": 1.3282963359186948, "grad_norm": 1.2588043212890625, "learning_rate": 1.2319008287466865e-05, "loss": 0.6781, "step": 9933 }, { "epoch": 1.3284300615137736, "grad_norm": 1.054167628288269, "learning_rate": 1.2317603874029843e-05, "loss": 0.704, "step": 9934 }, { "epoch": 1.3285637871088527, "grad_norm": 1.2699534893035889, "learning_rate": 1.2316199412284584e-05, "loss": 0.7859, "step": 9935 }, { "epoch": 1.3286975127039315, "grad_norm": 1.1028108596801758, "learning_rate": 1.2314794902260368e-05, "loss": 0.6493, "step": 9936 }, { "epoch": 1.3288312382990104, "grad_norm": 1.266394019126892, "learning_rate": 1.2313390343986467e-05, "loss": 0.69, "step": 9937 }, { "epoch": 1.3289649638940892, "grad_norm": 1.08072030544281, "learning_rate": 1.2311985737492155e-05, "loss": 0.6659, "step": 9938 }, { "epoch": 1.329098689489168, "grad_norm": 1.2309006452560425, "learning_rate": 1.2310581082806713e-05, "loss": 0.7271, "step": 9939 }, { "epoch": 1.3292324150842472, "grad_norm": 1.2696044445037842, "learning_rate": 1.2309176379959417e-05, "loss": 0.8177, "step": 9940 }, { "epoch": 1.329366140679326, "grad_norm": 1.214389443397522, "learning_rate": 1.2307771628979555e-05, "loss": 0.7734, "step": 9941 }, { "epoch": 1.3294998662744049, "grad_norm": 1.1185178756713867, "learning_rate": 1.2306366829896398e-05, "loss": 0.6796, "step": 9942 }, { "epoch": 1.329633591869484, "grad_norm": 1.2123051881790161, "learning_rate": 1.2304961982739235e-05, "loss": 0.6372, "step": 9943 }, { "epoch": 1.3297673174645628, "grad_norm": 1.0923655033111572, "learning_rate": 1.2303557087537341e-05, "loss": 0.6825, "step": 9944 }, { "epoch": 1.3299010430596416, "grad_norm": 1.4313167333602905, "learning_rate": 1.2302152144320005e-05, "loss": 0.7774, "step": 9945 }, { "epoch": 1.3300347686547205, "grad_norm": 1.140202283859253, "learning_rate": 1.230074715311651e-05, "loss": 0.6922, "step": 9946 }, { "epoch": 1.3301684942497993, "grad_norm": 1.2694644927978516, "learning_rate": 1.2299342113956143e-05, "loss": 0.7476, "step": 9947 }, { "epoch": 1.3303022198448784, "grad_norm": 1.143731951713562, "learning_rate": 1.229793702686819e-05, "loss": 0.6552, "step": 9948 }, { "epoch": 1.3304359454399572, "grad_norm": 1.2158550024032593, "learning_rate": 1.2296531891881937e-05, "loss": 0.6604, "step": 9949 }, { "epoch": 1.330569671035036, "grad_norm": 1.205003023147583, "learning_rate": 1.2295126709026679e-05, "loss": 0.6641, "step": 9950 }, { "epoch": 1.330703396630115, "grad_norm": 1.2294111251831055, "learning_rate": 1.2293721478331695e-05, "loss": 0.6911, "step": 9951 }, { "epoch": 1.3308371222251938, "grad_norm": 1.201937198638916, "learning_rate": 1.2292316199826285e-05, "loss": 0.7307, "step": 9952 }, { "epoch": 1.3309708478202729, "grad_norm": 1.223040223121643, "learning_rate": 1.2290910873539734e-05, "loss": 0.6972, "step": 9953 }, { "epoch": 1.3311045734153517, "grad_norm": 1.185958743095398, "learning_rate": 1.2289505499501341e-05, "loss": 0.6867, "step": 9954 }, { "epoch": 1.3312382990104306, "grad_norm": 1.09072744846344, "learning_rate": 1.2288100077740398e-05, "loss": 0.7329, "step": 9955 }, { "epoch": 1.3313720246055096, "grad_norm": 1.4063736200332642, "learning_rate": 1.2286694608286197e-05, "loss": 0.7516, "step": 9956 }, { "epoch": 1.3315057502005883, "grad_norm": 1.1567846536636353, "learning_rate": 1.2285289091168034e-05, "loss": 0.6729, "step": 9957 }, { "epoch": 1.3316394757956673, "grad_norm": 1.10630464553833, "learning_rate": 1.2283883526415208e-05, "loss": 0.7302, "step": 9958 }, { "epoch": 1.3317732013907462, "grad_norm": 1.1214838027954102, "learning_rate": 1.2282477914057011e-05, "loss": 0.699, "step": 9959 }, { "epoch": 1.331906926985825, "grad_norm": 1.1347246170043945, "learning_rate": 1.228107225412275e-05, "loss": 0.6998, "step": 9960 }, { "epoch": 1.332040652580904, "grad_norm": 1.2258824110031128, "learning_rate": 1.227966654664172e-05, "loss": 0.7044, "step": 9961 }, { "epoch": 1.332174378175983, "grad_norm": 1.1314369440078735, "learning_rate": 1.2278260791643225e-05, "loss": 0.717, "step": 9962 }, { "epoch": 1.3323081037710618, "grad_norm": 1.336971640586853, "learning_rate": 1.2276854989156562e-05, "loss": 0.7743, "step": 9963 }, { "epoch": 1.3324418293661406, "grad_norm": 1.2942824363708496, "learning_rate": 1.2275449139211034e-05, "loss": 0.7857, "step": 9964 }, { "epoch": 1.3325755549612195, "grad_norm": 1.0937530994415283, "learning_rate": 1.2274043241835944e-05, "loss": 0.6634, "step": 9965 }, { "epoch": 1.3327092805562986, "grad_norm": 1.2330195903778076, "learning_rate": 1.2272637297060604e-05, "loss": 0.7603, "step": 9966 }, { "epoch": 1.3328430061513774, "grad_norm": 1.3019753694534302, "learning_rate": 1.227123130491431e-05, "loss": 0.772, "step": 9967 }, { "epoch": 1.3329767317464563, "grad_norm": 1.2161818742752075, "learning_rate": 1.2269825265426374e-05, "loss": 0.7319, "step": 9968 }, { "epoch": 1.333110457341535, "grad_norm": 1.2585318088531494, "learning_rate": 1.2268419178626104e-05, "loss": 0.7614, "step": 9969 }, { "epoch": 1.333244182936614, "grad_norm": 1.1786071062088013, "learning_rate": 1.2267013044542807e-05, "loss": 0.7228, "step": 9970 }, { "epoch": 1.333377908531693, "grad_norm": 1.220621943473816, "learning_rate": 1.226560686320579e-05, "loss": 0.6908, "step": 9971 }, { "epoch": 1.3335116341267719, "grad_norm": 1.1607320308685303, "learning_rate": 1.2264200634644366e-05, "loss": 0.6999, "step": 9972 }, { "epoch": 1.3336453597218507, "grad_norm": 1.3242387771606445, "learning_rate": 1.2262794358887847e-05, "loss": 0.8436, "step": 9973 }, { "epoch": 1.3337790853169298, "grad_norm": 1.1421105861663818, "learning_rate": 1.2261388035965544e-05, "loss": 0.7423, "step": 9974 }, { "epoch": 1.3339128109120086, "grad_norm": 1.1449205875396729, "learning_rate": 1.2259981665906774e-05, "loss": 0.7147, "step": 9975 }, { "epoch": 1.3340465365070875, "grad_norm": 1.1216039657592773, "learning_rate": 1.2258575248740847e-05, "loss": 0.7055, "step": 9976 }, { "epoch": 1.3341802621021663, "grad_norm": 1.251924991607666, "learning_rate": 1.225716878449708e-05, "loss": 0.7334, "step": 9977 }, { "epoch": 1.3343139876972452, "grad_norm": 1.2540733814239502, "learning_rate": 1.2255762273204788e-05, "loss": 0.6734, "step": 9978 }, { "epoch": 1.3344477132923243, "grad_norm": 1.111703872680664, "learning_rate": 1.2254355714893293e-05, "loss": 0.6457, "step": 9979 }, { "epoch": 1.334581438887403, "grad_norm": 1.0919688940048218, "learning_rate": 1.2252949109591908e-05, "loss": 0.7308, "step": 9980 }, { "epoch": 1.334715164482482, "grad_norm": 1.1726315021514893, "learning_rate": 1.2251542457329957e-05, "loss": 0.7669, "step": 9981 }, { "epoch": 1.3348488900775608, "grad_norm": 1.1115158796310425, "learning_rate": 1.2250135758136757e-05, "loss": 0.6425, "step": 9982 }, { "epoch": 1.3349826156726396, "grad_norm": 1.1724599599838257, "learning_rate": 1.224872901204163e-05, "loss": 0.8132, "step": 9983 }, { "epoch": 1.3351163412677187, "grad_norm": 1.1603496074676514, "learning_rate": 1.2247322219073898e-05, "loss": 0.7509, "step": 9984 }, { "epoch": 1.3352500668627976, "grad_norm": 1.2477036714553833, "learning_rate": 1.2245915379262885e-05, "loss": 0.7034, "step": 9985 }, { "epoch": 1.3353837924578764, "grad_norm": 1.1882050037384033, "learning_rate": 1.2244508492637914e-05, "loss": 0.7213, "step": 9986 }, { "epoch": 1.3355175180529553, "grad_norm": 1.2126102447509766, "learning_rate": 1.2243101559228313e-05, "loss": 0.6487, "step": 9987 }, { "epoch": 1.3356512436480341, "grad_norm": 1.1055957078933716, "learning_rate": 1.2241694579063407e-05, "loss": 0.7488, "step": 9988 }, { "epoch": 1.3357849692431132, "grad_norm": 1.0847002267837524, "learning_rate": 1.2240287552172521e-05, "loss": 0.691, "step": 9989 }, { "epoch": 1.335918694838192, "grad_norm": 1.087023138999939, "learning_rate": 1.2238880478584987e-05, "loss": 0.7366, "step": 9990 }, { "epoch": 1.3360524204332709, "grad_norm": 1.242891550064087, "learning_rate": 1.2237473358330128e-05, "loss": 0.7638, "step": 9991 }, { "epoch": 1.33618614602835, "grad_norm": 1.1691824197769165, "learning_rate": 1.223606619143728e-05, "loss": 0.723, "step": 9992 }, { "epoch": 1.3363198716234288, "grad_norm": 1.1096733808517456, "learning_rate": 1.2234658977935772e-05, "loss": 0.7504, "step": 9993 }, { "epoch": 1.3364535972185076, "grad_norm": 1.083448886871338, "learning_rate": 1.2233251717854937e-05, "loss": 0.624, "step": 9994 }, { "epoch": 1.3365873228135865, "grad_norm": 1.219514012336731, "learning_rate": 1.2231844411224105e-05, "loss": 0.7592, "step": 9995 }, { "epoch": 1.3367210484086653, "grad_norm": 1.1211715936660767, "learning_rate": 1.2230437058072613e-05, "loss": 0.7118, "step": 9996 }, { "epoch": 1.3368547740037444, "grad_norm": 1.1520240306854248, "learning_rate": 1.2229029658429795e-05, "loss": 0.7098, "step": 9997 }, { "epoch": 1.3369884995988233, "grad_norm": 1.2420533895492554, "learning_rate": 1.2227622212324985e-05, "loss": 0.7353, "step": 9998 }, { "epoch": 1.3371222251939021, "grad_norm": 1.287726640701294, "learning_rate": 1.2226214719787524e-05, "loss": 0.7306, "step": 9999 }, { "epoch": 1.337255950788981, "grad_norm": 1.2552980184555054, "learning_rate": 1.2224807180846745e-05, "loss": 0.7163, "step": 10000 }, { "epoch": 1.3373896763840598, "grad_norm": 1.126163363456726, "learning_rate": 1.222339959553199e-05, "loss": 0.6433, "step": 10001 }, { "epoch": 1.3375234019791389, "grad_norm": 1.1581871509552002, "learning_rate": 1.2221991963872599e-05, "loss": 0.6508, "step": 10002 }, { "epoch": 1.3376571275742177, "grad_norm": 1.2027941942214966, "learning_rate": 1.2220584285897912e-05, "loss": 0.7462, "step": 10003 }, { "epoch": 1.3377908531692966, "grad_norm": 1.0569887161254883, "learning_rate": 1.2219176561637267e-05, "loss": 0.7032, "step": 10004 }, { "epoch": 1.3379245787643754, "grad_norm": 1.1866272687911987, "learning_rate": 1.2217768791120012e-05, "loss": 0.7276, "step": 10005 }, { "epoch": 1.3380583043594543, "grad_norm": 1.1435871124267578, "learning_rate": 1.2216360974375492e-05, "loss": 0.7032, "step": 10006 }, { "epoch": 1.3381920299545333, "grad_norm": 1.259946584701538, "learning_rate": 1.2214953111433046e-05, "loss": 0.6677, "step": 10007 }, { "epoch": 1.3383257555496122, "grad_norm": 1.1676573753356934, "learning_rate": 1.2213545202322021e-05, "loss": 0.7667, "step": 10008 }, { "epoch": 1.338459481144691, "grad_norm": 1.0351556539535522, "learning_rate": 1.2212137247071764e-05, "loss": 0.6765, "step": 10009 }, { "epoch": 1.3385932067397701, "grad_norm": 1.1760621070861816, "learning_rate": 1.2210729245711623e-05, "loss": 0.6837, "step": 10010 }, { "epoch": 1.338726932334849, "grad_norm": 1.3379417657852173, "learning_rate": 1.2209321198270947e-05, "loss": 0.7661, "step": 10011 }, { "epoch": 1.3388606579299278, "grad_norm": 1.2333307266235352, "learning_rate": 1.2207913104779086e-05, "loss": 0.6646, "step": 10012 }, { "epoch": 1.3389943835250067, "grad_norm": 1.216335415840149, "learning_rate": 1.2206504965265387e-05, "loss": 0.7808, "step": 10013 }, { "epoch": 1.3391281091200855, "grad_norm": 1.2376459836959839, "learning_rate": 1.2205096779759207e-05, "loss": 0.7606, "step": 10014 }, { "epoch": 1.3392618347151646, "grad_norm": 1.0902397632598877, "learning_rate": 1.2203688548289892e-05, "loss": 0.6747, "step": 10015 }, { "epoch": 1.3393955603102434, "grad_norm": 1.3385555744171143, "learning_rate": 1.2202280270886797e-05, "loss": 0.6388, "step": 10016 }, { "epoch": 1.3395292859053223, "grad_norm": 1.2003036737442017, "learning_rate": 1.2200871947579278e-05, "loss": 0.7357, "step": 10017 }, { "epoch": 1.3396630115004011, "grad_norm": 1.1177383661270142, "learning_rate": 1.2199463578396688e-05, "loss": 0.7085, "step": 10018 }, { "epoch": 1.33979673709548, "grad_norm": 1.393141508102417, "learning_rate": 1.2198055163368386e-05, "loss": 0.7323, "step": 10019 }, { "epoch": 1.339930462690559, "grad_norm": 1.1044131517410278, "learning_rate": 1.2196646702523726e-05, "loss": 0.7566, "step": 10020 }, { "epoch": 1.340064188285638, "grad_norm": 1.273234248161316, "learning_rate": 1.219523819589207e-05, "loss": 0.764, "step": 10021 }, { "epoch": 1.3401979138807167, "grad_norm": 1.2831190824508667, "learning_rate": 1.2193829643502774e-05, "loss": 0.7115, "step": 10022 }, { "epoch": 1.3403316394757956, "grad_norm": 1.1941145658493042, "learning_rate": 1.2192421045385194e-05, "loss": 0.711, "step": 10023 }, { "epoch": 1.3404653650708744, "grad_norm": 1.0071264505386353, "learning_rate": 1.2191012401568698e-05, "loss": 0.6609, "step": 10024 }, { "epoch": 1.3405990906659535, "grad_norm": 1.3105251789093018, "learning_rate": 1.2189603712082648e-05, "loss": 0.7499, "step": 10025 }, { "epoch": 1.3407328162610324, "grad_norm": 1.1234333515167236, "learning_rate": 1.21881949769564e-05, "loss": 0.6558, "step": 10026 }, { "epoch": 1.3408665418561112, "grad_norm": 1.0120511054992676, "learning_rate": 1.2186786196219324e-05, "loss": 0.5798, "step": 10027 }, { "epoch": 1.3410002674511903, "grad_norm": 1.2481564283370972, "learning_rate": 1.2185377369900781e-05, "loss": 0.7194, "step": 10028 }, { "epoch": 1.3411339930462691, "grad_norm": 1.1985450983047485, "learning_rate": 1.2183968498030138e-05, "loss": 0.7642, "step": 10029 }, { "epoch": 1.341267718641348, "grad_norm": 1.221891164779663, "learning_rate": 1.218255958063676e-05, "loss": 0.7396, "step": 10030 }, { "epoch": 1.3414014442364268, "grad_norm": 1.2291136980056763, "learning_rate": 1.218115061775002e-05, "loss": 0.7816, "step": 10031 }, { "epoch": 1.3415351698315057, "grad_norm": 1.200584053993225, "learning_rate": 1.2179741609399279e-05, "loss": 0.6409, "step": 10032 }, { "epoch": 1.3416688954265847, "grad_norm": 1.3587855100631714, "learning_rate": 1.217833255561391e-05, "loss": 0.8773, "step": 10033 }, { "epoch": 1.3418026210216636, "grad_norm": 1.1163244247436523, "learning_rate": 1.2176923456423283e-05, "loss": 0.6939, "step": 10034 }, { "epoch": 1.3419363466167424, "grad_norm": 1.0339083671569824, "learning_rate": 1.2175514311856776e-05, "loss": 0.6379, "step": 10035 }, { "epoch": 1.3420700722118213, "grad_norm": 1.0852417945861816, "learning_rate": 1.2174105121943748e-05, "loss": 0.7052, "step": 10036 }, { "epoch": 1.3422037978069001, "grad_norm": 1.2457945346832275, "learning_rate": 1.2172695886713579e-05, "loss": 0.7501, "step": 10037 }, { "epoch": 1.3423375234019792, "grad_norm": 1.1165032386779785, "learning_rate": 1.2171286606195644e-05, "loss": 0.716, "step": 10038 }, { "epoch": 1.342471248997058, "grad_norm": 1.2431137561798096, "learning_rate": 1.2169877280419323e-05, "loss": 0.7444, "step": 10039 }, { "epoch": 1.342604974592137, "grad_norm": 1.134765386581421, "learning_rate": 1.2168467909413981e-05, "loss": 0.7077, "step": 10040 }, { "epoch": 1.3427387001872158, "grad_norm": 1.2699694633483887, "learning_rate": 1.2167058493209e-05, "loss": 0.7395, "step": 10041 }, { "epoch": 1.3428724257822946, "grad_norm": 1.3371946811676025, "learning_rate": 1.2165649031833761e-05, "loss": 0.7448, "step": 10042 }, { "epoch": 1.3430061513773737, "grad_norm": 1.2126374244689941, "learning_rate": 1.2164239525317641e-05, "loss": 0.7252, "step": 10043 }, { "epoch": 1.3431398769724525, "grad_norm": 1.342572808265686, "learning_rate": 1.2162829973690015e-05, "loss": 0.6703, "step": 10044 }, { "epoch": 1.3432736025675314, "grad_norm": 1.1401748657226562, "learning_rate": 1.2161420376980272e-05, "loss": 0.7153, "step": 10045 }, { "epoch": 1.3434073281626104, "grad_norm": 1.2594683170318604, "learning_rate": 1.2160010735217786e-05, "loss": 0.7551, "step": 10046 }, { "epoch": 1.3435410537576893, "grad_norm": 1.3072881698608398, "learning_rate": 1.2158601048431946e-05, "loss": 0.772, "step": 10047 }, { "epoch": 1.3436747793527681, "grad_norm": 1.0567703247070312, "learning_rate": 1.215719131665213e-05, "loss": 0.6808, "step": 10048 }, { "epoch": 1.343808504947847, "grad_norm": 1.2708537578582764, "learning_rate": 1.2155781539907728e-05, "loss": 0.657, "step": 10049 }, { "epoch": 1.3439422305429258, "grad_norm": 1.1334589719772339, "learning_rate": 1.2154371718228119e-05, "loss": 0.6643, "step": 10050 }, { "epoch": 1.344075956138005, "grad_norm": 1.1364762783050537, "learning_rate": 1.2152961851642697e-05, "loss": 0.7279, "step": 10051 }, { "epoch": 1.3442096817330837, "grad_norm": 1.359859824180603, "learning_rate": 1.2151551940180844e-05, "loss": 0.7658, "step": 10052 }, { "epoch": 1.3443434073281626, "grad_norm": 1.2539238929748535, "learning_rate": 1.2150141983871948e-05, "loss": 0.8151, "step": 10053 }, { "epoch": 1.3444771329232414, "grad_norm": 1.0668305158615112, "learning_rate": 1.21487319827454e-05, "loss": 0.7452, "step": 10054 }, { "epoch": 1.3446108585183203, "grad_norm": 1.2387019395828247, "learning_rate": 1.2147321936830592e-05, "loss": 0.6928, "step": 10055 }, { "epoch": 1.3447445841133994, "grad_norm": 1.1377508640289307, "learning_rate": 1.2145911846156912e-05, "loss": 0.6606, "step": 10056 }, { "epoch": 1.3448783097084782, "grad_norm": 1.2309672832489014, "learning_rate": 1.2144501710753753e-05, "loss": 0.7529, "step": 10057 }, { "epoch": 1.345012035303557, "grad_norm": 1.160562515258789, "learning_rate": 1.2143091530650508e-05, "loss": 0.6986, "step": 10058 }, { "epoch": 1.3451457608986361, "grad_norm": 1.0796853303909302, "learning_rate": 1.2141681305876571e-05, "loss": 0.6901, "step": 10059 }, { "epoch": 1.3452794864937148, "grad_norm": 1.256422519683838, "learning_rate": 1.2140271036461338e-05, "loss": 0.8194, "step": 10060 }, { "epoch": 1.3454132120887938, "grad_norm": 1.0610862970352173, "learning_rate": 1.21388607224342e-05, "loss": 0.6154, "step": 10061 }, { "epoch": 1.3455469376838727, "grad_norm": 1.1614086627960205, "learning_rate": 1.213745036382456e-05, "loss": 0.71, "step": 10062 }, { "epoch": 1.3456806632789515, "grad_norm": 1.119407057762146, "learning_rate": 1.213603996066181e-05, "loss": 0.746, "step": 10063 }, { "epoch": 1.3458143888740306, "grad_norm": 1.2506989240646362, "learning_rate": 1.2134629512975352e-05, "loss": 0.7542, "step": 10064 }, { "epoch": 1.3459481144691094, "grad_norm": 1.027877688407898, "learning_rate": 1.2133219020794584e-05, "loss": 0.6634, "step": 10065 }, { "epoch": 1.3460818400641883, "grad_norm": 1.1118401288986206, "learning_rate": 1.2131808484148906e-05, "loss": 0.6316, "step": 10066 }, { "epoch": 1.3462155656592671, "grad_norm": 1.125792145729065, "learning_rate": 1.2130397903067722e-05, "loss": 0.6814, "step": 10067 }, { "epoch": 1.346349291254346, "grad_norm": 1.089645504951477, "learning_rate": 1.2128987277580433e-05, "loss": 0.715, "step": 10068 }, { "epoch": 1.346483016849425, "grad_norm": 1.1538852453231812, "learning_rate": 1.2127576607716436e-05, "loss": 0.7558, "step": 10069 }, { "epoch": 1.346616742444504, "grad_norm": 1.2567024230957031, "learning_rate": 1.2126165893505144e-05, "loss": 0.75, "step": 10070 }, { "epoch": 1.3467504680395828, "grad_norm": 1.1922539472579956, "learning_rate": 1.212475513497596e-05, "loss": 0.6881, "step": 10071 }, { "epoch": 1.3468841936346616, "grad_norm": 1.1519092321395874, "learning_rate": 1.2123344332158288e-05, "loss": 0.6454, "step": 10072 }, { "epoch": 1.3470179192297405, "grad_norm": 1.2882055044174194, "learning_rate": 1.2121933485081536e-05, "loss": 0.7817, "step": 10073 }, { "epoch": 1.3471516448248195, "grad_norm": 1.1061348915100098, "learning_rate": 1.2120522593775108e-05, "loss": 0.701, "step": 10074 }, { "epoch": 1.3472853704198984, "grad_norm": 1.2565011978149414, "learning_rate": 1.2119111658268417e-05, "loss": 0.8133, "step": 10075 }, { "epoch": 1.3474190960149772, "grad_norm": 1.1008533239364624, "learning_rate": 1.2117700678590872e-05, "loss": 0.7085, "step": 10076 }, { "epoch": 1.3475528216100563, "grad_norm": 1.347006916999817, "learning_rate": 1.211628965477188e-05, "loss": 0.7531, "step": 10077 }, { "epoch": 1.3476865472051351, "grad_norm": 1.2499759197235107, "learning_rate": 1.2114878586840856e-05, "loss": 0.6955, "step": 10078 }, { "epoch": 1.347820272800214, "grad_norm": 1.1910834312438965, "learning_rate": 1.2113467474827217e-05, "loss": 0.8189, "step": 10079 }, { "epoch": 1.3479539983952928, "grad_norm": 1.201168179512024, "learning_rate": 1.2112056318760365e-05, "loss": 0.7567, "step": 10080 }, { "epoch": 1.3480877239903717, "grad_norm": 1.2161935567855835, "learning_rate": 1.2110645118669725e-05, "loss": 0.6889, "step": 10081 }, { "epoch": 1.3482214495854508, "grad_norm": 1.3054873943328857, "learning_rate": 1.21092338745847e-05, "loss": 0.7523, "step": 10082 }, { "epoch": 1.3483551751805296, "grad_norm": 1.0133250951766968, "learning_rate": 1.2107822586534718e-05, "loss": 0.6337, "step": 10083 }, { "epoch": 1.3484889007756085, "grad_norm": 1.2997405529022217, "learning_rate": 1.2106411254549191e-05, "loss": 0.7759, "step": 10084 }, { "epoch": 1.3486226263706873, "grad_norm": 1.143696665763855, "learning_rate": 1.2104999878657535e-05, "loss": 0.6414, "step": 10085 }, { "epoch": 1.3487563519657662, "grad_norm": 1.2062244415283203, "learning_rate": 1.2103588458889174e-05, "loss": 0.6562, "step": 10086 }, { "epoch": 1.3488900775608452, "grad_norm": 1.1555273532867432, "learning_rate": 1.2102176995273522e-05, "loss": 0.779, "step": 10087 }, { "epoch": 1.349023803155924, "grad_norm": 1.207604169845581, "learning_rate": 1.210076548784e-05, "loss": 0.6208, "step": 10088 }, { "epoch": 1.349157528751003, "grad_norm": 1.211506724357605, "learning_rate": 1.2099353936618035e-05, "loss": 0.7747, "step": 10089 }, { "epoch": 1.3492912543460818, "grad_norm": 1.1382546424865723, "learning_rate": 1.2097942341637046e-05, "loss": 0.666, "step": 10090 }, { "epoch": 1.3494249799411606, "grad_norm": 1.4062260389328003, "learning_rate": 1.2096530702926457e-05, "loss": 0.8604, "step": 10091 }, { "epoch": 1.3495587055362397, "grad_norm": 1.2554432153701782, "learning_rate": 1.2095119020515691e-05, "loss": 0.7143, "step": 10092 }, { "epoch": 1.3496924311313185, "grad_norm": 1.175278663635254, "learning_rate": 1.2093707294434172e-05, "loss": 0.6458, "step": 10093 }, { "epoch": 1.3498261567263974, "grad_norm": 1.2217447757720947, "learning_rate": 1.2092295524711331e-05, "loss": 0.7555, "step": 10094 }, { "epoch": 1.3499598823214765, "grad_norm": 1.1111235618591309, "learning_rate": 1.2090883711376589e-05, "loss": 0.679, "step": 10095 }, { "epoch": 1.3500936079165553, "grad_norm": 1.062126636505127, "learning_rate": 1.2089471854459375e-05, "loss": 0.6631, "step": 10096 }, { "epoch": 1.3502273335116342, "grad_norm": 1.0881530046463013, "learning_rate": 1.2088059953989124e-05, "loss": 0.7416, "step": 10097 }, { "epoch": 1.350361059106713, "grad_norm": 1.3496240377426147, "learning_rate": 1.2086648009995258e-05, "loss": 0.7947, "step": 10098 }, { "epoch": 1.3504947847017919, "grad_norm": 1.121634840965271, "learning_rate": 1.2085236022507216e-05, "loss": 0.667, "step": 10099 }, { "epoch": 1.350628510296871, "grad_norm": 1.3094840049743652, "learning_rate": 1.2083823991554423e-05, "loss": 0.6922, "step": 10100 }, { "epoch": 1.3507622358919498, "grad_norm": 1.2325226068496704, "learning_rate": 1.2082411917166308e-05, "loss": 0.7694, "step": 10101 }, { "epoch": 1.3508959614870286, "grad_norm": 1.30870521068573, "learning_rate": 1.208099979937231e-05, "loss": 0.7827, "step": 10102 }, { "epoch": 1.3510296870821075, "grad_norm": 1.1953870058059692, "learning_rate": 1.2079587638201868e-05, "loss": 0.6442, "step": 10103 }, { "epoch": 1.3511634126771863, "grad_norm": 1.248350739479065, "learning_rate": 1.2078175433684407e-05, "loss": 0.7573, "step": 10104 }, { "epoch": 1.3512971382722654, "grad_norm": 1.270564079284668, "learning_rate": 1.2076763185849369e-05, "loss": 0.7686, "step": 10105 }, { "epoch": 1.3514308638673442, "grad_norm": 1.2915006875991821, "learning_rate": 1.207535089472619e-05, "loss": 0.8006, "step": 10106 }, { "epoch": 1.351564589462423, "grad_norm": 1.159336805343628, "learning_rate": 1.2073938560344308e-05, "loss": 0.7342, "step": 10107 }, { "epoch": 1.351698315057502, "grad_norm": 1.2063605785369873, "learning_rate": 1.207252618273316e-05, "loss": 0.6829, "step": 10108 }, { "epoch": 1.3518320406525808, "grad_norm": 1.2224453687667847, "learning_rate": 1.2071113761922187e-05, "loss": 0.7762, "step": 10109 }, { "epoch": 1.3519657662476599, "grad_norm": 1.2698389291763306, "learning_rate": 1.206970129794083e-05, "loss": 0.7375, "step": 10110 }, { "epoch": 1.3520994918427387, "grad_norm": 1.2487092018127441, "learning_rate": 1.206828879081853e-05, "loss": 0.6884, "step": 10111 }, { "epoch": 1.3522332174378175, "grad_norm": 1.1894068717956543, "learning_rate": 1.206687624058473e-05, "loss": 0.6999, "step": 10112 }, { "epoch": 1.3523669430328966, "grad_norm": 1.1236110925674438, "learning_rate": 1.2065463647268872e-05, "loss": 0.6796, "step": 10113 }, { "epoch": 1.3525006686279755, "grad_norm": 1.1820405721664429, "learning_rate": 1.2064051010900397e-05, "loss": 0.7304, "step": 10114 }, { "epoch": 1.3526343942230543, "grad_norm": 1.167473316192627, "learning_rate": 1.2062638331508757e-05, "loss": 0.7668, "step": 10115 }, { "epoch": 1.3527681198181332, "grad_norm": 1.275689721107483, "learning_rate": 1.2061225609123397e-05, "loss": 0.7662, "step": 10116 }, { "epoch": 1.352901845413212, "grad_norm": 1.28383469581604, "learning_rate": 1.205981284377376e-05, "loss": 0.7624, "step": 10117 }, { "epoch": 1.353035571008291, "grad_norm": 1.2344011068344116, "learning_rate": 1.2058400035489293e-05, "loss": 0.7694, "step": 10118 }, { "epoch": 1.35316929660337, "grad_norm": 1.1365541219711304, "learning_rate": 1.2056987184299449e-05, "loss": 0.6648, "step": 10119 }, { "epoch": 1.3533030221984488, "grad_norm": 1.1914833784103394, "learning_rate": 1.2055574290233673e-05, "loss": 0.7238, "step": 10120 }, { "epoch": 1.3534367477935276, "grad_norm": 1.2482595443725586, "learning_rate": 1.205416135332142e-05, "loss": 0.7894, "step": 10121 }, { "epoch": 1.3535704733886065, "grad_norm": 1.226043701171875, "learning_rate": 1.205274837359214e-05, "loss": 0.7372, "step": 10122 }, { "epoch": 1.3537041989836855, "grad_norm": 1.1266580820083618, "learning_rate": 1.2051335351075284e-05, "loss": 0.6894, "step": 10123 }, { "epoch": 1.3538379245787644, "grad_norm": 1.185328722000122, "learning_rate": 1.2049922285800305e-05, "loss": 0.7243, "step": 10124 }, { "epoch": 1.3539716501738432, "grad_norm": 1.0669310092926025, "learning_rate": 1.2048509177796659e-05, "loss": 0.6767, "step": 10125 }, { "epoch": 1.354105375768922, "grad_norm": 1.078006386756897, "learning_rate": 1.2047096027093798e-05, "loss": 0.7126, "step": 10126 }, { "epoch": 1.354239101364001, "grad_norm": 1.1265978813171387, "learning_rate": 1.2045682833721177e-05, "loss": 0.6252, "step": 10127 }, { "epoch": 1.35437282695908, "grad_norm": 1.2035911083221436, "learning_rate": 1.2044269597708258e-05, "loss": 0.7137, "step": 10128 }, { "epoch": 1.3545065525541589, "grad_norm": 1.1704684495925903, "learning_rate": 1.2042856319084495e-05, "loss": 0.755, "step": 10129 }, { "epoch": 1.3546402781492377, "grad_norm": 1.2008923292160034, "learning_rate": 1.2041442997879347e-05, "loss": 0.6375, "step": 10130 }, { "epoch": 1.3547740037443168, "grad_norm": 0.9947749376296997, "learning_rate": 1.2040029634122272e-05, "loss": 0.5944, "step": 10131 }, { "epoch": 1.3549077293393956, "grad_norm": 1.4556466341018677, "learning_rate": 1.2038616227842734e-05, "loss": 0.8595, "step": 10132 }, { "epoch": 1.3550414549344745, "grad_norm": 1.0000073909759521, "learning_rate": 1.2037202779070186e-05, "loss": 0.6282, "step": 10133 }, { "epoch": 1.3551751805295533, "grad_norm": 1.2447948455810547, "learning_rate": 1.2035789287834099e-05, "loss": 0.7719, "step": 10134 }, { "epoch": 1.3553089061246322, "grad_norm": 1.2689791917800903, "learning_rate": 1.2034375754163932e-05, "loss": 0.7779, "step": 10135 }, { "epoch": 1.3554426317197112, "grad_norm": 1.2218483686447144, "learning_rate": 1.203296217808915e-05, "loss": 0.7528, "step": 10136 }, { "epoch": 1.35557635731479, "grad_norm": 1.1591823101043701, "learning_rate": 1.2031548559639216e-05, "loss": 0.6363, "step": 10137 }, { "epoch": 1.355710082909869, "grad_norm": 1.1840147972106934, "learning_rate": 1.2030134898843598e-05, "loss": 0.7595, "step": 10138 }, { "epoch": 1.3558438085049478, "grad_norm": 1.1650559902191162, "learning_rate": 1.2028721195731756e-05, "loss": 0.7322, "step": 10139 }, { "epoch": 1.3559775341000266, "grad_norm": 1.1595571041107178, "learning_rate": 1.2027307450333166e-05, "loss": 0.7352, "step": 10140 }, { "epoch": 1.3561112596951057, "grad_norm": 1.1970895528793335, "learning_rate": 1.202589366267729e-05, "loss": 0.7587, "step": 10141 }, { "epoch": 1.3562449852901846, "grad_norm": 1.032342791557312, "learning_rate": 1.20244798327936e-05, "loss": 0.6018, "step": 10142 }, { "epoch": 1.3563787108852634, "grad_norm": 1.218293309211731, "learning_rate": 1.2023065960711565e-05, "loss": 0.7543, "step": 10143 }, { "epoch": 1.3565124364803425, "grad_norm": 1.1026887893676758, "learning_rate": 1.202165204646066e-05, "loss": 0.65, "step": 10144 }, { "epoch": 1.356646162075421, "grad_norm": 1.3123043775558472, "learning_rate": 1.2020238090070346e-05, "loss": 0.7572, "step": 10145 }, { "epoch": 1.3567798876705002, "grad_norm": 1.2147557735443115, "learning_rate": 1.2018824091570103e-05, "loss": 0.7184, "step": 10146 }, { "epoch": 1.356913613265579, "grad_norm": 1.2220582962036133, "learning_rate": 1.2017410050989405e-05, "loss": 0.7494, "step": 10147 }, { "epoch": 1.3570473388606579, "grad_norm": 1.2740370035171509, "learning_rate": 1.2015995968357728e-05, "loss": 0.7532, "step": 10148 }, { "epoch": 1.357181064455737, "grad_norm": 1.1707720756530762, "learning_rate": 1.201458184370454e-05, "loss": 0.6513, "step": 10149 }, { "epoch": 1.3573147900508158, "grad_norm": 1.0780820846557617, "learning_rate": 1.2013167677059324e-05, "loss": 0.65, "step": 10150 }, { "epoch": 1.3574485156458946, "grad_norm": 1.2585344314575195, "learning_rate": 1.2011753468451552e-05, "loss": 0.6926, "step": 10151 }, { "epoch": 1.3575822412409735, "grad_norm": 1.0727747678756714, "learning_rate": 1.2010339217910706e-05, "loss": 0.7375, "step": 10152 }, { "epoch": 1.3577159668360523, "grad_norm": 1.0936923027038574, "learning_rate": 1.200892492546626e-05, "loss": 0.6141, "step": 10153 }, { "epoch": 1.3578496924311314, "grad_norm": 1.1288864612579346, "learning_rate": 1.2007510591147698e-05, "loss": 0.6382, "step": 10154 }, { "epoch": 1.3579834180262103, "grad_norm": 1.198479175567627, "learning_rate": 1.2006096214984498e-05, "loss": 0.8149, "step": 10155 }, { "epoch": 1.358117143621289, "grad_norm": 1.260659098625183, "learning_rate": 1.2004681797006143e-05, "loss": 0.6612, "step": 10156 }, { "epoch": 1.358250869216368, "grad_norm": 1.1443016529083252, "learning_rate": 1.2003267337242115e-05, "loss": 0.7405, "step": 10157 }, { "epoch": 1.3583845948114468, "grad_norm": 1.2879217863082886, "learning_rate": 1.2001852835721894e-05, "loss": 0.7778, "step": 10158 }, { "epoch": 1.3585183204065259, "grad_norm": 1.2178672552108765, "learning_rate": 1.2000438292474968e-05, "loss": 0.7577, "step": 10159 }, { "epoch": 1.3586520460016047, "grad_norm": 1.0373649597167969, "learning_rate": 1.199902370753082e-05, "loss": 0.6048, "step": 10160 }, { "epoch": 1.3587857715966836, "grad_norm": 1.1186918020248413, "learning_rate": 1.1997609080918933e-05, "loss": 0.6773, "step": 10161 }, { "epoch": 1.3589194971917626, "grad_norm": 1.2100756168365479, "learning_rate": 1.1996194412668798e-05, "loss": 0.8011, "step": 10162 }, { "epoch": 1.3590532227868413, "grad_norm": 1.1768124103546143, "learning_rate": 1.1994779702809903e-05, "loss": 0.7342, "step": 10163 }, { "epoch": 1.3591869483819203, "grad_norm": 1.0588281154632568, "learning_rate": 1.1993364951371734e-05, "loss": 0.6437, "step": 10164 }, { "epoch": 1.3593206739769992, "grad_norm": 1.1727943420410156, "learning_rate": 1.1991950158383773e-05, "loss": 0.6474, "step": 10165 }, { "epoch": 1.359454399572078, "grad_norm": 1.1928704977035522, "learning_rate": 1.1990535323875521e-05, "loss": 0.6967, "step": 10166 }, { "epoch": 1.359588125167157, "grad_norm": 1.2454696893692017, "learning_rate": 1.1989120447876465e-05, "loss": 0.7419, "step": 10167 }, { "epoch": 1.359721850762236, "grad_norm": 1.137209415435791, "learning_rate": 1.19877055304161e-05, "loss": 0.782, "step": 10168 }, { "epoch": 1.3598555763573148, "grad_norm": 1.338990569114685, "learning_rate": 1.1986290571523912e-05, "loss": 0.721, "step": 10169 }, { "epoch": 1.3599893019523936, "grad_norm": 1.1938974857330322, "learning_rate": 1.19848755712294e-05, "loss": 0.7078, "step": 10170 }, { "epoch": 1.3601230275474725, "grad_norm": 1.2825438976287842, "learning_rate": 1.1983460529562051e-05, "loss": 0.6854, "step": 10171 }, { "epoch": 1.3602567531425516, "grad_norm": 1.3444875478744507, "learning_rate": 1.1982045446551372e-05, "loss": 0.7213, "step": 10172 }, { "epoch": 1.3603904787376304, "grad_norm": 1.09755277633667, "learning_rate": 1.1980630322226848e-05, "loss": 0.6693, "step": 10173 }, { "epoch": 1.3605242043327093, "grad_norm": 1.0746098756790161, "learning_rate": 1.197921515661798e-05, "loss": 0.6957, "step": 10174 }, { "epoch": 1.3606579299277881, "grad_norm": 1.0708236694335938, "learning_rate": 1.1977799949754267e-05, "loss": 0.6462, "step": 10175 }, { "epoch": 1.360791655522867, "grad_norm": 1.1177432537078857, "learning_rate": 1.197638470166521e-05, "loss": 0.5998, "step": 10176 }, { "epoch": 1.360925381117946, "grad_norm": 1.1892383098602295, "learning_rate": 1.19749694123803e-05, "loss": 0.6392, "step": 10177 }, { "epoch": 1.3610591067130249, "grad_norm": 1.1515694856643677, "learning_rate": 1.1973554081929042e-05, "loss": 0.6998, "step": 10178 }, { "epoch": 1.3611928323081037, "grad_norm": 1.243503212928772, "learning_rate": 1.197213871034094e-05, "loss": 0.7541, "step": 10179 }, { "epoch": 1.3613265579031828, "grad_norm": 1.2338383197784424, "learning_rate": 1.1970723297645494e-05, "loss": 0.7347, "step": 10180 }, { "epoch": 1.3614602834982616, "grad_norm": 1.262148141860962, "learning_rate": 1.1969307843872206e-05, "loss": 0.7583, "step": 10181 }, { "epoch": 1.3615940090933405, "grad_norm": 1.1674898862838745, "learning_rate": 1.1967892349050581e-05, "loss": 0.6301, "step": 10182 }, { "epoch": 1.3617277346884193, "grad_norm": 1.027660846710205, "learning_rate": 1.1966476813210121e-05, "loss": 0.6208, "step": 10183 }, { "epoch": 1.3618614602834982, "grad_norm": 1.3393902778625488, "learning_rate": 1.1965061236380336e-05, "loss": 0.7563, "step": 10184 }, { "epoch": 1.3619951858785773, "grad_norm": 1.1425881385803223, "learning_rate": 1.196364561859073e-05, "loss": 0.7031, "step": 10185 }, { "epoch": 1.3621289114736561, "grad_norm": 1.1585972309112549, "learning_rate": 1.1962229959870805e-05, "loss": 0.6975, "step": 10186 }, { "epoch": 1.362262637068735, "grad_norm": 1.101199984550476, "learning_rate": 1.196081426025008e-05, "loss": 0.6353, "step": 10187 }, { "epoch": 1.3623963626638138, "grad_norm": 1.1224530935287476, "learning_rate": 1.1959398519758059e-05, "loss": 0.6245, "step": 10188 }, { "epoch": 1.3625300882588927, "grad_norm": 1.2043191194534302, "learning_rate": 1.1957982738424247e-05, "loss": 0.6601, "step": 10189 }, { "epoch": 1.3626638138539717, "grad_norm": 1.1529829502105713, "learning_rate": 1.1956566916278159e-05, "loss": 0.7057, "step": 10190 }, { "epoch": 1.3627975394490506, "grad_norm": 1.2066937685012817, "learning_rate": 1.1955151053349306e-05, "loss": 0.7038, "step": 10191 }, { "epoch": 1.3629312650441294, "grad_norm": 1.1664913892745972, "learning_rate": 1.1953735149667201e-05, "loss": 0.6623, "step": 10192 }, { "epoch": 1.3630649906392083, "grad_norm": 1.3087974786758423, "learning_rate": 1.1952319205261356e-05, "loss": 0.7716, "step": 10193 }, { "epoch": 1.3631987162342871, "grad_norm": 1.252387523651123, "learning_rate": 1.1950903220161286e-05, "loss": 0.7635, "step": 10194 }, { "epoch": 1.3633324418293662, "grad_norm": 1.1942683458328247, "learning_rate": 1.1949487194396503e-05, "loss": 0.6589, "step": 10195 }, { "epoch": 1.363466167424445, "grad_norm": 1.1268057823181152, "learning_rate": 1.1948071127996525e-05, "loss": 0.6624, "step": 10196 }, { "epoch": 1.363599893019524, "grad_norm": 1.1349005699157715, "learning_rate": 1.194665502099087e-05, "loss": 0.6746, "step": 10197 }, { "epoch": 1.363733618614603, "grad_norm": 1.134196400642395, "learning_rate": 1.1945238873409053e-05, "loss": 0.6439, "step": 10198 }, { "epoch": 1.3638673442096818, "grad_norm": 1.173986792564392, "learning_rate": 1.1943822685280592e-05, "loss": 0.6387, "step": 10199 }, { "epoch": 1.3640010698047607, "grad_norm": 1.1811243295669556, "learning_rate": 1.194240645663501e-05, "loss": 0.7187, "step": 10200 }, { "epoch": 1.3641347953998395, "grad_norm": 1.1912455558776855, "learning_rate": 1.1940990187501824e-05, "loss": 0.7216, "step": 10201 }, { "epoch": 1.3642685209949184, "grad_norm": 1.2071850299835205, "learning_rate": 1.1939573877910555e-05, "loss": 0.6738, "step": 10202 }, { "epoch": 1.3644022465899974, "grad_norm": 1.2127255201339722, "learning_rate": 1.1938157527890722e-05, "loss": 0.6719, "step": 10203 }, { "epoch": 1.3645359721850763, "grad_norm": 1.2086740732192993, "learning_rate": 1.193674113747185e-05, "loss": 0.6812, "step": 10204 }, { "epoch": 1.3646696977801551, "grad_norm": 1.2231475114822388, "learning_rate": 1.1935324706683464e-05, "loss": 0.6954, "step": 10205 }, { "epoch": 1.364803423375234, "grad_norm": 1.2310230731964111, "learning_rate": 1.1933908235555085e-05, "loss": 0.6976, "step": 10206 }, { "epoch": 1.3649371489703128, "grad_norm": 1.1294760704040527, "learning_rate": 1.1932491724116239e-05, "loss": 0.637, "step": 10207 }, { "epoch": 1.365070874565392, "grad_norm": 1.237724781036377, "learning_rate": 1.1931075172396453e-05, "loss": 0.6757, "step": 10208 }, { "epoch": 1.3652046001604707, "grad_norm": 1.0759931802749634, "learning_rate": 1.1929658580425257e-05, "loss": 0.6946, "step": 10209 }, { "epoch": 1.3653383257555496, "grad_norm": 1.2478537559509277, "learning_rate": 1.192824194823217e-05, "loss": 0.8021, "step": 10210 }, { "epoch": 1.3654720513506284, "grad_norm": 1.1422759294509888, "learning_rate": 1.1926825275846722e-05, "loss": 0.6443, "step": 10211 }, { "epoch": 1.3656057769457073, "grad_norm": 1.1099671125411987, "learning_rate": 1.1925408563298448e-05, "loss": 0.6729, "step": 10212 }, { "epoch": 1.3657395025407864, "grad_norm": 1.241811990737915, "learning_rate": 1.192399181061688e-05, "loss": 0.7681, "step": 10213 }, { "epoch": 1.3658732281358652, "grad_norm": 1.4407131671905518, "learning_rate": 1.1922575017831538e-05, "loss": 0.7192, "step": 10214 }, { "epoch": 1.366006953730944, "grad_norm": 1.166901707649231, "learning_rate": 1.1921158184971959e-05, "loss": 0.7019, "step": 10215 }, { "epoch": 1.3661406793260231, "grad_norm": 1.1612164974212646, "learning_rate": 1.1919741312067676e-05, "loss": 0.7095, "step": 10216 }, { "epoch": 1.366274404921102, "grad_norm": 1.1855413913726807, "learning_rate": 1.1918324399148225e-05, "loss": 0.6672, "step": 10217 }, { "epoch": 1.3664081305161808, "grad_norm": 1.2104783058166504, "learning_rate": 1.1916907446243135e-05, "loss": 0.6942, "step": 10218 }, { "epoch": 1.3665418561112597, "grad_norm": 1.2359639406204224, "learning_rate": 1.1915490453381946e-05, "loss": 0.7738, "step": 10219 }, { "epoch": 1.3666755817063385, "grad_norm": 1.1119080781936646, "learning_rate": 1.1914073420594189e-05, "loss": 0.6798, "step": 10220 }, { "epoch": 1.3668093073014176, "grad_norm": 1.133814811706543, "learning_rate": 1.1912656347909406e-05, "loss": 0.6575, "step": 10221 }, { "epoch": 1.3669430328964964, "grad_norm": 1.2471286058425903, "learning_rate": 1.191123923535713e-05, "loss": 0.7183, "step": 10222 }, { "epoch": 1.3670767584915753, "grad_norm": 1.2019598484039307, "learning_rate": 1.1909822082966902e-05, "loss": 0.6879, "step": 10223 }, { "epoch": 1.3672104840866541, "grad_norm": 1.1864873170852661, "learning_rate": 1.1908404890768255e-05, "loss": 0.6975, "step": 10224 }, { "epoch": 1.367344209681733, "grad_norm": 1.288870096206665, "learning_rate": 1.1906987658790741e-05, "loss": 0.8002, "step": 10225 }, { "epoch": 1.367477935276812, "grad_norm": 1.2178617715835571, "learning_rate": 1.1905570387063892e-05, "loss": 0.7189, "step": 10226 }, { "epoch": 1.367611660871891, "grad_norm": 1.2314642667770386, "learning_rate": 1.190415307561725e-05, "loss": 0.7178, "step": 10227 }, { "epoch": 1.3677453864669697, "grad_norm": 1.2320245504379272, "learning_rate": 1.190273572448036e-05, "loss": 0.7753, "step": 10228 }, { "epoch": 1.3678791120620486, "grad_norm": 1.1743957996368408, "learning_rate": 1.1901318333682765e-05, "loss": 0.6797, "step": 10229 }, { "epoch": 1.3680128376571274, "grad_norm": 1.3338135480880737, "learning_rate": 1.189990090325401e-05, "loss": 0.6625, "step": 10230 }, { "epoch": 1.3681465632522065, "grad_norm": 1.2401553392410278, "learning_rate": 1.1898483433223635e-05, "loss": 0.689, "step": 10231 }, { "epoch": 1.3682802888472854, "grad_norm": 1.1727200746536255, "learning_rate": 1.1897065923621191e-05, "loss": 0.74, "step": 10232 }, { "epoch": 1.3684140144423642, "grad_norm": 1.1763224601745605, "learning_rate": 1.1895648374476227e-05, "loss": 0.6962, "step": 10233 }, { "epoch": 1.3685477400374433, "grad_norm": 1.1437729597091675, "learning_rate": 1.1894230785818284e-05, "loss": 0.7223, "step": 10234 }, { "epoch": 1.3686814656325221, "grad_norm": 1.1838178634643555, "learning_rate": 1.189281315767691e-05, "loss": 0.7544, "step": 10235 }, { "epoch": 1.368815191227601, "grad_norm": 1.1428289413452148, "learning_rate": 1.1891395490081661e-05, "loss": 0.6548, "step": 10236 }, { "epoch": 1.3689489168226798, "grad_norm": 1.4124630689620972, "learning_rate": 1.1889977783062078e-05, "loss": 0.7257, "step": 10237 }, { "epoch": 1.3690826424177587, "grad_norm": 1.2611563205718994, "learning_rate": 1.1888560036647721e-05, "loss": 0.6664, "step": 10238 }, { "epoch": 1.3692163680128377, "grad_norm": 1.153427243232727, "learning_rate": 1.1887142250868135e-05, "loss": 0.7019, "step": 10239 }, { "epoch": 1.3693500936079166, "grad_norm": 1.2976081371307373, "learning_rate": 1.1885724425752875e-05, "loss": 0.6219, "step": 10240 }, { "epoch": 1.3694838192029954, "grad_norm": 1.2516354322433472, "learning_rate": 1.1884306561331498e-05, "loss": 0.7173, "step": 10241 }, { "epoch": 1.3696175447980743, "grad_norm": 1.3219366073608398, "learning_rate": 1.188288865763355e-05, "loss": 0.7175, "step": 10242 }, { "epoch": 1.3697512703931531, "grad_norm": 1.0133330821990967, "learning_rate": 1.1881470714688585e-05, "loss": 0.6155, "step": 10243 }, { "epoch": 1.3698849959882322, "grad_norm": 1.2487989664077759, "learning_rate": 1.188005273252617e-05, "loss": 0.6971, "step": 10244 }, { "epoch": 1.370018721583311, "grad_norm": 1.1328601837158203, "learning_rate": 1.1878634711175854e-05, "loss": 0.6423, "step": 10245 }, { "epoch": 1.37015244717839, "grad_norm": 1.2758080959320068, "learning_rate": 1.1877216650667194e-05, "loss": 0.7512, "step": 10246 }, { "epoch": 1.370286172773469, "grad_norm": 1.2373908758163452, "learning_rate": 1.1875798551029749e-05, "loss": 0.7434, "step": 10247 }, { "epoch": 1.3704198983685476, "grad_norm": 1.1997580528259277, "learning_rate": 1.1874380412293078e-05, "loss": 0.7142, "step": 10248 }, { "epoch": 1.3705536239636267, "grad_norm": 1.1408528089523315, "learning_rate": 1.187296223448674e-05, "loss": 0.7165, "step": 10249 }, { "epoch": 1.3706873495587055, "grad_norm": 1.1677137613296509, "learning_rate": 1.1871544017640298e-05, "loss": 0.6836, "step": 10250 }, { "epoch": 1.3708210751537844, "grad_norm": 1.2518094778060913, "learning_rate": 1.1870125761783311e-05, "loss": 0.7455, "step": 10251 }, { "epoch": 1.3709548007488634, "grad_norm": 1.2905768156051636, "learning_rate": 1.1868707466945343e-05, "loss": 0.7335, "step": 10252 }, { "epoch": 1.3710885263439423, "grad_norm": 1.076263427734375, "learning_rate": 1.1867289133155957e-05, "loss": 0.6254, "step": 10253 }, { "epoch": 1.3712222519390211, "grad_norm": 1.127852439880371, "learning_rate": 1.1865870760444715e-05, "loss": 0.6416, "step": 10254 }, { "epoch": 1.3713559775341, "grad_norm": 1.1369438171386719, "learning_rate": 1.1864452348841182e-05, "loss": 0.7284, "step": 10255 }, { "epoch": 1.3714897031291788, "grad_norm": 1.1914016008377075, "learning_rate": 1.1863033898374921e-05, "loss": 0.6851, "step": 10256 }, { "epoch": 1.371623428724258, "grad_norm": 1.1593722105026245, "learning_rate": 1.1861615409075507e-05, "loss": 0.6197, "step": 10257 }, { "epoch": 1.3717571543193368, "grad_norm": 1.0651557445526123, "learning_rate": 1.1860196880972496e-05, "loss": 0.6785, "step": 10258 }, { "epoch": 1.3718908799144156, "grad_norm": 1.2098373174667358, "learning_rate": 1.1858778314095462e-05, "loss": 0.6963, "step": 10259 }, { "epoch": 1.3720246055094945, "grad_norm": 1.1660557985305786, "learning_rate": 1.1857359708473975e-05, "loss": 0.7039, "step": 10260 }, { "epoch": 1.3721583311045733, "grad_norm": 1.2848864793777466, "learning_rate": 1.1855941064137602e-05, "loss": 0.7796, "step": 10261 }, { "epoch": 1.3722920566996524, "grad_norm": 1.2703560590744019, "learning_rate": 1.185452238111591e-05, "loss": 0.7129, "step": 10262 }, { "epoch": 1.3724257822947312, "grad_norm": 1.0441081523895264, "learning_rate": 1.1853103659438477e-05, "loss": 0.6344, "step": 10263 }, { "epoch": 1.37255950788981, "grad_norm": 1.1877859830856323, "learning_rate": 1.185168489913487e-05, "loss": 0.6759, "step": 10264 }, { "epoch": 1.3726932334848891, "grad_norm": 1.348563313484192, "learning_rate": 1.1850266100234665e-05, "loss": 0.7183, "step": 10265 }, { "epoch": 1.3728269590799678, "grad_norm": 1.2906465530395508, "learning_rate": 1.1848847262767431e-05, "loss": 0.8149, "step": 10266 }, { "epoch": 1.3729606846750468, "grad_norm": 1.2016907930374146, "learning_rate": 1.1847428386762748e-05, "loss": 0.6751, "step": 10267 }, { "epoch": 1.3730944102701257, "grad_norm": 1.2858937978744507, "learning_rate": 1.1846009472250183e-05, "loss": 0.7459, "step": 10268 }, { "epoch": 1.3732281358652045, "grad_norm": 1.0750868320465088, "learning_rate": 1.1844590519259321e-05, "loss": 0.6663, "step": 10269 }, { "epoch": 1.3733618614602836, "grad_norm": 1.2467623949050903, "learning_rate": 1.1843171527819734e-05, "loss": 0.7597, "step": 10270 }, { "epoch": 1.3734955870553625, "grad_norm": 1.2384566068649292, "learning_rate": 1.1841752497961001e-05, "loss": 0.7193, "step": 10271 }, { "epoch": 1.3736293126504413, "grad_norm": 1.1998809576034546, "learning_rate": 1.1840333429712699e-05, "loss": 0.7314, "step": 10272 }, { "epoch": 1.3737630382455202, "grad_norm": 1.2076008319854736, "learning_rate": 1.1838914323104407e-05, "loss": 0.7097, "step": 10273 }, { "epoch": 1.373896763840599, "grad_norm": 1.2304364442825317, "learning_rate": 1.1837495178165706e-05, "loss": 0.6766, "step": 10274 }, { "epoch": 1.374030489435678, "grad_norm": 1.3354172706604004, "learning_rate": 1.1836075994926175e-05, "loss": 0.8148, "step": 10275 }, { "epoch": 1.374164215030757, "grad_norm": 1.2624297142028809, "learning_rate": 1.1834656773415396e-05, "loss": 0.7507, "step": 10276 }, { "epoch": 1.3742979406258358, "grad_norm": 1.1481683254241943, "learning_rate": 1.1833237513662956e-05, "loss": 0.6153, "step": 10277 }, { "epoch": 1.3744316662209146, "grad_norm": 1.1723748445510864, "learning_rate": 1.1831818215698434e-05, "loss": 0.7899, "step": 10278 }, { "epoch": 1.3745653918159935, "grad_norm": 1.1131445169448853, "learning_rate": 1.1830398879551412e-05, "loss": 0.6765, "step": 10279 }, { "epoch": 1.3746991174110725, "grad_norm": 1.1286929845809937, "learning_rate": 1.1828979505251476e-05, "loss": 0.6567, "step": 10280 }, { "epoch": 1.3748328430061514, "grad_norm": 1.2521553039550781, "learning_rate": 1.1827560092828215e-05, "loss": 0.7466, "step": 10281 }, { "epoch": 1.3749665686012302, "grad_norm": 1.1224563121795654, "learning_rate": 1.1826140642311211e-05, "loss": 0.6765, "step": 10282 }, { "epoch": 1.3751002941963093, "grad_norm": 1.0615402460098267, "learning_rate": 1.1824721153730052e-05, "loss": 0.6323, "step": 10283 }, { "epoch": 1.3752340197913882, "grad_norm": 1.1387630701065063, "learning_rate": 1.1823301627114327e-05, "loss": 0.6851, "step": 10284 }, { "epoch": 1.375367745386467, "grad_norm": 1.1740139722824097, "learning_rate": 1.1821882062493625e-05, "loss": 0.7696, "step": 10285 }, { "epoch": 1.3755014709815458, "grad_norm": 1.0665405988693237, "learning_rate": 1.1820462459897537e-05, "loss": 0.6315, "step": 10286 }, { "epoch": 1.3756351965766247, "grad_norm": 1.3269743919372559, "learning_rate": 1.1819042819355649e-05, "loss": 0.7425, "step": 10287 }, { "epoch": 1.3757689221717038, "grad_norm": 1.1500425338745117, "learning_rate": 1.1817623140897552e-05, "loss": 0.7271, "step": 10288 }, { "epoch": 1.3759026477667826, "grad_norm": 1.2580466270446777, "learning_rate": 1.181620342455284e-05, "loss": 0.789, "step": 10289 }, { "epoch": 1.3760363733618615, "grad_norm": 1.2586510181427002, "learning_rate": 1.1814783670351111e-05, "loss": 0.8122, "step": 10290 }, { "epoch": 1.3761700989569403, "grad_norm": 1.2869205474853516, "learning_rate": 1.1813363878321948e-05, "loss": 0.8484, "step": 10291 }, { "epoch": 1.3763038245520192, "grad_norm": 1.1745719909667969, "learning_rate": 1.1811944048494952e-05, "loss": 0.691, "step": 10292 }, { "epoch": 1.3764375501470982, "grad_norm": 1.0377514362335205, "learning_rate": 1.1810524180899716e-05, "loss": 0.6828, "step": 10293 }, { "epoch": 1.376571275742177, "grad_norm": 1.106729507446289, "learning_rate": 1.1809104275565835e-05, "loss": 0.6657, "step": 10294 }, { "epoch": 1.376705001337256, "grad_norm": 1.1703206300735474, "learning_rate": 1.1807684332522906e-05, "loss": 0.6978, "step": 10295 }, { "epoch": 1.3768387269323348, "grad_norm": 1.1567052602767944, "learning_rate": 1.1806264351800527e-05, "loss": 0.7048, "step": 10296 }, { "epoch": 1.3769724525274136, "grad_norm": 1.1369904279708862, "learning_rate": 1.1804844333428299e-05, "loss": 0.7305, "step": 10297 }, { "epoch": 1.3771061781224927, "grad_norm": 1.182319164276123, "learning_rate": 1.1803424277435818e-05, "loss": 0.5883, "step": 10298 }, { "epoch": 1.3772399037175715, "grad_norm": 1.2064143419265747, "learning_rate": 1.180200418385268e-05, "loss": 0.7143, "step": 10299 }, { "epoch": 1.3773736293126504, "grad_norm": 1.1199012994766235, "learning_rate": 1.180058405270849e-05, "loss": 0.6539, "step": 10300 }, { "epoch": 1.3775073549077295, "grad_norm": 1.131047248840332, "learning_rate": 1.1799163884032847e-05, "loss": 0.739, "step": 10301 }, { "epoch": 1.3776410805028083, "grad_norm": 1.173695683479309, "learning_rate": 1.1797743677855358e-05, "loss": 0.7465, "step": 10302 }, { "epoch": 1.3777748060978872, "grad_norm": 1.143878698348999, "learning_rate": 1.1796323434205622e-05, "loss": 0.7075, "step": 10303 }, { "epoch": 1.377908531692966, "grad_norm": 1.195981502532959, "learning_rate": 1.179490315311324e-05, "loss": 0.7586, "step": 10304 }, { "epoch": 1.3780422572880449, "grad_norm": 1.3747605085372925, "learning_rate": 1.1793482834607822e-05, "loss": 0.7788, "step": 10305 }, { "epoch": 1.378175982883124, "grad_norm": 1.3642431497573853, "learning_rate": 1.179206247871897e-05, "loss": 0.7699, "step": 10306 }, { "epoch": 1.3783097084782028, "grad_norm": 1.3034253120422363, "learning_rate": 1.1790642085476287e-05, "loss": 0.7003, "step": 10307 }, { "epoch": 1.3784434340732816, "grad_norm": 1.3358523845672607, "learning_rate": 1.1789221654909386e-05, "loss": 0.8, "step": 10308 }, { "epoch": 1.3785771596683605, "grad_norm": 1.1389260292053223, "learning_rate": 1.1787801187047872e-05, "loss": 0.7155, "step": 10309 }, { "epoch": 1.3787108852634393, "grad_norm": 1.2290832996368408, "learning_rate": 1.1786380681921355e-05, "loss": 0.7649, "step": 10310 }, { "epoch": 1.3788446108585184, "grad_norm": 1.311579704284668, "learning_rate": 1.1784960139559441e-05, "loss": 0.7452, "step": 10311 }, { "epoch": 1.3789783364535972, "grad_norm": 1.252864956855774, "learning_rate": 1.1783539559991737e-05, "loss": 0.7387, "step": 10312 }, { "epoch": 1.379112062048676, "grad_norm": 1.2025372982025146, "learning_rate": 1.178211894324786e-05, "loss": 0.7219, "step": 10313 }, { "epoch": 1.379245787643755, "grad_norm": 1.226413607597351, "learning_rate": 1.1780698289357419e-05, "loss": 0.7064, "step": 10314 }, { "epoch": 1.3793795132388338, "grad_norm": 1.3026734590530396, "learning_rate": 1.1779277598350028e-05, "loss": 0.7633, "step": 10315 }, { "epoch": 1.3795132388339129, "grad_norm": 1.1103025674819946, "learning_rate": 1.1777856870255295e-05, "loss": 0.6596, "step": 10316 }, { "epoch": 1.3796469644289917, "grad_norm": 1.1582976579666138, "learning_rate": 1.1776436105102838e-05, "loss": 0.7621, "step": 10317 }, { "epoch": 1.3797806900240706, "grad_norm": 1.2690963745117188, "learning_rate": 1.1775015302922273e-05, "loss": 0.6599, "step": 10318 }, { "epoch": 1.3799144156191496, "grad_norm": 1.1598347425460815, "learning_rate": 1.1773594463743207e-05, "loss": 0.6629, "step": 10319 }, { "epoch": 1.3800481412142285, "grad_norm": 1.1277376413345337, "learning_rate": 1.1772173587595263e-05, "loss": 0.6953, "step": 10320 }, { "epoch": 1.3801818668093073, "grad_norm": 1.106000304222107, "learning_rate": 1.177075267450806e-05, "loss": 0.6657, "step": 10321 }, { "epoch": 1.3803155924043862, "grad_norm": 1.133597731590271, "learning_rate": 1.1769331724511211e-05, "loss": 0.6789, "step": 10322 }, { "epoch": 1.380449317999465, "grad_norm": 1.1937872171401978, "learning_rate": 1.1767910737634334e-05, "loss": 0.6696, "step": 10323 }, { "epoch": 1.380583043594544, "grad_norm": 1.1425434350967407, "learning_rate": 1.1766489713907047e-05, "loss": 0.7452, "step": 10324 }, { "epoch": 1.380716769189623, "grad_norm": 1.12587571144104, "learning_rate": 1.1765068653358975e-05, "loss": 0.6665, "step": 10325 }, { "epoch": 1.3808504947847018, "grad_norm": 1.0703985691070557, "learning_rate": 1.1763647556019735e-05, "loss": 0.7606, "step": 10326 }, { "epoch": 1.3809842203797806, "grad_norm": 1.0838770866394043, "learning_rate": 1.176222642191895e-05, "loss": 0.694, "step": 10327 }, { "epoch": 1.3811179459748595, "grad_norm": 1.2649205923080444, "learning_rate": 1.176080525108624e-05, "loss": 0.7611, "step": 10328 }, { "epoch": 1.3812516715699386, "grad_norm": 1.19253408908844, "learning_rate": 1.1759384043551232e-05, "loss": 0.6768, "step": 10329 }, { "epoch": 1.3813853971650174, "grad_norm": 1.1661680936813354, "learning_rate": 1.1757962799343548e-05, "loss": 0.6508, "step": 10330 }, { "epoch": 1.3815191227600963, "grad_norm": 1.1784833669662476, "learning_rate": 1.175654151849281e-05, "loss": 0.7297, "step": 10331 }, { "epoch": 1.381652848355175, "grad_norm": 1.1571674346923828, "learning_rate": 1.1755120201028642e-05, "loss": 0.6632, "step": 10332 }, { "epoch": 1.381786573950254, "grad_norm": 1.2020539045333862, "learning_rate": 1.1753698846980677e-05, "loss": 0.6959, "step": 10333 }, { "epoch": 1.381920299545333, "grad_norm": 1.0686465501785278, "learning_rate": 1.1752277456378536e-05, "loss": 0.6462, "step": 10334 }, { "epoch": 1.3820540251404119, "grad_norm": 1.1543594598770142, "learning_rate": 1.1750856029251847e-05, "loss": 0.6715, "step": 10335 }, { "epoch": 1.3821877507354907, "grad_norm": 1.1921883821487427, "learning_rate": 1.174943456563024e-05, "loss": 0.7385, "step": 10336 }, { "epoch": 1.3823214763305698, "grad_norm": 1.2841330766677856, "learning_rate": 1.1748013065543344e-05, "loss": 0.7386, "step": 10337 }, { "epoch": 1.3824552019256486, "grad_norm": 1.1827079057693481, "learning_rate": 1.1746591529020789e-05, "loss": 0.6218, "step": 10338 }, { "epoch": 1.3825889275207275, "grad_norm": 1.093856692314148, "learning_rate": 1.1745169956092204e-05, "loss": 0.7056, "step": 10339 }, { "epoch": 1.3827226531158063, "grad_norm": 1.128021478652954, "learning_rate": 1.174374834678722e-05, "loss": 0.7121, "step": 10340 }, { "epoch": 1.3828563787108852, "grad_norm": 1.2980906963348389, "learning_rate": 1.1742326701135473e-05, "loss": 0.7339, "step": 10341 }, { "epoch": 1.3829901043059643, "grad_norm": 1.387661099433899, "learning_rate": 1.1740905019166594e-05, "loss": 0.7134, "step": 10342 }, { "epoch": 1.383123829901043, "grad_norm": 1.3027377128601074, "learning_rate": 1.1739483300910213e-05, "loss": 0.7705, "step": 10343 }, { "epoch": 1.383257555496122, "grad_norm": 1.1753196716308594, "learning_rate": 1.1738061546395967e-05, "loss": 0.6934, "step": 10344 }, { "epoch": 1.3833912810912008, "grad_norm": 1.255450963973999, "learning_rate": 1.1736639755653492e-05, "loss": 0.7607, "step": 10345 }, { "epoch": 1.3835250066862796, "grad_norm": 1.2707215547561646, "learning_rate": 1.1735217928712423e-05, "loss": 0.7238, "step": 10346 }, { "epoch": 1.3836587322813587, "grad_norm": 1.229047417640686, "learning_rate": 1.1733796065602397e-05, "loss": 0.7781, "step": 10347 }, { "epoch": 1.3837924578764376, "grad_norm": 1.1879738569259644, "learning_rate": 1.1732374166353051e-05, "loss": 0.6732, "step": 10348 }, { "epoch": 1.3839261834715164, "grad_norm": 1.1346478462219238, "learning_rate": 1.1730952230994022e-05, "loss": 0.7634, "step": 10349 }, { "epoch": 1.3840599090665955, "grad_norm": 1.2419096231460571, "learning_rate": 1.1729530259554953e-05, "loss": 0.6875, "step": 10350 }, { "epoch": 1.384193634661674, "grad_norm": 1.0874700546264648, "learning_rate": 1.172810825206548e-05, "loss": 0.7693, "step": 10351 }, { "epoch": 1.3843273602567532, "grad_norm": 1.2425285577774048, "learning_rate": 1.172668620855524e-05, "loss": 0.7107, "step": 10352 }, { "epoch": 1.384461085851832, "grad_norm": 1.1933974027633667, "learning_rate": 1.1725264129053881e-05, "loss": 0.7262, "step": 10353 }, { "epoch": 1.3845948114469109, "grad_norm": 1.3124704360961914, "learning_rate": 1.1723842013591044e-05, "loss": 0.7386, "step": 10354 }, { "epoch": 1.38472853704199, "grad_norm": 1.1542627811431885, "learning_rate": 1.1722419862196369e-05, "loss": 0.7168, "step": 10355 }, { "epoch": 1.3848622626370688, "grad_norm": 1.0263744592666626, "learning_rate": 1.1720997674899496e-05, "loss": 0.6703, "step": 10356 }, { "epoch": 1.3849959882321476, "grad_norm": 1.1206023693084717, "learning_rate": 1.171957545173008e-05, "loss": 0.6586, "step": 10357 }, { "epoch": 1.3851297138272265, "grad_norm": 1.2792408466339111, "learning_rate": 1.1718153192717753e-05, "loss": 0.7298, "step": 10358 }, { "epoch": 1.3852634394223053, "grad_norm": 1.2007086277008057, "learning_rate": 1.171673089789217e-05, "loss": 0.7213, "step": 10359 }, { "epoch": 1.3853971650173844, "grad_norm": 1.1382535696029663, "learning_rate": 1.1715308567282972e-05, "loss": 0.7705, "step": 10360 }, { "epoch": 1.3855308906124633, "grad_norm": 1.2086182832717896, "learning_rate": 1.1713886200919811e-05, "loss": 0.7531, "step": 10361 }, { "epoch": 1.385664616207542, "grad_norm": 1.2057385444641113, "learning_rate": 1.1712463798832335e-05, "loss": 0.758, "step": 10362 }, { "epoch": 1.385798341802621, "grad_norm": 1.204099416732788, "learning_rate": 1.1711041361050183e-05, "loss": 0.695, "step": 10363 }, { "epoch": 1.3859320673976998, "grad_norm": 1.2312778234481812, "learning_rate": 1.1709618887603013e-05, "loss": 0.7131, "step": 10364 }, { "epoch": 1.3860657929927789, "grad_norm": 1.306699275970459, "learning_rate": 1.1708196378520476e-05, "loss": 0.8469, "step": 10365 }, { "epoch": 1.3861995185878577, "grad_norm": 1.324061393737793, "learning_rate": 1.1706773833832214e-05, "loss": 0.7263, "step": 10366 }, { "epoch": 1.3863332441829366, "grad_norm": 1.1715503931045532, "learning_rate": 1.1705351253567892e-05, "loss": 0.6619, "step": 10367 }, { "epoch": 1.3864669697780156, "grad_norm": 1.2502814531326294, "learning_rate": 1.1703928637757152e-05, "loss": 0.6123, "step": 10368 }, { "epoch": 1.3866006953730943, "grad_norm": 1.3710945844650269, "learning_rate": 1.1702505986429648e-05, "loss": 0.7197, "step": 10369 }, { "epoch": 1.3867344209681733, "grad_norm": 1.2241703271865845, "learning_rate": 1.170108329961504e-05, "loss": 0.7071, "step": 10370 }, { "epoch": 1.3868681465632522, "grad_norm": 1.2022857666015625, "learning_rate": 1.1699660577342974e-05, "loss": 0.6809, "step": 10371 }, { "epoch": 1.387001872158331, "grad_norm": 1.2323219776153564, "learning_rate": 1.1698237819643112e-05, "loss": 0.718, "step": 10372 }, { "epoch": 1.38713559775341, "grad_norm": 1.1654260158538818, "learning_rate": 1.1696815026545107e-05, "loss": 0.6962, "step": 10373 }, { "epoch": 1.387269323348489, "grad_norm": 1.1183232069015503, "learning_rate": 1.1695392198078617e-05, "loss": 0.6485, "step": 10374 }, { "epoch": 1.3874030489435678, "grad_norm": 1.1446477174758911, "learning_rate": 1.1693969334273301e-05, "loss": 0.6248, "step": 10375 }, { "epoch": 1.3875367745386467, "grad_norm": 1.2017310857772827, "learning_rate": 1.1692546435158814e-05, "loss": 0.751, "step": 10376 }, { "epoch": 1.3876705001337255, "grad_norm": 1.3004542589187622, "learning_rate": 1.1691123500764813e-05, "loss": 0.7715, "step": 10377 }, { "epoch": 1.3878042257288046, "grad_norm": 1.2739020586013794, "learning_rate": 1.1689700531120965e-05, "loss": 0.7635, "step": 10378 }, { "epoch": 1.3879379513238834, "grad_norm": 1.2735795974731445, "learning_rate": 1.1688277526256923e-05, "loss": 0.7797, "step": 10379 }, { "epoch": 1.3880716769189623, "grad_norm": 1.2582001686096191, "learning_rate": 1.1686854486202352e-05, "loss": 0.735, "step": 10380 }, { "epoch": 1.3882054025140411, "grad_norm": 1.1086448431015015, "learning_rate": 1.1685431410986913e-05, "loss": 0.639, "step": 10381 }, { "epoch": 1.38833912810912, "grad_norm": 1.215226173400879, "learning_rate": 1.168400830064027e-05, "loss": 0.7643, "step": 10382 }, { "epoch": 1.388472853704199, "grad_norm": 1.1814804077148438, "learning_rate": 1.168258515519209e-05, "loss": 0.724, "step": 10383 }, { "epoch": 1.3886065792992779, "grad_norm": 1.2276791334152222, "learning_rate": 1.1681161974672026e-05, "loss": 0.7121, "step": 10384 }, { "epoch": 1.3887403048943567, "grad_norm": 1.1327016353607178, "learning_rate": 1.1679738759109748e-05, "loss": 0.6977, "step": 10385 }, { "epoch": 1.3888740304894358, "grad_norm": 1.219773769378662, "learning_rate": 1.1678315508534928e-05, "loss": 0.6898, "step": 10386 }, { "epoch": 1.3890077560845147, "grad_norm": 1.2139183282852173, "learning_rate": 1.1676892222977227e-05, "loss": 0.7137, "step": 10387 }, { "epoch": 1.3891414816795935, "grad_norm": 1.1520743370056152, "learning_rate": 1.1675468902466311e-05, "loss": 0.7419, "step": 10388 }, { "epoch": 1.3892752072746724, "grad_norm": 1.0907866954803467, "learning_rate": 1.167404554703185e-05, "loss": 0.696, "step": 10389 }, { "epoch": 1.3894089328697512, "grad_norm": 1.1469650268554688, "learning_rate": 1.1672622156703508e-05, "loss": 0.6937, "step": 10390 }, { "epoch": 1.3895426584648303, "grad_norm": 1.1694732904434204, "learning_rate": 1.167119873151096e-05, "loss": 0.7037, "step": 10391 }, { "epoch": 1.3896763840599091, "grad_norm": 1.2636549472808838, "learning_rate": 1.1669775271483875e-05, "loss": 0.7302, "step": 10392 }, { "epoch": 1.389810109654988, "grad_norm": 1.1828047037124634, "learning_rate": 1.1668351776651918e-05, "loss": 0.7414, "step": 10393 }, { "epoch": 1.3899438352500668, "grad_norm": 1.1118900775909424, "learning_rate": 1.1666928247044769e-05, "loss": 0.7062, "step": 10394 }, { "epoch": 1.3900775608451457, "grad_norm": 1.1836761236190796, "learning_rate": 1.1665504682692096e-05, "loss": 0.7323, "step": 10395 }, { "epoch": 1.3902112864402247, "grad_norm": 1.2827930450439453, "learning_rate": 1.1664081083623569e-05, "loss": 0.694, "step": 10396 }, { "epoch": 1.3903450120353036, "grad_norm": 1.0846987962722778, "learning_rate": 1.1662657449868865e-05, "loss": 0.6873, "step": 10397 }, { "epoch": 1.3904787376303824, "grad_norm": 1.0482357740402222, "learning_rate": 1.1661233781457655e-05, "loss": 0.6278, "step": 10398 }, { "epoch": 1.3906124632254613, "grad_norm": 1.2821825742721558, "learning_rate": 1.165981007841962e-05, "loss": 0.8333, "step": 10399 }, { "epoch": 1.3907461888205401, "grad_norm": 1.354382872581482, "learning_rate": 1.1658386340784431e-05, "loss": 0.7476, "step": 10400 }, { "epoch": 1.3908799144156192, "grad_norm": 1.040104866027832, "learning_rate": 1.1656962568581767e-05, "loss": 0.6552, "step": 10401 }, { "epoch": 1.391013640010698, "grad_norm": 1.144014596939087, "learning_rate": 1.16555387618413e-05, "loss": 0.7513, "step": 10402 }, { "epoch": 1.391147365605777, "grad_norm": 1.3031235933303833, "learning_rate": 1.1654114920592715e-05, "loss": 0.7119, "step": 10403 }, { "epoch": 1.391281091200856, "grad_norm": 1.069855809211731, "learning_rate": 1.1652691044865687e-05, "loss": 0.614, "step": 10404 }, { "epoch": 1.3914148167959348, "grad_norm": 1.1170841455459595, "learning_rate": 1.1651267134689895e-05, "loss": 0.6868, "step": 10405 }, { "epoch": 1.3915485423910137, "grad_norm": 1.2767812013626099, "learning_rate": 1.1649843190095018e-05, "loss": 0.7182, "step": 10406 }, { "epoch": 1.3916822679860925, "grad_norm": 1.0920031070709229, "learning_rate": 1.1648419211110742e-05, "loss": 0.5809, "step": 10407 }, { "epoch": 1.3918159935811714, "grad_norm": 1.2283834218978882, "learning_rate": 1.1646995197766743e-05, "loss": 0.7666, "step": 10408 }, { "epoch": 1.3919497191762504, "grad_norm": 1.1616506576538086, "learning_rate": 1.1645571150092705e-05, "loss": 0.7647, "step": 10409 }, { "epoch": 1.3920834447713293, "grad_norm": 1.1822274923324585, "learning_rate": 1.1644147068118313e-05, "loss": 0.7814, "step": 10410 }, { "epoch": 1.3922171703664081, "grad_norm": 1.3648608922958374, "learning_rate": 1.1642722951873244e-05, "loss": 0.8343, "step": 10411 }, { "epoch": 1.392350895961487, "grad_norm": 1.2371116876602173, "learning_rate": 1.1641298801387191e-05, "loss": 0.7261, "step": 10412 }, { "epoch": 1.3924846215565658, "grad_norm": 1.3450381755828857, "learning_rate": 1.1639874616689832e-05, "loss": 0.7393, "step": 10413 }, { "epoch": 1.392618347151645, "grad_norm": 1.193926215171814, "learning_rate": 1.1638450397810859e-05, "loss": 0.7624, "step": 10414 }, { "epoch": 1.3927520727467237, "grad_norm": 1.2264595031738281, "learning_rate": 1.1637026144779955e-05, "loss": 0.7088, "step": 10415 }, { "epoch": 1.3928857983418026, "grad_norm": 1.0404021739959717, "learning_rate": 1.1635601857626806e-05, "loss": 0.687, "step": 10416 }, { "epoch": 1.3930195239368814, "grad_norm": 1.0510411262512207, "learning_rate": 1.16341775363811e-05, "loss": 0.6787, "step": 10417 }, { "epoch": 1.3931532495319603, "grad_norm": 1.2562861442565918, "learning_rate": 1.163275318107253e-05, "loss": 0.7888, "step": 10418 }, { "epoch": 1.3932869751270394, "grad_norm": 1.365065574645996, "learning_rate": 1.1631328791730781e-05, "loss": 0.6849, "step": 10419 }, { "epoch": 1.3934207007221182, "grad_norm": 1.1708908081054688, "learning_rate": 1.1629904368385545e-05, "loss": 0.6181, "step": 10420 }, { "epoch": 1.393554426317197, "grad_norm": 1.1322797536849976, "learning_rate": 1.162847991106651e-05, "loss": 0.6669, "step": 10421 }, { "epoch": 1.3936881519122761, "grad_norm": 1.2137596607208252, "learning_rate": 1.1627055419803372e-05, "loss": 0.6936, "step": 10422 }, { "epoch": 1.393821877507355, "grad_norm": 1.450652837753296, "learning_rate": 1.1625630894625819e-05, "loss": 0.8471, "step": 10423 }, { "epoch": 1.3939556031024338, "grad_norm": 1.2751837968826294, "learning_rate": 1.1624206335563547e-05, "loss": 0.7039, "step": 10424 }, { "epoch": 1.3940893286975127, "grad_norm": 1.1801493167877197, "learning_rate": 1.1622781742646248e-05, "loss": 0.7327, "step": 10425 }, { "epoch": 1.3942230542925915, "grad_norm": 1.1296132802963257, "learning_rate": 1.1621357115903615e-05, "loss": 0.7745, "step": 10426 }, { "epoch": 1.3943567798876706, "grad_norm": 1.184929370880127, "learning_rate": 1.1619932455365346e-05, "loss": 0.7566, "step": 10427 }, { "epoch": 1.3944905054827494, "grad_norm": 1.3677117824554443, "learning_rate": 1.1618507761061136e-05, "loss": 0.7303, "step": 10428 }, { "epoch": 1.3946242310778283, "grad_norm": 1.2666159868240356, "learning_rate": 1.1617083033020678e-05, "loss": 0.7569, "step": 10429 }, { "epoch": 1.3947579566729071, "grad_norm": 1.1321218013763428, "learning_rate": 1.1615658271273668e-05, "loss": 0.7069, "step": 10430 }, { "epoch": 1.394891682267986, "grad_norm": 1.1485258340835571, "learning_rate": 1.1614233475849815e-05, "loss": 0.6681, "step": 10431 }, { "epoch": 1.395025407863065, "grad_norm": 1.227471113204956, "learning_rate": 1.1612808646778806e-05, "loss": 0.7367, "step": 10432 }, { "epoch": 1.395159133458144, "grad_norm": 1.1490963697433472, "learning_rate": 1.1611383784090344e-05, "loss": 0.6271, "step": 10433 }, { "epoch": 1.3952928590532228, "grad_norm": 1.0161354541778564, "learning_rate": 1.160995888781413e-05, "loss": 0.6329, "step": 10434 }, { "epoch": 1.3954265846483016, "grad_norm": 1.1661683320999146, "learning_rate": 1.1608533957979867e-05, "loss": 0.7235, "step": 10435 }, { "epoch": 1.3955603102433805, "grad_norm": 1.1211094856262207, "learning_rate": 1.1607108994617245e-05, "loss": 0.7422, "step": 10436 }, { "epoch": 1.3956940358384595, "grad_norm": 1.2231959104537964, "learning_rate": 1.1605683997755977e-05, "loss": 0.712, "step": 10437 }, { "epoch": 1.3958277614335384, "grad_norm": 1.1116641759872437, "learning_rate": 1.1604258967425764e-05, "loss": 0.7079, "step": 10438 }, { "epoch": 1.3959614870286172, "grad_norm": 1.303560733795166, "learning_rate": 1.1602833903656309e-05, "loss": 0.7265, "step": 10439 }, { "epoch": 1.3960952126236963, "grad_norm": 1.1787686347961426, "learning_rate": 1.1601408806477312e-05, "loss": 0.7229, "step": 10440 }, { "epoch": 1.3962289382187751, "grad_norm": 1.2804287672042847, "learning_rate": 1.1599983675918483e-05, "loss": 0.7649, "step": 10441 }, { "epoch": 1.396362663813854, "grad_norm": 1.0167394876480103, "learning_rate": 1.1598558512009524e-05, "loss": 0.6726, "step": 10442 }, { "epoch": 1.3964963894089328, "grad_norm": 1.196326732635498, "learning_rate": 1.1597133314780142e-05, "loss": 0.7198, "step": 10443 }, { "epoch": 1.3966301150040117, "grad_norm": 1.1013567447662354, "learning_rate": 1.1595708084260044e-05, "loss": 0.6787, "step": 10444 }, { "epoch": 1.3967638405990908, "grad_norm": 1.0910524129867554, "learning_rate": 1.1594282820478941e-05, "loss": 0.648, "step": 10445 }, { "epoch": 1.3968975661941696, "grad_norm": 1.166200041770935, "learning_rate": 1.1592857523466537e-05, "loss": 0.6959, "step": 10446 }, { "epoch": 1.3970312917892485, "grad_norm": 1.1874009370803833, "learning_rate": 1.1591432193252544e-05, "loss": 0.6, "step": 10447 }, { "epoch": 1.3971650173843273, "grad_norm": 1.1876559257507324, "learning_rate": 1.1590006829866665e-05, "loss": 0.7398, "step": 10448 }, { "epoch": 1.3972987429794061, "grad_norm": 1.2209651470184326, "learning_rate": 1.1588581433338614e-05, "loss": 0.6535, "step": 10449 }, { "epoch": 1.3974324685744852, "grad_norm": 1.2398382425308228, "learning_rate": 1.1587156003698108e-05, "loss": 0.7661, "step": 10450 }, { "epoch": 1.397566194169564, "grad_norm": 1.1994364261627197, "learning_rate": 1.1585730540974851e-05, "loss": 0.6363, "step": 10451 }, { "epoch": 1.397699919764643, "grad_norm": 1.2190515995025635, "learning_rate": 1.1584305045198563e-05, "loss": 0.7149, "step": 10452 }, { "epoch": 1.397833645359722, "grad_norm": 1.1928738355636597, "learning_rate": 1.1582879516398949e-05, "loss": 0.5812, "step": 10453 }, { "epoch": 1.3979673709548006, "grad_norm": 1.3220523595809937, "learning_rate": 1.1581453954605724e-05, "loss": 0.7372, "step": 10454 }, { "epoch": 1.3981010965498797, "grad_norm": 1.0939383506774902, "learning_rate": 1.1580028359848608e-05, "loss": 0.6771, "step": 10455 }, { "epoch": 1.3982348221449585, "grad_norm": 1.2797682285308838, "learning_rate": 1.1578602732157309e-05, "loss": 0.7647, "step": 10456 }, { "epoch": 1.3983685477400374, "grad_norm": 1.193174958229065, "learning_rate": 1.157717707156155e-05, "loss": 0.6134, "step": 10457 }, { "epoch": 1.3985022733351165, "grad_norm": 1.2477015256881714, "learning_rate": 1.1575751378091043e-05, "loss": 0.7773, "step": 10458 }, { "epoch": 1.3986359989301953, "grad_norm": 1.2169758081436157, "learning_rate": 1.1574325651775507e-05, "loss": 0.6842, "step": 10459 }, { "epoch": 1.3987697245252741, "grad_norm": 1.237100601196289, "learning_rate": 1.157289989264466e-05, "loss": 0.7388, "step": 10460 }, { "epoch": 1.398903450120353, "grad_norm": 1.3609181642532349, "learning_rate": 1.1571474100728218e-05, "loss": 0.8051, "step": 10461 }, { "epoch": 1.3990371757154318, "grad_norm": 1.2711882591247559, "learning_rate": 1.15700482760559e-05, "loss": 0.7485, "step": 10462 }, { "epoch": 1.399170901310511, "grad_norm": 1.261265754699707, "learning_rate": 1.156862241865743e-05, "loss": 0.6726, "step": 10463 }, { "epoch": 1.3993046269055898, "grad_norm": 1.21962571144104, "learning_rate": 1.1567196528562529e-05, "loss": 0.7001, "step": 10464 }, { "epoch": 1.3994383525006686, "grad_norm": 1.1329017877578735, "learning_rate": 1.1565770605800915e-05, "loss": 0.7008, "step": 10465 }, { "epoch": 1.3995720780957475, "grad_norm": 1.1051579713821411, "learning_rate": 1.156434465040231e-05, "loss": 0.7413, "step": 10466 }, { "epoch": 1.3997058036908263, "grad_norm": 1.1910037994384766, "learning_rate": 1.1562918662396438e-05, "loss": 0.667, "step": 10467 }, { "epoch": 1.3998395292859054, "grad_norm": 1.1898396015167236, "learning_rate": 1.1561492641813021e-05, "loss": 0.7743, "step": 10468 }, { "epoch": 1.3999732548809842, "grad_norm": 1.1269909143447876, "learning_rate": 1.1560066588681786e-05, "loss": 0.6901, "step": 10469 }, { "epoch": 1.400106980476063, "grad_norm": 1.1019412279129028, "learning_rate": 1.1558640503032455e-05, "loss": 0.6791, "step": 10470 }, { "epoch": 1.4002407060711421, "grad_norm": 1.3726661205291748, "learning_rate": 1.1557214384894753e-05, "loss": 0.8373, "step": 10471 }, { "epoch": 1.400374431666221, "grad_norm": 1.335279107093811, "learning_rate": 1.1555788234298411e-05, "loss": 0.7966, "step": 10472 }, { "epoch": 1.4005081572612998, "grad_norm": 1.2123539447784424, "learning_rate": 1.1554362051273149e-05, "loss": 0.7342, "step": 10473 }, { "epoch": 1.4006418828563787, "grad_norm": 1.1895947456359863, "learning_rate": 1.1552935835848697e-05, "loss": 0.6612, "step": 10474 }, { "epoch": 1.4007756084514575, "grad_norm": 1.1981195211410522, "learning_rate": 1.1551509588054783e-05, "loss": 0.6336, "step": 10475 }, { "epoch": 1.4009093340465366, "grad_norm": 1.076019525527954, "learning_rate": 1.1550083307921138e-05, "loss": 0.6427, "step": 10476 }, { "epoch": 1.4010430596416155, "grad_norm": 1.1917961835861206, "learning_rate": 1.154865699547749e-05, "loss": 0.825, "step": 10477 }, { "epoch": 1.4011767852366943, "grad_norm": 1.1850403547286987, "learning_rate": 1.1547230650753569e-05, "loss": 0.8008, "step": 10478 }, { "epoch": 1.4013105108317732, "grad_norm": 1.2097023725509644, "learning_rate": 1.1545804273779104e-05, "loss": 0.6887, "step": 10479 }, { "epoch": 1.401444236426852, "grad_norm": 1.1313683986663818, "learning_rate": 1.1544377864583832e-05, "loss": 0.6989, "step": 10480 }, { "epoch": 1.401577962021931, "grad_norm": 1.1132298707962036, "learning_rate": 1.1542951423197475e-05, "loss": 0.6375, "step": 10481 }, { "epoch": 1.40171168761701, "grad_norm": 1.294676423072815, "learning_rate": 1.1541524949649774e-05, "loss": 0.6912, "step": 10482 }, { "epoch": 1.4018454132120888, "grad_norm": 1.3265748023986816, "learning_rate": 1.1540098443970462e-05, "loss": 0.7701, "step": 10483 }, { "epoch": 1.4019791388071676, "grad_norm": 1.0388612747192383, "learning_rate": 1.1538671906189272e-05, "loss": 0.6719, "step": 10484 }, { "epoch": 1.4021128644022465, "grad_norm": 1.2976186275482178, "learning_rate": 1.1537245336335938e-05, "loss": 0.7094, "step": 10485 }, { "epoch": 1.4022465899973255, "grad_norm": 1.105157494544983, "learning_rate": 1.1535818734440196e-05, "loss": 0.6894, "step": 10486 }, { "epoch": 1.4023803155924044, "grad_norm": 1.1709946393966675, "learning_rate": 1.1534392100531781e-05, "loss": 0.645, "step": 10487 }, { "epoch": 1.4025140411874832, "grad_norm": 1.2792648077011108, "learning_rate": 1.153296543464043e-05, "loss": 0.6916, "step": 10488 }, { "epoch": 1.4026477667825623, "grad_norm": 1.194143533706665, "learning_rate": 1.1531538736795884e-05, "loss": 0.6743, "step": 10489 }, { "epoch": 1.4027814923776412, "grad_norm": 1.1946803331375122, "learning_rate": 1.1530112007027878e-05, "loss": 0.6959, "step": 10490 }, { "epoch": 1.40291521797272, "grad_norm": 1.1878280639648438, "learning_rate": 1.1528685245366149e-05, "loss": 0.6864, "step": 10491 }, { "epoch": 1.4030489435677989, "grad_norm": 1.1840901374816895, "learning_rate": 1.1527258451840445e-05, "loss": 0.74, "step": 10492 }, { "epoch": 1.4031826691628777, "grad_norm": 1.1465567350387573, "learning_rate": 1.1525831626480495e-05, "loss": 0.695, "step": 10493 }, { "epoch": 1.4033163947579568, "grad_norm": 1.2945810556411743, "learning_rate": 1.1524404769316042e-05, "loss": 0.7687, "step": 10494 }, { "epoch": 1.4034501203530356, "grad_norm": 1.217054843902588, "learning_rate": 1.1522977880376836e-05, "loss": 0.7259, "step": 10495 }, { "epoch": 1.4035838459481145, "grad_norm": 1.1944928169250488, "learning_rate": 1.1521550959692612e-05, "loss": 0.7274, "step": 10496 }, { "epoch": 1.4037175715431933, "grad_norm": 1.1552131175994873, "learning_rate": 1.1520124007293114e-05, "loss": 0.6221, "step": 10497 }, { "epoch": 1.4038512971382722, "grad_norm": 1.2658562660217285, "learning_rate": 1.1518697023208085e-05, "loss": 0.7214, "step": 10498 }, { "epoch": 1.4039850227333512, "grad_norm": 1.32713782787323, "learning_rate": 1.151727000746727e-05, "loss": 0.76, "step": 10499 }, { "epoch": 1.40411874832843, "grad_norm": 1.1041321754455566, "learning_rate": 1.1515842960100411e-05, "loss": 0.6276, "step": 10500 }, { "epoch": 1.404252473923509, "grad_norm": 1.2578433752059937, "learning_rate": 1.151441588113726e-05, "loss": 0.7332, "step": 10501 }, { "epoch": 1.4043861995185878, "grad_norm": 1.4034364223480225, "learning_rate": 1.1512988770607558e-05, "loss": 0.6808, "step": 10502 }, { "epoch": 1.4045199251136666, "grad_norm": 1.2231868505477905, "learning_rate": 1.1511561628541053e-05, "loss": 0.6867, "step": 10503 }, { "epoch": 1.4046536507087457, "grad_norm": 1.2082515954971313, "learning_rate": 1.1510134454967493e-05, "loss": 0.657, "step": 10504 }, { "epoch": 1.4047873763038246, "grad_norm": 1.1602566242218018, "learning_rate": 1.1508707249916623e-05, "loss": 0.687, "step": 10505 }, { "epoch": 1.4049211018989034, "grad_norm": 1.0312881469726562, "learning_rate": 1.1507280013418196e-05, "loss": 0.6219, "step": 10506 }, { "epoch": 1.4050548274939825, "grad_norm": 1.1029127836227417, "learning_rate": 1.1505852745501957e-05, "loss": 0.6178, "step": 10507 }, { "epoch": 1.4051885530890613, "grad_norm": 1.090996503829956, "learning_rate": 1.150442544619766e-05, "loss": 0.6061, "step": 10508 }, { "epoch": 1.4053222786841402, "grad_norm": 1.250545620918274, "learning_rate": 1.1502998115535053e-05, "loss": 0.7227, "step": 10509 }, { "epoch": 1.405456004279219, "grad_norm": 1.2194857597351074, "learning_rate": 1.1501570753543891e-05, "loss": 0.6279, "step": 10510 }, { "epoch": 1.4055897298742979, "grad_norm": 1.1679712533950806, "learning_rate": 1.1500143360253922e-05, "loss": 0.6902, "step": 10511 }, { "epoch": 1.405723455469377, "grad_norm": 1.0470558404922485, "learning_rate": 1.1498715935694901e-05, "loss": 0.6571, "step": 10512 }, { "epoch": 1.4058571810644558, "grad_norm": 1.1473331451416016, "learning_rate": 1.1497288479896577e-05, "loss": 0.6788, "step": 10513 }, { "epoch": 1.4059909066595346, "grad_norm": 1.370267391204834, "learning_rate": 1.1495860992888712e-05, "loss": 0.7734, "step": 10514 }, { "epoch": 1.4061246322546135, "grad_norm": 1.181649088859558, "learning_rate": 1.1494433474701055e-05, "loss": 0.7163, "step": 10515 }, { "epoch": 1.4062583578496923, "grad_norm": 1.3100179433822632, "learning_rate": 1.1493005925363361e-05, "loss": 0.6447, "step": 10516 }, { "epoch": 1.4063920834447714, "grad_norm": 1.1937938928604126, "learning_rate": 1.1491578344905387e-05, "loss": 0.6436, "step": 10517 }, { "epoch": 1.4065258090398502, "grad_norm": 1.1605963706970215, "learning_rate": 1.1490150733356891e-05, "loss": 0.6766, "step": 10518 }, { "epoch": 1.406659534634929, "grad_norm": 1.4393471479415894, "learning_rate": 1.1488723090747627e-05, "loss": 0.7956, "step": 10519 }, { "epoch": 1.406793260230008, "grad_norm": 1.0958194732666016, "learning_rate": 1.1487295417107355e-05, "loss": 0.6176, "step": 10520 }, { "epoch": 1.4069269858250868, "grad_norm": 1.2444887161254883, "learning_rate": 1.1485867712465835e-05, "loss": 0.7007, "step": 10521 }, { "epoch": 1.4070607114201659, "grad_norm": 1.1932224035263062, "learning_rate": 1.1484439976852823e-05, "loss": 0.6972, "step": 10522 }, { "epoch": 1.4071944370152447, "grad_norm": 1.0152866840362549, "learning_rate": 1.1483012210298082e-05, "loss": 0.6603, "step": 10523 }, { "epoch": 1.4073281626103236, "grad_norm": 1.1710230112075806, "learning_rate": 1.148158441283137e-05, "loss": 0.687, "step": 10524 }, { "epoch": 1.4074618882054026, "grad_norm": 1.258752465248108, "learning_rate": 1.1480156584482448e-05, "loss": 0.6765, "step": 10525 }, { "epoch": 1.4075956138004815, "grad_norm": 1.1693685054779053, "learning_rate": 1.1478728725281074e-05, "loss": 0.6724, "step": 10526 }, { "epoch": 1.4077293393955603, "grad_norm": 1.113629937171936, "learning_rate": 1.1477300835257019e-05, "loss": 0.689, "step": 10527 }, { "epoch": 1.4078630649906392, "grad_norm": 1.1784099340438843, "learning_rate": 1.1475872914440042e-05, "loss": 0.6146, "step": 10528 }, { "epoch": 1.407996790585718, "grad_norm": 1.1649372577667236, "learning_rate": 1.1474444962859907e-05, "loss": 0.6692, "step": 10529 }, { "epoch": 1.408130516180797, "grad_norm": 1.1371971368789673, "learning_rate": 1.1473016980546377e-05, "loss": 0.7042, "step": 10530 }, { "epoch": 1.408264241775876, "grad_norm": 1.0474406480789185, "learning_rate": 1.1471588967529218e-05, "loss": 0.672, "step": 10531 }, { "epoch": 1.4083979673709548, "grad_norm": 1.2140933275222778, "learning_rate": 1.1470160923838191e-05, "loss": 0.714, "step": 10532 }, { "epoch": 1.4085316929660336, "grad_norm": 1.1206984519958496, "learning_rate": 1.146873284950307e-05, "loss": 0.6531, "step": 10533 }, { "epoch": 1.4086654185611125, "grad_norm": 1.125379204750061, "learning_rate": 1.1467304744553618e-05, "loss": 0.6517, "step": 10534 }, { "epoch": 1.4087991441561916, "grad_norm": 1.1715943813323975, "learning_rate": 1.1465876609019602e-05, "loss": 0.7375, "step": 10535 }, { "epoch": 1.4089328697512704, "grad_norm": 1.229430913925171, "learning_rate": 1.1464448442930792e-05, "loss": 0.6702, "step": 10536 }, { "epoch": 1.4090665953463493, "grad_norm": 1.1022083759307861, "learning_rate": 1.1463020246316956e-05, "loss": 0.6732, "step": 10537 }, { "epoch": 1.409200320941428, "grad_norm": 1.07551908493042, "learning_rate": 1.1461592019207862e-05, "loss": 0.6264, "step": 10538 }, { "epoch": 1.409334046536507, "grad_norm": 1.332484483718872, "learning_rate": 1.1460163761633281e-05, "loss": 0.7845, "step": 10539 }, { "epoch": 1.409467772131586, "grad_norm": 1.2024420499801636, "learning_rate": 1.1458735473622979e-05, "loss": 0.7154, "step": 10540 }, { "epoch": 1.4096014977266649, "grad_norm": 1.18008291721344, "learning_rate": 1.1457307155206738e-05, "loss": 0.6946, "step": 10541 }, { "epoch": 1.4097352233217437, "grad_norm": 1.2743057012557983, "learning_rate": 1.1455878806414322e-05, "loss": 0.6598, "step": 10542 }, { "epoch": 1.4098689489168228, "grad_norm": 1.2505279779434204, "learning_rate": 1.1454450427275506e-05, "loss": 0.7111, "step": 10543 }, { "epoch": 1.4100026745119016, "grad_norm": 1.1035597324371338, "learning_rate": 1.1453022017820061e-05, "loss": 0.7124, "step": 10544 }, { "epoch": 1.4101364001069805, "grad_norm": 1.082471251487732, "learning_rate": 1.1451593578077764e-05, "loss": 0.674, "step": 10545 }, { "epoch": 1.4102701257020593, "grad_norm": 1.3133602142333984, "learning_rate": 1.1450165108078385e-05, "loss": 0.7018, "step": 10546 }, { "epoch": 1.4104038512971382, "grad_norm": 1.2281855344772339, "learning_rate": 1.1448736607851705e-05, "loss": 0.7349, "step": 10547 }, { "epoch": 1.4105375768922173, "grad_norm": 1.1657018661499023, "learning_rate": 1.1447308077427497e-05, "loss": 0.7649, "step": 10548 }, { "epoch": 1.410671302487296, "grad_norm": 1.1242061853408813, "learning_rate": 1.1445879516835536e-05, "loss": 0.6371, "step": 10549 }, { "epoch": 1.410805028082375, "grad_norm": 1.2457032203674316, "learning_rate": 1.14444509261056e-05, "loss": 0.8151, "step": 10550 }, { "epoch": 1.4109387536774538, "grad_norm": 1.1794532537460327, "learning_rate": 1.1443022305267468e-05, "loss": 0.6701, "step": 10551 }, { "epoch": 1.4110724792725327, "grad_norm": 1.2247318029403687, "learning_rate": 1.1441593654350914e-05, "loss": 0.7785, "step": 10552 }, { "epoch": 1.4112062048676117, "grad_norm": 1.2065447568893433, "learning_rate": 1.1440164973385722e-05, "loss": 0.6168, "step": 10553 }, { "epoch": 1.4113399304626906, "grad_norm": 1.1936233043670654, "learning_rate": 1.1438736262401669e-05, "loss": 0.6999, "step": 10554 }, { "epoch": 1.4114736560577694, "grad_norm": 1.0986779928207397, "learning_rate": 1.1437307521428533e-05, "loss": 0.7568, "step": 10555 }, { "epoch": 1.4116073816528485, "grad_norm": 1.2485164403915405, "learning_rate": 1.1435878750496099e-05, "loss": 0.7522, "step": 10556 }, { "epoch": 1.4117411072479271, "grad_norm": 1.1732702255249023, "learning_rate": 1.1434449949634147e-05, "loss": 0.7179, "step": 10557 }, { "epoch": 1.4118748328430062, "grad_norm": 1.1857529878616333, "learning_rate": 1.1433021118872458e-05, "loss": 0.7342, "step": 10558 }, { "epoch": 1.412008558438085, "grad_norm": 1.1212129592895508, "learning_rate": 1.1431592258240814e-05, "loss": 0.7086, "step": 10559 }, { "epoch": 1.4121422840331639, "grad_norm": 1.4557336568832397, "learning_rate": 1.1430163367768998e-05, "loss": 0.7516, "step": 10560 }, { "epoch": 1.412276009628243, "grad_norm": 1.1804993152618408, "learning_rate": 1.14287344474868e-05, "loss": 0.664, "step": 10561 }, { "epoch": 1.4124097352233218, "grad_norm": 1.3473013639450073, "learning_rate": 1.1427305497423995e-05, "loss": 0.815, "step": 10562 }, { "epoch": 1.4125434608184007, "grad_norm": 1.0304821729660034, "learning_rate": 1.1425876517610375e-05, "loss": 0.6309, "step": 10563 }, { "epoch": 1.4126771864134795, "grad_norm": 1.1462628841400146, "learning_rate": 1.1424447508075722e-05, "loss": 0.6792, "step": 10564 }, { "epoch": 1.4128109120085584, "grad_norm": 1.1305737495422363, "learning_rate": 1.1423018468849824e-05, "loss": 0.6808, "step": 10565 }, { "epoch": 1.4129446376036374, "grad_norm": 1.261242389678955, "learning_rate": 1.142158939996247e-05, "loss": 0.8142, "step": 10566 }, { "epoch": 1.4130783631987163, "grad_norm": 1.2355860471725464, "learning_rate": 1.1420160301443444e-05, "loss": 0.7008, "step": 10567 }, { "epoch": 1.4132120887937951, "grad_norm": 1.073434829711914, "learning_rate": 1.1418731173322532e-05, "loss": 0.6619, "step": 10568 }, { "epoch": 1.413345814388874, "grad_norm": 1.24872887134552, "learning_rate": 1.1417302015629532e-05, "loss": 0.7137, "step": 10569 }, { "epoch": 1.4134795399839528, "grad_norm": 1.3223756551742554, "learning_rate": 1.1415872828394225e-05, "loss": 0.7591, "step": 10570 }, { "epoch": 1.4136132655790319, "grad_norm": 1.17685067653656, "learning_rate": 1.1414443611646404e-05, "loss": 0.7014, "step": 10571 }, { "epoch": 1.4137469911741107, "grad_norm": 1.1834352016448975, "learning_rate": 1.1413014365415855e-05, "loss": 0.7739, "step": 10572 }, { "epoch": 1.4138807167691896, "grad_norm": 1.2675681114196777, "learning_rate": 1.1411585089732382e-05, "loss": 0.7575, "step": 10573 }, { "epoch": 1.4140144423642687, "grad_norm": 1.1324913501739502, "learning_rate": 1.1410155784625762e-05, "loss": 0.6689, "step": 10574 }, { "epoch": 1.4141481679593475, "grad_norm": 1.1453560590744019, "learning_rate": 1.1408726450125798e-05, "loss": 0.6617, "step": 10575 }, { "epoch": 1.4142818935544264, "grad_norm": 1.3069463968276978, "learning_rate": 1.1407297086262276e-05, "loss": 0.7695, "step": 10576 }, { "epoch": 1.4144156191495052, "grad_norm": 1.2291260957717896, "learning_rate": 1.1405867693064994e-05, "loss": 0.72, "step": 10577 }, { "epoch": 1.414549344744584, "grad_norm": 1.1886711120605469, "learning_rate": 1.1404438270563744e-05, "loss": 0.665, "step": 10578 }, { "epoch": 1.4146830703396631, "grad_norm": 1.3096486330032349, "learning_rate": 1.1403008818788326e-05, "loss": 0.743, "step": 10579 }, { "epoch": 1.414816795934742, "grad_norm": 1.1347885131835938, "learning_rate": 1.1401579337768528e-05, "loss": 0.7226, "step": 10580 }, { "epoch": 1.4149505215298208, "grad_norm": 1.2470582723617554, "learning_rate": 1.1400149827534154e-05, "loss": 0.6237, "step": 10581 }, { "epoch": 1.4150842471248997, "grad_norm": 1.2683314085006714, "learning_rate": 1.1398720288114992e-05, "loss": 0.6454, "step": 10582 }, { "epoch": 1.4152179727199785, "grad_norm": 1.332045078277588, "learning_rate": 1.1397290719540848e-05, "loss": 0.7271, "step": 10583 }, { "epoch": 1.4153516983150576, "grad_norm": 1.2583454847335815, "learning_rate": 1.1395861121841514e-05, "loss": 0.7871, "step": 10584 }, { "epoch": 1.4154854239101364, "grad_norm": 1.2215043306350708, "learning_rate": 1.1394431495046789e-05, "loss": 0.7032, "step": 10585 }, { "epoch": 1.4156191495052153, "grad_norm": 1.1740665435791016, "learning_rate": 1.1393001839186475e-05, "loss": 0.7392, "step": 10586 }, { "epoch": 1.4157528751002941, "grad_norm": 1.2061184644699097, "learning_rate": 1.1391572154290371e-05, "loss": 0.7668, "step": 10587 }, { "epoch": 1.415886600695373, "grad_norm": 1.1267472505569458, "learning_rate": 1.1390142440388277e-05, "loss": 0.656, "step": 10588 }, { "epoch": 1.416020326290452, "grad_norm": 1.1811354160308838, "learning_rate": 1.1388712697509997e-05, "loss": 0.7215, "step": 10589 }, { "epoch": 1.416154051885531, "grad_norm": 1.2539303302764893, "learning_rate": 1.1387282925685326e-05, "loss": 0.7037, "step": 10590 }, { "epoch": 1.4162877774806097, "grad_norm": 1.3207405805587769, "learning_rate": 1.1385853124944069e-05, "loss": 0.798, "step": 10591 }, { "epoch": 1.4164215030756888, "grad_norm": 1.113406777381897, "learning_rate": 1.138442329531603e-05, "loss": 0.6604, "step": 10592 }, { "epoch": 1.4165552286707677, "grad_norm": 1.1999109983444214, "learning_rate": 1.1382993436831015e-05, "loss": 0.74, "step": 10593 }, { "epoch": 1.4166889542658465, "grad_norm": 1.1537039279937744, "learning_rate": 1.1381563549518823e-05, "loss": 0.7552, "step": 10594 }, { "epoch": 1.4168226798609254, "grad_norm": 1.0739426612854004, "learning_rate": 1.1380133633409263e-05, "loss": 0.7614, "step": 10595 }, { "epoch": 1.4169564054560042, "grad_norm": 1.1287152767181396, "learning_rate": 1.1378703688532136e-05, "loss": 0.6624, "step": 10596 }, { "epoch": 1.4170901310510833, "grad_norm": 0.9782724380493164, "learning_rate": 1.1377273714917249e-05, "loss": 0.59, "step": 10597 }, { "epoch": 1.4172238566461621, "grad_norm": 1.2062641382217407, "learning_rate": 1.1375843712594412e-05, "loss": 0.722, "step": 10598 }, { "epoch": 1.417357582241241, "grad_norm": 1.1900321245193481, "learning_rate": 1.1374413681593428e-05, "loss": 0.6641, "step": 10599 }, { "epoch": 1.4174913078363198, "grad_norm": 1.223484992980957, "learning_rate": 1.1372983621944105e-05, "loss": 0.7572, "step": 10600 }, { "epoch": 1.4176250334313987, "grad_norm": 1.2040348052978516, "learning_rate": 1.1371553533676255e-05, "loss": 0.7681, "step": 10601 }, { "epoch": 1.4177587590264777, "grad_norm": 1.2161526679992676, "learning_rate": 1.1370123416819683e-05, "loss": 0.6975, "step": 10602 }, { "epoch": 1.4178924846215566, "grad_norm": 1.2322206497192383, "learning_rate": 1.1368693271404199e-05, "loss": 0.7875, "step": 10603 }, { "epoch": 1.4180262102166354, "grad_norm": 1.1602782011032104, "learning_rate": 1.1367263097459612e-05, "loss": 0.7053, "step": 10604 }, { "epoch": 1.4181599358117143, "grad_norm": 1.2035529613494873, "learning_rate": 1.1365832895015735e-05, "loss": 0.7326, "step": 10605 }, { "epoch": 1.4182936614067931, "grad_norm": 1.2299703359603882, "learning_rate": 1.1364402664102379e-05, "loss": 0.8074, "step": 10606 }, { "epoch": 1.4184273870018722, "grad_norm": 1.166016697883606, "learning_rate": 1.1362972404749355e-05, "loss": 0.6562, "step": 10607 }, { "epoch": 1.418561112596951, "grad_norm": 1.2602362632751465, "learning_rate": 1.1361542116986474e-05, "loss": 0.7447, "step": 10608 }, { "epoch": 1.41869483819203, "grad_norm": 1.1421501636505127, "learning_rate": 1.1360111800843555e-05, "loss": 0.5786, "step": 10609 }, { "epoch": 1.418828563787109, "grad_norm": 1.200738787651062, "learning_rate": 1.13586814563504e-05, "loss": 0.7147, "step": 10610 }, { "epoch": 1.4189622893821878, "grad_norm": 1.1282882690429688, "learning_rate": 1.1357251083536834e-05, "loss": 0.5874, "step": 10611 }, { "epoch": 1.4190960149772667, "grad_norm": 1.213294506072998, "learning_rate": 1.1355820682432667e-05, "loss": 0.6684, "step": 10612 }, { "epoch": 1.4192297405723455, "grad_norm": 1.3889409303665161, "learning_rate": 1.1354390253067717e-05, "loss": 0.7514, "step": 10613 }, { "epoch": 1.4193634661674244, "grad_norm": 1.1848816871643066, "learning_rate": 1.1352959795471798e-05, "loss": 0.6829, "step": 10614 }, { "epoch": 1.4194971917625034, "grad_norm": 1.3111480474472046, "learning_rate": 1.1351529309674724e-05, "loss": 0.7397, "step": 10615 }, { "epoch": 1.4196309173575823, "grad_norm": 1.2366198301315308, "learning_rate": 1.1350098795706316e-05, "loss": 0.7037, "step": 10616 }, { "epoch": 1.4197646429526611, "grad_norm": 1.172921895980835, "learning_rate": 1.1348668253596394e-05, "loss": 0.7101, "step": 10617 }, { "epoch": 1.41989836854774, "grad_norm": 1.1612149477005005, "learning_rate": 1.1347237683374767e-05, "loss": 0.6365, "step": 10618 }, { "epoch": 1.4200320941428188, "grad_norm": 1.3267265558242798, "learning_rate": 1.1345807085071263e-05, "loss": 0.7582, "step": 10619 }, { "epoch": 1.420165819737898, "grad_norm": 1.2563718557357788, "learning_rate": 1.1344376458715697e-05, "loss": 0.7186, "step": 10620 }, { "epoch": 1.4202995453329768, "grad_norm": 1.2076612710952759, "learning_rate": 1.134294580433789e-05, "loss": 0.7981, "step": 10621 }, { "epoch": 1.4204332709280556, "grad_norm": 1.1642730236053467, "learning_rate": 1.1341515121967666e-05, "loss": 0.7727, "step": 10622 }, { "epoch": 1.4205669965231345, "grad_norm": 1.108965277671814, "learning_rate": 1.1340084411634839e-05, "loss": 0.5688, "step": 10623 }, { "epoch": 1.4207007221182133, "grad_norm": 1.247994065284729, "learning_rate": 1.1338653673369235e-05, "loss": 0.735, "step": 10624 }, { "epoch": 1.4208344477132924, "grad_norm": 1.289122462272644, "learning_rate": 1.1337222907200678e-05, "loss": 0.6724, "step": 10625 }, { "epoch": 1.4209681733083712, "grad_norm": 1.221063256263733, "learning_rate": 1.133579211315899e-05, "loss": 0.7486, "step": 10626 }, { "epoch": 1.42110189890345, "grad_norm": 1.2657474279403687, "learning_rate": 1.1334361291273991e-05, "loss": 0.7797, "step": 10627 }, { "epoch": 1.4212356244985291, "grad_norm": 1.3419336080551147, "learning_rate": 1.1332930441575509e-05, "loss": 0.7377, "step": 10628 }, { "epoch": 1.421369350093608, "grad_norm": 1.0919914245605469, "learning_rate": 1.1331499564093369e-05, "loss": 0.6662, "step": 10629 }, { "epoch": 1.4215030756886868, "grad_norm": 1.4153435230255127, "learning_rate": 1.1330068658857391e-05, "loss": 0.71, "step": 10630 }, { "epoch": 1.4216368012837657, "grad_norm": 1.184780240058899, "learning_rate": 1.1328637725897407e-05, "loss": 0.7355, "step": 10631 }, { "epoch": 1.4217705268788445, "grad_norm": 1.1008286476135254, "learning_rate": 1.132720676524324e-05, "loss": 0.6934, "step": 10632 }, { "epoch": 1.4219042524739236, "grad_norm": 1.0425958633422852, "learning_rate": 1.1325775776924719e-05, "loss": 0.6831, "step": 10633 }, { "epoch": 1.4220379780690025, "grad_norm": 1.1017608642578125, "learning_rate": 1.132434476097167e-05, "loss": 0.6018, "step": 10634 }, { "epoch": 1.4221717036640813, "grad_norm": 1.0533000230789185, "learning_rate": 1.1322913717413923e-05, "loss": 0.6858, "step": 10635 }, { "epoch": 1.4223054292591601, "grad_norm": 1.2206239700317383, "learning_rate": 1.1321482646281301e-05, "loss": 0.6656, "step": 10636 }, { "epoch": 1.422439154854239, "grad_norm": 1.127484917640686, "learning_rate": 1.132005154760364e-05, "loss": 0.6984, "step": 10637 }, { "epoch": 1.422572880449318, "grad_norm": 1.2618334293365479, "learning_rate": 1.1318620421410773e-05, "loss": 0.791, "step": 10638 }, { "epoch": 1.422706606044397, "grad_norm": 1.209006667137146, "learning_rate": 1.131718926773252e-05, "loss": 0.6387, "step": 10639 }, { "epoch": 1.4228403316394758, "grad_norm": 1.3916287422180176, "learning_rate": 1.1315758086598717e-05, "loss": 0.826, "step": 10640 }, { "epoch": 1.4229740572345546, "grad_norm": 1.3079981803894043, "learning_rate": 1.1314326878039197e-05, "loss": 0.7687, "step": 10641 }, { "epoch": 1.4231077828296335, "grad_norm": 1.3276675939559937, "learning_rate": 1.1312895642083789e-05, "loss": 0.7113, "step": 10642 }, { "epoch": 1.4232415084247125, "grad_norm": 1.2755855321884155, "learning_rate": 1.1311464378762329e-05, "loss": 0.6985, "step": 10643 }, { "epoch": 1.4233752340197914, "grad_norm": 1.199749231338501, "learning_rate": 1.1310033088104649e-05, "loss": 0.7106, "step": 10644 }, { "epoch": 1.4235089596148702, "grad_norm": 1.5804774761199951, "learning_rate": 1.1308601770140584e-05, "loss": 0.7697, "step": 10645 }, { "epoch": 1.4236426852099493, "grad_norm": 1.0354292392730713, "learning_rate": 1.1307170424899967e-05, "loss": 0.6212, "step": 10646 }, { "epoch": 1.4237764108050281, "grad_norm": 1.1773607730865479, "learning_rate": 1.1305739052412633e-05, "loss": 0.7717, "step": 10647 }, { "epoch": 1.423910136400107, "grad_norm": 1.0961626768112183, "learning_rate": 1.1304307652708417e-05, "loss": 0.6524, "step": 10648 }, { "epoch": 1.4240438619951858, "grad_norm": 1.0505746603012085, "learning_rate": 1.1302876225817155e-05, "loss": 0.6265, "step": 10649 }, { "epoch": 1.4241775875902647, "grad_norm": 1.3203015327453613, "learning_rate": 1.1301444771768686e-05, "loss": 0.8076, "step": 10650 }, { "epoch": 1.4243113131853438, "grad_norm": 1.0896475315093994, "learning_rate": 1.1300013290592846e-05, "loss": 0.6649, "step": 10651 }, { "epoch": 1.4244450387804226, "grad_norm": 1.1877771615982056, "learning_rate": 1.1298581782319473e-05, "loss": 0.729, "step": 10652 }, { "epoch": 1.4245787643755015, "grad_norm": 1.1570836305618286, "learning_rate": 1.1297150246978406e-05, "loss": 0.7031, "step": 10653 }, { "epoch": 1.4247124899705803, "grad_norm": 1.1245529651641846, "learning_rate": 1.1295718684599486e-05, "loss": 0.7046, "step": 10654 }, { "epoch": 1.4248462155656592, "grad_norm": 1.210593581199646, "learning_rate": 1.1294287095212543e-05, "loss": 0.7298, "step": 10655 }, { "epoch": 1.4249799411607382, "grad_norm": 1.226955771446228, "learning_rate": 1.1292855478847429e-05, "loss": 0.7241, "step": 10656 }, { "epoch": 1.425113666755817, "grad_norm": 1.2139356136322021, "learning_rate": 1.1291423835533977e-05, "loss": 0.699, "step": 10657 }, { "epoch": 1.425247392350896, "grad_norm": 1.1558961868286133, "learning_rate": 1.1289992165302036e-05, "loss": 0.6864, "step": 10658 }, { "epoch": 1.425381117945975, "grad_norm": 1.1971807479858398, "learning_rate": 1.1288560468181437e-05, "loss": 0.6994, "step": 10659 }, { "epoch": 1.4255148435410536, "grad_norm": 1.1828196048736572, "learning_rate": 1.1287128744202032e-05, "loss": 0.7557, "step": 10660 }, { "epoch": 1.4256485691361327, "grad_norm": 1.2036288976669312, "learning_rate": 1.1285696993393658e-05, "loss": 0.7398, "step": 10661 }, { "epoch": 1.4257822947312115, "grad_norm": 1.0965896844863892, "learning_rate": 1.1284265215786159e-05, "loss": 0.7035, "step": 10662 }, { "epoch": 1.4259160203262904, "grad_norm": 1.1354644298553467, "learning_rate": 1.1282833411409381e-05, "loss": 0.676, "step": 10663 }, { "epoch": 1.4260497459213695, "grad_norm": 1.2472926378250122, "learning_rate": 1.128140158029317e-05, "loss": 0.8148, "step": 10664 }, { "epoch": 1.4261834715164483, "grad_norm": 1.1744377613067627, "learning_rate": 1.1279969722467368e-05, "loss": 0.6758, "step": 10665 }, { "epoch": 1.4263171971115272, "grad_norm": 1.1020431518554688, "learning_rate": 1.1278537837961824e-05, "loss": 0.7386, "step": 10666 }, { "epoch": 1.426450922706606, "grad_norm": 1.129422903060913, "learning_rate": 1.127710592680638e-05, "loss": 0.6183, "step": 10667 }, { "epoch": 1.4265846483016849, "grad_norm": 1.3803656101226807, "learning_rate": 1.1275673989030884e-05, "loss": 0.759, "step": 10668 }, { "epoch": 1.426718373896764, "grad_norm": 1.0879089832305908, "learning_rate": 1.1274242024665186e-05, "loss": 0.7261, "step": 10669 }, { "epoch": 1.4268520994918428, "grad_norm": 1.179068684577942, "learning_rate": 1.1272810033739134e-05, "loss": 0.7476, "step": 10670 }, { "epoch": 1.4269858250869216, "grad_norm": 1.1347124576568604, "learning_rate": 1.1271378016282572e-05, "loss": 0.7125, "step": 10671 }, { "epoch": 1.4271195506820005, "grad_norm": 1.2245893478393555, "learning_rate": 1.1269945972325353e-05, "loss": 0.6176, "step": 10672 }, { "epoch": 1.4272532762770793, "grad_norm": 1.1761186122894287, "learning_rate": 1.1268513901897324e-05, "loss": 0.6791, "step": 10673 }, { "epoch": 1.4273870018721584, "grad_norm": 1.14299476146698, "learning_rate": 1.126708180502834e-05, "loss": 0.7068, "step": 10674 }, { "epoch": 1.4275207274672372, "grad_norm": 1.1090683937072754, "learning_rate": 1.1265649681748245e-05, "loss": 0.6542, "step": 10675 }, { "epoch": 1.427654453062316, "grad_norm": 1.1149985790252686, "learning_rate": 1.1264217532086895e-05, "loss": 0.6656, "step": 10676 }, { "epoch": 1.4277881786573952, "grad_norm": 1.129122257232666, "learning_rate": 1.1262785356074139e-05, "loss": 0.701, "step": 10677 }, { "epoch": 1.427921904252474, "grad_norm": 1.1855891942977905, "learning_rate": 1.1261353153739834e-05, "loss": 0.6461, "step": 10678 }, { "epoch": 1.4280556298475529, "grad_norm": 1.3090617656707764, "learning_rate": 1.1259920925113825e-05, "loss": 0.7406, "step": 10679 }, { "epoch": 1.4281893554426317, "grad_norm": 1.1408298015594482, "learning_rate": 1.1258488670225973e-05, "loss": 0.6944, "step": 10680 }, { "epoch": 1.4283230810377106, "grad_norm": 1.2820547819137573, "learning_rate": 1.1257056389106127e-05, "loss": 0.7444, "step": 10681 }, { "epoch": 1.4284568066327896, "grad_norm": 1.3175113201141357, "learning_rate": 1.1255624081784145e-05, "loss": 0.6906, "step": 10682 }, { "epoch": 1.4285905322278685, "grad_norm": 1.1378902196884155, "learning_rate": 1.1254191748289878e-05, "loss": 0.6695, "step": 10683 }, { "epoch": 1.4287242578229473, "grad_norm": 1.2713255882263184, "learning_rate": 1.1252759388653187e-05, "loss": 0.7539, "step": 10684 }, { "epoch": 1.4288579834180262, "grad_norm": 1.2364473342895508, "learning_rate": 1.1251327002903923e-05, "loss": 0.7071, "step": 10685 }, { "epoch": 1.428991709013105, "grad_norm": 1.288007140159607, "learning_rate": 1.1249894591071948e-05, "loss": 0.7236, "step": 10686 }, { "epoch": 1.429125434608184, "grad_norm": 1.1951751708984375, "learning_rate": 1.1248462153187111e-05, "loss": 0.7051, "step": 10687 }, { "epoch": 1.429259160203263, "grad_norm": 1.2279226779937744, "learning_rate": 1.124702968927928e-05, "loss": 0.7284, "step": 10688 }, { "epoch": 1.4293928857983418, "grad_norm": 1.2034598588943481, "learning_rate": 1.1245597199378306e-05, "loss": 0.6743, "step": 10689 }, { "epoch": 1.4295266113934206, "grad_norm": 1.2431162595748901, "learning_rate": 1.1244164683514055e-05, "loss": 0.7107, "step": 10690 }, { "epoch": 1.4296603369884995, "grad_norm": 1.111336588859558, "learning_rate": 1.1242732141716377e-05, "loss": 0.6559, "step": 10691 }, { "epoch": 1.4297940625835786, "grad_norm": 1.2910313606262207, "learning_rate": 1.1241299574015137e-05, "loss": 0.7089, "step": 10692 }, { "epoch": 1.4299277881786574, "grad_norm": 1.2933851480484009, "learning_rate": 1.1239866980440195e-05, "loss": 0.7564, "step": 10693 }, { "epoch": 1.4300615137737362, "grad_norm": 1.175847053527832, "learning_rate": 1.1238434361021412e-05, "loss": 0.7337, "step": 10694 }, { "epoch": 1.4301952393688153, "grad_norm": 1.2050317525863647, "learning_rate": 1.1237001715788652e-05, "loss": 0.791, "step": 10695 }, { "epoch": 1.4303289649638942, "grad_norm": 1.2653065919876099, "learning_rate": 1.1235569044771773e-05, "loss": 0.7755, "step": 10696 }, { "epoch": 1.430462690558973, "grad_norm": 1.1402374505996704, "learning_rate": 1.1234136348000639e-05, "loss": 0.7662, "step": 10697 }, { "epoch": 1.4305964161540519, "grad_norm": 1.1254568099975586, "learning_rate": 1.1232703625505119e-05, "loss": 0.6337, "step": 10698 }, { "epoch": 1.4307301417491307, "grad_norm": 1.20777428150177, "learning_rate": 1.1231270877315066e-05, "loss": 0.7591, "step": 10699 }, { "epoch": 1.4308638673442098, "grad_norm": 1.1707218885421753, "learning_rate": 1.1229838103460349e-05, "loss": 0.7261, "step": 10700 }, { "epoch": 1.4309975929392886, "grad_norm": 1.0493338108062744, "learning_rate": 1.1228405303970837e-05, "loss": 0.5823, "step": 10701 }, { "epoch": 1.4311313185343675, "grad_norm": 1.1962717771530151, "learning_rate": 1.1226972478876392e-05, "loss": 0.6731, "step": 10702 }, { "epoch": 1.4312650441294463, "grad_norm": 1.2757611274719238, "learning_rate": 1.1225539628206879e-05, "loss": 0.7353, "step": 10703 }, { "epoch": 1.4313987697245252, "grad_norm": 1.3193750381469727, "learning_rate": 1.1224106751992164e-05, "loss": 0.7985, "step": 10704 }, { "epoch": 1.4315324953196042, "grad_norm": 1.161144495010376, "learning_rate": 1.1222673850262116e-05, "loss": 0.7081, "step": 10705 }, { "epoch": 1.431666220914683, "grad_norm": 1.176624059677124, "learning_rate": 1.1221240923046602e-05, "loss": 0.747, "step": 10706 }, { "epoch": 1.431799946509762, "grad_norm": 1.1809161901474, "learning_rate": 1.1219807970375488e-05, "loss": 0.7303, "step": 10707 }, { "epoch": 1.4319336721048408, "grad_norm": 1.1982601881027222, "learning_rate": 1.1218374992278645e-05, "loss": 0.7318, "step": 10708 }, { "epoch": 1.4320673976999196, "grad_norm": 1.183390736579895, "learning_rate": 1.1216941988785939e-05, "loss": 0.6817, "step": 10709 }, { "epoch": 1.4322011232949987, "grad_norm": 1.1115597486495972, "learning_rate": 1.1215508959927243e-05, "loss": 0.7542, "step": 10710 }, { "epoch": 1.4323348488900776, "grad_norm": 1.1997588872909546, "learning_rate": 1.121407590573243e-05, "loss": 0.6991, "step": 10711 }, { "epoch": 1.4324685744851564, "grad_norm": 1.2250940799713135, "learning_rate": 1.1212642826231363e-05, "loss": 0.6766, "step": 10712 }, { "epoch": 1.4326023000802355, "grad_norm": 1.1667989492416382, "learning_rate": 1.1211209721453918e-05, "loss": 0.7062, "step": 10713 }, { "epoch": 1.4327360256753143, "grad_norm": 1.3029757738113403, "learning_rate": 1.120977659142996e-05, "loss": 0.6828, "step": 10714 }, { "epoch": 1.4328697512703932, "grad_norm": 1.182138204574585, "learning_rate": 1.1208343436189372e-05, "loss": 0.6757, "step": 10715 }, { "epoch": 1.433003476865472, "grad_norm": 1.1919487714767456, "learning_rate": 1.120691025576202e-05, "loss": 0.7224, "step": 10716 }, { "epoch": 1.4331372024605509, "grad_norm": 1.2977871894836426, "learning_rate": 1.120547705017778e-05, "loss": 0.8007, "step": 10717 }, { "epoch": 1.43327092805563, "grad_norm": 1.0938711166381836, "learning_rate": 1.1204043819466523e-05, "loss": 0.7301, "step": 10718 }, { "epoch": 1.4334046536507088, "grad_norm": 1.3124492168426514, "learning_rate": 1.1202610563658125e-05, "loss": 0.7276, "step": 10719 }, { "epoch": 1.4335383792457876, "grad_norm": 1.1059037446975708, "learning_rate": 1.120117728278246e-05, "loss": 0.7059, "step": 10720 }, { "epoch": 1.4336721048408665, "grad_norm": 1.2210850715637207, "learning_rate": 1.1199743976869403e-05, "loss": 0.6824, "step": 10721 }, { "epoch": 1.4338058304359453, "grad_norm": 1.2350088357925415, "learning_rate": 1.1198310645948833e-05, "loss": 0.7605, "step": 10722 }, { "epoch": 1.4339395560310244, "grad_norm": 1.1967185735702515, "learning_rate": 1.1196877290050625e-05, "loss": 0.6971, "step": 10723 }, { "epoch": 1.4340732816261033, "grad_norm": 1.116580843925476, "learning_rate": 1.1195443909204653e-05, "loss": 0.6801, "step": 10724 }, { "epoch": 1.434207007221182, "grad_norm": 1.138337254524231, "learning_rate": 1.1194010503440797e-05, "loss": 0.7495, "step": 10725 }, { "epoch": 1.434340732816261, "grad_norm": 1.125596046447754, "learning_rate": 1.1192577072788935e-05, "loss": 0.6909, "step": 10726 }, { "epoch": 1.4344744584113398, "grad_norm": 1.1832541227340698, "learning_rate": 1.1191143617278946e-05, "loss": 0.7311, "step": 10727 }, { "epoch": 1.4346081840064189, "grad_norm": 1.2062422037124634, "learning_rate": 1.1189710136940706e-05, "loss": 0.7213, "step": 10728 }, { "epoch": 1.4347419096014977, "grad_norm": 1.1302896738052368, "learning_rate": 1.1188276631804098e-05, "loss": 0.6836, "step": 10729 }, { "epoch": 1.4348756351965766, "grad_norm": 1.2193200588226318, "learning_rate": 1.1186843101898999e-05, "loss": 0.7241, "step": 10730 }, { "epoch": 1.4350093607916556, "grad_norm": 1.1650595664978027, "learning_rate": 1.1185409547255295e-05, "loss": 0.6676, "step": 10731 }, { "epoch": 1.4351430863867345, "grad_norm": 1.1004188060760498, "learning_rate": 1.118397596790286e-05, "loss": 0.6955, "step": 10732 }, { "epoch": 1.4352768119818133, "grad_norm": 1.0259772539138794, "learning_rate": 1.1182542363871578e-05, "loss": 0.6594, "step": 10733 }, { "epoch": 1.4354105375768922, "grad_norm": 1.1806586980819702, "learning_rate": 1.1181108735191332e-05, "loss": 0.7104, "step": 10734 }, { "epoch": 1.435544263171971, "grad_norm": 1.122730016708374, "learning_rate": 1.117967508189201e-05, "loss": 0.7018, "step": 10735 }, { "epoch": 1.43567798876705, "grad_norm": 1.2305117845535278, "learning_rate": 1.1178241404003485e-05, "loss": 0.748, "step": 10736 }, { "epoch": 1.435811714362129, "grad_norm": 1.1225221157073975, "learning_rate": 1.1176807701555647e-05, "loss": 0.6805, "step": 10737 }, { "epoch": 1.4359454399572078, "grad_norm": 1.1370915174484253, "learning_rate": 1.1175373974578378e-05, "loss": 0.7101, "step": 10738 }, { "epoch": 1.4360791655522867, "grad_norm": 1.371666431427002, "learning_rate": 1.1173940223101562e-05, "loss": 0.7469, "step": 10739 }, { "epoch": 1.4362128911473655, "grad_norm": 1.3087252378463745, "learning_rate": 1.1172506447155088e-05, "loss": 0.7168, "step": 10740 }, { "epoch": 1.4363466167424446, "grad_norm": 1.1941101551055908, "learning_rate": 1.1171072646768836e-05, "loss": 0.7223, "step": 10741 }, { "epoch": 1.4364803423375234, "grad_norm": 1.3124561309814453, "learning_rate": 1.1169638821972698e-05, "loss": 0.7951, "step": 10742 }, { "epoch": 1.4366140679326023, "grad_norm": 1.3282922506332397, "learning_rate": 1.1168204972796559e-05, "loss": 0.7362, "step": 10743 }, { "epoch": 1.4367477935276811, "grad_norm": 1.277417778968811, "learning_rate": 1.1166771099270303e-05, "loss": 0.6825, "step": 10744 }, { "epoch": 1.43688151912276, "grad_norm": 1.3105156421661377, "learning_rate": 1.116533720142382e-05, "loss": 0.7135, "step": 10745 }, { "epoch": 1.437015244717839, "grad_norm": 1.3703376054763794, "learning_rate": 1.1163903279286996e-05, "loss": 0.7709, "step": 10746 }, { "epoch": 1.4371489703129179, "grad_norm": 1.2666243314743042, "learning_rate": 1.1162469332889726e-05, "loss": 0.7257, "step": 10747 }, { "epoch": 1.4372826959079967, "grad_norm": 1.351369857788086, "learning_rate": 1.1161035362261891e-05, "loss": 0.7716, "step": 10748 }, { "epoch": 1.4374164215030758, "grad_norm": 1.214685320854187, "learning_rate": 1.1159601367433389e-05, "loss": 0.7361, "step": 10749 }, { "epoch": 1.4375501470981547, "grad_norm": 1.0977617502212524, "learning_rate": 1.1158167348434103e-05, "loss": 0.6321, "step": 10750 }, { "epoch": 1.4376838726932335, "grad_norm": 1.1413968801498413, "learning_rate": 1.1156733305293928e-05, "loss": 0.6667, "step": 10751 }, { "epoch": 1.4378175982883123, "grad_norm": 1.2798453569412231, "learning_rate": 1.1155299238042754e-05, "loss": 0.7224, "step": 10752 }, { "epoch": 1.4379513238833912, "grad_norm": 1.2000062465667725, "learning_rate": 1.1153865146710471e-05, "loss": 0.6471, "step": 10753 }, { "epoch": 1.4380850494784703, "grad_norm": 1.2271296977996826, "learning_rate": 1.1152431031326978e-05, "loss": 0.7592, "step": 10754 }, { "epoch": 1.4382187750735491, "grad_norm": 1.1931766271591187, "learning_rate": 1.115099689192216e-05, "loss": 0.724, "step": 10755 }, { "epoch": 1.438352500668628, "grad_norm": 1.2668206691741943, "learning_rate": 1.1149562728525913e-05, "loss": 0.7347, "step": 10756 }, { "epoch": 1.4384862262637068, "grad_norm": 1.1446300745010376, "learning_rate": 1.1148128541168133e-05, "loss": 0.6349, "step": 10757 }, { "epoch": 1.4386199518587857, "grad_norm": 1.1940571069717407, "learning_rate": 1.1146694329878709e-05, "loss": 0.7141, "step": 10758 }, { "epoch": 1.4387536774538647, "grad_norm": 1.339685320854187, "learning_rate": 1.114526009468754e-05, "loss": 0.7112, "step": 10759 }, { "epoch": 1.4388874030489436, "grad_norm": 1.1376460790634155, "learning_rate": 1.1143825835624521e-05, "loss": 0.6617, "step": 10760 }, { "epoch": 1.4390211286440224, "grad_norm": 1.1699087619781494, "learning_rate": 1.1142391552719548e-05, "loss": 0.6487, "step": 10761 }, { "epoch": 1.4391548542391015, "grad_norm": 1.1421691179275513, "learning_rate": 1.1140957246002513e-05, "loss": 0.6502, "step": 10762 }, { "epoch": 1.4392885798341801, "grad_norm": 1.2844340801239014, "learning_rate": 1.113952291550332e-05, "loss": 0.6997, "step": 10763 }, { "epoch": 1.4394223054292592, "grad_norm": 1.1924713850021362, "learning_rate": 1.113808856125186e-05, "loss": 0.6734, "step": 10764 }, { "epoch": 1.439556031024338, "grad_norm": 1.2711251974105835, "learning_rate": 1.113665418327803e-05, "loss": 0.7421, "step": 10765 }, { "epoch": 1.439689756619417, "grad_norm": 1.0167975425720215, "learning_rate": 1.1135219781611734e-05, "loss": 0.6213, "step": 10766 }, { "epoch": 1.439823482214496, "grad_norm": 1.1935372352600098, "learning_rate": 1.1133785356282872e-05, "loss": 0.726, "step": 10767 }, { "epoch": 1.4399572078095748, "grad_norm": 1.314681053161621, "learning_rate": 1.1132350907321334e-05, "loss": 0.7531, "step": 10768 }, { "epoch": 1.4400909334046537, "grad_norm": 1.3542203903198242, "learning_rate": 1.1130916434757027e-05, "loss": 0.8185, "step": 10769 }, { "epoch": 1.4402246589997325, "grad_norm": 1.0340464115142822, "learning_rate": 1.1129481938619845e-05, "loss": 0.6039, "step": 10770 }, { "epoch": 1.4403583845948114, "grad_norm": 1.237444519996643, "learning_rate": 1.1128047418939698e-05, "loss": 0.7762, "step": 10771 }, { "epoch": 1.4404921101898904, "grad_norm": 1.1049175262451172, "learning_rate": 1.1126612875746479e-05, "loss": 0.6438, "step": 10772 }, { "epoch": 1.4406258357849693, "grad_norm": 1.2687772512435913, "learning_rate": 1.1125178309070094e-05, "loss": 0.668, "step": 10773 }, { "epoch": 1.4407595613800481, "grad_norm": 1.2224650382995605, "learning_rate": 1.1123743718940443e-05, "loss": 0.6637, "step": 10774 }, { "epoch": 1.440893286975127, "grad_norm": 1.3154124021530151, "learning_rate": 1.1122309105387433e-05, "loss": 0.6647, "step": 10775 }, { "epoch": 1.4410270125702058, "grad_norm": 1.0423606634140015, "learning_rate": 1.112087446844096e-05, "loss": 0.6574, "step": 10776 }, { "epoch": 1.441160738165285, "grad_norm": 1.3187575340270996, "learning_rate": 1.1119439808130932e-05, "loss": 0.8095, "step": 10777 }, { "epoch": 1.4412944637603637, "grad_norm": 1.2069398164749146, "learning_rate": 1.111800512448725e-05, "loss": 0.8306, "step": 10778 }, { "epoch": 1.4414281893554426, "grad_norm": 1.268981695175171, "learning_rate": 1.1116570417539825e-05, "loss": 0.6885, "step": 10779 }, { "epoch": 1.4415619149505217, "grad_norm": 1.2179815769195557, "learning_rate": 1.1115135687318556e-05, "loss": 0.7204, "step": 10780 }, { "epoch": 1.4416956405456005, "grad_norm": 1.058647871017456, "learning_rate": 1.111370093385335e-05, "loss": 0.6206, "step": 10781 }, { "epoch": 1.4418293661406794, "grad_norm": 1.2202389240264893, "learning_rate": 1.1112266157174116e-05, "loss": 0.7116, "step": 10782 }, { "epoch": 1.4419630917357582, "grad_norm": 1.2138216495513916, "learning_rate": 1.111083135731076e-05, "loss": 0.723, "step": 10783 }, { "epoch": 1.442096817330837, "grad_norm": 1.2124340534210205, "learning_rate": 1.110939653429318e-05, "loss": 0.7454, "step": 10784 }, { "epoch": 1.4422305429259161, "grad_norm": 1.2436857223510742, "learning_rate": 1.1107961688151297e-05, "loss": 0.7326, "step": 10785 }, { "epoch": 1.442364268520995, "grad_norm": 1.2278088331222534, "learning_rate": 1.1106526818915008e-05, "loss": 0.8119, "step": 10786 }, { "epoch": 1.4424979941160738, "grad_norm": 1.0857634544372559, "learning_rate": 1.1105091926614234e-05, "loss": 0.7593, "step": 10787 }, { "epoch": 1.4426317197111527, "grad_norm": 1.0535091161727905, "learning_rate": 1.110365701127887e-05, "loss": 0.6385, "step": 10788 }, { "epoch": 1.4427654453062315, "grad_norm": 1.256480097770691, "learning_rate": 1.1102222072938832e-05, "loss": 0.6395, "step": 10789 }, { "epoch": 1.4428991709013106, "grad_norm": 1.1999760866165161, "learning_rate": 1.1100787111624031e-05, "loss": 0.7478, "step": 10790 }, { "epoch": 1.4430328964963894, "grad_norm": 1.2586785554885864, "learning_rate": 1.1099352127364373e-05, "loss": 0.7965, "step": 10791 }, { "epoch": 1.4431666220914683, "grad_norm": 1.2181401252746582, "learning_rate": 1.1097917120189778e-05, "loss": 0.7308, "step": 10792 }, { "epoch": 1.4433003476865471, "grad_norm": 1.1153844594955444, "learning_rate": 1.1096482090130147e-05, "loss": 0.6782, "step": 10793 }, { "epoch": 1.443434073281626, "grad_norm": 1.1111050844192505, "learning_rate": 1.1095047037215397e-05, "loss": 0.712, "step": 10794 }, { "epoch": 1.443567798876705, "grad_norm": 1.078020691871643, "learning_rate": 1.1093611961475438e-05, "loss": 0.6933, "step": 10795 }, { "epoch": 1.443701524471784, "grad_norm": 1.074216604232788, "learning_rate": 1.109217686294019e-05, "loss": 0.6868, "step": 10796 }, { "epoch": 1.4438352500668628, "grad_norm": 1.1970863342285156, "learning_rate": 1.1090741741639552e-05, "loss": 0.7729, "step": 10797 }, { "epoch": 1.4439689756619418, "grad_norm": 1.0879454612731934, "learning_rate": 1.108930659760345e-05, "loss": 0.6107, "step": 10798 }, { "epoch": 1.4441027012570207, "grad_norm": 1.1212656497955322, "learning_rate": 1.1087871430861794e-05, "loss": 0.6551, "step": 10799 }, { "epoch": 1.4442364268520995, "grad_norm": 1.083728551864624, "learning_rate": 1.10864362414445e-05, "loss": 0.6537, "step": 10800 }, { "epoch": 1.4443701524471784, "grad_norm": 1.0738023519515991, "learning_rate": 1.1085001029381482e-05, "loss": 0.727, "step": 10801 }, { "epoch": 1.4445038780422572, "grad_norm": 1.180895447731018, "learning_rate": 1.1083565794702655e-05, "loss": 0.6675, "step": 10802 }, { "epoch": 1.4446376036373363, "grad_norm": 1.1423380374908447, "learning_rate": 1.1082130537437937e-05, "loss": 0.6798, "step": 10803 }, { "epoch": 1.4447713292324151, "grad_norm": 1.09044349193573, "learning_rate": 1.1080695257617243e-05, "loss": 0.6708, "step": 10804 }, { "epoch": 1.444905054827494, "grad_norm": 1.8647102117538452, "learning_rate": 1.1079259955270489e-05, "loss": 0.6974, "step": 10805 }, { "epoch": 1.4450387804225728, "grad_norm": 1.2608208656311035, "learning_rate": 1.1077824630427593e-05, "loss": 0.7045, "step": 10806 }, { "epoch": 1.4451725060176517, "grad_norm": 1.2280246019363403, "learning_rate": 1.1076389283118477e-05, "loss": 0.7256, "step": 10807 }, { "epoch": 1.4453062316127308, "grad_norm": 1.280097246170044, "learning_rate": 1.1074953913373057e-05, "loss": 0.7089, "step": 10808 }, { "epoch": 1.4454399572078096, "grad_norm": 1.3396214246749878, "learning_rate": 1.1073518521221249e-05, "loss": 0.764, "step": 10809 }, { "epoch": 1.4455736828028884, "grad_norm": 1.3328964710235596, "learning_rate": 1.1072083106692975e-05, "loss": 0.7252, "step": 10810 }, { "epoch": 1.4457074083979673, "grad_norm": 1.2853786945343018, "learning_rate": 1.1070647669818153e-05, "loss": 0.7159, "step": 10811 }, { "epoch": 1.4458411339930461, "grad_norm": 1.2761359214782715, "learning_rate": 1.106921221062671e-05, "loss": 0.7538, "step": 10812 }, { "epoch": 1.4459748595881252, "grad_norm": 1.2368756532669067, "learning_rate": 1.1067776729148557e-05, "loss": 0.649, "step": 10813 }, { "epoch": 1.446108585183204, "grad_norm": 1.203789234161377, "learning_rate": 1.106634122541362e-05, "loss": 0.7037, "step": 10814 }, { "epoch": 1.446242310778283, "grad_norm": 1.1252245903015137, "learning_rate": 1.1064905699451822e-05, "loss": 0.7117, "step": 10815 }, { "epoch": 1.446376036373362, "grad_norm": 1.1826584339141846, "learning_rate": 1.1063470151293083e-05, "loss": 0.6723, "step": 10816 }, { "epoch": 1.4465097619684408, "grad_norm": 1.2672951221466064, "learning_rate": 1.1062034580967327e-05, "loss": 0.7461, "step": 10817 }, { "epoch": 1.4466434875635197, "grad_norm": 1.2064985036849976, "learning_rate": 1.1060598988504476e-05, "loss": 0.7408, "step": 10818 }, { "epoch": 1.4467772131585985, "grad_norm": 1.2159812450408936, "learning_rate": 1.1059163373934454e-05, "loss": 0.6734, "step": 10819 }, { "epoch": 1.4469109387536774, "grad_norm": 1.2581053972244263, "learning_rate": 1.1057727737287184e-05, "loss": 0.719, "step": 10820 }, { "epoch": 1.4470446643487564, "grad_norm": 1.281354308128357, "learning_rate": 1.1056292078592595e-05, "loss": 0.6992, "step": 10821 }, { "epoch": 1.4471783899438353, "grad_norm": 1.2291339635849, "learning_rate": 1.1054856397880604e-05, "loss": 0.7948, "step": 10822 }, { "epoch": 1.4473121155389141, "grad_norm": 1.0835392475128174, "learning_rate": 1.105342069518114e-05, "loss": 0.6203, "step": 10823 }, { "epoch": 1.447445841133993, "grad_norm": 1.3118091821670532, "learning_rate": 1.1051984970524135e-05, "loss": 0.769, "step": 10824 }, { "epoch": 1.4475795667290718, "grad_norm": 1.1601808071136475, "learning_rate": 1.1050549223939507e-05, "loss": 0.7215, "step": 10825 }, { "epoch": 1.447713292324151, "grad_norm": 1.2609903812408447, "learning_rate": 1.1049113455457186e-05, "loss": 0.7495, "step": 10826 }, { "epoch": 1.4478470179192298, "grad_norm": 1.1831955909729004, "learning_rate": 1.1047677665107099e-05, "loss": 0.7355, "step": 10827 }, { "epoch": 1.4479807435143086, "grad_norm": 1.2387874126434326, "learning_rate": 1.1046241852919176e-05, "loss": 0.722, "step": 10828 }, { "epoch": 1.4481144691093875, "grad_norm": 1.275311827659607, "learning_rate": 1.1044806018923336e-05, "loss": 0.7012, "step": 10829 }, { "epoch": 1.4482481947044663, "grad_norm": 1.249427080154419, "learning_rate": 1.1043370163149518e-05, "loss": 0.7647, "step": 10830 }, { "epoch": 1.4483819202995454, "grad_norm": 1.195416808128357, "learning_rate": 1.104193428562765e-05, "loss": 0.7466, "step": 10831 }, { "epoch": 1.4485156458946242, "grad_norm": 1.1396962404251099, "learning_rate": 1.1040498386387657e-05, "loss": 0.7015, "step": 10832 }, { "epoch": 1.448649371489703, "grad_norm": 1.3026492595672607, "learning_rate": 1.1039062465459468e-05, "loss": 0.6956, "step": 10833 }, { "epoch": 1.4487830970847821, "grad_norm": 1.2944220304489136, "learning_rate": 1.103762652287302e-05, "loss": 0.7582, "step": 10834 }, { "epoch": 1.448916822679861, "grad_norm": 1.0840996503829956, "learning_rate": 1.1036190558658238e-05, "loss": 0.6257, "step": 10835 }, { "epoch": 1.4490505482749398, "grad_norm": 1.2793140411376953, "learning_rate": 1.1034754572845057e-05, "loss": 0.8033, "step": 10836 }, { "epoch": 1.4491842738700187, "grad_norm": 1.2749674320220947, "learning_rate": 1.1033318565463404e-05, "loss": 0.7059, "step": 10837 }, { "epoch": 1.4493179994650975, "grad_norm": 1.1647595167160034, "learning_rate": 1.1031882536543216e-05, "loss": 0.6956, "step": 10838 }, { "epoch": 1.4494517250601766, "grad_norm": 1.147619366645813, "learning_rate": 1.1030446486114425e-05, "loss": 0.7086, "step": 10839 }, { "epoch": 1.4495854506552555, "grad_norm": 1.3113161325454712, "learning_rate": 1.1029010414206965e-05, "loss": 0.6785, "step": 10840 }, { "epoch": 1.4497191762503343, "grad_norm": 1.3291889429092407, "learning_rate": 1.1027574320850763e-05, "loss": 0.8283, "step": 10841 }, { "epoch": 1.4498529018454132, "grad_norm": 1.1831458806991577, "learning_rate": 1.1026138206075759e-05, "loss": 0.7279, "step": 10842 }, { "epoch": 1.449986627440492, "grad_norm": 1.2279443740844727, "learning_rate": 1.1024702069911885e-05, "loss": 0.7449, "step": 10843 }, { "epoch": 1.450120353035571, "grad_norm": 1.307448148727417, "learning_rate": 1.102326591238908e-05, "loss": 0.743, "step": 10844 }, { "epoch": 1.45025407863065, "grad_norm": 1.1759616136550903, "learning_rate": 1.1021829733537274e-05, "loss": 0.6852, "step": 10845 }, { "epoch": 1.4503878042257288, "grad_norm": 1.260444164276123, "learning_rate": 1.1020393533386404e-05, "loss": 0.8268, "step": 10846 }, { "epoch": 1.4505215298208076, "grad_norm": 1.310232162475586, "learning_rate": 1.101895731196641e-05, "loss": 0.735, "step": 10847 }, { "epoch": 1.4506552554158865, "grad_norm": 1.2217086553573608, "learning_rate": 1.1017521069307224e-05, "loss": 0.6976, "step": 10848 }, { "epoch": 1.4507889810109655, "grad_norm": 1.2213094234466553, "learning_rate": 1.1016084805438785e-05, "loss": 0.6894, "step": 10849 }, { "epoch": 1.4509227066060444, "grad_norm": 1.4285427331924438, "learning_rate": 1.1014648520391031e-05, "loss": 0.6675, "step": 10850 }, { "epoch": 1.4510564322011232, "grad_norm": 1.1831865310668945, "learning_rate": 1.10132122141939e-05, "loss": 0.7371, "step": 10851 }, { "epoch": 1.4511901577962023, "grad_norm": 1.2192161083221436, "learning_rate": 1.1011775886877331e-05, "loss": 0.6975, "step": 10852 }, { "epoch": 1.4513238833912812, "grad_norm": 1.1997629404067993, "learning_rate": 1.1010339538471259e-05, "loss": 0.7039, "step": 10853 }, { "epoch": 1.45145760898636, "grad_norm": 1.1829913854599, "learning_rate": 1.1008903169005627e-05, "loss": 0.7916, "step": 10854 }, { "epoch": 1.4515913345814389, "grad_norm": 1.2070574760437012, "learning_rate": 1.1007466778510373e-05, "loss": 0.7159, "step": 10855 }, { "epoch": 1.4517250601765177, "grad_norm": 1.2767226696014404, "learning_rate": 1.100603036701544e-05, "loss": 0.7384, "step": 10856 }, { "epoch": 1.4518587857715968, "grad_norm": 1.1423934698104858, "learning_rate": 1.1004593934550767e-05, "loss": 0.6996, "step": 10857 }, { "epoch": 1.4519925113666756, "grad_norm": 1.1036198139190674, "learning_rate": 1.1003157481146294e-05, "loss": 0.6516, "step": 10858 }, { "epoch": 1.4521262369617545, "grad_norm": 1.1072052717208862, "learning_rate": 1.1001721006831962e-05, "loss": 0.7181, "step": 10859 }, { "epoch": 1.4522599625568333, "grad_norm": 1.1262147426605225, "learning_rate": 1.1000284511637717e-05, "loss": 0.6596, "step": 10860 }, { "epoch": 1.4523936881519122, "grad_norm": 1.0688198804855347, "learning_rate": 1.0998847995593494e-05, "loss": 0.5408, "step": 10861 }, { "epoch": 1.4525274137469912, "grad_norm": 1.2367618083953857, "learning_rate": 1.0997411458729243e-05, "loss": 0.6279, "step": 10862 }, { "epoch": 1.45266113934207, "grad_norm": 1.2538025379180908, "learning_rate": 1.0995974901074905e-05, "loss": 0.6771, "step": 10863 }, { "epoch": 1.452794864937149, "grad_norm": 1.1458609104156494, "learning_rate": 1.0994538322660423e-05, "loss": 0.6847, "step": 10864 }, { "epoch": 1.452928590532228, "grad_norm": 1.275004267692566, "learning_rate": 1.099310172351574e-05, "loss": 0.7399, "step": 10865 }, { "epoch": 1.4530623161273066, "grad_norm": 1.3751593828201294, "learning_rate": 1.0991665103670803e-05, "loss": 0.7518, "step": 10866 }, { "epoch": 1.4531960417223857, "grad_norm": 1.2564325332641602, "learning_rate": 1.0990228463155557e-05, "loss": 0.6702, "step": 10867 }, { "epoch": 1.4533297673174645, "grad_norm": 1.285187840461731, "learning_rate": 1.0988791801999944e-05, "loss": 0.7953, "step": 10868 }, { "epoch": 1.4534634929125434, "grad_norm": 1.1962530612945557, "learning_rate": 1.0987355120233914e-05, "loss": 0.7205, "step": 10869 }, { "epoch": 1.4535972185076225, "grad_norm": 1.1755759716033936, "learning_rate": 1.098591841788741e-05, "loss": 0.6678, "step": 10870 }, { "epoch": 1.4537309441027013, "grad_norm": 1.2317090034484863, "learning_rate": 1.0984481694990378e-05, "loss": 0.7188, "step": 10871 }, { "epoch": 1.4538646696977802, "grad_norm": 1.302388310432434, "learning_rate": 1.0983044951572773e-05, "loss": 0.746, "step": 10872 }, { "epoch": 1.453998395292859, "grad_norm": 1.2712892293930054, "learning_rate": 1.0981608187664532e-05, "loss": 0.7175, "step": 10873 }, { "epoch": 1.4541321208879379, "grad_norm": 1.0935871601104736, "learning_rate": 1.098017140329561e-05, "loss": 0.6222, "step": 10874 }, { "epoch": 1.454265846483017, "grad_norm": 1.2833107709884644, "learning_rate": 1.0978734598495949e-05, "loss": 0.7481, "step": 10875 }, { "epoch": 1.4543995720780958, "grad_norm": 1.1621891260147095, "learning_rate": 1.0977297773295503e-05, "loss": 0.6602, "step": 10876 }, { "epoch": 1.4545332976731746, "grad_norm": 1.095976710319519, "learning_rate": 1.0975860927724225e-05, "loss": 0.6989, "step": 10877 }, { "epoch": 1.4546670232682535, "grad_norm": 1.3705198764801025, "learning_rate": 1.0974424061812055e-05, "loss": 0.7454, "step": 10878 }, { "epoch": 1.4548007488633323, "grad_norm": 1.216985821723938, "learning_rate": 1.097298717558895e-05, "loss": 0.7585, "step": 10879 }, { "epoch": 1.4549344744584114, "grad_norm": 1.095146894454956, "learning_rate": 1.0971550269084856e-05, "loss": 0.6528, "step": 10880 }, { "epoch": 1.4550682000534902, "grad_norm": 1.2245376110076904, "learning_rate": 1.0970113342329728e-05, "loss": 0.67, "step": 10881 }, { "epoch": 1.455201925648569, "grad_norm": 1.1592437028884888, "learning_rate": 1.0968676395353514e-05, "loss": 0.7452, "step": 10882 }, { "epoch": 1.4553356512436482, "grad_norm": 1.2051292657852173, "learning_rate": 1.0967239428186172e-05, "loss": 0.6984, "step": 10883 }, { "epoch": 1.455469376838727, "grad_norm": 1.3012681007385254, "learning_rate": 1.0965802440857645e-05, "loss": 0.6942, "step": 10884 }, { "epoch": 1.4556031024338059, "grad_norm": 1.1714789867401123, "learning_rate": 1.0964365433397894e-05, "loss": 0.7148, "step": 10885 }, { "epoch": 1.4557368280288847, "grad_norm": 1.1794650554656982, "learning_rate": 1.0962928405836866e-05, "loss": 0.6489, "step": 10886 }, { "epoch": 1.4558705536239636, "grad_norm": 1.0465161800384521, "learning_rate": 1.0961491358204516e-05, "loss": 0.607, "step": 10887 }, { "epoch": 1.4560042792190426, "grad_norm": 1.1192132234573364, "learning_rate": 1.09600542905308e-05, "loss": 0.6573, "step": 10888 }, { "epoch": 1.4561380048141215, "grad_norm": 1.3643540143966675, "learning_rate": 1.0958617202845672e-05, "loss": 0.7362, "step": 10889 }, { "epoch": 1.4562717304092003, "grad_norm": 1.2710552215576172, "learning_rate": 1.0957180095179082e-05, "loss": 0.7694, "step": 10890 }, { "epoch": 1.4564054560042792, "grad_norm": 1.2066421508789062, "learning_rate": 1.0955742967560995e-05, "loss": 0.7078, "step": 10891 }, { "epoch": 1.456539181599358, "grad_norm": 1.24596107006073, "learning_rate": 1.0954305820021354e-05, "loss": 0.6936, "step": 10892 }, { "epoch": 1.456672907194437, "grad_norm": 1.1464096307754517, "learning_rate": 1.0952868652590124e-05, "loss": 0.6869, "step": 10893 }, { "epoch": 1.456806632789516, "grad_norm": 1.36152184009552, "learning_rate": 1.095143146529726e-05, "loss": 0.7024, "step": 10894 }, { "epoch": 1.4569403583845948, "grad_norm": 1.184718370437622, "learning_rate": 1.0949994258172715e-05, "loss": 0.6805, "step": 10895 }, { "epoch": 1.4570740839796736, "grad_norm": 1.233087182044983, "learning_rate": 1.094855703124645e-05, "loss": 0.7404, "step": 10896 }, { "epoch": 1.4572078095747525, "grad_norm": 1.1632511615753174, "learning_rate": 1.0947119784548424e-05, "loss": 0.7065, "step": 10897 }, { "epoch": 1.4573415351698316, "grad_norm": 1.0786553621292114, "learning_rate": 1.0945682518108588e-05, "loss": 0.665, "step": 10898 }, { "epoch": 1.4574752607649104, "grad_norm": 1.2024211883544922, "learning_rate": 1.0944245231956909e-05, "loss": 0.7845, "step": 10899 }, { "epoch": 1.4576089863599893, "grad_norm": 1.239782691001892, "learning_rate": 1.0942807926123338e-05, "loss": 0.7382, "step": 10900 }, { "epoch": 1.4577427119550683, "grad_norm": 1.159354567527771, "learning_rate": 1.0941370600637839e-05, "loss": 0.7409, "step": 10901 }, { "epoch": 1.4578764375501472, "grad_norm": 1.3095420598983765, "learning_rate": 1.093993325553037e-05, "loss": 0.7273, "step": 10902 }, { "epoch": 1.458010163145226, "grad_norm": 1.2642269134521484, "learning_rate": 1.0938495890830893e-05, "loss": 0.7473, "step": 10903 }, { "epoch": 1.4581438887403049, "grad_norm": 1.286946415901184, "learning_rate": 1.0937058506569366e-05, "loss": 0.7746, "step": 10904 }, { "epoch": 1.4582776143353837, "grad_norm": 1.1595613956451416, "learning_rate": 1.0935621102775756e-05, "loss": 0.696, "step": 10905 }, { "epoch": 1.4584113399304628, "grad_norm": 1.2030659914016724, "learning_rate": 1.0934183679480014e-05, "loss": 0.6981, "step": 10906 }, { "epoch": 1.4585450655255416, "grad_norm": 1.1344598531723022, "learning_rate": 1.0932746236712106e-05, "loss": 0.6035, "step": 10907 }, { "epoch": 1.4586787911206205, "grad_norm": 1.2621605396270752, "learning_rate": 1.0931308774501999e-05, "loss": 0.7401, "step": 10908 }, { "epoch": 1.4588125167156993, "grad_norm": 1.152124285697937, "learning_rate": 1.0929871292879652e-05, "loss": 0.7147, "step": 10909 }, { "epoch": 1.4589462423107782, "grad_norm": 1.2316539287567139, "learning_rate": 1.0928433791875026e-05, "loss": 0.7069, "step": 10910 }, { "epoch": 1.4590799679058573, "grad_norm": 1.3426806926727295, "learning_rate": 1.0926996271518085e-05, "loss": 0.7863, "step": 10911 }, { "epoch": 1.459213693500936, "grad_norm": 1.3194398880004883, "learning_rate": 1.0925558731838795e-05, "loss": 0.7831, "step": 10912 }, { "epoch": 1.459347419096015, "grad_norm": 1.0863442420959473, "learning_rate": 1.0924121172867119e-05, "loss": 0.6343, "step": 10913 }, { "epoch": 1.4594811446910938, "grad_norm": 1.1019114255905151, "learning_rate": 1.092268359463302e-05, "loss": 0.6221, "step": 10914 }, { "epoch": 1.4596148702861726, "grad_norm": 1.1885719299316406, "learning_rate": 1.0921245997166467e-05, "loss": 0.6801, "step": 10915 }, { "epoch": 1.4597485958812517, "grad_norm": 1.1551927328109741, "learning_rate": 1.091980838049742e-05, "loss": 0.7189, "step": 10916 }, { "epoch": 1.4598823214763306, "grad_norm": 1.293895959854126, "learning_rate": 1.0918370744655851e-05, "loss": 0.821, "step": 10917 }, { "epoch": 1.4600160470714094, "grad_norm": 1.3254491090774536, "learning_rate": 1.0916933089671721e-05, "loss": 0.706, "step": 10918 }, { "epoch": 1.4601497726664885, "grad_norm": 1.1972441673278809, "learning_rate": 1.0915495415574996e-05, "loss": 0.6509, "step": 10919 }, { "epoch": 1.4602834982615673, "grad_norm": 1.3287737369537354, "learning_rate": 1.0914057722395646e-05, "loss": 0.7529, "step": 10920 }, { "epoch": 1.4604172238566462, "grad_norm": 1.1431668996810913, "learning_rate": 1.0912620010163639e-05, "loss": 0.6995, "step": 10921 }, { "epoch": 1.460550949451725, "grad_norm": 1.1322269439697266, "learning_rate": 1.0911182278908941e-05, "loss": 0.6481, "step": 10922 }, { "epoch": 1.4606846750468039, "grad_norm": 1.1951957941055298, "learning_rate": 1.090974452866152e-05, "loss": 0.7249, "step": 10923 }, { "epoch": 1.460818400641883, "grad_norm": 1.4419785737991333, "learning_rate": 1.0908306759451343e-05, "loss": 0.7235, "step": 10924 }, { "epoch": 1.4609521262369618, "grad_norm": 1.215969443321228, "learning_rate": 1.0906868971308384e-05, "loss": 0.6238, "step": 10925 }, { "epoch": 1.4610858518320406, "grad_norm": 2.5027265548706055, "learning_rate": 1.0905431164262605e-05, "loss": 0.6625, "step": 10926 }, { "epoch": 1.4612195774271195, "grad_norm": 1.279018759727478, "learning_rate": 1.0903993338343984e-05, "loss": 0.7467, "step": 10927 }, { "epoch": 1.4613533030221983, "grad_norm": 1.1036295890808105, "learning_rate": 1.0902555493582483e-05, "loss": 0.6448, "step": 10928 }, { "epoch": 1.4614870286172774, "grad_norm": 1.3221873044967651, "learning_rate": 1.090111763000808e-05, "loss": 0.7496, "step": 10929 }, { "epoch": 1.4616207542123563, "grad_norm": 1.2621976137161255, "learning_rate": 1.0899679747650742e-05, "loss": 0.7526, "step": 10930 }, { "epoch": 1.4617544798074351, "grad_norm": 1.3293044567108154, "learning_rate": 1.0898241846540439e-05, "loss": 0.7906, "step": 10931 }, { "epoch": 1.461888205402514, "grad_norm": 1.1575193405151367, "learning_rate": 1.0896803926707142e-05, "loss": 0.6363, "step": 10932 }, { "epoch": 1.4620219309975928, "grad_norm": 1.2418296337127686, "learning_rate": 1.0895365988180829e-05, "loss": 0.6524, "step": 10933 }, { "epoch": 1.4621556565926719, "grad_norm": 1.318368911743164, "learning_rate": 1.0893928030991468e-05, "loss": 0.7095, "step": 10934 }, { "epoch": 1.4622893821877507, "grad_norm": 1.137374997138977, "learning_rate": 1.0892490055169032e-05, "loss": 0.6393, "step": 10935 }, { "epoch": 1.4624231077828296, "grad_norm": 1.255260705947876, "learning_rate": 1.0891052060743494e-05, "loss": 0.7244, "step": 10936 }, { "epoch": 1.4625568333779086, "grad_norm": 1.329527497291565, "learning_rate": 1.0889614047744831e-05, "loss": 0.7447, "step": 10937 }, { "epoch": 1.4626905589729875, "grad_norm": 1.2871960401535034, "learning_rate": 1.0888176016203013e-05, "loss": 0.7276, "step": 10938 }, { "epoch": 1.4628242845680663, "grad_norm": 1.1659642457962036, "learning_rate": 1.0886737966148014e-05, "loss": 0.7082, "step": 10939 }, { "epoch": 1.4629580101631452, "grad_norm": 1.0542985200881958, "learning_rate": 1.0885299897609811e-05, "loss": 0.694, "step": 10940 }, { "epoch": 1.463091735758224, "grad_norm": 1.2396397590637207, "learning_rate": 1.0883861810618382e-05, "loss": 0.739, "step": 10941 }, { "epoch": 1.4632254613533031, "grad_norm": 1.1542752981185913, "learning_rate": 1.0882423705203698e-05, "loss": 0.6637, "step": 10942 }, { "epoch": 1.463359186948382, "grad_norm": 1.3775659799575806, "learning_rate": 1.0880985581395736e-05, "loss": 0.8444, "step": 10943 }, { "epoch": 1.4634929125434608, "grad_norm": 1.2057346105575562, "learning_rate": 1.0879547439224471e-05, "loss": 0.6809, "step": 10944 }, { "epoch": 1.4636266381385397, "grad_norm": 1.110167384147644, "learning_rate": 1.0878109278719882e-05, "loss": 0.6971, "step": 10945 }, { "epoch": 1.4637603637336185, "grad_norm": 1.2810145616531372, "learning_rate": 1.0876671099911947e-05, "loss": 0.6949, "step": 10946 }, { "epoch": 1.4638940893286976, "grad_norm": 1.177228569984436, "learning_rate": 1.087523290283064e-05, "loss": 0.6403, "step": 10947 }, { "epoch": 1.4640278149237764, "grad_norm": 1.2795343399047852, "learning_rate": 1.087379468750594e-05, "loss": 0.8029, "step": 10948 }, { "epoch": 1.4641615405188553, "grad_norm": 1.0432038307189941, "learning_rate": 1.0872356453967829e-05, "loss": 0.5906, "step": 10949 }, { "epoch": 1.4642952661139341, "grad_norm": 1.1143854856491089, "learning_rate": 1.087091820224628e-05, "loss": 0.6319, "step": 10950 }, { "epoch": 1.464428991709013, "grad_norm": 1.437066674232483, "learning_rate": 1.0869479932371274e-05, "loss": 0.76, "step": 10951 }, { "epoch": 1.464562717304092, "grad_norm": 1.3062926530838013, "learning_rate": 1.0868041644372792e-05, "loss": 0.6835, "step": 10952 }, { "epoch": 1.464696442899171, "grad_norm": 1.2938897609710693, "learning_rate": 1.0866603338280812e-05, "loss": 0.7212, "step": 10953 }, { "epoch": 1.4648301684942497, "grad_norm": 1.163225531578064, "learning_rate": 1.0865165014125316e-05, "loss": 0.6775, "step": 10954 }, { "epoch": 1.4649638940893288, "grad_norm": 1.3518708944320679, "learning_rate": 1.086372667193628e-05, "loss": 0.7841, "step": 10955 }, { "epoch": 1.4650976196844077, "grad_norm": 1.2324292659759521, "learning_rate": 1.0862288311743691e-05, "loss": 0.7388, "step": 10956 }, { "epoch": 1.4652313452794865, "grad_norm": 1.3129326105117798, "learning_rate": 1.0860849933577529e-05, "loss": 0.7113, "step": 10957 }, { "epoch": 1.4653650708745654, "grad_norm": 1.1984643936157227, "learning_rate": 1.0859411537467768e-05, "loss": 0.7119, "step": 10958 }, { "epoch": 1.4654987964696442, "grad_norm": 1.4695075750350952, "learning_rate": 1.0857973123444401e-05, "loss": 0.7684, "step": 10959 }, { "epoch": 1.4656325220647233, "grad_norm": 1.3796393871307373, "learning_rate": 1.0856534691537402e-05, "loss": 0.7778, "step": 10960 }, { "epoch": 1.4657662476598021, "grad_norm": 1.270552396774292, "learning_rate": 1.0855096241776759e-05, "loss": 0.7289, "step": 10961 }, { "epoch": 1.465899973254881, "grad_norm": 1.4660818576812744, "learning_rate": 1.0853657774192454e-05, "loss": 0.8081, "step": 10962 }, { "epoch": 1.4660336988499598, "grad_norm": 1.2011971473693848, "learning_rate": 1.0852219288814467e-05, "loss": 0.702, "step": 10963 }, { "epoch": 1.4661674244450387, "grad_norm": 1.2819328308105469, "learning_rate": 1.0850780785672786e-05, "loss": 0.7373, "step": 10964 }, { "epoch": 1.4663011500401177, "grad_norm": 1.309237003326416, "learning_rate": 1.0849342264797391e-05, "loss": 0.7263, "step": 10965 }, { "epoch": 1.4664348756351966, "grad_norm": 1.2203730344772339, "learning_rate": 1.0847903726218271e-05, "loss": 0.7383, "step": 10966 }, { "epoch": 1.4665686012302754, "grad_norm": 1.2435630559921265, "learning_rate": 1.084646516996541e-05, "loss": 0.6819, "step": 10967 }, { "epoch": 1.4667023268253545, "grad_norm": 1.3134021759033203, "learning_rate": 1.0845026596068792e-05, "loss": 0.8351, "step": 10968 }, { "epoch": 1.4668360524204331, "grad_norm": 1.234737753868103, "learning_rate": 1.0843588004558402e-05, "loss": 0.7997, "step": 10969 }, { "epoch": 1.4669697780155122, "grad_norm": 1.2755217552185059, "learning_rate": 1.0842149395464231e-05, "loss": 0.6885, "step": 10970 }, { "epoch": 1.467103503610591, "grad_norm": 1.2468311786651611, "learning_rate": 1.0840710768816258e-05, "loss": 0.6986, "step": 10971 }, { "epoch": 1.46723722920567, "grad_norm": 1.2712064981460571, "learning_rate": 1.0839272124644476e-05, "loss": 0.735, "step": 10972 }, { "epoch": 1.467370954800749, "grad_norm": 1.1605310440063477, "learning_rate": 1.0837833462978866e-05, "loss": 0.7362, "step": 10973 }, { "epoch": 1.4675046803958278, "grad_norm": 1.3182663917541504, "learning_rate": 1.0836394783849424e-05, "loss": 0.724, "step": 10974 }, { "epoch": 1.4676384059909067, "grad_norm": 1.2682924270629883, "learning_rate": 1.083495608728613e-05, "loss": 0.7195, "step": 10975 }, { "epoch": 1.4677721315859855, "grad_norm": 1.308272361755371, "learning_rate": 1.0833517373318976e-05, "loss": 0.7574, "step": 10976 }, { "epoch": 1.4679058571810644, "grad_norm": 1.2059719562530518, "learning_rate": 1.083207864197795e-05, "loss": 0.6926, "step": 10977 }, { "epoch": 1.4680395827761434, "grad_norm": 1.1888363361358643, "learning_rate": 1.083063989329304e-05, "loss": 0.7569, "step": 10978 }, { "epoch": 1.4681733083712223, "grad_norm": 1.2443981170654297, "learning_rate": 1.0829201127294238e-05, "loss": 0.724, "step": 10979 }, { "epoch": 1.4683070339663011, "grad_norm": 1.241886854171753, "learning_rate": 1.082776234401153e-05, "loss": 0.7724, "step": 10980 }, { "epoch": 1.46844075956138, "grad_norm": 1.2591633796691895, "learning_rate": 1.0826323543474909e-05, "loss": 0.7453, "step": 10981 }, { "epoch": 1.4685744851564588, "grad_norm": 1.1055887937545776, "learning_rate": 1.0824884725714366e-05, "loss": 0.6293, "step": 10982 }, { "epoch": 1.468708210751538, "grad_norm": 1.1809444427490234, "learning_rate": 1.082344589075989e-05, "loss": 0.6131, "step": 10983 }, { "epoch": 1.4688419363466167, "grad_norm": 1.1707051992416382, "learning_rate": 1.0822007038641467e-05, "loss": 0.6606, "step": 10984 }, { "epoch": 1.4689756619416956, "grad_norm": 1.2626782655715942, "learning_rate": 1.0820568169389098e-05, "loss": 0.7155, "step": 10985 }, { "epoch": 1.4691093875367747, "grad_norm": 1.2243694067001343, "learning_rate": 1.0819129283032772e-05, "loss": 0.6804, "step": 10986 }, { "epoch": 1.4692431131318535, "grad_norm": 1.2063841819763184, "learning_rate": 1.081769037960248e-05, "loss": 0.7373, "step": 10987 }, { "epoch": 1.4693768387269324, "grad_norm": 1.1805076599121094, "learning_rate": 1.0816251459128213e-05, "loss": 0.7463, "step": 10988 }, { "epoch": 1.4695105643220112, "grad_norm": 1.328493595123291, "learning_rate": 1.0814812521639963e-05, "loss": 0.6635, "step": 10989 }, { "epoch": 1.46964428991709, "grad_norm": 1.1209900379180908, "learning_rate": 1.0813373567167729e-05, "loss": 0.6932, "step": 10990 }, { "epoch": 1.4697780155121691, "grad_norm": 1.2943460941314697, "learning_rate": 1.08119345957415e-05, "loss": 0.6868, "step": 10991 }, { "epoch": 1.469911741107248, "grad_norm": 1.2325923442840576, "learning_rate": 1.081049560739127e-05, "loss": 0.7685, "step": 10992 }, { "epoch": 1.4700454667023268, "grad_norm": 1.2439771890640259, "learning_rate": 1.080905660214704e-05, "loss": 0.6965, "step": 10993 }, { "epoch": 1.4701791922974057, "grad_norm": 1.3063714504241943, "learning_rate": 1.0807617580038797e-05, "loss": 0.7244, "step": 10994 }, { "epoch": 1.4703129178924845, "grad_norm": 1.1719521284103394, "learning_rate": 1.0806178541096535e-05, "loss": 0.656, "step": 10995 }, { "epoch": 1.4704466434875636, "grad_norm": 1.2490668296813965, "learning_rate": 1.0804739485350255e-05, "loss": 0.7115, "step": 10996 }, { "epoch": 1.4705803690826424, "grad_norm": 1.2804933786392212, "learning_rate": 1.0803300412829949e-05, "loss": 0.758, "step": 10997 }, { "epoch": 1.4707140946777213, "grad_norm": 1.2336697578430176, "learning_rate": 1.0801861323565616e-05, "loss": 0.627, "step": 10998 }, { "epoch": 1.4708478202728001, "grad_norm": 1.2621501684188843, "learning_rate": 1.0800422217587253e-05, "loss": 0.6653, "step": 10999 }, { "epoch": 1.470981545867879, "grad_norm": 1.1541835069656372, "learning_rate": 1.0798983094924851e-05, "loss": 0.5881, "step": 11000 }, { "epoch": 1.471115271462958, "grad_norm": 1.1506747007369995, "learning_rate": 1.0797543955608411e-05, "loss": 0.6551, "step": 11001 }, { "epoch": 1.471248997058037, "grad_norm": 1.3766402006149292, "learning_rate": 1.0796104799667935e-05, "loss": 0.7063, "step": 11002 }, { "epoch": 1.4713827226531158, "grad_norm": 1.2164684534072876, "learning_rate": 1.0794665627133409e-05, "loss": 0.6897, "step": 11003 }, { "epoch": 1.4715164482481948, "grad_norm": 1.1555522680282593, "learning_rate": 1.0793226438034843e-05, "loss": 0.6959, "step": 11004 }, { "epoch": 1.4716501738432737, "grad_norm": 1.3476839065551758, "learning_rate": 1.079178723240223e-05, "loss": 0.7559, "step": 11005 }, { "epoch": 1.4717838994383525, "grad_norm": 1.1879136562347412, "learning_rate": 1.0790348010265572e-05, "loss": 0.6216, "step": 11006 }, { "epoch": 1.4719176250334314, "grad_norm": 1.2088356018066406, "learning_rate": 1.0788908771654865e-05, "loss": 0.6877, "step": 11007 }, { "epoch": 1.4720513506285102, "grad_norm": 1.17787504196167, "learning_rate": 1.0787469516600109e-05, "loss": 0.7244, "step": 11008 }, { "epoch": 1.4721850762235893, "grad_norm": 1.0703020095825195, "learning_rate": 1.0786030245131305e-05, "loss": 0.5867, "step": 11009 }, { "epoch": 1.4723188018186681, "grad_norm": 1.4556201696395874, "learning_rate": 1.0784590957278452e-05, "loss": 0.7471, "step": 11010 }, { "epoch": 1.472452527413747, "grad_norm": 1.1953924894332886, "learning_rate": 1.078315165307155e-05, "loss": 0.6816, "step": 11011 }, { "epoch": 1.4725862530088258, "grad_norm": 1.2086997032165527, "learning_rate": 1.0781712332540602e-05, "loss": 0.7157, "step": 11012 }, { "epoch": 1.4727199786039047, "grad_norm": 1.2595924139022827, "learning_rate": 1.0780272995715608e-05, "loss": 0.7183, "step": 11013 }, { "epoch": 1.4728537041989838, "grad_norm": 1.1625953912734985, "learning_rate": 1.0778833642626573e-05, "loss": 0.731, "step": 11014 }, { "epoch": 1.4729874297940626, "grad_norm": 1.2217767238616943, "learning_rate": 1.0777394273303495e-05, "loss": 0.6491, "step": 11015 }, { "epoch": 1.4731211553891415, "grad_norm": 1.238851547241211, "learning_rate": 1.0775954887776374e-05, "loss": 0.734, "step": 11016 }, { "epoch": 1.4732548809842203, "grad_norm": 1.1740225553512573, "learning_rate": 1.0774515486075216e-05, "loss": 0.6682, "step": 11017 }, { "epoch": 1.4733886065792992, "grad_norm": 1.1755441427230835, "learning_rate": 1.0773076068230028e-05, "loss": 0.6931, "step": 11018 }, { "epoch": 1.4735223321743782, "grad_norm": 1.263599157333374, "learning_rate": 1.0771636634270807e-05, "loss": 0.6879, "step": 11019 }, { "epoch": 1.473656057769457, "grad_norm": 1.1955450773239136, "learning_rate": 1.077019718422756e-05, "loss": 0.6872, "step": 11020 }, { "epoch": 1.473789783364536, "grad_norm": 1.3294062614440918, "learning_rate": 1.0768757718130287e-05, "loss": 0.7942, "step": 11021 }, { "epoch": 1.473923508959615, "grad_norm": 1.1144938468933105, "learning_rate": 1.0767318236008997e-05, "loss": 0.6454, "step": 11022 }, { "epoch": 1.4740572345546938, "grad_norm": 1.2441428899765015, "learning_rate": 1.0765878737893692e-05, "loss": 0.7131, "step": 11023 }, { "epoch": 1.4741909601497727, "grad_norm": 1.1600240468978882, "learning_rate": 1.0764439223814378e-05, "loss": 0.7286, "step": 11024 }, { "epoch": 1.4743246857448515, "grad_norm": 1.5299153327941895, "learning_rate": 1.0762999693801057e-05, "loss": 0.8622, "step": 11025 }, { "epoch": 1.4744584113399304, "grad_norm": 1.302994966506958, "learning_rate": 1.0761560147883742e-05, "loss": 0.7904, "step": 11026 }, { "epoch": 1.4745921369350095, "grad_norm": 1.1514214277267456, "learning_rate": 1.0760120586092432e-05, "loss": 0.7185, "step": 11027 }, { "epoch": 1.4747258625300883, "grad_norm": 1.2764613628387451, "learning_rate": 1.0758681008457137e-05, "loss": 0.6692, "step": 11028 }, { "epoch": 1.4748595881251672, "grad_norm": 1.3437010049819946, "learning_rate": 1.0757241415007861e-05, "loss": 0.7132, "step": 11029 }, { "epoch": 1.474993313720246, "grad_norm": 1.2485750913619995, "learning_rate": 1.0755801805774613e-05, "loss": 0.6757, "step": 11030 }, { "epoch": 1.4751270393153249, "grad_norm": 1.2216953039169312, "learning_rate": 1.07543621807874e-05, "loss": 0.6756, "step": 11031 }, { "epoch": 1.475260764910404, "grad_norm": 1.1537508964538574, "learning_rate": 1.0752922540076227e-05, "loss": 0.6142, "step": 11032 }, { "epoch": 1.4753944905054828, "grad_norm": 1.3035407066345215, "learning_rate": 1.0751482883671108e-05, "loss": 0.6769, "step": 11033 }, { "epoch": 1.4755282161005616, "grad_norm": 1.2123315334320068, "learning_rate": 1.0750043211602045e-05, "loss": 0.6792, "step": 11034 }, { "epoch": 1.4756619416956405, "grad_norm": 1.4011425971984863, "learning_rate": 1.0748603523899048e-05, "loss": 0.8214, "step": 11035 }, { "epoch": 1.4757956672907193, "grad_norm": 1.1624788045883179, "learning_rate": 1.0747163820592128e-05, "loss": 0.6623, "step": 11036 }, { "epoch": 1.4759293928857984, "grad_norm": 1.2880839109420776, "learning_rate": 1.0745724101711293e-05, "loss": 0.8074, "step": 11037 }, { "epoch": 1.4760631184808772, "grad_norm": 1.1198848485946655, "learning_rate": 1.0744284367286553e-05, "loss": 0.6794, "step": 11038 }, { "epoch": 1.476196844075956, "grad_norm": 1.2235897779464722, "learning_rate": 1.0742844617347919e-05, "loss": 0.7242, "step": 11039 }, { "epoch": 1.4763305696710352, "grad_norm": 1.1562097072601318, "learning_rate": 1.0741404851925397e-05, "loss": 0.6953, "step": 11040 }, { "epoch": 1.476464295266114, "grad_norm": 1.2664004564285278, "learning_rate": 1.0739965071049001e-05, "loss": 0.7635, "step": 11041 }, { "epoch": 1.4765980208611929, "grad_norm": 1.1384872198104858, "learning_rate": 1.073852527474874e-05, "loss": 0.7494, "step": 11042 }, { "epoch": 1.4767317464562717, "grad_norm": 1.311767339706421, "learning_rate": 1.0737085463054628e-05, "loss": 0.7631, "step": 11043 }, { "epoch": 1.4768654720513505, "grad_norm": 1.1707119941711426, "learning_rate": 1.0735645635996676e-05, "loss": 0.7371, "step": 11044 }, { "epoch": 1.4769991976464296, "grad_norm": 1.2678793668746948, "learning_rate": 1.0734205793604892e-05, "loss": 0.5966, "step": 11045 }, { "epoch": 1.4771329232415085, "grad_norm": 1.1148202419281006, "learning_rate": 1.0732765935909293e-05, "loss": 0.7517, "step": 11046 }, { "epoch": 1.4772666488365873, "grad_norm": 1.3307673931121826, "learning_rate": 1.073132606293989e-05, "loss": 0.7736, "step": 11047 }, { "epoch": 1.4774003744316662, "grad_norm": 1.3070316314697266, "learning_rate": 1.0729886174726694e-05, "loss": 0.7367, "step": 11048 }, { "epoch": 1.477534100026745, "grad_norm": 1.1212128400802612, "learning_rate": 1.0728446271299714e-05, "loss": 0.7185, "step": 11049 }, { "epoch": 1.477667825621824, "grad_norm": 1.2541477680206299, "learning_rate": 1.0727006352688973e-05, "loss": 0.7954, "step": 11050 }, { "epoch": 1.477801551216903, "grad_norm": 1.216055989265442, "learning_rate": 1.0725566418924484e-05, "loss": 0.6888, "step": 11051 }, { "epoch": 1.4779352768119818, "grad_norm": 1.2605098485946655, "learning_rate": 1.0724126470036254e-05, "loss": 0.7019, "step": 11052 }, { "epoch": 1.4780690024070609, "grad_norm": 1.2346242666244507, "learning_rate": 1.0722686506054298e-05, "loss": 0.7027, "step": 11053 }, { "epoch": 1.4782027280021395, "grad_norm": 1.2767425775527954, "learning_rate": 1.0721246527008637e-05, "loss": 0.7523, "step": 11054 }, { "epoch": 1.4783364535972185, "grad_norm": 1.0733360052108765, "learning_rate": 1.071980653292928e-05, "loss": 0.5793, "step": 11055 }, { "epoch": 1.4784701791922974, "grad_norm": 1.279767632484436, "learning_rate": 1.0718366523846246e-05, "loss": 0.7644, "step": 11056 }, { "epoch": 1.4786039047873762, "grad_norm": 1.2687493562698364, "learning_rate": 1.0716926499789548e-05, "loss": 0.7585, "step": 11057 }, { "epoch": 1.4787376303824553, "grad_norm": 1.1347460746765137, "learning_rate": 1.0715486460789204e-05, "loss": 0.7197, "step": 11058 }, { "epoch": 1.4788713559775342, "grad_norm": 1.2479099035263062, "learning_rate": 1.0714046406875231e-05, "loss": 0.7027, "step": 11059 }, { "epoch": 1.479005081572613, "grad_norm": 1.1573920249938965, "learning_rate": 1.0712606338077642e-05, "loss": 0.6997, "step": 11060 }, { "epoch": 1.4791388071676919, "grad_norm": 1.282656192779541, "learning_rate": 1.0711166254426455e-05, "loss": 0.7436, "step": 11061 }, { "epoch": 1.4792725327627707, "grad_norm": 1.3173472881317139, "learning_rate": 1.0709726155951688e-05, "loss": 0.7731, "step": 11062 }, { "epoch": 1.4794062583578498, "grad_norm": 1.4834703207015991, "learning_rate": 1.070828604268336e-05, "loss": 0.7727, "step": 11063 }, { "epoch": 1.4795399839529286, "grad_norm": 1.162908911705017, "learning_rate": 1.0706845914651486e-05, "loss": 0.6783, "step": 11064 }, { "epoch": 1.4796737095480075, "grad_norm": 1.130872130393982, "learning_rate": 1.0705405771886086e-05, "loss": 0.6952, "step": 11065 }, { "epoch": 1.4798074351430863, "grad_norm": 1.2108170986175537, "learning_rate": 1.0703965614417178e-05, "loss": 0.6402, "step": 11066 }, { "epoch": 1.4799411607381652, "grad_norm": 1.2727104425430298, "learning_rate": 1.0702525442274779e-05, "loss": 0.7287, "step": 11067 }, { "epoch": 1.4800748863332442, "grad_norm": 1.2478718757629395, "learning_rate": 1.070108525548891e-05, "loss": 0.6615, "step": 11068 }, { "epoch": 1.480208611928323, "grad_norm": 1.2840837240219116, "learning_rate": 1.069964505408959e-05, "loss": 0.6968, "step": 11069 }, { "epoch": 1.480342337523402, "grad_norm": 1.133843183517456, "learning_rate": 1.0698204838106837e-05, "loss": 0.6541, "step": 11070 }, { "epoch": 1.480476063118481, "grad_norm": 1.1912472248077393, "learning_rate": 1.0696764607570676e-05, "loss": 0.6669, "step": 11071 }, { "epoch": 1.4806097887135596, "grad_norm": 1.1748170852661133, "learning_rate": 1.069532436251112e-05, "loss": 0.6579, "step": 11072 }, { "epoch": 1.4807435143086387, "grad_norm": 1.2491395473480225, "learning_rate": 1.0693884102958194e-05, "loss": 0.7132, "step": 11073 }, { "epoch": 1.4808772399037176, "grad_norm": 1.0071097612380981, "learning_rate": 1.0692443828941918e-05, "loss": 0.6494, "step": 11074 }, { "epoch": 1.4810109654987964, "grad_norm": 1.262352705001831, "learning_rate": 1.0691003540492313e-05, "loss": 0.6607, "step": 11075 }, { "epoch": 1.4811446910938755, "grad_norm": 1.3789443969726562, "learning_rate": 1.06895632376394e-05, "loss": 0.7732, "step": 11076 }, { "epoch": 1.4812784166889543, "grad_norm": 1.296425461769104, "learning_rate": 1.0688122920413202e-05, "loss": 0.7569, "step": 11077 }, { "epoch": 1.4814121422840332, "grad_norm": 1.1450682878494263, "learning_rate": 1.0686682588843737e-05, "loss": 0.6727, "step": 11078 }, { "epoch": 1.481545867879112, "grad_norm": 1.153116226196289, "learning_rate": 1.0685242242961035e-05, "loss": 0.6466, "step": 11079 }, { "epoch": 1.4816795934741909, "grad_norm": 1.1240274906158447, "learning_rate": 1.0683801882795112e-05, "loss": 0.6658, "step": 11080 }, { "epoch": 1.48181331906927, "grad_norm": 1.1764883995056152, "learning_rate": 1.0682361508375993e-05, "loss": 0.598, "step": 11081 }, { "epoch": 1.4819470446643488, "grad_norm": 1.2112371921539307, "learning_rate": 1.06809211197337e-05, "loss": 0.7176, "step": 11082 }, { "epoch": 1.4820807702594276, "grad_norm": 1.1283106803894043, "learning_rate": 1.0679480716898263e-05, "loss": 0.7287, "step": 11083 }, { "epoch": 1.4822144958545065, "grad_norm": 1.1033836603164673, "learning_rate": 1.0678040299899697e-05, "loss": 0.6087, "step": 11084 }, { "epoch": 1.4823482214495853, "grad_norm": 1.2348387241363525, "learning_rate": 1.0676599868768029e-05, "loss": 0.6954, "step": 11085 }, { "epoch": 1.4824819470446644, "grad_norm": 1.2662067413330078, "learning_rate": 1.0675159423533286e-05, "loss": 0.7521, "step": 11086 }, { "epoch": 1.4826156726397433, "grad_norm": 1.059889793395996, "learning_rate": 1.0673718964225488e-05, "loss": 0.635, "step": 11087 }, { "epoch": 1.482749398234822, "grad_norm": 1.2185277938842773, "learning_rate": 1.0672278490874666e-05, "loss": 0.7012, "step": 11088 }, { "epoch": 1.4828831238299012, "grad_norm": 1.201928973197937, "learning_rate": 1.067083800351084e-05, "loss": 0.7051, "step": 11089 }, { "epoch": 1.48301684942498, "grad_norm": 1.1891995668411255, "learning_rate": 1.0669397502164038e-05, "loss": 0.72, "step": 11090 }, { "epoch": 1.4831505750200589, "grad_norm": 1.189477801322937, "learning_rate": 1.066795698686429e-05, "loss": 0.5798, "step": 11091 }, { "epoch": 1.4832843006151377, "grad_norm": 1.256593108177185, "learning_rate": 1.0666516457641614e-05, "loss": 0.7259, "step": 11092 }, { "epoch": 1.4834180262102166, "grad_norm": 1.3148462772369385, "learning_rate": 1.0665075914526039e-05, "loss": 0.7675, "step": 11093 }, { "epoch": 1.4835517518052956, "grad_norm": 1.2321245670318604, "learning_rate": 1.0663635357547593e-05, "loss": 0.7015, "step": 11094 }, { "epoch": 1.4836854774003745, "grad_norm": 1.2060399055480957, "learning_rate": 1.0662194786736307e-05, "loss": 0.7716, "step": 11095 }, { "epoch": 1.4838192029954533, "grad_norm": 1.2887561321258545, "learning_rate": 1.0660754202122199e-05, "loss": 0.6772, "step": 11096 }, { "epoch": 1.4839529285905322, "grad_norm": 1.1921271085739136, "learning_rate": 1.0659313603735307e-05, "loss": 0.7529, "step": 11097 }, { "epoch": 1.484086654185611, "grad_norm": 1.180107593536377, "learning_rate": 1.0657872991605649e-05, "loss": 0.7033, "step": 11098 }, { "epoch": 1.48422037978069, "grad_norm": 1.1730291843414307, "learning_rate": 1.0656432365763263e-05, "loss": 0.6462, "step": 11099 }, { "epoch": 1.484354105375769, "grad_norm": 1.2790377140045166, "learning_rate": 1.0654991726238166e-05, "loss": 0.7754, "step": 11100 }, { "epoch": 1.4844878309708478, "grad_norm": 1.2423210144042969, "learning_rate": 1.0653551073060397e-05, "loss": 0.7825, "step": 11101 }, { "epoch": 1.4846215565659266, "grad_norm": 1.3042274713516235, "learning_rate": 1.0652110406259981e-05, "loss": 0.8086, "step": 11102 }, { "epoch": 1.4847552821610055, "grad_norm": 1.2844929695129395, "learning_rate": 1.065066972586695e-05, "loss": 0.8353, "step": 11103 }, { "epoch": 1.4848890077560846, "grad_norm": 1.0808616876602173, "learning_rate": 1.064922903191133e-05, "loss": 0.6514, "step": 11104 }, { "epoch": 1.4850227333511634, "grad_norm": 1.4469674825668335, "learning_rate": 1.0647788324423152e-05, "loss": 0.7744, "step": 11105 }, { "epoch": 1.4851564589462423, "grad_norm": 1.1879732608795166, "learning_rate": 1.0646347603432443e-05, "loss": 0.7011, "step": 11106 }, { "epoch": 1.4852901845413213, "grad_norm": 1.2401130199432373, "learning_rate": 1.064490686896924e-05, "loss": 0.6848, "step": 11107 }, { "epoch": 1.4854239101364002, "grad_norm": 1.1934558153152466, "learning_rate": 1.064346612106357e-05, "loss": 0.6927, "step": 11108 }, { "epoch": 1.485557635731479, "grad_norm": 1.217354416847229, "learning_rate": 1.0642025359745463e-05, "loss": 0.7093, "step": 11109 }, { "epoch": 1.4856913613265579, "grad_norm": 1.1650991439819336, "learning_rate": 1.0640584585044953e-05, "loss": 0.7072, "step": 11110 }, { "epoch": 1.4858250869216367, "grad_norm": 1.1619435548782349, "learning_rate": 1.0639143796992072e-05, "loss": 0.585, "step": 11111 }, { "epoch": 1.4859588125167158, "grad_norm": 1.2391313314437866, "learning_rate": 1.0637702995616848e-05, "loss": 0.7588, "step": 11112 }, { "epoch": 1.4860925381117946, "grad_norm": 1.2317885160446167, "learning_rate": 1.0636262180949312e-05, "loss": 0.7341, "step": 11113 }, { "epoch": 1.4862262637068735, "grad_norm": 1.1130679845809937, "learning_rate": 1.0634821353019505e-05, "loss": 0.711, "step": 11114 }, { "epoch": 1.4863599893019523, "grad_norm": 1.1894334554672241, "learning_rate": 1.0633380511857454e-05, "loss": 0.7604, "step": 11115 }, { "epoch": 1.4864937148970312, "grad_norm": 1.2044531106948853, "learning_rate": 1.0631939657493188e-05, "loss": 0.7775, "step": 11116 }, { "epoch": 1.4866274404921103, "grad_norm": 1.188333511352539, "learning_rate": 1.0630498789956749e-05, "loss": 0.6572, "step": 11117 }, { "epoch": 1.4867611660871891, "grad_norm": 1.267372727394104, "learning_rate": 1.0629057909278165e-05, "loss": 0.7848, "step": 11118 }, { "epoch": 1.486894891682268, "grad_norm": 1.0872628688812256, "learning_rate": 1.0627617015487468e-05, "loss": 0.6821, "step": 11119 }, { "epoch": 1.4870286172773468, "grad_norm": 1.24944007396698, "learning_rate": 1.0626176108614699e-05, "loss": 0.7661, "step": 11120 }, { "epoch": 1.4871623428724257, "grad_norm": 1.4129022359848022, "learning_rate": 1.0624735188689885e-05, "loss": 0.6711, "step": 11121 }, { "epoch": 1.4872960684675047, "grad_norm": 1.1602057218551636, "learning_rate": 1.0623294255743064e-05, "loss": 0.6772, "step": 11122 }, { "epoch": 1.4874297940625836, "grad_norm": 1.2842772006988525, "learning_rate": 1.0621853309804275e-05, "loss": 0.7407, "step": 11123 }, { "epoch": 1.4875635196576624, "grad_norm": 1.3192344903945923, "learning_rate": 1.0620412350903545e-05, "loss": 0.7811, "step": 11124 }, { "epoch": 1.4876972452527415, "grad_norm": 1.1869572401046753, "learning_rate": 1.0618971379070912e-05, "loss": 0.6615, "step": 11125 }, { "epoch": 1.4878309708478203, "grad_norm": 1.1692684888839722, "learning_rate": 1.0617530394336412e-05, "loss": 0.6601, "step": 11126 }, { "epoch": 1.4879646964428992, "grad_norm": 1.2383116483688354, "learning_rate": 1.0616089396730086e-05, "loss": 0.7036, "step": 11127 }, { "epoch": 1.488098422037978, "grad_norm": 1.1192725896835327, "learning_rate": 1.0614648386281967e-05, "loss": 0.7453, "step": 11128 }, { "epoch": 1.488232147633057, "grad_norm": 1.1129965782165527, "learning_rate": 1.0613207363022086e-05, "loss": 0.6989, "step": 11129 }, { "epoch": 1.488365873228136, "grad_norm": 1.1282628774642944, "learning_rate": 1.0611766326980489e-05, "loss": 0.74, "step": 11130 }, { "epoch": 1.4884995988232148, "grad_norm": 1.06178617477417, "learning_rate": 1.0610325278187203e-05, "loss": 0.6493, "step": 11131 }, { "epoch": 1.4886333244182937, "grad_norm": 1.2385424375534058, "learning_rate": 1.0608884216672275e-05, "loss": 0.6972, "step": 11132 }, { "epoch": 1.4887670500133725, "grad_norm": 1.1972640752792358, "learning_rate": 1.0607443142465735e-05, "loss": 0.672, "step": 11133 }, { "epoch": 1.4889007756084514, "grad_norm": 1.22037935256958, "learning_rate": 1.0606002055597627e-05, "loss": 0.6628, "step": 11134 }, { "epoch": 1.4890345012035304, "grad_norm": 1.1934614181518555, "learning_rate": 1.0604560956097983e-05, "loss": 0.6956, "step": 11135 }, { "epoch": 1.4891682267986093, "grad_norm": 1.1259020566940308, "learning_rate": 1.0603119843996848e-05, "loss": 0.6801, "step": 11136 }, { "epoch": 1.4893019523936881, "grad_norm": 1.1250442266464233, "learning_rate": 1.0601678719324254e-05, "loss": 0.7042, "step": 11137 }, { "epoch": 1.489435677988767, "grad_norm": 1.1000854969024658, "learning_rate": 1.0600237582110244e-05, "loss": 0.6775, "step": 11138 }, { "epoch": 1.4895694035838458, "grad_norm": 1.297085165977478, "learning_rate": 1.0598796432384853e-05, "loss": 0.674, "step": 11139 }, { "epoch": 1.489703129178925, "grad_norm": 1.2587895393371582, "learning_rate": 1.0597355270178126e-05, "loss": 0.6743, "step": 11140 }, { "epoch": 1.4898368547740037, "grad_norm": 1.2848955392837524, "learning_rate": 1.0595914095520102e-05, "loss": 0.6845, "step": 11141 }, { "epoch": 1.4899705803690826, "grad_norm": 1.0787171125411987, "learning_rate": 1.0594472908440817e-05, "loss": 0.7049, "step": 11142 }, { "epoch": 1.4901043059641617, "grad_norm": 1.3908013105392456, "learning_rate": 1.0593031708970312e-05, "loss": 0.7623, "step": 11143 }, { "epoch": 1.4902380315592405, "grad_norm": 1.1248219013214111, "learning_rate": 1.059159049713863e-05, "loss": 0.6604, "step": 11144 }, { "epoch": 1.4903717571543194, "grad_norm": 1.2079771757125854, "learning_rate": 1.059014927297581e-05, "loss": 0.7518, "step": 11145 }, { "epoch": 1.4905054827493982, "grad_norm": 1.3465570211410522, "learning_rate": 1.058870803651189e-05, "loss": 0.6726, "step": 11146 }, { "epoch": 1.490639208344477, "grad_norm": 1.1222517490386963, "learning_rate": 1.0587266787776917e-05, "loss": 0.5953, "step": 11147 }, { "epoch": 1.4907729339395561, "grad_norm": 1.4174551963806152, "learning_rate": 1.0585825526800933e-05, "loss": 0.7789, "step": 11148 }, { "epoch": 1.490906659534635, "grad_norm": 1.354761004447937, "learning_rate": 1.0584384253613973e-05, "loss": 0.7131, "step": 11149 }, { "epoch": 1.4910403851297138, "grad_norm": 1.2866826057434082, "learning_rate": 1.058294296824608e-05, "loss": 0.755, "step": 11150 }, { "epoch": 1.4911741107247927, "grad_norm": 1.2255841493606567, "learning_rate": 1.0581501670727303e-05, "loss": 0.6948, "step": 11151 }, { "epoch": 1.4913078363198715, "grad_norm": 1.217775583267212, "learning_rate": 1.0580060361087678e-05, "loss": 0.6762, "step": 11152 }, { "epoch": 1.4914415619149506, "grad_norm": 1.2363560199737549, "learning_rate": 1.057861903935725e-05, "loss": 0.7571, "step": 11153 }, { "epoch": 1.4915752875100294, "grad_norm": 1.2037606239318848, "learning_rate": 1.0577177705566061e-05, "loss": 0.6373, "step": 11154 }, { "epoch": 1.4917090131051083, "grad_norm": 1.3599095344543457, "learning_rate": 1.0575736359744157e-05, "loss": 0.8142, "step": 11155 }, { "epoch": 1.4918427387001874, "grad_norm": 1.1520377397537231, "learning_rate": 1.057429500192158e-05, "loss": 0.6027, "step": 11156 }, { "epoch": 1.491976464295266, "grad_norm": 1.200454831123352, "learning_rate": 1.0572853632128372e-05, "loss": 0.6532, "step": 11157 }, { "epoch": 1.492110189890345, "grad_norm": 0.9994578957557678, "learning_rate": 1.0571412250394575e-05, "loss": 0.6085, "step": 11158 }, { "epoch": 1.492243915485424, "grad_norm": 1.2469974756240845, "learning_rate": 1.056997085675024e-05, "loss": 0.7465, "step": 11159 }, { "epoch": 1.4923776410805027, "grad_norm": 1.2766176462173462, "learning_rate": 1.0568529451225408e-05, "loss": 0.7289, "step": 11160 }, { "epoch": 1.4925113666755818, "grad_norm": 1.2428025007247925, "learning_rate": 1.0567088033850123e-05, "loss": 0.7384, "step": 11161 }, { "epoch": 1.4926450922706607, "grad_norm": 1.173176884651184, "learning_rate": 1.0565646604654432e-05, "loss": 0.717, "step": 11162 }, { "epoch": 1.4927788178657395, "grad_norm": 1.0862598419189453, "learning_rate": 1.0564205163668377e-05, "loss": 0.6614, "step": 11163 }, { "epoch": 1.4929125434608184, "grad_norm": 1.317094326019287, "learning_rate": 1.0562763710922004e-05, "loss": 0.7413, "step": 11164 }, { "epoch": 1.4930462690558972, "grad_norm": 1.2068299055099487, "learning_rate": 1.0561322246445363e-05, "loss": 0.7845, "step": 11165 }, { "epoch": 1.4931799946509763, "grad_norm": 1.2888822555541992, "learning_rate": 1.0559880770268493e-05, "loss": 0.7543, "step": 11166 }, { "epoch": 1.4933137202460551, "grad_norm": 1.197426676750183, "learning_rate": 1.0558439282421446e-05, "loss": 0.7058, "step": 11167 }, { "epoch": 1.493447445841134, "grad_norm": 1.1670724153518677, "learning_rate": 1.055699778293427e-05, "loss": 0.6726, "step": 11168 }, { "epoch": 1.4935811714362128, "grad_norm": 1.3224575519561768, "learning_rate": 1.0555556271837007e-05, "loss": 0.7048, "step": 11169 }, { "epoch": 1.4937148970312917, "grad_norm": 1.2369978427886963, "learning_rate": 1.05541147491597e-05, "loss": 0.6624, "step": 11170 }, { "epoch": 1.4938486226263707, "grad_norm": 1.2266074419021606, "learning_rate": 1.0552673214932406e-05, "loss": 0.7342, "step": 11171 }, { "epoch": 1.4939823482214496, "grad_norm": 1.2613096237182617, "learning_rate": 1.0551231669185168e-05, "loss": 0.7352, "step": 11172 }, { "epoch": 1.4941160738165284, "grad_norm": 1.2465813159942627, "learning_rate": 1.0549790111948031e-05, "loss": 0.7067, "step": 11173 }, { "epoch": 1.4942497994116075, "grad_norm": 1.2195369005203247, "learning_rate": 1.0548348543251044e-05, "loss": 0.7623, "step": 11174 }, { "epoch": 1.4943835250066861, "grad_norm": 1.2356926202774048, "learning_rate": 1.054690696312426e-05, "loss": 0.7753, "step": 11175 }, { "epoch": 1.4945172506017652, "grad_norm": 1.0978771448135376, "learning_rate": 1.0545465371597723e-05, "loss": 0.6574, "step": 11176 }, { "epoch": 1.494650976196844, "grad_norm": 1.2394564151763916, "learning_rate": 1.0544023768701477e-05, "loss": 0.7113, "step": 11177 }, { "epoch": 1.494784701791923, "grad_norm": 1.3418971300125122, "learning_rate": 1.0542582154465581e-05, "loss": 0.7727, "step": 11178 }, { "epoch": 1.494918427387002, "grad_norm": 1.114583134651184, "learning_rate": 1.0541140528920077e-05, "loss": 0.623, "step": 11179 }, { "epoch": 1.4950521529820808, "grad_norm": 1.278980016708374, "learning_rate": 1.053969889209502e-05, "loss": 0.6847, "step": 11180 }, { "epoch": 1.4951858785771597, "grad_norm": 1.3881422281265259, "learning_rate": 1.0538257244020456e-05, "loss": 0.7263, "step": 11181 }, { "epoch": 1.4953196041722385, "grad_norm": 1.1720807552337646, "learning_rate": 1.0536815584726432e-05, "loss": 0.6569, "step": 11182 }, { "epoch": 1.4954533297673174, "grad_norm": 1.1185722351074219, "learning_rate": 1.0535373914243001e-05, "loss": 0.6577, "step": 11183 }, { "epoch": 1.4955870553623964, "grad_norm": 1.0863063335418701, "learning_rate": 1.0533932232600213e-05, "loss": 0.6473, "step": 11184 }, { "epoch": 1.4957207809574753, "grad_norm": 1.221068024635315, "learning_rate": 1.053249053982812e-05, "loss": 0.7396, "step": 11185 }, { "epoch": 1.4958545065525541, "grad_norm": 1.2321242094039917, "learning_rate": 1.053104883595677e-05, "loss": 0.6905, "step": 11186 }, { "epoch": 1.495988232147633, "grad_norm": 1.2206392288208008, "learning_rate": 1.0529607121016215e-05, "loss": 0.7287, "step": 11187 }, { "epoch": 1.4961219577427118, "grad_norm": 1.2069880962371826, "learning_rate": 1.052816539503651e-05, "loss": 0.631, "step": 11188 }, { "epoch": 1.496255683337791, "grad_norm": 1.2368944883346558, "learning_rate": 1.0526723658047698e-05, "loss": 0.6869, "step": 11189 }, { "epoch": 1.4963894089328698, "grad_norm": 1.193634033203125, "learning_rate": 1.0525281910079834e-05, "loss": 0.6796, "step": 11190 }, { "epoch": 1.4965231345279486, "grad_norm": 1.1900726556777954, "learning_rate": 1.0523840151162974e-05, "loss": 0.6999, "step": 11191 }, { "epoch": 1.4966568601230277, "grad_norm": 1.2822988033294678, "learning_rate": 1.0522398381327171e-05, "loss": 0.8222, "step": 11192 }, { "epoch": 1.4967905857181065, "grad_norm": 1.1578625440597534, "learning_rate": 1.052095660060247e-05, "loss": 0.6517, "step": 11193 }, { "epoch": 1.4969243113131854, "grad_norm": 1.2446532249450684, "learning_rate": 1.0519514809018927e-05, "loss": 0.7465, "step": 11194 }, { "epoch": 1.4970580369082642, "grad_norm": 1.1602444648742676, "learning_rate": 1.0518073006606596e-05, "loss": 0.6599, "step": 11195 }, { "epoch": 1.497191762503343, "grad_norm": 1.3141688108444214, "learning_rate": 1.0516631193395525e-05, "loss": 0.7063, "step": 11196 }, { "epoch": 1.4973254880984221, "grad_norm": 1.1707797050476074, "learning_rate": 1.0515189369415775e-05, "loss": 0.6416, "step": 11197 }, { "epoch": 1.497459213693501, "grad_norm": 1.3497982025146484, "learning_rate": 1.0513747534697396e-05, "loss": 0.7772, "step": 11198 }, { "epoch": 1.4975929392885798, "grad_norm": 1.1801602840423584, "learning_rate": 1.051230568927044e-05, "loss": 0.6775, "step": 11199 }, { "epoch": 1.4977266648836587, "grad_norm": 1.3530025482177734, "learning_rate": 1.0510863833164963e-05, "loss": 0.6813, "step": 11200 }, { "epoch": 1.4978603904787375, "grad_norm": 1.0279252529144287, "learning_rate": 1.0509421966411017e-05, "loss": 0.6137, "step": 11201 }, { "epoch": 1.4979941160738166, "grad_norm": 1.176138162612915, "learning_rate": 1.0507980089038659e-05, "loss": 0.6623, "step": 11202 }, { "epoch": 1.4981278416688955, "grad_norm": 1.3767824172973633, "learning_rate": 1.050653820107794e-05, "loss": 0.7327, "step": 11203 }, { "epoch": 1.4982615672639743, "grad_norm": 1.4212448596954346, "learning_rate": 1.050509630255892e-05, "loss": 0.8116, "step": 11204 }, { "epoch": 1.4983952928590532, "grad_norm": 1.3102025985717773, "learning_rate": 1.050365439351165e-05, "loss": 0.7032, "step": 11205 }, { "epoch": 1.498529018454132, "grad_norm": 1.2339673042297363, "learning_rate": 1.0502212473966183e-05, "loss": 0.7001, "step": 11206 }, { "epoch": 1.498662744049211, "grad_norm": 1.3438186645507812, "learning_rate": 1.0500770543952579e-05, "loss": 0.8373, "step": 11207 }, { "epoch": 1.49879646964429, "grad_norm": 1.2887126207351685, "learning_rate": 1.0499328603500896e-05, "loss": 0.7364, "step": 11208 }, { "epoch": 1.4989301952393688, "grad_norm": 1.1469290256500244, "learning_rate": 1.0497886652641181e-05, "loss": 0.6368, "step": 11209 }, { "epoch": 1.4990639208344478, "grad_norm": 1.2227312326431274, "learning_rate": 1.0496444691403496e-05, "loss": 0.6914, "step": 11210 }, { "epoch": 1.4991976464295267, "grad_norm": 1.278199315071106, "learning_rate": 1.0495002719817896e-05, "loss": 0.7893, "step": 11211 }, { "epoch": 1.4993313720246055, "grad_norm": 1.1027257442474365, "learning_rate": 1.0493560737914444e-05, "loss": 0.6217, "step": 11212 }, { "epoch": 1.4994650976196844, "grad_norm": 1.210065245628357, "learning_rate": 1.0492118745723185e-05, "loss": 0.7271, "step": 11213 }, { "epoch": 1.4995988232147632, "grad_norm": 1.0736790895462036, "learning_rate": 1.0490676743274181e-05, "loss": 0.6545, "step": 11214 }, { "epoch": 1.4997325488098423, "grad_norm": 1.2265375852584839, "learning_rate": 1.0489234730597494e-05, "loss": 0.7098, "step": 11215 }, { "epoch": 1.4998662744049212, "grad_norm": 1.2218736410140991, "learning_rate": 1.0487792707723173e-05, "loss": 0.6801, "step": 11216 }, { "epoch": 1.5, "grad_norm": 1.3834000825881958, "learning_rate": 1.0486350674681282e-05, "loss": 0.8272, "step": 11217 }, { "epoch": 1.5001337255950788, "grad_norm": 1.1733715534210205, "learning_rate": 1.0484908631501875e-05, "loss": 0.6238, "step": 11218 }, { "epoch": 1.5002674511901577, "grad_norm": 1.0997190475463867, "learning_rate": 1.0483466578215013e-05, "loss": 0.6833, "step": 11219 }, { "epoch": 1.5004011767852368, "grad_norm": 1.1278554201126099, "learning_rate": 1.0482024514850753e-05, "loss": 0.6397, "step": 11220 }, { "epoch": 1.5005349023803156, "grad_norm": 1.2674373388290405, "learning_rate": 1.0480582441439155e-05, "loss": 0.791, "step": 11221 }, { "epoch": 1.5006686279753945, "grad_norm": 1.2782623767852783, "learning_rate": 1.0479140358010273e-05, "loss": 0.7469, "step": 11222 }, { "epoch": 1.5008023535704735, "grad_norm": 1.1943987607955933, "learning_rate": 1.0477698264594167e-05, "loss": 0.672, "step": 11223 }, { "epoch": 1.5009360791655522, "grad_norm": 1.269080638885498, "learning_rate": 1.0476256161220902e-05, "loss": 0.6518, "step": 11224 }, { "epoch": 1.5010698047606312, "grad_norm": 1.190590739250183, "learning_rate": 1.0474814047920532e-05, "loss": 0.699, "step": 11225 }, { "epoch": 1.50120353035571, "grad_norm": 1.2478607892990112, "learning_rate": 1.0473371924723117e-05, "loss": 0.6976, "step": 11226 }, { "epoch": 1.501337255950789, "grad_norm": 1.0146020650863647, "learning_rate": 1.0471929791658717e-05, "loss": 0.654, "step": 11227 }, { "epoch": 1.501470981545868, "grad_norm": 1.0527175664901733, "learning_rate": 1.047048764875739e-05, "loss": 0.7468, "step": 11228 }, { "epoch": 1.5016047071409466, "grad_norm": 1.172809362411499, "learning_rate": 1.0469045496049202e-05, "loss": 0.7115, "step": 11229 }, { "epoch": 1.5017384327360257, "grad_norm": 1.151249885559082, "learning_rate": 1.0467603333564207e-05, "loss": 0.6706, "step": 11230 }, { "epoch": 1.5018721583311045, "grad_norm": 1.1829904317855835, "learning_rate": 1.0466161161332468e-05, "loss": 0.6923, "step": 11231 }, { "epoch": 1.5020058839261834, "grad_norm": 1.075018286705017, "learning_rate": 1.0464718979384045e-05, "loss": 0.6382, "step": 11232 }, { "epoch": 1.5021396095212625, "grad_norm": 1.306370496749878, "learning_rate": 1.0463276787749004e-05, "loss": 0.7795, "step": 11233 }, { "epoch": 1.5022733351163413, "grad_norm": 1.2223362922668457, "learning_rate": 1.0461834586457398e-05, "loss": 0.7068, "step": 11234 }, { "epoch": 1.5024070607114202, "grad_norm": 1.0923806428909302, "learning_rate": 1.0460392375539293e-05, "loss": 0.663, "step": 11235 }, { "epoch": 1.502540786306499, "grad_norm": 1.1650400161743164, "learning_rate": 1.0458950155024745e-05, "loss": 0.6382, "step": 11236 }, { "epoch": 1.5026745119015779, "grad_norm": 1.261993169784546, "learning_rate": 1.0457507924943829e-05, "loss": 0.812, "step": 11237 }, { "epoch": 1.502808237496657, "grad_norm": 1.2129238843917847, "learning_rate": 1.0456065685326591e-05, "loss": 0.724, "step": 11238 }, { "epoch": 1.5029419630917358, "grad_norm": 1.1511640548706055, "learning_rate": 1.0454623436203102e-05, "loss": 0.7663, "step": 11239 }, { "epoch": 1.5030756886868146, "grad_norm": 1.4265037775039673, "learning_rate": 1.0453181177603424e-05, "loss": 0.732, "step": 11240 }, { "epoch": 1.5032094142818937, "grad_norm": 1.3808835744857788, "learning_rate": 1.0451738909557617e-05, "loss": 0.7428, "step": 11241 }, { "epoch": 1.5033431398769723, "grad_norm": 1.3154296875, "learning_rate": 1.0450296632095745e-05, "loss": 0.7187, "step": 11242 }, { "epoch": 1.5034768654720514, "grad_norm": 1.3440579175949097, "learning_rate": 1.044885434524787e-05, "loss": 0.7574, "step": 11243 }, { "epoch": 1.5036105910671302, "grad_norm": 1.2270103693008423, "learning_rate": 1.0447412049044055e-05, "loss": 0.6987, "step": 11244 }, { "epoch": 1.503744316662209, "grad_norm": 1.2899839878082275, "learning_rate": 1.0445969743514365e-05, "loss": 0.7693, "step": 11245 }, { "epoch": 1.5038780422572882, "grad_norm": 1.2557570934295654, "learning_rate": 1.0444527428688864e-05, "loss": 0.7688, "step": 11246 }, { "epoch": 1.5040117678523668, "grad_norm": 1.0963035821914673, "learning_rate": 1.0443085104597612e-05, "loss": 0.655, "step": 11247 }, { "epoch": 1.5041454934474459, "grad_norm": 1.2186487913131714, "learning_rate": 1.0441642771270675e-05, "loss": 0.7554, "step": 11248 }, { "epoch": 1.5042792190425247, "grad_norm": 1.0940096378326416, "learning_rate": 1.0440200428738119e-05, "loss": 0.6849, "step": 11249 }, { "epoch": 1.5044129446376036, "grad_norm": 1.2495222091674805, "learning_rate": 1.0438758077030002e-05, "loss": 0.7787, "step": 11250 }, { "epoch": 1.5045466702326826, "grad_norm": 1.278853178024292, "learning_rate": 1.0437315716176398e-05, "loss": 0.7177, "step": 11251 }, { "epoch": 1.5046803958277615, "grad_norm": 1.1386044025421143, "learning_rate": 1.0435873346207362e-05, "loss": 0.6526, "step": 11252 }, { "epoch": 1.5048141214228403, "grad_norm": 1.2027910947799683, "learning_rate": 1.0434430967152966e-05, "loss": 0.7469, "step": 11253 }, { "epoch": 1.5049478470179194, "grad_norm": 1.0777400732040405, "learning_rate": 1.0432988579043273e-05, "loss": 0.6259, "step": 11254 }, { "epoch": 1.505081572612998, "grad_norm": 1.1165553331375122, "learning_rate": 1.0431546181908343e-05, "loss": 0.6709, "step": 11255 }, { "epoch": 1.505215298208077, "grad_norm": 1.298244595527649, "learning_rate": 1.0430103775778249e-05, "loss": 0.7581, "step": 11256 }, { "epoch": 1.505349023803156, "grad_norm": 1.2060997486114502, "learning_rate": 1.0428661360683055e-05, "loss": 0.6969, "step": 11257 }, { "epoch": 1.5054827493982348, "grad_norm": 1.2900875806808472, "learning_rate": 1.0427218936652821e-05, "loss": 0.7801, "step": 11258 }, { "epoch": 1.5056164749933139, "grad_norm": 1.1401000022888184, "learning_rate": 1.042577650371762e-05, "loss": 0.6634, "step": 11259 }, { "epoch": 1.5057502005883925, "grad_norm": 1.2181081771850586, "learning_rate": 1.0424334061907513e-05, "loss": 0.7152, "step": 11260 }, { "epoch": 1.5058839261834716, "grad_norm": 1.2649118900299072, "learning_rate": 1.042289161125257e-05, "loss": 0.7402, "step": 11261 }, { "epoch": 1.5060176517785504, "grad_norm": 1.1299681663513184, "learning_rate": 1.0421449151782855e-05, "loss": 0.6749, "step": 11262 }, { "epoch": 1.5061513773736293, "grad_norm": 1.0603952407836914, "learning_rate": 1.0420006683528436e-05, "loss": 0.6826, "step": 11263 }, { "epoch": 1.5062851029687083, "grad_norm": 1.2336446046829224, "learning_rate": 1.0418564206519379e-05, "loss": 0.7543, "step": 11264 }, { "epoch": 1.506418828563787, "grad_norm": 1.2501355409622192, "learning_rate": 1.0417121720785758e-05, "loss": 0.7113, "step": 11265 }, { "epoch": 1.506552554158866, "grad_norm": 1.0364837646484375, "learning_rate": 1.0415679226357627e-05, "loss": 0.6457, "step": 11266 }, { "epoch": 1.5066862797539449, "grad_norm": 1.3113071918487549, "learning_rate": 1.0414236723265062e-05, "loss": 0.7702, "step": 11267 }, { "epoch": 1.5068200053490237, "grad_norm": 1.3548494577407837, "learning_rate": 1.0412794211538125e-05, "loss": 0.7518, "step": 11268 }, { "epoch": 1.5069537309441028, "grad_norm": 1.1755337715148926, "learning_rate": 1.0411351691206894e-05, "loss": 0.7391, "step": 11269 }, { "epoch": 1.5070874565391816, "grad_norm": 1.1628522872924805, "learning_rate": 1.0409909162301428e-05, "loss": 0.661, "step": 11270 }, { "epoch": 1.5072211821342605, "grad_norm": 1.1194788217544556, "learning_rate": 1.0408466624851796e-05, "loss": 0.6269, "step": 11271 }, { "epoch": 1.5073549077293396, "grad_norm": 1.3749436140060425, "learning_rate": 1.040702407888807e-05, "loss": 0.7609, "step": 11272 }, { "epoch": 1.5074886333244182, "grad_norm": 1.265852928161621, "learning_rate": 1.0405581524440318e-05, "loss": 0.7187, "step": 11273 }, { "epoch": 1.5076223589194973, "grad_norm": 1.3400779962539673, "learning_rate": 1.0404138961538603e-05, "loss": 0.7428, "step": 11274 }, { "epoch": 1.507756084514576, "grad_norm": 1.3339792490005493, "learning_rate": 1.0402696390213e-05, "loss": 0.7566, "step": 11275 }, { "epoch": 1.507889810109655, "grad_norm": 1.449597716331482, "learning_rate": 1.0401253810493579e-05, "loss": 0.7929, "step": 11276 }, { "epoch": 1.508023535704734, "grad_norm": 1.2467231750488281, "learning_rate": 1.0399811222410405e-05, "loss": 0.7336, "step": 11277 }, { "epoch": 1.5081572612998126, "grad_norm": 1.3466869592666626, "learning_rate": 1.0398368625993546e-05, "loss": 0.7176, "step": 11278 }, { "epoch": 1.5082909868948917, "grad_norm": 1.1303359270095825, "learning_rate": 1.0396926021273076e-05, "loss": 0.6873, "step": 11279 }, { "epoch": 1.5084247124899706, "grad_norm": 1.2739181518554688, "learning_rate": 1.0395483408279063e-05, "loss": 0.7528, "step": 11280 }, { "epoch": 1.5085584380850494, "grad_norm": 1.331796646118164, "learning_rate": 1.0394040787041576e-05, "loss": 0.6703, "step": 11281 }, { "epoch": 1.5086921636801285, "grad_norm": 1.4136468172073364, "learning_rate": 1.0392598157590687e-05, "loss": 0.8308, "step": 11282 }, { "epoch": 1.508825889275207, "grad_norm": 1.1866846084594727, "learning_rate": 1.0391155519956464e-05, "loss": 0.682, "step": 11283 }, { "epoch": 1.5089596148702862, "grad_norm": 1.1944962739944458, "learning_rate": 1.038971287416898e-05, "loss": 0.6925, "step": 11284 }, { "epoch": 1.509093340465365, "grad_norm": 1.2064961194992065, "learning_rate": 1.0388270220258305e-05, "loss": 0.6863, "step": 11285 }, { "epoch": 1.5092270660604439, "grad_norm": 1.2031103372573853, "learning_rate": 1.0386827558254507e-05, "loss": 0.7089, "step": 11286 }, { "epoch": 1.509360791655523, "grad_norm": 1.0480718612670898, "learning_rate": 1.0385384888187656e-05, "loss": 0.6946, "step": 11287 }, { "epoch": 1.5094945172506018, "grad_norm": 1.0089476108551025, "learning_rate": 1.0383942210087827e-05, "loss": 0.6099, "step": 11288 }, { "epoch": 1.5096282428456806, "grad_norm": 1.278743863105774, "learning_rate": 1.0382499523985094e-05, "loss": 0.6738, "step": 11289 }, { "epoch": 1.5097619684407597, "grad_norm": 1.150586485862732, "learning_rate": 1.0381056829909522e-05, "loss": 0.7567, "step": 11290 }, { "epoch": 1.5098956940358383, "grad_norm": 1.309959888458252, "learning_rate": 1.0379614127891185e-05, "loss": 0.7024, "step": 11291 }, { "epoch": 1.5100294196309174, "grad_norm": 1.3697991371154785, "learning_rate": 1.0378171417960152e-05, "loss": 0.7617, "step": 11292 }, { "epoch": 1.5101631452259963, "grad_norm": 1.1924794912338257, "learning_rate": 1.03767287001465e-05, "loss": 0.7784, "step": 11293 }, { "epoch": 1.510296870821075, "grad_norm": 1.2200721502304077, "learning_rate": 1.03752859744803e-05, "loss": 0.6579, "step": 11294 }, { "epoch": 1.5104305964161542, "grad_norm": 1.1638315916061401, "learning_rate": 1.037384324099162e-05, "loss": 0.6492, "step": 11295 }, { "epoch": 1.5105643220112328, "grad_norm": 1.2186846733093262, "learning_rate": 1.0372400499710537e-05, "loss": 0.7429, "step": 11296 }, { "epoch": 1.5106980476063119, "grad_norm": 1.2120462656021118, "learning_rate": 1.0370957750667125e-05, "loss": 0.7457, "step": 11297 }, { "epoch": 1.5108317732013907, "grad_norm": 1.1597504615783691, "learning_rate": 1.0369514993891451e-05, "loss": 0.7483, "step": 11298 }, { "epoch": 1.5109654987964696, "grad_norm": 1.1799989938735962, "learning_rate": 1.036807222941359e-05, "loss": 0.6638, "step": 11299 }, { "epoch": 1.5110992243915486, "grad_norm": 1.1815595626831055, "learning_rate": 1.0366629457263616e-05, "loss": 0.6645, "step": 11300 }, { "epoch": 1.5112329499866275, "grad_norm": 1.1958928108215332, "learning_rate": 1.0365186677471598e-05, "loss": 0.6483, "step": 11301 }, { "epoch": 1.5113666755817063, "grad_norm": 1.2273719310760498, "learning_rate": 1.0363743890067621e-05, "loss": 0.6653, "step": 11302 }, { "epoch": 1.5115004011767852, "grad_norm": 1.1292232275009155, "learning_rate": 1.0362301095081746e-05, "loss": 0.6473, "step": 11303 }, { "epoch": 1.511634126771864, "grad_norm": 1.2107740640640259, "learning_rate": 1.0360858292544051e-05, "loss": 0.6732, "step": 11304 }, { "epoch": 1.511767852366943, "grad_norm": 1.2193636894226074, "learning_rate": 1.035941548248461e-05, "loss": 0.7699, "step": 11305 }, { "epoch": 1.511901577962022, "grad_norm": 1.1529028415679932, "learning_rate": 1.03579726649335e-05, "loss": 0.7149, "step": 11306 }, { "epoch": 1.5120353035571008, "grad_norm": 1.3412538766860962, "learning_rate": 1.035652983992079e-05, "loss": 0.7472, "step": 11307 }, { "epoch": 1.5121690291521799, "grad_norm": 1.2334516048431396, "learning_rate": 1.0355087007476558e-05, "loss": 0.734, "step": 11308 }, { "epoch": 1.5123027547472585, "grad_norm": 1.467167615890503, "learning_rate": 1.0353644167630877e-05, "loss": 0.8163, "step": 11309 }, { "epoch": 1.5124364803423376, "grad_norm": 1.1186554431915283, "learning_rate": 1.0352201320413822e-05, "loss": 0.703, "step": 11310 }, { "epoch": 1.5125702059374164, "grad_norm": 1.172777533531189, "learning_rate": 1.0350758465855466e-05, "loss": 0.6771, "step": 11311 }, { "epoch": 1.5127039315324953, "grad_norm": 1.2928880453109741, "learning_rate": 1.0349315603985886e-05, "loss": 0.7665, "step": 11312 }, { "epoch": 1.5128376571275743, "grad_norm": 1.0531476736068726, "learning_rate": 1.0347872734835154e-05, "loss": 0.644, "step": 11313 }, { "epoch": 1.512971382722653, "grad_norm": 1.444922685623169, "learning_rate": 1.0346429858433354e-05, "loss": 0.8218, "step": 11314 }, { "epoch": 1.513105108317732, "grad_norm": 1.168660044670105, "learning_rate": 1.0344986974810549e-05, "loss": 0.6892, "step": 11315 }, { "epoch": 1.5132388339128109, "grad_norm": 1.1563942432403564, "learning_rate": 1.0343544083996824e-05, "loss": 0.6661, "step": 11316 }, { "epoch": 1.5133725595078897, "grad_norm": 1.2931989431381226, "learning_rate": 1.034210118602225e-05, "loss": 0.7088, "step": 11317 }, { "epoch": 1.5135062851029688, "grad_norm": 1.141377329826355, "learning_rate": 1.0340658280916906e-05, "loss": 0.7089, "step": 11318 }, { "epoch": 1.5136400106980477, "grad_norm": 1.3198901414871216, "learning_rate": 1.0339215368710862e-05, "loss": 0.7478, "step": 11319 }, { "epoch": 1.5137737362931265, "grad_norm": 1.1861226558685303, "learning_rate": 1.03377724494342e-05, "loss": 0.6669, "step": 11320 }, { "epoch": 1.5139074618882054, "grad_norm": 1.212786316871643, "learning_rate": 1.0336329523116997e-05, "loss": 0.7096, "step": 11321 }, { "epoch": 1.5140411874832842, "grad_norm": 1.109321117401123, "learning_rate": 1.0334886589789326e-05, "loss": 0.7076, "step": 11322 }, { "epoch": 1.5141749130783633, "grad_norm": 1.0888601541519165, "learning_rate": 1.0333443649481265e-05, "loss": 0.6744, "step": 11323 }, { "epoch": 1.5143086386734421, "grad_norm": 1.210271954536438, "learning_rate": 1.0332000702222889e-05, "loss": 0.7308, "step": 11324 }, { "epoch": 1.514442364268521, "grad_norm": 1.229527235031128, "learning_rate": 1.0330557748044274e-05, "loss": 0.7621, "step": 11325 }, { "epoch": 1.5145760898636, "grad_norm": 1.2199658155441284, "learning_rate": 1.03291147869755e-05, "loss": 0.6836, "step": 11326 }, { "epoch": 1.5147098154586787, "grad_norm": 1.1841131448745728, "learning_rate": 1.0327671819046645e-05, "loss": 0.7241, "step": 11327 }, { "epoch": 1.5148435410537577, "grad_norm": 1.1946063041687012, "learning_rate": 1.0326228844287784e-05, "loss": 0.6895, "step": 11328 }, { "epoch": 1.5149772666488366, "grad_norm": 1.3085778951644897, "learning_rate": 1.0324785862728995e-05, "loss": 0.7038, "step": 11329 }, { "epoch": 1.5151109922439154, "grad_norm": 1.210023045539856, "learning_rate": 1.0323342874400358e-05, "loss": 0.681, "step": 11330 }, { "epoch": 1.5152447178389945, "grad_norm": 1.0165055990219116, "learning_rate": 1.0321899879331942e-05, "loss": 0.6316, "step": 11331 }, { "epoch": 1.5153784434340731, "grad_norm": 1.0894322395324707, "learning_rate": 1.0320456877553833e-05, "loss": 0.6064, "step": 11332 }, { "epoch": 1.5155121690291522, "grad_norm": 1.1646851301193237, "learning_rate": 1.0319013869096109e-05, "loss": 0.6874, "step": 11333 }, { "epoch": 1.515645894624231, "grad_norm": 1.2282353639602661, "learning_rate": 1.0317570853988847e-05, "loss": 0.714, "step": 11334 }, { "epoch": 1.51577962021931, "grad_norm": 1.2134082317352295, "learning_rate": 1.0316127832262124e-05, "loss": 0.7109, "step": 11335 }, { "epoch": 1.515913345814389, "grad_norm": 1.259196400642395, "learning_rate": 1.0314684803946015e-05, "loss": 0.7471, "step": 11336 }, { "epoch": 1.5160470714094678, "grad_norm": 1.0562297105789185, "learning_rate": 1.0313241769070605e-05, "loss": 0.5871, "step": 11337 }, { "epoch": 1.5161807970045467, "grad_norm": 1.2305461168289185, "learning_rate": 1.0311798727665972e-05, "loss": 0.68, "step": 11338 }, { "epoch": 1.5163145225996255, "grad_norm": 1.251670002937317, "learning_rate": 1.031035567976219e-05, "loss": 0.7082, "step": 11339 }, { "epoch": 1.5164482481947044, "grad_norm": 1.1587879657745361, "learning_rate": 1.0308912625389343e-05, "loss": 0.6718, "step": 11340 }, { "epoch": 1.5165819737897834, "grad_norm": 1.1916331052780151, "learning_rate": 1.0307469564577506e-05, "loss": 0.6326, "step": 11341 }, { "epoch": 1.5167156993848623, "grad_norm": 1.2042045593261719, "learning_rate": 1.0306026497356763e-05, "loss": 0.6731, "step": 11342 }, { "epoch": 1.5168494249799411, "grad_norm": 1.3651026487350464, "learning_rate": 1.0304583423757188e-05, "loss": 0.7657, "step": 11343 }, { "epoch": 1.5169831505750202, "grad_norm": 1.346718192100525, "learning_rate": 1.0303140343808865e-05, "loss": 0.7467, "step": 11344 }, { "epoch": 1.5171168761700988, "grad_norm": 1.2971117496490479, "learning_rate": 1.0301697257541867e-05, "loss": 0.6751, "step": 11345 }, { "epoch": 1.517250601765178, "grad_norm": 1.192138910293579, "learning_rate": 1.0300254164986283e-05, "loss": 0.6928, "step": 11346 }, { "epoch": 1.5173843273602567, "grad_norm": 1.2294753789901733, "learning_rate": 1.0298811066172185e-05, "loss": 0.6812, "step": 11347 }, { "epoch": 1.5175180529553356, "grad_norm": 1.3203856945037842, "learning_rate": 1.0297367961129658e-05, "loss": 0.7427, "step": 11348 }, { "epoch": 1.5176517785504147, "grad_norm": 1.2395155429840088, "learning_rate": 1.0295924849888781e-05, "loss": 0.7183, "step": 11349 }, { "epoch": 1.5177855041454933, "grad_norm": 1.1613038778305054, "learning_rate": 1.0294481732479635e-05, "loss": 0.6317, "step": 11350 }, { "epoch": 1.5179192297405724, "grad_norm": 0.9489179849624634, "learning_rate": 1.0293038608932296e-05, "loss": 0.6761, "step": 11351 }, { "epoch": 1.5180529553356512, "grad_norm": 1.1639461517333984, "learning_rate": 1.0291595479276849e-05, "loss": 0.6928, "step": 11352 }, { "epoch": 1.51818668093073, "grad_norm": 1.2916233539581299, "learning_rate": 1.0290152343543372e-05, "loss": 0.7025, "step": 11353 }, { "epoch": 1.5183204065258091, "grad_norm": 1.358557105064392, "learning_rate": 1.0288709201761949e-05, "loss": 0.7098, "step": 11354 }, { "epoch": 1.518454132120888, "grad_norm": 1.143175482749939, "learning_rate": 1.0287266053962657e-05, "loss": 0.6936, "step": 11355 }, { "epoch": 1.5185878577159668, "grad_norm": 1.2170140743255615, "learning_rate": 1.028582290017558e-05, "loss": 0.7027, "step": 11356 }, { "epoch": 1.518721583311046, "grad_norm": 1.3404967784881592, "learning_rate": 1.0284379740430798e-05, "loss": 0.7186, "step": 11357 }, { "epoch": 1.5188553089061245, "grad_norm": 1.2315402030944824, "learning_rate": 1.0282936574758394e-05, "loss": 0.606, "step": 11358 }, { "epoch": 1.5189890345012036, "grad_norm": 1.065169334411621, "learning_rate": 1.0281493403188446e-05, "loss": 0.605, "step": 11359 }, { "epoch": 1.5191227600962824, "grad_norm": 1.1744664907455444, "learning_rate": 1.0280050225751036e-05, "loss": 0.6978, "step": 11360 }, { "epoch": 1.5192564856913613, "grad_norm": 1.383623480796814, "learning_rate": 1.027860704247625e-05, "loss": 0.7485, "step": 11361 }, { "epoch": 1.5193902112864404, "grad_norm": 1.2623125314712524, "learning_rate": 1.0277163853394166e-05, "loss": 0.6846, "step": 11362 }, { "epoch": 1.519523936881519, "grad_norm": 1.282300353050232, "learning_rate": 1.0275720658534867e-05, "loss": 0.7395, "step": 11363 }, { "epoch": 1.519657662476598, "grad_norm": 1.125113606452942, "learning_rate": 1.027427745792843e-05, "loss": 0.6709, "step": 11364 }, { "epoch": 1.519791388071677, "grad_norm": 1.3038486242294312, "learning_rate": 1.0272834251604946e-05, "loss": 0.689, "step": 11365 }, { "epoch": 1.5199251136667558, "grad_norm": 1.2768163681030273, "learning_rate": 1.0271391039594496e-05, "loss": 0.7537, "step": 11366 }, { "epoch": 1.5200588392618348, "grad_norm": 1.3322765827178955, "learning_rate": 1.0269947821927155e-05, "loss": 0.7245, "step": 11367 }, { "epoch": 1.5201925648569135, "grad_norm": 1.2983310222625732, "learning_rate": 1.0268504598633011e-05, "loss": 0.6865, "step": 11368 }, { "epoch": 1.5203262904519925, "grad_norm": 1.180198073387146, "learning_rate": 1.0267061369742147e-05, "loss": 0.7442, "step": 11369 }, { "epoch": 1.5204600160470714, "grad_norm": 1.1388121843338013, "learning_rate": 1.0265618135284643e-05, "loss": 0.6727, "step": 11370 }, { "epoch": 1.5205937416421502, "grad_norm": 1.09035325050354, "learning_rate": 1.0264174895290582e-05, "loss": 0.7221, "step": 11371 }, { "epoch": 1.5207274672372293, "grad_norm": 1.20558500289917, "learning_rate": 1.026273164979005e-05, "loss": 0.7649, "step": 11372 }, { "epoch": 1.5208611928323081, "grad_norm": 1.1587101221084595, "learning_rate": 1.0261288398813127e-05, "loss": 0.6119, "step": 11373 }, { "epoch": 1.520994918427387, "grad_norm": 1.2932541370391846, "learning_rate": 1.0259845142389899e-05, "loss": 0.718, "step": 11374 }, { "epoch": 1.521128644022466, "grad_norm": 1.1788967847824097, "learning_rate": 1.0258401880550449e-05, "loss": 0.6807, "step": 11375 }, { "epoch": 1.5212623696175447, "grad_norm": 1.197046160697937, "learning_rate": 1.0256958613324855e-05, "loss": 0.6778, "step": 11376 }, { "epoch": 1.5213960952126238, "grad_norm": 1.2200101613998413, "learning_rate": 1.0255515340743206e-05, "loss": 0.7254, "step": 11377 }, { "epoch": 1.5215298208077026, "grad_norm": 1.1235463619232178, "learning_rate": 1.0254072062835585e-05, "loss": 0.6933, "step": 11378 }, { "epoch": 1.5216635464027815, "grad_norm": 1.2488973140716553, "learning_rate": 1.0252628779632075e-05, "loss": 0.7206, "step": 11379 }, { "epoch": 1.5217972719978605, "grad_norm": 1.1184589862823486, "learning_rate": 1.0251185491162758e-05, "loss": 0.7618, "step": 11380 }, { "epoch": 1.5219309975929391, "grad_norm": 1.1048762798309326, "learning_rate": 1.0249742197457721e-05, "loss": 0.7185, "step": 11381 }, { "epoch": 1.5220647231880182, "grad_norm": 1.2889329195022583, "learning_rate": 1.024829889854705e-05, "loss": 0.7089, "step": 11382 }, { "epoch": 1.522198448783097, "grad_norm": 1.1219260692596436, "learning_rate": 1.0246855594460818e-05, "loss": 0.6896, "step": 11383 }, { "epoch": 1.522332174378176, "grad_norm": 1.1798728704452515, "learning_rate": 1.0245412285229124e-05, "loss": 0.6549, "step": 11384 }, { "epoch": 1.522465899973255, "grad_norm": 1.2250559329986572, "learning_rate": 1.0243968970882044e-05, "loss": 0.6993, "step": 11385 }, { "epoch": 1.5225996255683336, "grad_norm": 1.2025673389434814, "learning_rate": 1.0242525651449664e-05, "loss": 0.5716, "step": 11386 }, { "epoch": 1.5227333511634127, "grad_norm": 1.1163078546524048, "learning_rate": 1.024108232696207e-05, "loss": 0.6242, "step": 11387 }, { "epoch": 1.5228670767584915, "grad_norm": 1.3289074897766113, "learning_rate": 1.0239638997449346e-05, "loss": 0.78, "step": 11388 }, { "epoch": 1.5230008023535704, "grad_norm": 1.1160694360733032, "learning_rate": 1.0238195662941574e-05, "loss": 0.7035, "step": 11389 }, { "epoch": 1.5231345279486495, "grad_norm": 1.428734302520752, "learning_rate": 1.0236752323468844e-05, "loss": 0.7427, "step": 11390 }, { "epoch": 1.5232682535437283, "grad_norm": 1.2779194116592407, "learning_rate": 1.0235308979061235e-05, "loss": 0.7423, "step": 11391 }, { "epoch": 1.5234019791388071, "grad_norm": 1.2424854040145874, "learning_rate": 1.0233865629748838e-05, "loss": 0.7376, "step": 11392 }, { "epoch": 1.5235357047338862, "grad_norm": 1.4598060846328735, "learning_rate": 1.0232422275561735e-05, "loss": 0.8003, "step": 11393 }, { "epoch": 1.5236694303289648, "grad_norm": 1.2185792922973633, "learning_rate": 1.0230978916530012e-05, "loss": 0.6988, "step": 11394 }, { "epoch": 1.523803155924044, "grad_norm": 1.132039189338684, "learning_rate": 1.0229535552683757e-05, "loss": 0.6794, "step": 11395 }, { "epoch": 1.5239368815191228, "grad_norm": 1.1940776109695435, "learning_rate": 1.022809218405305e-05, "loss": 0.6695, "step": 11396 }, { "epoch": 1.5240706071142016, "grad_norm": 1.2174535989761353, "learning_rate": 1.0226648810667979e-05, "loss": 0.7013, "step": 11397 }, { "epoch": 1.5242043327092807, "grad_norm": 1.1812546253204346, "learning_rate": 1.0225205432558632e-05, "loss": 0.7185, "step": 11398 }, { "epoch": 1.5243380583043593, "grad_norm": 1.2069307565689087, "learning_rate": 1.0223762049755094e-05, "loss": 0.7006, "step": 11399 }, { "epoch": 1.5244717838994384, "grad_norm": 1.1613616943359375, "learning_rate": 1.022231866228745e-05, "loss": 0.6886, "step": 11400 }, { "epoch": 1.5246055094945172, "grad_norm": 1.323214054107666, "learning_rate": 1.0220875270185784e-05, "loss": 0.6913, "step": 11401 }, { "epoch": 1.524739235089596, "grad_norm": 1.2059725522994995, "learning_rate": 1.0219431873480186e-05, "loss": 0.7929, "step": 11402 }, { "epoch": 1.5248729606846751, "grad_norm": 1.0640259981155396, "learning_rate": 1.0217988472200739e-05, "loss": 0.6674, "step": 11403 }, { "epoch": 1.525006686279754, "grad_norm": 1.3941439390182495, "learning_rate": 1.0216545066377535e-05, "loss": 0.7135, "step": 11404 }, { "epoch": 1.5251404118748328, "grad_norm": 1.2665691375732422, "learning_rate": 1.021510165604065e-05, "loss": 0.7613, "step": 11405 }, { "epoch": 1.5252741374699117, "grad_norm": 1.200862169265747, "learning_rate": 1.0213658241220181e-05, "loss": 0.6778, "step": 11406 }, { "epoch": 1.5254078630649905, "grad_norm": 1.3196154832839966, "learning_rate": 1.0212214821946213e-05, "loss": 0.709, "step": 11407 }, { "epoch": 1.5255415886600696, "grad_norm": 1.344474196434021, "learning_rate": 1.0210771398248826e-05, "loss": 0.7834, "step": 11408 }, { "epoch": 1.5256753142551485, "grad_norm": 1.2215858697891235, "learning_rate": 1.0209327970158113e-05, "loss": 0.6684, "step": 11409 }, { "epoch": 1.5258090398502273, "grad_norm": 1.2536499500274658, "learning_rate": 1.0207884537704156e-05, "loss": 0.7324, "step": 11410 }, { "epoch": 1.5259427654453064, "grad_norm": 1.2801861763000488, "learning_rate": 1.0206441100917049e-05, "loss": 0.7062, "step": 11411 }, { "epoch": 1.526076491040385, "grad_norm": 1.1119688749313354, "learning_rate": 1.020499765982687e-05, "loss": 0.5824, "step": 11412 }, { "epoch": 1.526210216635464, "grad_norm": 1.2409217357635498, "learning_rate": 1.0203554214463713e-05, "loss": 0.7377, "step": 11413 }, { "epoch": 1.526343942230543, "grad_norm": 1.249873399734497, "learning_rate": 1.0202110764857662e-05, "loss": 0.7202, "step": 11414 }, { "epoch": 1.5264776678256218, "grad_norm": 1.2186199426651, "learning_rate": 1.0200667311038808e-05, "loss": 0.7442, "step": 11415 }, { "epoch": 1.5266113934207008, "grad_norm": 1.3245911598205566, "learning_rate": 1.0199223853037235e-05, "loss": 0.78, "step": 11416 }, { "epoch": 1.5267451190157795, "grad_norm": 1.1203274726867676, "learning_rate": 1.019778039088303e-05, "loss": 0.6544, "step": 11417 }, { "epoch": 1.5268788446108585, "grad_norm": 1.1356606483459473, "learning_rate": 1.0196336924606282e-05, "loss": 0.7151, "step": 11418 }, { "epoch": 1.5270125702059374, "grad_norm": 1.1675573587417603, "learning_rate": 1.0194893454237082e-05, "loss": 0.685, "step": 11419 }, { "epoch": 1.5271462958010162, "grad_norm": 1.2476993799209595, "learning_rate": 1.0193449979805515e-05, "loss": 0.6771, "step": 11420 }, { "epoch": 1.5272800213960953, "grad_norm": 1.2121187448501587, "learning_rate": 1.0192006501341664e-05, "loss": 0.6832, "step": 11421 }, { "epoch": 1.5274137469911742, "grad_norm": 1.1831876039505005, "learning_rate": 1.0190563018875623e-05, "loss": 0.7506, "step": 11422 }, { "epoch": 1.527547472586253, "grad_norm": 1.2036285400390625, "learning_rate": 1.0189119532437478e-05, "loss": 0.698, "step": 11423 }, { "epoch": 1.5276811981813319, "grad_norm": 1.2253434658050537, "learning_rate": 1.0187676042057315e-05, "loss": 0.683, "step": 11424 }, { "epoch": 1.5278149237764107, "grad_norm": 1.1600944995880127, "learning_rate": 1.0186232547765226e-05, "loss": 0.6751, "step": 11425 }, { "epoch": 1.5279486493714898, "grad_norm": 1.194593906402588, "learning_rate": 1.01847890495913e-05, "loss": 0.6336, "step": 11426 }, { "epoch": 1.5280823749665686, "grad_norm": 1.280401587486267, "learning_rate": 1.0183345547565624e-05, "loss": 0.6623, "step": 11427 }, { "epoch": 1.5282161005616475, "grad_norm": 1.1808527708053589, "learning_rate": 1.0181902041718284e-05, "loss": 0.7071, "step": 11428 }, { "epoch": 1.5283498261567265, "grad_norm": 1.059228539466858, "learning_rate": 1.0180458532079365e-05, "loss": 0.6334, "step": 11429 }, { "epoch": 1.5284835517518052, "grad_norm": 1.0922168493270874, "learning_rate": 1.0179015018678963e-05, "loss": 0.6562, "step": 11430 }, { "epoch": 1.5286172773468842, "grad_norm": 1.0760000944137573, "learning_rate": 1.017757150154717e-05, "loss": 0.6996, "step": 11431 }, { "epoch": 1.528751002941963, "grad_norm": 1.1737550497055054, "learning_rate": 1.0176127980714063e-05, "loss": 0.6616, "step": 11432 }, { "epoch": 1.528884728537042, "grad_norm": 1.1612838506698608, "learning_rate": 1.017468445620974e-05, "loss": 0.7044, "step": 11433 }, { "epoch": 1.529018454132121, "grad_norm": 1.2250031232833862, "learning_rate": 1.0173240928064285e-05, "loss": 0.6748, "step": 11434 }, { "epoch": 1.5291521797271996, "grad_norm": 1.104472279548645, "learning_rate": 1.017179739630779e-05, "loss": 0.6837, "step": 11435 }, { "epoch": 1.5292859053222787, "grad_norm": 1.2426345348358154, "learning_rate": 1.017035386097034e-05, "loss": 0.72, "step": 11436 }, { "epoch": 1.5294196309173576, "grad_norm": 1.2250365018844604, "learning_rate": 1.0168910322082028e-05, "loss": 0.7262, "step": 11437 }, { "epoch": 1.5295533565124364, "grad_norm": 1.3105405569076538, "learning_rate": 1.0167466779672943e-05, "loss": 0.7231, "step": 11438 }, { "epoch": 1.5296870821075155, "grad_norm": 1.1340572834014893, "learning_rate": 1.0166023233773174e-05, "loss": 0.6631, "step": 11439 }, { "epoch": 1.5298208077025943, "grad_norm": 1.0655606985092163, "learning_rate": 1.0164579684412808e-05, "loss": 0.6839, "step": 11440 }, { "epoch": 1.5299545332976732, "grad_norm": 1.2457494735717773, "learning_rate": 1.0163136131621937e-05, "loss": 0.7004, "step": 11441 }, { "epoch": 1.530088258892752, "grad_norm": 1.3896231651306152, "learning_rate": 1.0161692575430646e-05, "loss": 0.8023, "step": 11442 }, { "epoch": 1.5302219844878309, "grad_norm": 1.2457791566848755, "learning_rate": 1.0160249015869032e-05, "loss": 0.7615, "step": 11443 }, { "epoch": 1.53035571008291, "grad_norm": 1.131152629852295, "learning_rate": 1.015880545296718e-05, "loss": 0.6902, "step": 11444 }, { "epoch": 1.5304894356779888, "grad_norm": 1.2113062143325806, "learning_rate": 1.0157361886755178e-05, "loss": 0.7562, "step": 11445 }, { "epoch": 1.5306231612730676, "grad_norm": 1.165136694908142, "learning_rate": 1.015591831726312e-05, "loss": 0.6607, "step": 11446 }, { "epoch": 1.5307568868681467, "grad_norm": 1.2244077920913696, "learning_rate": 1.0154474744521094e-05, "loss": 0.6691, "step": 11447 }, { "epoch": 1.5308906124632253, "grad_norm": 1.0597703456878662, "learning_rate": 1.0153031168559188e-05, "loss": 0.6248, "step": 11448 }, { "epoch": 1.5310243380583044, "grad_norm": 1.0311192274093628, "learning_rate": 1.0151587589407494e-05, "loss": 0.7019, "step": 11449 }, { "epoch": 1.5311580636533833, "grad_norm": 1.1861071586608887, "learning_rate": 1.0150144007096103e-05, "loss": 0.6998, "step": 11450 }, { "epoch": 1.531291789248462, "grad_norm": 1.2244356870651245, "learning_rate": 1.0148700421655105e-05, "loss": 0.6595, "step": 11451 }, { "epoch": 1.5314255148435412, "grad_norm": 1.0164098739624023, "learning_rate": 1.0147256833114586e-05, "loss": 0.6291, "step": 11452 }, { "epoch": 1.5315592404386198, "grad_norm": 1.1831355094909668, "learning_rate": 1.0145813241504642e-05, "loss": 0.67, "step": 11453 }, { "epoch": 1.5316929660336989, "grad_norm": 1.118692398071289, "learning_rate": 1.014436964685536e-05, "loss": 0.6139, "step": 11454 }, { "epoch": 1.5318266916287777, "grad_norm": 1.1892640590667725, "learning_rate": 1.0142926049196829e-05, "loss": 0.6728, "step": 11455 }, { "epoch": 1.5319604172238566, "grad_norm": 1.2539464235305786, "learning_rate": 1.0141482448559142e-05, "loss": 0.7006, "step": 11456 }, { "epoch": 1.5320941428189356, "grad_norm": 1.0876903533935547, "learning_rate": 1.0140038844972389e-05, "loss": 0.6157, "step": 11457 }, { "epoch": 1.5322278684140145, "grad_norm": 1.339532732963562, "learning_rate": 1.0138595238466659e-05, "loss": 0.793, "step": 11458 }, { "epoch": 1.5323615940090933, "grad_norm": 1.134891152381897, "learning_rate": 1.0137151629072049e-05, "loss": 0.6923, "step": 11459 }, { "epoch": 1.5324953196041724, "grad_norm": 1.2790517807006836, "learning_rate": 1.013570801681864e-05, "loss": 0.6954, "step": 11460 }, { "epoch": 1.532629045199251, "grad_norm": 1.1411367654800415, "learning_rate": 1.0134264401736526e-05, "loss": 0.6371, "step": 11461 }, { "epoch": 1.53276277079433, "grad_norm": 1.201743245124817, "learning_rate": 1.0132820783855801e-05, "loss": 0.6875, "step": 11462 }, { "epoch": 1.532896496389409, "grad_norm": 1.15653395652771, "learning_rate": 1.0131377163206555e-05, "loss": 0.6821, "step": 11463 }, { "epoch": 1.5330302219844878, "grad_norm": 1.2586740255355835, "learning_rate": 1.0129933539818878e-05, "loss": 0.6963, "step": 11464 }, { "epoch": 1.5331639475795669, "grad_norm": 1.1018822193145752, "learning_rate": 1.012848991372286e-05, "loss": 0.7236, "step": 11465 }, { "epoch": 1.5332976731746455, "grad_norm": 1.1289311647415161, "learning_rate": 1.012704628494859e-05, "loss": 0.7039, "step": 11466 }, { "epoch": 1.5334313987697246, "grad_norm": 1.230009913444519, "learning_rate": 1.0125602653526164e-05, "loss": 0.7023, "step": 11467 }, { "epoch": 1.5335651243648034, "grad_norm": 1.1485594511032104, "learning_rate": 1.012415901948567e-05, "loss": 0.6821, "step": 11468 }, { "epoch": 1.5336988499598823, "grad_norm": 1.136791467666626, "learning_rate": 1.01227153828572e-05, "loss": 0.6523, "step": 11469 }, { "epoch": 1.5338325755549613, "grad_norm": 1.3130242824554443, "learning_rate": 1.0121271743670846e-05, "loss": 0.7133, "step": 11470 }, { "epoch": 1.53396630115004, "grad_norm": 1.3219910860061646, "learning_rate": 1.01198281019567e-05, "loss": 0.773, "step": 11471 }, { "epoch": 1.534100026745119, "grad_norm": 1.1638140678405762, "learning_rate": 1.011838445774485e-05, "loss": 0.6669, "step": 11472 }, { "epoch": 1.5342337523401979, "grad_norm": 1.1685967445373535, "learning_rate": 1.011694081106539e-05, "loss": 0.6424, "step": 11473 }, { "epoch": 1.5343674779352767, "grad_norm": 1.2603915929794312, "learning_rate": 1.0115497161948409e-05, "loss": 0.6351, "step": 11474 }, { "epoch": 1.5345012035303558, "grad_norm": 1.1106369495391846, "learning_rate": 1.0114053510424e-05, "loss": 0.6909, "step": 11475 }, { "epoch": 1.5346349291254346, "grad_norm": 1.3990757465362549, "learning_rate": 1.0112609856522259e-05, "loss": 0.7292, "step": 11476 }, { "epoch": 1.5347686547205135, "grad_norm": 1.2309085130691528, "learning_rate": 1.011116620027327e-05, "loss": 0.6883, "step": 11477 }, { "epoch": 1.5349023803155926, "grad_norm": 1.2403303384780884, "learning_rate": 1.0109722541707127e-05, "loss": 0.7455, "step": 11478 }, { "epoch": 1.5350361059106712, "grad_norm": 1.1004066467285156, "learning_rate": 1.0108278880853925e-05, "loss": 0.5785, "step": 11479 }, { "epoch": 1.5351698315057503, "grad_norm": 1.2963147163391113, "learning_rate": 1.0106835217743753e-05, "loss": 0.7561, "step": 11480 }, { "epoch": 1.535303557100829, "grad_norm": 1.3116780519485474, "learning_rate": 1.0105391552406703e-05, "loss": 0.8128, "step": 11481 }, { "epoch": 1.535437282695908, "grad_norm": 1.16659414768219, "learning_rate": 1.0103947884872865e-05, "loss": 0.6896, "step": 11482 }, { "epoch": 1.535571008290987, "grad_norm": 1.237070918083191, "learning_rate": 1.0102504215172335e-05, "loss": 0.6757, "step": 11483 }, { "epoch": 1.5357047338860657, "grad_norm": 1.2002946138381958, "learning_rate": 1.0101060543335204e-05, "loss": 0.7349, "step": 11484 }, { "epoch": 1.5358384594811447, "grad_norm": 1.471403956413269, "learning_rate": 1.009961686939156e-05, "loss": 0.7396, "step": 11485 }, { "epoch": 1.5359721850762236, "grad_norm": 1.091191053390503, "learning_rate": 1.0098173193371498e-05, "loss": 0.6589, "step": 11486 }, { "epoch": 1.5361059106713024, "grad_norm": 1.2951558828353882, "learning_rate": 1.0096729515305108e-05, "loss": 0.7205, "step": 11487 }, { "epoch": 1.5362396362663815, "grad_norm": 1.0998430252075195, "learning_rate": 1.0095285835222488e-05, "loss": 0.6545, "step": 11488 }, { "epoch": 1.5363733618614601, "grad_norm": 1.2637847661972046, "learning_rate": 1.0093842153153723e-05, "loss": 0.7418, "step": 11489 }, { "epoch": 1.5365070874565392, "grad_norm": 1.2122328281402588, "learning_rate": 1.009239846912891e-05, "loss": 0.6821, "step": 11490 }, { "epoch": 1.536640813051618, "grad_norm": 1.2284464836120605, "learning_rate": 1.0090954783178137e-05, "loss": 0.7007, "step": 11491 }, { "epoch": 1.5367745386466969, "grad_norm": 1.08121919631958, "learning_rate": 1.00895110953315e-05, "loss": 0.6402, "step": 11492 }, { "epoch": 1.536908264241776, "grad_norm": 1.3623188734054565, "learning_rate": 1.0088067405619088e-05, "loss": 0.6675, "step": 11493 }, { "epoch": 1.5370419898368548, "grad_norm": 1.1458266973495483, "learning_rate": 1.0086623714070998e-05, "loss": 0.621, "step": 11494 }, { "epoch": 1.5371757154319337, "grad_norm": 1.2444700002670288, "learning_rate": 1.0085180020717318e-05, "loss": 0.6663, "step": 11495 }, { "epoch": 1.5373094410270127, "grad_norm": 1.1638400554656982, "learning_rate": 1.0083736325588145e-05, "loss": 0.7342, "step": 11496 }, { "epoch": 1.5374431666220914, "grad_norm": 1.234995722770691, "learning_rate": 1.0082292628713566e-05, "loss": 0.6049, "step": 11497 }, { "epoch": 1.5375768922171704, "grad_norm": 1.044304370880127, "learning_rate": 1.0080848930123674e-05, "loss": 0.6277, "step": 11498 }, { "epoch": 1.5377106178122493, "grad_norm": 1.1334415674209595, "learning_rate": 1.0079405229848566e-05, "loss": 0.6488, "step": 11499 }, { "epoch": 1.5378443434073281, "grad_norm": 1.270363211631775, "learning_rate": 1.0077961527918332e-05, "loss": 0.727, "step": 11500 }, { "epoch": 1.5379780690024072, "grad_norm": 1.2057033777236938, "learning_rate": 1.0076517824363063e-05, "loss": 0.7331, "step": 11501 }, { "epoch": 1.5381117945974858, "grad_norm": 1.27130925655365, "learning_rate": 1.0075074119212854e-05, "loss": 0.6078, "step": 11502 }, { "epoch": 1.5382455201925649, "grad_norm": 1.2937345504760742, "learning_rate": 1.0073630412497796e-05, "loss": 0.7093, "step": 11503 }, { "epoch": 1.5383792457876437, "grad_norm": 1.169643521308899, "learning_rate": 1.0072186704247987e-05, "loss": 0.665, "step": 11504 }, { "epoch": 1.5385129713827226, "grad_norm": 1.4491045475006104, "learning_rate": 1.007074299449351e-05, "loss": 0.7799, "step": 11505 }, { "epoch": 1.5386466969778017, "grad_norm": 1.3527846336364746, "learning_rate": 1.0069299283264463e-05, "loss": 0.7786, "step": 11506 }, { "epoch": 1.5387804225728805, "grad_norm": 1.1357488632202148, "learning_rate": 1.0067855570590939e-05, "loss": 0.6825, "step": 11507 }, { "epoch": 1.5389141481679594, "grad_norm": 1.1992802619934082, "learning_rate": 1.0066411856503034e-05, "loss": 0.645, "step": 11508 }, { "epoch": 1.5390478737630382, "grad_norm": 1.3136249780654907, "learning_rate": 1.0064968141030835e-05, "loss": 0.72, "step": 11509 }, { "epoch": 1.539181599358117, "grad_norm": 1.270641565322876, "learning_rate": 1.0063524424204436e-05, "loss": 0.7072, "step": 11510 }, { "epoch": 1.5393153249531961, "grad_norm": 1.1993701457977295, "learning_rate": 1.0062080706053934e-05, "loss": 0.7524, "step": 11511 }, { "epoch": 1.539449050548275, "grad_norm": 1.142477035522461, "learning_rate": 1.0060636986609418e-05, "loss": 0.7499, "step": 11512 }, { "epoch": 1.5395827761433538, "grad_norm": 1.2433645725250244, "learning_rate": 1.005919326590098e-05, "loss": 0.7129, "step": 11513 }, { "epoch": 1.5397165017384329, "grad_norm": 1.2316704988479614, "learning_rate": 1.0057749543958717e-05, "loss": 0.7215, "step": 11514 }, { "epoch": 1.5398502273335115, "grad_norm": 1.141312599182129, "learning_rate": 1.005630582081272e-05, "loss": 0.6057, "step": 11515 }, { "epoch": 1.5399839529285906, "grad_norm": 1.1960495710372925, "learning_rate": 1.0054862096493084e-05, "loss": 0.7404, "step": 11516 }, { "epoch": 1.5401176785236694, "grad_norm": 1.1864585876464844, "learning_rate": 1.0053418371029898e-05, "loss": 0.7354, "step": 11517 }, { "epoch": 1.5402514041187483, "grad_norm": 1.1143370866775513, "learning_rate": 1.0051974644453255e-05, "loss": 0.7188, "step": 11518 }, { "epoch": 1.5403851297138274, "grad_norm": 1.1594401597976685, "learning_rate": 1.0050530916793253e-05, "loss": 0.6594, "step": 11519 }, { "epoch": 1.540518855308906, "grad_norm": 1.2512022256851196, "learning_rate": 1.0049087188079983e-05, "loss": 0.6958, "step": 11520 }, { "epoch": 1.540652580903985, "grad_norm": 1.1580619812011719, "learning_rate": 1.0047643458343534e-05, "loss": 0.6753, "step": 11521 }, { "epoch": 1.540786306499064, "grad_norm": 1.082715630531311, "learning_rate": 1.0046199727614005e-05, "loss": 0.7697, "step": 11522 }, { "epoch": 1.5409200320941427, "grad_norm": 1.0970429182052612, "learning_rate": 1.0044755995921488e-05, "loss": 0.6974, "step": 11523 }, { "epoch": 1.5410537576892218, "grad_norm": 1.1509534120559692, "learning_rate": 1.0043312263296074e-05, "loss": 0.6377, "step": 11524 }, { "epoch": 1.5411874832843007, "grad_norm": 1.2446962594985962, "learning_rate": 1.0041868529767855e-05, "loss": 0.7892, "step": 11525 }, { "epoch": 1.5413212088793795, "grad_norm": 1.3933978080749512, "learning_rate": 1.004042479536693e-05, "loss": 0.7052, "step": 11526 }, { "epoch": 1.5414549344744584, "grad_norm": 1.1709096431732178, "learning_rate": 1.0038981060123388e-05, "loss": 0.6689, "step": 11527 }, { "epoch": 1.5415886600695372, "grad_norm": 1.1704374551773071, "learning_rate": 1.0037537324067324e-05, "loss": 0.6476, "step": 11528 }, { "epoch": 1.5417223856646163, "grad_norm": 1.219169020652771, "learning_rate": 1.0036093587228828e-05, "loss": 0.7775, "step": 11529 }, { "epoch": 1.5418561112596951, "grad_norm": 1.2217094898223877, "learning_rate": 1.0034649849637998e-05, "loss": 0.6248, "step": 11530 }, { "epoch": 1.541989836854774, "grad_norm": 1.1808208227157593, "learning_rate": 1.0033206111324922e-05, "loss": 0.6888, "step": 11531 }, { "epoch": 1.542123562449853, "grad_norm": 1.1540546417236328, "learning_rate": 1.00317623723197e-05, "loss": 0.6933, "step": 11532 }, { "epoch": 1.5422572880449317, "grad_norm": 1.1816272735595703, "learning_rate": 1.0030318632652419e-05, "loss": 0.7306, "step": 11533 }, { "epoch": 1.5423910136400107, "grad_norm": 1.2176556587219238, "learning_rate": 1.0028874892353176e-05, "loss": 0.6833, "step": 11534 }, { "epoch": 1.5425247392350896, "grad_norm": 1.2748459577560425, "learning_rate": 1.0027431151452062e-05, "loss": 0.7066, "step": 11535 }, { "epoch": 1.5426584648301684, "grad_norm": 1.2358193397521973, "learning_rate": 1.0025987409979176e-05, "loss": 0.6956, "step": 11536 }, { "epoch": 1.5427921904252475, "grad_norm": 1.1027051210403442, "learning_rate": 1.0024543667964605e-05, "loss": 0.6108, "step": 11537 }, { "epoch": 1.5429259160203261, "grad_norm": 1.249271273612976, "learning_rate": 1.0023099925438441e-05, "loss": 0.6892, "step": 11538 }, { "epoch": 1.5430596416154052, "grad_norm": 1.171519160270691, "learning_rate": 1.0021656182430785e-05, "loss": 0.7072, "step": 11539 }, { "epoch": 1.543193367210484, "grad_norm": 1.14243745803833, "learning_rate": 1.002021243897173e-05, "loss": 0.6914, "step": 11540 }, { "epoch": 1.543327092805563, "grad_norm": 1.2589973211288452, "learning_rate": 1.0018768695091361e-05, "loss": 0.6915, "step": 11541 }, { "epoch": 1.543460818400642, "grad_norm": 1.0428980588912964, "learning_rate": 1.0017324950819778e-05, "loss": 0.6631, "step": 11542 }, { "epoch": 1.5435945439957208, "grad_norm": 1.3954237699508667, "learning_rate": 1.0015881206187072e-05, "loss": 0.7164, "step": 11543 }, { "epoch": 1.5437282695907997, "grad_norm": 1.211290955543518, "learning_rate": 1.001443746122334e-05, "loss": 0.7472, "step": 11544 }, { "epoch": 1.5438619951858785, "grad_norm": 1.1888172626495361, "learning_rate": 1.001299371595867e-05, "loss": 0.705, "step": 11545 }, { "epoch": 1.5439957207809574, "grad_norm": 1.1781798601150513, "learning_rate": 1.001154997042316e-05, "loss": 0.6068, "step": 11546 }, { "epoch": 1.5441294463760364, "grad_norm": 1.1936330795288086, "learning_rate": 1.0010106224646901e-05, "loss": 0.7251, "step": 11547 }, { "epoch": 1.5442631719711153, "grad_norm": 1.3079208135604858, "learning_rate": 1.000866247865999e-05, "loss": 0.7275, "step": 11548 }, { "epoch": 1.5443968975661941, "grad_norm": 1.2139668464660645, "learning_rate": 1.0007218732492516e-05, "loss": 0.7367, "step": 11549 }, { "epoch": 1.5445306231612732, "grad_norm": 1.2707051038742065, "learning_rate": 1.0005774986174574e-05, "loss": 0.7236, "step": 11550 }, { "epoch": 1.5446643487563518, "grad_norm": 1.3258891105651855, "learning_rate": 1.0004331239736258e-05, "loss": 0.7706, "step": 11551 }, { "epoch": 1.544798074351431, "grad_norm": 1.2156256437301636, "learning_rate": 1.0002887493207663e-05, "loss": 0.7538, "step": 11552 }, { "epoch": 1.5449317999465098, "grad_norm": 0.9964362978935242, "learning_rate": 1.0001443746618877e-05, "loss": 0.6632, "step": 11553 }, { "epoch": 1.5450655255415886, "grad_norm": 1.1825547218322754, "learning_rate": 1e-05, "loss": 0.6294, "step": 11554 }, { "epoch": 1.5451992511366677, "grad_norm": 1.1713074445724487, "learning_rate": 9.998556253381127e-06, "loss": 0.6623, "step": 11555 }, { "epoch": 1.5453329767317463, "grad_norm": 1.274376630783081, "learning_rate": 9.99711250679234e-06, "loss": 0.7194, "step": 11556 }, { "epoch": 1.5454667023268254, "grad_norm": 1.1717119216918945, "learning_rate": 9.995668760263745e-06, "loss": 0.6939, "step": 11557 }, { "epoch": 1.5456004279219042, "grad_norm": 1.1790015697479248, "learning_rate": 9.994225013825428e-06, "loss": 0.6985, "step": 11558 }, { "epoch": 1.545734153516983, "grad_norm": 1.174066424369812, "learning_rate": 9.992781267507487e-06, "loss": 0.709, "step": 11559 }, { "epoch": 1.5458678791120621, "grad_norm": 1.3035436868667603, "learning_rate": 9.991337521340014e-06, "loss": 0.7265, "step": 11560 }, { "epoch": 1.546001604707141, "grad_norm": 1.1840989589691162, "learning_rate": 9.989893775353099e-06, "loss": 0.6961, "step": 11561 }, { "epoch": 1.5461353303022198, "grad_norm": 1.2567524909973145, "learning_rate": 9.988450029576843e-06, "loss": 0.7475, "step": 11562 }, { "epoch": 1.546269055897299, "grad_norm": 1.2457315921783447, "learning_rate": 9.987006284041332e-06, "loss": 0.7107, "step": 11563 }, { "epoch": 1.5464027814923775, "grad_norm": 1.3733962774276733, "learning_rate": 9.985562538776662e-06, "loss": 0.7192, "step": 11564 }, { "epoch": 1.5465365070874566, "grad_norm": 1.271192193031311, "learning_rate": 9.98411879381293e-06, "loss": 0.6882, "step": 11565 }, { "epoch": 1.5466702326825355, "grad_norm": 1.017574667930603, "learning_rate": 9.982675049180222e-06, "loss": 0.6213, "step": 11566 }, { "epoch": 1.5468039582776143, "grad_norm": 1.3721671104431152, "learning_rate": 9.98123130490864e-06, "loss": 0.7962, "step": 11567 }, { "epoch": 1.5469376838726934, "grad_norm": 1.2679362297058105, "learning_rate": 9.979787561028276e-06, "loss": 0.7386, "step": 11568 }, { "epoch": 1.547071409467772, "grad_norm": 1.199182152748108, "learning_rate": 9.978343817569214e-06, "loss": 0.7775, "step": 11569 }, { "epoch": 1.547205135062851, "grad_norm": 1.2453573942184448, "learning_rate": 9.97690007456156e-06, "loss": 0.6634, "step": 11570 }, { "epoch": 1.54733886065793, "grad_norm": 1.3124717473983765, "learning_rate": 9.975456332035398e-06, "loss": 0.7324, "step": 11571 }, { "epoch": 1.5474725862530088, "grad_norm": 1.2797060012817383, "learning_rate": 9.974012590020826e-06, "loss": 0.7358, "step": 11572 }, { "epoch": 1.5476063118480878, "grad_norm": 1.3853349685668945, "learning_rate": 9.97256884854794e-06, "loss": 0.6429, "step": 11573 }, { "epoch": 1.5477400374431665, "grad_norm": 1.2257990837097168, "learning_rate": 9.971125107646826e-06, "loss": 0.6492, "step": 11574 }, { "epoch": 1.5478737630382455, "grad_norm": 1.1092944145202637, "learning_rate": 9.969681367347583e-06, "loss": 0.6366, "step": 11575 }, { "epoch": 1.5480074886333244, "grad_norm": 1.2266744375228882, "learning_rate": 9.968237627680305e-06, "loss": 0.7186, "step": 11576 }, { "epoch": 1.5481412142284032, "grad_norm": 1.3109180927276611, "learning_rate": 9.96679388867508e-06, "loss": 0.6995, "step": 11577 }, { "epoch": 1.5482749398234823, "grad_norm": 1.1613661050796509, "learning_rate": 9.965350150362005e-06, "loss": 0.7348, "step": 11578 }, { "epoch": 1.5484086654185611, "grad_norm": 1.1526134014129639, "learning_rate": 9.963906412771176e-06, "loss": 0.6908, "step": 11579 }, { "epoch": 1.54854239101364, "grad_norm": 1.2256871461868286, "learning_rate": 9.962462675932679e-06, "loss": 0.6585, "step": 11580 }, { "epoch": 1.548676116608719, "grad_norm": 1.391932487487793, "learning_rate": 9.961018939876616e-06, "loss": 0.6815, "step": 11581 }, { "epoch": 1.5488098422037977, "grad_norm": 1.1569149494171143, "learning_rate": 9.95957520463307e-06, "loss": 0.682, "step": 11582 }, { "epoch": 1.5489435677988768, "grad_norm": 1.1894664764404297, "learning_rate": 9.958131470232147e-06, "loss": 0.7321, "step": 11583 }, { "epoch": 1.5490772933939556, "grad_norm": 1.286178469657898, "learning_rate": 9.956687736703931e-06, "loss": 0.7647, "step": 11584 }, { "epoch": 1.5492110189890345, "grad_norm": 1.19902503490448, "learning_rate": 9.955244004078514e-06, "loss": 0.6885, "step": 11585 }, { "epoch": 1.5493447445841135, "grad_norm": 1.2167459726333618, "learning_rate": 9.953800272385997e-06, "loss": 0.7455, "step": 11586 }, { "epoch": 1.5494784701791922, "grad_norm": 1.3349252939224243, "learning_rate": 9.952356541656471e-06, "loss": 0.7238, "step": 11587 }, { "epoch": 1.5496121957742712, "grad_norm": 1.1347497701644897, "learning_rate": 9.95091281192002e-06, "loss": 0.5805, "step": 11588 }, { "epoch": 1.54974592136935, "grad_norm": 1.2632615566253662, "learning_rate": 9.94946908320675e-06, "loss": 0.6755, "step": 11589 }, { "epoch": 1.549879646964429, "grad_norm": 1.153563380241394, "learning_rate": 9.948025355546747e-06, "loss": 0.7083, "step": 11590 }, { "epoch": 1.550013372559508, "grad_norm": 1.2649372816085815, "learning_rate": 9.946581628970106e-06, "loss": 0.6431, "step": 11591 }, { "epoch": 1.5501470981545866, "grad_norm": 1.1538318395614624, "learning_rate": 9.945137903506921e-06, "loss": 0.5431, "step": 11592 }, { "epoch": 1.5502808237496657, "grad_norm": 1.1633721590042114, "learning_rate": 9.94369417918728e-06, "loss": 0.6826, "step": 11593 }, { "epoch": 1.5504145493447445, "grad_norm": 1.2265843152999878, "learning_rate": 9.942250456041286e-06, "loss": 0.6369, "step": 11594 }, { "epoch": 1.5505482749398234, "grad_norm": 1.3075207471847534, "learning_rate": 9.940806734099021e-06, "loss": 0.7255, "step": 11595 }, { "epoch": 1.5506820005349025, "grad_norm": 1.3687458038330078, "learning_rate": 9.939363013390587e-06, "loss": 0.8353, "step": 11596 }, { "epoch": 1.5508157261299813, "grad_norm": 1.2239234447479248, "learning_rate": 9.93791929394607e-06, "loss": 0.7142, "step": 11597 }, { "epoch": 1.5509494517250602, "grad_norm": 1.1349965333938599, "learning_rate": 9.936475575795563e-06, "loss": 0.6482, "step": 11598 }, { "epoch": 1.5510831773201392, "grad_norm": 1.271505355834961, "learning_rate": 9.935031858969168e-06, "loss": 0.6786, "step": 11599 }, { "epoch": 1.5512169029152179, "grad_norm": 1.2784535884857178, "learning_rate": 9.933588143496971e-06, "loss": 0.6523, "step": 11600 }, { "epoch": 1.551350628510297, "grad_norm": 1.0192725658416748, "learning_rate": 9.932144429409061e-06, "loss": 0.6869, "step": 11601 }, { "epoch": 1.5514843541053758, "grad_norm": 1.2094461917877197, "learning_rate": 9.93070071673554e-06, "loss": 0.6751, "step": 11602 }, { "epoch": 1.5516180797004546, "grad_norm": 1.2380319833755493, "learning_rate": 9.929257005506496e-06, "loss": 0.7084, "step": 11603 }, { "epoch": 1.5517518052955337, "grad_norm": 1.1918452978134155, "learning_rate": 9.927813295752017e-06, "loss": 0.6987, "step": 11604 }, { "epoch": 1.5518855308906123, "grad_norm": 1.2153878211975098, "learning_rate": 9.926369587502205e-06, "loss": 0.727, "step": 11605 }, { "epoch": 1.5520192564856914, "grad_norm": 1.1790846586227417, "learning_rate": 9.924925880787146e-06, "loss": 0.605, "step": 11606 }, { "epoch": 1.5521529820807702, "grad_norm": 1.3317478895187378, "learning_rate": 9.923482175636938e-06, "loss": 0.7303, "step": 11607 }, { "epoch": 1.552286707675849, "grad_norm": 1.170379400253296, "learning_rate": 9.922038472081672e-06, "loss": 0.693, "step": 11608 }, { "epoch": 1.5524204332709282, "grad_norm": 1.1302177906036377, "learning_rate": 9.920594770151436e-06, "loss": 0.6755, "step": 11609 }, { "epoch": 1.552554158866007, "grad_norm": 1.2757900953292847, "learning_rate": 9.919151069876328e-06, "loss": 0.8085, "step": 11610 }, { "epoch": 1.5526878844610859, "grad_norm": 1.2512168884277344, "learning_rate": 9.917707371286439e-06, "loss": 0.6957, "step": 11611 }, { "epoch": 1.5528216100561647, "grad_norm": 1.2800650596618652, "learning_rate": 9.916263674411858e-06, "loss": 0.7203, "step": 11612 }, { "epoch": 1.5529553356512436, "grad_norm": 1.2630618810653687, "learning_rate": 9.914819979282684e-06, "loss": 0.7234, "step": 11613 }, { "epoch": 1.5530890612463226, "grad_norm": 1.2083522081375122, "learning_rate": 9.913376285929002e-06, "loss": 0.7302, "step": 11614 }, { "epoch": 1.5532227868414015, "grad_norm": 1.2552076578140259, "learning_rate": 9.911932594380913e-06, "loss": 0.6931, "step": 11615 }, { "epoch": 1.5533565124364803, "grad_norm": 1.3146113157272339, "learning_rate": 9.910488904668503e-06, "loss": 0.7653, "step": 11616 }, { "epoch": 1.5534902380315594, "grad_norm": 1.2481141090393066, "learning_rate": 9.909045216821863e-06, "loss": 0.7267, "step": 11617 }, { "epoch": 1.553623963626638, "grad_norm": 1.1267297267913818, "learning_rate": 9.907601530871094e-06, "loss": 0.7343, "step": 11618 }, { "epoch": 1.553757689221717, "grad_norm": 1.3143137693405151, "learning_rate": 9.906157846846282e-06, "loss": 0.7429, "step": 11619 }, { "epoch": 1.553891414816796, "grad_norm": 1.2199690341949463, "learning_rate": 9.904714164777514e-06, "loss": 0.624, "step": 11620 }, { "epoch": 1.5540251404118748, "grad_norm": 1.1053187847137451, "learning_rate": 9.903270484694895e-06, "loss": 0.6315, "step": 11621 }, { "epoch": 1.5541588660069539, "grad_norm": 1.2417516708374023, "learning_rate": 9.901826806628505e-06, "loss": 0.6968, "step": 11622 }, { "epoch": 1.5542925916020325, "grad_norm": 1.302356481552124, "learning_rate": 9.900383130608443e-06, "loss": 0.7123, "step": 11623 }, { "epoch": 1.5544263171971116, "grad_norm": 1.204300045967102, "learning_rate": 9.8989394566648e-06, "loss": 0.7234, "step": 11624 }, { "epoch": 1.5545600427921904, "grad_norm": 1.0882188081741333, "learning_rate": 9.897495784827667e-06, "loss": 0.7487, "step": 11625 }, { "epoch": 1.5546937683872692, "grad_norm": 1.2488876581192017, "learning_rate": 9.896052115127136e-06, "loss": 0.7136, "step": 11626 }, { "epoch": 1.5548274939823483, "grad_norm": 1.1952486038208008, "learning_rate": 9.8946084475933e-06, "loss": 0.7369, "step": 11627 }, { "epoch": 1.5549612195774272, "grad_norm": 1.3092358112335205, "learning_rate": 9.89316478225625e-06, "loss": 0.7881, "step": 11628 }, { "epoch": 1.555094945172506, "grad_norm": 1.204134464263916, "learning_rate": 9.891721119146076e-06, "loss": 0.7028, "step": 11629 }, { "epoch": 1.5552286707675849, "grad_norm": 1.173227310180664, "learning_rate": 9.890277458292871e-06, "loss": 0.6695, "step": 11630 }, { "epoch": 1.5553623963626637, "grad_norm": 1.2467774152755737, "learning_rate": 9.888833799726733e-06, "loss": 0.709, "step": 11631 }, { "epoch": 1.5554961219577428, "grad_norm": 1.2323771715164185, "learning_rate": 9.887390143477746e-06, "loss": 0.6794, "step": 11632 }, { "epoch": 1.5556298475528216, "grad_norm": 1.2474805116653442, "learning_rate": 9.885946489576001e-06, "loss": 0.6282, "step": 11633 }, { "epoch": 1.5557635731479005, "grad_norm": 1.0359275341033936, "learning_rate": 9.884502838051595e-06, "loss": 0.6278, "step": 11634 }, { "epoch": 1.5558972987429796, "grad_norm": 1.1019821166992188, "learning_rate": 9.883059188934615e-06, "loss": 0.6579, "step": 11635 }, { "epoch": 1.5560310243380582, "grad_norm": 1.116276502609253, "learning_rate": 9.881615542255151e-06, "loss": 0.6979, "step": 11636 }, { "epoch": 1.5561647499331372, "grad_norm": 1.198554277420044, "learning_rate": 9.880171898043306e-06, "loss": 0.6786, "step": 11637 }, { "epoch": 1.556298475528216, "grad_norm": 1.192000150680542, "learning_rate": 9.878728256329154e-06, "loss": 0.7343, "step": 11638 }, { "epoch": 1.556432201123295, "grad_norm": 1.2033674716949463, "learning_rate": 9.877284617142802e-06, "loss": 0.6672, "step": 11639 }, { "epoch": 1.556565926718374, "grad_norm": 1.1766128540039062, "learning_rate": 9.875840980514332e-06, "loss": 0.7765, "step": 11640 }, { "epoch": 1.5566996523134526, "grad_norm": 1.199671745300293, "learning_rate": 9.87439734647384e-06, "loss": 0.7248, "step": 11641 }, { "epoch": 1.5568333779085317, "grad_norm": 1.3016639947891235, "learning_rate": 9.872953715051412e-06, "loss": 0.7148, "step": 11642 }, { "epoch": 1.5569671035036106, "grad_norm": 1.1941275596618652, "learning_rate": 9.871510086277142e-06, "loss": 0.6622, "step": 11643 }, { "epoch": 1.5571008290986894, "grad_norm": 1.2003486156463623, "learning_rate": 9.870066460181126e-06, "loss": 0.718, "step": 11644 }, { "epoch": 1.5572345546937685, "grad_norm": 1.3094204664230347, "learning_rate": 9.86862283679345e-06, "loss": 0.7215, "step": 11645 }, { "epoch": 1.5573682802888473, "grad_norm": 1.1726515293121338, "learning_rate": 9.8671792161442e-06, "loss": 0.6679, "step": 11646 }, { "epoch": 1.5575020058839262, "grad_norm": 1.0584392547607422, "learning_rate": 9.865735598263477e-06, "loss": 0.6554, "step": 11647 }, { "epoch": 1.5576357314790052, "grad_norm": 1.1330418586730957, "learning_rate": 9.864291983181366e-06, "loss": 0.695, "step": 11648 }, { "epoch": 1.5577694570740839, "grad_norm": 1.2522163391113281, "learning_rate": 9.862848370927955e-06, "loss": 0.767, "step": 11649 }, { "epoch": 1.557903182669163, "grad_norm": 1.1301709413528442, "learning_rate": 9.861404761533343e-06, "loss": 0.6714, "step": 11650 }, { "epoch": 1.5580369082642418, "grad_norm": 1.1758971214294434, "learning_rate": 9.859961155027613e-06, "loss": 0.6315, "step": 11651 }, { "epoch": 1.5581706338593206, "grad_norm": 1.1606630086898804, "learning_rate": 9.85851755144086e-06, "loss": 0.6749, "step": 11652 }, { "epoch": 1.5583043594543997, "grad_norm": 1.082653284072876, "learning_rate": 9.857073950803176e-06, "loss": 0.5811, "step": 11653 }, { "epoch": 1.5584380850494783, "grad_norm": 1.1764706373214722, "learning_rate": 9.855630353144644e-06, "loss": 0.7025, "step": 11654 }, { "epoch": 1.5585718106445574, "grad_norm": 1.1327965259552002, "learning_rate": 9.854186758495361e-06, "loss": 0.6713, "step": 11655 }, { "epoch": 1.5587055362396363, "grad_norm": 1.2917152643203735, "learning_rate": 9.852743166885419e-06, "loss": 0.7021, "step": 11656 }, { "epoch": 1.558839261834715, "grad_norm": 1.122725486755371, "learning_rate": 9.851299578344897e-06, "loss": 0.6649, "step": 11657 }, { "epoch": 1.5589729874297942, "grad_norm": 1.1917108297348022, "learning_rate": 9.8498559929039e-06, "loss": 0.6709, "step": 11658 }, { "epoch": 1.5591067130248728, "grad_norm": 1.1083738803863525, "learning_rate": 9.848412410592506e-06, "loss": 0.62, "step": 11659 }, { "epoch": 1.5592404386199519, "grad_norm": 1.2363409996032715, "learning_rate": 9.846968831440815e-06, "loss": 0.7216, "step": 11660 }, { "epoch": 1.5593741642150307, "grad_norm": 1.4001164436340332, "learning_rate": 9.84552525547891e-06, "loss": 0.6297, "step": 11661 }, { "epoch": 1.5595078898101096, "grad_norm": 1.2242978811264038, "learning_rate": 9.844081682736881e-06, "loss": 0.7094, "step": 11662 }, { "epoch": 1.5596416154051886, "grad_norm": 1.223361611366272, "learning_rate": 9.842638113244824e-06, "loss": 0.7295, "step": 11663 }, { "epoch": 1.5597753410002675, "grad_norm": 1.2252004146575928, "learning_rate": 9.841194547032826e-06, "loss": 0.8019, "step": 11664 }, { "epoch": 1.5599090665953463, "grad_norm": 1.165259838104248, "learning_rate": 9.839750984130971e-06, "loss": 0.6912, "step": 11665 }, { "epoch": 1.5600427921904254, "grad_norm": 1.239406704902649, "learning_rate": 9.838307424569357e-06, "loss": 0.7716, "step": 11666 }, { "epoch": 1.560176517785504, "grad_norm": 1.240493893623352, "learning_rate": 9.836863868378067e-06, "loss": 0.6733, "step": 11667 }, { "epoch": 1.560310243380583, "grad_norm": 1.276371955871582, "learning_rate": 9.835420315587194e-06, "loss": 0.7451, "step": 11668 }, { "epoch": 1.560443968975662, "grad_norm": 1.1262831687927246, "learning_rate": 9.833976766226831e-06, "loss": 0.6196, "step": 11669 }, { "epoch": 1.5605776945707408, "grad_norm": 1.2979719638824463, "learning_rate": 9.832533220327059e-06, "loss": 0.7514, "step": 11670 }, { "epoch": 1.5607114201658199, "grad_norm": 1.213478684425354, "learning_rate": 9.831089677917974e-06, "loss": 0.7296, "step": 11671 }, { "epoch": 1.5608451457608985, "grad_norm": 1.2263919115066528, "learning_rate": 9.829646139029664e-06, "loss": 0.6923, "step": 11672 }, { "epoch": 1.5609788713559776, "grad_norm": 1.4062761068344116, "learning_rate": 9.828202603692214e-06, "loss": 0.7988, "step": 11673 }, { "epoch": 1.5611125969510564, "grad_norm": 1.0653266906738281, "learning_rate": 9.826759071935718e-06, "loss": 0.6466, "step": 11674 }, { "epoch": 1.5612463225461353, "grad_norm": 1.1554373502731323, "learning_rate": 9.82531554379026e-06, "loss": 0.5972, "step": 11675 }, { "epoch": 1.5613800481412143, "grad_norm": 1.1816476583480835, "learning_rate": 9.823872019285938e-06, "loss": 0.6886, "step": 11676 }, { "epoch": 1.561513773736293, "grad_norm": 1.3037949800491333, "learning_rate": 9.822428498452836e-06, "loss": 0.7817, "step": 11677 }, { "epoch": 1.561647499331372, "grad_norm": 1.2093069553375244, "learning_rate": 9.820984981321035e-06, "loss": 0.7161, "step": 11678 }, { "epoch": 1.5617812249264509, "grad_norm": 1.2922788858413696, "learning_rate": 9.819541467920638e-06, "loss": 0.7261, "step": 11679 }, { "epoch": 1.5619149505215297, "grad_norm": 1.1776243448257446, "learning_rate": 9.818097958281723e-06, "loss": 0.6736, "step": 11680 }, { "epoch": 1.5620486761166088, "grad_norm": 1.125073790550232, "learning_rate": 9.81665445243438e-06, "loss": 0.6501, "step": 11681 }, { "epoch": 1.5621824017116877, "grad_norm": 1.2076047658920288, "learning_rate": 9.815210950408703e-06, "loss": 0.7229, "step": 11682 }, { "epoch": 1.5623161273067665, "grad_norm": 1.2347359657287598, "learning_rate": 9.813767452234772e-06, "loss": 0.6013, "step": 11683 }, { "epoch": 1.5624498529018456, "grad_norm": 1.2110868692398071, "learning_rate": 9.812323957942686e-06, "loss": 0.6347, "step": 11684 }, { "epoch": 1.5625835784969242, "grad_norm": 1.4476277828216553, "learning_rate": 9.810880467562527e-06, "loss": 0.8649, "step": 11685 }, { "epoch": 1.5627173040920033, "grad_norm": 1.2302271127700806, "learning_rate": 9.80943698112438e-06, "loss": 0.6593, "step": 11686 }, { "epoch": 1.5628510296870821, "grad_norm": 1.1797484159469604, "learning_rate": 9.80799349865834e-06, "loss": 0.6905, "step": 11687 }, { "epoch": 1.562984755282161, "grad_norm": 1.2235772609710693, "learning_rate": 9.806550020194492e-06, "loss": 0.6367, "step": 11688 }, { "epoch": 1.56311848087724, "grad_norm": 1.1277586221694946, "learning_rate": 9.80510654576292e-06, "loss": 0.6496, "step": 11689 }, { "epoch": 1.5632522064723187, "grad_norm": 1.2723939418792725, "learning_rate": 9.80366307539372e-06, "loss": 0.718, "step": 11690 }, { "epoch": 1.5633859320673977, "grad_norm": 1.1371005773544312, "learning_rate": 9.80221960911697e-06, "loss": 0.6905, "step": 11691 }, { "epoch": 1.5635196576624766, "grad_norm": 1.1974263191223145, "learning_rate": 9.800776146962768e-06, "loss": 0.6646, "step": 11692 }, { "epoch": 1.5636533832575554, "grad_norm": 1.1156206130981445, "learning_rate": 9.799332688961196e-06, "loss": 0.7262, "step": 11693 }, { "epoch": 1.5637871088526345, "grad_norm": 1.123761773109436, "learning_rate": 9.797889235142338e-06, "loss": 0.597, "step": 11694 }, { "epoch": 1.5639208344477131, "grad_norm": 1.2224805355072021, "learning_rate": 9.79644578553629e-06, "loss": 0.6977, "step": 11695 }, { "epoch": 1.5640545600427922, "grad_norm": 1.1933468580245972, "learning_rate": 9.795002340173135e-06, "loss": 0.7128, "step": 11696 }, { "epoch": 1.564188285637871, "grad_norm": 1.3735162019729614, "learning_rate": 9.793558899082955e-06, "loss": 0.7225, "step": 11697 }, { "epoch": 1.56432201123295, "grad_norm": 1.221158504486084, "learning_rate": 9.792115462295848e-06, "loss": 0.7139, "step": 11698 }, { "epoch": 1.564455736828029, "grad_norm": 1.3197550773620605, "learning_rate": 9.79067202984189e-06, "loss": 0.7167, "step": 11699 }, { "epoch": 1.5645894624231078, "grad_norm": 1.207801103591919, "learning_rate": 9.789228601751177e-06, "loss": 0.7217, "step": 11700 }, { "epoch": 1.5647231880181867, "grad_norm": 1.2044693231582642, "learning_rate": 9.787785178053792e-06, "loss": 0.6487, "step": 11701 }, { "epoch": 1.5648569136132657, "grad_norm": 1.3459947109222412, "learning_rate": 9.786341758779817e-06, "loss": 0.6631, "step": 11702 }, { "epoch": 1.5649906392083444, "grad_norm": 1.0948126316070557, "learning_rate": 9.784898343959351e-06, "loss": 0.656, "step": 11703 }, { "epoch": 1.5651243648034234, "grad_norm": 1.1210191249847412, "learning_rate": 9.783454933622472e-06, "loss": 0.6748, "step": 11704 }, { "epoch": 1.5652580903985023, "grad_norm": 1.2867801189422607, "learning_rate": 9.782011527799263e-06, "loss": 0.7098, "step": 11705 }, { "epoch": 1.5653918159935811, "grad_norm": 1.2693672180175781, "learning_rate": 9.780568126519817e-06, "loss": 0.7392, "step": 11706 }, { "epoch": 1.5655255415886602, "grad_norm": 1.150911569595337, "learning_rate": 9.779124729814216e-06, "loss": 0.6981, "step": 11707 }, { "epoch": 1.5656592671837388, "grad_norm": 1.3449972867965698, "learning_rate": 9.777681337712554e-06, "loss": 0.7295, "step": 11708 }, { "epoch": 1.565792992778818, "grad_norm": 1.30966055393219, "learning_rate": 9.77623795024491e-06, "loss": 0.6645, "step": 11709 }, { "epoch": 1.5659267183738967, "grad_norm": 1.1972509622573853, "learning_rate": 9.77479456744137e-06, "loss": 0.6483, "step": 11710 }, { "epoch": 1.5660604439689756, "grad_norm": 1.3096901178359985, "learning_rate": 9.773351189332024e-06, "loss": 0.7409, "step": 11711 }, { "epoch": 1.5661941695640547, "grad_norm": 1.146596908569336, "learning_rate": 9.771907815946955e-06, "loss": 0.635, "step": 11712 }, { "epoch": 1.5663278951591335, "grad_norm": 1.2024109363555908, "learning_rate": 9.770464447316245e-06, "loss": 0.7125, "step": 11713 }, { "epoch": 1.5664616207542124, "grad_norm": 1.2433140277862549, "learning_rate": 9.769021083469991e-06, "loss": 0.653, "step": 11714 }, { "epoch": 1.5665953463492912, "grad_norm": 1.385238766670227, "learning_rate": 9.767577724438267e-06, "loss": 0.7406, "step": 11715 }, { "epoch": 1.56672907194437, "grad_norm": 1.0950173139572144, "learning_rate": 9.766134370251165e-06, "loss": 0.7164, "step": 11716 }, { "epoch": 1.5668627975394491, "grad_norm": 1.2922570705413818, "learning_rate": 9.76469102093877e-06, "loss": 0.7151, "step": 11717 }, { "epoch": 1.566996523134528, "grad_norm": 1.1620092391967773, "learning_rate": 9.76324767653116e-06, "loss": 0.7037, "step": 11718 }, { "epoch": 1.5671302487296068, "grad_norm": 1.2484108209609985, "learning_rate": 9.761804337058428e-06, "loss": 0.6438, "step": 11719 }, { "epoch": 1.567263974324686, "grad_norm": 1.1119927167892456, "learning_rate": 9.76036100255066e-06, "loss": 0.7158, "step": 11720 }, { "epoch": 1.5673976999197645, "grad_norm": 1.1602082252502441, "learning_rate": 9.758917673037932e-06, "loss": 0.6921, "step": 11721 }, { "epoch": 1.5675314255148436, "grad_norm": 1.1938859224319458, "learning_rate": 9.75747434855034e-06, "loss": 0.6929, "step": 11722 }, { "epoch": 1.5676651511099224, "grad_norm": 1.1739352941513062, "learning_rate": 9.756031029117958e-06, "loss": 0.6692, "step": 11723 }, { "epoch": 1.5677988767050013, "grad_norm": 1.309211015701294, "learning_rate": 9.75458771477088e-06, "loss": 0.7373, "step": 11724 }, { "epoch": 1.5679326023000804, "grad_norm": 1.2607371807098389, "learning_rate": 9.753144405539184e-06, "loss": 0.7484, "step": 11725 }, { "epoch": 1.568066327895159, "grad_norm": 1.0773786306381226, "learning_rate": 9.751701101452954e-06, "loss": 0.6628, "step": 11726 }, { "epoch": 1.568200053490238, "grad_norm": 1.219720482826233, "learning_rate": 9.750257802542282e-06, "loss": 0.7642, "step": 11727 }, { "epoch": 1.568333779085317, "grad_norm": 1.0714747905731201, "learning_rate": 9.748814508837244e-06, "loss": 0.663, "step": 11728 }, { "epoch": 1.5684675046803958, "grad_norm": 1.3097397089004517, "learning_rate": 9.74737122036793e-06, "loss": 0.7238, "step": 11729 }, { "epoch": 1.5686012302754748, "grad_norm": 1.3196287155151367, "learning_rate": 9.74592793716442e-06, "loss": 0.7673, "step": 11730 }, { "epoch": 1.5687349558705537, "grad_norm": 1.206199288368225, "learning_rate": 9.744484659256796e-06, "loss": 0.5918, "step": 11731 }, { "epoch": 1.5688686814656325, "grad_norm": 1.225818395614624, "learning_rate": 9.743041386675147e-06, "loss": 0.6645, "step": 11732 }, { "epoch": 1.5690024070607114, "grad_norm": 1.2376528978347778, "learning_rate": 9.741598119449558e-06, "loss": 0.7317, "step": 11733 }, { "epoch": 1.5691361326557902, "grad_norm": 1.3443000316619873, "learning_rate": 9.740154857610103e-06, "loss": 0.6674, "step": 11734 }, { "epoch": 1.5692698582508693, "grad_norm": 1.0988441705703735, "learning_rate": 9.738711601186875e-06, "loss": 0.6393, "step": 11735 }, { "epoch": 1.5694035838459481, "grad_norm": 1.3145753145217896, "learning_rate": 9.737268350209951e-06, "loss": 0.6955, "step": 11736 }, { "epoch": 1.569537309441027, "grad_norm": 1.3256607055664062, "learning_rate": 9.73582510470942e-06, "loss": 0.6991, "step": 11737 }, { "epoch": 1.569671035036106, "grad_norm": 1.2174677848815918, "learning_rate": 9.73438186471536e-06, "loss": 0.7534, "step": 11738 }, { "epoch": 1.5698047606311847, "grad_norm": 1.3708367347717285, "learning_rate": 9.732938630257855e-06, "loss": 0.8335, "step": 11739 }, { "epoch": 1.5699384862262638, "grad_norm": 1.1592994928359985, "learning_rate": 9.731495401366992e-06, "loss": 0.6255, "step": 11740 }, { "epoch": 1.5700722118213426, "grad_norm": 1.2380578517913818, "learning_rate": 9.73005217807285e-06, "loss": 0.7256, "step": 11741 }, { "epoch": 1.5702059374164214, "grad_norm": 1.2830851078033447, "learning_rate": 9.728608960405508e-06, "loss": 0.7221, "step": 11742 }, { "epoch": 1.5703396630115005, "grad_norm": 1.1336897611618042, "learning_rate": 9.727165748395056e-06, "loss": 0.6956, "step": 11743 }, { "epoch": 1.5704733886065791, "grad_norm": 1.0971282720565796, "learning_rate": 9.72572254207157e-06, "loss": 0.6702, "step": 11744 }, { "epoch": 1.5706071142016582, "grad_norm": 1.1822386980056763, "learning_rate": 9.724279341465138e-06, "loss": 0.6961, "step": 11745 }, { "epoch": 1.570740839796737, "grad_norm": 1.1029486656188965, "learning_rate": 9.722836146605838e-06, "loss": 0.6423, "step": 11746 }, { "epoch": 1.570874565391816, "grad_norm": 1.3332191705703735, "learning_rate": 9.721392957523751e-06, "loss": 0.7294, "step": 11747 }, { "epoch": 1.571008290986895, "grad_norm": 1.1590595245361328, "learning_rate": 9.719949774248967e-06, "loss": 0.7524, "step": 11748 }, { "epoch": 1.5711420165819738, "grad_norm": 1.1736226081848145, "learning_rate": 9.718506596811561e-06, "loss": 0.7005, "step": 11749 }, { "epoch": 1.5712757421770527, "grad_norm": 1.327608346939087, "learning_rate": 9.717063425241611e-06, "loss": 0.6983, "step": 11750 }, { "epoch": 1.5714094677721318, "grad_norm": 1.1685997247695923, "learning_rate": 9.715620259569205e-06, "loss": 0.6386, "step": 11751 }, { "epoch": 1.5715431933672104, "grad_norm": 1.1421043872833252, "learning_rate": 9.71417709982442e-06, "loss": 0.669, "step": 11752 }, { "epoch": 1.5716769189622894, "grad_norm": 1.1286109685897827, "learning_rate": 9.712733946037344e-06, "loss": 0.7208, "step": 11753 }, { "epoch": 1.5718106445573683, "grad_norm": 1.1458673477172852, "learning_rate": 9.711290798238056e-06, "loss": 0.69, "step": 11754 }, { "epoch": 1.5719443701524471, "grad_norm": 1.143220067024231, "learning_rate": 9.70984765645663e-06, "loss": 0.668, "step": 11755 }, { "epoch": 1.5720780957475262, "grad_norm": 1.2981085777282715, "learning_rate": 9.708404520723156e-06, "loss": 0.7589, "step": 11756 }, { "epoch": 1.5722118213426048, "grad_norm": 1.313697099685669, "learning_rate": 9.706961391067709e-06, "loss": 0.7312, "step": 11757 }, { "epoch": 1.572345546937684, "grad_norm": 1.164406180381775, "learning_rate": 9.705518267520369e-06, "loss": 0.6445, "step": 11758 }, { "epoch": 1.5724792725327628, "grad_norm": 1.0774198770523071, "learning_rate": 9.704075150111222e-06, "loss": 0.6712, "step": 11759 }, { "epoch": 1.5726129981278416, "grad_norm": 1.3268271684646606, "learning_rate": 9.702632038870342e-06, "loss": 0.7603, "step": 11760 }, { "epoch": 1.5727467237229207, "grad_norm": 1.1782640218734741, "learning_rate": 9.701188933827817e-06, "loss": 0.7605, "step": 11761 }, { "epoch": 1.5728804493179993, "grad_norm": 1.1995817422866821, "learning_rate": 9.699745835013724e-06, "loss": 0.71, "step": 11762 }, { "epoch": 1.5730141749130784, "grad_norm": 1.2396368980407715, "learning_rate": 9.698302742458135e-06, "loss": 0.7202, "step": 11763 }, { "epoch": 1.5731479005081572, "grad_norm": 1.0194238424301147, "learning_rate": 9.69685965619114e-06, "loss": 0.6312, "step": 11764 }, { "epoch": 1.573281626103236, "grad_norm": 1.3074367046356201, "learning_rate": 9.695416576242818e-06, "loss": 0.7789, "step": 11765 }, { "epoch": 1.5734153516983151, "grad_norm": 1.1553776264190674, "learning_rate": 9.69397350264324e-06, "loss": 0.7292, "step": 11766 }, { "epoch": 1.573549077293394, "grad_norm": 1.2162641286849976, "learning_rate": 9.692530435422497e-06, "loss": 0.7685, "step": 11767 }, { "epoch": 1.5736828028884728, "grad_norm": 1.2103520631790161, "learning_rate": 9.691087374610659e-06, "loss": 0.7143, "step": 11768 }, { "epoch": 1.573816528483552, "grad_norm": 1.3082326650619507, "learning_rate": 9.689644320237812e-06, "loss": 0.7088, "step": 11769 }, { "epoch": 1.5739502540786305, "grad_norm": 1.0748053789138794, "learning_rate": 9.688201272334031e-06, "loss": 0.6572, "step": 11770 }, { "epoch": 1.5740839796737096, "grad_norm": 1.2526825666427612, "learning_rate": 9.686758230929395e-06, "loss": 0.6589, "step": 11771 }, { "epoch": 1.5742177052687885, "grad_norm": 1.339076280593872, "learning_rate": 9.685315196053986e-06, "loss": 0.7648, "step": 11772 }, { "epoch": 1.5743514308638673, "grad_norm": 1.0858122110366821, "learning_rate": 9.683872167737883e-06, "loss": 0.652, "step": 11773 }, { "epoch": 1.5744851564589464, "grad_norm": 1.1740416288375854, "learning_rate": 9.682429146011157e-06, "loss": 0.6638, "step": 11774 }, { "epoch": 1.574618882054025, "grad_norm": 1.2719452381134033, "learning_rate": 9.680986130903895e-06, "loss": 0.7042, "step": 11775 }, { "epoch": 1.574752607649104, "grad_norm": 1.2472749948501587, "learning_rate": 9.679543122446167e-06, "loss": 0.6011, "step": 11776 }, { "epoch": 1.574886333244183, "grad_norm": 1.2287189960479736, "learning_rate": 9.67810012066806e-06, "loss": 0.7186, "step": 11777 }, { "epoch": 1.5750200588392618, "grad_norm": 1.19370698928833, "learning_rate": 9.676657125599649e-06, "loss": 0.7487, "step": 11778 }, { "epoch": 1.5751537844343408, "grad_norm": 1.2478740215301514, "learning_rate": 9.675214137271007e-06, "loss": 0.6886, "step": 11779 }, { "epoch": 1.5752875100294195, "grad_norm": 1.1382899284362793, "learning_rate": 9.67377115571222e-06, "loss": 0.7126, "step": 11780 }, { "epoch": 1.5754212356244985, "grad_norm": 1.2274399995803833, "learning_rate": 9.67232818095336e-06, "loss": 0.693, "step": 11781 }, { "epoch": 1.5755549612195774, "grad_norm": 1.177681565284729, "learning_rate": 9.670885213024502e-06, "loss": 0.6347, "step": 11782 }, { "epoch": 1.5756886868146562, "grad_norm": 1.3298187255859375, "learning_rate": 9.669442251955728e-06, "loss": 0.7251, "step": 11783 }, { "epoch": 1.5758224124097353, "grad_norm": 1.171137809753418, "learning_rate": 9.667999297777113e-06, "loss": 0.6272, "step": 11784 }, { "epoch": 1.5759561380048142, "grad_norm": 1.2151525020599365, "learning_rate": 9.666556350518738e-06, "loss": 0.7075, "step": 11785 }, { "epoch": 1.576089863599893, "grad_norm": 1.174127221107483, "learning_rate": 9.665113410210678e-06, "loss": 0.7594, "step": 11786 }, { "epoch": 1.576223589194972, "grad_norm": 1.2687079906463623, "learning_rate": 9.663670476883005e-06, "loss": 0.781, "step": 11787 }, { "epoch": 1.5763573147900507, "grad_norm": 1.1923011541366577, "learning_rate": 9.662227550565801e-06, "loss": 0.7112, "step": 11788 }, { "epoch": 1.5764910403851298, "grad_norm": 1.2337623834609985, "learning_rate": 9.660784631289141e-06, "loss": 0.6796, "step": 11789 }, { "epoch": 1.5766247659802086, "grad_norm": 1.2290518283843994, "learning_rate": 9.659341719083096e-06, "loss": 0.7037, "step": 11790 }, { "epoch": 1.5767584915752875, "grad_norm": 1.1059620380401611, "learning_rate": 9.657898813977753e-06, "loss": 0.6711, "step": 11791 }, { "epoch": 1.5768922171703665, "grad_norm": 1.114241361618042, "learning_rate": 9.656455916003178e-06, "loss": 0.7091, "step": 11792 }, { "epoch": 1.5770259427654452, "grad_norm": 1.1974172592163086, "learning_rate": 9.655013025189452e-06, "loss": 0.6664, "step": 11793 }, { "epoch": 1.5771596683605242, "grad_norm": 1.433977484703064, "learning_rate": 9.653570141566653e-06, "loss": 0.7615, "step": 11794 }, { "epoch": 1.577293393955603, "grad_norm": 1.2793447971343994, "learning_rate": 9.652127265164846e-06, "loss": 0.6729, "step": 11795 }, { "epoch": 1.577427119550682, "grad_norm": 1.1911096572875977, "learning_rate": 9.650684396014115e-06, "loss": 0.7261, "step": 11796 }, { "epoch": 1.577560845145761, "grad_norm": 1.1903282403945923, "learning_rate": 9.64924153414454e-06, "loss": 0.6317, "step": 11797 }, { "epoch": 1.5776945707408396, "grad_norm": 1.187843680381775, "learning_rate": 9.64779867958618e-06, "loss": 0.754, "step": 11798 }, { "epoch": 1.5778282963359187, "grad_norm": 1.3745046854019165, "learning_rate": 9.646355832369128e-06, "loss": 0.8519, "step": 11799 }, { "epoch": 1.5779620219309975, "grad_norm": 1.3015426397323608, "learning_rate": 9.644912992523444e-06, "loss": 0.7947, "step": 11800 }, { "epoch": 1.5780957475260764, "grad_norm": 1.2784847021102905, "learning_rate": 9.643470160079213e-06, "loss": 0.7313, "step": 11801 }, { "epoch": 1.5782294731211555, "grad_norm": 1.1897597312927246, "learning_rate": 9.642027335066502e-06, "loss": 0.6432, "step": 11802 }, { "epoch": 1.5783631987162343, "grad_norm": 1.1843338012695312, "learning_rate": 9.64058451751539e-06, "loss": 0.7218, "step": 11803 }, { "epoch": 1.5784969243113132, "grad_norm": 1.2876243591308594, "learning_rate": 9.63914170745595e-06, "loss": 0.7277, "step": 11804 }, { "epoch": 1.5786306499063922, "grad_norm": 1.217679500579834, "learning_rate": 9.63769890491826e-06, "loss": 0.6611, "step": 11805 }, { "epoch": 1.5787643755014709, "grad_norm": 1.3497886657714844, "learning_rate": 9.636256109932382e-06, "loss": 0.7029, "step": 11806 }, { "epoch": 1.57889810109655, "grad_norm": 1.1805776357650757, "learning_rate": 9.634813322528403e-06, "loss": 0.7257, "step": 11807 }, { "epoch": 1.5790318266916288, "grad_norm": 1.070351481437683, "learning_rate": 9.633370542736386e-06, "loss": 0.5769, "step": 11808 }, { "epoch": 1.5791655522867076, "grad_norm": 1.1269499063491821, "learning_rate": 9.631927770586412e-06, "loss": 0.6408, "step": 11809 }, { "epoch": 1.5792992778817867, "grad_norm": 1.390510082244873, "learning_rate": 9.630485006108554e-06, "loss": 0.6845, "step": 11810 }, { "epoch": 1.5794330034768653, "grad_norm": 1.1902354955673218, "learning_rate": 9.629042249332878e-06, "loss": 0.6793, "step": 11811 }, { "epoch": 1.5795667290719444, "grad_norm": 1.2409356832504272, "learning_rate": 9.627599500289464e-06, "loss": 0.6867, "step": 11812 }, { "epoch": 1.5797004546670232, "grad_norm": 1.1123534440994263, "learning_rate": 9.62615675900838e-06, "loss": 0.6509, "step": 11813 }, { "epoch": 1.579834180262102, "grad_norm": 1.3697246313095093, "learning_rate": 9.624714025519703e-06, "loss": 0.7546, "step": 11814 }, { "epoch": 1.5799679058571812, "grad_norm": 1.1406394243240356, "learning_rate": 9.623271299853501e-06, "loss": 0.6603, "step": 11815 }, { "epoch": 1.58010163145226, "grad_norm": 1.0919586420059204, "learning_rate": 9.62182858203985e-06, "loss": 0.642, "step": 11816 }, { "epoch": 1.5802353570473389, "grad_norm": 1.0920943021774292, "learning_rate": 9.62038587210882e-06, "loss": 0.6775, "step": 11817 }, { "epoch": 1.5803690826424177, "grad_norm": 1.2340935468673706, "learning_rate": 9.618943170090483e-06, "loss": 0.7203, "step": 11818 }, { "epoch": 1.5805028082374966, "grad_norm": 1.270204782485962, "learning_rate": 9.617500476014909e-06, "loss": 0.677, "step": 11819 }, { "epoch": 1.5806365338325756, "grad_norm": 1.1748254299163818, "learning_rate": 9.616057789912176e-06, "loss": 0.5913, "step": 11820 }, { "epoch": 1.5807702594276545, "grad_norm": 1.165730357170105, "learning_rate": 9.614615111812346e-06, "loss": 0.7121, "step": 11821 }, { "epoch": 1.5809039850227333, "grad_norm": 1.4562066793441772, "learning_rate": 9.613172441745497e-06, "loss": 0.7946, "step": 11822 }, { "epoch": 1.5810377106178124, "grad_norm": 1.2353875637054443, "learning_rate": 9.611729779741701e-06, "loss": 0.7234, "step": 11823 }, { "epoch": 1.581171436212891, "grad_norm": 1.1224403381347656, "learning_rate": 9.610287125831021e-06, "loss": 0.6266, "step": 11824 }, { "epoch": 1.58130516180797, "grad_norm": 1.182630181312561, "learning_rate": 9.608844480043538e-06, "loss": 0.7127, "step": 11825 }, { "epoch": 1.581438887403049, "grad_norm": 1.1505976915359497, "learning_rate": 9.607401842409318e-06, "loss": 0.6399, "step": 11826 }, { "epoch": 1.5815726129981278, "grad_norm": 1.2644262313842773, "learning_rate": 9.605959212958425e-06, "loss": 0.7762, "step": 11827 }, { "epoch": 1.5817063385932069, "grad_norm": 1.1500595808029175, "learning_rate": 9.60451659172094e-06, "loss": 0.6393, "step": 11828 }, { "epoch": 1.5818400641882855, "grad_norm": 1.2151269912719727, "learning_rate": 9.603073978726925e-06, "loss": 0.6892, "step": 11829 }, { "epoch": 1.5819737897833646, "grad_norm": 1.2002170085906982, "learning_rate": 9.601631374006455e-06, "loss": 0.6754, "step": 11830 }, { "epoch": 1.5821075153784434, "grad_norm": 1.387992024421692, "learning_rate": 9.6001887775896e-06, "loss": 0.7261, "step": 11831 }, { "epoch": 1.5822412409735223, "grad_norm": 1.219621181488037, "learning_rate": 9.598746189506423e-06, "loss": 0.6779, "step": 11832 }, { "epoch": 1.5823749665686013, "grad_norm": 1.1249243021011353, "learning_rate": 9.597303609787001e-06, "loss": 0.7076, "step": 11833 }, { "epoch": 1.5825086921636802, "grad_norm": 1.2674144506454468, "learning_rate": 9.595861038461399e-06, "loss": 0.8013, "step": 11834 }, { "epoch": 1.582642417758759, "grad_norm": 1.14565110206604, "learning_rate": 9.594418475559684e-06, "loss": 0.725, "step": 11835 }, { "epoch": 1.5827761433538379, "grad_norm": 1.1068092584609985, "learning_rate": 9.592975921111933e-06, "loss": 0.6425, "step": 11836 }, { "epoch": 1.5829098689489167, "grad_norm": 1.1704996824264526, "learning_rate": 9.591533375148204e-06, "loss": 0.7067, "step": 11837 }, { "epoch": 1.5830435945439958, "grad_norm": 1.2061281204223633, "learning_rate": 9.590090837698576e-06, "loss": 0.6823, "step": 11838 }, { "epoch": 1.5831773201390746, "grad_norm": 1.1754319667816162, "learning_rate": 9.588648308793111e-06, "loss": 0.6636, "step": 11839 }, { "epoch": 1.5833110457341535, "grad_norm": 1.2038474082946777, "learning_rate": 9.587205788461875e-06, "loss": 0.6767, "step": 11840 }, { "epoch": 1.5834447713292326, "grad_norm": 1.3270734548568726, "learning_rate": 9.585763276734942e-06, "loss": 0.7381, "step": 11841 }, { "epoch": 1.5835784969243112, "grad_norm": 1.3576314449310303, "learning_rate": 9.58432077364238e-06, "loss": 0.6709, "step": 11842 }, { "epoch": 1.5837122225193903, "grad_norm": 1.133558750152588, "learning_rate": 9.582878279214248e-06, "loss": 0.5968, "step": 11843 }, { "epoch": 1.583845948114469, "grad_norm": 1.2783139944076538, "learning_rate": 9.581435793480623e-06, "loss": 0.757, "step": 11844 }, { "epoch": 1.583979673709548, "grad_norm": 1.2356040477752686, "learning_rate": 9.579993316471564e-06, "loss": 0.7328, "step": 11845 }, { "epoch": 1.584113399304627, "grad_norm": 1.4585750102996826, "learning_rate": 9.578550848217147e-06, "loss": 0.8072, "step": 11846 }, { "epoch": 1.5842471248997056, "grad_norm": 1.1954231262207031, "learning_rate": 9.577108388747433e-06, "loss": 0.6233, "step": 11847 }, { "epoch": 1.5843808504947847, "grad_norm": 1.344062328338623, "learning_rate": 9.57566593809249e-06, "loss": 0.759, "step": 11848 }, { "epoch": 1.5845145760898636, "grad_norm": 1.1410038471221924, "learning_rate": 9.574223496282382e-06, "loss": 0.6498, "step": 11849 }, { "epoch": 1.5846483016849424, "grad_norm": 1.1499236822128296, "learning_rate": 9.572781063347184e-06, "loss": 0.6757, "step": 11850 }, { "epoch": 1.5847820272800215, "grad_norm": 1.2958803176879883, "learning_rate": 9.57133863931695e-06, "loss": 0.7472, "step": 11851 }, { "epoch": 1.5849157528751003, "grad_norm": 1.289383053779602, "learning_rate": 9.569896224221754e-06, "loss": 0.7284, "step": 11852 }, { "epoch": 1.5850494784701792, "grad_norm": 1.1081980466842651, "learning_rate": 9.568453818091659e-06, "loss": 0.6135, "step": 11853 }, { "epoch": 1.5851832040652583, "grad_norm": 1.3121752738952637, "learning_rate": 9.567011420956732e-06, "loss": 0.671, "step": 11854 }, { "epoch": 1.5853169296603369, "grad_norm": 1.4571441411972046, "learning_rate": 9.565569032847037e-06, "loss": 0.691, "step": 11855 }, { "epoch": 1.585450655255416, "grad_norm": 1.3047864437103271, "learning_rate": 9.564126653792638e-06, "loss": 0.7111, "step": 11856 }, { "epoch": 1.5855843808504948, "grad_norm": 1.1097774505615234, "learning_rate": 9.562684283823607e-06, "loss": 0.5999, "step": 11857 }, { "epoch": 1.5857181064455736, "grad_norm": 1.1602849960327148, "learning_rate": 9.561241922970001e-06, "loss": 0.6871, "step": 11858 }, { "epoch": 1.5858518320406527, "grad_norm": 1.2042287588119507, "learning_rate": 9.559799571261885e-06, "loss": 0.7885, "step": 11859 }, { "epoch": 1.5859855576357313, "grad_norm": 1.1908502578735352, "learning_rate": 9.558357228729329e-06, "loss": 0.7059, "step": 11860 }, { "epoch": 1.5861192832308104, "grad_norm": 1.2264348268508911, "learning_rate": 9.556914895402391e-06, "loss": 0.7181, "step": 11861 }, { "epoch": 1.5862530088258893, "grad_norm": 1.1422291994094849, "learning_rate": 9.55547257131114e-06, "loss": 0.6575, "step": 11862 }, { "epoch": 1.5863867344209681, "grad_norm": 1.1978416442871094, "learning_rate": 9.554030256485638e-06, "loss": 0.6541, "step": 11863 }, { "epoch": 1.5865204600160472, "grad_norm": 1.255835771560669, "learning_rate": 9.552587950955946e-06, "loss": 0.7455, "step": 11864 }, { "epoch": 1.5866541856111258, "grad_norm": 1.1623185873031616, "learning_rate": 9.551145654752134e-06, "loss": 0.7129, "step": 11865 }, { "epoch": 1.5867879112062049, "grad_norm": 1.2972922325134277, "learning_rate": 9.549703367904259e-06, "loss": 0.7511, "step": 11866 }, { "epoch": 1.5869216368012837, "grad_norm": 1.134050965309143, "learning_rate": 9.548261090442386e-06, "loss": 0.6252, "step": 11867 }, { "epoch": 1.5870553623963626, "grad_norm": 1.1868950128555298, "learning_rate": 9.54681882239658e-06, "loss": 0.6764, "step": 11868 }, { "epoch": 1.5871890879914416, "grad_norm": 1.1928505897521973, "learning_rate": 9.545376563796898e-06, "loss": 0.6473, "step": 11869 }, { "epoch": 1.5873228135865205, "grad_norm": 1.157888650894165, "learning_rate": 9.54393431467341e-06, "loss": 0.5765, "step": 11870 }, { "epoch": 1.5874565391815993, "grad_norm": 1.1943787336349487, "learning_rate": 9.542492075056178e-06, "loss": 0.6869, "step": 11871 }, { "epoch": 1.5875902647766784, "grad_norm": 1.3316676616668701, "learning_rate": 9.541049844975255e-06, "loss": 0.7536, "step": 11872 }, { "epoch": 1.587723990371757, "grad_norm": 1.2263637781143188, "learning_rate": 9.53960762446071e-06, "loss": 0.7373, "step": 11873 }, { "epoch": 1.5878577159668361, "grad_norm": 1.2653452157974243, "learning_rate": 9.538165413542607e-06, "loss": 0.7069, "step": 11874 }, { "epoch": 1.587991441561915, "grad_norm": 1.1344497203826904, "learning_rate": 9.536723212251e-06, "loss": 0.6453, "step": 11875 }, { "epoch": 1.5881251671569938, "grad_norm": 1.2055330276489258, "learning_rate": 9.535281020615957e-06, "loss": 0.6397, "step": 11876 }, { "epoch": 1.5882588927520729, "grad_norm": 1.3709781169891357, "learning_rate": 9.533838838667534e-06, "loss": 0.7809, "step": 11877 }, { "epoch": 1.5883926183471515, "grad_norm": 1.2785402536392212, "learning_rate": 9.532396666435797e-06, "loss": 0.7613, "step": 11878 }, { "epoch": 1.5885263439422306, "grad_norm": 1.275596022605896, "learning_rate": 9.530954503950802e-06, "loss": 0.743, "step": 11879 }, { "epoch": 1.5886600695373094, "grad_norm": 1.3141602277755737, "learning_rate": 9.529512351242612e-06, "loss": 0.7882, "step": 11880 }, { "epoch": 1.5887937951323883, "grad_norm": 1.1664420366287231, "learning_rate": 9.528070208341286e-06, "loss": 0.7252, "step": 11881 }, { "epoch": 1.5889275207274673, "grad_norm": 1.2321202754974365, "learning_rate": 9.52662807527689e-06, "loss": 0.7462, "step": 11882 }, { "epoch": 1.589061246322546, "grad_norm": 1.1685811281204224, "learning_rate": 9.525185952079472e-06, "loss": 0.7213, "step": 11883 }, { "epoch": 1.589194971917625, "grad_norm": 1.1045488119125366, "learning_rate": 9.523743838779103e-06, "loss": 0.6913, "step": 11884 }, { "epoch": 1.589328697512704, "grad_norm": 1.1653701066970825, "learning_rate": 9.522301735405834e-06, "loss": 0.6423, "step": 11885 }, { "epoch": 1.5894624231077827, "grad_norm": 1.221944808959961, "learning_rate": 9.520859641989729e-06, "loss": 0.6666, "step": 11886 }, { "epoch": 1.5895961487028618, "grad_norm": 1.2114801406860352, "learning_rate": 9.519417558560851e-06, "loss": 0.7222, "step": 11887 }, { "epoch": 1.5897298742979407, "grad_norm": 1.199378490447998, "learning_rate": 9.517975485149248e-06, "loss": 0.6986, "step": 11888 }, { "epoch": 1.5898635998930195, "grad_norm": 1.275043249130249, "learning_rate": 9.516533421784989e-06, "loss": 0.7565, "step": 11889 }, { "epoch": 1.5899973254880986, "grad_norm": 1.1312413215637207, "learning_rate": 9.51509136849813e-06, "loss": 0.6531, "step": 11890 }, { "epoch": 1.5901310510831772, "grad_norm": 1.142961859703064, "learning_rate": 9.513649325318722e-06, "loss": 0.6163, "step": 11891 }, { "epoch": 1.5902647766782563, "grad_norm": 1.215346097946167, "learning_rate": 9.512207292276829e-06, "loss": 0.6958, "step": 11892 }, { "epoch": 1.5903985022733351, "grad_norm": 1.3247084617614746, "learning_rate": 9.51076526940251e-06, "loss": 0.7114, "step": 11893 }, { "epoch": 1.590532227868414, "grad_norm": 1.229372262954712, "learning_rate": 9.50932325672582e-06, "loss": 0.7134, "step": 11894 }, { "epoch": 1.590665953463493, "grad_norm": 1.0219619274139404, "learning_rate": 9.507881254276821e-06, "loss": 0.6383, "step": 11895 }, { "epoch": 1.5907996790585717, "grad_norm": 1.3593305349349976, "learning_rate": 9.506439262085561e-06, "loss": 0.8406, "step": 11896 }, { "epoch": 1.5909334046536507, "grad_norm": 1.1417534351348877, "learning_rate": 9.504997280182105e-06, "loss": 0.7048, "step": 11897 }, { "epoch": 1.5910671302487296, "grad_norm": 1.275374412536621, "learning_rate": 9.503555308596505e-06, "loss": 0.6521, "step": 11898 }, { "epoch": 1.5912008558438084, "grad_norm": 1.296343445777893, "learning_rate": 9.502113347358824e-06, "loss": 0.7224, "step": 11899 }, { "epoch": 1.5913345814388875, "grad_norm": 1.2174605131149292, "learning_rate": 9.50067139649911e-06, "loss": 0.6958, "step": 11900 }, { "epoch": 1.5914683070339664, "grad_norm": 1.2400259971618652, "learning_rate": 9.499229456047423e-06, "loss": 0.7636, "step": 11901 }, { "epoch": 1.5916020326290452, "grad_norm": 1.2371495962142944, "learning_rate": 9.49778752603382e-06, "loss": 0.7113, "step": 11902 }, { "epoch": 1.591735758224124, "grad_norm": 1.1945858001708984, "learning_rate": 9.496345606488357e-06, "loss": 0.7089, "step": 11903 }, { "epoch": 1.591869483819203, "grad_norm": 1.2199798822402954, "learning_rate": 9.494903697441084e-06, "loss": 0.7043, "step": 11904 }, { "epoch": 1.592003209414282, "grad_norm": 1.2072488069534302, "learning_rate": 9.493461798922062e-06, "loss": 0.7226, "step": 11905 }, { "epoch": 1.5921369350093608, "grad_norm": 1.2400561571121216, "learning_rate": 9.492019910961345e-06, "loss": 0.655, "step": 11906 }, { "epoch": 1.5922706606044397, "grad_norm": 1.304604411125183, "learning_rate": 9.490578033588985e-06, "loss": 0.7283, "step": 11907 }, { "epoch": 1.5924043861995187, "grad_norm": 1.1369304656982422, "learning_rate": 9.489136166835042e-06, "loss": 0.7029, "step": 11908 }, { "epoch": 1.5925381117945974, "grad_norm": 1.1227937936782837, "learning_rate": 9.487694310729562e-06, "loss": 0.6797, "step": 11909 }, { "epoch": 1.5926718373896764, "grad_norm": 1.233720302581787, "learning_rate": 9.486252465302608e-06, "loss": 0.6856, "step": 11910 }, { "epoch": 1.5928055629847553, "grad_norm": 1.2412192821502686, "learning_rate": 9.484810630584227e-06, "loss": 0.6795, "step": 11911 }, { "epoch": 1.5929392885798341, "grad_norm": 1.2287447452545166, "learning_rate": 9.483368806604477e-06, "loss": 0.659, "step": 11912 }, { "epoch": 1.5930730141749132, "grad_norm": 1.2471702098846436, "learning_rate": 9.481926993393408e-06, "loss": 0.7103, "step": 11913 }, { "epoch": 1.5932067397699918, "grad_norm": 1.3074742555618286, "learning_rate": 9.480485190981073e-06, "loss": 0.6966, "step": 11914 }, { "epoch": 1.593340465365071, "grad_norm": 1.0657333135604858, "learning_rate": 9.479043399397534e-06, "loss": 0.6447, "step": 11915 }, { "epoch": 1.5934741909601498, "grad_norm": 1.2848750352859497, "learning_rate": 9.477601618672834e-06, "loss": 0.7556, "step": 11916 }, { "epoch": 1.5936079165552286, "grad_norm": 1.1942635774612427, "learning_rate": 9.476159848837026e-06, "loss": 0.7361, "step": 11917 }, { "epoch": 1.5937416421503077, "grad_norm": 1.3195042610168457, "learning_rate": 9.474718089920167e-06, "loss": 0.6931, "step": 11918 }, { "epoch": 1.5938753677453865, "grad_norm": 1.2585618495941162, "learning_rate": 9.473276341952307e-06, "loss": 0.7, "step": 11919 }, { "epoch": 1.5940090933404654, "grad_norm": 1.2516577243804932, "learning_rate": 9.471834604963495e-06, "loss": 0.7288, "step": 11920 }, { "epoch": 1.5941428189355442, "grad_norm": 1.2190533876419067, "learning_rate": 9.470392878983789e-06, "loss": 0.6964, "step": 11921 }, { "epoch": 1.594276544530623, "grad_norm": 1.350277304649353, "learning_rate": 9.46895116404323e-06, "loss": 0.7509, "step": 11922 }, { "epoch": 1.5944102701257021, "grad_norm": 1.1902166604995728, "learning_rate": 9.467509460171884e-06, "loss": 0.6207, "step": 11923 }, { "epoch": 1.594543995720781, "grad_norm": 1.3256665468215942, "learning_rate": 9.466067767399789e-06, "loss": 0.7544, "step": 11924 }, { "epoch": 1.5946777213158598, "grad_norm": 1.157500982284546, "learning_rate": 9.464626085757002e-06, "loss": 0.6301, "step": 11925 }, { "epoch": 1.594811446910939, "grad_norm": 1.1786853075027466, "learning_rate": 9.463184415273572e-06, "loss": 0.6678, "step": 11926 }, { "epoch": 1.5949451725060175, "grad_norm": 1.249001145362854, "learning_rate": 9.461742755979551e-06, "loss": 0.719, "step": 11927 }, { "epoch": 1.5950788981010966, "grad_norm": 1.1630702018737793, "learning_rate": 9.460301107904982e-06, "loss": 0.7154, "step": 11928 }, { "epoch": 1.5952126236961754, "grad_norm": 1.4249745607376099, "learning_rate": 9.458859471079925e-06, "loss": 0.7844, "step": 11929 }, { "epoch": 1.5953463492912543, "grad_norm": 1.330048680305481, "learning_rate": 9.45741784553442e-06, "loss": 0.7218, "step": 11930 }, { "epoch": 1.5954800748863334, "grad_norm": 1.352522611618042, "learning_rate": 9.455976231298525e-06, "loss": 0.751, "step": 11931 }, { "epoch": 1.595613800481412, "grad_norm": 1.0899754762649536, "learning_rate": 9.454534628402284e-06, "loss": 0.6503, "step": 11932 }, { "epoch": 1.595747526076491, "grad_norm": 1.149268388748169, "learning_rate": 9.453093036875742e-06, "loss": 0.6418, "step": 11933 }, { "epoch": 1.59588125167157, "grad_norm": 1.0731477737426758, "learning_rate": 9.451651456748958e-06, "loss": 0.6217, "step": 11934 }, { "epoch": 1.5960149772666488, "grad_norm": 1.1634063720703125, "learning_rate": 9.450209888051976e-06, "loss": 0.6365, "step": 11935 }, { "epoch": 1.5961487028617278, "grad_norm": 1.3404580354690552, "learning_rate": 9.448768330814837e-06, "loss": 0.7318, "step": 11936 }, { "epoch": 1.5962824284568067, "grad_norm": 1.0767606496810913, "learning_rate": 9.447326785067596e-06, "loss": 0.635, "step": 11937 }, { "epoch": 1.5964161540518855, "grad_norm": 1.1781765222549438, "learning_rate": 9.445885250840301e-06, "loss": 0.711, "step": 11938 }, { "epoch": 1.5965498796469644, "grad_norm": 1.3161048889160156, "learning_rate": 9.444443728162998e-06, "loss": 0.7903, "step": 11939 }, { "epoch": 1.5966836052420432, "grad_norm": 1.3514540195465088, "learning_rate": 9.443002217065735e-06, "loss": 0.7769, "step": 11940 }, { "epoch": 1.5968173308371223, "grad_norm": 1.0919398069381714, "learning_rate": 9.441560717578552e-06, "loss": 0.6269, "step": 11941 }, { "epoch": 1.5969510564322011, "grad_norm": 1.2636088132858276, "learning_rate": 9.440119229731508e-06, "loss": 0.7168, "step": 11942 }, { "epoch": 1.59708478202728, "grad_norm": 1.208398699760437, "learning_rate": 9.438677753554642e-06, "loss": 0.7571, "step": 11943 }, { "epoch": 1.597218507622359, "grad_norm": 1.0871726274490356, "learning_rate": 9.437236289077998e-06, "loss": 0.5952, "step": 11944 }, { "epoch": 1.5973522332174377, "grad_norm": 1.3624670505523682, "learning_rate": 9.435794836331627e-06, "loss": 0.7292, "step": 11945 }, { "epoch": 1.5974859588125168, "grad_norm": 1.1158134937286377, "learning_rate": 9.43435339534557e-06, "loss": 0.7283, "step": 11946 }, { "epoch": 1.5976196844075956, "grad_norm": 1.343318223953247, "learning_rate": 9.432911966149879e-06, "loss": 0.6923, "step": 11947 }, { "epoch": 1.5977534100026745, "grad_norm": 1.3452696800231934, "learning_rate": 9.431470548774597e-06, "loss": 0.663, "step": 11948 }, { "epoch": 1.5978871355977535, "grad_norm": 1.219268798828125, "learning_rate": 9.43002914324976e-06, "loss": 0.6624, "step": 11949 }, { "epoch": 1.5980208611928322, "grad_norm": 1.0390870571136475, "learning_rate": 9.428587749605426e-06, "loss": 0.6402, "step": 11950 }, { "epoch": 1.5981545867879112, "grad_norm": 1.106889009475708, "learning_rate": 9.427146367871634e-06, "loss": 0.6457, "step": 11951 }, { "epoch": 1.59828831238299, "grad_norm": 1.2984715700149536, "learning_rate": 9.425704998078422e-06, "loss": 0.685, "step": 11952 }, { "epoch": 1.598422037978069, "grad_norm": 1.1975985765457153, "learning_rate": 9.424263640255846e-06, "loss": 0.6738, "step": 11953 }, { "epoch": 1.598555763573148, "grad_norm": 1.2340142726898193, "learning_rate": 9.422822294433939e-06, "loss": 0.7368, "step": 11954 }, { "epoch": 1.5986894891682268, "grad_norm": 1.328934669494629, "learning_rate": 9.421380960642754e-06, "loss": 0.6825, "step": 11955 }, { "epoch": 1.5988232147633057, "grad_norm": 1.3042895793914795, "learning_rate": 9.419939638912325e-06, "loss": 0.6861, "step": 11956 }, { "epoch": 1.5989569403583848, "grad_norm": 1.3110930919647217, "learning_rate": 9.4184983292727e-06, "loss": 0.7743, "step": 11957 }, { "epoch": 1.5990906659534634, "grad_norm": 1.3622463941574097, "learning_rate": 9.41705703175392e-06, "loss": 0.8152, "step": 11958 }, { "epoch": 1.5992243915485425, "grad_norm": 1.3875211477279663, "learning_rate": 9.415615746386034e-06, "loss": 0.7853, "step": 11959 }, { "epoch": 1.5993581171436213, "grad_norm": 1.251090168952942, "learning_rate": 9.41417447319907e-06, "loss": 0.6437, "step": 11960 }, { "epoch": 1.5994918427387002, "grad_norm": 1.3227801322937012, "learning_rate": 9.412733212223086e-06, "loss": 0.7195, "step": 11961 }, { "epoch": 1.5996255683337792, "grad_norm": 1.2727054357528687, "learning_rate": 9.41129196348811e-06, "loss": 0.718, "step": 11962 }, { "epoch": 1.5997592939288579, "grad_norm": 1.2788254022598267, "learning_rate": 9.409850727024194e-06, "loss": 0.7137, "step": 11963 }, { "epoch": 1.599893019523937, "grad_norm": 1.2815759181976318, "learning_rate": 9.408409502861374e-06, "loss": 0.8015, "step": 11964 }, { "epoch": 1.6000267451190158, "grad_norm": 1.2300723791122437, "learning_rate": 9.40696829102969e-06, "loss": 0.6826, "step": 11965 }, { "epoch": 1.6001604707140946, "grad_norm": 1.2241438627243042, "learning_rate": 9.405527091559187e-06, "loss": 0.7005, "step": 11966 }, { "epoch": 1.6002941963091737, "grad_norm": 1.3267165422439575, "learning_rate": 9.404085904479903e-06, "loss": 0.707, "step": 11967 }, { "epoch": 1.6004279219042523, "grad_norm": 1.2079559564590454, "learning_rate": 9.402644729821876e-06, "loss": 0.6803, "step": 11968 }, { "epoch": 1.6005616474993314, "grad_norm": 1.1369354724884033, "learning_rate": 9.40120356761515e-06, "loss": 0.7341, "step": 11969 }, { "epoch": 1.6006953730944102, "grad_norm": 1.1219432353973389, "learning_rate": 9.39976241788976e-06, "loss": 0.682, "step": 11970 }, { "epoch": 1.600829098689489, "grad_norm": 1.0331940650939941, "learning_rate": 9.398321280675748e-06, "loss": 0.648, "step": 11971 }, { "epoch": 1.6009628242845682, "grad_norm": 1.2097536325454712, "learning_rate": 9.396880156003157e-06, "loss": 0.7325, "step": 11972 }, { "epoch": 1.601096549879647, "grad_norm": 1.1824707984924316, "learning_rate": 9.395439043902017e-06, "loss": 0.6673, "step": 11973 }, { "epoch": 1.6012302754747259, "grad_norm": 1.1807588338851929, "learning_rate": 9.393997944402378e-06, "loss": 0.7126, "step": 11974 }, { "epoch": 1.601364001069805, "grad_norm": 1.2512067556381226, "learning_rate": 9.392556857534267e-06, "loss": 0.7189, "step": 11975 }, { "epoch": 1.6014977266648835, "grad_norm": 1.324547529220581, "learning_rate": 9.39111578332773e-06, "loss": 0.8405, "step": 11976 }, { "epoch": 1.6016314522599626, "grad_norm": 1.257055401802063, "learning_rate": 9.389674721812799e-06, "loss": 0.7111, "step": 11977 }, { "epoch": 1.6017651778550415, "grad_norm": 1.2335609197616577, "learning_rate": 9.388233673019513e-06, "loss": 0.7436, "step": 11978 }, { "epoch": 1.6018989034501203, "grad_norm": 1.1616965532302856, "learning_rate": 9.386792636977915e-06, "loss": 0.6718, "step": 11979 }, { "epoch": 1.6020326290451994, "grad_norm": 1.252840518951416, "learning_rate": 9.38535161371804e-06, "loss": 0.6704, "step": 11980 }, { "epoch": 1.602166354640278, "grad_norm": 1.2710531949996948, "learning_rate": 9.383910603269915e-06, "loss": 0.7209, "step": 11981 }, { "epoch": 1.602300080235357, "grad_norm": 1.2172224521636963, "learning_rate": 9.38246960566359e-06, "loss": 0.6856, "step": 11982 }, { "epoch": 1.602433805830436, "grad_norm": 1.2084267139434814, "learning_rate": 9.38102862092909e-06, "loss": 0.6898, "step": 11983 }, { "epoch": 1.6025675314255148, "grad_norm": 1.258206844329834, "learning_rate": 9.379587649096457e-06, "loss": 0.6996, "step": 11984 }, { "epoch": 1.6027012570205939, "grad_norm": 1.1563105583190918, "learning_rate": 9.37814669019573e-06, "loss": 0.6485, "step": 11985 }, { "epoch": 1.6028349826156725, "grad_norm": 1.1429296731948853, "learning_rate": 9.376705744256936e-06, "loss": 0.6516, "step": 11986 }, { "epoch": 1.6029687082107515, "grad_norm": 1.2605894804000854, "learning_rate": 9.375264811310117e-06, "loss": 0.6794, "step": 11987 }, { "epoch": 1.6031024338058304, "grad_norm": 1.1889954805374146, "learning_rate": 9.373823891385305e-06, "loss": 0.6498, "step": 11988 }, { "epoch": 1.6032361594009092, "grad_norm": 1.3865443468093872, "learning_rate": 9.372382984512533e-06, "loss": 0.7759, "step": 11989 }, { "epoch": 1.6033698849959883, "grad_norm": 1.1048980951309204, "learning_rate": 9.370942090721838e-06, "loss": 0.661, "step": 11990 }, { "epoch": 1.6035036105910672, "grad_norm": 1.3671259880065918, "learning_rate": 9.369501210043251e-06, "loss": 0.7569, "step": 11991 }, { "epoch": 1.603637336186146, "grad_norm": 1.2366061210632324, "learning_rate": 9.368060342506813e-06, "loss": 0.6254, "step": 11992 }, { "epoch": 1.603771061781225, "grad_norm": 1.3501685857772827, "learning_rate": 9.366619488142553e-06, "loss": 0.8084, "step": 11993 }, { "epoch": 1.6039047873763037, "grad_norm": 1.1714917421340942, "learning_rate": 9.365178646980497e-06, "loss": 0.6292, "step": 11994 }, { "epoch": 1.6040385129713828, "grad_norm": 1.269734263420105, "learning_rate": 9.36373781905069e-06, "loss": 0.7336, "step": 11995 }, { "epoch": 1.6041722385664616, "grad_norm": 1.2444238662719727, "learning_rate": 9.362297004383157e-06, "loss": 0.6342, "step": 11996 }, { "epoch": 1.6043059641615405, "grad_norm": 1.1795012950897217, "learning_rate": 9.36085620300793e-06, "loss": 0.7266, "step": 11997 }, { "epoch": 1.6044396897566195, "grad_norm": 1.2577054500579834, "learning_rate": 9.359415414955049e-06, "loss": 0.6763, "step": 11998 }, { "epoch": 1.6045734153516982, "grad_norm": 1.2974488735198975, "learning_rate": 9.357974640254537e-06, "loss": 0.7404, "step": 11999 }, { "epoch": 1.6047071409467772, "grad_norm": 1.3762050867080688, "learning_rate": 9.356533878936434e-06, "loss": 0.7315, "step": 12000 }, { "epoch": 1.604840866541856, "grad_norm": 1.1575360298156738, "learning_rate": 9.355093131030764e-06, "loss": 0.6351, "step": 12001 }, { "epoch": 1.604974592136935, "grad_norm": 1.2466273307800293, "learning_rate": 9.353652396567558e-06, "loss": 0.7248, "step": 12002 }, { "epoch": 1.605108317732014, "grad_norm": 1.2401421070098877, "learning_rate": 9.352211675576852e-06, "loss": 0.7093, "step": 12003 }, { "epoch": 1.6052420433270929, "grad_norm": 1.2683452367782593, "learning_rate": 9.350770968088675e-06, "loss": 0.6597, "step": 12004 }, { "epoch": 1.6053757689221717, "grad_norm": 1.2470391988754272, "learning_rate": 9.349330274133051e-06, "loss": 0.7096, "step": 12005 }, { "epoch": 1.6055094945172506, "grad_norm": 1.4760842323303223, "learning_rate": 9.34788959374002e-06, "loss": 0.7248, "step": 12006 }, { "epoch": 1.6056432201123294, "grad_norm": 1.3911807537078857, "learning_rate": 9.346448926939603e-06, "loss": 0.8426, "step": 12007 }, { "epoch": 1.6057769457074085, "grad_norm": 1.2453354597091675, "learning_rate": 9.345008273761836e-06, "loss": 0.6926, "step": 12008 }, { "epoch": 1.6059106713024873, "grad_norm": 1.3687275648117065, "learning_rate": 9.343567634236742e-06, "loss": 0.7061, "step": 12009 }, { "epoch": 1.6060443968975662, "grad_norm": 1.2641733884811401, "learning_rate": 9.342127008394351e-06, "loss": 0.7058, "step": 12010 }, { "epoch": 1.6061781224926452, "grad_norm": 1.227340579032898, "learning_rate": 9.340686396264698e-06, "loss": 0.7001, "step": 12011 }, { "epoch": 1.6063118480877239, "grad_norm": 1.179430603981018, "learning_rate": 9.339245797877804e-06, "loss": 0.6977, "step": 12012 }, { "epoch": 1.606445573682803, "grad_norm": 1.2974070310592651, "learning_rate": 9.337805213263698e-06, "loss": 0.7306, "step": 12013 }, { "epoch": 1.6065792992778818, "grad_norm": 1.2829806804656982, "learning_rate": 9.33636464245241e-06, "loss": 0.7833, "step": 12014 }, { "epoch": 1.6067130248729606, "grad_norm": 1.2561646699905396, "learning_rate": 9.334924085473964e-06, "loss": 0.6155, "step": 12015 }, { "epoch": 1.6068467504680397, "grad_norm": 1.3362137079238892, "learning_rate": 9.333483542358391e-06, "loss": 0.7378, "step": 12016 }, { "epoch": 1.6069804760631183, "grad_norm": 1.2331461906433105, "learning_rate": 9.332043013135717e-06, "loss": 0.6869, "step": 12017 }, { "epoch": 1.6071142016581974, "grad_norm": 1.129315733909607, "learning_rate": 9.330602497835962e-06, "loss": 0.7059, "step": 12018 }, { "epoch": 1.6072479272532763, "grad_norm": 1.179667353630066, "learning_rate": 9.329161996489162e-06, "loss": 0.6443, "step": 12019 }, { "epoch": 1.607381652848355, "grad_norm": 1.4535558223724365, "learning_rate": 9.32772150912534e-06, "loss": 0.7746, "step": 12020 }, { "epoch": 1.6075153784434342, "grad_norm": 1.175001621246338, "learning_rate": 9.326281035774513e-06, "loss": 0.7108, "step": 12021 }, { "epoch": 1.607649104038513, "grad_norm": 1.262392282485962, "learning_rate": 9.324840576466718e-06, "loss": 0.6805, "step": 12022 }, { "epoch": 1.6077828296335919, "grad_norm": 1.1330912113189697, "learning_rate": 9.323400131231971e-06, "loss": 0.6432, "step": 12023 }, { "epoch": 1.6079165552286707, "grad_norm": 1.2967466115951538, "learning_rate": 9.321959700100306e-06, "loss": 0.6597, "step": 12024 }, { "epoch": 1.6080502808237496, "grad_norm": 1.2467228174209595, "learning_rate": 9.320519283101742e-06, "loss": 0.7339, "step": 12025 }, { "epoch": 1.6081840064188286, "grad_norm": 1.2563709020614624, "learning_rate": 9.319078880266299e-06, "loss": 0.7231, "step": 12026 }, { "epoch": 1.6083177320139075, "grad_norm": 1.1622490882873535, "learning_rate": 9.31763849162401e-06, "loss": 0.6697, "step": 12027 }, { "epoch": 1.6084514576089863, "grad_norm": 1.245871901512146, "learning_rate": 9.316198117204891e-06, "loss": 0.7258, "step": 12028 }, { "epoch": 1.6085851832040654, "grad_norm": 1.226513147354126, "learning_rate": 9.314757757038966e-06, "loss": 0.7238, "step": 12029 }, { "epoch": 1.608718908799144, "grad_norm": 1.2418453693389893, "learning_rate": 9.313317411156265e-06, "loss": 0.6659, "step": 12030 }, { "epoch": 1.608852634394223, "grad_norm": 1.2755424976348877, "learning_rate": 9.311877079586799e-06, "loss": 0.7141, "step": 12031 }, { "epoch": 1.608986359989302, "grad_norm": 1.2642672061920166, "learning_rate": 9.310436762360603e-06, "loss": 0.7384, "step": 12032 }, { "epoch": 1.6091200855843808, "grad_norm": 1.2287489175796509, "learning_rate": 9.308996459507692e-06, "loss": 0.6932, "step": 12033 }, { "epoch": 1.6092538111794599, "grad_norm": 1.1590118408203125, "learning_rate": 9.307556171058085e-06, "loss": 0.6413, "step": 12034 }, { "epoch": 1.6093875367745385, "grad_norm": 1.1187297105789185, "learning_rate": 9.306115897041808e-06, "loss": 0.649, "step": 12035 }, { "epoch": 1.6095212623696176, "grad_norm": 1.058962106704712, "learning_rate": 9.304675637488884e-06, "loss": 0.6241, "step": 12036 }, { "epoch": 1.6096549879646964, "grad_norm": 1.2891771793365479, "learning_rate": 9.303235392429328e-06, "loss": 0.7907, "step": 12037 }, { "epoch": 1.6097887135597753, "grad_norm": 1.1527633666992188, "learning_rate": 9.301795161893166e-06, "loss": 0.6882, "step": 12038 }, { "epoch": 1.6099224391548543, "grad_norm": 1.1895396709442139, "learning_rate": 9.30035494591041e-06, "loss": 0.7568, "step": 12039 }, { "epoch": 1.6100561647499332, "grad_norm": 1.1801073551177979, "learning_rate": 9.298914744511093e-06, "loss": 0.7377, "step": 12040 }, { "epoch": 1.610189890345012, "grad_norm": 1.1962250471115112, "learning_rate": 9.297474557725225e-06, "loss": 0.67, "step": 12041 }, { "epoch": 1.6103236159400909, "grad_norm": 1.3731606006622314, "learning_rate": 9.296034385582823e-06, "loss": 0.7016, "step": 12042 }, { "epoch": 1.6104573415351697, "grad_norm": 1.2359511852264404, "learning_rate": 9.294594228113917e-06, "loss": 0.7167, "step": 12043 }, { "epoch": 1.6105910671302488, "grad_norm": 1.1612904071807861, "learning_rate": 9.293154085348519e-06, "loss": 0.7258, "step": 12044 }, { "epoch": 1.6107247927253276, "grad_norm": 1.1835960149765015, "learning_rate": 9.291713957316642e-06, "loss": 0.652, "step": 12045 }, { "epoch": 1.6108585183204065, "grad_norm": 1.3312981128692627, "learning_rate": 9.290273844048316e-06, "loss": 0.7622, "step": 12046 }, { "epoch": 1.6109922439154856, "grad_norm": 1.198354959487915, "learning_rate": 9.288833745573547e-06, "loss": 0.6502, "step": 12047 }, { "epoch": 1.6111259695105642, "grad_norm": 1.1594412326812744, "learning_rate": 9.287393661922361e-06, "loss": 0.7136, "step": 12048 }, { "epoch": 1.6112596951056433, "grad_norm": 1.2196731567382812, "learning_rate": 9.285953593124774e-06, "loss": 0.6986, "step": 12049 }, { "epoch": 1.6113934207007221, "grad_norm": 1.0901768207550049, "learning_rate": 9.284513539210798e-06, "loss": 0.692, "step": 12050 }, { "epoch": 1.611527146295801, "grad_norm": 1.1655796766281128, "learning_rate": 9.283073500210456e-06, "loss": 0.592, "step": 12051 }, { "epoch": 1.61166087189088, "grad_norm": 1.0902522802352905, "learning_rate": 9.28163347615376e-06, "loss": 0.641, "step": 12052 }, { "epoch": 1.6117945974859587, "grad_norm": 1.2153087854385376, "learning_rate": 9.280193467070722e-06, "loss": 0.6631, "step": 12053 }, { "epoch": 1.6119283230810377, "grad_norm": 1.1211543083190918, "learning_rate": 9.278753472991366e-06, "loss": 0.6138, "step": 12054 }, { "epoch": 1.6120620486761166, "grad_norm": 1.160091757774353, "learning_rate": 9.2773134939457e-06, "loss": 0.665, "step": 12055 }, { "epoch": 1.6121957742711954, "grad_norm": 1.2990282773971558, "learning_rate": 9.275873529963751e-06, "loss": 0.6839, "step": 12056 }, { "epoch": 1.6123294998662745, "grad_norm": 1.2587974071502686, "learning_rate": 9.274433581075521e-06, "loss": 0.6482, "step": 12057 }, { "epoch": 1.6124632254613533, "grad_norm": 1.3218412399291992, "learning_rate": 9.272993647311027e-06, "loss": 0.7529, "step": 12058 }, { "epoch": 1.6125969510564322, "grad_norm": 1.2248531579971313, "learning_rate": 9.271553728700287e-06, "loss": 0.7153, "step": 12059 }, { "epoch": 1.6127306766515113, "grad_norm": 1.153381109237671, "learning_rate": 9.270113825273311e-06, "loss": 0.6549, "step": 12060 }, { "epoch": 1.61286440224659, "grad_norm": 1.3794678449630737, "learning_rate": 9.268673937060113e-06, "loss": 0.7035, "step": 12061 }, { "epoch": 1.612998127841669, "grad_norm": 1.1833741664886475, "learning_rate": 9.26723406409071e-06, "loss": 0.6875, "step": 12062 }, { "epoch": 1.6131318534367478, "grad_norm": 1.1151732206344604, "learning_rate": 9.265794206395108e-06, "loss": 0.6272, "step": 12063 }, { "epoch": 1.6132655790318267, "grad_norm": 1.2501327991485596, "learning_rate": 9.264354364003327e-06, "loss": 0.7366, "step": 12064 }, { "epoch": 1.6133993046269057, "grad_norm": 1.3475669622421265, "learning_rate": 9.262914536945377e-06, "loss": 0.6866, "step": 12065 }, { "epoch": 1.6135330302219844, "grad_norm": 1.276896357536316, "learning_rate": 9.261474725251261e-06, "loss": 0.6487, "step": 12066 }, { "epoch": 1.6136667558170634, "grad_norm": 1.254490852355957, "learning_rate": 9.260034928951002e-06, "loss": 0.8003, "step": 12067 }, { "epoch": 1.6138004814121423, "grad_norm": 1.4288378953933716, "learning_rate": 9.258595148074604e-06, "loss": 0.764, "step": 12068 }, { "epoch": 1.6139342070072211, "grad_norm": 1.2978192567825317, "learning_rate": 9.257155382652086e-06, "loss": 0.7145, "step": 12069 }, { "epoch": 1.6140679326023002, "grad_norm": 1.259850025177002, "learning_rate": 9.255715632713452e-06, "loss": 0.6853, "step": 12070 }, { "epoch": 1.6142016581973788, "grad_norm": 1.2559986114501953, "learning_rate": 9.254275898288709e-06, "loss": 0.6892, "step": 12071 }, { "epoch": 1.614335383792458, "grad_norm": 1.2673776149749756, "learning_rate": 9.252836179407876e-06, "loss": 0.7321, "step": 12072 }, { "epoch": 1.6144691093875367, "grad_norm": 1.0878359079360962, "learning_rate": 9.251396476100955e-06, "loss": 0.6052, "step": 12073 }, { "epoch": 1.6146028349826156, "grad_norm": 1.0732975006103516, "learning_rate": 9.249956788397956e-06, "loss": 0.6998, "step": 12074 }, { "epoch": 1.6147365605776947, "grad_norm": 1.3220690488815308, "learning_rate": 9.248517116328897e-06, "loss": 0.7021, "step": 12075 }, { "epoch": 1.6148702861727735, "grad_norm": 1.2409942150115967, "learning_rate": 9.247077459923773e-06, "loss": 0.7002, "step": 12076 }, { "epoch": 1.6150040117678524, "grad_norm": 1.3409161567687988, "learning_rate": 9.245637819212602e-06, "loss": 0.7866, "step": 12077 }, { "epoch": 1.6151377373629314, "grad_norm": 1.257031798362732, "learning_rate": 9.244198194225392e-06, "loss": 0.7429, "step": 12078 }, { "epoch": 1.61527146295801, "grad_norm": 1.286256194114685, "learning_rate": 9.24275858499214e-06, "loss": 0.7193, "step": 12079 }, { "epoch": 1.6154051885530891, "grad_norm": 1.266829252243042, "learning_rate": 9.241318991542865e-06, "loss": 0.7503, "step": 12080 }, { "epoch": 1.615538914148168, "grad_norm": 1.106519341468811, "learning_rate": 9.239879413907571e-06, "loss": 0.6455, "step": 12081 }, { "epoch": 1.6156726397432468, "grad_norm": 1.3861799240112305, "learning_rate": 9.23843985211626e-06, "loss": 0.7102, "step": 12082 }, { "epoch": 1.615806365338326, "grad_norm": 1.2288228273391724, "learning_rate": 9.237000306198944e-06, "loss": 0.707, "step": 12083 }, { "epoch": 1.6159400909334045, "grad_norm": 1.1513835191726685, "learning_rate": 9.235560776185623e-06, "loss": 0.636, "step": 12084 }, { "epoch": 1.6160738165284836, "grad_norm": 1.15359628200531, "learning_rate": 9.234121262106312e-06, "loss": 0.6712, "step": 12085 }, { "epoch": 1.6162075421235624, "grad_norm": 1.3144891262054443, "learning_rate": 9.232681763991006e-06, "loss": 0.6942, "step": 12086 }, { "epoch": 1.6163412677186413, "grad_norm": 1.0848164558410645, "learning_rate": 9.231242281869714e-06, "loss": 0.5859, "step": 12087 }, { "epoch": 1.6164749933137204, "grad_norm": 1.2962980270385742, "learning_rate": 9.229802815772444e-06, "loss": 0.7549, "step": 12088 }, { "epoch": 1.616608718908799, "grad_norm": 1.1979405879974365, "learning_rate": 9.228363365729198e-06, "loss": 0.7137, "step": 12089 }, { "epoch": 1.616742444503878, "grad_norm": 1.1655186414718628, "learning_rate": 9.226923931769973e-06, "loss": 0.6677, "step": 12090 }, { "epoch": 1.616876170098957, "grad_norm": 1.3142768144607544, "learning_rate": 9.225484513924786e-06, "loss": 0.6559, "step": 12091 }, { "epoch": 1.6170098956940357, "grad_norm": 1.4054341316223145, "learning_rate": 9.224045112223627e-06, "loss": 0.7982, "step": 12092 }, { "epoch": 1.6171436212891148, "grad_norm": 1.242814540863037, "learning_rate": 9.222605726696509e-06, "loss": 0.7677, "step": 12093 }, { "epoch": 1.6172773468841937, "grad_norm": 1.3603601455688477, "learning_rate": 9.22116635737343e-06, "loss": 0.7372, "step": 12094 }, { "epoch": 1.6174110724792725, "grad_norm": 1.1627155542373657, "learning_rate": 9.21972700428439e-06, "loss": 0.6837, "step": 12095 }, { "epoch": 1.6175447980743516, "grad_norm": 1.0614711046218872, "learning_rate": 9.2182876674594e-06, "loss": 0.651, "step": 12096 }, { "epoch": 1.6176785236694302, "grad_norm": 1.309293270111084, "learning_rate": 9.216848346928455e-06, "loss": 0.6873, "step": 12097 }, { "epoch": 1.6178122492645093, "grad_norm": 1.2187672853469849, "learning_rate": 9.215409042721553e-06, "loss": 0.6364, "step": 12098 }, { "epoch": 1.6179459748595881, "grad_norm": 1.1157554388046265, "learning_rate": 9.213969754868699e-06, "loss": 0.6541, "step": 12099 }, { "epoch": 1.618079700454667, "grad_norm": 1.1662760972976685, "learning_rate": 9.212530483399891e-06, "loss": 0.6874, "step": 12100 }, { "epoch": 1.618213426049746, "grad_norm": 1.288918137550354, "learning_rate": 9.211091228345137e-06, "loss": 0.7848, "step": 12101 }, { "epoch": 1.6183471516448247, "grad_norm": 1.3652713298797607, "learning_rate": 9.209651989734431e-06, "loss": 0.7237, "step": 12102 }, { "epoch": 1.6184808772399037, "grad_norm": 1.1923158168792725, "learning_rate": 9.20821276759777e-06, "loss": 0.6412, "step": 12103 }, { "epoch": 1.6186146028349826, "grad_norm": 1.1096692085266113, "learning_rate": 9.206773561965158e-06, "loss": 0.6046, "step": 12104 }, { "epoch": 1.6187483284300614, "grad_norm": 1.127875804901123, "learning_rate": 9.205334372866593e-06, "loss": 0.6234, "step": 12105 }, { "epoch": 1.6188820540251405, "grad_norm": 1.257360577583313, "learning_rate": 9.203895200332069e-06, "loss": 0.6862, "step": 12106 }, { "epoch": 1.6190157796202194, "grad_norm": 1.3931533098220825, "learning_rate": 9.20245604439159e-06, "loss": 0.7081, "step": 12107 }, { "epoch": 1.6191495052152982, "grad_norm": 1.0419423580169678, "learning_rate": 9.20101690507515e-06, "loss": 0.6241, "step": 12108 }, { "epoch": 1.619283230810377, "grad_norm": 1.205247402191162, "learning_rate": 9.199577782412752e-06, "loss": 0.7334, "step": 12109 }, { "epoch": 1.619416956405456, "grad_norm": 1.338371753692627, "learning_rate": 9.198138676434387e-06, "loss": 0.7165, "step": 12110 }, { "epoch": 1.619550682000535, "grad_norm": 1.276588797569275, "learning_rate": 9.196699587170053e-06, "loss": 0.7499, "step": 12111 }, { "epoch": 1.6196844075956138, "grad_norm": 1.4011569023132324, "learning_rate": 9.195260514649748e-06, "loss": 0.7614, "step": 12112 }, { "epoch": 1.6198181331906927, "grad_norm": 1.2182762622833252, "learning_rate": 9.19382145890347e-06, "loss": 0.6607, "step": 12113 }, { "epoch": 1.6199518587857717, "grad_norm": 1.2512187957763672, "learning_rate": 9.192382419961208e-06, "loss": 0.6295, "step": 12114 }, { "epoch": 1.6200855843808504, "grad_norm": 1.3180410861968994, "learning_rate": 9.190943397852966e-06, "loss": 0.7387, "step": 12115 }, { "epoch": 1.6202193099759294, "grad_norm": 1.247375249862671, "learning_rate": 9.18950439260873e-06, "loss": 0.6517, "step": 12116 }, { "epoch": 1.6203530355710083, "grad_norm": 1.2085763216018677, "learning_rate": 9.188065404258502e-06, "loss": 0.7197, "step": 12117 }, { "epoch": 1.6204867611660871, "grad_norm": 1.1321659088134766, "learning_rate": 9.186626432832275e-06, "loss": 0.6507, "step": 12118 }, { "epoch": 1.6206204867611662, "grad_norm": 1.339453935623169, "learning_rate": 9.185187478360037e-06, "loss": 0.6821, "step": 12119 }, { "epoch": 1.6207542123562448, "grad_norm": 1.2092570066452026, "learning_rate": 9.18374854087179e-06, "loss": 0.7445, "step": 12120 }, { "epoch": 1.620887937951324, "grad_norm": 1.3333697319030762, "learning_rate": 9.182309620397525e-06, "loss": 0.7565, "step": 12121 }, { "epoch": 1.6210216635464028, "grad_norm": 1.3010880947113037, "learning_rate": 9.18087071696723e-06, "loss": 0.6198, "step": 12122 }, { "epoch": 1.6211553891414816, "grad_norm": 1.3878114223480225, "learning_rate": 9.179431830610905e-06, "loss": 0.7412, "step": 12123 }, { "epoch": 1.6212891147365607, "grad_norm": 1.1799821853637695, "learning_rate": 9.177992961358533e-06, "loss": 0.6429, "step": 12124 }, { "epoch": 1.6214228403316395, "grad_norm": 1.3221498727798462, "learning_rate": 9.176554109240115e-06, "loss": 0.6978, "step": 12125 }, { "epoch": 1.6215565659267184, "grad_norm": 1.2941234111785889, "learning_rate": 9.175115274285639e-06, "loss": 0.7287, "step": 12126 }, { "epoch": 1.6216902915217972, "grad_norm": 1.2035632133483887, "learning_rate": 9.173676456525091e-06, "loss": 0.6771, "step": 12127 }, { "epoch": 1.621824017116876, "grad_norm": 1.2470849752426147, "learning_rate": 9.172237655988472e-06, "loss": 0.6868, "step": 12128 }, { "epoch": 1.6219577427119551, "grad_norm": 1.2868101596832275, "learning_rate": 9.170798872705767e-06, "loss": 0.6765, "step": 12129 }, { "epoch": 1.622091468307034, "grad_norm": 1.4334930181503296, "learning_rate": 9.169360106706962e-06, "loss": 0.8034, "step": 12130 }, { "epoch": 1.6222251939021128, "grad_norm": 1.3387612104415894, "learning_rate": 9.167921358022053e-06, "loss": 0.7017, "step": 12131 }, { "epoch": 1.622358919497192, "grad_norm": 1.3273720741271973, "learning_rate": 9.166482626681024e-06, "loss": 0.7781, "step": 12132 }, { "epoch": 1.6224926450922705, "grad_norm": 1.2694358825683594, "learning_rate": 9.165043912713873e-06, "loss": 0.6407, "step": 12133 }, { "epoch": 1.6226263706873496, "grad_norm": 1.1428979635238647, "learning_rate": 9.16360521615058e-06, "loss": 0.6553, "step": 12134 }, { "epoch": 1.6227600962824285, "grad_norm": 1.1809850931167603, "learning_rate": 9.162166537021134e-06, "loss": 0.6378, "step": 12135 }, { "epoch": 1.6228938218775073, "grad_norm": 1.2608532905578613, "learning_rate": 9.16072787535553e-06, "loss": 0.6299, "step": 12136 }, { "epoch": 1.6230275474725864, "grad_norm": 1.1949703693389893, "learning_rate": 9.159289231183745e-06, "loss": 0.6872, "step": 12137 }, { "epoch": 1.623161273067665, "grad_norm": 1.221248984336853, "learning_rate": 9.15785060453577e-06, "loss": 0.6902, "step": 12138 }, { "epoch": 1.623294998662744, "grad_norm": 1.3226178884506226, "learning_rate": 9.1564119954416e-06, "loss": 0.6719, "step": 12139 }, { "epoch": 1.623428724257823, "grad_norm": 1.3858745098114014, "learning_rate": 9.154973403931207e-06, "loss": 0.6715, "step": 12140 }, { "epoch": 1.6235624498529018, "grad_norm": 1.1939282417297363, "learning_rate": 9.153534830034591e-06, "loss": 0.658, "step": 12141 }, { "epoch": 1.6236961754479808, "grad_norm": 1.0602447986602783, "learning_rate": 9.152096273781732e-06, "loss": 0.6746, "step": 12142 }, { "epoch": 1.6238299010430597, "grad_norm": 1.2562229633331299, "learning_rate": 9.15065773520261e-06, "loss": 0.6873, "step": 12143 }, { "epoch": 1.6239636266381385, "grad_norm": 1.2610026597976685, "learning_rate": 9.149219214327217e-06, "loss": 0.6739, "step": 12144 }, { "epoch": 1.6240973522332174, "grad_norm": 1.2135074138641357, "learning_rate": 9.147780711185538e-06, "loss": 0.6618, "step": 12145 }, { "epoch": 1.6242310778282962, "grad_norm": 1.1034901142120361, "learning_rate": 9.14634222580755e-06, "loss": 0.6789, "step": 12146 }, { "epoch": 1.6243648034233753, "grad_norm": 1.2972038984298706, "learning_rate": 9.144903758223245e-06, "loss": 0.6819, "step": 12147 }, { "epoch": 1.6244985290184542, "grad_norm": 1.1745084524154663, "learning_rate": 9.143465308462598e-06, "loss": 0.6281, "step": 12148 }, { "epoch": 1.624632254613533, "grad_norm": 1.3178049325942993, "learning_rate": 9.142026876555602e-06, "loss": 0.7848, "step": 12149 }, { "epoch": 1.624765980208612, "grad_norm": 1.239805817604065, "learning_rate": 9.140588462532233e-06, "loss": 0.7239, "step": 12150 }, { "epoch": 1.6248997058036907, "grad_norm": 1.2920856475830078, "learning_rate": 9.139150066422474e-06, "loss": 0.7369, "step": 12151 }, { "epoch": 1.6250334313987698, "grad_norm": 1.1460797786712646, "learning_rate": 9.137711688256312e-06, "loss": 0.727, "step": 12152 }, { "epoch": 1.6251671569938486, "grad_norm": 1.0860310792922974, "learning_rate": 9.13627332806372e-06, "loss": 0.6365, "step": 12153 }, { "epoch": 1.6253008825889275, "grad_norm": 1.1594702005386353, "learning_rate": 9.134834985874687e-06, "loss": 0.6601, "step": 12154 }, { "epoch": 1.6254346081840065, "grad_norm": 1.1805531978607178, "learning_rate": 9.133396661719193e-06, "loss": 0.692, "step": 12155 }, { "epoch": 1.6255683337790852, "grad_norm": 1.1558222770690918, "learning_rate": 9.13195835562721e-06, "loss": 0.6381, "step": 12156 }, { "epoch": 1.6257020593741642, "grad_norm": 1.2628341913223267, "learning_rate": 9.130520067628728e-06, "loss": 0.7571, "step": 12157 }, { "epoch": 1.625835784969243, "grad_norm": 1.243841528892517, "learning_rate": 9.129081797753724e-06, "loss": 0.7167, "step": 12158 }, { "epoch": 1.625969510564322, "grad_norm": 1.1264487504959106, "learning_rate": 9.127643546032174e-06, "loss": 0.686, "step": 12159 }, { "epoch": 1.626103236159401, "grad_norm": 1.3356196880340576, "learning_rate": 9.126205312494062e-06, "loss": 0.7512, "step": 12160 }, { "epoch": 1.6262369617544798, "grad_norm": 1.3498576879501343, "learning_rate": 9.124767097169362e-06, "loss": 0.7969, "step": 12161 }, { "epoch": 1.6263706873495587, "grad_norm": 1.1330918073654175, "learning_rate": 9.123328900088058e-06, "loss": 0.7067, "step": 12162 }, { "epoch": 1.6265044129446378, "grad_norm": 1.2193334102630615, "learning_rate": 9.121890721280121e-06, "loss": 0.6979, "step": 12163 }, { "epoch": 1.6266381385397164, "grad_norm": 1.4225854873657227, "learning_rate": 9.120452560775532e-06, "loss": 0.7297, "step": 12164 }, { "epoch": 1.6267718641347955, "grad_norm": 1.1815792322158813, "learning_rate": 9.119014418604269e-06, "loss": 0.7003, "step": 12165 }, { "epoch": 1.6269055897298743, "grad_norm": 1.179482340812683, "learning_rate": 9.117576294796307e-06, "loss": 0.6451, "step": 12166 }, { "epoch": 1.6270393153249532, "grad_norm": 1.2368333339691162, "learning_rate": 9.11613818938162e-06, "loss": 0.7428, "step": 12167 }, { "epoch": 1.6271730409200322, "grad_norm": 1.2230229377746582, "learning_rate": 9.11470010239019e-06, "loss": 0.7017, "step": 12168 }, { "epoch": 1.6273067665151109, "grad_norm": 1.2191781997680664, "learning_rate": 9.113262033851988e-06, "loss": 0.7569, "step": 12169 }, { "epoch": 1.62744049211019, "grad_norm": 1.251646637916565, "learning_rate": 9.11182398379699e-06, "loss": 0.801, "step": 12170 }, { "epoch": 1.6275742177052688, "grad_norm": 1.100760579109192, "learning_rate": 9.110385952255174e-06, "loss": 0.7004, "step": 12171 }, { "epoch": 1.6277079433003476, "grad_norm": 1.2002395391464233, "learning_rate": 9.108947939256508e-06, "loss": 0.7105, "step": 12172 }, { "epoch": 1.6278416688954267, "grad_norm": 1.2985812425613403, "learning_rate": 9.107509944830972e-06, "loss": 0.7223, "step": 12173 }, { "epoch": 1.6279753944905053, "grad_norm": 1.27374267578125, "learning_rate": 9.106071969008537e-06, "loss": 0.7727, "step": 12174 }, { "epoch": 1.6281091200855844, "grad_norm": 1.1897945404052734, "learning_rate": 9.104634011819173e-06, "loss": 0.6522, "step": 12175 }, { "epoch": 1.6282428456806632, "grad_norm": 1.282271385192871, "learning_rate": 9.10319607329286e-06, "loss": 0.737, "step": 12176 }, { "epoch": 1.628376571275742, "grad_norm": 1.1652004718780518, "learning_rate": 9.101758153459564e-06, "loss": 0.6457, "step": 12177 }, { "epoch": 1.6285102968708212, "grad_norm": 1.1747586727142334, "learning_rate": 9.100320252349261e-06, "loss": 0.7103, "step": 12178 }, { "epoch": 1.6286440224659, "grad_norm": 1.2479718923568726, "learning_rate": 9.098882369991924e-06, "loss": 0.7065, "step": 12179 }, { "epoch": 1.6287777480609789, "grad_norm": 1.3513357639312744, "learning_rate": 9.097444506417518e-06, "loss": 0.7267, "step": 12180 }, { "epoch": 1.628911473656058, "grad_norm": 1.2002671957015991, "learning_rate": 9.096006661656021e-06, "loss": 0.6796, "step": 12181 }, { "epoch": 1.6290451992511366, "grad_norm": 1.480422854423523, "learning_rate": 9.094568835737397e-06, "loss": 0.7156, "step": 12182 }, { "epoch": 1.6291789248462156, "grad_norm": 1.230607271194458, "learning_rate": 9.093131028691617e-06, "loss": 0.7416, "step": 12183 }, { "epoch": 1.6293126504412945, "grad_norm": 1.2090680599212646, "learning_rate": 9.091693240548659e-06, "loss": 0.6386, "step": 12184 }, { "epoch": 1.6294463760363733, "grad_norm": 1.2085438966751099, "learning_rate": 9.090255471338482e-06, "loss": 0.7721, "step": 12185 }, { "epoch": 1.6295801016314524, "grad_norm": 1.1867046356201172, "learning_rate": 9.088817721091062e-06, "loss": 0.6991, "step": 12186 }, { "epoch": 1.629713827226531, "grad_norm": 1.1910532712936401, "learning_rate": 9.087379989836366e-06, "loss": 0.6951, "step": 12187 }, { "epoch": 1.62984755282161, "grad_norm": 1.2493771314620972, "learning_rate": 9.085942277604354e-06, "loss": 0.6908, "step": 12188 }, { "epoch": 1.629981278416689, "grad_norm": 1.2557603120803833, "learning_rate": 9.084504584425005e-06, "loss": 0.8063, "step": 12189 }, { "epoch": 1.6301150040117678, "grad_norm": 1.1318848133087158, "learning_rate": 9.083066910328284e-06, "loss": 0.6871, "step": 12190 }, { "epoch": 1.6302487296068469, "grad_norm": 1.100594401359558, "learning_rate": 9.08162925534415e-06, "loss": 0.6554, "step": 12191 }, { "epoch": 1.6303824552019255, "grad_norm": 1.1576602458953857, "learning_rate": 9.080191619502581e-06, "loss": 0.5987, "step": 12192 }, { "epoch": 1.6305161807970046, "grad_norm": 1.2103257179260254, "learning_rate": 9.078754002833535e-06, "loss": 0.7157, "step": 12193 }, { "epoch": 1.6306499063920834, "grad_norm": 1.2625612020492554, "learning_rate": 9.07731640536698e-06, "loss": 0.7749, "step": 12194 }, { "epoch": 1.6307836319871623, "grad_norm": 1.0626695156097412, "learning_rate": 9.075878827132883e-06, "loss": 0.5939, "step": 12195 }, { "epoch": 1.6309173575822413, "grad_norm": 1.1504452228546143, "learning_rate": 9.074441268161207e-06, "loss": 0.6633, "step": 12196 }, { "epoch": 1.6310510831773202, "grad_norm": 1.1651383638381958, "learning_rate": 9.073003728481917e-06, "loss": 0.5941, "step": 12197 }, { "epoch": 1.631184808772399, "grad_norm": 1.2084946632385254, "learning_rate": 9.07156620812498e-06, "loss": 0.7825, "step": 12198 }, { "epoch": 1.631318534367478, "grad_norm": 1.2287118434906006, "learning_rate": 9.070128707120351e-06, "loss": 0.6454, "step": 12199 }, { "epoch": 1.6314522599625567, "grad_norm": 1.2099499702453613, "learning_rate": 9.068691225498004e-06, "loss": 0.6358, "step": 12200 }, { "epoch": 1.6315859855576358, "grad_norm": 1.3258144855499268, "learning_rate": 9.067253763287894e-06, "loss": 0.8125, "step": 12201 }, { "epoch": 1.6317197111527146, "grad_norm": 1.1682347059249878, "learning_rate": 9.065816320519989e-06, "loss": 0.665, "step": 12202 }, { "epoch": 1.6318534367477935, "grad_norm": 1.3243006467819214, "learning_rate": 9.06437889722425e-06, "loss": 0.7624, "step": 12203 }, { "epoch": 1.6319871623428726, "grad_norm": 1.273690938949585, "learning_rate": 9.062941493430634e-06, "loss": 0.6392, "step": 12204 }, { "epoch": 1.6321208879379512, "grad_norm": 1.234810471534729, "learning_rate": 9.061504109169108e-06, "loss": 0.7321, "step": 12205 }, { "epoch": 1.6322546135330303, "grad_norm": 1.2182395458221436, "learning_rate": 9.060066744469633e-06, "loss": 0.6575, "step": 12206 }, { "epoch": 1.632388339128109, "grad_norm": 1.239786148071289, "learning_rate": 9.058629399362163e-06, "loss": 0.7437, "step": 12207 }, { "epoch": 1.632522064723188, "grad_norm": 1.1204453706741333, "learning_rate": 9.057192073876665e-06, "loss": 0.6615, "step": 12208 }, { "epoch": 1.632655790318267, "grad_norm": 1.1716543436050415, "learning_rate": 9.055754768043095e-06, "loss": 0.6451, "step": 12209 }, { "epoch": 1.6327895159133459, "grad_norm": 1.1134787797927856, "learning_rate": 9.054317481891413e-06, "loss": 0.6402, "step": 12210 }, { "epoch": 1.6329232415084247, "grad_norm": 1.2876673936843872, "learning_rate": 9.052880215451581e-06, "loss": 0.747, "step": 12211 }, { "epoch": 1.6330569671035036, "grad_norm": 1.3560421466827393, "learning_rate": 9.05144296875355e-06, "loss": 0.6932, "step": 12212 }, { "epoch": 1.6331906926985824, "grad_norm": 1.2528886795043945, "learning_rate": 9.050005741827286e-06, "loss": 0.7234, "step": 12213 }, { "epoch": 1.6333244182936615, "grad_norm": 1.2665976285934448, "learning_rate": 9.048568534702744e-06, "loss": 0.8155, "step": 12214 }, { "epoch": 1.6334581438887403, "grad_norm": 1.255212426185608, "learning_rate": 9.047131347409879e-06, "loss": 0.7204, "step": 12215 }, { "epoch": 1.6335918694838192, "grad_norm": 1.2966251373291016, "learning_rate": 9.045694179978647e-06, "loss": 0.6883, "step": 12216 }, { "epoch": 1.6337255950788983, "grad_norm": 1.3082724809646606, "learning_rate": 9.044257032439007e-06, "loss": 0.7679, "step": 12217 }, { "epoch": 1.6338593206739769, "grad_norm": 1.3585913181304932, "learning_rate": 9.04281990482092e-06, "loss": 0.7084, "step": 12218 }, { "epoch": 1.633993046269056, "grad_norm": 1.189323902130127, "learning_rate": 9.041382797154333e-06, "loss": 0.6483, "step": 12219 }, { "epoch": 1.6341267718641348, "grad_norm": 1.1092365980148315, "learning_rate": 9.039945709469202e-06, "loss": 0.6801, "step": 12220 }, { "epoch": 1.6342604974592136, "grad_norm": 1.1652475595474243, "learning_rate": 9.038508641795485e-06, "loss": 0.6776, "step": 12221 }, { "epoch": 1.6343942230542927, "grad_norm": 1.1675993204116821, "learning_rate": 9.037071594163139e-06, "loss": 0.6465, "step": 12222 }, { "epoch": 1.6345279486493713, "grad_norm": 1.207872748374939, "learning_rate": 9.035634566602109e-06, "loss": 0.6673, "step": 12223 }, { "epoch": 1.6346616742444504, "grad_norm": 1.093558430671692, "learning_rate": 9.034197559142358e-06, "loss": 0.7072, "step": 12224 }, { "epoch": 1.6347953998395293, "grad_norm": 1.2169008255004883, "learning_rate": 9.03276057181383e-06, "loss": 0.7134, "step": 12225 }, { "epoch": 1.634929125434608, "grad_norm": 1.185446858406067, "learning_rate": 9.031323604646488e-06, "loss": 0.7122, "step": 12226 }, { "epoch": 1.6350628510296872, "grad_norm": 1.17899751663208, "learning_rate": 9.029886657670275e-06, "loss": 0.6924, "step": 12227 }, { "epoch": 1.635196576624766, "grad_norm": 1.233406901359558, "learning_rate": 9.028449730915146e-06, "loss": 0.7446, "step": 12228 }, { "epoch": 1.6353303022198449, "grad_norm": 1.2549545764923096, "learning_rate": 9.027012824411053e-06, "loss": 0.7183, "step": 12229 }, { "epoch": 1.6354640278149237, "grad_norm": 1.2093199491500854, "learning_rate": 9.02557593818795e-06, "loss": 0.7076, "step": 12230 }, { "epoch": 1.6355977534100026, "grad_norm": 1.168333888053894, "learning_rate": 9.024139072275779e-06, "loss": 0.7126, "step": 12231 }, { "epoch": 1.6357314790050816, "grad_norm": 1.2756528854370117, "learning_rate": 9.022702226704499e-06, "loss": 0.6721, "step": 12232 }, { "epoch": 1.6358652046001605, "grad_norm": 1.1707675457000732, "learning_rate": 9.021265401504053e-06, "loss": 0.6938, "step": 12233 }, { "epoch": 1.6359989301952393, "grad_norm": 1.2475755214691162, "learning_rate": 9.019828596704394e-06, "loss": 0.7518, "step": 12234 }, { "epoch": 1.6361326557903184, "grad_norm": 1.2347018718719482, "learning_rate": 9.018391812335473e-06, "loss": 0.76, "step": 12235 }, { "epoch": 1.636266381385397, "grad_norm": 1.2987205982208252, "learning_rate": 9.01695504842723e-06, "loss": 0.7487, "step": 12236 }, { "epoch": 1.636400106980476, "grad_norm": 1.2436975240707397, "learning_rate": 9.015518305009623e-06, "loss": 0.7085, "step": 12237 }, { "epoch": 1.636533832575555, "grad_norm": 1.1979976892471313, "learning_rate": 9.014081582112592e-06, "loss": 0.7111, "step": 12238 }, { "epoch": 1.6366675581706338, "grad_norm": 1.0305064916610718, "learning_rate": 9.012644879766091e-06, "loss": 0.5911, "step": 12239 }, { "epoch": 1.6368012837657129, "grad_norm": 1.2712054252624512, "learning_rate": 9.011208198000058e-06, "loss": 0.7257, "step": 12240 }, { "epoch": 1.6369350093607915, "grad_norm": 1.0698387622833252, "learning_rate": 9.009771536844448e-06, "loss": 0.6658, "step": 12241 }, { "epoch": 1.6370687349558706, "grad_norm": 1.1605802774429321, "learning_rate": 9.008334896329199e-06, "loss": 0.7534, "step": 12242 }, { "epoch": 1.6372024605509494, "grad_norm": 1.2932995557785034, "learning_rate": 9.006898276484264e-06, "loss": 0.6507, "step": 12243 }, { "epoch": 1.6373361861460283, "grad_norm": 1.2700107097625732, "learning_rate": 9.00546167733958e-06, "loss": 0.6916, "step": 12244 }, { "epoch": 1.6374699117411073, "grad_norm": 1.2535593509674072, "learning_rate": 9.004025098925099e-06, "loss": 0.771, "step": 12245 }, { "epoch": 1.6376036373361862, "grad_norm": 1.243652582168579, "learning_rate": 9.002588541270758e-06, "loss": 0.6334, "step": 12246 }, { "epoch": 1.637737362931265, "grad_norm": 1.2627640962600708, "learning_rate": 9.00115200440651e-06, "loss": 0.6161, "step": 12247 }, { "epoch": 1.6378710885263439, "grad_norm": 1.1325398683547974, "learning_rate": 8.999715488362288e-06, "loss": 0.6409, "step": 12248 }, { "epoch": 1.6380048141214227, "grad_norm": 1.186276912689209, "learning_rate": 8.99827899316804e-06, "loss": 0.7208, "step": 12249 }, { "epoch": 1.6381385397165018, "grad_norm": 1.3243136405944824, "learning_rate": 8.99684251885371e-06, "loss": 0.6919, "step": 12250 }, { "epoch": 1.6382722653115807, "grad_norm": 1.171627163887024, "learning_rate": 8.995406065449238e-06, "loss": 0.6747, "step": 12251 }, { "epoch": 1.6384059909066595, "grad_norm": 1.2558014392852783, "learning_rate": 8.993969632984561e-06, "loss": 0.7452, "step": 12252 }, { "epoch": 1.6385397165017386, "grad_norm": 1.1844756603240967, "learning_rate": 8.992533221489628e-06, "loss": 0.6912, "step": 12253 }, { "epoch": 1.6386734420968172, "grad_norm": 1.3822500705718994, "learning_rate": 8.991096830994375e-06, "loss": 0.7401, "step": 12254 }, { "epoch": 1.6388071676918963, "grad_norm": 1.3259596824645996, "learning_rate": 8.989660461528743e-06, "loss": 0.7836, "step": 12255 }, { "epoch": 1.6389408932869751, "grad_norm": 1.1490412950515747, "learning_rate": 8.988224113122675e-06, "loss": 0.7008, "step": 12256 }, { "epoch": 1.639074618882054, "grad_norm": 1.3420923948287964, "learning_rate": 8.986787785806102e-06, "loss": 0.7262, "step": 12257 }, { "epoch": 1.639208344477133, "grad_norm": 1.2320441007614136, "learning_rate": 8.985351479608972e-06, "loss": 0.7275, "step": 12258 }, { "epoch": 1.6393420700722117, "grad_norm": 1.1409752368927002, "learning_rate": 8.983915194561218e-06, "loss": 0.6574, "step": 12259 }, { "epoch": 1.6394757956672907, "grad_norm": 1.1920838356018066, "learning_rate": 8.98247893069278e-06, "loss": 0.6938, "step": 12260 }, { "epoch": 1.6396095212623696, "grad_norm": 1.2273805141448975, "learning_rate": 8.981042688033593e-06, "loss": 0.6736, "step": 12261 }, { "epoch": 1.6397432468574484, "grad_norm": 1.1600852012634277, "learning_rate": 8.979606466613596e-06, "loss": 0.6376, "step": 12262 }, { "epoch": 1.6398769724525275, "grad_norm": 1.200808048248291, "learning_rate": 8.97817026646273e-06, "loss": 0.6361, "step": 12263 }, { "epoch": 1.6400106980476064, "grad_norm": 1.217524528503418, "learning_rate": 8.976734087610925e-06, "loss": 0.6929, "step": 12264 }, { "epoch": 1.6401444236426852, "grad_norm": 1.1046650409698486, "learning_rate": 8.975297930088116e-06, "loss": 0.5904, "step": 12265 }, { "epoch": 1.6402781492377643, "grad_norm": 1.289227843284607, "learning_rate": 8.973861793924246e-06, "loss": 0.7478, "step": 12266 }, { "epoch": 1.640411874832843, "grad_norm": 1.2591333389282227, "learning_rate": 8.97242567914924e-06, "loss": 0.6179, "step": 12267 }, { "epoch": 1.640545600427922, "grad_norm": 1.3970115184783936, "learning_rate": 8.970989585793039e-06, "loss": 0.7256, "step": 12268 }, { "epoch": 1.6406793260230008, "grad_norm": 1.0967646837234497, "learning_rate": 8.969553513885578e-06, "loss": 0.5993, "step": 12269 }, { "epoch": 1.6408130516180797, "grad_norm": 1.205810546875, "learning_rate": 8.968117463456784e-06, "loss": 0.6389, "step": 12270 }, { "epoch": 1.6409467772131587, "grad_norm": 1.2053886651992798, "learning_rate": 8.966681434536599e-06, "loss": 0.7589, "step": 12271 }, { "epoch": 1.6410805028082374, "grad_norm": 1.1467087268829346, "learning_rate": 8.965245427154948e-06, "loss": 0.6131, "step": 12272 }, { "epoch": 1.6412142284033164, "grad_norm": 1.2030466794967651, "learning_rate": 8.963809441341764e-06, "loss": 0.7084, "step": 12273 }, { "epoch": 1.6413479539983953, "grad_norm": 1.3350441455841064, "learning_rate": 8.962373477126983e-06, "loss": 0.7696, "step": 12274 }, { "epoch": 1.6414816795934741, "grad_norm": 1.2633978128433228, "learning_rate": 8.960937534540537e-06, "loss": 0.7451, "step": 12275 }, { "epoch": 1.6416154051885532, "grad_norm": 1.2714512348175049, "learning_rate": 8.959501613612347e-06, "loss": 0.7369, "step": 12276 }, { "epoch": 1.6417491307836318, "grad_norm": 1.2392311096191406, "learning_rate": 8.958065714372355e-06, "loss": 0.6442, "step": 12277 }, { "epoch": 1.641882856378711, "grad_norm": 1.2752341032028198, "learning_rate": 8.956629836850482e-06, "loss": 0.6685, "step": 12278 }, { "epoch": 1.6420165819737897, "grad_norm": 1.2576552629470825, "learning_rate": 8.955193981076666e-06, "loss": 0.7191, "step": 12279 }, { "epoch": 1.6421503075688686, "grad_norm": 1.302627444267273, "learning_rate": 8.95375814708083e-06, "loss": 0.7125, "step": 12280 }, { "epoch": 1.6422840331639477, "grad_norm": 1.2063794136047363, "learning_rate": 8.952322334892903e-06, "loss": 0.6962, "step": 12281 }, { "epoch": 1.6424177587590265, "grad_norm": 1.583531141281128, "learning_rate": 8.950886544542817e-06, "loss": 0.7505, "step": 12282 }, { "epoch": 1.6425514843541054, "grad_norm": 1.3122018575668335, "learning_rate": 8.949450776060498e-06, "loss": 0.6695, "step": 12283 }, { "epoch": 1.6426852099491844, "grad_norm": 1.2292333841323853, "learning_rate": 8.948015029475866e-06, "loss": 0.7243, "step": 12284 }, { "epoch": 1.642818935544263, "grad_norm": 1.124380350112915, "learning_rate": 8.946579304818863e-06, "loss": 0.6807, "step": 12285 }, { "epoch": 1.6429526611393421, "grad_norm": 1.2706211805343628, "learning_rate": 8.945143602119397e-06, "loss": 0.6407, "step": 12286 }, { "epoch": 1.643086386734421, "grad_norm": 1.2830673456192017, "learning_rate": 8.943707921407408e-06, "loss": 0.6725, "step": 12287 }, { "epoch": 1.6432201123294998, "grad_norm": 1.3369498252868652, "learning_rate": 8.94227226271282e-06, "loss": 0.826, "step": 12288 }, { "epoch": 1.643353837924579, "grad_norm": 1.1838973760604858, "learning_rate": 8.940836626065547e-06, "loss": 0.7315, "step": 12289 }, { "epoch": 1.6434875635196575, "grad_norm": 1.2631181478500366, "learning_rate": 8.939401011495527e-06, "loss": 0.6231, "step": 12290 }, { "epoch": 1.6436212891147366, "grad_norm": 1.1619325876235962, "learning_rate": 8.937965419032677e-06, "loss": 0.6267, "step": 12291 }, { "epoch": 1.6437550147098154, "grad_norm": 1.1883012056350708, "learning_rate": 8.936529848706919e-06, "loss": 0.7537, "step": 12292 }, { "epoch": 1.6438887403048943, "grad_norm": 1.2511861324310303, "learning_rate": 8.93509430054818e-06, "loss": 0.694, "step": 12293 }, { "epoch": 1.6440224658999734, "grad_norm": 1.2818245887756348, "learning_rate": 8.933658774586381e-06, "loss": 0.6999, "step": 12294 }, { "epoch": 1.644156191495052, "grad_norm": 1.157547116279602, "learning_rate": 8.932223270851445e-06, "loss": 0.6793, "step": 12295 }, { "epoch": 1.644289917090131, "grad_norm": 1.2189162969589233, "learning_rate": 8.930787789373296e-06, "loss": 0.7135, "step": 12296 }, { "epoch": 1.64442364268521, "grad_norm": 1.1448163986206055, "learning_rate": 8.929352330181847e-06, "loss": 0.7083, "step": 12297 }, { "epoch": 1.6445573682802888, "grad_norm": 1.21192467212677, "learning_rate": 8.92791689330703e-06, "loss": 0.6745, "step": 12298 }, { "epoch": 1.6446910938753678, "grad_norm": 1.4186244010925293, "learning_rate": 8.926481478778756e-06, "loss": 0.6883, "step": 12299 }, { "epoch": 1.6448248194704467, "grad_norm": 1.0672227144241333, "learning_rate": 8.925046086626945e-06, "loss": 0.6446, "step": 12300 }, { "epoch": 1.6449585450655255, "grad_norm": 1.261681318283081, "learning_rate": 8.923610716881525e-06, "loss": 0.6901, "step": 12301 }, { "epoch": 1.6450922706606046, "grad_norm": 1.166210412979126, "learning_rate": 8.922175369572407e-06, "loss": 0.6448, "step": 12302 }, { "epoch": 1.6452259962556832, "grad_norm": 1.1794824600219727, "learning_rate": 8.920740044729515e-06, "loss": 0.708, "step": 12303 }, { "epoch": 1.6453597218507623, "grad_norm": 1.2333112955093384, "learning_rate": 8.919304742382762e-06, "loss": 0.7075, "step": 12304 }, { "epoch": 1.6454934474458411, "grad_norm": 1.225588321685791, "learning_rate": 8.917869462562067e-06, "loss": 0.6641, "step": 12305 }, { "epoch": 1.64562717304092, "grad_norm": 1.4158178567886353, "learning_rate": 8.916434205297347e-06, "loss": 0.7145, "step": 12306 }, { "epoch": 1.645760898635999, "grad_norm": 1.204805612564087, "learning_rate": 8.914998970618522e-06, "loss": 0.7406, "step": 12307 }, { "epoch": 1.6458946242310777, "grad_norm": 1.284525752067566, "learning_rate": 8.913563758555502e-06, "loss": 0.6775, "step": 12308 }, { "epoch": 1.6460283498261568, "grad_norm": 1.3572874069213867, "learning_rate": 8.912128569138209e-06, "loss": 0.6808, "step": 12309 }, { "epoch": 1.6461620754212356, "grad_norm": 1.3200709819793701, "learning_rate": 8.91069340239655e-06, "loss": 0.6942, "step": 12310 }, { "epoch": 1.6462958010163145, "grad_norm": 1.2622733116149902, "learning_rate": 8.909258258360451e-06, "loss": 0.7759, "step": 12311 }, { "epoch": 1.6464295266113935, "grad_norm": 1.2914661169052124, "learning_rate": 8.907823137059817e-06, "loss": 0.7301, "step": 12312 }, { "epoch": 1.6465632522064724, "grad_norm": 1.2797245979309082, "learning_rate": 8.906388038524562e-06, "loss": 0.7668, "step": 12313 }, { "epoch": 1.6466969778015512, "grad_norm": 1.1662758588790894, "learning_rate": 8.904952962784605e-06, "loss": 0.7094, "step": 12314 }, { "epoch": 1.64683070339663, "grad_norm": 1.2073575258255005, "learning_rate": 8.903517909869858e-06, "loss": 0.5904, "step": 12315 }, { "epoch": 1.646964428991709, "grad_norm": 1.21602463722229, "learning_rate": 8.902082879810225e-06, "loss": 0.7156, "step": 12316 }, { "epoch": 1.647098154586788, "grad_norm": 1.2871873378753662, "learning_rate": 8.900647872635629e-06, "loss": 0.6978, "step": 12317 }, { "epoch": 1.6472318801818668, "grad_norm": 1.2348345518112183, "learning_rate": 8.899212888375972e-06, "loss": 0.6612, "step": 12318 }, { "epoch": 1.6473656057769457, "grad_norm": 1.320090651512146, "learning_rate": 8.89777792706117e-06, "loss": 0.7991, "step": 12319 }, { "epoch": 1.6474993313720248, "grad_norm": 1.278443455696106, "learning_rate": 8.896342988721135e-06, "loss": 0.682, "step": 12320 }, { "epoch": 1.6476330569671034, "grad_norm": 1.2681987285614014, "learning_rate": 8.894908073385771e-06, "loss": 0.7072, "step": 12321 }, { "epoch": 1.6477667825621825, "grad_norm": 1.285531997680664, "learning_rate": 8.893473181084993e-06, "loss": 0.7199, "step": 12322 }, { "epoch": 1.6479005081572613, "grad_norm": 1.0385469198226929, "learning_rate": 8.892038311848704e-06, "loss": 0.6066, "step": 12323 }, { "epoch": 1.6480342337523401, "grad_norm": 1.1164511442184448, "learning_rate": 8.890603465706823e-06, "loss": 0.6887, "step": 12324 }, { "epoch": 1.6481679593474192, "grad_norm": 1.1552810668945312, "learning_rate": 8.889168642689246e-06, "loss": 0.6759, "step": 12325 }, { "epoch": 1.6483016849424978, "grad_norm": 1.2800675630569458, "learning_rate": 8.887733842825885e-06, "loss": 0.6822, "step": 12326 }, { "epoch": 1.648435410537577, "grad_norm": 1.1618342399597168, "learning_rate": 8.886299066146652e-06, "loss": 0.6344, "step": 12327 }, { "epoch": 1.6485691361326558, "grad_norm": 1.115425944328308, "learning_rate": 8.884864312681449e-06, "loss": 0.789, "step": 12328 }, { "epoch": 1.6487028617277346, "grad_norm": 1.2912665605545044, "learning_rate": 8.883429582460178e-06, "loss": 0.747, "step": 12329 }, { "epoch": 1.6488365873228137, "grad_norm": 1.2462538480758667, "learning_rate": 8.881994875512754e-06, "loss": 0.7079, "step": 12330 }, { "epoch": 1.6489703129178925, "grad_norm": 1.2592493295669556, "learning_rate": 8.880560191869071e-06, "loss": 0.7433, "step": 12331 }, { "epoch": 1.6491040385129714, "grad_norm": 1.2509167194366455, "learning_rate": 8.879125531559042e-06, "loss": 0.6953, "step": 12332 }, { "epoch": 1.6492377641080502, "grad_norm": 1.1815496683120728, "learning_rate": 8.877690894612572e-06, "loss": 0.7036, "step": 12333 }, { "epoch": 1.649371489703129, "grad_norm": 1.3585467338562012, "learning_rate": 8.876256281059558e-06, "loss": 0.7314, "step": 12334 }, { "epoch": 1.6495052152982081, "grad_norm": 1.1983674764633179, "learning_rate": 8.874821690929909e-06, "loss": 0.6453, "step": 12335 }, { "epoch": 1.649638940893287, "grad_norm": 1.1856147050857544, "learning_rate": 8.873387124253524e-06, "loss": 0.6943, "step": 12336 }, { "epoch": 1.6497726664883658, "grad_norm": 1.297411561012268, "learning_rate": 8.871952581060305e-06, "loss": 0.6285, "step": 12337 }, { "epoch": 1.649906392083445, "grad_norm": 1.243592381477356, "learning_rate": 8.870518061380156e-06, "loss": 0.694, "step": 12338 }, { "epoch": 1.6500401176785235, "grad_norm": 1.1155613660812378, "learning_rate": 8.869083565242975e-06, "loss": 0.64, "step": 12339 }, { "epoch": 1.6501738432736026, "grad_norm": 1.1976444721221924, "learning_rate": 8.86764909267867e-06, "loss": 0.7436, "step": 12340 }, { "epoch": 1.6503075688686815, "grad_norm": 1.116301417350769, "learning_rate": 8.866214643717135e-06, "loss": 0.6885, "step": 12341 }, { "epoch": 1.6504412944637603, "grad_norm": 1.239696741104126, "learning_rate": 8.864780218388267e-06, "loss": 0.6893, "step": 12342 }, { "epoch": 1.6505750200588394, "grad_norm": 1.1754655838012695, "learning_rate": 8.863345816721972e-06, "loss": 0.6545, "step": 12343 }, { "epoch": 1.650708745653918, "grad_norm": 1.1113002300262451, "learning_rate": 8.861911438748146e-06, "loss": 0.6291, "step": 12344 }, { "epoch": 1.650842471248997, "grad_norm": 1.211775302886963, "learning_rate": 8.860477084496684e-06, "loss": 0.6466, "step": 12345 }, { "epoch": 1.650976196844076, "grad_norm": 4.429222583770752, "learning_rate": 8.85904275399749e-06, "loss": 0.7151, "step": 12346 }, { "epoch": 1.6511099224391548, "grad_norm": 1.1703946590423584, "learning_rate": 8.857608447280454e-06, "loss": 0.6853, "step": 12347 }, { "epoch": 1.6512436480342338, "grad_norm": 1.2180061340332031, "learning_rate": 8.856174164375482e-06, "loss": 0.7024, "step": 12348 }, { "epoch": 1.6513773736293127, "grad_norm": 1.2263474464416504, "learning_rate": 8.854739905312463e-06, "loss": 0.6754, "step": 12349 }, { "epoch": 1.6515110992243915, "grad_norm": 1.2842947244644165, "learning_rate": 8.853305670121294e-06, "loss": 0.7048, "step": 12350 }, { "epoch": 1.6516448248194704, "grad_norm": 1.1028200387954712, "learning_rate": 8.85187145883187e-06, "loss": 0.6235, "step": 12351 }, { "epoch": 1.6517785504145492, "grad_norm": 1.2587285041809082, "learning_rate": 8.85043727147409e-06, "loss": 0.6756, "step": 12352 }, { "epoch": 1.6519122760096283, "grad_norm": 1.1952602863311768, "learning_rate": 8.84900310807784e-06, "loss": 0.7453, "step": 12353 }, { "epoch": 1.6520460016047072, "grad_norm": 1.2839983701705933, "learning_rate": 8.847568968673025e-06, "loss": 0.6524, "step": 12354 }, { "epoch": 1.652179727199786, "grad_norm": 1.172766089439392, "learning_rate": 8.846134853289527e-06, "loss": 0.6554, "step": 12355 }, { "epoch": 1.652313452794865, "grad_norm": 1.2719576358795166, "learning_rate": 8.84470076195725e-06, "loss": 0.7063, "step": 12356 }, { "epoch": 1.6524471783899437, "grad_norm": 1.2521767616271973, "learning_rate": 8.843266694706075e-06, "loss": 0.7612, "step": 12357 }, { "epoch": 1.6525809039850228, "grad_norm": 1.1674854755401611, "learning_rate": 8.841832651565897e-06, "loss": 0.6857, "step": 12358 }, { "epoch": 1.6527146295801016, "grad_norm": 1.2313117980957031, "learning_rate": 8.840398632566614e-06, "loss": 0.6828, "step": 12359 }, { "epoch": 1.6528483551751805, "grad_norm": 1.2685930728912354, "learning_rate": 8.838964637738112e-06, "loss": 0.7138, "step": 12360 }, { "epoch": 1.6529820807702595, "grad_norm": 1.0916415452957153, "learning_rate": 8.837530667110278e-06, "loss": 0.6942, "step": 12361 }, { "epoch": 1.6531158063653382, "grad_norm": 1.2985621690750122, "learning_rate": 8.836096720713009e-06, "loss": 0.6949, "step": 12362 }, { "epoch": 1.6532495319604172, "grad_norm": 1.122735857963562, "learning_rate": 8.834662798576184e-06, "loss": 0.6054, "step": 12363 }, { "epoch": 1.653383257555496, "grad_norm": 1.1704249382019043, "learning_rate": 8.8332289007297e-06, "loss": 0.6262, "step": 12364 }, { "epoch": 1.653516983150575, "grad_norm": 1.235245943069458, "learning_rate": 8.831795027203448e-06, "loss": 0.6302, "step": 12365 }, { "epoch": 1.653650708745654, "grad_norm": 1.140698790550232, "learning_rate": 8.830361178027302e-06, "loss": 0.6043, "step": 12366 }, { "epoch": 1.6537844343407329, "grad_norm": 1.205237627029419, "learning_rate": 8.828927353231165e-06, "loss": 0.6171, "step": 12367 }, { "epoch": 1.6539181599358117, "grad_norm": 1.3161205053329468, "learning_rate": 8.827493552844917e-06, "loss": 0.7309, "step": 12368 }, { "epoch": 1.6540518855308908, "grad_norm": 1.1530934572219849, "learning_rate": 8.826059776898441e-06, "loss": 0.6129, "step": 12369 }, { "epoch": 1.6541856111259694, "grad_norm": 1.33249032497406, "learning_rate": 8.824626025421625e-06, "loss": 0.7404, "step": 12370 }, { "epoch": 1.6543193367210485, "grad_norm": 1.2091525793075562, "learning_rate": 8.823192298444355e-06, "loss": 0.7499, "step": 12371 }, { "epoch": 1.6544530623161273, "grad_norm": 1.2791638374328613, "learning_rate": 8.821758595996516e-06, "loss": 0.6957, "step": 12372 }, { "epoch": 1.6545867879112062, "grad_norm": 1.0910277366638184, "learning_rate": 8.820324918107995e-06, "loss": 0.5971, "step": 12373 }, { "epoch": 1.6547205135062852, "grad_norm": 1.1187313795089722, "learning_rate": 8.818891264808667e-06, "loss": 0.6333, "step": 12374 }, { "epoch": 1.6548542391013639, "grad_norm": 1.2203001976013184, "learning_rate": 8.817457636128425e-06, "loss": 0.6955, "step": 12375 }, { "epoch": 1.654987964696443, "grad_norm": 1.0609242916107178, "learning_rate": 8.816024032097145e-06, "loss": 0.6885, "step": 12376 }, { "epoch": 1.6551216902915218, "grad_norm": 1.2922406196594238, "learning_rate": 8.814590452744709e-06, "loss": 0.6549, "step": 12377 }, { "epoch": 1.6552554158866006, "grad_norm": 1.3166743516921997, "learning_rate": 8.813156898101003e-06, "loss": 0.7177, "step": 12378 }, { "epoch": 1.6553891414816797, "grad_norm": 1.0977634191513062, "learning_rate": 8.811723368195903e-06, "loss": 0.6205, "step": 12379 }, { "epoch": 1.6555228670767583, "grad_norm": 1.234967827796936, "learning_rate": 8.810289863059298e-06, "loss": 0.7415, "step": 12380 }, { "epoch": 1.6556565926718374, "grad_norm": 1.2720471620559692, "learning_rate": 8.80885638272106e-06, "loss": 0.7507, "step": 12381 }, { "epoch": 1.6557903182669163, "grad_norm": 1.246524453163147, "learning_rate": 8.807422927211068e-06, "loss": 0.7444, "step": 12382 }, { "epoch": 1.655924043861995, "grad_norm": 1.111374020576477, "learning_rate": 8.805989496559204e-06, "loss": 0.6691, "step": 12383 }, { "epoch": 1.6560577694570742, "grad_norm": 1.1502548456192017, "learning_rate": 8.80455609079535e-06, "loss": 0.6776, "step": 12384 }, { "epoch": 1.656191495052153, "grad_norm": 1.1283316612243652, "learning_rate": 8.803122709949378e-06, "loss": 0.639, "step": 12385 }, { "epoch": 1.6563252206472319, "grad_norm": 1.3349289894104004, "learning_rate": 8.80168935405117e-06, "loss": 0.6787, "step": 12386 }, { "epoch": 1.656458946242311, "grad_norm": 1.1897867918014526, "learning_rate": 8.800256023130597e-06, "loss": 0.6616, "step": 12387 }, { "epoch": 1.6565926718373896, "grad_norm": 1.3511940240859985, "learning_rate": 8.798822717217543e-06, "loss": 0.7981, "step": 12388 }, { "epoch": 1.6567263974324686, "grad_norm": 1.1522397994995117, "learning_rate": 8.797389436341879e-06, "loss": 0.6669, "step": 12389 }, { "epoch": 1.6568601230275475, "grad_norm": 1.3934744596481323, "learning_rate": 8.795956180533478e-06, "loss": 0.7414, "step": 12390 }, { "epoch": 1.6569938486226263, "grad_norm": 1.1359480619430542, "learning_rate": 8.794522949822222e-06, "loss": 0.6611, "step": 12391 }, { "epoch": 1.6571275742177054, "grad_norm": 1.0843828916549683, "learning_rate": 8.793089744237983e-06, "loss": 0.6325, "step": 12392 }, { "epoch": 1.657261299812784, "grad_norm": 1.3256853818893433, "learning_rate": 8.79165656381063e-06, "loss": 0.655, "step": 12393 }, { "epoch": 1.657395025407863, "grad_norm": 1.2043112516403198, "learning_rate": 8.790223408570043e-06, "loss": 0.6668, "step": 12394 }, { "epoch": 1.657528751002942, "grad_norm": 1.213523268699646, "learning_rate": 8.788790278546087e-06, "loss": 0.756, "step": 12395 }, { "epoch": 1.6576624765980208, "grad_norm": 1.0996527671813965, "learning_rate": 8.78735717376864e-06, "loss": 0.6849, "step": 12396 }, { "epoch": 1.6577962021930999, "grad_norm": 1.214781403541565, "learning_rate": 8.785924094267575e-06, "loss": 0.6748, "step": 12397 }, { "epoch": 1.6579299277881785, "grad_norm": 1.197139024734497, "learning_rate": 8.784491040072755e-06, "loss": 0.717, "step": 12398 }, { "epoch": 1.6580636533832576, "grad_norm": 1.1408874988555908, "learning_rate": 8.783058011214063e-06, "loss": 0.6464, "step": 12399 }, { "epoch": 1.6581973789783364, "grad_norm": 1.1369574069976807, "learning_rate": 8.781625007721362e-06, "loss": 0.6863, "step": 12400 }, { "epoch": 1.6583311045734153, "grad_norm": 1.274687647819519, "learning_rate": 8.780192029624516e-06, "loss": 0.6533, "step": 12401 }, { "epoch": 1.6584648301684943, "grad_norm": 1.3065712451934814, "learning_rate": 8.778759076953403e-06, "loss": 0.642, "step": 12402 }, { "epoch": 1.6585985557635732, "grad_norm": 1.0645710229873657, "learning_rate": 8.777326149737886e-06, "loss": 0.5633, "step": 12403 }, { "epoch": 1.658732281358652, "grad_norm": 1.2185330390930176, "learning_rate": 8.77589324800784e-06, "loss": 0.6942, "step": 12404 }, { "epoch": 1.658866006953731, "grad_norm": 1.3028680086135864, "learning_rate": 8.774460371793126e-06, "loss": 0.68, "step": 12405 }, { "epoch": 1.6589997325488097, "grad_norm": 1.3789809942245483, "learning_rate": 8.77302752112361e-06, "loss": 0.7071, "step": 12406 }, { "epoch": 1.6591334581438888, "grad_norm": 1.1797913312911987, "learning_rate": 8.771594696029166e-06, "loss": 0.7224, "step": 12407 }, { "epoch": 1.6592671837389676, "grad_norm": 1.201615571975708, "learning_rate": 8.77016189653965e-06, "loss": 0.6927, "step": 12408 }, { "epoch": 1.6594009093340465, "grad_norm": 1.2347160577774048, "learning_rate": 8.768729122684935e-06, "loss": 0.6494, "step": 12409 }, { "epoch": 1.6595346349291256, "grad_norm": 1.1201642751693726, "learning_rate": 8.767296374494886e-06, "loss": 0.6443, "step": 12410 }, { "epoch": 1.6596683605242042, "grad_norm": 1.2633198499679565, "learning_rate": 8.76586365199936e-06, "loss": 0.7498, "step": 12411 }, { "epoch": 1.6598020861192833, "grad_norm": 1.1623989343643188, "learning_rate": 8.764430955228229e-06, "loss": 0.6637, "step": 12412 }, { "epoch": 1.659935811714362, "grad_norm": 1.1975071430206299, "learning_rate": 8.762998284211353e-06, "loss": 0.7059, "step": 12413 }, { "epoch": 1.660069537309441, "grad_norm": 1.2383865118026733, "learning_rate": 8.76156563897859e-06, "loss": 0.7047, "step": 12414 }, { "epoch": 1.66020326290452, "grad_norm": 1.1941391229629517, "learning_rate": 8.760133019559808e-06, "loss": 0.7088, "step": 12415 }, { "epoch": 1.6603369884995989, "grad_norm": 1.2723753452301025, "learning_rate": 8.758700425984865e-06, "loss": 0.6659, "step": 12416 }, { "epoch": 1.6604707140946777, "grad_norm": 1.2194710969924927, "learning_rate": 8.757267858283627e-06, "loss": 0.6707, "step": 12417 }, { "epoch": 1.6606044396897566, "grad_norm": 1.231456995010376, "learning_rate": 8.75583531648595e-06, "loss": 0.6915, "step": 12418 }, { "epoch": 1.6607381652848354, "grad_norm": 1.3471393585205078, "learning_rate": 8.754402800621694e-06, "loss": 0.6941, "step": 12419 }, { "epoch": 1.6608718908799145, "grad_norm": 1.2551910877227783, "learning_rate": 8.752970310720723e-06, "loss": 0.6819, "step": 12420 }, { "epoch": 1.6610056164749933, "grad_norm": 1.2738603353500366, "learning_rate": 8.75153784681289e-06, "loss": 0.7278, "step": 12421 }, { "epoch": 1.6611393420700722, "grad_norm": 1.272778034210205, "learning_rate": 8.750105408928054e-06, "loss": 0.7152, "step": 12422 }, { "epoch": 1.6612730676651513, "grad_norm": 1.1494654417037964, "learning_rate": 8.748672997096079e-06, "loss": 0.6891, "step": 12423 }, { "epoch": 1.6614067932602299, "grad_norm": 1.2249614000320435, "learning_rate": 8.747240611346815e-06, "loss": 0.6685, "step": 12424 }, { "epoch": 1.661540518855309, "grad_norm": 1.2430477142333984, "learning_rate": 8.745808251710123e-06, "loss": 0.6816, "step": 12425 }, { "epoch": 1.6616742444503878, "grad_norm": 1.1498692035675049, "learning_rate": 8.74437591821586e-06, "loss": 0.6665, "step": 12426 }, { "epoch": 1.6618079700454667, "grad_norm": 1.2858808040618896, "learning_rate": 8.742943610893875e-06, "loss": 0.6778, "step": 12427 }, { "epoch": 1.6619416956405457, "grad_norm": 1.296128749847412, "learning_rate": 8.74151132977403e-06, "loss": 0.716, "step": 12428 }, { "epoch": 1.6620754212356244, "grad_norm": 1.1610465049743652, "learning_rate": 8.740079074886178e-06, "loss": 0.7402, "step": 12429 }, { "epoch": 1.6622091468307034, "grad_norm": 1.2021414041519165, "learning_rate": 8.738646846260169e-06, "loss": 0.6612, "step": 12430 }, { "epoch": 1.6623428724257823, "grad_norm": 1.2859634160995483, "learning_rate": 8.737214643925864e-06, "loss": 0.7217, "step": 12431 }, { "epoch": 1.6624765980208611, "grad_norm": 1.2385551929473877, "learning_rate": 8.735782467913107e-06, "loss": 0.6475, "step": 12432 }, { "epoch": 1.6626103236159402, "grad_norm": 1.3111885786056519, "learning_rate": 8.734350318251758e-06, "loss": 0.7304, "step": 12433 }, { "epoch": 1.662744049211019, "grad_norm": 1.141268253326416, "learning_rate": 8.732918194971663e-06, "loss": 0.6707, "step": 12434 }, { "epoch": 1.6628777748060979, "grad_norm": 1.321655035018921, "learning_rate": 8.731486098102674e-06, "loss": 0.8004, "step": 12435 }, { "epoch": 1.6630115004011767, "grad_norm": 1.2179478406906128, "learning_rate": 8.730054027674649e-06, "loss": 0.6271, "step": 12436 }, { "epoch": 1.6631452259962556, "grad_norm": 1.328784704208374, "learning_rate": 8.728621983717433e-06, "loss": 0.7672, "step": 12437 }, { "epoch": 1.6632789515913347, "grad_norm": 1.1775200366973877, "learning_rate": 8.72718996626087e-06, "loss": 0.6297, "step": 12438 }, { "epoch": 1.6634126771864135, "grad_norm": 1.1654350757598877, "learning_rate": 8.725757975334816e-06, "loss": 0.6214, "step": 12439 }, { "epoch": 1.6635464027814924, "grad_norm": 1.2574567794799805, "learning_rate": 8.724326010969116e-06, "loss": 0.6549, "step": 12440 }, { "epoch": 1.6636801283765714, "grad_norm": 1.1157575845718384, "learning_rate": 8.722894073193622e-06, "loss": 0.6359, "step": 12441 }, { "epoch": 1.66381385397165, "grad_norm": 1.4238977432250977, "learning_rate": 8.721462162038181e-06, "loss": 0.7887, "step": 12442 }, { "epoch": 1.6639475795667291, "grad_norm": 1.296324372291565, "learning_rate": 8.720030277532632e-06, "loss": 0.6746, "step": 12443 }, { "epoch": 1.664081305161808, "grad_norm": 1.1258771419525146, "learning_rate": 8.718598419706832e-06, "loss": 0.6781, "step": 12444 }, { "epoch": 1.6642150307568868, "grad_norm": 1.2695239782333374, "learning_rate": 8.717166588590624e-06, "loss": 0.7061, "step": 12445 }, { "epoch": 1.6643487563519659, "grad_norm": 1.2950083017349243, "learning_rate": 8.715734784213843e-06, "loss": 0.7041, "step": 12446 }, { "epoch": 1.6644824819470445, "grad_norm": 1.102746844291687, "learning_rate": 8.714303006606346e-06, "loss": 0.6618, "step": 12447 }, { "epoch": 1.6646162075421236, "grad_norm": 1.445131540298462, "learning_rate": 8.71287125579797e-06, "loss": 0.784, "step": 12448 }, { "epoch": 1.6647499331372024, "grad_norm": 1.3842480182647705, "learning_rate": 8.711439531818565e-06, "loss": 0.7588, "step": 12449 }, { "epoch": 1.6648836587322813, "grad_norm": 1.1452401876449585, "learning_rate": 8.71000783469797e-06, "loss": 0.6185, "step": 12450 }, { "epoch": 1.6650173843273604, "grad_norm": 1.1969728469848633, "learning_rate": 8.708576164466023e-06, "loss": 0.6646, "step": 12451 }, { "epoch": 1.6651511099224392, "grad_norm": 1.3963426351547241, "learning_rate": 8.707144521152574e-06, "loss": 0.7306, "step": 12452 }, { "epoch": 1.665284835517518, "grad_norm": 1.174609661102295, "learning_rate": 8.705712904787458e-06, "loss": 0.7085, "step": 12453 }, { "epoch": 1.665418561112597, "grad_norm": 1.1370718479156494, "learning_rate": 8.704281315400518e-06, "loss": 0.728, "step": 12454 }, { "epoch": 1.6655522867076757, "grad_norm": 1.2526185512542725, "learning_rate": 8.702849753021595e-06, "loss": 0.7647, "step": 12455 }, { "epoch": 1.6656860123027548, "grad_norm": 1.7578057050704956, "learning_rate": 8.701418217680525e-06, "loss": 0.6529, "step": 12456 }, { "epoch": 1.6658197378978337, "grad_norm": 1.556486964225769, "learning_rate": 8.699986709407156e-06, "loss": 0.7633, "step": 12457 }, { "epoch": 1.6659534634929125, "grad_norm": 1.3589372634887695, "learning_rate": 8.698555228231319e-06, "loss": 0.7914, "step": 12458 }, { "epoch": 1.6660871890879916, "grad_norm": 1.334647536277771, "learning_rate": 8.697123774182847e-06, "loss": 0.7337, "step": 12459 }, { "epoch": 1.6662209146830702, "grad_norm": 1.2330920696258545, "learning_rate": 8.695692347291586e-06, "loss": 0.6154, "step": 12460 }, { "epoch": 1.6663546402781493, "grad_norm": 1.347893476486206, "learning_rate": 8.694260947587372e-06, "loss": 0.759, "step": 12461 }, { "epoch": 1.6664883658732281, "grad_norm": 1.0674257278442383, "learning_rate": 8.692829575100037e-06, "loss": 0.5989, "step": 12462 }, { "epoch": 1.666622091468307, "grad_norm": 1.1802756786346436, "learning_rate": 8.69139822985942e-06, "loss": 0.6585, "step": 12463 }, { "epoch": 1.666755817063386, "grad_norm": 1.2098033428192139, "learning_rate": 8.68996691189535e-06, "loss": 0.7902, "step": 12464 }, { "epoch": 1.6668895426584647, "grad_norm": 1.1803351640701294, "learning_rate": 8.688535621237674e-06, "loss": 0.6894, "step": 12465 }, { "epoch": 1.6670232682535437, "grad_norm": 1.357036828994751, "learning_rate": 8.687104357916214e-06, "loss": 0.7404, "step": 12466 }, { "epoch": 1.6671569938486226, "grad_norm": 1.1231348514556885, "learning_rate": 8.685673121960805e-06, "loss": 0.6416, "step": 12467 }, { "epoch": 1.6672907194437014, "grad_norm": 1.1997489929199219, "learning_rate": 8.684241913401285e-06, "loss": 0.6708, "step": 12468 }, { "epoch": 1.6674244450387805, "grad_norm": 1.3341172933578491, "learning_rate": 8.682810732267486e-06, "loss": 0.7309, "step": 12469 }, { "epoch": 1.6675581706338594, "grad_norm": 1.1997499465942383, "learning_rate": 8.681379578589232e-06, "loss": 0.6862, "step": 12470 }, { "epoch": 1.6676918962289382, "grad_norm": 1.1846867799758911, "learning_rate": 8.679948452396361e-06, "loss": 0.653, "step": 12471 }, { "epoch": 1.6678256218240173, "grad_norm": 1.1316466331481934, "learning_rate": 8.678517353718699e-06, "loss": 0.6476, "step": 12472 }, { "epoch": 1.667959347419096, "grad_norm": 1.328794240951538, "learning_rate": 8.67708628258608e-06, "loss": 0.7163, "step": 12473 }, { "epoch": 1.668093073014175, "grad_norm": 1.1940380334854126, "learning_rate": 8.675655239028333e-06, "loss": 0.7099, "step": 12474 }, { "epoch": 1.6682267986092538, "grad_norm": 1.1756426095962524, "learning_rate": 8.674224223075283e-06, "loss": 0.6675, "step": 12475 }, { "epoch": 1.6683605242043327, "grad_norm": 1.33180570602417, "learning_rate": 8.672793234756762e-06, "loss": 0.6899, "step": 12476 }, { "epoch": 1.6684942497994117, "grad_norm": 1.1839414834976196, "learning_rate": 8.671362274102598e-06, "loss": 0.6941, "step": 12477 }, { "epoch": 1.6686279753944904, "grad_norm": 1.2693500518798828, "learning_rate": 8.66993134114261e-06, "loss": 0.6536, "step": 12478 }, { "epoch": 1.6687617009895694, "grad_norm": 1.3352482318878174, "learning_rate": 8.668500435906635e-06, "loss": 0.7355, "step": 12479 }, { "epoch": 1.6688954265846483, "grad_norm": 1.1899019479751587, "learning_rate": 8.667069558424493e-06, "loss": 0.6689, "step": 12480 }, { "epoch": 1.6690291521797271, "grad_norm": 1.1978273391723633, "learning_rate": 8.66563870872601e-06, "loss": 0.7324, "step": 12481 }, { "epoch": 1.6691628777748062, "grad_norm": 1.3753151893615723, "learning_rate": 8.664207886841014e-06, "loss": 0.7844, "step": 12482 }, { "epoch": 1.6692966033698848, "grad_norm": 1.3972060680389404, "learning_rate": 8.662777092799322e-06, "loss": 0.7412, "step": 12483 }, { "epoch": 1.669430328964964, "grad_norm": 1.1643095016479492, "learning_rate": 8.661346326630767e-06, "loss": 0.6088, "step": 12484 }, { "epoch": 1.6695640545600428, "grad_norm": 1.2762802839279175, "learning_rate": 8.659915588365164e-06, "loss": 0.6934, "step": 12485 }, { "epoch": 1.6696977801551216, "grad_norm": 1.2050150632858276, "learning_rate": 8.658484878032335e-06, "loss": 0.6632, "step": 12486 }, { "epoch": 1.6698315057502007, "grad_norm": 1.1012870073318481, "learning_rate": 8.657054195662112e-06, "loss": 0.6321, "step": 12487 }, { "epoch": 1.6699652313452795, "grad_norm": 1.1518096923828125, "learning_rate": 8.655623541284304e-06, "loss": 0.6685, "step": 12488 }, { "epoch": 1.6700989569403584, "grad_norm": 1.4391409158706665, "learning_rate": 8.654192914928739e-06, "loss": 0.6534, "step": 12489 }, { "epoch": 1.6702326825354374, "grad_norm": 1.343369483947754, "learning_rate": 8.652762316625238e-06, "loss": 0.75, "step": 12490 }, { "epoch": 1.670366408130516, "grad_norm": 1.209408164024353, "learning_rate": 8.651331746403611e-06, "loss": 0.7311, "step": 12491 }, { "epoch": 1.6705001337255951, "grad_norm": 1.0760979652404785, "learning_rate": 8.649901204293685e-06, "loss": 0.6656, "step": 12492 }, { "epoch": 1.670633859320674, "grad_norm": 1.187767744064331, "learning_rate": 8.648470690325277e-06, "loss": 0.6537, "step": 12493 }, { "epoch": 1.6707675849157528, "grad_norm": 1.1163438558578491, "learning_rate": 8.647040204528206e-06, "loss": 0.6062, "step": 12494 }, { "epoch": 1.670901310510832, "grad_norm": 1.1550617218017578, "learning_rate": 8.645609746932288e-06, "loss": 0.6554, "step": 12495 }, { "epoch": 1.6710350361059105, "grad_norm": 1.3999446630477905, "learning_rate": 8.644179317567335e-06, "loss": 0.6944, "step": 12496 }, { "epoch": 1.6711687617009896, "grad_norm": 1.3315181732177734, "learning_rate": 8.64274891646317e-06, "loss": 0.6437, "step": 12497 }, { "epoch": 1.6713024872960685, "grad_norm": 1.067337155342102, "learning_rate": 8.641318543649602e-06, "loss": 0.6573, "step": 12498 }, { "epoch": 1.6714362128911473, "grad_norm": 1.1923857927322388, "learning_rate": 8.639888199156449e-06, "loss": 0.6289, "step": 12499 }, { "epoch": 1.6715699384862264, "grad_norm": 1.1675527095794678, "learning_rate": 8.638457883013529e-06, "loss": 0.6731, "step": 12500 }, { "epoch": 1.671703664081305, "grad_norm": 1.086632251739502, "learning_rate": 8.637027595250646e-06, "loss": 0.6308, "step": 12501 }, { "epoch": 1.671837389676384, "grad_norm": 1.1351196765899658, "learning_rate": 8.635597335897623e-06, "loss": 0.7017, "step": 12502 }, { "epoch": 1.671971115271463, "grad_norm": 1.2161800861358643, "learning_rate": 8.63416710498427e-06, "loss": 0.7266, "step": 12503 }, { "epoch": 1.6721048408665418, "grad_norm": 1.211410641670227, "learning_rate": 8.63273690254039e-06, "loss": 0.6823, "step": 12504 }, { "epoch": 1.6722385664616208, "grad_norm": 1.1712548732757568, "learning_rate": 8.631306728595804e-06, "loss": 0.5785, "step": 12505 }, { "epoch": 1.6723722920566997, "grad_norm": 1.3365895748138428, "learning_rate": 8.629876583180322e-06, "loss": 0.7561, "step": 12506 }, { "epoch": 1.6725060176517785, "grad_norm": 1.1151317358016968, "learning_rate": 8.628446466323748e-06, "loss": 0.6249, "step": 12507 }, { "epoch": 1.6726397432468576, "grad_norm": 1.3312451839447021, "learning_rate": 8.627016378055896e-06, "loss": 0.7013, "step": 12508 }, { "epoch": 1.6727734688419362, "grad_norm": 1.1077631711959839, "learning_rate": 8.625586318406574e-06, "loss": 0.6323, "step": 12509 }, { "epoch": 1.6729071944370153, "grad_norm": 1.2349666357040405, "learning_rate": 8.624156287405591e-06, "loss": 0.7314, "step": 12510 }, { "epoch": 1.6730409200320941, "grad_norm": 1.3365390300750732, "learning_rate": 8.622726285082753e-06, "loss": 0.7739, "step": 12511 }, { "epoch": 1.673174645627173, "grad_norm": 1.3835841417312622, "learning_rate": 8.621296311467868e-06, "loss": 0.6535, "step": 12512 }, { "epoch": 1.673308371222252, "grad_norm": 1.306641936302185, "learning_rate": 8.61986636659074e-06, "loss": 0.7294, "step": 12513 }, { "epoch": 1.6734420968173307, "grad_norm": 1.2207576036453247, "learning_rate": 8.618436450481182e-06, "loss": 0.76, "step": 12514 }, { "epoch": 1.6735758224124098, "grad_norm": 1.0595773458480835, "learning_rate": 8.617006563168986e-06, "loss": 0.6388, "step": 12515 }, { "epoch": 1.6737095480074886, "grad_norm": 1.2052029371261597, "learning_rate": 8.615576704683972e-06, "loss": 0.7508, "step": 12516 }, { "epoch": 1.6738432736025675, "grad_norm": 1.2362549304962158, "learning_rate": 8.614146875055933e-06, "loss": 0.6894, "step": 12517 }, { "epoch": 1.6739769991976465, "grad_norm": 1.2581454515457153, "learning_rate": 8.612717074314677e-06, "loss": 0.7011, "step": 12518 }, { "epoch": 1.6741107247927254, "grad_norm": 1.2816431522369385, "learning_rate": 8.611287302490008e-06, "loss": 0.7014, "step": 12519 }, { "epoch": 1.6742444503878042, "grad_norm": 1.2089641094207764, "learning_rate": 8.609857559611723e-06, "loss": 0.6913, "step": 12520 }, { "epoch": 1.674378175982883, "grad_norm": 1.356865644454956, "learning_rate": 8.608427845709632e-06, "loss": 0.75, "step": 12521 }, { "epoch": 1.674511901577962, "grad_norm": 1.489906668663025, "learning_rate": 8.60699816081353e-06, "loss": 0.7448, "step": 12522 }, { "epoch": 1.674645627173041, "grad_norm": 1.4315907955169678, "learning_rate": 8.605568504953213e-06, "loss": 0.6496, "step": 12523 }, { "epoch": 1.6747793527681198, "grad_norm": 1.2465261220932007, "learning_rate": 8.60413887815849e-06, "loss": 0.6653, "step": 12524 }, { "epoch": 1.6749130783631987, "grad_norm": 1.1621308326721191, "learning_rate": 8.602709280459156e-06, "loss": 0.6422, "step": 12525 }, { "epoch": 1.6750468039582778, "grad_norm": 1.2785048484802246, "learning_rate": 8.60127971188501e-06, "loss": 0.7162, "step": 12526 }, { "epoch": 1.6751805295533564, "grad_norm": 1.2701060771942139, "learning_rate": 8.599850172465851e-06, "loss": 0.7648, "step": 12527 }, { "epoch": 1.6753142551484355, "grad_norm": 1.3403438329696655, "learning_rate": 8.598420662231473e-06, "loss": 0.7635, "step": 12528 }, { "epoch": 1.6754479807435143, "grad_norm": 1.293044924736023, "learning_rate": 8.596991181211679e-06, "loss": 0.7371, "step": 12529 }, { "epoch": 1.6755817063385932, "grad_norm": 1.1402117013931274, "learning_rate": 8.595561729436257e-06, "loss": 0.7078, "step": 12530 }, { "epoch": 1.6757154319336722, "grad_norm": 1.3669507503509521, "learning_rate": 8.594132306935008e-06, "loss": 0.7872, "step": 12531 }, { "epoch": 1.6758491575287509, "grad_norm": 1.2211463451385498, "learning_rate": 8.592702913737727e-06, "loss": 0.5982, "step": 12532 }, { "epoch": 1.67598288312383, "grad_norm": 1.303511142730713, "learning_rate": 8.591273549874204e-06, "loss": 0.7535, "step": 12533 }, { "epoch": 1.6761166087189088, "grad_norm": 1.1747208833694458, "learning_rate": 8.58984421537424e-06, "loss": 0.7229, "step": 12534 }, { "epoch": 1.6762503343139876, "grad_norm": 1.1237784624099731, "learning_rate": 8.588414910267623e-06, "loss": 0.6008, "step": 12535 }, { "epoch": 1.6763840599090667, "grad_norm": 1.167698621749878, "learning_rate": 8.586985634584145e-06, "loss": 0.6962, "step": 12536 }, { "epoch": 1.6765177855041455, "grad_norm": 1.167222499847412, "learning_rate": 8.5855563883536e-06, "loss": 0.6244, "step": 12537 }, { "epoch": 1.6766515110992244, "grad_norm": 1.2443809509277344, "learning_rate": 8.58412717160578e-06, "loss": 0.6844, "step": 12538 }, { "epoch": 1.6767852366943032, "grad_norm": 1.1030668020248413, "learning_rate": 8.582697984370471e-06, "loss": 0.5852, "step": 12539 }, { "epoch": 1.676918962289382, "grad_norm": 1.244611382484436, "learning_rate": 8.58126882667747e-06, "loss": 0.721, "step": 12540 }, { "epoch": 1.6770526878844612, "grad_norm": 1.421364665031433, "learning_rate": 8.579839698556558e-06, "loss": 0.7606, "step": 12541 }, { "epoch": 1.67718641347954, "grad_norm": 1.2461024522781372, "learning_rate": 8.578410600037533e-06, "loss": 0.657, "step": 12542 }, { "epoch": 1.6773201390746189, "grad_norm": 1.1580002307891846, "learning_rate": 8.576981531150177e-06, "loss": 0.6402, "step": 12543 }, { "epoch": 1.677453864669698, "grad_norm": 1.2995128631591797, "learning_rate": 8.57555249192428e-06, "loss": 0.6476, "step": 12544 }, { "epoch": 1.6775875902647766, "grad_norm": 1.1639846563339233, "learning_rate": 8.574123482389627e-06, "loss": 0.706, "step": 12545 }, { "epoch": 1.6777213158598556, "grad_norm": 1.1439696550369263, "learning_rate": 8.572694502576009e-06, "loss": 0.6584, "step": 12546 }, { "epoch": 1.6778550414549345, "grad_norm": 1.180978775024414, "learning_rate": 8.571265552513205e-06, "loss": 0.6606, "step": 12547 }, { "epoch": 1.6779887670500133, "grad_norm": 1.2616256475448608, "learning_rate": 8.569836632231005e-06, "loss": 0.6756, "step": 12548 }, { "epoch": 1.6781224926450924, "grad_norm": 1.2243560552597046, "learning_rate": 8.568407741759188e-06, "loss": 0.6823, "step": 12549 }, { "epoch": 1.678256218240171, "grad_norm": 1.2548588514328003, "learning_rate": 8.566978881127544e-06, "loss": 0.6838, "step": 12550 }, { "epoch": 1.67838994383525, "grad_norm": 1.053565502166748, "learning_rate": 8.565550050365858e-06, "loss": 0.6704, "step": 12551 }, { "epoch": 1.678523669430329, "grad_norm": 1.2140696048736572, "learning_rate": 8.564121249503901e-06, "loss": 0.6951, "step": 12552 }, { "epoch": 1.6786573950254078, "grad_norm": 1.1624490022659302, "learning_rate": 8.562692478571469e-06, "loss": 0.6082, "step": 12553 }, { "epoch": 1.6787911206204869, "grad_norm": 1.2249689102172852, "learning_rate": 8.561263737598338e-06, "loss": 0.7459, "step": 12554 }, { "epoch": 1.6789248462155657, "grad_norm": 1.3606446981430054, "learning_rate": 8.559835026614281e-06, "loss": 0.7029, "step": 12555 }, { "epoch": 1.6790585718106446, "grad_norm": 1.2434829473495483, "learning_rate": 8.558406345649088e-06, "loss": 0.6557, "step": 12556 }, { "epoch": 1.6791922974057236, "grad_norm": 1.2610472440719604, "learning_rate": 8.556977694732535e-06, "loss": 0.6609, "step": 12557 }, { "epoch": 1.6793260230008022, "grad_norm": 1.1929535865783691, "learning_rate": 8.555549073894403e-06, "loss": 0.7161, "step": 12558 }, { "epoch": 1.6794597485958813, "grad_norm": 1.270406723022461, "learning_rate": 8.554120483164467e-06, "loss": 0.651, "step": 12559 }, { "epoch": 1.6795934741909602, "grad_norm": 1.202677607536316, "learning_rate": 8.552691922572505e-06, "loss": 0.6615, "step": 12560 }, { "epoch": 1.679727199786039, "grad_norm": 1.149090051651001, "learning_rate": 8.551263392148298e-06, "loss": 0.6596, "step": 12561 }, { "epoch": 1.679860925381118, "grad_norm": 1.2523561716079712, "learning_rate": 8.549834891921616e-06, "loss": 0.7031, "step": 12562 }, { "epoch": 1.6799946509761967, "grad_norm": 1.2160288095474243, "learning_rate": 8.54840642192224e-06, "loss": 0.6403, "step": 12563 }, { "epoch": 1.6801283765712758, "grad_norm": 1.4041779041290283, "learning_rate": 8.54697798217994e-06, "loss": 0.7507, "step": 12564 }, { "epoch": 1.6802621021663546, "grad_norm": 1.1235220432281494, "learning_rate": 8.545549572724496e-06, "loss": 0.6201, "step": 12565 }, { "epoch": 1.6803958277614335, "grad_norm": 1.2747966051101685, "learning_rate": 8.544121193585681e-06, "loss": 0.7784, "step": 12566 }, { "epoch": 1.6805295533565126, "grad_norm": 1.2559876441955566, "learning_rate": 8.542692844793267e-06, "loss": 0.7278, "step": 12567 }, { "epoch": 1.6806632789515912, "grad_norm": 1.1781076192855835, "learning_rate": 8.541264526377021e-06, "loss": 0.7039, "step": 12568 }, { "epoch": 1.6807970045466702, "grad_norm": 1.2719155550003052, "learning_rate": 8.539836238366724e-06, "loss": 0.6998, "step": 12569 }, { "epoch": 1.680930730141749, "grad_norm": 1.284642219543457, "learning_rate": 8.538407980792144e-06, "loss": 0.7458, "step": 12570 }, { "epoch": 1.681064455736828, "grad_norm": 1.3887240886688232, "learning_rate": 8.536979753683046e-06, "loss": 0.7413, "step": 12571 }, { "epoch": 1.681198181331907, "grad_norm": 1.1581352949142456, "learning_rate": 8.535551557069211e-06, "loss": 0.6261, "step": 12572 }, { "epoch": 1.6813319069269859, "grad_norm": 1.2411030530929565, "learning_rate": 8.534123390980398e-06, "loss": 0.6508, "step": 12573 }, { "epoch": 1.6814656325220647, "grad_norm": 1.2681448459625244, "learning_rate": 8.532695255446384e-06, "loss": 0.6787, "step": 12574 }, { "epoch": 1.6815993581171438, "grad_norm": 1.2097887992858887, "learning_rate": 8.531267150496932e-06, "loss": 0.6812, "step": 12575 }, { "epoch": 1.6817330837122224, "grad_norm": 1.1672911643981934, "learning_rate": 8.52983907616181e-06, "loss": 0.6357, "step": 12576 }, { "epoch": 1.6818668093073015, "grad_norm": 1.4323292970657349, "learning_rate": 8.528411032470786e-06, "loss": 0.7375, "step": 12577 }, { "epoch": 1.6820005349023803, "grad_norm": 1.1874772310256958, "learning_rate": 8.526983019453624e-06, "loss": 0.6848, "step": 12578 }, { "epoch": 1.6821342604974592, "grad_norm": 1.3499449491500854, "learning_rate": 8.525555037140095e-06, "loss": 0.7648, "step": 12579 }, { "epoch": 1.6822679860925382, "grad_norm": 1.2850762605667114, "learning_rate": 8.524127085559961e-06, "loss": 0.7349, "step": 12580 }, { "epoch": 1.6824017116876169, "grad_norm": 1.1222763061523438, "learning_rate": 8.522699164742981e-06, "loss": 0.6856, "step": 12581 }, { "epoch": 1.682535437282696, "grad_norm": 1.3772772550582886, "learning_rate": 8.521271274718928e-06, "loss": 0.7056, "step": 12582 }, { "epoch": 1.6826691628777748, "grad_norm": 1.089009404182434, "learning_rate": 8.519843415517557e-06, "loss": 0.5965, "step": 12583 }, { "epoch": 1.6828028884728536, "grad_norm": 1.1247016191482544, "learning_rate": 8.518415587168634e-06, "loss": 0.6257, "step": 12584 }, { "epoch": 1.6829366140679327, "grad_norm": 1.3030263185501099, "learning_rate": 8.516987789701923e-06, "loss": 0.6541, "step": 12585 }, { "epoch": 1.6830703396630113, "grad_norm": 1.338673710823059, "learning_rate": 8.515560023147177e-06, "loss": 0.7845, "step": 12586 }, { "epoch": 1.6832040652580904, "grad_norm": 1.199763298034668, "learning_rate": 8.514132287534166e-06, "loss": 0.6634, "step": 12587 }, { "epoch": 1.6833377908531693, "grad_norm": 1.4011178016662598, "learning_rate": 8.512704582892646e-06, "loss": 0.7669, "step": 12588 }, { "epoch": 1.683471516448248, "grad_norm": 1.208861231803894, "learning_rate": 8.511276909252374e-06, "loss": 0.7175, "step": 12589 }, { "epoch": 1.6836052420433272, "grad_norm": 1.1918777227401733, "learning_rate": 8.509849266643112e-06, "loss": 0.7273, "step": 12590 }, { "epoch": 1.683738967638406, "grad_norm": 1.3381962776184082, "learning_rate": 8.508421655094618e-06, "loss": 0.6954, "step": 12591 }, { "epoch": 1.6838726932334849, "grad_norm": 1.1858441829681396, "learning_rate": 8.50699407463664e-06, "loss": 0.6641, "step": 12592 }, { "epoch": 1.684006418828564, "grad_norm": 1.0837119817733765, "learning_rate": 8.50556652529895e-06, "loss": 0.5883, "step": 12593 }, { "epoch": 1.6841401444236426, "grad_norm": 1.244067907333374, "learning_rate": 8.50413900711129e-06, "loss": 0.689, "step": 12594 }, { "epoch": 1.6842738700187216, "grad_norm": 1.254588007926941, "learning_rate": 8.502711520103425e-06, "loss": 0.6764, "step": 12595 }, { "epoch": 1.6844075956138005, "grad_norm": 1.3050950765609741, "learning_rate": 8.501284064305104e-06, "loss": 0.7788, "step": 12596 }, { "epoch": 1.6845413212088793, "grad_norm": 1.2358989715576172, "learning_rate": 8.49985663974608e-06, "loss": 0.7262, "step": 12597 }, { "epoch": 1.6846750468039584, "grad_norm": 1.1740919351577759, "learning_rate": 8.498429246456112e-06, "loss": 0.6798, "step": 12598 }, { "epoch": 1.684808772399037, "grad_norm": 1.3470181226730347, "learning_rate": 8.49700188446495e-06, "loss": 0.7596, "step": 12599 }, { "epoch": 1.684942497994116, "grad_norm": 1.2600083351135254, "learning_rate": 8.495574553802343e-06, "loss": 0.7361, "step": 12600 }, { "epoch": 1.685076223589195, "grad_norm": 1.1840137243270874, "learning_rate": 8.494147254498045e-06, "loss": 0.702, "step": 12601 }, { "epoch": 1.6852099491842738, "grad_norm": 1.1932307481765747, "learning_rate": 8.492719986581808e-06, "loss": 0.6086, "step": 12602 }, { "epoch": 1.6853436747793529, "grad_norm": 1.312119722366333, "learning_rate": 8.49129275008338e-06, "loss": 0.8095, "step": 12603 }, { "epoch": 1.6854774003744315, "grad_norm": 1.1024538278579712, "learning_rate": 8.489865545032512e-06, "loss": 0.7281, "step": 12604 }, { "epoch": 1.6856111259695106, "grad_norm": 1.209395408630371, "learning_rate": 8.488438371458949e-06, "loss": 0.6289, "step": 12605 }, { "epoch": 1.6857448515645894, "grad_norm": 1.4207433462142944, "learning_rate": 8.487011229392445e-06, "loss": 0.7006, "step": 12606 }, { "epoch": 1.6858785771596683, "grad_norm": 1.2793669700622559, "learning_rate": 8.485584118862743e-06, "loss": 0.7348, "step": 12607 }, { "epoch": 1.6860123027547473, "grad_norm": 1.2128007411956787, "learning_rate": 8.48415703989959e-06, "loss": 0.6976, "step": 12608 }, { "epoch": 1.6861460283498262, "grad_norm": 1.208802342414856, "learning_rate": 8.482729992532733e-06, "loss": 0.7244, "step": 12609 }, { "epoch": 1.686279753944905, "grad_norm": 1.2582327127456665, "learning_rate": 8.481302976791917e-06, "loss": 0.7273, "step": 12610 }, { "epoch": 1.686413479539984, "grad_norm": 1.1231346130371094, "learning_rate": 8.47987599270689e-06, "loss": 0.6718, "step": 12611 }, { "epoch": 1.6865472051350627, "grad_norm": 1.156197190284729, "learning_rate": 8.478449040307393e-06, "loss": 0.6107, "step": 12612 }, { "epoch": 1.6866809307301418, "grad_norm": 1.1130750179290771, "learning_rate": 8.477022119623165e-06, "loss": 0.6636, "step": 12613 }, { "epoch": 1.6868146563252207, "grad_norm": 1.0320173501968384, "learning_rate": 8.47559523068396e-06, "loss": 0.5917, "step": 12614 }, { "epoch": 1.6869483819202995, "grad_norm": 1.305985450744629, "learning_rate": 8.47416837351951e-06, "loss": 0.7342, "step": 12615 }, { "epoch": 1.6870821075153786, "grad_norm": 1.1145660877227783, "learning_rate": 8.472741548159559e-06, "loss": 0.6901, "step": 12616 }, { "epoch": 1.6872158331104572, "grad_norm": 1.1806029081344604, "learning_rate": 8.471314754633853e-06, "loss": 0.7162, "step": 12617 }, { "epoch": 1.6873495587055363, "grad_norm": 1.1462332010269165, "learning_rate": 8.469887992972124e-06, "loss": 0.6856, "step": 12618 }, { "epoch": 1.6874832843006151, "grad_norm": 1.2312220335006714, "learning_rate": 8.468461263204118e-06, "loss": 0.7555, "step": 12619 }, { "epoch": 1.687617009895694, "grad_norm": 1.1212772130966187, "learning_rate": 8.467034565359571e-06, "loss": 0.6092, "step": 12620 }, { "epoch": 1.687750735490773, "grad_norm": 1.1769607067108154, "learning_rate": 8.465607899468222e-06, "loss": 0.6326, "step": 12621 }, { "epoch": 1.6878844610858519, "grad_norm": 1.2133753299713135, "learning_rate": 8.464181265559807e-06, "loss": 0.6821, "step": 12622 }, { "epoch": 1.6880181866809307, "grad_norm": 1.3872541189193726, "learning_rate": 8.462754663664067e-06, "loss": 0.7102, "step": 12623 }, { "epoch": 1.6881519122760096, "grad_norm": 1.1456184387207031, "learning_rate": 8.46132809381073e-06, "loss": 0.6554, "step": 12624 }, { "epoch": 1.6882856378710884, "grad_norm": 1.2878984212875366, "learning_rate": 8.459901556029541e-06, "loss": 0.7308, "step": 12625 }, { "epoch": 1.6884193634661675, "grad_norm": 1.4174041748046875, "learning_rate": 8.458475050350227e-06, "loss": 0.7562, "step": 12626 }, { "epoch": 1.6885530890612463, "grad_norm": 1.1257339715957642, "learning_rate": 8.457048576802529e-06, "loss": 0.6233, "step": 12627 }, { "epoch": 1.6886868146563252, "grad_norm": 1.346755027770996, "learning_rate": 8.455622135416175e-06, "loss": 0.7294, "step": 12628 }, { "epoch": 1.6888205402514043, "grad_norm": 1.2853269577026367, "learning_rate": 8.454195726220898e-06, "loss": 0.7373, "step": 12629 }, { "epoch": 1.688954265846483, "grad_norm": 1.1935268640518188, "learning_rate": 8.452769349246434e-06, "loss": 0.7396, "step": 12630 }, { "epoch": 1.689087991441562, "grad_norm": 1.438927173614502, "learning_rate": 8.451343004522515e-06, "loss": 0.8071, "step": 12631 }, { "epoch": 1.6892217170366408, "grad_norm": 1.1363568305969238, "learning_rate": 8.449916692078863e-06, "loss": 0.6644, "step": 12632 }, { "epoch": 1.6893554426317197, "grad_norm": 1.1978378295898438, "learning_rate": 8.44849041194522e-06, "loss": 0.6382, "step": 12633 }, { "epoch": 1.6894891682267987, "grad_norm": 1.1633226871490479, "learning_rate": 8.447064164151305e-06, "loss": 0.6297, "step": 12634 }, { "epoch": 1.6896228938218774, "grad_norm": 1.2863752841949463, "learning_rate": 8.445637948726854e-06, "loss": 0.7954, "step": 12635 }, { "epoch": 1.6897566194169564, "grad_norm": 1.092232346534729, "learning_rate": 8.444211765701594e-06, "loss": 0.6824, "step": 12636 }, { "epoch": 1.6898903450120353, "grad_norm": 1.1640865802764893, "learning_rate": 8.442785615105247e-06, "loss": 0.6914, "step": 12637 }, { "epoch": 1.6900240706071141, "grad_norm": 1.2823617458343506, "learning_rate": 8.441359496967549e-06, "loss": 0.6636, "step": 12638 }, { "epoch": 1.6901577962021932, "grad_norm": 1.250607967376709, "learning_rate": 8.439933411318217e-06, "loss": 0.7108, "step": 12639 }, { "epoch": 1.690291521797272, "grad_norm": 1.2313709259033203, "learning_rate": 8.43850735818698e-06, "loss": 0.7316, "step": 12640 }, { "epoch": 1.690425247392351, "grad_norm": 1.164263129234314, "learning_rate": 8.437081337603566e-06, "loss": 0.5821, "step": 12641 }, { "epoch": 1.6905589729874297, "grad_norm": 1.2083113193511963, "learning_rate": 8.43565534959769e-06, "loss": 0.7066, "step": 12642 }, { "epoch": 1.6906926985825086, "grad_norm": 1.2969449758529663, "learning_rate": 8.434229394199089e-06, "loss": 0.6443, "step": 12643 }, { "epoch": 1.6908264241775877, "grad_norm": 1.284295916557312, "learning_rate": 8.432803471437476e-06, "loss": 0.7938, "step": 12644 }, { "epoch": 1.6909601497726665, "grad_norm": 1.2096102237701416, "learning_rate": 8.43137758134257e-06, "loss": 0.6735, "step": 12645 }, { "epoch": 1.6910938753677454, "grad_norm": 1.219283103942871, "learning_rate": 8.429951723944103e-06, "loss": 0.6859, "step": 12646 }, { "epoch": 1.6912276009628244, "grad_norm": 1.1531134843826294, "learning_rate": 8.428525899271787e-06, "loss": 0.6339, "step": 12647 }, { "epoch": 1.691361326557903, "grad_norm": 1.2346347570419312, "learning_rate": 8.427100107355344e-06, "loss": 0.6306, "step": 12648 }, { "epoch": 1.6914950521529821, "grad_norm": 1.2428038120269775, "learning_rate": 8.425674348224498e-06, "loss": 0.6744, "step": 12649 }, { "epoch": 1.691628777748061, "grad_norm": 1.2464931011199951, "learning_rate": 8.424248621908959e-06, "loss": 0.744, "step": 12650 }, { "epoch": 1.6917625033431398, "grad_norm": 1.3033384084701538, "learning_rate": 8.422822928438453e-06, "loss": 0.6573, "step": 12651 }, { "epoch": 1.691896228938219, "grad_norm": 1.2556694746017456, "learning_rate": 8.421397267842693e-06, "loss": 0.7233, "step": 12652 }, { "epoch": 1.6920299545332975, "grad_norm": 1.1637213230133057, "learning_rate": 8.419971640151397e-06, "loss": 0.6873, "step": 12653 }, { "epoch": 1.6921636801283766, "grad_norm": 1.3313084840774536, "learning_rate": 8.41854604539428e-06, "loss": 0.8026, "step": 12654 }, { "epoch": 1.6922974057234554, "grad_norm": 1.3373881578445435, "learning_rate": 8.417120483601058e-06, "loss": 0.7479, "step": 12655 }, { "epoch": 1.6924311313185343, "grad_norm": 1.1625293493270874, "learning_rate": 8.41569495480144e-06, "loss": 0.6275, "step": 12656 }, { "epoch": 1.6925648569136134, "grad_norm": 1.1874030828475952, "learning_rate": 8.414269459025152e-06, "loss": 0.656, "step": 12657 }, { "epoch": 1.6926985825086922, "grad_norm": 1.3021211624145508, "learning_rate": 8.412843996301894e-06, "loss": 0.7917, "step": 12658 }, { "epoch": 1.692832308103771, "grad_norm": 1.182937502861023, "learning_rate": 8.411418566661387e-06, "loss": 0.6823, "step": 12659 }, { "epoch": 1.6929660336988501, "grad_norm": 1.2208675146102905, "learning_rate": 8.40999317013334e-06, "loss": 0.6651, "step": 12660 }, { "epoch": 1.6930997592939288, "grad_norm": 1.136149525642395, "learning_rate": 8.408567806747461e-06, "loss": 0.6997, "step": 12661 }, { "epoch": 1.6932334848890078, "grad_norm": 1.355196475982666, "learning_rate": 8.407142476533468e-06, "loss": 0.6991, "step": 12662 }, { "epoch": 1.6933672104840867, "grad_norm": 1.408139944076538, "learning_rate": 8.40571717952106e-06, "loss": 0.7221, "step": 12663 }, { "epoch": 1.6935009360791655, "grad_norm": 1.3606435060501099, "learning_rate": 8.404291915739958e-06, "loss": 0.7651, "step": 12664 }, { "epoch": 1.6936346616742446, "grad_norm": 1.309222936630249, "learning_rate": 8.402866685219863e-06, "loss": 0.7209, "step": 12665 }, { "epoch": 1.6937683872693232, "grad_norm": 1.214650273323059, "learning_rate": 8.401441487990478e-06, "loss": 0.7306, "step": 12666 }, { "epoch": 1.6939021128644023, "grad_norm": 1.4477800130844116, "learning_rate": 8.40001632408152e-06, "loss": 0.7424, "step": 12667 }, { "epoch": 1.6940358384594811, "grad_norm": 1.2210028171539307, "learning_rate": 8.398591193522691e-06, "loss": 0.7333, "step": 12668 }, { "epoch": 1.69416956405456, "grad_norm": 1.305587887763977, "learning_rate": 8.397166096343694e-06, "loss": 0.7119, "step": 12669 }, { "epoch": 1.694303289649639, "grad_norm": 1.28839910030365, "learning_rate": 8.39574103257424e-06, "loss": 0.7025, "step": 12670 }, { "epoch": 1.6944370152447177, "grad_norm": 1.1505221128463745, "learning_rate": 8.394316002244023e-06, "loss": 0.6137, "step": 12671 }, { "epoch": 1.6945707408397968, "grad_norm": 1.1304563283920288, "learning_rate": 8.392891005382756e-06, "loss": 0.6732, "step": 12672 }, { "epoch": 1.6947044664348756, "grad_norm": 1.3093501329421997, "learning_rate": 8.39146604202014e-06, "loss": 0.7788, "step": 12673 }, { "epoch": 1.6948381920299544, "grad_norm": 1.3738151788711548, "learning_rate": 8.39004111218587e-06, "loss": 0.7549, "step": 12674 }, { "epoch": 1.6949719176250335, "grad_norm": 1.198561668395996, "learning_rate": 8.388616215909657e-06, "loss": 0.7197, "step": 12675 }, { "epoch": 1.6951056432201124, "grad_norm": 1.2479665279388428, "learning_rate": 8.387191353221198e-06, "loss": 0.6396, "step": 12676 }, { "epoch": 1.6952393688151912, "grad_norm": 1.1282187700271606, "learning_rate": 8.385766524150187e-06, "loss": 0.6695, "step": 12677 }, { "epoch": 1.6953730944102703, "grad_norm": 1.2233806848526, "learning_rate": 8.384341728726333e-06, "loss": 0.7722, "step": 12678 }, { "epoch": 1.695506820005349, "grad_norm": 1.1995158195495605, "learning_rate": 8.382916966979326e-06, "loss": 0.6936, "step": 12679 }, { "epoch": 1.695640545600428, "grad_norm": 1.1774523258209229, "learning_rate": 8.381492238938868e-06, "loss": 0.6072, "step": 12680 }, { "epoch": 1.6957742711955068, "grad_norm": 1.2051258087158203, "learning_rate": 8.380067544634658e-06, "loss": 0.7088, "step": 12681 }, { "epoch": 1.6959079967905857, "grad_norm": 1.2617137432098389, "learning_rate": 8.378642884096386e-06, "loss": 0.8068, "step": 12682 }, { "epoch": 1.6960417223856648, "grad_norm": 1.1566762924194336, "learning_rate": 8.377218257353757e-06, "loss": 0.696, "step": 12683 }, { "epoch": 1.6961754479807434, "grad_norm": 1.3244025707244873, "learning_rate": 8.375793664436459e-06, "loss": 0.7538, "step": 12684 }, { "epoch": 1.6963091735758224, "grad_norm": 1.2162891626358032, "learning_rate": 8.374369105374183e-06, "loss": 0.7173, "step": 12685 }, { "epoch": 1.6964428991709013, "grad_norm": 1.3229069709777832, "learning_rate": 8.372944580196631e-06, "loss": 0.6716, "step": 12686 }, { "epoch": 1.6965766247659801, "grad_norm": 1.1752147674560547, "learning_rate": 8.37152008893349e-06, "loss": 0.6156, "step": 12687 }, { "epoch": 1.6967103503610592, "grad_norm": 1.2808023691177368, "learning_rate": 8.370095631614459e-06, "loss": 0.7706, "step": 12688 }, { "epoch": 1.6968440759561378, "grad_norm": 1.3911948204040527, "learning_rate": 8.368671208269224e-06, "loss": 0.763, "step": 12689 }, { "epoch": 1.696977801551217, "grad_norm": 1.2635115385055542, "learning_rate": 8.367246818927472e-06, "loss": 0.7614, "step": 12690 }, { "epoch": 1.6971115271462958, "grad_norm": 1.201280117034912, "learning_rate": 8.365822463618902e-06, "loss": 0.6999, "step": 12691 }, { "epoch": 1.6972452527413746, "grad_norm": 1.248910903930664, "learning_rate": 8.364398142373198e-06, "loss": 0.6224, "step": 12692 }, { "epoch": 1.6973789783364537, "grad_norm": 1.3601746559143066, "learning_rate": 8.362973855220046e-06, "loss": 0.791, "step": 12693 }, { "epoch": 1.6975127039315325, "grad_norm": 1.1967077255249023, "learning_rate": 8.361549602189145e-06, "loss": 0.5768, "step": 12694 }, { "epoch": 1.6976464295266114, "grad_norm": 1.216409683227539, "learning_rate": 8.360125383310167e-06, "loss": 0.7103, "step": 12695 }, { "epoch": 1.6977801551216904, "grad_norm": 1.1632540225982666, "learning_rate": 8.358701198612814e-06, "loss": 0.7192, "step": 12696 }, { "epoch": 1.697913880716769, "grad_norm": 1.1656478643417358, "learning_rate": 8.35727704812676e-06, "loss": 0.7105, "step": 12697 }, { "epoch": 1.6980476063118481, "grad_norm": 1.3913065195083618, "learning_rate": 8.355852931881692e-06, "loss": 0.7968, "step": 12698 }, { "epoch": 1.698181331906927, "grad_norm": 1.0954447984695435, "learning_rate": 8.354428849907298e-06, "loss": 0.6451, "step": 12699 }, { "epoch": 1.6983150575020058, "grad_norm": 1.303686499595642, "learning_rate": 8.353004802233262e-06, "loss": 0.7283, "step": 12700 }, { "epoch": 1.698448783097085, "grad_norm": 1.205492615699768, "learning_rate": 8.35158078888926e-06, "loss": 0.7608, "step": 12701 }, { "epoch": 1.6985825086921635, "grad_norm": 1.4294120073318481, "learning_rate": 8.350156809904984e-06, "loss": 0.7449, "step": 12702 }, { "epoch": 1.6987162342872426, "grad_norm": 1.3111544847488403, "learning_rate": 8.348732865310107e-06, "loss": 0.7288, "step": 12703 }, { "epoch": 1.6988499598823215, "grad_norm": 1.2754124402999878, "learning_rate": 8.347308955134317e-06, "loss": 0.6962, "step": 12704 }, { "epoch": 1.6989836854774003, "grad_norm": 1.3212411403656006, "learning_rate": 8.345885079407287e-06, "loss": 0.6674, "step": 12705 }, { "epoch": 1.6991174110724794, "grad_norm": 1.2182207107543945, "learning_rate": 8.3444612381587e-06, "loss": 0.657, "step": 12706 }, { "epoch": 1.699251136667558, "grad_norm": 1.2850276231765747, "learning_rate": 8.343037431418236e-06, "loss": 0.7699, "step": 12707 }, { "epoch": 1.699384862262637, "grad_norm": 1.2729556560516357, "learning_rate": 8.341613659215574e-06, "loss": 0.6862, "step": 12708 }, { "epoch": 1.699518587857716, "grad_norm": 1.2506012916564941, "learning_rate": 8.340189921580383e-06, "loss": 0.6737, "step": 12709 }, { "epoch": 1.6996523134527948, "grad_norm": 1.2971998453140259, "learning_rate": 8.338766218542348e-06, "loss": 0.7618, "step": 12710 }, { "epoch": 1.6997860390478738, "grad_norm": 1.3115055561065674, "learning_rate": 8.337342550131137e-06, "loss": 0.752, "step": 12711 }, { "epoch": 1.6999197646429527, "grad_norm": 1.2073111534118652, "learning_rate": 8.335918916376435e-06, "loss": 0.7266, "step": 12712 }, { "epoch": 1.7000534902380315, "grad_norm": 1.380050539970398, "learning_rate": 8.33449531730791e-06, "loss": 0.7149, "step": 12713 }, { "epoch": 1.7001872158331106, "grad_norm": 1.2507132291793823, "learning_rate": 8.333071752955233e-06, "loss": 0.7359, "step": 12714 }, { "epoch": 1.7003209414281892, "grad_norm": 1.1087751388549805, "learning_rate": 8.331648223348083e-06, "loss": 0.5877, "step": 12715 }, { "epoch": 1.7004546670232683, "grad_norm": 1.2438302040100098, "learning_rate": 8.330224728516132e-06, "loss": 0.681, "step": 12716 }, { "epoch": 1.7005883926183472, "grad_norm": 1.3261548280715942, "learning_rate": 8.328801268489043e-06, "loss": 0.7267, "step": 12717 }, { "epoch": 1.700722118213426, "grad_norm": 1.1668728590011597, "learning_rate": 8.327377843296493e-06, "loss": 0.6562, "step": 12718 }, { "epoch": 1.700855843808505, "grad_norm": 1.1999346017837524, "learning_rate": 8.325954452968152e-06, "loss": 0.6628, "step": 12719 }, { "epoch": 1.7009895694035837, "grad_norm": 1.2253391742706299, "learning_rate": 8.324531097533692e-06, "loss": 0.729, "step": 12720 }, { "epoch": 1.7011232949986628, "grad_norm": 1.3300126791000366, "learning_rate": 8.323107777022778e-06, "loss": 0.7465, "step": 12721 }, { "epoch": 1.7012570205937416, "grad_norm": 1.2687304019927979, "learning_rate": 8.321684491465072e-06, "loss": 0.6327, "step": 12722 }, { "epoch": 1.7013907461888205, "grad_norm": 1.4037764072418213, "learning_rate": 8.320261240890253e-06, "loss": 0.812, "step": 12723 }, { "epoch": 1.7015244717838995, "grad_norm": 1.3550457954406738, "learning_rate": 8.318838025327977e-06, "loss": 0.6976, "step": 12724 }, { "epoch": 1.7016581973789784, "grad_norm": 1.3242225646972656, "learning_rate": 8.317414844807915e-06, "loss": 0.7251, "step": 12725 }, { "epoch": 1.7017919229740572, "grad_norm": 1.2529979944229126, "learning_rate": 8.31599169935973e-06, "loss": 0.7169, "step": 12726 }, { "epoch": 1.701925648569136, "grad_norm": 1.2296323776245117, "learning_rate": 8.314568589013085e-06, "loss": 0.706, "step": 12727 }, { "epoch": 1.702059374164215, "grad_norm": 1.1020498275756836, "learning_rate": 8.31314551379765e-06, "loss": 0.6014, "step": 12728 }, { "epoch": 1.702193099759294, "grad_norm": 1.1587034463882446, "learning_rate": 8.311722473743082e-06, "loss": 0.729, "step": 12729 }, { "epoch": 1.7023268253543729, "grad_norm": 1.3119348287582397, "learning_rate": 8.31029946887904e-06, "loss": 0.7195, "step": 12730 }, { "epoch": 1.7024605509494517, "grad_norm": 1.3630188703536987, "learning_rate": 8.308876499235189e-06, "loss": 0.6822, "step": 12731 }, { "epoch": 1.7025942765445308, "grad_norm": 1.112807035446167, "learning_rate": 8.307453564841193e-06, "loss": 0.6474, "step": 12732 }, { "epoch": 1.7027280021396094, "grad_norm": 1.187903881072998, "learning_rate": 8.3060306657267e-06, "loss": 0.7485, "step": 12733 }, { "epoch": 1.7028617277346885, "grad_norm": 1.320151925086975, "learning_rate": 8.304607801921385e-06, "loss": 0.6929, "step": 12734 }, { "epoch": 1.7029954533297673, "grad_norm": 1.2018464803695679, "learning_rate": 8.303184973454893e-06, "loss": 0.7194, "step": 12735 }, { "epoch": 1.7031291789248462, "grad_norm": 1.2483669519424438, "learning_rate": 8.301762180356891e-06, "loss": 0.7203, "step": 12736 }, { "epoch": 1.7032629045199252, "grad_norm": 1.119397759437561, "learning_rate": 8.300339422657027e-06, "loss": 0.6704, "step": 12737 }, { "epoch": 1.7033966301150039, "grad_norm": 1.1859266757965088, "learning_rate": 8.29891670038496e-06, "loss": 0.6505, "step": 12738 }, { "epoch": 1.703530355710083, "grad_norm": 1.1386950016021729, "learning_rate": 8.297494013570354e-06, "loss": 0.6685, "step": 12739 }, { "epoch": 1.7036640813051618, "grad_norm": 1.216395378112793, "learning_rate": 8.296071362242853e-06, "loss": 0.7104, "step": 12740 }, { "epoch": 1.7037978069002406, "grad_norm": 1.215865969657898, "learning_rate": 8.29464874643211e-06, "loss": 0.6096, "step": 12741 }, { "epoch": 1.7039315324953197, "grad_norm": 1.287767767906189, "learning_rate": 8.293226166167788e-06, "loss": 0.7321, "step": 12742 }, { "epoch": 1.7040652580903985, "grad_norm": 1.1900346279144287, "learning_rate": 8.291803621479528e-06, "loss": 0.6479, "step": 12743 }, { "epoch": 1.7041989836854774, "grad_norm": 1.2542630434036255, "learning_rate": 8.290381112396989e-06, "loss": 0.6526, "step": 12744 }, { "epoch": 1.7043327092805562, "grad_norm": 1.2381327152252197, "learning_rate": 8.288958638949822e-06, "loss": 0.6047, "step": 12745 }, { "epoch": 1.704466434875635, "grad_norm": 1.3802180290222168, "learning_rate": 8.28753620116767e-06, "loss": 0.7326, "step": 12746 }, { "epoch": 1.7046001604707142, "grad_norm": 1.191171407699585, "learning_rate": 8.286113799080192e-06, "loss": 0.689, "step": 12747 }, { "epoch": 1.704733886065793, "grad_norm": 1.2118676900863647, "learning_rate": 8.284691432717028e-06, "loss": 0.7179, "step": 12748 }, { "epoch": 1.7048676116608719, "grad_norm": 1.221256971359253, "learning_rate": 8.283269102107832e-06, "loss": 0.6396, "step": 12749 }, { "epoch": 1.705001337255951, "grad_norm": 1.2459454536437988, "learning_rate": 8.281846807282248e-06, "loss": 0.6516, "step": 12750 }, { "epoch": 1.7051350628510296, "grad_norm": 1.3179590702056885, "learning_rate": 8.280424548269922e-06, "loss": 0.6838, "step": 12751 }, { "epoch": 1.7052687884461086, "grad_norm": 1.1464793682098389, "learning_rate": 8.279002325100505e-06, "loss": 0.6699, "step": 12752 }, { "epoch": 1.7054025140411875, "grad_norm": 1.2682521343231201, "learning_rate": 8.277580137803636e-06, "loss": 0.705, "step": 12753 }, { "epoch": 1.7055362396362663, "grad_norm": 1.2531061172485352, "learning_rate": 8.276157986408959e-06, "loss": 0.6312, "step": 12754 }, { "epoch": 1.7056699652313454, "grad_norm": 1.2168264389038086, "learning_rate": 8.274735870946122e-06, "loss": 0.6488, "step": 12755 }, { "epoch": 1.705803690826424, "grad_norm": 1.4569647312164307, "learning_rate": 8.273313791444762e-06, "loss": 0.8013, "step": 12756 }, { "epoch": 1.705937416421503, "grad_norm": 1.2833836078643799, "learning_rate": 8.271891747934524e-06, "loss": 0.6787, "step": 12757 }, { "epoch": 1.706071142016582, "grad_norm": 1.269630789756775, "learning_rate": 8.270469740445052e-06, "loss": 0.6702, "step": 12758 }, { "epoch": 1.7062048676116608, "grad_norm": 1.4853568077087402, "learning_rate": 8.269047769005978e-06, "loss": 0.773, "step": 12759 }, { "epoch": 1.7063385932067399, "grad_norm": 1.2489780187606812, "learning_rate": 8.267625833646952e-06, "loss": 0.7471, "step": 12760 }, { "epoch": 1.7064723188018187, "grad_norm": 1.2595922946929932, "learning_rate": 8.266203934397608e-06, "loss": 0.6356, "step": 12761 }, { "epoch": 1.7066060443968976, "grad_norm": 1.1662458181381226, "learning_rate": 8.26478207128758e-06, "loss": 0.6659, "step": 12762 }, { "epoch": 1.7067397699919766, "grad_norm": 1.3388166427612305, "learning_rate": 8.26336024434651e-06, "loss": 0.7442, "step": 12763 }, { "epoch": 1.7068734955870553, "grad_norm": 1.1608309745788574, "learning_rate": 8.261938453604033e-06, "loss": 0.6793, "step": 12764 }, { "epoch": 1.7070072211821343, "grad_norm": 1.0983465909957886, "learning_rate": 8.26051669908979e-06, "loss": 0.598, "step": 12765 }, { "epoch": 1.7071409467772132, "grad_norm": 1.1819957494735718, "learning_rate": 8.259094980833411e-06, "loss": 0.6322, "step": 12766 }, { "epoch": 1.707274672372292, "grad_norm": 1.0930172204971313, "learning_rate": 8.257673298864528e-06, "loss": 0.6791, "step": 12767 }, { "epoch": 1.707408397967371, "grad_norm": 1.575594425201416, "learning_rate": 8.256251653212783e-06, "loss": 0.7685, "step": 12768 }, { "epoch": 1.7075421235624497, "grad_norm": 1.1964302062988281, "learning_rate": 8.254830043907799e-06, "loss": 0.5386, "step": 12769 }, { "epoch": 1.7076758491575288, "grad_norm": 1.2177408933639526, "learning_rate": 8.253408470979212e-06, "loss": 0.6507, "step": 12770 }, { "epoch": 1.7078095747526076, "grad_norm": 1.3633873462677002, "learning_rate": 8.251986934456658e-06, "loss": 0.6868, "step": 12771 }, { "epoch": 1.7079433003476865, "grad_norm": 1.1916502714157104, "learning_rate": 8.25056543436976e-06, "loss": 0.6643, "step": 12772 }, { "epoch": 1.7080770259427656, "grad_norm": 1.1619648933410645, "learning_rate": 8.249143970748155e-06, "loss": 0.7083, "step": 12773 }, { "epoch": 1.7082107515378442, "grad_norm": 1.3134262561798096, "learning_rate": 8.24772254362147e-06, "loss": 0.6981, "step": 12774 }, { "epoch": 1.7083444771329233, "grad_norm": 1.3022657632827759, "learning_rate": 8.246301153019326e-06, "loss": 0.7662, "step": 12775 }, { "epoch": 1.708478202728002, "grad_norm": 1.261699914932251, "learning_rate": 8.24487979897136e-06, "loss": 0.6897, "step": 12776 }, { "epoch": 1.708611928323081, "grad_norm": 1.2626806497573853, "learning_rate": 8.243458481507195e-06, "loss": 0.7069, "step": 12777 }, { "epoch": 1.70874565391816, "grad_norm": 1.2278270721435547, "learning_rate": 8.242037200656455e-06, "loss": 0.6861, "step": 12778 }, { "epoch": 1.7088793795132389, "grad_norm": 1.3510301113128662, "learning_rate": 8.24061595644877e-06, "loss": 0.8066, "step": 12779 }, { "epoch": 1.7090131051083177, "grad_norm": 1.2224266529083252, "learning_rate": 8.23919474891376e-06, "loss": 0.7114, "step": 12780 }, { "epoch": 1.7091468307033968, "grad_norm": 1.255384922027588, "learning_rate": 8.237773578081052e-06, "loss": 0.7171, "step": 12781 }, { "epoch": 1.7092805562984754, "grad_norm": 1.1592470407485962, "learning_rate": 8.236352443980268e-06, "loss": 0.6688, "step": 12782 }, { "epoch": 1.7094142818935545, "grad_norm": 1.289285659790039, "learning_rate": 8.234931346641025e-06, "loss": 0.7049, "step": 12783 }, { "epoch": 1.7095480074886333, "grad_norm": 1.2794567346572876, "learning_rate": 8.233510286092955e-06, "loss": 0.7764, "step": 12784 }, { "epoch": 1.7096817330837122, "grad_norm": 1.1646713018417358, "learning_rate": 8.232089262365672e-06, "loss": 0.6169, "step": 12785 }, { "epoch": 1.7098154586787913, "grad_norm": 1.176916480064392, "learning_rate": 8.230668275488794e-06, "loss": 0.6895, "step": 12786 }, { "epoch": 1.7099491842738699, "grad_norm": 1.2405433654785156, "learning_rate": 8.229247325491945e-06, "loss": 0.6589, "step": 12787 }, { "epoch": 1.710082909868949, "grad_norm": 1.5124691724777222, "learning_rate": 8.227826412404737e-06, "loss": 0.684, "step": 12788 }, { "epoch": 1.7102166354640278, "grad_norm": 1.2935627698898315, "learning_rate": 8.226405536256794e-06, "loss": 0.7416, "step": 12789 }, { "epoch": 1.7103503610591066, "grad_norm": 1.4103387594223022, "learning_rate": 8.224984697077734e-06, "loss": 0.7181, "step": 12790 }, { "epoch": 1.7104840866541857, "grad_norm": 1.1975380182266235, "learning_rate": 8.223563894897164e-06, "loss": 0.5976, "step": 12791 }, { "epoch": 1.7106178122492643, "grad_norm": 1.1826415061950684, "learning_rate": 8.222143129744708e-06, "loss": 0.7007, "step": 12792 }, { "epoch": 1.7107515378443434, "grad_norm": 1.288440465927124, "learning_rate": 8.220722401649979e-06, "loss": 0.7362, "step": 12793 }, { "epoch": 1.7108852634394223, "grad_norm": 1.32015860080719, "learning_rate": 8.219301710642583e-06, "loss": 0.7154, "step": 12794 }, { "epoch": 1.7110189890345011, "grad_norm": 1.1202263832092285, "learning_rate": 8.217881056752142e-06, "loss": 0.6248, "step": 12795 }, { "epoch": 1.7111527146295802, "grad_norm": 1.4491305351257324, "learning_rate": 8.216460440008263e-06, "loss": 0.7152, "step": 12796 }, { "epoch": 1.711286440224659, "grad_norm": 1.2150226831436157, "learning_rate": 8.215039860440564e-06, "loss": 0.6836, "step": 12797 }, { "epoch": 1.7114201658197379, "grad_norm": 1.1814301013946533, "learning_rate": 8.21361931807865e-06, "loss": 0.6493, "step": 12798 }, { "epoch": 1.711553891414817, "grad_norm": 1.1774060726165771, "learning_rate": 8.21219881295213e-06, "loss": 0.6034, "step": 12799 }, { "epoch": 1.7116876170098956, "grad_norm": 1.2926663160324097, "learning_rate": 8.210778345090617e-06, "loss": 0.6706, "step": 12800 }, { "epoch": 1.7118213426049746, "grad_norm": 1.30585515499115, "learning_rate": 8.209357914523716e-06, "loss": 0.6747, "step": 12801 }, { "epoch": 1.7119550682000535, "grad_norm": 1.4625037908554077, "learning_rate": 8.207937521281033e-06, "loss": 0.6936, "step": 12802 }, { "epoch": 1.7120887937951323, "grad_norm": 1.2131339311599731, "learning_rate": 8.206517165392183e-06, "loss": 0.7115, "step": 12803 }, { "epoch": 1.7122225193902114, "grad_norm": 1.1758376359939575, "learning_rate": 8.20509684688676e-06, "loss": 0.6789, "step": 12804 }, { "epoch": 1.71235624498529, "grad_norm": 1.1197118759155273, "learning_rate": 8.203676565794382e-06, "loss": 0.6722, "step": 12805 }, { "epoch": 1.7124899705803691, "grad_norm": 1.1939643621444702, "learning_rate": 8.202256322144647e-06, "loss": 0.7097, "step": 12806 }, { "epoch": 1.712623696175448, "grad_norm": 1.132934331893921, "learning_rate": 8.200836115967153e-06, "loss": 0.6493, "step": 12807 }, { "epoch": 1.7127574217705268, "grad_norm": 1.2295867204666138, "learning_rate": 8.199415947291512e-06, "loss": 0.697, "step": 12808 }, { "epoch": 1.7128911473656059, "grad_norm": 1.0900604724884033, "learning_rate": 8.197995816147325e-06, "loss": 0.6252, "step": 12809 }, { "epoch": 1.7130248729606847, "grad_norm": 1.1717404127120972, "learning_rate": 8.196575722564187e-06, "loss": 0.6174, "step": 12810 }, { "epoch": 1.7131585985557636, "grad_norm": 1.3035222291946411, "learning_rate": 8.195155666571705e-06, "loss": 0.6694, "step": 12811 }, { "epoch": 1.7132923241508424, "grad_norm": 1.223902702331543, "learning_rate": 8.193735648199473e-06, "loss": 0.6994, "step": 12812 }, { "epoch": 1.7134260497459213, "grad_norm": 1.2977434396743774, "learning_rate": 8.192315667477096e-06, "loss": 0.7424, "step": 12813 }, { "epoch": 1.7135597753410003, "grad_norm": 1.4670928716659546, "learning_rate": 8.190895724434169e-06, "loss": 0.842, "step": 12814 }, { "epoch": 1.7136935009360792, "grad_norm": 1.1869728565216064, "learning_rate": 8.189475819100286e-06, "loss": 0.659, "step": 12815 }, { "epoch": 1.713827226531158, "grad_norm": 1.3191691637039185, "learning_rate": 8.188055951505051e-06, "loss": 0.7305, "step": 12816 }, { "epoch": 1.7139609521262371, "grad_norm": 1.2622216939926147, "learning_rate": 8.186636121678057e-06, "loss": 0.7657, "step": 12817 }, { "epoch": 1.7140946777213157, "grad_norm": 1.2715426683425903, "learning_rate": 8.185216329648892e-06, "loss": 0.7474, "step": 12818 }, { "epoch": 1.7142284033163948, "grad_norm": 1.167702317237854, "learning_rate": 8.18379657544716e-06, "loss": 0.6626, "step": 12819 }, { "epoch": 1.7143621289114737, "grad_norm": 1.2841434478759766, "learning_rate": 8.18237685910245e-06, "loss": 0.6514, "step": 12820 }, { "epoch": 1.7144958545065525, "grad_norm": 1.4027804136276245, "learning_rate": 8.180957180644353e-06, "loss": 0.7711, "step": 12821 }, { "epoch": 1.7146295801016316, "grad_norm": 1.1240234375, "learning_rate": 8.179537540102466e-06, "loss": 0.5589, "step": 12822 }, { "epoch": 1.7147633056967102, "grad_norm": 1.297250747680664, "learning_rate": 8.178117937506375e-06, "loss": 0.6925, "step": 12823 }, { "epoch": 1.7148970312917893, "grad_norm": 1.2992154359817505, "learning_rate": 8.176698372885676e-06, "loss": 0.7385, "step": 12824 }, { "epoch": 1.7150307568868681, "grad_norm": 1.3466929197311401, "learning_rate": 8.175278846269953e-06, "loss": 0.7254, "step": 12825 }, { "epoch": 1.715164482481947, "grad_norm": 1.37809157371521, "learning_rate": 8.173859357688792e-06, "loss": 0.6521, "step": 12826 }, { "epoch": 1.715298208077026, "grad_norm": 1.1244807243347168, "learning_rate": 8.172439907171788e-06, "loss": 0.6372, "step": 12827 }, { "epoch": 1.715431933672105, "grad_norm": 1.662224531173706, "learning_rate": 8.171020494748526e-06, "loss": 0.7393, "step": 12828 }, { "epoch": 1.7155656592671837, "grad_norm": 1.1774975061416626, "learning_rate": 8.169601120448592e-06, "loss": 0.7268, "step": 12829 }, { "epoch": 1.7156993848622626, "grad_norm": 1.214506983757019, "learning_rate": 8.168181784301573e-06, "loss": 0.7227, "step": 12830 }, { "epoch": 1.7158331104573414, "grad_norm": 1.151482105255127, "learning_rate": 8.166762486337045e-06, "loss": 0.7058, "step": 12831 }, { "epoch": 1.7159668360524205, "grad_norm": 1.4210294485092163, "learning_rate": 8.165343226584605e-06, "loss": 0.8111, "step": 12832 }, { "epoch": 1.7161005616474994, "grad_norm": 1.2370742559432983, "learning_rate": 8.163924005073826e-06, "loss": 0.7378, "step": 12833 }, { "epoch": 1.7162342872425782, "grad_norm": 1.1957107782363892, "learning_rate": 8.162504821834296e-06, "loss": 0.6369, "step": 12834 }, { "epoch": 1.7163680128376573, "grad_norm": 1.0631752014160156, "learning_rate": 8.161085676895597e-06, "loss": 0.6691, "step": 12835 }, { "epoch": 1.716501738432736, "grad_norm": 1.1368411779403687, "learning_rate": 8.159666570287303e-06, "loss": 0.6798, "step": 12836 }, { "epoch": 1.716635464027815, "grad_norm": 1.3594932556152344, "learning_rate": 8.158247502039002e-06, "loss": 0.6431, "step": 12837 }, { "epoch": 1.7167691896228938, "grad_norm": 1.1777312755584717, "learning_rate": 8.156828472180271e-06, "loss": 0.7112, "step": 12838 }, { "epoch": 1.7169029152179727, "grad_norm": 1.175522804260254, "learning_rate": 8.15540948074068e-06, "loss": 0.6884, "step": 12839 }, { "epoch": 1.7170366408130517, "grad_norm": 1.1670371294021606, "learning_rate": 8.153990527749818e-06, "loss": 0.7058, "step": 12840 }, { "epoch": 1.7171703664081304, "grad_norm": 1.325585126876831, "learning_rate": 8.152571613237257e-06, "loss": 0.677, "step": 12841 }, { "epoch": 1.7173040920032094, "grad_norm": 1.3175456523895264, "learning_rate": 8.151152737232572e-06, "loss": 0.6777, "step": 12842 }, { "epoch": 1.7174378175982883, "grad_norm": 1.2463619709014893, "learning_rate": 8.14973389976534e-06, "loss": 0.7486, "step": 12843 }, { "epoch": 1.7175715431933671, "grad_norm": 1.2975207567214966, "learning_rate": 8.148315100865131e-06, "loss": 0.7444, "step": 12844 }, { "epoch": 1.7177052687884462, "grad_norm": 1.1764236688613892, "learning_rate": 8.146896340561528e-06, "loss": 0.7014, "step": 12845 }, { "epoch": 1.717838994383525, "grad_norm": 1.2639333009719849, "learning_rate": 8.145477618884092e-06, "loss": 0.7113, "step": 12846 }, { "epoch": 1.717972719978604, "grad_norm": 1.3122056722640991, "learning_rate": 8.1440589358624e-06, "loss": 0.7294, "step": 12847 }, { "epoch": 1.7181064455736828, "grad_norm": 1.224700927734375, "learning_rate": 8.142640291526028e-06, "loss": 0.7267, "step": 12848 }, { "epoch": 1.7182401711687616, "grad_norm": 1.2388968467712402, "learning_rate": 8.141221685904538e-06, "loss": 0.7488, "step": 12849 }, { "epoch": 1.7183738967638407, "grad_norm": 1.1630809307098389, "learning_rate": 8.139803119027507e-06, "loss": 0.6756, "step": 12850 }, { "epoch": 1.7185076223589195, "grad_norm": 1.1897659301757812, "learning_rate": 8.1383845909245e-06, "loss": 0.662, "step": 12851 }, { "epoch": 1.7186413479539984, "grad_norm": 1.1600133180618286, "learning_rate": 8.13696610162508e-06, "loss": 0.7194, "step": 12852 }, { "epoch": 1.7187750735490774, "grad_norm": 1.257002830505371, "learning_rate": 8.135547651158822e-06, "loss": 0.6716, "step": 12853 }, { "epoch": 1.718908799144156, "grad_norm": 1.2998160123825073, "learning_rate": 8.13412923955529e-06, "loss": 0.6176, "step": 12854 }, { "epoch": 1.7190425247392351, "grad_norm": 1.2360540628433228, "learning_rate": 8.132710866844045e-06, "loss": 0.7464, "step": 12855 }, { "epoch": 1.719176250334314, "grad_norm": 1.166771650314331, "learning_rate": 8.13129253305466e-06, "loss": 0.6833, "step": 12856 }, { "epoch": 1.7193099759293928, "grad_norm": 1.173782467842102, "learning_rate": 8.129874238216689e-06, "loss": 0.6532, "step": 12857 }, { "epoch": 1.719443701524472, "grad_norm": 1.2952295541763306, "learning_rate": 8.128455982359704e-06, "loss": 0.7143, "step": 12858 }, { "epoch": 1.7195774271195505, "grad_norm": 1.132118821144104, "learning_rate": 8.127037765513261e-06, "loss": 0.6858, "step": 12859 }, { "epoch": 1.7197111527146296, "grad_norm": 1.374457597732544, "learning_rate": 8.125619587706925e-06, "loss": 0.7606, "step": 12860 }, { "epoch": 1.7198448783097084, "grad_norm": 1.1691745519638062, "learning_rate": 8.124201448970254e-06, "loss": 0.6505, "step": 12861 }, { "epoch": 1.7199786039047873, "grad_norm": 1.3403770923614502, "learning_rate": 8.122783349332811e-06, "loss": 0.7641, "step": 12862 }, { "epoch": 1.7201123294998664, "grad_norm": 1.312195897102356, "learning_rate": 8.12136528882415e-06, "loss": 0.6855, "step": 12863 }, { "epoch": 1.7202460550949452, "grad_norm": 1.2833150625228882, "learning_rate": 8.119947267473833e-06, "loss": 0.7039, "step": 12864 }, { "epoch": 1.720379780690024, "grad_norm": 1.185397744178772, "learning_rate": 8.118529285311415e-06, "loss": 0.6435, "step": 12865 }, { "epoch": 1.7205135062851031, "grad_norm": 1.113646149635315, "learning_rate": 8.117111342366454e-06, "loss": 0.5846, "step": 12866 }, { "epoch": 1.7206472318801818, "grad_norm": 1.1509543657302856, "learning_rate": 8.115693438668507e-06, "loss": 0.6787, "step": 12867 }, { "epoch": 1.7207809574752608, "grad_norm": 1.2752186059951782, "learning_rate": 8.114275574247124e-06, "loss": 0.7561, "step": 12868 }, { "epoch": 1.7209146830703397, "grad_norm": 1.297583818435669, "learning_rate": 8.112857749131867e-06, "loss": 0.7779, "step": 12869 }, { "epoch": 1.7210484086654185, "grad_norm": 1.334119439125061, "learning_rate": 8.111439963352284e-06, "loss": 0.7614, "step": 12870 }, { "epoch": 1.7211821342604976, "grad_norm": 1.2802408933639526, "learning_rate": 8.110022216937923e-06, "loss": 0.7277, "step": 12871 }, { "epoch": 1.7213158598555762, "grad_norm": 1.2406387329101562, "learning_rate": 8.108604509918344e-06, "loss": 0.7003, "step": 12872 }, { "epoch": 1.7214495854506553, "grad_norm": 1.1272902488708496, "learning_rate": 8.107186842323091e-06, "loss": 0.6048, "step": 12873 }, { "epoch": 1.7215833110457341, "grad_norm": 1.140223741531372, "learning_rate": 8.10576921418172e-06, "loss": 0.641, "step": 12874 }, { "epoch": 1.721717036640813, "grad_norm": 1.1854684352874756, "learning_rate": 8.104351625523778e-06, "loss": 0.5968, "step": 12875 }, { "epoch": 1.721850762235892, "grad_norm": 1.1487549543380737, "learning_rate": 8.102934076378809e-06, "loss": 0.7058, "step": 12876 }, { "epoch": 1.7219844878309707, "grad_norm": 1.2717201709747314, "learning_rate": 8.101516566776368e-06, "loss": 0.6731, "step": 12877 }, { "epoch": 1.7221182134260498, "grad_norm": 1.3073252439498901, "learning_rate": 8.100099096745995e-06, "loss": 0.7058, "step": 12878 }, { "epoch": 1.7222519390211286, "grad_norm": 1.2961386442184448, "learning_rate": 8.098681666317239e-06, "loss": 0.7122, "step": 12879 }, { "epoch": 1.7223856646162075, "grad_norm": 1.2053905725479126, "learning_rate": 8.097264275519643e-06, "loss": 0.7094, "step": 12880 }, { "epoch": 1.7225193902112865, "grad_norm": 1.216880440711975, "learning_rate": 8.095846924382751e-06, "loss": 0.684, "step": 12881 }, { "epoch": 1.7226531158063654, "grad_norm": 1.2305643558502197, "learning_rate": 8.094429612936111e-06, "loss": 0.6824, "step": 12882 }, { "epoch": 1.7227868414014442, "grad_norm": 1.25296151638031, "learning_rate": 8.093012341209264e-06, "loss": 0.6969, "step": 12883 }, { "epoch": 1.7229205669965233, "grad_norm": 1.2045902013778687, "learning_rate": 8.091595109231745e-06, "loss": 0.6558, "step": 12884 }, { "epoch": 1.723054292591602, "grad_norm": 1.1353389024734497, "learning_rate": 8.090177917033102e-06, "loss": 0.6761, "step": 12885 }, { "epoch": 1.723188018186681, "grad_norm": 1.2239171266555786, "learning_rate": 8.088760764642874e-06, "loss": 0.6793, "step": 12886 }, { "epoch": 1.7233217437817598, "grad_norm": 1.1530975103378296, "learning_rate": 8.087343652090595e-06, "loss": 0.7066, "step": 12887 }, { "epoch": 1.7234554693768387, "grad_norm": 1.189701795578003, "learning_rate": 8.085926579405814e-06, "loss": 0.611, "step": 12888 }, { "epoch": 1.7235891949719178, "grad_norm": 1.1637330055236816, "learning_rate": 8.084509546618055e-06, "loss": 0.6989, "step": 12889 }, { "epoch": 1.7237229205669964, "grad_norm": 1.199289083480835, "learning_rate": 8.083092553756866e-06, "loss": 0.6647, "step": 12890 }, { "epoch": 1.7238566461620755, "grad_norm": 1.2631560564041138, "learning_rate": 8.081675600851779e-06, "loss": 0.6444, "step": 12891 }, { "epoch": 1.7239903717571543, "grad_norm": 1.3155760765075684, "learning_rate": 8.080258687932326e-06, "loss": 0.7455, "step": 12892 }, { "epoch": 1.7241240973522332, "grad_norm": 1.3042099475860596, "learning_rate": 8.078841815028043e-06, "loss": 0.7544, "step": 12893 }, { "epoch": 1.7242578229473122, "grad_norm": 1.2959418296813965, "learning_rate": 8.077424982168467e-06, "loss": 0.6455, "step": 12894 }, { "epoch": 1.7243915485423909, "grad_norm": 1.1764159202575684, "learning_rate": 8.076008189383125e-06, "loss": 0.666, "step": 12895 }, { "epoch": 1.72452527413747, "grad_norm": 1.4329313039779663, "learning_rate": 8.074591436701554e-06, "loss": 0.6683, "step": 12896 }, { "epoch": 1.7246589997325488, "grad_norm": 1.2898163795471191, "learning_rate": 8.073174724153278e-06, "loss": 0.6584, "step": 12897 }, { "epoch": 1.7247927253276276, "grad_norm": 1.1148664951324463, "learning_rate": 8.071758051767833e-06, "loss": 0.6806, "step": 12898 }, { "epoch": 1.7249264509227067, "grad_norm": 1.1910672187805176, "learning_rate": 8.070341419574748e-06, "loss": 0.7153, "step": 12899 }, { "epoch": 1.7250601765177855, "grad_norm": 1.185206651687622, "learning_rate": 8.068924827603545e-06, "loss": 0.6382, "step": 12900 }, { "epoch": 1.7251939021128644, "grad_norm": 1.2356975078582764, "learning_rate": 8.067508275883763e-06, "loss": 0.7217, "step": 12901 }, { "epoch": 1.7253276277079435, "grad_norm": 1.1419377326965332, "learning_rate": 8.066091764444918e-06, "loss": 0.6756, "step": 12902 }, { "epoch": 1.725461353303022, "grad_norm": 1.1161158084869385, "learning_rate": 8.064675293316538e-06, "loss": 0.6484, "step": 12903 }, { "epoch": 1.7255950788981012, "grad_norm": 1.3562854528427124, "learning_rate": 8.063258862528151e-06, "loss": 0.7253, "step": 12904 }, { "epoch": 1.72572880449318, "grad_norm": 1.3898921012878418, "learning_rate": 8.06184247210928e-06, "loss": 0.7605, "step": 12905 }, { "epoch": 1.7258625300882589, "grad_norm": 1.2959322929382324, "learning_rate": 8.060426122089448e-06, "loss": 0.6522, "step": 12906 }, { "epoch": 1.725996255683338, "grad_norm": 1.3542392253875732, "learning_rate": 8.059009812498179e-06, "loss": 0.7239, "step": 12907 }, { "epoch": 1.7261299812784165, "grad_norm": 1.2368452548980713, "learning_rate": 8.057593543364991e-06, "loss": 0.7222, "step": 12908 }, { "epoch": 1.7262637068734956, "grad_norm": 1.1805928945541382, "learning_rate": 8.05617731471941e-06, "loss": 0.7307, "step": 12909 }, { "epoch": 1.7263974324685745, "grad_norm": 1.1964836120605469, "learning_rate": 8.05476112659095e-06, "loss": 0.6286, "step": 12910 }, { "epoch": 1.7265311580636533, "grad_norm": 1.2369167804718018, "learning_rate": 8.053344979009134e-06, "loss": 0.6727, "step": 12911 }, { "epoch": 1.7266648836587324, "grad_norm": 1.1599445343017578, "learning_rate": 8.051928872003477e-06, "loss": 0.7123, "step": 12912 }, { "epoch": 1.7267986092538112, "grad_norm": 1.2838554382324219, "learning_rate": 8.050512805603498e-06, "loss": 0.734, "step": 12913 }, { "epoch": 1.72693233484889, "grad_norm": 1.2725017070770264, "learning_rate": 8.04909677983872e-06, "loss": 0.7254, "step": 12914 }, { "epoch": 1.727066060443969, "grad_norm": 1.2023309469223022, "learning_rate": 8.04768079473865e-06, "loss": 0.7444, "step": 12915 }, { "epoch": 1.7271997860390478, "grad_norm": 1.1640784740447998, "learning_rate": 8.046264850332802e-06, "loss": 0.6093, "step": 12916 }, { "epoch": 1.7273335116341269, "grad_norm": 1.2280038595199585, "learning_rate": 8.044848946650696e-06, "loss": 0.6678, "step": 12917 }, { "epoch": 1.7274672372292057, "grad_norm": 1.2812857627868652, "learning_rate": 8.043433083721843e-06, "loss": 0.7098, "step": 12918 }, { "epoch": 1.7276009628242845, "grad_norm": 1.1899956464767456, "learning_rate": 8.042017261575756e-06, "loss": 0.6582, "step": 12919 }, { "epoch": 1.7277346884193636, "grad_norm": 1.3713732957839966, "learning_rate": 8.040601480241948e-06, "loss": 0.7497, "step": 12920 }, { "epoch": 1.7278684140144422, "grad_norm": 1.2832385301589966, "learning_rate": 8.03918573974992e-06, "loss": 0.6391, "step": 12921 }, { "epoch": 1.7280021396095213, "grad_norm": 1.3006452322006226, "learning_rate": 8.037770040129196e-06, "loss": 0.7234, "step": 12922 }, { "epoch": 1.7281358652046002, "grad_norm": 1.181689739227295, "learning_rate": 8.036354381409276e-06, "loss": 0.6505, "step": 12923 }, { "epoch": 1.728269590799679, "grad_norm": 1.2600747346878052, "learning_rate": 8.034938763619667e-06, "loss": 0.7704, "step": 12924 }, { "epoch": 1.728403316394758, "grad_norm": 1.239434838294983, "learning_rate": 8.03352318678988e-06, "loss": 0.7337, "step": 12925 }, { "epoch": 1.7285370419898367, "grad_norm": 1.4491002559661865, "learning_rate": 8.03210765094942e-06, "loss": 0.7203, "step": 12926 }, { "epoch": 1.7286707675849158, "grad_norm": 1.410421371459961, "learning_rate": 8.030692156127797e-06, "loss": 0.7267, "step": 12927 }, { "epoch": 1.7288044931799946, "grad_norm": 1.124375581741333, "learning_rate": 8.029276702354511e-06, "loss": 0.6297, "step": 12928 }, { "epoch": 1.7289382187750735, "grad_norm": 1.3015804290771484, "learning_rate": 8.027861289659062e-06, "loss": 0.6466, "step": 12929 }, { "epoch": 1.7290719443701525, "grad_norm": 1.2716597318649292, "learning_rate": 8.026445918070963e-06, "loss": 0.6978, "step": 12930 }, { "epoch": 1.7292056699652314, "grad_norm": 1.180567741394043, "learning_rate": 8.025030587619706e-06, "loss": 0.6958, "step": 12931 }, { "epoch": 1.7293393955603102, "grad_norm": 1.2131541967391968, "learning_rate": 8.023615298334796e-06, "loss": 0.7462, "step": 12932 }, { "epoch": 1.729473121155389, "grad_norm": 1.2852815389633179, "learning_rate": 8.022200050245736e-06, "loss": 0.6923, "step": 12933 }, { "epoch": 1.729606846750468, "grad_norm": 1.182002067565918, "learning_rate": 8.020784843382021e-06, "loss": 0.6751, "step": 12934 }, { "epoch": 1.729740572345547, "grad_norm": 1.2903915643692017, "learning_rate": 8.019369677773155e-06, "loss": 0.664, "step": 12935 }, { "epoch": 1.7298742979406259, "grad_norm": 1.2154886722564697, "learning_rate": 8.017954553448632e-06, "loss": 0.747, "step": 12936 }, { "epoch": 1.7300080235357047, "grad_norm": 1.3928550481796265, "learning_rate": 8.01653947043795e-06, "loss": 0.7992, "step": 12937 }, { "epoch": 1.7301417491307838, "grad_norm": 1.1690040826797485, "learning_rate": 8.015124428770605e-06, "loss": 0.6869, "step": 12938 }, { "epoch": 1.7302754747258624, "grad_norm": 1.2384727001190186, "learning_rate": 8.013709428476093e-06, "loss": 0.6769, "step": 12939 }, { "epoch": 1.7304092003209415, "grad_norm": 1.2056655883789062, "learning_rate": 8.012294469583902e-06, "loss": 0.6784, "step": 12940 }, { "epoch": 1.7305429259160203, "grad_norm": 1.2486110925674438, "learning_rate": 8.010879552123537e-06, "loss": 0.6721, "step": 12941 }, { "epoch": 1.7306766515110992, "grad_norm": 1.2337318658828735, "learning_rate": 8.009464676124479e-06, "loss": 0.6669, "step": 12942 }, { "epoch": 1.7308103771061782, "grad_norm": 1.1453114748001099, "learning_rate": 8.00804984161623e-06, "loss": 0.6382, "step": 12943 }, { "epoch": 1.7309441027012569, "grad_norm": 1.5174992084503174, "learning_rate": 8.006635048628273e-06, "loss": 0.7013, "step": 12944 }, { "epoch": 1.731077828296336, "grad_norm": 1.2980328798294067, "learning_rate": 8.005220297190099e-06, "loss": 0.6645, "step": 12945 }, { "epoch": 1.7312115538914148, "grad_norm": 1.105157732963562, "learning_rate": 8.003805587331204e-06, "loss": 0.6581, "step": 12946 }, { "epoch": 1.7313452794864936, "grad_norm": 1.3423397541046143, "learning_rate": 8.00239091908107e-06, "loss": 0.7296, "step": 12947 }, { "epoch": 1.7314790050815727, "grad_norm": 1.247710943222046, "learning_rate": 8.000976292469184e-06, "loss": 0.7469, "step": 12948 }, { "epoch": 1.7316127306766516, "grad_norm": 1.2204896211624146, "learning_rate": 7.999561707525034e-06, "loss": 0.6622, "step": 12949 }, { "epoch": 1.7317464562717304, "grad_norm": 1.3191577196121216, "learning_rate": 7.998147164278107e-06, "loss": 0.745, "step": 12950 }, { "epoch": 1.7318801818668093, "grad_norm": 1.22435462474823, "learning_rate": 7.996732662757887e-06, "loss": 0.6733, "step": 12951 }, { "epoch": 1.732013907461888, "grad_norm": 1.1642422676086426, "learning_rate": 7.99531820299386e-06, "loss": 0.6471, "step": 12952 }, { "epoch": 1.7321476330569672, "grad_norm": 1.3170973062515259, "learning_rate": 7.993903785015502e-06, "loss": 0.7244, "step": 12953 }, { "epoch": 1.732281358652046, "grad_norm": 1.3028523921966553, "learning_rate": 7.992489408852306e-06, "loss": 0.6452, "step": 12954 }, { "epoch": 1.7324150842471249, "grad_norm": 1.198959469795227, "learning_rate": 7.991075074533743e-06, "loss": 0.6933, "step": 12955 }, { "epoch": 1.732548809842204, "grad_norm": 1.2525686025619507, "learning_rate": 7.989660782089298e-06, "loss": 0.6041, "step": 12956 }, { "epoch": 1.7326825354372826, "grad_norm": 1.126526951789856, "learning_rate": 7.988246531548452e-06, "loss": 0.6148, "step": 12957 }, { "epoch": 1.7328162610323616, "grad_norm": 1.3683443069458008, "learning_rate": 7.986832322940678e-06, "loss": 0.6632, "step": 12958 }, { "epoch": 1.7329499866274405, "grad_norm": 1.08456289768219, "learning_rate": 7.985418156295462e-06, "loss": 0.6639, "step": 12959 }, { "epoch": 1.7330837122225193, "grad_norm": 1.20579195022583, "learning_rate": 7.984004031642277e-06, "loss": 0.6817, "step": 12960 }, { "epoch": 1.7332174378175984, "grad_norm": 1.429930329322815, "learning_rate": 7.982589949010595e-06, "loss": 0.7181, "step": 12961 }, { "epoch": 1.733351163412677, "grad_norm": 1.1147289276123047, "learning_rate": 7.9811759084299e-06, "loss": 0.6253, "step": 12962 }, { "epoch": 1.733484889007756, "grad_norm": 1.1452322006225586, "learning_rate": 7.97976190992966e-06, "loss": 0.674, "step": 12963 }, { "epoch": 1.733618614602835, "grad_norm": 1.3454655408859253, "learning_rate": 7.978347953539344e-06, "loss": 0.6978, "step": 12964 }, { "epoch": 1.7337523401979138, "grad_norm": 1.1853774785995483, "learning_rate": 7.976934039288437e-06, "loss": 0.596, "step": 12965 }, { "epoch": 1.7338860657929929, "grad_norm": 1.1059528589248657, "learning_rate": 7.975520167206401e-06, "loss": 0.6165, "step": 12966 }, { "epoch": 1.7340197913880717, "grad_norm": 1.2693812847137451, "learning_rate": 7.974106337322713e-06, "loss": 0.6903, "step": 12967 }, { "epoch": 1.7341535169831506, "grad_norm": 1.1936752796173096, "learning_rate": 7.972692549666838e-06, "loss": 0.6019, "step": 12968 }, { "epoch": 1.7342872425782296, "grad_norm": 1.4172327518463135, "learning_rate": 7.971278804268245e-06, "loss": 0.6857, "step": 12969 }, { "epoch": 1.7344209681733083, "grad_norm": 1.2376610040664673, "learning_rate": 7.969865101156407e-06, "loss": 0.6526, "step": 12970 }, { "epoch": 1.7345546937683873, "grad_norm": 1.223358154296875, "learning_rate": 7.968451440360789e-06, "loss": 0.698, "step": 12971 }, { "epoch": 1.7346884193634662, "grad_norm": 1.3264931440353394, "learning_rate": 7.967037821910853e-06, "loss": 0.8227, "step": 12972 }, { "epoch": 1.734822144958545, "grad_norm": 1.4387422800064087, "learning_rate": 7.96562424583607e-06, "loss": 0.7178, "step": 12973 }, { "epoch": 1.734955870553624, "grad_norm": 1.082356572151184, "learning_rate": 7.964210712165901e-06, "loss": 0.6439, "step": 12974 }, { "epoch": 1.7350895961487027, "grad_norm": 1.3814677000045776, "learning_rate": 7.962797220929816e-06, "loss": 0.6704, "step": 12975 }, { "epoch": 1.7352233217437818, "grad_norm": 1.4386422634124756, "learning_rate": 7.961383772157273e-06, "loss": 0.7736, "step": 12976 }, { "epoch": 1.7353570473388606, "grad_norm": 1.2363412380218506, "learning_rate": 7.95997036587773e-06, "loss": 0.798, "step": 12977 }, { "epoch": 1.7354907729339395, "grad_norm": 1.1102499961853027, "learning_rate": 7.958557002120656e-06, "loss": 0.6632, "step": 12978 }, { "epoch": 1.7356244985290186, "grad_norm": 1.3287978172302246, "learning_rate": 7.95714368091551e-06, "loss": 0.7148, "step": 12979 }, { "epoch": 1.7357582241240972, "grad_norm": 1.3027607202529907, "learning_rate": 7.955730402291743e-06, "loss": 0.7188, "step": 12980 }, { "epoch": 1.7358919497191763, "grad_norm": 1.4091987609863281, "learning_rate": 7.954317166278825e-06, "loss": 0.8573, "step": 12981 }, { "epoch": 1.7360256753142551, "grad_norm": 1.2049931287765503, "learning_rate": 7.952903972906205e-06, "loss": 0.6708, "step": 12982 }, { "epoch": 1.736159400909334, "grad_norm": 1.2172513008117676, "learning_rate": 7.951490822203345e-06, "loss": 0.677, "step": 12983 }, { "epoch": 1.736293126504413, "grad_norm": 1.2180969715118408, "learning_rate": 7.950077714199698e-06, "loss": 0.6512, "step": 12984 }, { "epoch": 1.7364268520994919, "grad_norm": 1.3189692497253418, "learning_rate": 7.948664648924716e-06, "loss": 0.7261, "step": 12985 }, { "epoch": 1.7365605776945707, "grad_norm": 1.1013020277023315, "learning_rate": 7.947251626407863e-06, "loss": 0.6572, "step": 12986 }, { "epoch": 1.7366943032896498, "grad_norm": 1.3120019435882568, "learning_rate": 7.945838646678581e-06, "loss": 0.6837, "step": 12987 }, { "epoch": 1.7368280288847284, "grad_norm": 1.1524003744125366, "learning_rate": 7.944425709766328e-06, "loss": 0.7071, "step": 12988 }, { "epoch": 1.7369617544798075, "grad_norm": 1.4776729345321655, "learning_rate": 7.943012815700554e-06, "loss": 0.7936, "step": 12989 }, { "epoch": 1.7370954800748863, "grad_norm": 1.1302794218063354, "learning_rate": 7.941599964510707e-06, "loss": 0.5866, "step": 12990 }, { "epoch": 1.7372292056699652, "grad_norm": 1.2434536218643188, "learning_rate": 7.940187156226244e-06, "loss": 0.6727, "step": 12991 }, { "epoch": 1.7373629312650443, "grad_norm": 1.2090867757797241, "learning_rate": 7.938774390876608e-06, "loss": 0.6755, "step": 12992 }, { "epoch": 1.737496656860123, "grad_norm": 1.3892182111740112, "learning_rate": 7.937361668491244e-06, "loss": 0.7603, "step": 12993 }, { "epoch": 1.737630382455202, "grad_norm": 1.3046506643295288, "learning_rate": 7.935948989099606e-06, "loss": 0.7253, "step": 12994 }, { "epoch": 1.7377641080502808, "grad_norm": 1.1160005331039429, "learning_rate": 7.934536352731133e-06, "loss": 0.6024, "step": 12995 }, { "epoch": 1.7378978336453597, "grad_norm": 1.1101962327957153, "learning_rate": 7.933123759415273e-06, "loss": 0.6696, "step": 12996 }, { "epoch": 1.7380315592404387, "grad_norm": 1.3881338834762573, "learning_rate": 7.931711209181474e-06, "loss": 0.7221, "step": 12997 }, { "epoch": 1.7381652848355174, "grad_norm": 1.2860358953475952, "learning_rate": 7.930298702059171e-06, "loss": 0.6302, "step": 12998 }, { "epoch": 1.7382990104305964, "grad_norm": 1.2796674966812134, "learning_rate": 7.928886238077817e-06, "loss": 0.7038, "step": 12999 }, { "epoch": 1.7384327360256753, "grad_norm": 1.3535820245742798, "learning_rate": 7.927473817266843e-06, "loss": 0.658, "step": 13000 }, { "epoch": 1.7385664616207541, "grad_norm": 1.3275471925735474, "learning_rate": 7.926061439655696e-06, "loss": 0.7353, "step": 13001 }, { "epoch": 1.7387001872158332, "grad_norm": 1.2380675077438354, "learning_rate": 7.924649105273813e-06, "loss": 0.6585, "step": 13002 }, { "epoch": 1.738833912810912, "grad_norm": 1.1827822923660278, "learning_rate": 7.923236814150631e-06, "loss": 0.6684, "step": 13003 }, { "epoch": 1.738967638405991, "grad_norm": 1.3132728338241577, "learning_rate": 7.921824566315595e-06, "loss": 0.6522, "step": 13004 }, { "epoch": 1.73910136400107, "grad_norm": 1.189157485961914, "learning_rate": 7.920412361798137e-06, "loss": 0.6441, "step": 13005 }, { "epoch": 1.7392350895961486, "grad_norm": 1.3762788772583008, "learning_rate": 7.91900020062769e-06, "loss": 0.759, "step": 13006 }, { "epoch": 1.7393688151912277, "grad_norm": 1.201709270477295, "learning_rate": 7.917588082833696e-06, "loss": 0.6586, "step": 13007 }, { "epoch": 1.7395025407863065, "grad_norm": 1.2958943843841553, "learning_rate": 7.916176008445584e-06, "loss": 0.7184, "step": 13008 }, { "epoch": 1.7396362663813854, "grad_norm": 1.265431523323059, "learning_rate": 7.914763977492787e-06, "loss": 0.667, "step": 13009 }, { "epoch": 1.7397699919764644, "grad_norm": 1.343470573425293, "learning_rate": 7.913351990004743e-06, "loss": 0.6668, "step": 13010 }, { "epoch": 1.739903717571543, "grad_norm": 1.294517159461975, "learning_rate": 7.911940046010876e-06, "loss": 0.6748, "step": 13011 }, { "epoch": 1.7400374431666221, "grad_norm": 1.1967862844467163, "learning_rate": 7.910528145540626e-06, "loss": 0.7061, "step": 13012 }, { "epoch": 1.740171168761701, "grad_norm": 1.283071756362915, "learning_rate": 7.909116288623418e-06, "loss": 0.6351, "step": 13013 }, { "epoch": 1.7403048943567798, "grad_norm": 1.136717677116394, "learning_rate": 7.907704475288674e-06, "loss": 0.6653, "step": 13014 }, { "epoch": 1.740438619951859, "grad_norm": 1.3442686796188354, "learning_rate": 7.90629270556583e-06, "loss": 0.7473, "step": 13015 }, { "epoch": 1.7405723455469377, "grad_norm": 1.1719759702682495, "learning_rate": 7.904880979484316e-06, "loss": 0.7021, "step": 13016 }, { "epoch": 1.7407060711420166, "grad_norm": 1.123903751373291, "learning_rate": 7.903469297073547e-06, "loss": 0.6149, "step": 13017 }, { "epoch": 1.7408397967370954, "grad_norm": 1.2346513271331787, "learning_rate": 7.902057658362957e-06, "loss": 0.7083, "step": 13018 }, { "epoch": 1.7409735223321743, "grad_norm": 1.2357277870178223, "learning_rate": 7.900646063381965e-06, "loss": 0.7355, "step": 13019 }, { "epoch": 1.7411072479272534, "grad_norm": 1.291468858718872, "learning_rate": 7.899234512160002e-06, "loss": 0.7176, "step": 13020 }, { "epoch": 1.7412409735223322, "grad_norm": 1.2255412340164185, "learning_rate": 7.897823004726482e-06, "loss": 0.6828, "step": 13021 }, { "epoch": 1.741374699117411, "grad_norm": 1.3531514406204224, "learning_rate": 7.896411541110828e-06, "loss": 0.6709, "step": 13022 }, { "epoch": 1.7415084247124901, "grad_norm": 1.2913018465042114, "learning_rate": 7.895000121342467e-06, "loss": 0.6651, "step": 13023 }, { "epoch": 1.7416421503075687, "grad_norm": 1.3822314739227295, "learning_rate": 7.893588745450814e-06, "loss": 0.7505, "step": 13024 }, { "epoch": 1.7417758759026478, "grad_norm": 1.2893619537353516, "learning_rate": 7.892177413465285e-06, "loss": 0.6882, "step": 13025 }, { "epoch": 1.7419096014977267, "grad_norm": 1.3651442527770996, "learning_rate": 7.890766125415304e-06, "loss": 0.6938, "step": 13026 }, { "epoch": 1.7420433270928055, "grad_norm": 1.3322980403900146, "learning_rate": 7.88935488133028e-06, "loss": 0.7687, "step": 13027 }, { "epoch": 1.7421770526878846, "grad_norm": 1.1048146486282349, "learning_rate": 7.887943681239636e-06, "loss": 0.6887, "step": 13028 }, { "epoch": 1.7423107782829632, "grad_norm": 1.2392172813415527, "learning_rate": 7.886532525172788e-06, "loss": 0.6702, "step": 13029 }, { "epoch": 1.7424445038780423, "grad_norm": 1.135341763496399, "learning_rate": 7.885121413159142e-06, "loss": 0.6104, "step": 13030 }, { "epoch": 1.7425782294731211, "grad_norm": 1.3980683088302612, "learning_rate": 7.883710345228121e-06, "loss": 0.7751, "step": 13031 }, { "epoch": 1.7427119550682, "grad_norm": 1.1801718473434448, "learning_rate": 7.882299321409133e-06, "loss": 0.6543, "step": 13032 }, { "epoch": 1.742845680663279, "grad_norm": 1.1539872884750366, "learning_rate": 7.880888341731585e-06, "loss": 0.6601, "step": 13033 }, { "epoch": 1.742979406258358, "grad_norm": 1.2245397567749023, "learning_rate": 7.879477406224894e-06, "loss": 0.6731, "step": 13034 }, { "epoch": 1.7431131318534367, "grad_norm": 1.2380887269973755, "learning_rate": 7.878066514918466e-06, "loss": 0.6663, "step": 13035 }, { "epoch": 1.7432468574485156, "grad_norm": 1.1769920587539673, "learning_rate": 7.876655667841713e-06, "loss": 0.6971, "step": 13036 }, { "epoch": 1.7433805830435944, "grad_norm": 1.2474780082702637, "learning_rate": 7.875244865024043e-06, "loss": 0.6831, "step": 13037 }, { "epoch": 1.7435143086386735, "grad_norm": 1.391759991645813, "learning_rate": 7.873834106494856e-06, "loss": 0.752, "step": 13038 }, { "epoch": 1.7436480342337524, "grad_norm": 1.1344763040542603, "learning_rate": 7.872423392283566e-06, "loss": 0.5601, "step": 13039 }, { "epoch": 1.7437817598288312, "grad_norm": 1.2707011699676514, "learning_rate": 7.871012722419572e-06, "loss": 0.6839, "step": 13040 }, { "epoch": 1.7439154854239103, "grad_norm": 1.1836826801300049, "learning_rate": 7.86960209693228e-06, "loss": 0.6804, "step": 13041 }, { "epoch": 1.744049211018989, "grad_norm": 1.3756452798843384, "learning_rate": 7.868191515851097e-06, "loss": 0.7385, "step": 13042 }, { "epoch": 1.744182936614068, "grad_norm": 1.2071243524551392, "learning_rate": 7.866780979205418e-06, "loss": 0.6619, "step": 13043 }, { "epoch": 1.7443166622091468, "grad_norm": 1.4034690856933594, "learning_rate": 7.865370487024652e-06, "loss": 0.7114, "step": 13044 }, { "epoch": 1.7444503878042257, "grad_norm": 1.2475078105926514, "learning_rate": 7.863960039338196e-06, "loss": 0.7472, "step": 13045 }, { "epoch": 1.7445841133993047, "grad_norm": 1.1691175699234009, "learning_rate": 7.862549636175444e-06, "loss": 0.6136, "step": 13046 }, { "epoch": 1.7447178389943834, "grad_norm": 1.3238695859909058, "learning_rate": 7.861139277565802e-06, "loss": 0.6959, "step": 13047 }, { "epoch": 1.7448515645894624, "grad_norm": 1.217269778251648, "learning_rate": 7.859728963538667e-06, "loss": 0.6304, "step": 13048 }, { "epoch": 1.7449852901845413, "grad_norm": 1.349548578262329, "learning_rate": 7.85831869412343e-06, "loss": 0.7341, "step": 13049 }, { "epoch": 1.7451190157796201, "grad_norm": 1.1678849458694458, "learning_rate": 7.856908469349495e-06, "loss": 0.6642, "step": 13050 }, { "epoch": 1.7452527413746992, "grad_norm": 1.392716884613037, "learning_rate": 7.855498289246246e-06, "loss": 0.8134, "step": 13051 }, { "epoch": 1.745386466969778, "grad_norm": 1.2779555320739746, "learning_rate": 7.85408815384309e-06, "loss": 0.699, "step": 13052 }, { "epoch": 1.745520192564857, "grad_norm": 1.2170205116271973, "learning_rate": 7.85267806316941e-06, "loss": 0.7441, "step": 13053 }, { "epoch": 1.7456539181599358, "grad_norm": 1.2747998237609863, "learning_rate": 7.851268017254598e-06, "loss": 0.7023, "step": 13054 }, { "epoch": 1.7457876437550146, "grad_norm": 1.1835910081863403, "learning_rate": 7.849858016128054e-06, "loss": 0.691, "step": 13055 }, { "epoch": 1.7459213693500937, "grad_norm": 1.2233202457427979, "learning_rate": 7.848448059819161e-06, "loss": 0.7048, "step": 13056 }, { "epoch": 1.7460550949451725, "grad_norm": 1.297877550125122, "learning_rate": 7.847038148357306e-06, "loss": 0.7284, "step": 13057 }, { "epoch": 1.7461888205402514, "grad_norm": 1.176261067390442, "learning_rate": 7.845628281771884e-06, "loss": 0.6343, "step": 13058 }, { "epoch": 1.7463225461353304, "grad_norm": 1.2030620574951172, "learning_rate": 7.844218460092274e-06, "loss": 0.7194, "step": 13059 }, { "epoch": 1.746456271730409, "grad_norm": 1.3171570301055908, "learning_rate": 7.842808683347871e-06, "loss": 0.712, "step": 13060 }, { "epoch": 1.7465899973254881, "grad_norm": 1.1180232763290405, "learning_rate": 7.841398951568059e-06, "loss": 0.6585, "step": 13061 }, { "epoch": 1.746723722920567, "grad_norm": 1.4766656160354614, "learning_rate": 7.839989264782216e-06, "loss": 0.7921, "step": 13062 }, { "epoch": 1.7468574485156458, "grad_norm": 1.2696322202682495, "learning_rate": 7.838579623019732e-06, "loss": 0.6242, "step": 13063 }, { "epoch": 1.746991174110725, "grad_norm": 1.2306702136993408, "learning_rate": 7.83717002630999e-06, "loss": 0.7428, "step": 13064 }, { "epoch": 1.7471248997058035, "grad_norm": 1.134827971458435, "learning_rate": 7.835760474682364e-06, "loss": 0.6619, "step": 13065 }, { "epoch": 1.7472586253008826, "grad_norm": 1.1618316173553467, "learning_rate": 7.83435096816624e-06, "loss": 0.6702, "step": 13066 }, { "epoch": 1.7473923508959615, "grad_norm": 1.1894416809082031, "learning_rate": 7.832941506790998e-06, "loss": 0.5767, "step": 13067 }, { "epoch": 1.7475260764910403, "grad_norm": 1.2185240983963013, "learning_rate": 7.831532090586022e-06, "loss": 0.6596, "step": 13068 }, { "epoch": 1.7476598020861194, "grad_norm": 1.205224871635437, "learning_rate": 7.830122719580682e-06, "loss": 0.6981, "step": 13069 }, { "epoch": 1.7477935276811982, "grad_norm": 1.2897881269454956, "learning_rate": 7.828713393804354e-06, "loss": 0.7274, "step": 13070 }, { "epoch": 1.747927253276277, "grad_norm": 1.2016863822937012, "learning_rate": 7.827304113286423e-06, "loss": 0.7419, "step": 13071 }, { "epoch": 1.7480609788713561, "grad_norm": 1.269400954246521, "learning_rate": 7.825894878056257e-06, "loss": 0.6796, "step": 13072 }, { "epoch": 1.7481947044664348, "grad_norm": 1.179728627204895, "learning_rate": 7.824485688143229e-06, "loss": 0.7003, "step": 13073 }, { "epoch": 1.7483284300615138, "grad_norm": 1.1984901428222656, "learning_rate": 7.823076543576718e-06, "loss": 0.7239, "step": 13074 }, { "epoch": 1.7484621556565927, "grad_norm": 1.3591724634170532, "learning_rate": 7.82166744438609e-06, "loss": 0.7506, "step": 13075 }, { "epoch": 1.7485958812516715, "grad_norm": 1.213767409324646, "learning_rate": 7.820258390600723e-06, "loss": 0.6928, "step": 13076 }, { "epoch": 1.7487296068467506, "grad_norm": 1.263181447982788, "learning_rate": 7.818849382249987e-06, "loss": 0.7237, "step": 13077 }, { "epoch": 1.7488633324418292, "grad_norm": 1.2507827281951904, "learning_rate": 7.81744041936324e-06, "loss": 0.6872, "step": 13078 }, { "epoch": 1.7489970580369083, "grad_norm": 1.1744171380996704, "learning_rate": 7.816031501969865e-06, "loss": 0.7597, "step": 13079 }, { "epoch": 1.7491307836319872, "grad_norm": 1.3690522909164429, "learning_rate": 7.814622630099224e-06, "loss": 0.7873, "step": 13080 }, { "epoch": 1.749264509227066, "grad_norm": 1.213753581047058, "learning_rate": 7.813213803780679e-06, "loss": 0.7045, "step": 13081 }, { "epoch": 1.749398234822145, "grad_norm": 1.4117039442062378, "learning_rate": 7.811805023043603e-06, "loss": 0.7456, "step": 13082 }, { "epoch": 1.7495319604172237, "grad_norm": 1.1690768003463745, "learning_rate": 7.810396287917354e-06, "loss": 0.6891, "step": 13083 }, { "epoch": 1.7496656860123028, "grad_norm": 1.2917319536209106, "learning_rate": 7.808987598431303e-06, "loss": 0.6886, "step": 13084 }, { "epoch": 1.7497994116073816, "grad_norm": 1.290205478668213, "learning_rate": 7.807578954614808e-06, "loss": 0.66, "step": 13085 }, { "epoch": 1.7499331372024605, "grad_norm": 1.2115559577941895, "learning_rate": 7.806170356497229e-06, "loss": 0.6721, "step": 13086 }, { "epoch": 1.7500668627975395, "grad_norm": 1.3884086608886719, "learning_rate": 7.804761804107935e-06, "loss": 0.7299, "step": 13087 }, { "epoch": 1.7502005883926184, "grad_norm": 1.2650190591812134, "learning_rate": 7.803353297476276e-06, "loss": 0.6995, "step": 13088 }, { "epoch": 1.7503343139876972, "grad_norm": 1.2618435621261597, "learning_rate": 7.801944836631617e-06, "loss": 0.7303, "step": 13089 }, { "epoch": 1.7504680395827763, "grad_norm": 1.3761684894561768, "learning_rate": 7.800536421603317e-06, "loss": 0.6773, "step": 13090 }, { "epoch": 1.750601765177855, "grad_norm": 1.2615305185317993, "learning_rate": 7.799128052420726e-06, "loss": 0.6445, "step": 13091 }, { "epoch": 1.750735490772934, "grad_norm": 1.1958781480789185, "learning_rate": 7.797719729113207e-06, "loss": 0.6851, "step": 13092 }, { "epoch": 1.7508692163680128, "grad_norm": 1.3002465963363647, "learning_rate": 7.796311451710115e-06, "loss": 0.683, "step": 13093 }, { "epoch": 1.7510029419630917, "grad_norm": 1.2295221090316772, "learning_rate": 7.794903220240798e-06, "loss": 0.7837, "step": 13094 }, { "epoch": 1.7511366675581708, "grad_norm": 1.304438829421997, "learning_rate": 7.793495034734616e-06, "loss": 0.6953, "step": 13095 }, { "epoch": 1.7512703931532494, "grad_norm": 1.2923221588134766, "learning_rate": 7.792086895220915e-06, "loss": 0.6624, "step": 13096 }, { "epoch": 1.7514041187483285, "grad_norm": 1.1502407789230347, "learning_rate": 7.790678801729056e-06, "loss": 0.7078, "step": 13097 }, { "epoch": 1.7515378443434073, "grad_norm": 1.2105625867843628, "learning_rate": 7.789270754288379e-06, "loss": 0.6706, "step": 13098 }, { "epoch": 1.7516715699384862, "grad_norm": 1.4119216203689575, "learning_rate": 7.787862752928237e-06, "loss": 0.7235, "step": 13099 }, { "epoch": 1.7518052955335652, "grad_norm": 1.3615435361862183, "learning_rate": 7.786454797677982e-06, "loss": 0.674, "step": 13100 }, { "epoch": 1.7519390211286439, "grad_norm": 1.148840069770813, "learning_rate": 7.78504688856696e-06, "loss": 0.6492, "step": 13101 }, { "epoch": 1.752072746723723, "grad_norm": 1.2933416366577148, "learning_rate": 7.783639025624511e-06, "loss": 0.6806, "step": 13102 }, { "epoch": 1.7522064723188018, "grad_norm": 1.1937079429626465, "learning_rate": 7.782231208879991e-06, "loss": 0.686, "step": 13103 }, { "epoch": 1.7523401979138806, "grad_norm": 1.112066626548767, "learning_rate": 7.780823438362733e-06, "loss": 0.653, "step": 13104 }, { "epoch": 1.7524739235089597, "grad_norm": 1.3983036279678345, "learning_rate": 7.779415714102092e-06, "loss": 0.6604, "step": 13105 }, { "epoch": 1.7526076491040385, "grad_norm": 1.2212536334991455, "learning_rate": 7.778008036127405e-06, "loss": 0.7585, "step": 13106 }, { "epoch": 1.7527413746991174, "grad_norm": 1.2914576530456543, "learning_rate": 7.776600404468012e-06, "loss": 0.745, "step": 13107 }, { "epoch": 1.7528751002941965, "grad_norm": 1.17420494556427, "learning_rate": 7.775192819153259e-06, "loss": 0.633, "step": 13108 }, { "epoch": 1.753008825889275, "grad_norm": 1.219744324684143, "learning_rate": 7.773785280212482e-06, "loss": 0.6973, "step": 13109 }, { "epoch": 1.7531425514843542, "grad_norm": 1.3174890279769897, "learning_rate": 7.772377787675019e-06, "loss": 0.6068, "step": 13110 }, { "epoch": 1.753276277079433, "grad_norm": 1.4683308601379395, "learning_rate": 7.770970341570209e-06, "loss": 0.7688, "step": 13111 }, { "epoch": 1.7534100026745119, "grad_norm": 1.2203373908996582, "learning_rate": 7.769562941927387e-06, "loss": 0.6879, "step": 13112 }, { "epoch": 1.753543728269591, "grad_norm": 1.1337286233901978, "learning_rate": 7.768155588775898e-06, "loss": 0.6657, "step": 13113 }, { "epoch": 1.7536774538646696, "grad_norm": 1.3314387798309326, "learning_rate": 7.766748282145068e-06, "loss": 0.7034, "step": 13114 }, { "epoch": 1.7538111794597486, "grad_norm": 1.2009519338607788, "learning_rate": 7.76534102206423e-06, "loss": 0.6455, "step": 13115 }, { "epoch": 1.7539449050548275, "grad_norm": 1.228232502937317, "learning_rate": 7.763933808562724e-06, "loss": 0.7057, "step": 13116 }, { "epoch": 1.7540786306499063, "grad_norm": 1.3439264297485352, "learning_rate": 7.762526641669875e-06, "loss": 0.7114, "step": 13117 }, { "epoch": 1.7542123562449854, "grad_norm": 1.2936687469482422, "learning_rate": 7.761119521415017e-06, "loss": 0.6948, "step": 13118 }, { "epoch": 1.7543460818400642, "grad_norm": 1.2233085632324219, "learning_rate": 7.759712447827482e-06, "loss": 0.617, "step": 13119 }, { "epoch": 1.754479807435143, "grad_norm": 1.2602564096450806, "learning_rate": 7.758305420936594e-06, "loss": 0.6189, "step": 13120 }, { "epoch": 1.754613533030222, "grad_norm": 1.1129753589630127, "learning_rate": 7.75689844077169e-06, "loss": 0.6787, "step": 13121 }, { "epoch": 1.7547472586253008, "grad_norm": 1.278192162513733, "learning_rate": 7.755491507362089e-06, "loss": 0.6833, "step": 13122 }, { "epoch": 1.7548809842203799, "grad_norm": 1.1872895956039429, "learning_rate": 7.754084620737117e-06, "loss": 0.6827, "step": 13123 }, { "epoch": 1.7550147098154587, "grad_norm": 1.3323551416397095, "learning_rate": 7.752677780926105e-06, "loss": 0.7143, "step": 13124 }, { "epoch": 1.7551484354105376, "grad_norm": 1.3326780796051025, "learning_rate": 7.751270987958375e-06, "loss": 0.7769, "step": 13125 }, { "epoch": 1.7552821610056166, "grad_norm": 1.1852363348007202, "learning_rate": 7.749864241863245e-06, "loss": 0.6745, "step": 13126 }, { "epoch": 1.7554158866006953, "grad_norm": 1.3145664930343628, "learning_rate": 7.748457542670046e-06, "loss": 0.681, "step": 13127 }, { "epoch": 1.7555496121957743, "grad_norm": 1.3497494459152222, "learning_rate": 7.747050890408092e-06, "loss": 0.6562, "step": 13128 }, { "epoch": 1.7556833377908532, "grad_norm": 1.2761904001235962, "learning_rate": 7.74564428510671e-06, "loss": 0.7677, "step": 13129 }, { "epoch": 1.755817063385932, "grad_norm": 1.1878252029418945, "learning_rate": 7.744237726795213e-06, "loss": 0.6712, "step": 13130 }, { "epoch": 1.755950788981011, "grad_norm": 1.1849473714828491, "learning_rate": 7.742831215502922e-06, "loss": 0.6343, "step": 13131 }, { "epoch": 1.7560845145760897, "grad_norm": 1.2963931560516357, "learning_rate": 7.741424751259156e-06, "loss": 0.6887, "step": 13132 }, { "epoch": 1.7562182401711688, "grad_norm": 1.3765865564346313, "learning_rate": 7.740018334093231e-06, "loss": 0.7811, "step": 13133 }, { "epoch": 1.7563519657662476, "grad_norm": 1.3247803449630737, "learning_rate": 7.738611964034458e-06, "loss": 0.7118, "step": 13134 }, { "epoch": 1.7564856913613265, "grad_norm": 1.4144973754882812, "learning_rate": 7.737205641112158e-06, "loss": 0.7609, "step": 13135 }, { "epoch": 1.7566194169564056, "grad_norm": 1.2717084884643555, "learning_rate": 7.735799365355636e-06, "loss": 0.7217, "step": 13136 }, { "epoch": 1.7567531425514844, "grad_norm": 1.3558757305145264, "learning_rate": 7.734393136794214e-06, "loss": 0.6734, "step": 13137 }, { "epoch": 1.7568868681465633, "grad_norm": 1.2823171615600586, "learning_rate": 7.732986955457198e-06, "loss": 0.7037, "step": 13138 }, { "epoch": 1.757020593741642, "grad_norm": 1.4336529970169067, "learning_rate": 7.731580821373898e-06, "loss": 0.7804, "step": 13139 }, { "epoch": 1.757154319336721, "grad_norm": 1.2235890626907349, "learning_rate": 7.73017473457363e-06, "loss": 0.6895, "step": 13140 }, { "epoch": 1.7572880449318, "grad_norm": 1.3857910633087158, "learning_rate": 7.728768695085696e-06, "loss": 0.7193, "step": 13141 }, { "epoch": 1.7574217705268789, "grad_norm": 1.0571202039718628, "learning_rate": 7.7273627029394e-06, "loss": 0.5665, "step": 13142 }, { "epoch": 1.7575554961219577, "grad_norm": 1.1605843305587769, "learning_rate": 7.725956758164058e-06, "loss": 0.6566, "step": 13143 }, { "epoch": 1.7576892217170368, "grad_norm": 1.1740840673446655, "learning_rate": 7.724550860788968e-06, "loss": 0.6738, "step": 13144 }, { "epoch": 1.7578229473121154, "grad_norm": 1.2423170804977417, "learning_rate": 7.723145010843442e-06, "loss": 0.6279, "step": 13145 }, { "epoch": 1.7579566729071945, "grad_norm": 1.259957194328308, "learning_rate": 7.72173920835678e-06, "loss": 0.6394, "step": 13146 }, { "epoch": 1.7580903985022733, "grad_norm": 1.2878979444503784, "learning_rate": 7.720333453358281e-06, "loss": 0.6923, "step": 13147 }, { "epoch": 1.7582241240973522, "grad_norm": 1.1974692344665527, "learning_rate": 7.718927745877253e-06, "loss": 0.649, "step": 13148 }, { "epoch": 1.7583578496924313, "grad_norm": 1.427952527999878, "learning_rate": 7.71752208594299e-06, "loss": 0.7997, "step": 13149 }, { "epoch": 1.7584915752875099, "grad_norm": 1.2960275411605835, "learning_rate": 7.716116473584795e-06, "loss": 0.7211, "step": 13150 }, { "epoch": 1.758625300882589, "grad_norm": 1.2086318731307983, "learning_rate": 7.714710908831971e-06, "loss": 0.6381, "step": 13151 }, { "epoch": 1.7587590264776678, "grad_norm": 1.3161646127700806, "learning_rate": 7.713305391713805e-06, "loss": 0.7629, "step": 13152 }, { "epoch": 1.7588927520727466, "grad_norm": 1.285466194152832, "learning_rate": 7.711899922259606e-06, "loss": 0.7692, "step": 13153 }, { "epoch": 1.7590264776678257, "grad_norm": 1.31812584400177, "learning_rate": 7.710494500498662e-06, "loss": 0.7182, "step": 13154 }, { "epoch": 1.7591602032629046, "grad_norm": 1.337638020515442, "learning_rate": 7.709089126460266e-06, "loss": 0.6984, "step": 13155 }, { "epoch": 1.7592939288579834, "grad_norm": 1.3110979795455933, "learning_rate": 7.707683800173717e-06, "loss": 0.639, "step": 13156 }, { "epoch": 1.7594276544530623, "grad_norm": 1.1933131217956543, "learning_rate": 7.70627852166831e-06, "loss": 0.7167, "step": 13157 }, { "epoch": 1.759561380048141, "grad_norm": 1.4542263746261597, "learning_rate": 7.704873290973325e-06, "loss": 0.6861, "step": 13158 }, { "epoch": 1.7596951056432202, "grad_norm": 1.263856291770935, "learning_rate": 7.703468108118064e-06, "loss": 0.7163, "step": 13159 }, { "epoch": 1.759828831238299, "grad_norm": 1.3714478015899658, "learning_rate": 7.702062973131812e-06, "loss": 0.7376, "step": 13160 }, { "epoch": 1.7599625568333779, "grad_norm": 1.1897685527801514, "learning_rate": 7.700657886043859e-06, "loss": 0.6726, "step": 13161 }, { "epoch": 1.760096282428457, "grad_norm": 1.113761067390442, "learning_rate": 7.699252846883493e-06, "loss": 0.6847, "step": 13162 }, { "epoch": 1.7602300080235356, "grad_norm": 1.385581612586975, "learning_rate": 7.697847855679996e-06, "loss": 0.7176, "step": 13163 }, { "epoch": 1.7603637336186146, "grad_norm": 1.3693904876708984, "learning_rate": 7.696442912462662e-06, "loss": 0.7265, "step": 13164 }, { "epoch": 1.7604974592136935, "grad_norm": 1.2939317226409912, "learning_rate": 7.695038017260772e-06, "loss": 0.7417, "step": 13165 }, { "epoch": 1.7606311848087723, "grad_norm": 1.280278205871582, "learning_rate": 7.693633170103603e-06, "loss": 0.7064, "step": 13166 }, { "epoch": 1.7607649104038514, "grad_norm": 1.2304584980010986, "learning_rate": 7.692228371020449e-06, "loss": 0.6714, "step": 13167 }, { "epoch": 1.76089863599893, "grad_norm": 1.2177759408950806, "learning_rate": 7.690823620040581e-06, "loss": 0.6829, "step": 13168 }, { "epoch": 1.761032361594009, "grad_norm": 1.219003677368164, "learning_rate": 7.68941891719329e-06, "loss": 0.5802, "step": 13169 }, { "epoch": 1.761166087189088, "grad_norm": 1.1753109693527222, "learning_rate": 7.68801426250785e-06, "loss": 0.6488, "step": 13170 }, { "epoch": 1.7612998127841668, "grad_norm": 1.1693811416625977, "learning_rate": 7.686609656013538e-06, "loss": 0.6663, "step": 13171 }, { "epoch": 1.7614335383792459, "grad_norm": 1.1380445957183838, "learning_rate": 7.685205097739636e-06, "loss": 0.6169, "step": 13172 }, { "epoch": 1.7615672639743247, "grad_norm": 1.2496849298477173, "learning_rate": 7.683800587715416e-06, "loss": 0.709, "step": 13173 }, { "epoch": 1.7617009895694036, "grad_norm": 1.2730404138565063, "learning_rate": 7.68239612597016e-06, "loss": 0.7651, "step": 13174 }, { "epoch": 1.7618347151644826, "grad_norm": 1.2129355669021606, "learning_rate": 7.680991712533138e-06, "loss": 0.6542, "step": 13175 }, { "epoch": 1.7619684407595613, "grad_norm": 1.1598167419433594, "learning_rate": 7.679587347433624e-06, "loss": 0.6497, "step": 13176 }, { "epoch": 1.7621021663546403, "grad_norm": 1.2443852424621582, "learning_rate": 7.678183030700891e-06, "loss": 0.6289, "step": 13177 }, { "epoch": 1.7622358919497192, "grad_norm": 1.136084794998169, "learning_rate": 7.676778762364214e-06, "loss": 0.5957, "step": 13178 }, { "epoch": 1.762369617544798, "grad_norm": 1.2369897365570068, "learning_rate": 7.675374542452856e-06, "loss": 0.6616, "step": 13179 }, { "epoch": 1.762503343139877, "grad_norm": 1.3235372304916382, "learning_rate": 7.673970370996095e-06, "loss": 0.7138, "step": 13180 }, { "epoch": 1.7626370687349557, "grad_norm": 1.0980262756347656, "learning_rate": 7.672566248023192e-06, "loss": 0.7086, "step": 13181 }, { "epoch": 1.7627707943300348, "grad_norm": 1.1994779109954834, "learning_rate": 7.67116217356342e-06, "loss": 0.6782, "step": 13182 }, { "epoch": 1.7629045199251137, "grad_norm": 1.1428031921386719, "learning_rate": 7.669758147646046e-06, "loss": 0.6416, "step": 13183 }, { "epoch": 1.7630382455201925, "grad_norm": 1.2394593954086304, "learning_rate": 7.668354170300331e-06, "loss": 0.7097, "step": 13184 }, { "epoch": 1.7631719711152716, "grad_norm": 1.2066439390182495, "learning_rate": 7.666950241555546e-06, "loss": 0.7195, "step": 13185 }, { "epoch": 1.7633056967103502, "grad_norm": 1.2288256883621216, "learning_rate": 7.66554636144095e-06, "loss": 0.7129, "step": 13186 }, { "epoch": 1.7634394223054293, "grad_norm": 1.24745512008667, "learning_rate": 7.664142529985801e-06, "loss": 0.7511, "step": 13187 }, { "epoch": 1.7635731479005081, "grad_norm": 1.3364877700805664, "learning_rate": 7.66273874721937e-06, "loss": 0.6757, "step": 13188 }, { "epoch": 1.763706873495587, "grad_norm": 1.1447926759719849, "learning_rate": 7.661335013170911e-06, "loss": 0.6176, "step": 13189 }, { "epoch": 1.763840599090666, "grad_norm": 1.3012038469314575, "learning_rate": 7.659931327869688e-06, "loss": 0.7468, "step": 13190 }, { "epoch": 1.7639743246857449, "grad_norm": 1.2025270462036133, "learning_rate": 7.65852769134496e-06, "loss": 0.6623, "step": 13191 }, { "epoch": 1.7641080502808237, "grad_norm": 1.2923822402954102, "learning_rate": 7.657124103625974e-06, "loss": 0.7051, "step": 13192 }, { "epoch": 1.7642417758759028, "grad_norm": 1.225016474723816, "learning_rate": 7.655720564742002e-06, "loss": 0.654, "step": 13193 }, { "epoch": 1.7643755014709814, "grad_norm": 1.2559335231781006, "learning_rate": 7.654317074722287e-06, "loss": 0.6994, "step": 13194 }, { "epoch": 1.7645092270660605, "grad_norm": 1.2556838989257812, "learning_rate": 7.652913633596087e-06, "loss": 0.7013, "step": 13195 }, { "epoch": 1.7646429526611394, "grad_norm": 1.3198646306991577, "learning_rate": 7.65151024139266e-06, "loss": 0.7382, "step": 13196 }, { "epoch": 1.7647766782562182, "grad_norm": 1.3013070821762085, "learning_rate": 7.650106898141251e-06, "loss": 0.6519, "step": 13197 }, { "epoch": 1.7649104038512973, "grad_norm": 1.199157953262329, "learning_rate": 7.64870360387112e-06, "loss": 0.696, "step": 13198 }, { "epoch": 1.765044129446376, "grad_norm": 1.1832815408706665, "learning_rate": 7.64730035861151e-06, "loss": 0.699, "step": 13199 }, { "epoch": 1.765177855041455, "grad_norm": 1.205780267715454, "learning_rate": 7.645897162391672e-06, "loss": 0.6569, "step": 13200 }, { "epoch": 1.7653115806365338, "grad_norm": 1.1487656831741333, "learning_rate": 7.644494015240855e-06, "loss": 0.6951, "step": 13201 }, { "epoch": 1.7654453062316127, "grad_norm": 1.1796915531158447, "learning_rate": 7.64309091718831e-06, "loss": 0.6604, "step": 13202 }, { "epoch": 1.7655790318266917, "grad_norm": 1.3387131690979004, "learning_rate": 7.641687868263274e-06, "loss": 0.6797, "step": 13203 }, { "epoch": 1.7657127574217704, "grad_norm": 1.176735520362854, "learning_rate": 7.640284868495e-06, "loss": 0.7035, "step": 13204 }, { "epoch": 1.7658464830168494, "grad_norm": 1.214046597480774, "learning_rate": 7.638881917912729e-06, "loss": 0.6553, "step": 13205 }, { "epoch": 1.7659802086119283, "grad_norm": 1.2525125741958618, "learning_rate": 7.637479016545708e-06, "loss": 0.6772, "step": 13206 }, { "epoch": 1.7661139342070071, "grad_norm": 1.266052484512329, "learning_rate": 7.636076164423173e-06, "loss": 0.6594, "step": 13207 }, { "epoch": 1.7662476598020862, "grad_norm": 1.1831791400909424, "learning_rate": 7.63467336157437e-06, "loss": 0.7072, "step": 13208 }, { "epoch": 1.766381385397165, "grad_norm": 1.2331364154815674, "learning_rate": 7.633270608028537e-06, "loss": 0.6963, "step": 13209 }, { "epoch": 1.766515110992244, "grad_norm": 1.2755190134048462, "learning_rate": 7.631867903814916e-06, "loss": 0.7936, "step": 13210 }, { "epoch": 1.766648836587323, "grad_norm": 1.3306457996368408, "learning_rate": 7.630465248962738e-06, "loss": 0.6856, "step": 13211 }, { "epoch": 1.7667825621824016, "grad_norm": 1.2203032970428467, "learning_rate": 7.629062643501248e-06, "loss": 0.7159, "step": 13212 }, { "epoch": 1.7669162877774807, "grad_norm": 1.1315717697143555, "learning_rate": 7.627660087459674e-06, "loss": 0.6887, "step": 13213 }, { "epoch": 1.7670500133725595, "grad_norm": 1.2911678552627563, "learning_rate": 7.6262575808672576e-06, "loss": 0.77, "step": 13214 }, { "epoch": 1.7671837389676384, "grad_norm": 1.2581003904342651, "learning_rate": 7.624855123753235e-06, "loss": 0.6755, "step": 13215 }, { "epoch": 1.7673174645627174, "grad_norm": 1.2090908288955688, "learning_rate": 7.623452716146827e-06, "loss": 0.6794, "step": 13216 }, { "epoch": 1.767451190157796, "grad_norm": 1.5002691745758057, "learning_rate": 7.62205035807728e-06, "loss": 0.7918, "step": 13217 }, { "epoch": 1.7675849157528751, "grad_norm": 1.2874877452850342, "learning_rate": 7.620648049573815e-06, "loss": 0.6163, "step": 13218 }, { "epoch": 1.767718641347954, "grad_norm": 1.1568379402160645, "learning_rate": 7.619245790665662e-06, "loss": 0.6791, "step": 13219 }, { "epoch": 1.7678523669430328, "grad_norm": 1.1005451679229736, "learning_rate": 7.617843581382055e-06, "loss": 0.6064, "step": 13220 }, { "epoch": 1.767986092538112, "grad_norm": 1.1477329730987549, "learning_rate": 7.6164414217522185e-06, "loss": 0.6304, "step": 13221 }, { "epoch": 1.7681198181331907, "grad_norm": 1.4061325788497925, "learning_rate": 7.61503931180538e-06, "loss": 0.6991, "step": 13222 }, { "epoch": 1.7682535437282696, "grad_norm": 1.2457906007766724, "learning_rate": 7.613637251570767e-06, "loss": 0.7343, "step": 13223 }, { "epoch": 1.7683872693233484, "grad_norm": 1.2702500820159912, "learning_rate": 7.612235241077597e-06, "loss": 0.7036, "step": 13224 }, { "epoch": 1.7685209949184273, "grad_norm": 1.2025442123413086, "learning_rate": 7.610833280355103e-06, "loss": 0.6597, "step": 13225 }, { "epoch": 1.7686547205135064, "grad_norm": 1.1940994262695312, "learning_rate": 7.609431369432502e-06, "loss": 0.7255, "step": 13226 }, { "epoch": 1.7687884461085852, "grad_norm": 1.1808069944381714, "learning_rate": 7.608029508339015e-06, "loss": 0.665, "step": 13227 }, { "epoch": 1.768922171703664, "grad_norm": 1.3171695470809937, "learning_rate": 7.606627697103866e-06, "loss": 0.6795, "step": 13228 }, { "epoch": 1.7690558972987431, "grad_norm": 1.2143160104751587, "learning_rate": 7.6052259357562685e-06, "loss": 0.637, "step": 13229 }, { "epoch": 1.7691896228938218, "grad_norm": 1.3234939575195312, "learning_rate": 7.60382422432545e-06, "loss": 0.7277, "step": 13230 }, { "epoch": 1.7693233484889008, "grad_norm": 1.2912805080413818, "learning_rate": 7.602422562840622e-06, "loss": 0.6659, "step": 13231 }, { "epoch": 1.7694570740839797, "grad_norm": 1.20145845413208, "learning_rate": 7.601020951330998e-06, "loss": 0.7327, "step": 13232 }, { "epoch": 1.7695907996790585, "grad_norm": 1.390238642692566, "learning_rate": 7.599619389825799e-06, "loss": 0.7092, "step": 13233 }, { "epoch": 1.7697245252741376, "grad_norm": 1.1942757368087769, "learning_rate": 7.598217878354237e-06, "loss": 0.6519, "step": 13234 }, { "epoch": 1.7698582508692162, "grad_norm": 1.0842225551605225, "learning_rate": 7.596816416945523e-06, "loss": 0.6341, "step": 13235 }, { "epoch": 1.7699919764642953, "grad_norm": 1.1821191310882568, "learning_rate": 7.595415005628875e-06, "loss": 0.6408, "step": 13236 }, { "epoch": 1.7701257020593741, "grad_norm": 1.2233281135559082, "learning_rate": 7.594013644433496e-06, "loss": 0.6512, "step": 13237 }, { "epoch": 1.770259427654453, "grad_norm": 1.1893068552017212, "learning_rate": 7.592612333388604e-06, "loss": 0.6324, "step": 13238 }, { "epoch": 1.770393153249532, "grad_norm": 1.3438538312911987, "learning_rate": 7.591211072523403e-06, "loss": 0.6713, "step": 13239 }, { "epoch": 1.770526878844611, "grad_norm": 1.2872432470321655, "learning_rate": 7.5898098618671015e-06, "loss": 0.6585, "step": 13240 }, { "epoch": 1.7706606044396898, "grad_norm": 1.2955158948898315, "learning_rate": 7.5884087014489065e-06, "loss": 0.6194, "step": 13241 }, { "epoch": 1.7707943300347686, "grad_norm": 1.2175147533416748, "learning_rate": 7.587007591298028e-06, "loss": 0.6695, "step": 13242 }, { "epoch": 1.7709280556298475, "grad_norm": 1.4306607246398926, "learning_rate": 7.585606531443662e-06, "loss": 0.7935, "step": 13243 }, { "epoch": 1.7710617812249265, "grad_norm": 1.1454448699951172, "learning_rate": 7.584205521915023e-06, "loss": 0.6189, "step": 13244 }, { "epoch": 1.7711955068200054, "grad_norm": 1.2536683082580566, "learning_rate": 7.582804562741303e-06, "loss": 0.727, "step": 13245 }, { "epoch": 1.7713292324150842, "grad_norm": 1.2530522346496582, "learning_rate": 7.581403653951711e-06, "loss": 0.6892, "step": 13246 }, { "epoch": 1.7714629580101633, "grad_norm": 1.1325064897537231, "learning_rate": 7.5800027955754474e-06, "loss": 0.6352, "step": 13247 }, { "epoch": 1.771596683605242, "grad_norm": 1.40010666847229, "learning_rate": 7.578601987641706e-06, "loss": 0.7073, "step": 13248 }, { "epoch": 1.771730409200321, "grad_norm": 1.4395222663879395, "learning_rate": 7.5772012301796935e-06, "loss": 0.8132, "step": 13249 }, { "epoch": 1.7718641347953998, "grad_norm": 1.0818517208099365, "learning_rate": 7.575800523218603e-06, "loss": 0.6525, "step": 13250 }, { "epoch": 1.7719978603904787, "grad_norm": 1.181702971458435, "learning_rate": 7.574399866787626e-06, "loss": 0.6715, "step": 13251 }, { "epoch": 1.7721315859855578, "grad_norm": 1.1964753866195679, "learning_rate": 7.572999260915965e-06, "loss": 0.6178, "step": 13252 }, { "epoch": 1.7722653115806364, "grad_norm": 1.2745329141616821, "learning_rate": 7.5715987056328136e-06, "loss": 0.7385, "step": 13253 }, { "epoch": 1.7723990371757155, "grad_norm": 1.1141149997711182, "learning_rate": 7.570198200967363e-06, "loss": 0.687, "step": 13254 }, { "epoch": 1.7725327627707943, "grad_norm": 1.3399637937545776, "learning_rate": 7.568797746948806e-06, "loss": 0.7182, "step": 13255 }, { "epoch": 1.7726664883658731, "grad_norm": 1.326000452041626, "learning_rate": 7.567397343606331e-06, "loss": 0.6675, "step": 13256 }, { "epoch": 1.7728002139609522, "grad_norm": 1.1466896533966064, "learning_rate": 7.565996990969135e-06, "loss": 0.6022, "step": 13257 }, { "epoch": 1.772933939556031, "grad_norm": 1.4548966884613037, "learning_rate": 7.564596689066397e-06, "loss": 0.6852, "step": 13258 }, { "epoch": 1.77306766515111, "grad_norm": 1.1868021488189697, "learning_rate": 7.563196437927316e-06, "loss": 0.6299, "step": 13259 }, { "epoch": 1.7732013907461888, "grad_norm": 1.2207348346710205, "learning_rate": 7.5617962375810705e-06, "loss": 0.6813, "step": 13260 }, { "epoch": 1.7733351163412676, "grad_norm": 1.2169100046157837, "learning_rate": 7.560396088056848e-06, "loss": 0.6433, "step": 13261 }, { "epoch": 1.7734688419363467, "grad_norm": 1.2752684354782104, "learning_rate": 7.558995989383839e-06, "loss": 0.7832, "step": 13262 }, { "epoch": 1.7736025675314255, "grad_norm": 1.1743502616882324, "learning_rate": 7.557595941591221e-06, "loss": 0.6141, "step": 13263 }, { "epoch": 1.7737362931265044, "grad_norm": 1.3403269052505493, "learning_rate": 7.556195944708176e-06, "loss": 0.7051, "step": 13264 }, { "epoch": 1.7738700187215835, "grad_norm": 1.2211259603500366, "learning_rate": 7.55479599876389e-06, "loss": 0.6317, "step": 13265 }, { "epoch": 1.774003744316662, "grad_norm": 1.239791989326477, "learning_rate": 7.553396103787541e-06, "loss": 0.6124, "step": 13266 }, { "epoch": 1.7741374699117411, "grad_norm": 1.229616641998291, "learning_rate": 7.55199625980831e-06, "loss": 0.6925, "step": 13267 }, { "epoch": 1.77427119550682, "grad_norm": 1.1542174816131592, "learning_rate": 7.550596466855375e-06, "loss": 0.7234, "step": 13268 }, { "epoch": 1.7744049211018988, "grad_norm": 1.1539620161056519, "learning_rate": 7.5491967249579105e-06, "loss": 0.7413, "step": 13269 }, { "epoch": 1.774538646696978, "grad_norm": 1.2399497032165527, "learning_rate": 7.547797034145098e-06, "loss": 0.7174, "step": 13270 }, { "epoch": 1.7746723722920565, "grad_norm": 1.2965887784957886, "learning_rate": 7.546397394446108e-06, "loss": 0.7476, "step": 13271 }, { "epoch": 1.7748060978871356, "grad_norm": 1.1897649765014648, "learning_rate": 7.5449978058901174e-06, "loss": 0.692, "step": 13272 }, { "epoch": 1.7749398234822145, "grad_norm": 1.3211344480514526, "learning_rate": 7.543598268506297e-06, "loss": 0.6762, "step": 13273 }, { "epoch": 1.7750735490772933, "grad_norm": 1.2350395917892456, "learning_rate": 7.542198782323819e-06, "loss": 0.6655, "step": 13274 }, { "epoch": 1.7752072746723724, "grad_norm": 1.3445478677749634, "learning_rate": 7.540799347371859e-06, "loss": 0.6655, "step": 13275 }, { "epoch": 1.7753410002674512, "grad_norm": 1.3568940162658691, "learning_rate": 7.539399963679583e-06, "loss": 0.7447, "step": 13276 }, { "epoch": 1.77547472586253, "grad_norm": 1.3517497777938843, "learning_rate": 7.538000631276158e-06, "loss": 0.7122, "step": 13277 }, { "epoch": 1.7756084514576091, "grad_norm": 1.2729796171188354, "learning_rate": 7.536601350190756e-06, "loss": 0.699, "step": 13278 }, { "epoch": 1.7757421770526878, "grad_norm": 1.2554553747177124, "learning_rate": 7.53520212045254e-06, "loss": 0.6439, "step": 13279 }, { "epoch": 1.7758759026477668, "grad_norm": 1.1379806995391846, "learning_rate": 7.533802942090677e-06, "loss": 0.6489, "step": 13280 }, { "epoch": 1.7760096282428457, "grad_norm": 1.1808940172195435, "learning_rate": 7.532403815134335e-06, "loss": 0.6038, "step": 13281 }, { "epoch": 1.7761433538379245, "grad_norm": 1.1703616380691528, "learning_rate": 7.531004739612668e-06, "loss": 0.7072, "step": 13282 }, { "epoch": 1.7762770794330036, "grad_norm": 1.495598554611206, "learning_rate": 7.529605715554851e-06, "loss": 0.7723, "step": 13283 }, { "epoch": 1.7764108050280822, "grad_norm": 1.1676737070083618, "learning_rate": 7.528206742990036e-06, "loss": 0.6442, "step": 13284 }, { "epoch": 1.7765445306231613, "grad_norm": 1.411613941192627, "learning_rate": 7.526807821947387e-06, "loss": 0.7284, "step": 13285 }, { "epoch": 1.7766782562182402, "grad_norm": 1.204737663269043, "learning_rate": 7.5254089524560614e-06, "loss": 0.7459, "step": 13286 }, { "epoch": 1.776811981813319, "grad_norm": 1.2189507484436035, "learning_rate": 7.524010134545221e-06, "loss": 0.6776, "step": 13287 }, { "epoch": 1.776945707408398, "grad_norm": 1.2145860195159912, "learning_rate": 7.522611368244016e-06, "loss": 0.749, "step": 13288 }, { "epoch": 1.7770794330034767, "grad_norm": 1.2140165567398071, "learning_rate": 7.521212653581611e-06, "loss": 0.6765, "step": 13289 }, { "epoch": 1.7772131585985558, "grad_norm": 1.147995114326477, "learning_rate": 7.51981399058715e-06, "loss": 0.6781, "step": 13290 }, { "epoch": 1.7773468841936346, "grad_norm": 1.2674106359481812, "learning_rate": 7.5184153792897995e-06, "loss": 0.7903, "step": 13291 }, { "epoch": 1.7774806097887135, "grad_norm": 1.1379270553588867, "learning_rate": 7.5170168197187035e-06, "loss": 0.6452, "step": 13292 }, { "epoch": 1.7776143353837925, "grad_norm": 1.1839420795440674, "learning_rate": 7.515618311903012e-06, "loss": 0.6417, "step": 13293 }, { "epoch": 1.7777480609788714, "grad_norm": 1.178890585899353, "learning_rate": 7.514219855871886e-06, "loss": 0.6789, "step": 13294 }, { "epoch": 1.7778817865739502, "grad_norm": 1.240135908126831, "learning_rate": 7.512821451654467e-06, "loss": 0.6293, "step": 13295 }, { "epoch": 1.7780155121690293, "grad_norm": 1.23332679271698, "learning_rate": 7.511423099279901e-06, "loss": 0.6692, "step": 13296 }, { "epoch": 1.778149237764108, "grad_norm": 1.255388617515564, "learning_rate": 7.510024798777342e-06, "loss": 0.7009, "step": 13297 }, { "epoch": 1.778282963359187, "grad_norm": 1.1538621187210083, "learning_rate": 7.5086265501759325e-06, "loss": 0.669, "step": 13298 }, { "epoch": 1.7784166889542659, "grad_norm": 1.1280181407928467, "learning_rate": 7.507228353504819e-06, "loss": 0.6657, "step": 13299 }, { "epoch": 1.7785504145493447, "grad_norm": 1.074084997177124, "learning_rate": 7.505830208793147e-06, "loss": 0.655, "step": 13300 }, { "epoch": 1.7786841401444238, "grad_norm": 1.3083407878875732, "learning_rate": 7.504432116070053e-06, "loss": 0.7192, "step": 13301 }, { "epoch": 1.7788178657395024, "grad_norm": 1.1893607378005981, "learning_rate": 7.503034075364689e-06, "loss": 0.6478, "step": 13302 }, { "epoch": 1.7789515913345815, "grad_norm": 1.291527509689331, "learning_rate": 7.501636086706188e-06, "loss": 0.6744, "step": 13303 }, { "epoch": 1.7790853169296603, "grad_norm": 1.3032326698303223, "learning_rate": 7.500238150123691e-06, "loss": 0.6879, "step": 13304 }, { "epoch": 1.7792190425247392, "grad_norm": 1.2379329204559326, "learning_rate": 7.498840265646339e-06, "loss": 0.6911, "step": 13305 }, { "epoch": 1.7793527681198182, "grad_norm": 1.1819076538085938, "learning_rate": 7.497442433303265e-06, "loss": 0.7288, "step": 13306 }, { "epoch": 1.7794864937148969, "grad_norm": 1.0411958694458008, "learning_rate": 7.4960446531236134e-06, "loss": 0.6233, "step": 13307 }, { "epoch": 1.779620219309976, "grad_norm": 1.184686303138733, "learning_rate": 7.494646925136515e-06, "loss": 0.7092, "step": 13308 }, { "epoch": 1.7797539449050548, "grad_norm": 1.2434760332107544, "learning_rate": 7.4932492493711e-06, "loss": 0.709, "step": 13309 }, { "epoch": 1.7798876705001336, "grad_norm": 1.1609017848968506, "learning_rate": 7.49185162585651e-06, "loss": 0.6821, "step": 13310 }, { "epoch": 1.7800213960952127, "grad_norm": 1.1342800855636597, "learning_rate": 7.490454054621872e-06, "loss": 0.6891, "step": 13311 }, { "epoch": 1.7801551216902916, "grad_norm": 1.2116864919662476, "learning_rate": 7.489056535696313e-06, "loss": 0.6849, "step": 13312 }, { "epoch": 1.7802888472853704, "grad_norm": 1.315856695175171, "learning_rate": 7.487659069108974e-06, "loss": 0.6724, "step": 13313 }, { "epoch": 1.7804225728804495, "grad_norm": 1.1762348413467407, "learning_rate": 7.486261654888974e-06, "loss": 0.6904, "step": 13314 }, { "epoch": 1.780556298475528, "grad_norm": 1.26168954372406, "learning_rate": 7.484864293065446e-06, "loss": 0.6917, "step": 13315 }, { "epoch": 1.7806900240706072, "grad_norm": 1.3267970085144043, "learning_rate": 7.483466983667516e-06, "loss": 0.7231, "step": 13316 }, { "epoch": 1.780823749665686, "grad_norm": 1.315279483795166, "learning_rate": 7.482069726724306e-06, "loss": 0.6882, "step": 13317 }, { "epoch": 1.7809574752607649, "grad_norm": 1.2874135971069336, "learning_rate": 7.4806725222649446e-06, "loss": 0.6767, "step": 13318 }, { "epoch": 1.781091200855844, "grad_norm": 1.2011380195617676, "learning_rate": 7.479275370318555e-06, "loss": 0.5735, "step": 13319 }, { "epoch": 1.7812249264509226, "grad_norm": 1.2650060653686523, "learning_rate": 7.477878270914255e-06, "loss": 0.7213, "step": 13320 }, { "epoch": 1.7813586520460016, "grad_norm": 1.2899402379989624, "learning_rate": 7.476481224081174e-06, "loss": 0.7721, "step": 13321 }, { "epoch": 1.7814923776410805, "grad_norm": 1.359849214553833, "learning_rate": 7.4750842298484205e-06, "loss": 0.7282, "step": 13322 }, { "epoch": 1.7816261032361593, "grad_norm": 1.2366663217544556, "learning_rate": 7.473687288245126e-06, "loss": 0.6668, "step": 13323 }, { "epoch": 1.7817598288312384, "grad_norm": 1.3460928201675415, "learning_rate": 7.472290399300399e-06, "loss": 0.7386, "step": 13324 }, { "epoch": 1.7818935544263172, "grad_norm": 1.167547583580017, "learning_rate": 7.47089356304336e-06, "loss": 0.5787, "step": 13325 }, { "epoch": 1.782027280021396, "grad_norm": 1.121841311454773, "learning_rate": 7.469496779503127e-06, "loss": 0.6363, "step": 13326 }, { "epoch": 1.782161005616475, "grad_norm": 1.3463078737258911, "learning_rate": 7.468100048708813e-06, "loss": 0.6764, "step": 13327 }, { "epoch": 1.7822947312115538, "grad_norm": 1.2898348569869995, "learning_rate": 7.4667033706895265e-06, "loss": 0.7234, "step": 13328 }, { "epoch": 1.7824284568066329, "grad_norm": 1.2123874425888062, "learning_rate": 7.465306745474388e-06, "loss": 0.6829, "step": 13329 }, { "epoch": 1.7825621824017117, "grad_norm": 1.2860438823699951, "learning_rate": 7.463910173092501e-06, "loss": 0.6982, "step": 13330 }, { "epoch": 1.7826959079967906, "grad_norm": 1.2346816062927246, "learning_rate": 7.462513653572983e-06, "loss": 0.7146, "step": 13331 }, { "epoch": 1.7828296335918696, "grad_norm": 1.253859043121338, "learning_rate": 7.46111718694494e-06, "loss": 0.7073, "step": 13332 }, { "epoch": 1.7829633591869483, "grad_norm": 1.1765891313552856, "learning_rate": 7.459720773237476e-06, "loss": 0.6378, "step": 13333 }, { "epoch": 1.7830970847820273, "grad_norm": 1.2042300701141357, "learning_rate": 7.458324412479705e-06, "loss": 0.6764, "step": 13334 }, { "epoch": 1.7832308103771062, "grad_norm": 1.2054184675216675, "learning_rate": 7.456928104700729e-06, "loss": 0.6822, "step": 13335 }, { "epoch": 1.783364535972185, "grad_norm": 1.2493089437484741, "learning_rate": 7.455531849929653e-06, "loss": 0.715, "step": 13336 }, { "epoch": 1.783498261567264, "grad_norm": 1.2286487817764282, "learning_rate": 7.45413564819558e-06, "loss": 0.6545, "step": 13337 }, { "epoch": 1.7836319871623427, "grad_norm": 1.2287245988845825, "learning_rate": 7.452739499527613e-06, "loss": 0.5941, "step": 13338 }, { "epoch": 1.7837657127574218, "grad_norm": 1.477335810661316, "learning_rate": 7.451343403954856e-06, "loss": 0.7621, "step": 13339 }, { "epoch": 1.7838994383525006, "grad_norm": 1.286862850189209, "learning_rate": 7.449947361506407e-06, "loss": 0.6808, "step": 13340 }, { "epoch": 1.7840331639475795, "grad_norm": 1.2689037322998047, "learning_rate": 7.448551372211361e-06, "loss": 0.7286, "step": 13341 }, { "epoch": 1.7841668895426586, "grad_norm": 1.2290533781051636, "learning_rate": 7.447155436098825e-06, "loss": 0.6927, "step": 13342 }, { "epoch": 1.7843006151377374, "grad_norm": 1.2181172370910645, "learning_rate": 7.4457595531978864e-06, "loss": 0.7627, "step": 13343 }, { "epoch": 1.7844343407328163, "grad_norm": 1.3072049617767334, "learning_rate": 7.444363723537648e-06, "loss": 0.6991, "step": 13344 }, { "epoch": 1.784568066327895, "grad_norm": 1.293628454208374, "learning_rate": 7.442967947147205e-06, "loss": 0.6767, "step": 13345 }, { "epoch": 1.784701791922974, "grad_norm": 1.1302788257598877, "learning_rate": 7.441572224055644e-06, "loss": 0.6765, "step": 13346 }, { "epoch": 1.784835517518053, "grad_norm": 1.2976595163345337, "learning_rate": 7.440176554292065e-06, "loss": 0.6495, "step": 13347 }, { "epoch": 1.7849692431131319, "grad_norm": 1.4654515981674194, "learning_rate": 7.438780937885555e-06, "loss": 0.8002, "step": 13348 }, { "epoch": 1.7851029687082107, "grad_norm": 1.296443223953247, "learning_rate": 7.437385374865206e-06, "loss": 0.7139, "step": 13349 }, { "epoch": 1.7852366943032898, "grad_norm": 1.2363168001174927, "learning_rate": 7.435989865260106e-06, "loss": 0.6938, "step": 13350 }, { "epoch": 1.7853704198983684, "grad_norm": 1.3031309843063354, "learning_rate": 7.434594409099342e-06, "loss": 0.6513, "step": 13351 }, { "epoch": 1.7855041454934475, "grad_norm": 1.1127417087554932, "learning_rate": 7.433199006412006e-06, "loss": 0.6408, "step": 13352 }, { "epoch": 1.7856378710885263, "grad_norm": 1.2181146144866943, "learning_rate": 7.431803657227182e-06, "loss": 0.6425, "step": 13353 }, { "epoch": 1.7857715966836052, "grad_norm": 1.201139211654663, "learning_rate": 7.430408361573949e-06, "loss": 0.5671, "step": 13354 }, { "epoch": 1.7859053222786843, "grad_norm": 1.248763918876648, "learning_rate": 7.429013119481398e-06, "loss": 0.6535, "step": 13355 }, { "epoch": 1.7860390478737629, "grad_norm": 1.3523023128509521, "learning_rate": 7.427617930978605e-06, "loss": 0.665, "step": 13356 }, { "epoch": 1.786172773468842, "grad_norm": 1.390576958656311, "learning_rate": 7.426222796094655e-06, "loss": 0.6613, "step": 13357 }, { "epoch": 1.7863064990639208, "grad_norm": 1.2277024984359741, "learning_rate": 7.424827714858631e-06, "loss": 0.6929, "step": 13358 }, { "epoch": 1.7864402246589997, "grad_norm": 1.2601195573806763, "learning_rate": 7.423432687299605e-06, "loss": 0.689, "step": 13359 }, { "epoch": 1.7865739502540787, "grad_norm": 1.1781598329544067, "learning_rate": 7.422037713446665e-06, "loss": 0.6546, "step": 13360 }, { "epoch": 1.7867076758491576, "grad_norm": 1.197702407836914, "learning_rate": 7.42064279332888e-06, "loss": 0.672, "step": 13361 }, { "epoch": 1.7868414014442364, "grad_norm": 1.2426199913024902, "learning_rate": 7.419247926975325e-06, "loss": 0.7246, "step": 13362 }, { "epoch": 1.7869751270393153, "grad_norm": 1.444120168685913, "learning_rate": 7.417853114415079e-06, "loss": 0.6689, "step": 13363 }, { "epoch": 1.7871088526343941, "grad_norm": 1.2977793216705322, "learning_rate": 7.416458355677215e-06, "loss": 0.6774, "step": 13364 }, { "epoch": 1.7872425782294732, "grad_norm": 1.130021572113037, "learning_rate": 7.415063650790801e-06, "loss": 0.677, "step": 13365 }, { "epoch": 1.787376303824552, "grad_norm": 1.3829281330108643, "learning_rate": 7.413668999784916e-06, "loss": 0.7151, "step": 13366 }, { "epoch": 1.7875100294196309, "grad_norm": 1.4196044206619263, "learning_rate": 7.412274402688622e-06, "loss": 0.7467, "step": 13367 }, { "epoch": 1.78764375501471, "grad_norm": 1.2620078325271606, "learning_rate": 7.410879859530996e-06, "loss": 0.6772, "step": 13368 }, { "epoch": 1.7877774806097886, "grad_norm": 1.3080027103424072, "learning_rate": 7.4094853703410985e-06, "loss": 0.6734, "step": 13369 }, { "epoch": 1.7879112062048677, "grad_norm": 1.2145764827728271, "learning_rate": 7.408090935147999e-06, "loss": 0.6656, "step": 13370 }, { "epoch": 1.7880449317999465, "grad_norm": 1.312849998474121, "learning_rate": 7.406696553980768e-06, "loss": 0.7444, "step": 13371 }, { "epoch": 1.7881786573950254, "grad_norm": 1.425845742225647, "learning_rate": 7.405302226868465e-06, "loss": 0.783, "step": 13372 }, { "epoch": 1.7883123829901044, "grad_norm": 1.4015134572982788, "learning_rate": 7.403907953840151e-06, "loss": 0.749, "step": 13373 }, { "epoch": 1.788446108585183, "grad_norm": 1.3438396453857422, "learning_rate": 7.402513734924895e-06, "loss": 0.7202, "step": 13374 }, { "epoch": 1.7885798341802621, "grad_norm": 1.215644121170044, "learning_rate": 7.401119570151749e-06, "loss": 0.662, "step": 13375 }, { "epoch": 1.788713559775341, "grad_norm": 1.332722783088684, "learning_rate": 7.399725459549783e-06, "loss": 0.7028, "step": 13376 }, { "epoch": 1.7888472853704198, "grad_norm": 1.2073808908462524, "learning_rate": 7.398331403148053e-06, "loss": 0.6459, "step": 13377 }, { "epoch": 1.7889810109654989, "grad_norm": 1.1911091804504395, "learning_rate": 7.3969374009756104e-06, "loss": 0.6776, "step": 13378 }, { "epoch": 1.7891147365605777, "grad_norm": 1.2805213928222656, "learning_rate": 7.395543453061522e-06, "loss": 0.6743, "step": 13379 }, { "epoch": 1.7892484621556566, "grad_norm": 1.1723949909210205, "learning_rate": 7.394149559434838e-06, "loss": 0.6974, "step": 13380 }, { "epoch": 1.7893821877507357, "grad_norm": 1.191560983657837, "learning_rate": 7.392755720124609e-06, "loss": 0.6692, "step": 13381 }, { "epoch": 1.7895159133458143, "grad_norm": 1.1938276290893555, "learning_rate": 7.391361935159893e-06, "loss": 0.6605, "step": 13382 }, { "epoch": 1.7896496389408934, "grad_norm": 1.3397566080093384, "learning_rate": 7.38996820456974e-06, "loss": 0.6469, "step": 13383 }, { "epoch": 1.7897833645359722, "grad_norm": 1.1818126440048218, "learning_rate": 7.388574528383207e-06, "loss": 0.7018, "step": 13384 }, { "epoch": 1.789917090131051, "grad_norm": 1.1265572309494019, "learning_rate": 7.387180906629339e-06, "loss": 0.707, "step": 13385 }, { "epoch": 1.7900508157261301, "grad_norm": 1.3017100095748901, "learning_rate": 7.38578733933718e-06, "loss": 0.651, "step": 13386 }, { "epoch": 1.7901845413212087, "grad_norm": 1.2248269319534302, "learning_rate": 7.384393826535786e-06, "loss": 0.7286, "step": 13387 }, { "epoch": 1.7903182669162878, "grad_norm": 1.3767300844192505, "learning_rate": 7.383000368254199e-06, "loss": 0.7568, "step": 13388 }, { "epoch": 1.7904519925113667, "grad_norm": 1.2575101852416992, "learning_rate": 7.3816069645214615e-06, "loss": 0.6654, "step": 13389 }, { "epoch": 1.7905857181064455, "grad_norm": 1.3004111051559448, "learning_rate": 7.380213615366627e-06, "loss": 0.7282, "step": 13390 }, { "epoch": 1.7907194437015246, "grad_norm": 1.2869174480438232, "learning_rate": 7.378820320818728e-06, "loss": 0.6915, "step": 13391 }, { "epoch": 1.7908531692966032, "grad_norm": 1.2462431192398071, "learning_rate": 7.377427080906816e-06, "loss": 0.7109, "step": 13392 }, { "epoch": 1.7909868948916823, "grad_norm": 1.2483997344970703, "learning_rate": 7.376033895659927e-06, "loss": 0.6829, "step": 13393 }, { "epoch": 1.7911206204867611, "grad_norm": 1.302516222000122, "learning_rate": 7.374640765107095e-06, "loss": 0.7578, "step": 13394 }, { "epoch": 1.79125434608184, "grad_norm": 1.0462085008621216, "learning_rate": 7.373247689277367e-06, "loss": 0.6055, "step": 13395 }, { "epoch": 1.791388071676919, "grad_norm": 1.324966311454773, "learning_rate": 7.3718546681997795e-06, "loss": 0.745, "step": 13396 }, { "epoch": 1.791521797271998, "grad_norm": 1.226814866065979, "learning_rate": 7.370461701903362e-06, "loss": 0.6926, "step": 13397 }, { "epoch": 1.7916555228670767, "grad_norm": 1.191521406173706, "learning_rate": 7.369068790417159e-06, "loss": 0.7267, "step": 13398 }, { "epoch": 1.7917892484621558, "grad_norm": 1.2224823236465454, "learning_rate": 7.367675933770196e-06, "loss": 0.5498, "step": 13399 }, { "epoch": 1.7919229740572344, "grad_norm": 1.2102611064910889, "learning_rate": 7.366283131991512e-06, "loss": 0.7043, "step": 13400 }, { "epoch": 1.7920566996523135, "grad_norm": 1.2830649614334106, "learning_rate": 7.3648903851101335e-06, "loss": 0.6513, "step": 13401 }, { "epoch": 1.7921904252473924, "grad_norm": 1.3355966806411743, "learning_rate": 7.3634976931550925e-06, "loss": 0.7399, "step": 13402 }, { "epoch": 1.7923241508424712, "grad_norm": 1.1737391948699951, "learning_rate": 7.362105056155423e-06, "loss": 0.631, "step": 13403 }, { "epoch": 1.7924578764375503, "grad_norm": 1.1708190441131592, "learning_rate": 7.360712474140149e-06, "loss": 0.6969, "step": 13404 }, { "epoch": 1.792591602032629, "grad_norm": 1.3482171297073364, "learning_rate": 7.359319947138295e-06, "loss": 0.7234, "step": 13405 }, { "epoch": 1.792725327627708, "grad_norm": 1.2630066871643066, "learning_rate": 7.3579274751788935e-06, "loss": 0.6527, "step": 13406 }, { "epoch": 1.7928590532227868, "grad_norm": 1.2136695384979248, "learning_rate": 7.3565350582909614e-06, "loss": 0.6884, "step": 13407 }, { "epoch": 1.7929927788178657, "grad_norm": 1.3214963674545288, "learning_rate": 7.355142696503528e-06, "loss": 0.6347, "step": 13408 }, { "epoch": 1.7931265044129447, "grad_norm": 1.2179282903671265, "learning_rate": 7.353750389845616e-06, "loss": 0.6732, "step": 13409 }, { "epoch": 1.7932602300080234, "grad_norm": 1.2094076871871948, "learning_rate": 7.352358138346241e-06, "loss": 0.6123, "step": 13410 }, { "epoch": 1.7933939556031024, "grad_norm": 1.1670721769332886, "learning_rate": 7.350965942034433e-06, "loss": 0.6523, "step": 13411 }, { "epoch": 1.7935276811981813, "grad_norm": 1.2386744022369385, "learning_rate": 7.3495738009392026e-06, "loss": 0.6983, "step": 13412 }, { "epoch": 1.7936614067932601, "grad_norm": 1.2220262289047241, "learning_rate": 7.348181715089569e-06, "loss": 0.7049, "step": 13413 }, { "epoch": 1.7937951323883392, "grad_norm": 1.25224769115448, "learning_rate": 7.34678968451455e-06, "loss": 0.6315, "step": 13414 }, { "epoch": 1.793928857983418, "grad_norm": 1.1697125434875488, "learning_rate": 7.345397709243159e-06, "loss": 0.6284, "step": 13415 }, { "epoch": 1.794062583578497, "grad_norm": 1.559451699256897, "learning_rate": 7.344005789304416e-06, "loss": 0.8295, "step": 13416 }, { "epoch": 1.794196309173576, "grad_norm": 1.2177834510803223, "learning_rate": 7.3426139247273335e-06, "loss": 0.6051, "step": 13417 }, { "epoch": 1.7943300347686546, "grad_norm": 1.2296241521835327, "learning_rate": 7.3412221155409135e-06, "loss": 0.6447, "step": 13418 }, { "epoch": 1.7944637603637337, "grad_norm": 1.3108586072921753, "learning_rate": 7.33983036177418e-06, "loss": 0.733, "step": 13419 }, { "epoch": 1.7945974859588125, "grad_norm": 1.1300619840621948, "learning_rate": 7.338438663456136e-06, "loss": 0.652, "step": 13420 }, { "epoch": 1.7947312115538914, "grad_norm": 1.2096654176712036, "learning_rate": 7.337047020615789e-06, "loss": 0.6483, "step": 13421 }, { "epoch": 1.7948649371489704, "grad_norm": 1.2980238199234009, "learning_rate": 7.335655433282151e-06, "loss": 0.691, "step": 13422 }, { "epoch": 1.794998662744049, "grad_norm": 1.0964913368225098, "learning_rate": 7.334263901484223e-06, "loss": 0.6168, "step": 13423 }, { "epoch": 1.7951323883391281, "grad_norm": 1.2979035377502441, "learning_rate": 7.332872425251017e-06, "loss": 0.7011, "step": 13424 }, { "epoch": 1.795266113934207, "grad_norm": 1.2881304025650024, "learning_rate": 7.331481004611533e-06, "loss": 0.6275, "step": 13425 }, { "epoch": 1.7953998395292858, "grad_norm": 1.1788579225540161, "learning_rate": 7.330089639594771e-06, "loss": 0.645, "step": 13426 }, { "epoch": 1.795533565124365, "grad_norm": 1.2483694553375244, "learning_rate": 7.328698330229738e-06, "loss": 0.7253, "step": 13427 }, { "epoch": 1.7956672907194438, "grad_norm": 1.3123050928115845, "learning_rate": 7.327307076545428e-06, "loss": 0.707, "step": 13428 }, { "epoch": 1.7958010163145226, "grad_norm": 1.2769874334335327, "learning_rate": 7.325915878570851e-06, "loss": 0.7124, "step": 13429 }, { "epoch": 1.7959347419096015, "grad_norm": 1.2320728302001953, "learning_rate": 7.324524736334997e-06, "loss": 0.6965, "step": 13430 }, { "epoch": 1.7960684675046803, "grad_norm": 1.2586181163787842, "learning_rate": 7.32313364986686e-06, "loss": 0.7034, "step": 13431 }, { "epoch": 1.7962021930997594, "grad_norm": 1.351989984512329, "learning_rate": 7.321742619195446e-06, "loss": 0.746, "step": 13432 }, { "epoch": 1.7963359186948382, "grad_norm": 1.220894694328308, "learning_rate": 7.320351644349741e-06, "loss": 0.589, "step": 13433 }, { "epoch": 1.796469644289917, "grad_norm": 1.1424720287322998, "learning_rate": 7.318960725358742e-06, "loss": 0.6312, "step": 13434 }, { "epoch": 1.7966033698849961, "grad_norm": 1.2809216976165771, "learning_rate": 7.317569862251444e-06, "loss": 0.6917, "step": 13435 }, { "epoch": 1.7967370954800748, "grad_norm": 1.266371488571167, "learning_rate": 7.316179055056831e-06, "loss": 0.609, "step": 13436 }, { "epoch": 1.7968708210751538, "grad_norm": 1.2243026494979858, "learning_rate": 7.3147883038039015e-06, "loss": 0.6619, "step": 13437 }, { "epoch": 1.7970045466702327, "grad_norm": 1.3708266019821167, "learning_rate": 7.313397608521641e-06, "loss": 0.7951, "step": 13438 }, { "epoch": 1.7971382722653115, "grad_norm": 1.2156145572662354, "learning_rate": 7.312006969239032e-06, "loss": 0.7021, "step": 13439 }, { "epoch": 1.7972719978603906, "grad_norm": 1.3105140924453735, "learning_rate": 7.3106163859850675e-06, "loss": 0.6566, "step": 13440 }, { "epoch": 1.7974057234554692, "grad_norm": 1.4145431518554688, "learning_rate": 7.309225858788733e-06, "loss": 0.8054, "step": 13441 }, { "epoch": 1.7975394490505483, "grad_norm": 1.3209199905395508, "learning_rate": 7.307835387679007e-06, "loss": 0.7223, "step": 13442 }, { "epoch": 1.7976731746456271, "grad_norm": 1.338935136795044, "learning_rate": 7.3064449726848805e-06, "loss": 0.6453, "step": 13443 }, { "epoch": 1.797806900240706, "grad_norm": 1.0802689790725708, "learning_rate": 7.305054613835326e-06, "loss": 0.7094, "step": 13444 }, { "epoch": 1.797940625835785, "grad_norm": 1.1431933641433716, "learning_rate": 7.303664311159335e-06, "loss": 0.6033, "step": 13445 }, { "epoch": 1.798074351430864, "grad_norm": 1.3665932416915894, "learning_rate": 7.3022740646858785e-06, "loss": 0.667, "step": 13446 }, { "epoch": 1.7982080770259428, "grad_norm": 1.1526343822479248, "learning_rate": 7.300883874443935e-06, "loss": 0.615, "step": 13447 }, { "epoch": 1.7983418026210216, "grad_norm": 1.2276860475540161, "learning_rate": 7.299493740462489e-06, "loss": 0.6817, "step": 13448 }, { "epoch": 1.7984755282161005, "grad_norm": 1.3118462562561035, "learning_rate": 7.2981036627705116e-06, "loss": 0.6688, "step": 13449 }, { "epoch": 1.7986092538111795, "grad_norm": 1.2382930517196655, "learning_rate": 7.2967136413969745e-06, "loss": 0.6919, "step": 13450 }, { "epoch": 1.7987429794062584, "grad_norm": 1.3025058507919312, "learning_rate": 7.295323676370858e-06, "loss": 0.7169, "step": 13451 }, { "epoch": 1.7988767050013372, "grad_norm": 1.358379602432251, "learning_rate": 7.293933767721127e-06, "loss": 0.7169, "step": 13452 }, { "epoch": 1.7990104305964163, "grad_norm": 1.1618032455444336, "learning_rate": 7.292543915476761e-06, "loss": 0.6362, "step": 13453 }, { "epoch": 1.799144156191495, "grad_norm": 1.2676730155944824, "learning_rate": 7.291154119666727e-06, "loss": 0.6653, "step": 13454 }, { "epoch": 1.799277881786574, "grad_norm": 1.196387529373169, "learning_rate": 7.289764380319989e-06, "loss": 0.7012, "step": 13455 }, { "epoch": 1.7994116073816528, "grad_norm": 1.2537990808486938, "learning_rate": 7.288374697465524e-06, "loss": 0.6568, "step": 13456 }, { "epoch": 1.7995453329767317, "grad_norm": 1.2401738166809082, "learning_rate": 7.2869850711322934e-06, "loss": 0.6325, "step": 13457 }, { "epoch": 1.7996790585718108, "grad_norm": 1.2342698574066162, "learning_rate": 7.285595501349259e-06, "loss": 0.7344, "step": 13458 }, { "epoch": 1.7998127841668894, "grad_norm": 1.2173714637756348, "learning_rate": 7.28420598814539e-06, "loss": 0.6696, "step": 13459 }, { "epoch": 1.7999465097619685, "grad_norm": 1.1586909294128418, "learning_rate": 7.282816531549648e-06, "loss": 0.6715, "step": 13460 }, { "epoch": 1.8000802353570473, "grad_norm": 1.1464523077011108, "learning_rate": 7.281427131590999e-06, "loss": 0.6345, "step": 13461 }, { "epoch": 1.8002139609521262, "grad_norm": 1.2275243997573853, "learning_rate": 7.2800377882984e-06, "loss": 0.6193, "step": 13462 }, { "epoch": 1.8003476865472052, "grad_norm": 1.3102253675460815, "learning_rate": 7.278648501700804e-06, "loss": 0.7097, "step": 13463 }, { "epoch": 1.800481412142284, "grad_norm": 1.3097261190414429, "learning_rate": 7.277259271827184e-06, "loss": 0.7049, "step": 13464 }, { "epoch": 1.800615137737363, "grad_norm": 1.2153162956237793, "learning_rate": 7.275870098706485e-06, "loss": 0.661, "step": 13465 }, { "epoch": 1.800748863332442, "grad_norm": 1.4036004543304443, "learning_rate": 7.274480982367664e-06, "loss": 0.7015, "step": 13466 }, { "epoch": 1.8008825889275206, "grad_norm": 1.2054928541183472, "learning_rate": 7.273091922839686e-06, "loss": 0.6822, "step": 13467 }, { "epoch": 1.8010163145225997, "grad_norm": 1.2066320180892944, "learning_rate": 7.271702920151491e-06, "loss": 0.6771, "step": 13468 }, { "epoch": 1.8011500401176785, "grad_norm": 1.4527721405029297, "learning_rate": 7.270313974332042e-06, "loss": 0.7551, "step": 13469 }, { "epoch": 1.8012837657127574, "grad_norm": 1.0772895812988281, "learning_rate": 7.268925085410288e-06, "loss": 0.5768, "step": 13470 }, { "epoch": 1.8014174913078365, "grad_norm": 1.336227297782898, "learning_rate": 7.26753625341517e-06, "loss": 0.7337, "step": 13471 }, { "epoch": 1.801551216902915, "grad_norm": 1.1203022003173828, "learning_rate": 7.266147478375649e-06, "loss": 0.6589, "step": 13472 }, { "epoch": 1.8016849424979942, "grad_norm": 1.3411940336227417, "learning_rate": 7.2647587603206695e-06, "loss": 0.7092, "step": 13473 }, { "epoch": 1.801818668093073, "grad_norm": 1.2565068006515503, "learning_rate": 7.263370099279173e-06, "loss": 0.6827, "step": 13474 }, { "epoch": 1.8019523936881519, "grad_norm": 1.2375364303588867, "learning_rate": 7.261981495280111e-06, "loss": 0.6942, "step": 13475 }, { "epoch": 1.802086119283231, "grad_norm": 1.1999362707138062, "learning_rate": 7.260592948352418e-06, "loss": 0.7342, "step": 13476 }, { "epoch": 1.8022198448783096, "grad_norm": 1.3678416013717651, "learning_rate": 7.259204458525051e-06, "loss": 0.7925, "step": 13477 }, { "epoch": 1.8023535704733886, "grad_norm": 1.2612638473510742, "learning_rate": 7.257816025826942e-06, "loss": 0.6657, "step": 13478 }, { "epoch": 1.8024872960684675, "grad_norm": 1.313520073890686, "learning_rate": 7.256427650287032e-06, "loss": 0.7848, "step": 13479 }, { "epoch": 1.8026210216635463, "grad_norm": 1.2450754642486572, "learning_rate": 7.255039331934266e-06, "loss": 0.6151, "step": 13480 }, { "epoch": 1.8027547472586254, "grad_norm": 1.3240654468536377, "learning_rate": 7.253651070797578e-06, "loss": 0.7502, "step": 13481 }, { "epoch": 1.8028884728537042, "grad_norm": 1.3654778003692627, "learning_rate": 7.2522628669059015e-06, "loss": 0.6816, "step": 13482 }, { "epoch": 1.803022198448783, "grad_norm": 1.3863770961761475, "learning_rate": 7.250874720288181e-06, "loss": 0.7079, "step": 13483 }, { "epoch": 1.8031559240438622, "grad_norm": 1.170119285583496, "learning_rate": 7.2494866309733414e-06, "loss": 0.6547, "step": 13484 }, { "epoch": 1.8032896496389408, "grad_norm": 1.334919810295105, "learning_rate": 7.248098598990324e-06, "loss": 0.6977, "step": 13485 }, { "epoch": 1.8034233752340199, "grad_norm": 1.1556966304779053, "learning_rate": 7.24671062436806e-06, "loss": 0.6379, "step": 13486 }, { "epoch": 1.8035571008290987, "grad_norm": 1.2706815004348755, "learning_rate": 7.245322707135474e-06, "loss": 0.702, "step": 13487 }, { "epoch": 1.8036908264241776, "grad_norm": 1.179062843322754, "learning_rate": 7.243934847321504e-06, "loss": 0.7235, "step": 13488 }, { "epoch": 1.8038245520192566, "grad_norm": 1.3084492683410645, "learning_rate": 7.242547044955075e-06, "loss": 0.6888, "step": 13489 }, { "epoch": 1.8039582776143352, "grad_norm": 1.2412402629852295, "learning_rate": 7.24115930006511e-06, "loss": 0.6727, "step": 13490 }, { "epoch": 1.8040920032094143, "grad_norm": 1.1943401098251343, "learning_rate": 7.2397716126805415e-06, "loss": 0.6658, "step": 13491 }, { "epoch": 1.8042257288044932, "grad_norm": 1.4427387714385986, "learning_rate": 7.238383982830292e-06, "loss": 0.7774, "step": 13492 }, { "epoch": 1.804359454399572, "grad_norm": 1.336787462234497, "learning_rate": 7.2369964105432884e-06, "loss": 0.6224, "step": 13493 }, { "epoch": 1.804493179994651, "grad_norm": 1.117634654045105, "learning_rate": 7.235608895848451e-06, "loss": 0.6012, "step": 13494 }, { "epoch": 1.8046269055897297, "grad_norm": 1.2029128074645996, "learning_rate": 7.2342214387746965e-06, "loss": 0.772, "step": 13495 }, { "epoch": 1.8047606311848088, "grad_norm": 1.2268115282058716, "learning_rate": 7.232834039350954e-06, "loss": 0.6984, "step": 13496 }, { "epoch": 1.8048943567798876, "grad_norm": 1.236081600189209, "learning_rate": 7.231446697606136e-06, "loss": 0.6608, "step": 13497 }, { "epoch": 1.8050280823749665, "grad_norm": 1.248104453086853, "learning_rate": 7.23005941356916e-06, "loss": 0.6695, "step": 13498 }, { "epoch": 1.8051618079700456, "grad_norm": 1.1277081966400146, "learning_rate": 7.22867218726895e-06, "loss": 0.7062, "step": 13499 }, { "epoch": 1.8052955335651244, "grad_norm": 1.2174861431121826, "learning_rate": 7.227285018734411e-06, "loss": 0.679, "step": 13500 }, { "epoch": 1.8054292591602032, "grad_norm": 1.228413701057434, "learning_rate": 7.225897907994468e-06, "loss": 0.6606, "step": 13501 }, { "epoch": 1.8055629847552823, "grad_norm": 1.2130722999572754, "learning_rate": 7.224510855078027e-06, "loss": 0.602, "step": 13502 }, { "epoch": 1.805696710350361, "grad_norm": 1.2508689165115356, "learning_rate": 7.223123860013998e-06, "loss": 0.6946, "step": 13503 }, { "epoch": 1.80583043594544, "grad_norm": 1.2220182418823242, "learning_rate": 7.221736922831297e-06, "loss": 0.6785, "step": 13504 }, { "epoch": 1.8059641615405189, "grad_norm": 1.1811316013336182, "learning_rate": 7.220350043558835e-06, "loss": 0.6681, "step": 13505 }, { "epoch": 1.8060978871355977, "grad_norm": 1.4225716590881348, "learning_rate": 7.21896322222551e-06, "loss": 0.7318, "step": 13506 }, { "epoch": 1.8062316127306768, "grad_norm": 1.1350493431091309, "learning_rate": 7.21757645886024e-06, "loss": 0.68, "step": 13507 }, { "epoch": 1.8063653383257554, "grad_norm": 1.212494969367981, "learning_rate": 7.216189753491924e-06, "loss": 0.6191, "step": 13508 }, { "epoch": 1.8064990639208345, "grad_norm": 1.2296123504638672, "learning_rate": 7.214803106149471e-06, "loss": 0.6921, "step": 13509 }, { "epoch": 1.8066327895159133, "grad_norm": 1.275501012802124, "learning_rate": 7.213416516861779e-06, "loss": 0.6705, "step": 13510 }, { "epoch": 1.8067665151109922, "grad_norm": 1.1218996047973633, "learning_rate": 7.212029985657754e-06, "loss": 0.6798, "step": 13511 }, { "epoch": 1.8069002407060712, "grad_norm": 1.1189286708831787, "learning_rate": 7.2106435125663e-06, "loss": 0.6776, "step": 13512 }, { "epoch": 1.8070339663011499, "grad_norm": 1.288316011428833, "learning_rate": 7.2092570976163065e-06, "loss": 0.6893, "step": 13513 }, { "epoch": 1.807167691896229, "grad_norm": 1.2410317659378052, "learning_rate": 7.207870740836684e-06, "loss": 0.7023, "step": 13514 }, { "epoch": 1.8073014174913078, "grad_norm": 1.1779961585998535, "learning_rate": 7.206484442256324e-06, "loss": 0.6149, "step": 13515 }, { "epoch": 1.8074351430863866, "grad_norm": 1.4498060941696167, "learning_rate": 7.205098201904118e-06, "loss": 0.8172, "step": 13516 }, { "epoch": 1.8075688686814657, "grad_norm": 1.1095607280731201, "learning_rate": 7.203712019808968e-06, "loss": 0.6235, "step": 13517 }, { "epoch": 1.8077025942765446, "grad_norm": 1.1724600791931152, "learning_rate": 7.2023258959997675e-06, "loss": 0.6599, "step": 13518 }, { "epoch": 1.8078363198716234, "grad_norm": 1.247000813484192, "learning_rate": 7.200939830505402e-06, "loss": 0.6627, "step": 13519 }, { "epoch": 1.8079700454667025, "grad_norm": 1.4289426803588867, "learning_rate": 7.1995538233547725e-06, "loss": 0.703, "step": 13520 }, { "epoch": 1.808103771061781, "grad_norm": 1.306842565536499, "learning_rate": 7.198167874576758e-06, "loss": 0.7625, "step": 13521 }, { "epoch": 1.8082374966568602, "grad_norm": 1.2688566446304321, "learning_rate": 7.196781984200258e-06, "loss": 0.7134, "step": 13522 }, { "epoch": 1.808371222251939, "grad_norm": 1.2211495637893677, "learning_rate": 7.195396152254155e-06, "loss": 0.6909, "step": 13523 }, { "epoch": 1.8085049478470179, "grad_norm": 1.4049979448318481, "learning_rate": 7.194010378767333e-06, "loss": 0.7442, "step": 13524 }, { "epoch": 1.808638673442097, "grad_norm": 1.256548285484314, "learning_rate": 7.1926246637686805e-06, "loss": 0.6858, "step": 13525 }, { "epoch": 1.8087723990371756, "grad_norm": 1.423722267150879, "learning_rate": 7.191239007287082e-06, "loss": 0.7627, "step": 13526 }, { "epoch": 1.8089061246322546, "grad_norm": 1.4026179313659668, "learning_rate": 7.189853409351415e-06, "loss": 0.7943, "step": 13527 }, { "epoch": 1.8090398502273335, "grad_norm": 1.2950092554092407, "learning_rate": 7.188467869990569e-06, "loss": 0.6433, "step": 13528 }, { "epoch": 1.8091735758224123, "grad_norm": 1.1776596307754517, "learning_rate": 7.187082389233415e-06, "loss": 0.6758, "step": 13529 }, { "epoch": 1.8093073014174914, "grad_norm": 1.200979232788086, "learning_rate": 7.18569696710884e-06, "loss": 0.583, "step": 13530 }, { "epoch": 1.8094410270125703, "grad_norm": 1.2164534330368042, "learning_rate": 7.184311603645719e-06, "loss": 0.6899, "step": 13531 }, { "epoch": 1.809574752607649, "grad_norm": 1.2490911483764648, "learning_rate": 7.1829262988729265e-06, "loss": 0.7115, "step": 13532 }, { "epoch": 1.809708478202728, "grad_norm": 1.1914219856262207, "learning_rate": 7.181541052819343e-06, "loss": 0.675, "step": 13533 }, { "epoch": 1.8098422037978068, "grad_norm": 1.304665207862854, "learning_rate": 7.18015586551384e-06, "loss": 0.7068, "step": 13534 }, { "epoch": 1.8099759293928859, "grad_norm": 1.3632184267044067, "learning_rate": 7.1787707369852835e-06, "loss": 0.7533, "step": 13535 }, { "epoch": 1.8101096549879647, "grad_norm": 1.2004295587539673, "learning_rate": 7.1773856672625555e-06, "loss": 0.6986, "step": 13536 }, { "epoch": 1.8102433805830436, "grad_norm": 1.3901088237762451, "learning_rate": 7.17600065637452e-06, "loss": 0.6908, "step": 13537 }, { "epoch": 1.8103771061781226, "grad_norm": 1.3509643077850342, "learning_rate": 7.17461570435005e-06, "loss": 0.7661, "step": 13538 }, { "epoch": 1.8105108317732013, "grad_norm": 1.2957826852798462, "learning_rate": 7.173230811218015e-06, "loss": 0.6553, "step": 13539 }, { "epoch": 1.8106445573682803, "grad_norm": 1.2563608884811401, "learning_rate": 7.1718459770072725e-06, "loss": 0.742, "step": 13540 }, { "epoch": 1.8107782829633592, "grad_norm": 1.318854570388794, "learning_rate": 7.1704612017467014e-06, "loss": 0.7392, "step": 13541 }, { "epoch": 1.810912008558438, "grad_norm": 1.1652617454528809, "learning_rate": 7.169076485465154e-06, "loss": 0.644, "step": 13542 }, { "epoch": 1.811045734153517, "grad_norm": 1.1651017665863037, "learning_rate": 7.167691828191498e-06, "loss": 0.6091, "step": 13543 }, { "epoch": 1.8111794597485957, "grad_norm": 1.2813012599945068, "learning_rate": 7.166307229954599e-06, "loss": 0.627, "step": 13544 }, { "epoch": 1.8113131853436748, "grad_norm": 1.1741511821746826, "learning_rate": 7.16492269078331e-06, "loss": 0.7271, "step": 13545 }, { "epoch": 1.8114469109387537, "grad_norm": 1.3188551664352417, "learning_rate": 7.1635382107065e-06, "loss": 0.6621, "step": 13546 }, { "epoch": 1.8115806365338325, "grad_norm": 1.202091097831726, "learning_rate": 7.1621537897530205e-06, "loss": 0.6944, "step": 13547 }, { "epoch": 1.8117143621289116, "grad_norm": 1.2145463228225708, "learning_rate": 7.160769427951726e-06, "loss": 0.6318, "step": 13548 }, { "epoch": 1.8118480877239904, "grad_norm": 1.2254374027252197, "learning_rate": 7.159385125331478e-06, "loss": 0.7038, "step": 13549 }, { "epoch": 1.8119818133190693, "grad_norm": 1.1796205043792725, "learning_rate": 7.158000881921131e-06, "loss": 0.6637, "step": 13550 }, { "epoch": 1.8121155389141481, "grad_norm": 1.4149168729782104, "learning_rate": 7.156616697749532e-06, "loss": 0.7709, "step": 13551 }, { "epoch": 1.812249264509227, "grad_norm": 1.178280234336853, "learning_rate": 7.155232572845541e-06, "loss": 0.6207, "step": 13552 }, { "epoch": 1.812382990104306, "grad_norm": 1.2755999565124512, "learning_rate": 7.153848507238002e-06, "loss": 0.7183, "step": 13553 }, { "epoch": 1.8125167156993849, "grad_norm": 1.355187177658081, "learning_rate": 7.152464500955769e-06, "loss": 0.6586, "step": 13554 }, { "epoch": 1.8126504412944637, "grad_norm": 1.2713872194290161, "learning_rate": 7.151080554027688e-06, "loss": 0.7022, "step": 13555 }, { "epoch": 1.8127841668895428, "grad_norm": 1.1352962255477905, "learning_rate": 7.149696666482607e-06, "loss": 0.6592, "step": 13556 }, { "epoch": 1.8129178924846214, "grad_norm": 1.3762716054916382, "learning_rate": 7.1483128383493715e-06, "loss": 0.688, "step": 13557 }, { "epoch": 1.8130516180797005, "grad_norm": 1.276595950126648, "learning_rate": 7.146929069656828e-06, "loss": 0.668, "step": 13558 }, { "epoch": 1.8131853436747793, "grad_norm": 1.1672265529632568, "learning_rate": 7.1455453604338145e-06, "loss": 0.6438, "step": 13559 }, { "epoch": 1.8133190692698582, "grad_norm": 1.2830318212509155, "learning_rate": 7.144161710709179e-06, "loss": 0.6345, "step": 13560 }, { "epoch": 1.8134527948649373, "grad_norm": 1.1646220684051514, "learning_rate": 7.142778120511758e-06, "loss": 0.667, "step": 13561 }, { "epoch": 1.813586520460016, "grad_norm": 1.1093212366104126, "learning_rate": 7.141394589870393e-06, "loss": 0.6573, "step": 13562 }, { "epoch": 1.813720246055095, "grad_norm": 1.3103210926055908, "learning_rate": 7.140011118813925e-06, "loss": 0.7157, "step": 13563 }, { "epoch": 1.8138539716501738, "grad_norm": 1.2983509302139282, "learning_rate": 7.1386277073711855e-06, "loss": 0.7328, "step": 13564 }, { "epoch": 1.8139876972452527, "grad_norm": 1.2334569692611694, "learning_rate": 7.1372443555710155e-06, "loss": 0.6582, "step": 13565 }, { "epoch": 1.8141214228403317, "grad_norm": 1.2476240396499634, "learning_rate": 7.13586106344225e-06, "loss": 0.6092, "step": 13566 }, { "epoch": 1.8142551484354106, "grad_norm": 1.262091040611267, "learning_rate": 7.134477831013714e-06, "loss": 0.6851, "step": 13567 }, { "epoch": 1.8143888740304894, "grad_norm": 1.253456950187683, "learning_rate": 7.133094658314248e-06, "loss": 0.6426, "step": 13568 }, { "epoch": 1.8145225996255685, "grad_norm": 1.1792532205581665, "learning_rate": 7.1317115453726815e-06, "loss": 0.6238, "step": 13569 }, { "epoch": 1.8146563252206471, "grad_norm": 1.2162641286849976, "learning_rate": 7.130328492217841e-06, "loss": 0.6266, "step": 13570 }, { "epoch": 1.8147900508157262, "grad_norm": 1.1827529668807983, "learning_rate": 7.128945498878562e-06, "loss": 0.6895, "step": 13571 }, { "epoch": 1.814923776410805, "grad_norm": 1.2661943435668945, "learning_rate": 7.127562565383661e-06, "loss": 0.6891, "step": 13572 }, { "epoch": 1.815057502005884, "grad_norm": 1.1818904876708984, "learning_rate": 7.1261796917619745e-06, "loss": 0.6605, "step": 13573 }, { "epoch": 1.815191227600963, "grad_norm": 1.2967522144317627, "learning_rate": 7.124796878042319e-06, "loss": 0.7192, "step": 13574 }, { "epoch": 1.8153249531960416, "grad_norm": 1.227283239364624, "learning_rate": 7.123414124253522e-06, "loss": 0.66, "step": 13575 }, { "epoch": 1.8154586787911207, "grad_norm": 1.238958716392517, "learning_rate": 7.122031430424406e-06, "loss": 0.6852, "step": 13576 }, { "epoch": 1.8155924043861995, "grad_norm": 1.2863264083862305, "learning_rate": 7.120648796583789e-06, "loss": 0.6643, "step": 13577 }, { "epoch": 1.8157261299812784, "grad_norm": 1.280329942703247, "learning_rate": 7.119266222760494e-06, "loss": 0.7014, "step": 13578 }, { "epoch": 1.8158598555763574, "grad_norm": 1.2752056121826172, "learning_rate": 7.1178837089833416e-06, "loss": 0.6836, "step": 13579 }, { "epoch": 1.815993581171436, "grad_norm": 1.1416422128677368, "learning_rate": 7.116501255281138e-06, "loss": 0.5878, "step": 13580 }, { "epoch": 1.8161273067665151, "grad_norm": 1.1690255403518677, "learning_rate": 7.115118861682711e-06, "loss": 0.67, "step": 13581 }, { "epoch": 1.816261032361594, "grad_norm": 1.2313569784164429, "learning_rate": 7.113736528216872e-06, "loss": 0.6407, "step": 13582 }, { "epoch": 1.8163947579566728, "grad_norm": 1.193312406539917, "learning_rate": 7.112354254912429e-06, "loss": 0.6652, "step": 13583 }, { "epoch": 1.816528483551752, "grad_norm": 1.2921831607818604, "learning_rate": 7.110972041798203e-06, "loss": 0.7383, "step": 13584 }, { "epoch": 1.8166622091468307, "grad_norm": 1.2011044025421143, "learning_rate": 7.109589888902995e-06, "loss": 0.5889, "step": 13585 }, { "epoch": 1.8167959347419096, "grad_norm": 1.3369941711425781, "learning_rate": 7.108207796255625e-06, "loss": 0.6647, "step": 13586 }, { "epoch": 1.8169296603369887, "grad_norm": 1.1748533248901367, "learning_rate": 7.106825763884895e-06, "loss": 0.6146, "step": 13587 }, { "epoch": 1.8170633859320673, "grad_norm": 1.2341011762619019, "learning_rate": 7.105443791819612e-06, "loss": 0.6544, "step": 13588 }, { "epoch": 1.8171971115271464, "grad_norm": 1.3026204109191895, "learning_rate": 7.1040618800885845e-06, "loss": 0.6576, "step": 13589 }, { "epoch": 1.8173308371222252, "grad_norm": 1.194996953010559, "learning_rate": 7.102680028720616e-06, "loss": 0.7159, "step": 13590 }, { "epoch": 1.817464562717304, "grad_norm": 1.5943880081176758, "learning_rate": 7.101298237744508e-06, "loss": 0.6831, "step": 13591 }, { "epoch": 1.8175982883123831, "grad_norm": 1.2619918584823608, "learning_rate": 7.099916507189067e-06, "loss": 0.7094, "step": 13592 }, { "epoch": 1.8177320139074618, "grad_norm": 1.3485101461410522, "learning_rate": 7.098534837083089e-06, "loss": 0.74, "step": 13593 }, { "epoch": 1.8178657395025408, "grad_norm": 1.346592664718628, "learning_rate": 7.097153227455379e-06, "loss": 0.691, "step": 13594 }, { "epoch": 1.8179994650976197, "grad_norm": 1.3063503503799438, "learning_rate": 7.0957716783347295e-06, "loss": 0.6682, "step": 13595 }, { "epoch": 1.8181331906926985, "grad_norm": 1.3465898036956787, "learning_rate": 7.09439018974994e-06, "loss": 0.7985, "step": 13596 }, { "epoch": 1.8182669162877776, "grad_norm": 1.1896618604660034, "learning_rate": 7.093008761729809e-06, "loss": 0.6451, "step": 13597 }, { "epoch": 1.8184006418828562, "grad_norm": 1.252610683441162, "learning_rate": 7.091627394303125e-06, "loss": 0.7402, "step": 13598 }, { "epoch": 1.8185343674779353, "grad_norm": 1.3104808330535889, "learning_rate": 7.09024608749869e-06, "loss": 0.6923, "step": 13599 }, { "epoch": 1.8186680930730141, "grad_norm": 1.2454110383987427, "learning_rate": 7.088864841345289e-06, "loss": 0.6506, "step": 13600 }, { "epoch": 1.818801818668093, "grad_norm": 1.5558629035949707, "learning_rate": 7.087483655871713e-06, "loss": 0.7542, "step": 13601 }, { "epoch": 1.818935544263172, "grad_norm": 1.265740990638733, "learning_rate": 7.086102531106755e-06, "loss": 0.6026, "step": 13602 }, { "epoch": 1.819069269858251, "grad_norm": 1.2846379280090332, "learning_rate": 7.084721467079202e-06, "loss": 0.7032, "step": 13603 }, { "epoch": 1.8192029954533298, "grad_norm": 1.2625577449798584, "learning_rate": 7.083340463817837e-06, "loss": 0.6803, "step": 13604 }, { "epoch": 1.8193367210484088, "grad_norm": 1.3744566440582275, "learning_rate": 7.081959521351454e-06, "loss": 0.6589, "step": 13605 }, { "epoch": 1.8194704466434874, "grad_norm": 1.169491171836853, "learning_rate": 7.080578639708827e-06, "loss": 0.716, "step": 13606 }, { "epoch": 1.8196041722385665, "grad_norm": 1.2505451440811157, "learning_rate": 7.079197818918749e-06, "loss": 0.6243, "step": 13607 }, { "epoch": 1.8197378978336454, "grad_norm": 1.3637359142303467, "learning_rate": 7.077817059009997e-06, "loss": 0.6775, "step": 13608 }, { "epoch": 1.8198716234287242, "grad_norm": 1.238973617553711, "learning_rate": 7.076436360011348e-06, "loss": 0.622, "step": 13609 }, { "epoch": 1.8200053490238033, "grad_norm": 1.1828560829162598, "learning_rate": 7.0750557219515916e-06, "loss": 0.7482, "step": 13610 }, { "epoch": 1.820139074618882, "grad_norm": 1.1666189432144165, "learning_rate": 7.073675144859499e-06, "loss": 0.6412, "step": 13611 }, { "epoch": 1.820272800213961, "grad_norm": 1.312224268913269, "learning_rate": 7.072294628763843e-06, "loss": 0.7112, "step": 13612 }, { "epoch": 1.8204065258090398, "grad_norm": 1.1342087984085083, "learning_rate": 7.0709141736934066e-06, "loss": 0.7414, "step": 13613 }, { "epoch": 1.8205402514041187, "grad_norm": 1.2651315927505493, "learning_rate": 7.069533779676961e-06, "loss": 0.663, "step": 13614 }, { "epoch": 1.8206739769991978, "grad_norm": 1.3959838151931763, "learning_rate": 7.06815344674328e-06, "loss": 0.771, "step": 13615 }, { "epoch": 1.8208077025942764, "grad_norm": 1.154520034790039, "learning_rate": 7.0667731749211375e-06, "loss": 0.6361, "step": 13616 }, { "epoch": 1.8209414281893554, "grad_norm": 1.1782459020614624, "learning_rate": 7.0653929642392974e-06, "loss": 0.6224, "step": 13617 }, { "epoch": 1.8210751537844343, "grad_norm": 1.384628176689148, "learning_rate": 7.0640128147265355e-06, "loss": 0.7108, "step": 13618 }, { "epoch": 1.8212088793795131, "grad_norm": 1.3161417245864868, "learning_rate": 7.062632726411616e-06, "loss": 0.5926, "step": 13619 }, { "epoch": 1.8213426049745922, "grad_norm": 1.1590203046798706, "learning_rate": 7.061252699323307e-06, "loss": 0.73, "step": 13620 }, { "epoch": 1.821476330569671, "grad_norm": 1.2304840087890625, "learning_rate": 7.059872733490372e-06, "loss": 0.6546, "step": 13621 }, { "epoch": 1.82161005616475, "grad_norm": 1.2083485126495361, "learning_rate": 7.0584928289415755e-06, "loss": 0.7346, "step": 13622 }, { "epoch": 1.821743781759829, "grad_norm": 1.286428689956665, "learning_rate": 7.057112985705685e-06, "loss": 0.6992, "step": 13623 }, { "epoch": 1.8218775073549076, "grad_norm": 1.390513300895691, "learning_rate": 7.055733203811459e-06, "loss": 0.7752, "step": 13624 }, { "epoch": 1.8220112329499867, "grad_norm": 1.2702159881591797, "learning_rate": 7.054353483287651e-06, "loss": 0.6943, "step": 13625 }, { "epoch": 1.8221449585450655, "grad_norm": 1.3454058170318604, "learning_rate": 7.052973824163032e-06, "loss": 0.7507, "step": 13626 }, { "epoch": 1.8222786841401444, "grad_norm": 1.326231837272644, "learning_rate": 7.051594226466351e-06, "loss": 0.6592, "step": 13627 }, { "epoch": 1.8224124097352234, "grad_norm": 1.196489691734314, "learning_rate": 7.050214690226365e-06, "loss": 0.6161, "step": 13628 }, { "epoch": 1.822546135330302, "grad_norm": 1.1779720783233643, "learning_rate": 7.048835215471834e-06, "loss": 0.6189, "step": 13629 }, { "epoch": 1.8226798609253811, "grad_norm": 1.2498775720596313, "learning_rate": 7.047455802231506e-06, "loss": 0.5919, "step": 13630 }, { "epoch": 1.82281358652046, "grad_norm": 1.4105192422866821, "learning_rate": 7.046076450534142e-06, "loss": 0.7041, "step": 13631 }, { "epoch": 1.8229473121155388, "grad_norm": 1.304527997970581, "learning_rate": 7.0446971604084845e-06, "loss": 0.6954, "step": 13632 }, { "epoch": 1.823081037710618, "grad_norm": 1.259665608406067, "learning_rate": 7.043317931883287e-06, "loss": 0.724, "step": 13633 }, { "epoch": 1.8232147633056968, "grad_norm": 1.316893458366394, "learning_rate": 7.041938764987297e-06, "loss": 0.6838, "step": 13634 }, { "epoch": 1.8233484889007756, "grad_norm": 1.3688302040100098, "learning_rate": 7.040559659749265e-06, "loss": 0.6244, "step": 13635 }, { "epoch": 1.8234822144958545, "grad_norm": 1.3386318683624268, "learning_rate": 7.0391806161979316e-06, "loss": 0.7227, "step": 13636 }, { "epoch": 1.8236159400909333, "grad_norm": 1.4612607955932617, "learning_rate": 7.037801634362049e-06, "loss": 0.7475, "step": 13637 }, { "epoch": 1.8237496656860124, "grad_norm": 1.1339207887649536, "learning_rate": 7.036422714270353e-06, "loss": 0.5869, "step": 13638 }, { "epoch": 1.8238833912810912, "grad_norm": 1.2771040201187134, "learning_rate": 7.035043855951593e-06, "loss": 0.6836, "step": 13639 }, { "epoch": 1.82401711687617, "grad_norm": 1.328466773033142, "learning_rate": 7.0336650594345055e-06, "loss": 0.7341, "step": 13640 }, { "epoch": 1.8241508424712491, "grad_norm": 1.184380292892456, "learning_rate": 7.032286324747829e-06, "loss": 0.6625, "step": 13641 }, { "epoch": 1.8242845680663278, "grad_norm": 1.3108444213867188, "learning_rate": 7.030907651920309e-06, "loss": 0.7644, "step": 13642 }, { "epoch": 1.8244182936614068, "grad_norm": 1.215500831604004, "learning_rate": 7.0295290409806775e-06, "loss": 0.6646, "step": 13643 }, { "epoch": 1.8245520192564857, "grad_norm": 1.2626285552978516, "learning_rate": 7.028150491957666e-06, "loss": 0.7242, "step": 13644 }, { "epoch": 1.8246857448515645, "grad_norm": 1.4614107608795166, "learning_rate": 7.026772004880018e-06, "loss": 0.7786, "step": 13645 }, { "epoch": 1.8248194704466436, "grad_norm": 1.2773489952087402, "learning_rate": 7.025393579776458e-06, "loss": 0.707, "step": 13646 }, { "epoch": 1.8249531960417222, "grad_norm": 1.3084397315979004, "learning_rate": 7.024015216675726e-06, "loss": 0.7145, "step": 13647 }, { "epoch": 1.8250869216368013, "grad_norm": 1.357030987739563, "learning_rate": 7.022636915606549e-06, "loss": 0.6807, "step": 13648 }, { "epoch": 1.8252206472318802, "grad_norm": 1.171615719795227, "learning_rate": 7.021258676597654e-06, "loss": 0.6356, "step": 13649 }, { "epoch": 1.825354372826959, "grad_norm": 1.121340036392212, "learning_rate": 7.0198804996777754e-06, "loss": 0.6038, "step": 13650 }, { "epoch": 1.825488098422038, "grad_norm": 1.2261704206466675, "learning_rate": 7.018502384875634e-06, "loss": 0.6682, "step": 13651 }, { "epoch": 1.825621824017117, "grad_norm": 1.2922707796096802, "learning_rate": 7.017124332219958e-06, "loss": 0.7455, "step": 13652 }, { "epoch": 1.8257555496121958, "grad_norm": 1.2519872188568115, "learning_rate": 7.015746341739469e-06, "loss": 0.68, "step": 13653 }, { "epoch": 1.8258892752072746, "grad_norm": 1.2441667318344116, "learning_rate": 7.014368413462891e-06, "loss": 0.6452, "step": 13654 }, { "epoch": 1.8260230008023535, "grad_norm": 1.2047346830368042, "learning_rate": 7.012990547418952e-06, "loss": 0.6525, "step": 13655 }, { "epoch": 1.8261567263974325, "grad_norm": 1.4370282888412476, "learning_rate": 7.011612743636365e-06, "loss": 0.7295, "step": 13656 }, { "epoch": 1.8262904519925114, "grad_norm": 1.1209518909454346, "learning_rate": 7.010235002143847e-06, "loss": 0.6072, "step": 13657 }, { "epoch": 1.8264241775875902, "grad_norm": 1.2879307270050049, "learning_rate": 7.008857322970124e-06, "loss": 0.754, "step": 13658 }, { "epoch": 1.8265579031826693, "grad_norm": 1.294012188911438, "learning_rate": 7.007479706143905e-06, "loss": 0.6941, "step": 13659 }, { "epoch": 1.826691628777748, "grad_norm": 1.1819275617599487, "learning_rate": 7.006102151693907e-06, "loss": 0.6739, "step": 13660 }, { "epoch": 1.826825354372827, "grad_norm": 1.333618402481079, "learning_rate": 7.004724659648848e-06, "loss": 0.7227, "step": 13661 }, { "epoch": 1.8269590799679059, "grad_norm": 1.2918713092803955, "learning_rate": 7.003347230037434e-06, "loss": 0.6956, "step": 13662 }, { "epoch": 1.8270928055629847, "grad_norm": 1.1978696584701538, "learning_rate": 7.001969862888383e-06, "loss": 0.6364, "step": 13663 }, { "epoch": 1.8272265311580638, "grad_norm": 1.2334239482879639, "learning_rate": 7.000592558230399e-06, "loss": 0.7472, "step": 13664 }, { "epoch": 1.8273602567531424, "grad_norm": 1.3283867835998535, "learning_rate": 6.9992153160921935e-06, "loss": 0.7504, "step": 13665 }, { "epoch": 1.8274939823482215, "grad_norm": 1.2248510122299194, "learning_rate": 6.997838136502474e-06, "loss": 0.6145, "step": 13666 }, { "epoch": 1.8276277079433003, "grad_norm": 1.0830023288726807, "learning_rate": 6.9964610194899476e-06, "loss": 0.6485, "step": 13667 }, { "epoch": 1.8277614335383792, "grad_norm": 1.2234848737716675, "learning_rate": 6.995083965083313e-06, "loss": 0.7005, "step": 13668 }, { "epoch": 1.8278951591334582, "grad_norm": 1.2511358261108398, "learning_rate": 6.993706973311281e-06, "loss": 0.6911, "step": 13669 }, { "epoch": 1.828028884728537, "grad_norm": 1.1673377752304077, "learning_rate": 6.992330044202547e-06, "loss": 0.6189, "step": 13670 }, { "epoch": 1.828162610323616, "grad_norm": 1.1722458600997925, "learning_rate": 6.990953177785818e-06, "loss": 0.6787, "step": 13671 }, { "epoch": 1.828296335918695, "grad_norm": 1.2142329216003418, "learning_rate": 6.989576374089791e-06, "loss": 0.6828, "step": 13672 }, { "epoch": 1.8284300615137736, "grad_norm": 1.3297072649002075, "learning_rate": 6.98819963314316e-06, "loss": 0.7227, "step": 13673 }, { "epoch": 1.8285637871088527, "grad_norm": 1.270232081413269, "learning_rate": 6.986822954974631e-06, "loss": 0.6723, "step": 13674 }, { "epoch": 1.8286975127039315, "grad_norm": 1.1608420610427856, "learning_rate": 6.985446339612893e-06, "loss": 0.6511, "step": 13675 }, { "epoch": 1.8288312382990104, "grad_norm": 1.1201629638671875, "learning_rate": 6.984069787086638e-06, "loss": 0.6425, "step": 13676 }, { "epoch": 1.8289649638940895, "grad_norm": 1.262510061264038, "learning_rate": 6.982693297424567e-06, "loss": 0.7085, "step": 13677 }, { "epoch": 1.829098689489168, "grad_norm": 1.1735036373138428, "learning_rate": 6.981316870655361e-06, "loss": 0.5697, "step": 13678 }, { "epoch": 1.8292324150842472, "grad_norm": 1.330461025238037, "learning_rate": 6.97994050680772e-06, "loss": 0.7104, "step": 13679 }, { "epoch": 1.829366140679326, "grad_norm": 1.1854509115219116, "learning_rate": 6.978564205910331e-06, "loss": 0.6836, "step": 13680 }, { "epoch": 1.8294998662744049, "grad_norm": 1.1057363748550415, "learning_rate": 6.9771879679918755e-06, "loss": 0.6786, "step": 13681 }, { "epoch": 1.829633591869484, "grad_norm": 1.2634400129318237, "learning_rate": 6.9758117930810484e-06, "loss": 0.7121, "step": 13682 }, { "epoch": 1.8297673174645626, "grad_norm": 1.3212573528289795, "learning_rate": 6.974435681206526e-06, "loss": 0.7735, "step": 13683 }, { "epoch": 1.8299010430596416, "grad_norm": 1.1935824155807495, "learning_rate": 6.973059632397002e-06, "loss": 0.6034, "step": 13684 }, { "epoch": 1.8300347686547205, "grad_norm": 1.194448471069336, "learning_rate": 6.971683646681151e-06, "loss": 0.6625, "step": 13685 }, { "epoch": 1.8301684942497993, "grad_norm": 1.137538194656372, "learning_rate": 6.970307724087655e-06, "loss": 0.6847, "step": 13686 }, { "epoch": 1.8303022198448784, "grad_norm": 1.2208797931671143, "learning_rate": 6.968931864645198e-06, "loss": 0.651, "step": 13687 }, { "epoch": 1.8304359454399572, "grad_norm": 1.3061879873275757, "learning_rate": 6.967556068382457e-06, "loss": 0.6512, "step": 13688 }, { "epoch": 1.830569671035036, "grad_norm": 1.2173490524291992, "learning_rate": 6.966180335328103e-06, "loss": 0.5641, "step": 13689 }, { "epoch": 1.8307033966301152, "grad_norm": 1.3485435247421265, "learning_rate": 6.964804665510823e-06, "loss": 0.6403, "step": 13690 }, { "epoch": 1.8308371222251938, "grad_norm": 1.2749197483062744, "learning_rate": 6.963429058959279e-06, "loss": 0.7385, "step": 13691 }, { "epoch": 1.8309708478202729, "grad_norm": 1.3115544319152832, "learning_rate": 6.962053515702154e-06, "loss": 0.6513, "step": 13692 }, { "epoch": 1.8311045734153517, "grad_norm": 1.2639825344085693, "learning_rate": 6.9606780357681184e-06, "loss": 0.6193, "step": 13693 }, { "epoch": 1.8312382990104306, "grad_norm": 1.247612714767456, "learning_rate": 6.9593026191858355e-06, "loss": 0.6445, "step": 13694 }, { "epoch": 1.8313720246055096, "grad_norm": 1.2918758392333984, "learning_rate": 6.9579272659839855e-06, "loss": 0.6783, "step": 13695 }, { "epoch": 1.8315057502005883, "grad_norm": 1.1177948713302612, "learning_rate": 6.95655197619123e-06, "loss": 0.6188, "step": 13696 }, { "epoch": 1.8316394757956673, "grad_norm": 1.2831132411956787, "learning_rate": 6.955176749836232e-06, "loss": 0.7885, "step": 13697 }, { "epoch": 1.8317732013907462, "grad_norm": 1.1410598754882812, "learning_rate": 6.953801586947664e-06, "loss": 0.5719, "step": 13698 }, { "epoch": 1.831906926985825, "grad_norm": 1.2301900386810303, "learning_rate": 6.952426487554185e-06, "loss": 0.7245, "step": 13699 }, { "epoch": 1.832040652580904, "grad_norm": 1.3630056381225586, "learning_rate": 6.951051451684463e-06, "loss": 0.6626, "step": 13700 }, { "epoch": 1.8321743781759827, "grad_norm": 1.3991765975952148, "learning_rate": 6.949676479367155e-06, "loss": 0.7305, "step": 13701 }, { "epoch": 1.8323081037710618, "grad_norm": 1.256777286529541, "learning_rate": 6.94830157063092e-06, "loss": 0.6315, "step": 13702 }, { "epoch": 1.8324418293661406, "grad_norm": 1.2460036277770996, "learning_rate": 6.9469267255044215e-06, "loss": 0.6766, "step": 13703 }, { "epoch": 1.8325755549612195, "grad_norm": 1.1540305614471436, "learning_rate": 6.945551944016311e-06, "loss": 0.582, "step": 13704 }, { "epoch": 1.8327092805562986, "grad_norm": 1.2193268537521362, "learning_rate": 6.944177226195247e-06, "loss": 0.694, "step": 13705 }, { "epoch": 1.8328430061513774, "grad_norm": 1.3453047275543213, "learning_rate": 6.942802572069889e-06, "loss": 0.7757, "step": 13706 }, { "epoch": 1.8329767317464563, "grad_norm": 1.258634328842163, "learning_rate": 6.94142798166888e-06, "loss": 0.7362, "step": 13707 }, { "epoch": 1.8331104573415353, "grad_norm": 1.2628198862075806, "learning_rate": 6.940053455020883e-06, "loss": 0.6594, "step": 13708 }, { "epoch": 1.833244182936614, "grad_norm": 1.2385324239730835, "learning_rate": 6.938678992154544e-06, "loss": 0.6597, "step": 13709 }, { "epoch": 1.833377908531693, "grad_norm": 1.3036601543426514, "learning_rate": 6.937304593098509e-06, "loss": 0.7304, "step": 13710 }, { "epoch": 1.8335116341267719, "grad_norm": 1.2943599224090576, "learning_rate": 6.935930257881429e-06, "loss": 0.729, "step": 13711 }, { "epoch": 1.8336453597218507, "grad_norm": 1.2706928253173828, "learning_rate": 6.934555986531953e-06, "loss": 0.6259, "step": 13712 }, { "epoch": 1.8337790853169298, "grad_norm": 1.2457811832427979, "learning_rate": 6.933181779078722e-06, "loss": 0.6726, "step": 13713 }, { "epoch": 1.8339128109120084, "grad_norm": 1.172239065170288, "learning_rate": 6.9318076355503835e-06, "loss": 0.6841, "step": 13714 }, { "epoch": 1.8340465365070875, "grad_norm": 1.1760669946670532, "learning_rate": 6.9304335559755766e-06, "loss": 0.6165, "step": 13715 }, { "epoch": 1.8341802621021663, "grad_norm": 1.252285361289978, "learning_rate": 6.929059540382948e-06, "loss": 0.7124, "step": 13716 }, { "epoch": 1.8343139876972452, "grad_norm": 1.1872901916503906, "learning_rate": 6.927685588801134e-06, "loss": 0.7055, "step": 13717 }, { "epoch": 1.8344477132923243, "grad_norm": 1.217926025390625, "learning_rate": 6.926311701258772e-06, "loss": 0.6652, "step": 13718 }, { "epoch": 1.834581438887403, "grad_norm": 1.1974453926086426, "learning_rate": 6.924937877784505e-06, "loss": 0.6873, "step": 13719 }, { "epoch": 1.834715164482482, "grad_norm": 1.280928611755371, "learning_rate": 6.923564118406964e-06, "loss": 0.7317, "step": 13720 }, { "epoch": 1.8348488900775608, "grad_norm": 1.5077660083770752, "learning_rate": 6.9221904231547835e-06, "loss": 0.7595, "step": 13721 }, { "epoch": 1.8349826156726396, "grad_norm": 1.321532130241394, "learning_rate": 6.920816792056602e-06, "loss": 0.6378, "step": 13722 }, { "epoch": 1.8351163412677187, "grad_norm": 1.34261953830719, "learning_rate": 6.919443225141043e-06, "loss": 0.712, "step": 13723 }, { "epoch": 1.8352500668627976, "grad_norm": 1.2904306650161743, "learning_rate": 6.9180697224367445e-06, "loss": 0.717, "step": 13724 }, { "epoch": 1.8353837924578764, "grad_norm": 1.20167076587677, "learning_rate": 6.916696283972335e-06, "loss": 0.7283, "step": 13725 }, { "epoch": 1.8355175180529555, "grad_norm": 1.3282886743545532, "learning_rate": 6.9153229097764375e-06, "loss": 0.6731, "step": 13726 }, { "epoch": 1.8356512436480341, "grad_norm": 1.2034962177276611, "learning_rate": 6.913949599877686e-06, "loss": 0.6773, "step": 13727 }, { "epoch": 1.8357849692431132, "grad_norm": 1.2672545909881592, "learning_rate": 6.912576354304703e-06, "loss": 0.6416, "step": 13728 }, { "epoch": 1.835918694838192, "grad_norm": 1.2087756395339966, "learning_rate": 6.911203173086107e-06, "loss": 0.6043, "step": 13729 }, { "epoch": 1.8360524204332709, "grad_norm": 1.2366011142730713, "learning_rate": 6.909830056250527e-06, "loss": 0.6736, "step": 13730 }, { "epoch": 1.83618614602835, "grad_norm": 1.1904550790786743, "learning_rate": 6.9084570038265805e-06, "loss": 0.6493, "step": 13731 }, { "epoch": 1.8363198716234286, "grad_norm": 1.1627498865127563, "learning_rate": 6.907084015842893e-06, "loss": 0.6944, "step": 13732 }, { "epoch": 1.8364535972185076, "grad_norm": 1.2116574048995972, "learning_rate": 6.905711092328081e-06, "loss": 0.6071, "step": 13733 }, { "epoch": 1.8365873228135865, "grad_norm": 1.278102993965149, "learning_rate": 6.904338233310755e-06, "loss": 0.6926, "step": 13734 }, { "epoch": 1.8367210484086653, "grad_norm": 1.17496657371521, "learning_rate": 6.9029654388195425e-06, "loss": 0.6639, "step": 13735 }, { "epoch": 1.8368547740037444, "grad_norm": 1.4341068267822266, "learning_rate": 6.901592708883047e-06, "loss": 0.72, "step": 13736 }, { "epoch": 1.8369884995988233, "grad_norm": 1.2902679443359375, "learning_rate": 6.9002200435298864e-06, "loss": 0.7138, "step": 13737 }, { "epoch": 1.8371222251939021, "grad_norm": 1.1837358474731445, "learning_rate": 6.8988474427886765e-06, "loss": 0.6908, "step": 13738 }, { "epoch": 1.837255950788981, "grad_norm": 1.3759571313858032, "learning_rate": 6.89747490668802e-06, "loss": 0.719, "step": 13739 }, { "epoch": 1.8373896763840598, "grad_norm": 1.2355530261993408, "learning_rate": 6.8961024352565345e-06, "loss": 0.6806, "step": 13740 }, { "epoch": 1.8375234019791389, "grad_norm": 1.0711584091186523, "learning_rate": 6.894730028522824e-06, "loss": 0.6159, "step": 13741 }, { "epoch": 1.8376571275742177, "grad_norm": 1.0717856884002686, "learning_rate": 6.89335768651549e-06, "loss": 0.5724, "step": 13742 }, { "epoch": 1.8377908531692966, "grad_norm": 1.2146811485290527, "learning_rate": 6.8919854092631445e-06, "loss": 0.6236, "step": 13743 }, { "epoch": 1.8379245787643756, "grad_norm": 1.3056007623672485, "learning_rate": 6.8906131967943904e-06, "loss": 0.6674, "step": 13744 }, { "epoch": 1.8380583043594543, "grad_norm": 1.5136709213256836, "learning_rate": 6.889241049137825e-06, "loss": 0.7415, "step": 13745 }, { "epoch": 1.8381920299545333, "grad_norm": 1.31511652469635, "learning_rate": 6.887868966322058e-06, "loss": 0.7823, "step": 13746 }, { "epoch": 1.8383257555496122, "grad_norm": 1.253554344177246, "learning_rate": 6.886496948375681e-06, "loss": 0.7212, "step": 13747 }, { "epoch": 1.838459481144691, "grad_norm": 1.2178689241409302, "learning_rate": 6.885124995327298e-06, "loss": 0.6649, "step": 13748 }, { "epoch": 1.8385932067397701, "grad_norm": 1.1900715827941895, "learning_rate": 6.883753107205503e-06, "loss": 0.6419, "step": 13749 }, { "epoch": 1.8387269323348487, "grad_norm": 1.230186939239502, "learning_rate": 6.8823812840388905e-06, "loss": 0.707, "step": 13750 }, { "epoch": 1.8388606579299278, "grad_norm": 1.429879069328308, "learning_rate": 6.88100952585606e-06, "loss": 0.7515, "step": 13751 }, { "epoch": 1.8389943835250067, "grad_norm": 1.1788370609283447, "learning_rate": 6.879637832685603e-06, "loss": 0.6389, "step": 13752 }, { "epoch": 1.8391281091200855, "grad_norm": 1.1367188692092896, "learning_rate": 6.878266204556103e-06, "loss": 0.6463, "step": 13753 }, { "epoch": 1.8392618347151646, "grad_norm": 1.2369978427886963, "learning_rate": 6.876894641496164e-06, "loss": 0.6379, "step": 13754 }, { "epoch": 1.8393955603102434, "grad_norm": 1.207277774810791, "learning_rate": 6.875523143534362e-06, "loss": 0.6553, "step": 13755 }, { "epoch": 1.8395292859053223, "grad_norm": 1.2378968000411987, "learning_rate": 6.874151710699293e-06, "loss": 0.6394, "step": 13756 }, { "epoch": 1.8396630115004011, "grad_norm": 1.322172999382019, "learning_rate": 6.87278034301954e-06, "loss": 0.7289, "step": 13757 }, { "epoch": 1.83979673709548, "grad_norm": 1.238092303276062, "learning_rate": 6.871409040523686e-06, "loss": 0.6874, "step": 13758 }, { "epoch": 1.839930462690559, "grad_norm": 1.2307052612304688, "learning_rate": 6.870037803240321e-06, "loss": 0.7333, "step": 13759 }, { "epoch": 1.840064188285638, "grad_norm": 1.3050785064697266, "learning_rate": 6.868666631198024e-06, "loss": 0.7039, "step": 13760 }, { "epoch": 1.8401979138807167, "grad_norm": 1.3142913579940796, "learning_rate": 6.86729552442537e-06, "loss": 0.694, "step": 13761 }, { "epoch": 1.8403316394757958, "grad_norm": 1.2985808849334717, "learning_rate": 6.8659244829509455e-06, "loss": 0.6312, "step": 13762 }, { "epoch": 1.8404653650708744, "grad_norm": 1.3340829610824585, "learning_rate": 6.864553506803322e-06, "loss": 0.6767, "step": 13763 }, { "epoch": 1.8405990906659535, "grad_norm": 1.222744345664978, "learning_rate": 6.8631825960110866e-06, "loss": 0.6117, "step": 13764 }, { "epoch": 1.8407328162610324, "grad_norm": 1.5633609294891357, "learning_rate": 6.861811750602807e-06, "loss": 0.7273, "step": 13765 }, { "epoch": 1.8408665418561112, "grad_norm": 1.214248776435852, "learning_rate": 6.8604409706070556e-06, "loss": 0.5668, "step": 13766 }, { "epoch": 1.8410002674511903, "grad_norm": 1.219557762145996, "learning_rate": 6.859070256052412e-06, "loss": 0.6565, "step": 13767 }, { "epoch": 1.841133993046269, "grad_norm": 1.1539026498794556, "learning_rate": 6.857699606967439e-06, "loss": 0.6715, "step": 13768 }, { "epoch": 1.841267718641348, "grad_norm": 1.203932762145996, "learning_rate": 6.856329023380712e-06, "loss": 0.6734, "step": 13769 }, { "epoch": 1.8414014442364268, "grad_norm": 1.296655297279358, "learning_rate": 6.854958505320801e-06, "loss": 0.7215, "step": 13770 }, { "epoch": 1.8415351698315057, "grad_norm": 1.2001996040344238, "learning_rate": 6.853588052816267e-06, "loss": 0.7093, "step": 13771 }, { "epoch": 1.8416688954265847, "grad_norm": 1.3656059503555298, "learning_rate": 6.852217665895682e-06, "loss": 0.755, "step": 13772 }, { "epoch": 1.8418026210216636, "grad_norm": 1.1055585145950317, "learning_rate": 6.850847344587607e-06, "loss": 0.6602, "step": 13773 }, { "epoch": 1.8419363466167424, "grad_norm": 1.1360492706298828, "learning_rate": 6.849477088920604e-06, "loss": 0.6291, "step": 13774 }, { "epoch": 1.8420700722118215, "grad_norm": 1.3937456607818604, "learning_rate": 6.848106898923238e-06, "loss": 0.6174, "step": 13775 }, { "epoch": 1.8422037978069001, "grad_norm": 1.205394983291626, "learning_rate": 6.846736774624066e-06, "loss": 0.6379, "step": 13776 }, { "epoch": 1.8423375234019792, "grad_norm": 1.0102508068084717, "learning_rate": 6.845366716051651e-06, "loss": 0.5956, "step": 13777 }, { "epoch": 1.842471248997058, "grad_norm": 1.5055598020553589, "learning_rate": 6.843996723234549e-06, "loss": 0.6833, "step": 13778 }, { "epoch": 1.842604974592137, "grad_norm": 1.145379662513733, "learning_rate": 6.842626796201311e-06, "loss": 0.6269, "step": 13779 }, { "epoch": 1.842738700187216, "grad_norm": 1.3151395320892334, "learning_rate": 6.841256934980501e-06, "loss": 0.687, "step": 13780 }, { "epoch": 1.8428724257822946, "grad_norm": 1.1371145248413086, "learning_rate": 6.839887139600664e-06, "loss": 0.7001, "step": 13781 }, { "epoch": 1.8430061513773737, "grad_norm": 1.32063889503479, "learning_rate": 6.838517410090355e-06, "loss": 0.7475, "step": 13782 }, { "epoch": 1.8431398769724525, "grad_norm": 1.152679681777954, "learning_rate": 6.8371477464781276e-06, "loss": 0.6264, "step": 13783 }, { "epoch": 1.8432736025675314, "grad_norm": 1.4427374601364136, "learning_rate": 6.835778148792527e-06, "loss": 0.6867, "step": 13784 }, { "epoch": 1.8434073281626104, "grad_norm": 1.2832494974136353, "learning_rate": 6.834408617062107e-06, "loss": 0.69, "step": 13785 }, { "epoch": 1.843541053757689, "grad_norm": 1.2268489599227905, "learning_rate": 6.8330391513154095e-06, "loss": 0.6548, "step": 13786 }, { "epoch": 1.8436747793527681, "grad_norm": 1.129612922668457, "learning_rate": 6.831669751580976e-06, "loss": 0.6479, "step": 13787 }, { "epoch": 1.843808504947847, "grad_norm": 1.193387508392334, "learning_rate": 6.8303004178873566e-06, "loss": 0.5958, "step": 13788 }, { "epoch": 1.8439422305429258, "grad_norm": 1.270899772644043, "learning_rate": 6.828931150263095e-06, "loss": 0.7172, "step": 13789 }, { "epoch": 1.844075956138005, "grad_norm": 1.079564094543457, "learning_rate": 6.827561948736725e-06, "loss": 0.5916, "step": 13790 }, { "epoch": 1.8442096817330837, "grad_norm": 1.2091145515441895, "learning_rate": 6.826192813336794e-06, "loss": 0.6844, "step": 13791 }, { "epoch": 1.8443434073281626, "grad_norm": 1.1348251104354858, "learning_rate": 6.824823744091833e-06, "loss": 0.61, "step": 13792 }, { "epoch": 1.8444771329232417, "grad_norm": 1.3358339071273804, "learning_rate": 6.8234547410303865e-06, "loss": 0.6961, "step": 13793 }, { "epoch": 1.8446108585183203, "grad_norm": 1.2482625246047974, "learning_rate": 6.822085804180985e-06, "loss": 0.7367, "step": 13794 }, { "epoch": 1.8447445841133994, "grad_norm": 1.19967520236969, "learning_rate": 6.820716933572162e-06, "loss": 0.587, "step": 13795 }, { "epoch": 1.8448783097084782, "grad_norm": 1.3269333839416504, "learning_rate": 6.819348129232456e-06, "loss": 0.6952, "step": 13796 }, { "epoch": 1.845012035303557, "grad_norm": 1.3255964517593384, "learning_rate": 6.8179793911903945e-06, "loss": 0.7801, "step": 13797 }, { "epoch": 1.8451457608986361, "grad_norm": 1.1533595323562622, "learning_rate": 6.816610719474503e-06, "loss": 0.5981, "step": 13798 }, { "epoch": 1.8452794864937148, "grad_norm": 1.2958546876907349, "learning_rate": 6.815242114113321e-06, "loss": 0.7172, "step": 13799 }, { "epoch": 1.8454132120887938, "grad_norm": 1.1685703992843628, "learning_rate": 6.813873575135363e-06, "loss": 0.681, "step": 13800 }, { "epoch": 1.8455469376838727, "grad_norm": 1.1234225034713745, "learning_rate": 6.812505102569164e-06, "loss": 0.6369, "step": 13801 }, { "epoch": 1.8456806632789515, "grad_norm": 1.3002102375030518, "learning_rate": 6.81113669644325e-06, "loss": 0.6346, "step": 13802 }, { "epoch": 1.8458143888740306, "grad_norm": 1.258652687072754, "learning_rate": 6.809768356786135e-06, "loss": 0.692, "step": 13803 }, { "epoch": 1.8459481144691092, "grad_norm": 1.2245444059371948, "learning_rate": 6.80840008362635e-06, "loss": 0.6703, "step": 13804 }, { "epoch": 1.8460818400641883, "grad_norm": 1.524429440498352, "learning_rate": 6.807031876992411e-06, "loss": 0.7176, "step": 13805 }, { "epoch": 1.8462155656592671, "grad_norm": 1.2874886989593506, "learning_rate": 6.8056637369128335e-06, "loss": 0.7075, "step": 13806 }, { "epoch": 1.846349291254346, "grad_norm": 1.2795969247817993, "learning_rate": 6.804295663416141e-06, "loss": 0.6659, "step": 13807 }, { "epoch": 1.846483016849425, "grad_norm": 1.2683343887329102, "learning_rate": 6.802927656530844e-06, "loss": 0.7085, "step": 13808 }, { "epoch": 1.846616742444504, "grad_norm": 1.4794082641601562, "learning_rate": 6.801559716285466e-06, "loss": 0.7858, "step": 13809 }, { "epoch": 1.8467504680395828, "grad_norm": 1.2969962358474731, "learning_rate": 6.800191842708515e-06, "loss": 0.6796, "step": 13810 }, { "epoch": 1.8468841936346618, "grad_norm": 1.1836827993392944, "learning_rate": 6.7988240358285e-06, "loss": 0.6788, "step": 13811 }, { "epoch": 1.8470179192297405, "grad_norm": 1.2591941356658936, "learning_rate": 6.797456295673937e-06, "loss": 0.6919, "step": 13812 }, { "epoch": 1.8471516448248195, "grad_norm": 1.1629880666732788, "learning_rate": 6.796088622273331e-06, "loss": 0.6281, "step": 13813 }, { "epoch": 1.8472853704198984, "grad_norm": 1.2673150300979614, "learning_rate": 6.794721015655191e-06, "loss": 0.7406, "step": 13814 }, { "epoch": 1.8474190960149772, "grad_norm": 1.2791593074798584, "learning_rate": 6.793353475848028e-06, "loss": 0.6527, "step": 13815 }, { "epoch": 1.8475528216100563, "grad_norm": 1.1884660720825195, "learning_rate": 6.791986002880339e-06, "loss": 0.6655, "step": 13816 }, { "epoch": 1.847686547205135, "grad_norm": 1.3216431140899658, "learning_rate": 6.790618596780638e-06, "loss": 0.7747, "step": 13817 }, { "epoch": 1.847820272800214, "grad_norm": 1.302400827407837, "learning_rate": 6.789251257577419e-06, "loss": 0.7345, "step": 13818 }, { "epoch": 1.8479539983952928, "grad_norm": 1.2374743223190308, "learning_rate": 6.787883985299182e-06, "loss": 0.7003, "step": 13819 }, { "epoch": 1.8480877239903717, "grad_norm": 1.154674768447876, "learning_rate": 6.786516779974431e-06, "loss": 0.6386, "step": 13820 }, { "epoch": 1.8482214495854508, "grad_norm": 1.0955474376678467, "learning_rate": 6.785149641631665e-06, "loss": 0.6317, "step": 13821 }, { "epoch": 1.8483551751805296, "grad_norm": 1.3786767721176147, "learning_rate": 6.783782570299376e-06, "loss": 0.7858, "step": 13822 }, { "epoch": 1.8484889007756085, "grad_norm": 1.1883971691131592, "learning_rate": 6.782415566006064e-06, "loss": 0.6851, "step": 13823 }, { "epoch": 1.8486226263706873, "grad_norm": 1.1589635610580444, "learning_rate": 6.781048628780217e-06, "loss": 0.6206, "step": 13824 }, { "epoch": 1.8487563519657662, "grad_norm": 1.2407127618789673, "learning_rate": 6.779681758650336e-06, "loss": 0.6558, "step": 13825 }, { "epoch": 1.8488900775608452, "grad_norm": 1.234395146369934, "learning_rate": 6.778314955644905e-06, "loss": 0.6349, "step": 13826 }, { "epoch": 1.849023803155924, "grad_norm": 1.2651311159133911, "learning_rate": 6.776948219792412e-06, "loss": 0.6709, "step": 13827 }, { "epoch": 1.849157528751003, "grad_norm": 1.151973009109497, "learning_rate": 6.775581551121355e-06, "loss": 0.6578, "step": 13828 }, { "epoch": 1.849291254346082, "grad_norm": 1.2138807773590088, "learning_rate": 6.774214949660215e-06, "loss": 0.7399, "step": 13829 }, { "epoch": 1.8494249799411606, "grad_norm": 1.238549828529358, "learning_rate": 6.772848415437473e-06, "loss": 0.7929, "step": 13830 }, { "epoch": 1.8495587055362397, "grad_norm": 1.1721092462539673, "learning_rate": 6.771481948481622e-06, "loss": 0.5906, "step": 13831 }, { "epoch": 1.8496924311313185, "grad_norm": 1.3039196729660034, "learning_rate": 6.7701155488211365e-06, "loss": 0.7351, "step": 13832 }, { "epoch": 1.8498261567263974, "grad_norm": 1.2214093208312988, "learning_rate": 6.7687492164845044e-06, "loss": 0.6557, "step": 13833 }, { "epoch": 1.8499598823214765, "grad_norm": 1.1949502229690552, "learning_rate": 6.767382951500205e-06, "loss": 0.6528, "step": 13834 }, { "epoch": 1.850093607916555, "grad_norm": 1.298505425453186, "learning_rate": 6.766016753896709e-06, "loss": 0.637, "step": 13835 }, { "epoch": 1.8502273335116342, "grad_norm": 1.248430848121643, "learning_rate": 6.7646506237025045e-06, "loss": 0.6576, "step": 13836 }, { "epoch": 1.850361059106713, "grad_norm": 1.2680670022964478, "learning_rate": 6.763284560946062e-06, "loss": 0.6819, "step": 13837 }, { "epoch": 1.8504947847017919, "grad_norm": 1.2248493432998657, "learning_rate": 6.761918565655851e-06, "loss": 0.6614, "step": 13838 }, { "epoch": 1.850628510296871, "grad_norm": 1.6024763584136963, "learning_rate": 6.76055263786035e-06, "loss": 0.8151, "step": 13839 }, { "epoch": 1.8507622358919498, "grad_norm": 1.2581449747085571, "learning_rate": 6.759186777588032e-06, "loss": 0.6083, "step": 13840 }, { "epoch": 1.8508959614870286, "grad_norm": 1.1997913122177124, "learning_rate": 6.757820984867362e-06, "loss": 0.6432, "step": 13841 }, { "epoch": 1.8510296870821075, "grad_norm": 1.318400502204895, "learning_rate": 6.756455259726815e-06, "loss": 0.7623, "step": 13842 }, { "epoch": 1.8511634126771863, "grad_norm": 1.2387298345565796, "learning_rate": 6.755089602194849e-06, "loss": 0.7235, "step": 13843 }, { "epoch": 1.8512971382722654, "grad_norm": 1.2552803754806519, "learning_rate": 6.75372401229994e-06, "loss": 0.7473, "step": 13844 }, { "epoch": 1.8514308638673442, "grad_norm": 1.5273960828781128, "learning_rate": 6.752358490070545e-06, "loss": 0.7115, "step": 13845 }, { "epoch": 1.851564589462423, "grad_norm": 1.307141900062561, "learning_rate": 6.750993035535128e-06, "loss": 0.7085, "step": 13846 }, { "epoch": 1.8516983150575022, "grad_norm": 1.1096395254135132, "learning_rate": 6.749627648722157e-06, "loss": 0.5856, "step": 13847 }, { "epoch": 1.8518320406525808, "grad_norm": 1.3337516784667969, "learning_rate": 6.748262329660082e-06, "loss": 0.6816, "step": 13848 }, { "epoch": 1.8519657662476599, "grad_norm": 1.1463372707366943, "learning_rate": 6.746897078377372e-06, "loss": 0.6461, "step": 13849 }, { "epoch": 1.8520994918427387, "grad_norm": 1.2968477010726929, "learning_rate": 6.74553189490248e-06, "loss": 0.7194, "step": 13850 }, { "epoch": 1.8522332174378175, "grad_norm": 1.1409854888916016, "learning_rate": 6.744166779263856e-06, "loss": 0.6041, "step": 13851 }, { "epoch": 1.8523669430328966, "grad_norm": 1.2449700832366943, "learning_rate": 6.742801731489963e-06, "loss": 0.6686, "step": 13852 }, { "epoch": 1.8525006686279752, "grad_norm": 1.3354474306106567, "learning_rate": 6.741436751609252e-06, "loss": 0.734, "step": 13853 }, { "epoch": 1.8526343942230543, "grad_norm": 1.2562320232391357, "learning_rate": 6.740071839650171e-06, "loss": 0.6561, "step": 13854 }, { "epoch": 1.8527681198181332, "grad_norm": 1.2631070613861084, "learning_rate": 6.738706995641177e-06, "loss": 0.6963, "step": 13855 }, { "epoch": 1.852901845413212, "grad_norm": 1.3139373064041138, "learning_rate": 6.7373422196107105e-06, "loss": 0.6567, "step": 13856 }, { "epoch": 1.853035571008291, "grad_norm": 1.365637183189392, "learning_rate": 6.735977511587228e-06, "loss": 0.7447, "step": 13857 }, { "epoch": 1.85316929660337, "grad_norm": 1.2912229299545288, "learning_rate": 6.734612871599169e-06, "loss": 0.6935, "step": 13858 }, { "epoch": 1.8533030221984488, "grad_norm": 1.2933177947998047, "learning_rate": 6.733248299674977e-06, "loss": 0.6975, "step": 13859 }, { "epoch": 1.8534367477935276, "grad_norm": 1.2751692533493042, "learning_rate": 6.731883795843104e-06, "loss": 0.6417, "step": 13860 }, { "epoch": 1.8535704733886065, "grad_norm": 1.3222585916519165, "learning_rate": 6.73051936013198e-06, "loss": 0.7103, "step": 13861 }, { "epoch": 1.8537041989836855, "grad_norm": 1.2787964344024658, "learning_rate": 6.7291549925700575e-06, "loss": 0.6913, "step": 13862 }, { "epoch": 1.8538379245787644, "grad_norm": 1.181631326675415, "learning_rate": 6.727790693185767e-06, "loss": 0.6285, "step": 13863 }, { "epoch": 1.8539716501738432, "grad_norm": 1.243194341659546, "learning_rate": 6.7264264620075455e-06, "loss": 0.614, "step": 13864 }, { "epoch": 1.8541053757689223, "grad_norm": 1.3067023754119873, "learning_rate": 6.725062299063834e-06, "loss": 0.7912, "step": 13865 }, { "epoch": 1.854239101364001, "grad_norm": 1.2184467315673828, "learning_rate": 6.723698204383067e-06, "loss": 0.64, "step": 13866 }, { "epoch": 1.85437282695908, "grad_norm": 1.2818893194198608, "learning_rate": 6.722334177993673e-06, "loss": 0.7748, "step": 13867 }, { "epoch": 1.8545065525541589, "grad_norm": 1.2240118980407715, "learning_rate": 6.720970219924088e-06, "loss": 0.7437, "step": 13868 }, { "epoch": 1.8546402781492377, "grad_norm": 1.2402490377426147, "learning_rate": 6.719606330202739e-06, "loss": 0.6169, "step": 13869 }, { "epoch": 1.8547740037443168, "grad_norm": 1.4071033000946045, "learning_rate": 6.71824250885806e-06, "loss": 0.7218, "step": 13870 }, { "epoch": 1.8549077293393954, "grad_norm": 1.1687504053115845, "learning_rate": 6.716878755918474e-06, "loss": 0.6571, "step": 13871 }, { "epoch": 1.8550414549344745, "grad_norm": 1.2581931352615356, "learning_rate": 6.715515071412411e-06, "loss": 0.66, "step": 13872 }, { "epoch": 1.8551751805295533, "grad_norm": 1.2189053297042847, "learning_rate": 6.71415145536829e-06, "loss": 0.6623, "step": 13873 }, { "epoch": 1.8553089061246322, "grad_norm": 1.2393089532852173, "learning_rate": 6.712787907814542e-06, "loss": 0.664, "step": 13874 }, { "epoch": 1.8554426317197112, "grad_norm": 1.2932487726211548, "learning_rate": 6.7114244287795785e-06, "loss": 0.7197, "step": 13875 }, { "epoch": 1.85557635731479, "grad_norm": 1.2768210172653198, "learning_rate": 6.710061018291831e-06, "loss": 0.658, "step": 13876 }, { "epoch": 1.855710082909869, "grad_norm": 1.27066969871521, "learning_rate": 6.70869767637971e-06, "loss": 0.613, "step": 13877 }, { "epoch": 1.855843808504948, "grad_norm": 1.2313402891159058, "learning_rate": 6.707334403071638e-06, "loss": 0.6895, "step": 13878 }, { "epoch": 1.8559775341000266, "grad_norm": 1.2324997186660767, "learning_rate": 6.705971198396032e-06, "loss": 0.6298, "step": 13879 }, { "epoch": 1.8561112596951057, "grad_norm": 1.1672239303588867, "learning_rate": 6.7046080623812995e-06, "loss": 0.6712, "step": 13880 }, { "epoch": 1.8562449852901846, "grad_norm": 1.3264271020889282, "learning_rate": 6.703244995055864e-06, "loss": 0.6835, "step": 13881 }, { "epoch": 1.8563787108852634, "grad_norm": 1.240195631980896, "learning_rate": 6.701881996448131e-06, "loss": 0.6343, "step": 13882 }, { "epoch": 1.8565124364803425, "grad_norm": 1.2639858722686768, "learning_rate": 6.700519066586508e-06, "loss": 0.6766, "step": 13883 }, { "epoch": 1.856646162075421, "grad_norm": 1.2224682569503784, "learning_rate": 6.6991562054994085e-06, "loss": 0.702, "step": 13884 }, { "epoch": 1.8567798876705002, "grad_norm": 1.3374301195144653, "learning_rate": 6.6977934132152414e-06, "loss": 0.6861, "step": 13885 }, { "epoch": 1.856913613265579, "grad_norm": 1.143964409828186, "learning_rate": 6.69643068976241e-06, "loss": 0.6358, "step": 13886 }, { "epoch": 1.8570473388606579, "grad_norm": 1.1736760139465332, "learning_rate": 6.695068035169321e-06, "loss": 0.6392, "step": 13887 }, { "epoch": 1.857181064455737, "grad_norm": 1.1120654344558716, "learning_rate": 6.693705449464373e-06, "loss": 0.6642, "step": 13888 }, { "epoch": 1.8573147900508156, "grad_norm": 1.1934597492218018, "learning_rate": 6.692342932675974e-06, "loss": 0.6757, "step": 13889 }, { "epoch": 1.8574485156458946, "grad_norm": 1.3436036109924316, "learning_rate": 6.690980484832521e-06, "loss": 0.6881, "step": 13890 }, { "epoch": 1.8575822412409735, "grad_norm": 1.1626349687576294, "learning_rate": 6.689618105962412e-06, "loss": 0.6974, "step": 13891 }, { "epoch": 1.8577159668360523, "grad_norm": 1.2873663902282715, "learning_rate": 6.688255796094048e-06, "loss": 0.669, "step": 13892 }, { "epoch": 1.8578496924311314, "grad_norm": 1.1633247137069702, "learning_rate": 6.686893555255819e-06, "loss": 0.6829, "step": 13893 }, { "epoch": 1.8579834180262103, "grad_norm": 1.2562705278396606, "learning_rate": 6.685531383476128e-06, "loss": 0.7221, "step": 13894 }, { "epoch": 1.858117143621289, "grad_norm": 1.2320321798324585, "learning_rate": 6.684169280783365e-06, "loss": 0.6595, "step": 13895 }, { "epoch": 1.8582508692163682, "grad_norm": 1.3818409442901611, "learning_rate": 6.682807247205915e-06, "loss": 0.6633, "step": 13896 }, { "epoch": 1.8583845948114468, "grad_norm": 1.1381113529205322, "learning_rate": 6.681445282772176e-06, "loss": 0.6703, "step": 13897 }, { "epoch": 1.8585183204065259, "grad_norm": 1.177986979484558, "learning_rate": 6.680083387510536e-06, "loss": 0.693, "step": 13898 }, { "epoch": 1.8586520460016047, "grad_norm": 1.155556321144104, "learning_rate": 6.678721561449377e-06, "loss": 0.6562, "step": 13899 }, { "epoch": 1.8587857715966836, "grad_norm": 1.1550631523132324, "learning_rate": 6.677359804617094e-06, "loss": 0.6456, "step": 13900 }, { "epoch": 1.8589194971917626, "grad_norm": 1.2905124425888062, "learning_rate": 6.675998117042062e-06, "loss": 0.7122, "step": 13901 }, { "epoch": 1.8590532227868413, "grad_norm": 1.110217809677124, "learning_rate": 6.674636498752673e-06, "loss": 0.658, "step": 13902 }, { "epoch": 1.8591869483819203, "grad_norm": 1.360866904258728, "learning_rate": 6.673274949777302e-06, "loss": 0.755, "step": 13903 }, { "epoch": 1.8593206739769992, "grad_norm": 1.2819935083389282, "learning_rate": 6.671913470144331e-06, "loss": 0.6706, "step": 13904 }, { "epoch": 1.859454399572078, "grad_norm": 1.1890041828155518, "learning_rate": 6.670552059882138e-06, "loss": 0.6362, "step": 13905 }, { "epoch": 1.859588125167157, "grad_norm": 1.205298662185669, "learning_rate": 6.669190719019105e-06, "loss": 0.6586, "step": 13906 }, { "epoch": 1.8597218507622357, "grad_norm": 1.1301299333572388, "learning_rate": 6.6678294475836e-06, "loss": 0.6123, "step": 13907 }, { "epoch": 1.8598555763573148, "grad_norm": 1.331193208694458, "learning_rate": 6.666468245604005e-06, "loss": 0.6503, "step": 13908 }, { "epoch": 1.8599893019523936, "grad_norm": 1.2336878776550293, "learning_rate": 6.665107113108687e-06, "loss": 0.606, "step": 13909 }, { "epoch": 1.8601230275474725, "grad_norm": 1.2340543270111084, "learning_rate": 6.663746050126021e-06, "loss": 0.6019, "step": 13910 }, { "epoch": 1.8602567531425516, "grad_norm": 1.2719411849975586, "learning_rate": 6.662385056684377e-06, "loss": 0.7088, "step": 13911 }, { "epoch": 1.8603904787376304, "grad_norm": 1.2811404466629028, "learning_rate": 6.661024132812119e-06, "loss": 0.6464, "step": 13912 }, { "epoch": 1.8605242043327093, "grad_norm": 1.4237899780273438, "learning_rate": 6.6596632785376245e-06, "loss": 0.764, "step": 13913 }, { "epoch": 1.8606579299277883, "grad_norm": 1.2332491874694824, "learning_rate": 6.658302493889251e-06, "loss": 0.7338, "step": 13914 }, { "epoch": 1.860791655522867, "grad_norm": 1.464020013809204, "learning_rate": 6.656941778895359e-06, "loss": 0.7232, "step": 13915 }, { "epoch": 1.860925381117946, "grad_norm": 1.2471884489059448, "learning_rate": 6.655581133584321e-06, "loss": 0.6388, "step": 13916 }, { "epoch": 1.8610591067130249, "grad_norm": 1.2288116216659546, "learning_rate": 6.654220557984492e-06, "loss": 0.7115, "step": 13917 }, { "epoch": 1.8611928323081037, "grad_norm": 1.2966489791870117, "learning_rate": 6.652860052124235e-06, "loss": 0.7144, "step": 13918 }, { "epoch": 1.8613265579031828, "grad_norm": 1.2088100910186768, "learning_rate": 6.651499616031909e-06, "loss": 0.6394, "step": 13919 }, { "epoch": 1.8614602834982614, "grad_norm": 1.2111750841140747, "learning_rate": 6.6501392497358654e-06, "loss": 0.6606, "step": 13920 }, { "epoch": 1.8615940090933405, "grad_norm": 1.1587275266647339, "learning_rate": 6.648778953264467e-06, "loss": 0.6485, "step": 13921 }, { "epoch": 1.8617277346884193, "grad_norm": 1.4167252779006958, "learning_rate": 6.647418726646065e-06, "loss": 0.7385, "step": 13922 }, { "epoch": 1.8618614602834982, "grad_norm": 1.3064322471618652, "learning_rate": 6.646058569909008e-06, "loss": 0.6845, "step": 13923 }, { "epoch": 1.8619951858785773, "grad_norm": 1.3005396127700806, "learning_rate": 6.644698483081654e-06, "loss": 0.6841, "step": 13924 }, { "epoch": 1.8621289114736561, "grad_norm": 1.242795705795288, "learning_rate": 6.643338466192346e-06, "loss": 0.6703, "step": 13925 }, { "epoch": 1.862262637068735, "grad_norm": 1.3630322217941284, "learning_rate": 6.64197851926944e-06, "loss": 0.6653, "step": 13926 }, { "epoch": 1.8623963626638138, "grad_norm": 1.228246808052063, "learning_rate": 6.640618642341279e-06, "loss": 0.6649, "step": 13927 }, { "epoch": 1.8625300882588927, "grad_norm": 1.3112335205078125, "learning_rate": 6.639258835436202e-06, "loss": 0.6562, "step": 13928 }, { "epoch": 1.8626638138539717, "grad_norm": 1.2685134410858154, "learning_rate": 6.637899098582562e-06, "loss": 0.7276, "step": 13929 }, { "epoch": 1.8627975394490506, "grad_norm": 1.3184354305267334, "learning_rate": 6.6365394318087e-06, "loss": 0.6748, "step": 13930 }, { "epoch": 1.8629312650441294, "grad_norm": 1.2283575534820557, "learning_rate": 6.635179835142951e-06, "loss": 0.6566, "step": 13931 }, { "epoch": 1.8630649906392085, "grad_norm": 1.3593336343765259, "learning_rate": 6.633820308613662e-06, "loss": 0.6753, "step": 13932 }, { "epoch": 1.8631987162342871, "grad_norm": 1.4365121126174927, "learning_rate": 6.632460852249164e-06, "loss": 0.7558, "step": 13933 }, { "epoch": 1.8633324418293662, "grad_norm": 1.133737564086914, "learning_rate": 6.631101466077801e-06, "loss": 0.6154, "step": 13934 }, { "epoch": 1.863466167424445, "grad_norm": 1.311123251914978, "learning_rate": 6.629742150127903e-06, "loss": 0.6573, "step": 13935 }, { "epoch": 1.863599893019524, "grad_norm": 1.3266445398330688, "learning_rate": 6.628382904427804e-06, "loss": 0.6609, "step": 13936 }, { "epoch": 1.863733618614603, "grad_norm": 1.295404314994812, "learning_rate": 6.627023729005837e-06, "loss": 0.7114, "step": 13937 }, { "epoch": 1.8638673442096816, "grad_norm": 1.2988808155059814, "learning_rate": 6.625664623890331e-06, "loss": 0.6909, "step": 13938 }, { "epoch": 1.8640010698047607, "grad_norm": 1.2106274366378784, "learning_rate": 6.624305589109622e-06, "loss": 0.6314, "step": 13939 }, { "epoch": 1.8641347953998395, "grad_norm": 1.301979660987854, "learning_rate": 6.622946624692033e-06, "loss": 0.6548, "step": 13940 }, { "epoch": 1.8642685209949184, "grad_norm": 1.1528948545455933, "learning_rate": 6.6215877306658835e-06, "loss": 0.6398, "step": 13941 }, { "epoch": 1.8644022465899974, "grad_norm": 1.2338688373565674, "learning_rate": 6.620228907059511e-06, "loss": 0.6805, "step": 13942 }, { "epoch": 1.8645359721850763, "grad_norm": 1.172389268875122, "learning_rate": 6.618870153901231e-06, "loss": 0.6376, "step": 13943 }, { "epoch": 1.8646696977801551, "grad_norm": 1.3034014701843262, "learning_rate": 6.617511471219364e-06, "loss": 0.6932, "step": 13944 }, { "epoch": 1.864803423375234, "grad_norm": 1.29646897315979, "learning_rate": 6.616152859042239e-06, "loss": 0.6567, "step": 13945 }, { "epoch": 1.8649371489703128, "grad_norm": 1.123051643371582, "learning_rate": 6.614794317398166e-06, "loss": 0.5795, "step": 13946 }, { "epoch": 1.865070874565392, "grad_norm": 1.1916099786758423, "learning_rate": 6.613435846315468e-06, "loss": 0.6706, "step": 13947 }, { "epoch": 1.8652046001604707, "grad_norm": 1.2787476778030396, "learning_rate": 6.612077445822458e-06, "loss": 0.6806, "step": 13948 }, { "epoch": 1.8653383257555496, "grad_norm": 1.1563136577606201, "learning_rate": 6.610719115947453e-06, "loss": 0.6303, "step": 13949 }, { "epoch": 1.8654720513506287, "grad_norm": 1.283848762512207, "learning_rate": 6.609360856718763e-06, "loss": 0.6894, "step": 13950 }, { "epoch": 1.8656057769457073, "grad_norm": 1.3023619651794434, "learning_rate": 6.608002668164706e-06, "loss": 0.7266, "step": 13951 }, { "epoch": 1.8657395025407864, "grad_norm": 1.3092055320739746, "learning_rate": 6.606644550313581e-06, "loss": 0.6801, "step": 13952 }, { "epoch": 1.8658732281358652, "grad_norm": 1.1721385717391968, "learning_rate": 6.605286503193709e-06, "loss": 0.6754, "step": 13953 }, { "epoch": 1.866006953730944, "grad_norm": 1.3377279043197632, "learning_rate": 6.603928526833386e-06, "loss": 0.6794, "step": 13954 }, { "epoch": 1.8661406793260231, "grad_norm": 1.2527827024459839, "learning_rate": 6.602570621260929e-06, "loss": 0.6764, "step": 13955 }, { "epoch": 1.8662744049211017, "grad_norm": 1.4743539094924927, "learning_rate": 6.601212786504633e-06, "loss": 0.8008, "step": 13956 }, { "epoch": 1.8664081305161808, "grad_norm": 1.2408746480941772, "learning_rate": 6.599855022592803e-06, "loss": 0.6576, "step": 13957 }, { "epoch": 1.8665418561112597, "grad_norm": 1.2729380130767822, "learning_rate": 6.598497329553744e-06, "loss": 0.6933, "step": 13958 }, { "epoch": 1.8666755817063385, "grad_norm": 1.1852768659591675, "learning_rate": 6.597139707415754e-06, "loss": 0.642, "step": 13959 }, { "epoch": 1.8668093073014176, "grad_norm": 1.220337986946106, "learning_rate": 6.595782156207126e-06, "loss": 0.6675, "step": 13960 }, { "epoch": 1.8669430328964964, "grad_norm": 1.175877571105957, "learning_rate": 6.594424675956166e-06, "loss": 0.6725, "step": 13961 }, { "epoch": 1.8670767584915753, "grad_norm": 1.2880406379699707, "learning_rate": 6.593067266691162e-06, "loss": 0.6962, "step": 13962 }, { "epoch": 1.8672104840866541, "grad_norm": 1.0359405279159546, "learning_rate": 6.591709928440413e-06, "loss": 0.624, "step": 13963 }, { "epoch": 1.867344209681733, "grad_norm": 1.203827977180481, "learning_rate": 6.59035266123221e-06, "loss": 0.6779, "step": 13964 }, { "epoch": 1.867477935276812, "grad_norm": 1.1972779035568237, "learning_rate": 6.588995465094839e-06, "loss": 0.7174, "step": 13965 }, { "epoch": 1.867611660871891, "grad_norm": 1.435289978981018, "learning_rate": 6.587638340056598e-06, "loss": 0.7096, "step": 13966 }, { "epoch": 1.8677453864669697, "grad_norm": 1.2918485403060913, "learning_rate": 6.5862812861457685e-06, "loss": 0.6924, "step": 13967 }, { "epoch": 1.8678791120620488, "grad_norm": 1.3213568925857544, "learning_rate": 6.584924303390639e-06, "loss": 0.739, "step": 13968 }, { "epoch": 1.8680128376571274, "grad_norm": 1.1669787168502808, "learning_rate": 6.583567391819494e-06, "loss": 0.6116, "step": 13969 }, { "epoch": 1.8681465632522065, "grad_norm": 1.488783597946167, "learning_rate": 6.582210551460615e-06, "loss": 0.6902, "step": 13970 }, { "epoch": 1.8682802888472854, "grad_norm": 1.2737895250320435, "learning_rate": 6.580853782342291e-06, "loss": 0.8207, "step": 13971 }, { "epoch": 1.8684140144423642, "grad_norm": 1.2612347602844238, "learning_rate": 6.5794970844928e-06, "loss": 0.6536, "step": 13972 }, { "epoch": 1.8685477400374433, "grad_norm": 1.309590220451355, "learning_rate": 6.578140457940414e-06, "loss": 0.6772, "step": 13973 }, { "epoch": 1.868681465632522, "grad_norm": 1.218559741973877, "learning_rate": 6.576783902713419e-06, "loss": 0.6717, "step": 13974 }, { "epoch": 1.868815191227601, "grad_norm": 1.3907921314239502, "learning_rate": 6.575427418840087e-06, "loss": 0.7271, "step": 13975 }, { "epoch": 1.8689489168226798, "grad_norm": 1.228306770324707, "learning_rate": 6.57407100634869e-06, "loss": 0.6883, "step": 13976 }, { "epoch": 1.8690826424177587, "grad_norm": 1.4250659942626953, "learning_rate": 6.57271466526751e-06, "loss": 0.6727, "step": 13977 }, { "epoch": 1.8692163680128377, "grad_norm": 1.1936372518539429, "learning_rate": 6.57135839562481e-06, "loss": 0.655, "step": 13978 }, { "epoch": 1.8693500936079166, "grad_norm": 1.2455246448516846, "learning_rate": 6.570002197448866e-06, "loss": 0.6487, "step": 13979 }, { "epoch": 1.8694838192029954, "grad_norm": 1.3272223472595215, "learning_rate": 6.568646070767941e-06, "loss": 0.7013, "step": 13980 }, { "epoch": 1.8696175447980745, "grad_norm": 1.3171569108963013, "learning_rate": 6.567290015610307e-06, "loss": 0.6111, "step": 13981 }, { "epoch": 1.8697512703931531, "grad_norm": 1.1855790615081787, "learning_rate": 6.5659340320042274e-06, "loss": 0.6016, "step": 13982 }, { "epoch": 1.8698849959882322, "grad_norm": 1.2581253051757812, "learning_rate": 6.564578119977969e-06, "loss": 0.7092, "step": 13983 }, { "epoch": 1.870018721583311, "grad_norm": 1.0305087566375732, "learning_rate": 6.563222279559788e-06, "loss": 0.6017, "step": 13984 }, { "epoch": 1.87015244717839, "grad_norm": 1.386892318725586, "learning_rate": 6.5618665107779545e-06, "loss": 0.7062, "step": 13985 }, { "epoch": 1.870286172773469, "grad_norm": 1.1256368160247803, "learning_rate": 6.560510813660719e-06, "loss": 0.6704, "step": 13986 }, { "epoch": 1.8704198983685476, "grad_norm": 1.2771196365356445, "learning_rate": 6.559155188236348e-06, "loss": 0.6754, "step": 13987 }, { "epoch": 1.8705536239636267, "grad_norm": 1.2199993133544922, "learning_rate": 6.557799634533093e-06, "loss": 0.6793, "step": 13988 }, { "epoch": 1.8706873495587055, "grad_norm": 1.4394389390945435, "learning_rate": 6.556444152579209e-06, "loss": 0.7351, "step": 13989 }, { "epoch": 1.8708210751537844, "grad_norm": 1.3375440835952759, "learning_rate": 6.555088742402955e-06, "loss": 0.6663, "step": 13990 }, { "epoch": 1.8709548007488634, "grad_norm": 1.163658618927002, "learning_rate": 6.55373340403258e-06, "loss": 0.6384, "step": 13991 }, { "epoch": 1.871088526343942, "grad_norm": 1.1022019386291504, "learning_rate": 6.552378137496332e-06, "loss": 0.5907, "step": 13992 }, { "epoch": 1.8712222519390211, "grad_norm": 1.2540149688720703, "learning_rate": 6.551022942822465e-06, "loss": 0.6767, "step": 13993 }, { "epoch": 1.8713559775341, "grad_norm": 1.331811785697937, "learning_rate": 6.549667820039221e-06, "loss": 0.729, "step": 13994 }, { "epoch": 1.8714897031291788, "grad_norm": 1.2621463537216187, "learning_rate": 6.548312769174852e-06, "loss": 0.6618, "step": 13995 }, { "epoch": 1.871623428724258, "grad_norm": 1.252867579460144, "learning_rate": 6.546957790257602e-06, "loss": 0.6367, "step": 13996 }, { "epoch": 1.8717571543193368, "grad_norm": 1.1300737857818604, "learning_rate": 6.545602883315708e-06, "loss": 0.6166, "step": 13997 }, { "epoch": 1.8718908799144156, "grad_norm": 1.0902920961380005, "learning_rate": 6.5442480483774215e-06, "loss": 0.6727, "step": 13998 }, { "epoch": 1.8720246055094947, "grad_norm": 1.4419710636138916, "learning_rate": 6.542893285470975e-06, "loss": 0.6912, "step": 13999 }, { "epoch": 1.8721583311045733, "grad_norm": 1.0174311399459839, "learning_rate": 6.5415385946246106e-06, "loss": 0.6021, "step": 14000 }, { "epoch": 1.8722920566996524, "grad_norm": 1.3478707075119019, "learning_rate": 6.540183975866563e-06, "loss": 0.7583, "step": 14001 }, { "epoch": 1.8724257822947312, "grad_norm": 1.1823484897613525, "learning_rate": 6.538829429225068e-06, "loss": 0.6328, "step": 14002 }, { "epoch": 1.87255950788981, "grad_norm": 1.2060277462005615, "learning_rate": 6.537474954728368e-06, "loss": 0.6187, "step": 14003 }, { "epoch": 1.8726932334848891, "grad_norm": 1.281545877456665, "learning_rate": 6.536120552404688e-06, "loss": 0.7761, "step": 14004 }, { "epoch": 1.8728269590799678, "grad_norm": 1.296919584274292, "learning_rate": 6.534766222282256e-06, "loss": 0.7047, "step": 14005 }, { "epoch": 1.8729606846750468, "grad_norm": 1.227433204650879, "learning_rate": 6.533411964389311e-06, "loss": 0.685, "step": 14006 }, { "epoch": 1.8730944102701257, "grad_norm": 1.333950161933899, "learning_rate": 6.532057778754074e-06, "loss": 0.789, "step": 14007 }, { "epoch": 1.8732281358652045, "grad_norm": 1.3873847723007202, "learning_rate": 6.530703665404772e-06, "loss": 0.6953, "step": 14008 }, { "epoch": 1.8733618614602836, "grad_norm": 1.1854841709136963, "learning_rate": 6.529349624369637e-06, "loss": 0.6423, "step": 14009 }, { "epoch": 1.8734955870553622, "grad_norm": 1.1603190898895264, "learning_rate": 6.527995655676882e-06, "loss": 0.6532, "step": 14010 }, { "epoch": 1.8736293126504413, "grad_norm": 1.21134614944458, "learning_rate": 6.5266417593547415e-06, "loss": 0.5951, "step": 14011 }, { "epoch": 1.8737630382455202, "grad_norm": 1.3154667615890503, "learning_rate": 6.525287935431427e-06, "loss": 0.7585, "step": 14012 }, { "epoch": 1.873896763840599, "grad_norm": 1.2063173055648804, "learning_rate": 6.523934183935161e-06, "loss": 0.6322, "step": 14013 }, { "epoch": 1.874030489435678, "grad_norm": 1.3013668060302734, "learning_rate": 6.522580504894161e-06, "loss": 0.6884, "step": 14014 }, { "epoch": 1.874164215030757, "grad_norm": 1.0830801725387573, "learning_rate": 6.521226898336643e-06, "loss": 0.6403, "step": 14015 }, { "epoch": 1.8742979406258358, "grad_norm": 1.3827158212661743, "learning_rate": 6.519873364290818e-06, "loss": 0.6612, "step": 14016 }, { "epoch": 1.8744316662209148, "grad_norm": 1.206017255783081, "learning_rate": 6.518519902784908e-06, "loss": 0.6201, "step": 14017 }, { "epoch": 1.8745653918159935, "grad_norm": 1.2448264360427856, "learning_rate": 6.517166513847115e-06, "loss": 0.6851, "step": 14018 }, { "epoch": 1.8746991174110725, "grad_norm": 1.2364295721054077, "learning_rate": 6.515813197505656e-06, "loss": 0.6262, "step": 14019 }, { "epoch": 1.8748328430061514, "grad_norm": 1.4403367042541504, "learning_rate": 6.514459953788737e-06, "loss": 0.7105, "step": 14020 }, { "epoch": 1.8749665686012302, "grad_norm": 1.3327215909957886, "learning_rate": 6.513106782724561e-06, "loss": 0.7084, "step": 14021 }, { "epoch": 1.8751002941963093, "grad_norm": 1.1748720407485962, "learning_rate": 6.511753684341342e-06, "loss": 0.6175, "step": 14022 }, { "epoch": 1.875234019791388, "grad_norm": 1.2808758020401, "learning_rate": 6.510400658667276e-06, "loss": 0.7218, "step": 14023 }, { "epoch": 1.875367745386467, "grad_norm": 1.2290514707565308, "learning_rate": 6.509047705730572e-06, "loss": 0.621, "step": 14024 }, { "epoch": 1.8755014709815458, "grad_norm": 1.283898115158081, "learning_rate": 6.507694825559429e-06, "loss": 0.7223, "step": 14025 }, { "epoch": 1.8756351965766247, "grad_norm": 1.3426834344863892, "learning_rate": 6.506342018182041e-06, "loss": 0.6513, "step": 14026 }, { "epoch": 1.8757689221717038, "grad_norm": 1.301448106765747, "learning_rate": 6.5049892836266135e-06, "loss": 0.7162, "step": 14027 }, { "epoch": 1.8759026477667826, "grad_norm": 1.3862535953521729, "learning_rate": 6.503636621921342e-06, "loss": 0.7553, "step": 14028 }, { "epoch": 1.8760363733618615, "grad_norm": 1.254606008529663, "learning_rate": 6.502284033094415e-06, "loss": 0.6599, "step": 14029 }, { "epoch": 1.8761700989569403, "grad_norm": 1.3500593900680542, "learning_rate": 6.500931517174034e-06, "loss": 0.6775, "step": 14030 }, { "epoch": 1.8763038245520192, "grad_norm": 1.3084383010864258, "learning_rate": 6.499579074188385e-06, "loss": 0.7006, "step": 14031 }, { "epoch": 1.8764375501470982, "grad_norm": 1.4097360372543335, "learning_rate": 6.498226704165662e-06, "loss": 0.7456, "step": 14032 }, { "epoch": 1.876571275742177, "grad_norm": 1.4105556011199951, "learning_rate": 6.496874407134053e-06, "loss": 0.7007, "step": 14033 }, { "epoch": 1.876705001337256, "grad_norm": 1.1480857133865356, "learning_rate": 6.495522183121741e-06, "loss": 0.637, "step": 14034 }, { "epoch": 1.876838726932335, "grad_norm": 1.3478442430496216, "learning_rate": 6.4941700321569215e-06, "loss": 0.7553, "step": 14035 }, { "epoch": 1.8769724525274136, "grad_norm": 1.2754693031311035, "learning_rate": 6.492817954267771e-06, "loss": 0.6833, "step": 14036 }, { "epoch": 1.8771061781224927, "grad_norm": 1.1774544715881348, "learning_rate": 6.491465949482471e-06, "loss": 0.6317, "step": 14037 }, { "epoch": 1.8772399037175715, "grad_norm": 1.3568300008773804, "learning_rate": 6.49011401782921e-06, "loss": 0.6889, "step": 14038 }, { "epoch": 1.8773736293126504, "grad_norm": 1.3176097869873047, "learning_rate": 6.4887621593361595e-06, "loss": 0.6559, "step": 14039 }, { "epoch": 1.8775073549077295, "grad_norm": 1.1316043138504028, "learning_rate": 6.487410374031504e-06, "loss": 0.5738, "step": 14040 }, { "epoch": 1.877641080502808, "grad_norm": 1.3101931810379028, "learning_rate": 6.4860586619434205e-06, "loss": 0.7198, "step": 14041 }, { "epoch": 1.8777748060978872, "grad_norm": 1.3052033185958862, "learning_rate": 6.4847070231000775e-06, "loss": 0.6992, "step": 14042 }, { "epoch": 1.877908531692966, "grad_norm": 1.3033486604690552, "learning_rate": 6.483355457529657e-06, "loss": 0.6926, "step": 14043 }, { "epoch": 1.8780422572880449, "grad_norm": 1.2964602708816528, "learning_rate": 6.482003965260326e-06, "loss": 0.6251, "step": 14044 }, { "epoch": 1.878175982883124, "grad_norm": 1.1500821113586426, "learning_rate": 6.480652546320254e-06, "loss": 0.663, "step": 14045 }, { "epoch": 1.8783097084782028, "grad_norm": 1.4576236009597778, "learning_rate": 6.4793012007376125e-06, "loss": 0.7686, "step": 14046 }, { "epoch": 1.8784434340732816, "grad_norm": 1.1989316940307617, "learning_rate": 6.4779499285405655e-06, "loss": 0.6454, "step": 14047 }, { "epoch": 1.8785771596683605, "grad_norm": 1.3502057790756226, "learning_rate": 6.476598729757289e-06, "loss": 0.6987, "step": 14048 }, { "epoch": 1.8787108852634393, "grad_norm": 1.3030344247817993, "learning_rate": 6.475247604415937e-06, "loss": 0.7016, "step": 14049 }, { "epoch": 1.8788446108585184, "grad_norm": 1.1843358278274536, "learning_rate": 6.473896552544674e-06, "loss": 0.6321, "step": 14050 }, { "epoch": 1.8789783364535972, "grad_norm": 1.2567222118377686, "learning_rate": 6.472545574171667e-06, "loss": 0.6878, "step": 14051 }, { "epoch": 1.879112062048676, "grad_norm": 1.2484915256500244, "learning_rate": 6.471194669325069e-06, "loss": 0.747, "step": 14052 }, { "epoch": 1.8792457876437552, "grad_norm": 1.3272696733474731, "learning_rate": 6.4698438380330405e-06, "loss": 0.6248, "step": 14053 }, { "epoch": 1.8793795132388338, "grad_norm": 1.1845945119857788, "learning_rate": 6.468493080323743e-06, "loss": 0.6924, "step": 14054 }, { "epoch": 1.8795132388339129, "grad_norm": 1.221358060836792, "learning_rate": 6.4671423962253255e-06, "loss": 0.6084, "step": 14055 }, { "epoch": 1.8796469644289917, "grad_norm": 1.319655418395996, "learning_rate": 6.465791785765946e-06, "loss": 0.6483, "step": 14056 }, { "epoch": 1.8797806900240706, "grad_norm": 1.1982049942016602, "learning_rate": 6.464441248973756e-06, "loss": 0.6751, "step": 14057 }, { "epoch": 1.8799144156191496, "grad_norm": 1.3233323097229004, "learning_rate": 6.4630907858769e-06, "loss": 0.7486, "step": 14058 }, { "epoch": 1.8800481412142283, "grad_norm": 1.2489064931869507, "learning_rate": 6.4617403965035356e-06, "loss": 0.5452, "step": 14059 }, { "epoch": 1.8801818668093073, "grad_norm": 1.371580958366394, "learning_rate": 6.460390080881807e-06, "loss": 0.6551, "step": 14060 }, { "epoch": 1.8803155924043862, "grad_norm": 1.2519102096557617, "learning_rate": 6.459039839039858e-06, "loss": 0.6407, "step": 14061 }, { "epoch": 1.880449317999465, "grad_norm": 1.181142807006836, "learning_rate": 6.457689671005838e-06, "loss": 0.6675, "step": 14062 }, { "epoch": 1.880583043594544, "grad_norm": 1.3463757038116455, "learning_rate": 6.456339576807883e-06, "loss": 0.7706, "step": 14063 }, { "epoch": 1.880716769189623, "grad_norm": 1.4333125352859497, "learning_rate": 6.454989556474143e-06, "loss": 0.6521, "step": 14064 }, { "epoch": 1.8808504947847018, "grad_norm": 1.3870477676391602, "learning_rate": 6.453639610032751e-06, "loss": 0.6348, "step": 14065 }, { "epoch": 1.8809842203797806, "grad_norm": 1.2330268621444702, "learning_rate": 6.452289737511846e-06, "loss": 0.6294, "step": 14066 }, { "epoch": 1.8811179459748595, "grad_norm": 1.3861037492752075, "learning_rate": 6.450939938939571e-06, "loss": 0.7011, "step": 14067 }, { "epoch": 1.8812516715699386, "grad_norm": 1.2697967290878296, "learning_rate": 6.449590214344057e-06, "loss": 0.6889, "step": 14068 }, { "epoch": 1.8813853971650174, "grad_norm": 1.3931254148483276, "learning_rate": 6.448240563753434e-06, "loss": 0.6688, "step": 14069 }, { "epoch": 1.8815191227600963, "grad_norm": 1.3026654720306396, "learning_rate": 6.446890987195842e-06, "loss": 0.6595, "step": 14070 }, { "epoch": 1.8816528483551753, "grad_norm": 1.330972671508789, "learning_rate": 6.445541484699402e-06, "loss": 0.7557, "step": 14071 }, { "epoch": 1.881786573950254, "grad_norm": 1.401557207107544, "learning_rate": 6.444192056292251e-06, "loss": 0.8084, "step": 14072 }, { "epoch": 1.881920299545333, "grad_norm": 1.3080319166183472, "learning_rate": 6.442842702002516e-06, "loss": 0.6921, "step": 14073 }, { "epoch": 1.8820540251404119, "grad_norm": 1.3351554870605469, "learning_rate": 6.441493421858318e-06, "loss": 0.6687, "step": 14074 }, { "epoch": 1.8821877507354907, "grad_norm": 1.3229854106903076, "learning_rate": 6.440144215887788e-06, "loss": 0.7118, "step": 14075 }, { "epoch": 1.8823214763305698, "grad_norm": 1.4023959636688232, "learning_rate": 6.438795084119045e-06, "loss": 0.7045, "step": 14076 }, { "epoch": 1.8824552019256484, "grad_norm": 1.1306209564208984, "learning_rate": 6.437446026580208e-06, "loss": 0.6875, "step": 14077 }, { "epoch": 1.8825889275207275, "grad_norm": 1.146634817123413, "learning_rate": 6.4360970432993995e-06, "loss": 0.6703, "step": 14078 }, { "epoch": 1.8827226531158063, "grad_norm": 1.1646977663040161, "learning_rate": 6.434748134304737e-06, "loss": 0.6109, "step": 14079 }, { "epoch": 1.8828563787108852, "grad_norm": 1.2927672863006592, "learning_rate": 6.433399299624342e-06, "loss": 0.632, "step": 14080 }, { "epoch": 1.8829901043059643, "grad_norm": 1.1203598976135254, "learning_rate": 6.432050539286325e-06, "loss": 0.6169, "step": 14081 }, { "epoch": 1.883123829901043, "grad_norm": 1.3726783990859985, "learning_rate": 6.430701853318797e-06, "loss": 0.7569, "step": 14082 }, { "epoch": 1.883257555496122, "grad_norm": 1.2672418355941772, "learning_rate": 6.429353241749878e-06, "loss": 0.7143, "step": 14083 }, { "epoch": 1.883391281091201, "grad_norm": 1.170158863067627, "learning_rate": 6.428004704607671e-06, "loss": 0.6499, "step": 14084 }, { "epoch": 1.8835250066862796, "grad_norm": 1.2926275730133057, "learning_rate": 6.426656241920286e-06, "loss": 0.6919, "step": 14085 }, { "epoch": 1.8836587322813587, "grad_norm": 1.2739237546920776, "learning_rate": 6.425307853715837e-06, "loss": 0.6639, "step": 14086 }, { "epoch": 1.8837924578764376, "grad_norm": 1.224241852760315, "learning_rate": 6.423959540022422e-06, "loss": 0.6768, "step": 14087 }, { "epoch": 1.8839261834715164, "grad_norm": 1.470683217048645, "learning_rate": 6.422611300868151e-06, "loss": 0.6887, "step": 14088 }, { "epoch": 1.8840599090665955, "grad_norm": 1.2674822807312012, "learning_rate": 6.421263136281124e-06, "loss": 0.6913, "step": 14089 }, { "epoch": 1.884193634661674, "grad_norm": 1.124819278717041, "learning_rate": 6.41991504628944e-06, "loss": 0.6076, "step": 14090 }, { "epoch": 1.8843273602567532, "grad_norm": 1.148465633392334, "learning_rate": 6.418567030921201e-06, "loss": 0.5521, "step": 14091 }, { "epoch": 1.884461085851832, "grad_norm": 1.265394687652588, "learning_rate": 6.417219090204508e-06, "loss": 0.7486, "step": 14092 }, { "epoch": 1.8845948114469109, "grad_norm": 1.236352562904358, "learning_rate": 6.415871224167451e-06, "loss": 0.6691, "step": 14093 }, { "epoch": 1.88472853704199, "grad_norm": 1.2643847465515137, "learning_rate": 6.414523432838134e-06, "loss": 0.7224, "step": 14094 }, { "epoch": 1.8848622626370686, "grad_norm": 1.2067506313323975, "learning_rate": 6.4131757162446395e-06, "loss": 0.6419, "step": 14095 }, { "epoch": 1.8849959882321476, "grad_norm": 1.1536996364593506, "learning_rate": 6.41182807441507e-06, "loss": 0.6542, "step": 14096 }, { "epoch": 1.8851297138272265, "grad_norm": 1.090825080871582, "learning_rate": 6.410480507377507e-06, "loss": 0.6305, "step": 14097 }, { "epoch": 1.8852634394223053, "grad_norm": 1.358974575996399, "learning_rate": 6.409133015160042e-06, "loss": 0.6513, "step": 14098 }, { "epoch": 1.8853971650173844, "grad_norm": 1.3241006135940552, "learning_rate": 6.407785597790768e-06, "loss": 0.6329, "step": 14099 }, { "epoch": 1.8855308906124633, "grad_norm": 1.1992534399032593, "learning_rate": 6.406438255297764e-06, "loss": 0.6051, "step": 14100 }, { "epoch": 1.885664616207542, "grad_norm": 1.2454341650009155, "learning_rate": 6.405090987709113e-06, "loss": 0.6907, "step": 14101 }, { "epoch": 1.8857983418026212, "grad_norm": 1.2980799674987793, "learning_rate": 6.403743795052905e-06, "loss": 0.7154, "step": 14102 }, { "epoch": 1.8859320673976998, "grad_norm": 1.359434723854065, "learning_rate": 6.402396677357212e-06, "loss": 0.7634, "step": 14103 }, { "epoch": 1.8860657929927789, "grad_norm": 1.2489616870880127, "learning_rate": 6.401049634650119e-06, "loss": 0.6445, "step": 14104 }, { "epoch": 1.8861995185878577, "grad_norm": 1.3029799461364746, "learning_rate": 6.399702666959705e-06, "loss": 0.6227, "step": 14105 }, { "epoch": 1.8863332441829366, "grad_norm": 1.270702838897705, "learning_rate": 6.39835577431404e-06, "loss": 0.6541, "step": 14106 }, { "epoch": 1.8864669697780156, "grad_norm": 1.2219544649124146, "learning_rate": 6.397008956741206e-06, "loss": 0.6793, "step": 14107 }, { "epoch": 1.8866006953730943, "grad_norm": 1.2158721685409546, "learning_rate": 6.395662214269269e-06, "loss": 0.6904, "step": 14108 }, { "epoch": 1.8867344209681733, "grad_norm": 1.2379530668258667, "learning_rate": 6.394315546926309e-06, "loss": 0.716, "step": 14109 }, { "epoch": 1.8868681465632522, "grad_norm": 1.1447101831436157, "learning_rate": 6.3929689547403875e-06, "loss": 0.6251, "step": 14110 }, { "epoch": 1.887001872158331, "grad_norm": 1.2050886154174805, "learning_rate": 6.391622437739575e-06, "loss": 0.6709, "step": 14111 }, { "epoch": 1.88713559775341, "grad_norm": 1.1737996339797974, "learning_rate": 6.390275995951945e-06, "loss": 0.6633, "step": 14112 }, { "epoch": 1.8872693233484887, "grad_norm": 1.2596298456192017, "learning_rate": 6.3889296294055566e-06, "loss": 0.719, "step": 14113 }, { "epoch": 1.8874030489435678, "grad_norm": 1.2738516330718994, "learning_rate": 6.387583338128471e-06, "loss": 0.6984, "step": 14114 }, { "epoch": 1.8875367745386467, "grad_norm": 1.142195701599121, "learning_rate": 6.386237122148758e-06, "loss": 0.6017, "step": 14115 }, { "epoch": 1.8876705001337255, "grad_norm": 1.3245794773101807, "learning_rate": 6.3848909814944706e-06, "loss": 0.7709, "step": 14116 }, { "epoch": 1.8878042257288046, "grad_norm": 1.2002161741256714, "learning_rate": 6.383544916193674e-06, "loss": 0.6937, "step": 14117 }, { "epoch": 1.8879379513238834, "grad_norm": 1.2082483768463135, "learning_rate": 6.382198926274424e-06, "loss": 0.6515, "step": 14118 }, { "epoch": 1.8880716769189623, "grad_norm": 1.08533775806427, "learning_rate": 6.380853011764772e-06, "loss": 0.6317, "step": 14119 }, { "epoch": 1.8882054025140413, "grad_norm": 1.2559655904769897, "learning_rate": 6.379507172692778e-06, "loss": 0.6863, "step": 14120 }, { "epoch": 1.88833912810912, "grad_norm": 1.3622547388076782, "learning_rate": 6.378161409086494e-06, "loss": 0.6784, "step": 14121 }, { "epoch": 1.888472853704199, "grad_norm": 1.4511135816574097, "learning_rate": 6.376815720973966e-06, "loss": 0.7581, "step": 14122 }, { "epoch": 1.8886065792992779, "grad_norm": 1.3332024812698364, "learning_rate": 6.375470108383249e-06, "loss": 0.7732, "step": 14123 }, { "epoch": 1.8887403048943567, "grad_norm": 1.0717743635177612, "learning_rate": 6.374124571342387e-06, "loss": 0.6536, "step": 14124 }, { "epoch": 1.8888740304894358, "grad_norm": 1.1635172367095947, "learning_rate": 6.372779109879433e-06, "loss": 0.6881, "step": 14125 }, { "epoch": 1.8890077560845144, "grad_norm": 1.3672279119491577, "learning_rate": 6.371433724022429e-06, "loss": 0.7624, "step": 14126 }, { "epoch": 1.8891414816795935, "grad_norm": 1.379828929901123, "learning_rate": 6.3700884137994115e-06, "loss": 0.7014, "step": 14127 }, { "epoch": 1.8892752072746724, "grad_norm": 1.245582938194275, "learning_rate": 6.36874317923843e-06, "loss": 0.5865, "step": 14128 }, { "epoch": 1.8894089328697512, "grad_norm": 1.250872015953064, "learning_rate": 6.367398020367522e-06, "loss": 0.6932, "step": 14129 }, { "epoch": 1.8895426584648303, "grad_norm": 1.3644089698791504, "learning_rate": 6.366052937214724e-06, "loss": 0.7166, "step": 14130 }, { "epoch": 1.8896763840599091, "grad_norm": 1.260862112045288, "learning_rate": 6.364707929808079e-06, "loss": 0.7288, "step": 14131 }, { "epoch": 1.889810109654988, "grad_norm": 1.1989822387695312, "learning_rate": 6.363362998175615e-06, "loss": 0.6437, "step": 14132 }, { "epoch": 1.8899438352500668, "grad_norm": 1.2962535619735718, "learning_rate": 6.3620181423453745e-06, "loss": 0.7023, "step": 14133 }, { "epoch": 1.8900775608451457, "grad_norm": 1.2855851650238037, "learning_rate": 6.360673362345382e-06, "loss": 0.6938, "step": 14134 }, { "epoch": 1.8902112864402247, "grad_norm": 1.2507954835891724, "learning_rate": 6.359328658203668e-06, "loss": 0.6408, "step": 14135 }, { "epoch": 1.8903450120353036, "grad_norm": 1.434545636177063, "learning_rate": 6.357984029948267e-06, "loss": 0.7707, "step": 14136 }, { "epoch": 1.8904787376303824, "grad_norm": 1.3401713371276855, "learning_rate": 6.356639477607205e-06, "loss": 0.7297, "step": 14137 }, { "epoch": 1.8906124632254615, "grad_norm": 1.2333322763442993, "learning_rate": 6.355295001208504e-06, "loss": 0.6167, "step": 14138 }, { "epoch": 1.8907461888205401, "grad_norm": 1.2623281478881836, "learning_rate": 6.3539506007801944e-06, "loss": 0.6738, "step": 14139 }, { "epoch": 1.8908799144156192, "grad_norm": 1.308822512626648, "learning_rate": 6.352606276350291e-06, "loss": 0.7274, "step": 14140 }, { "epoch": 1.891013640010698, "grad_norm": 1.4683748483657837, "learning_rate": 6.351262027946824e-06, "loss": 0.7126, "step": 14141 }, { "epoch": 1.891147365605777, "grad_norm": 1.2373170852661133, "learning_rate": 6.349917855597807e-06, "loss": 0.665, "step": 14142 }, { "epoch": 1.891281091200856, "grad_norm": 1.0747766494750977, "learning_rate": 6.348573759331257e-06, "loss": 0.5897, "step": 14143 }, { "epoch": 1.8914148167959346, "grad_norm": 1.208284616470337, "learning_rate": 6.347229739175197e-06, "loss": 0.6217, "step": 14144 }, { "epoch": 1.8915485423910137, "grad_norm": 1.3434022665023804, "learning_rate": 6.345885795157638e-06, "loss": 0.6309, "step": 14145 }, { "epoch": 1.8916822679860925, "grad_norm": 1.3921606540679932, "learning_rate": 6.344541927306589e-06, "loss": 0.7244, "step": 14146 }, { "epoch": 1.8918159935811714, "grad_norm": 1.4474575519561768, "learning_rate": 6.34319813565007e-06, "loss": 0.6479, "step": 14147 }, { "epoch": 1.8919497191762504, "grad_norm": 1.269419550895691, "learning_rate": 6.341854420216083e-06, "loss": 0.7034, "step": 14148 }, { "epoch": 1.8920834447713293, "grad_norm": 1.4364163875579834, "learning_rate": 6.34051078103264e-06, "loss": 0.7366, "step": 14149 }, { "epoch": 1.8922171703664081, "grad_norm": 1.4080795049667358, "learning_rate": 6.339167218127752e-06, "loss": 0.7664, "step": 14150 }, { "epoch": 1.892350895961487, "grad_norm": 1.118035912513733, "learning_rate": 6.337823731529415e-06, "loss": 0.6396, "step": 14151 }, { "epoch": 1.8924846215565658, "grad_norm": 1.190543532371521, "learning_rate": 6.336480321265643e-06, "loss": 0.7128, "step": 14152 }, { "epoch": 1.892618347151645, "grad_norm": 1.452013611793518, "learning_rate": 6.335136987364433e-06, "loss": 0.7963, "step": 14153 }, { "epoch": 1.8927520727467237, "grad_norm": 1.3416484594345093, "learning_rate": 6.333793729853781e-06, "loss": 0.7164, "step": 14154 }, { "epoch": 1.8928857983418026, "grad_norm": 1.2548432350158691, "learning_rate": 6.332450548761692e-06, "loss": 0.7003, "step": 14155 }, { "epoch": 1.8930195239368817, "grad_norm": 1.287768840789795, "learning_rate": 6.331107444116163e-06, "loss": 0.6465, "step": 14156 }, { "epoch": 1.8931532495319603, "grad_norm": 1.277902603149414, "learning_rate": 6.32976441594519e-06, "loss": 0.63, "step": 14157 }, { "epoch": 1.8932869751270394, "grad_norm": 1.2718380689620972, "learning_rate": 6.328421464276766e-06, "loss": 0.6681, "step": 14158 }, { "epoch": 1.8934207007221182, "grad_norm": 1.206114411354065, "learning_rate": 6.327078589138879e-06, "loss": 0.6473, "step": 14159 }, { "epoch": 1.893554426317197, "grad_norm": 1.3100687265396118, "learning_rate": 6.325735790559529e-06, "loss": 0.6766, "step": 14160 }, { "epoch": 1.8936881519122761, "grad_norm": 1.2397688627243042, "learning_rate": 6.324393068566696e-06, "loss": 0.6304, "step": 14161 }, { "epoch": 1.8938218775073548, "grad_norm": 1.1693998575210571, "learning_rate": 6.323050423188374e-06, "loss": 0.6496, "step": 14162 }, { "epoch": 1.8939556031024338, "grad_norm": 1.3855648040771484, "learning_rate": 6.32170785445255e-06, "loss": 0.7717, "step": 14163 }, { "epoch": 1.8940893286975127, "grad_norm": 1.1528772115707397, "learning_rate": 6.320365362387202e-06, "loss": 0.6547, "step": 14164 }, { "epoch": 1.8942230542925915, "grad_norm": 1.5268951654434204, "learning_rate": 6.31902294702032e-06, "loss": 0.7197, "step": 14165 }, { "epoch": 1.8943567798876706, "grad_norm": 1.3617584705352783, "learning_rate": 6.317680608379884e-06, "loss": 0.695, "step": 14166 }, { "epoch": 1.8944905054827494, "grad_norm": 1.430815577507019, "learning_rate": 6.316338346493867e-06, "loss": 0.7175, "step": 14167 }, { "epoch": 1.8946242310778283, "grad_norm": 1.2259982824325562, "learning_rate": 6.314996161390255e-06, "loss": 0.6448, "step": 14168 }, { "epoch": 1.8947579566729071, "grad_norm": 1.4310404062271118, "learning_rate": 6.313654053097023e-06, "loss": 0.7019, "step": 14169 }, { "epoch": 1.894891682267986, "grad_norm": 1.3564549684524536, "learning_rate": 6.312312021642142e-06, "loss": 0.627, "step": 14170 }, { "epoch": 1.895025407863065, "grad_norm": 1.3183951377868652, "learning_rate": 6.31097006705359e-06, "loss": 0.7327, "step": 14171 }, { "epoch": 1.895159133458144, "grad_norm": 1.311274766921997, "learning_rate": 6.309628189359336e-06, "loss": 0.6362, "step": 14172 }, { "epoch": 1.8952928590532228, "grad_norm": 1.2156037092208862, "learning_rate": 6.3082863885873525e-06, "loss": 0.6489, "step": 14173 }, { "epoch": 1.8954265846483018, "grad_norm": 1.2086210250854492, "learning_rate": 6.306944664765606e-06, "loss": 0.6497, "step": 14174 }, { "epoch": 1.8955603102433805, "grad_norm": 1.20020592212677, "learning_rate": 6.305603017922062e-06, "loss": 0.6882, "step": 14175 }, { "epoch": 1.8956940358384595, "grad_norm": 1.24151611328125, "learning_rate": 6.304261448084692e-06, "loss": 0.7021, "step": 14176 }, { "epoch": 1.8958277614335384, "grad_norm": 1.2657420635223389, "learning_rate": 6.3029199552814545e-06, "loss": 0.6324, "step": 14177 }, { "epoch": 1.8959614870286172, "grad_norm": 1.2817223072052002, "learning_rate": 6.30157853954031e-06, "loss": 0.62, "step": 14178 }, { "epoch": 1.8960952126236963, "grad_norm": 1.2780274152755737, "learning_rate": 6.300237200889225e-06, "loss": 0.605, "step": 14179 }, { "epoch": 1.896228938218775, "grad_norm": 1.2388662099838257, "learning_rate": 6.2988959393561525e-06, "loss": 0.7378, "step": 14180 }, { "epoch": 1.896362663813854, "grad_norm": 1.246254563331604, "learning_rate": 6.297554754969053e-06, "loss": 0.6795, "step": 14181 }, { "epoch": 1.8964963894089328, "grad_norm": 1.1774646043777466, "learning_rate": 6.296213647755885e-06, "loss": 0.6024, "step": 14182 }, { "epoch": 1.8966301150040117, "grad_norm": 1.1876485347747803, "learning_rate": 6.294872617744595e-06, "loss": 0.6663, "step": 14183 }, { "epoch": 1.8967638405990908, "grad_norm": 1.172798752784729, "learning_rate": 6.293531664963144e-06, "loss": 0.6741, "step": 14184 }, { "epoch": 1.8968975661941696, "grad_norm": 1.4781556129455566, "learning_rate": 6.292190789439479e-06, "loss": 0.717, "step": 14185 }, { "epoch": 1.8970312917892485, "grad_norm": 1.3320326805114746, "learning_rate": 6.2908499912015444e-06, "loss": 0.6823, "step": 14186 }, { "epoch": 1.8971650173843275, "grad_norm": 1.4262053966522217, "learning_rate": 6.2895092702772945e-06, "loss": 0.6841, "step": 14187 }, { "epoch": 1.8972987429794061, "grad_norm": 1.2862799167633057, "learning_rate": 6.288168626694673e-06, "loss": 0.7697, "step": 14188 }, { "epoch": 1.8974324685744852, "grad_norm": 1.3348792791366577, "learning_rate": 6.286828060481626e-06, "loss": 0.7297, "step": 14189 }, { "epoch": 1.897566194169564, "grad_norm": 1.1944373846054077, "learning_rate": 6.285487571666096e-06, "loss": 0.6294, "step": 14190 }, { "epoch": 1.897699919764643, "grad_norm": 1.2782119512557983, "learning_rate": 6.284147160276018e-06, "loss": 0.6962, "step": 14191 }, { "epoch": 1.897833645359722, "grad_norm": 1.0583351850509644, "learning_rate": 6.282806826339343e-06, "loss": 0.5789, "step": 14192 }, { "epoch": 1.8979673709548006, "grad_norm": 1.25435209274292, "learning_rate": 6.2814665698839976e-06, "loss": 0.6818, "step": 14193 }, { "epoch": 1.8981010965498797, "grad_norm": 1.3558266162872314, "learning_rate": 6.280126390937925e-06, "loss": 0.6586, "step": 14194 }, { "epoch": 1.8982348221449585, "grad_norm": 1.1226017475128174, "learning_rate": 6.278786289529061e-06, "loss": 0.6537, "step": 14195 }, { "epoch": 1.8983685477400374, "grad_norm": 1.2714204788208008, "learning_rate": 6.277446265685332e-06, "loss": 0.6651, "step": 14196 }, { "epoch": 1.8985022733351165, "grad_norm": 1.2135707139968872, "learning_rate": 6.276106319434676e-06, "loss": 0.628, "step": 14197 }, { "epoch": 1.898635998930195, "grad_norm": 1.2941120862960815, "learning_rate": 6.274766450805022e-06, "loss": 0.6397, "step": 14198 }, { "epoch": 1.8987697245252741, "grad_norm": 1.173779010772705, "learning_rate": 6.273426659824293e-06, "loss": 0.682, "step": 14199 }, { "epoch": 1.898903450120353, "grad_norm": 1.2401278018951416, "learning_rate": 6.272086946520419e-06, "loss": 0.6651, "step": 14200 }, { "epoch": 1.8990371757154318, "grad_norm": 1.141048550605774, "learning_rate": 6.270747310921328e-06, "loss": 0.6762, "step": 14201 }, { "epoch": 1.899170901310511, "grad_norm": 1.2365912199020386, "learning_rate": 6.269407753054939e-06, "loss": 0.6751, "step": 14202 }, { "epoch": 1.8993046269055898, "grad_norm": 1.2768604755401611, "learning_rate": 6.2680682729491795e-06, "loss": 0.677, "step": 14203 }, { "epoch": 1.8994383525006686, "grad_norm": 1.0195436477661133, "learning_rate": 6.26672887063196e-06, "loss": 0.6231, "step": 14204 }, { "epoch": 1.8995720780957477, "grad_norm": 1.3629164695739746, "learning_rate": 6.265389546131209e-06, "loss": 0.6707, "step": 14205 }, { "epoch": 1.8997058036908263, "grad_norm": 1.2184032201766968, "learning_rate": 6.2640502994748375e-06, "loss": 0.6592, "step": 14206 }, { "epoch": 1.8998395292859054, "grad_norm": 1.2271900177001953, "learning_rate": 6.262711130690762e-06, "loss": 0.6272, "step": 14207 }, { "epoch": 1.8999732548809842, "grad_norm": 1.1895248889923096, "learning_rate": 6.261372039806899e-06, "loss": 0.635, "step": 14208 }, { "epoch": 1.900106980476063, "grad_norm": 1.2674988508224487, "learning_rate": 6.260033026851156e-06, "loss": 0.7167, "step": 14209 }, { "epoch": 1.9002407060711421, "grad_norm": 1.310165286064148, "learning_rate": 6.2586940918514474e-06, "loss": 0.6393, "step": 14210 }, { "epoch": 1.9003744316662208, "grad_norm": 1.2059580087661743, "learning_rate": 6.257355234835682e-06, "loss": 0.6647, "step": 14211 }, { "epoch": 1.9005081572612998, "grad_norm": 1.16941499710083, "learning_rate": 6.256016455831762e-06, "loss": 0.5878, "step": 14212 }, { "epoch": 1.9006418828563787, "grad_norm": 1.2030833959579468, "learning_rate": 6.254677754867596e-06, "loss": 0.6431, "step": 14213 }, { "epoch": 1.9007756084514575, "grad_norm": 1.2448970079421997, "learning_rate": 6.2533391319710924e-06, "loss": 0.7215, "step": 14214 }, { "epoch": 1.9009093340465366, "grad_norm": 1.2692575454711914, "learning_rate": 6.252000587170145e-06, "loss": 0.7123, "step": 14215 }, { "epoch": 1.9010430596416152, "grad_norm": 1.3642958402633667, "learning_rate": 6.250662120492663e-06, "loss": 0.6922, "step": 14216 }, { "epoch": 1.9011767852366943, "grad_norm": 1.3640735149383545, "learning_rate": 6.249323731966537e-06, "loss": 0.6849, "step": 14217 }, { "epoch": 1.9013105108317732, "grad_norm": 1.4249627590179443, "learning_rate": 6.247985421619674e-06, "loss": 0.7258, "step": 14218 }, { "epoch": 1.901444236426852, "grad_norm": 1.101607084274292, "learning_rate": 6.24664718947996e-06, "loss": 0.6548, "step": 14219 }, { "epoch": 1.901577962021931, "grad_norm": 1.3178461790084839, "learning_rate": 6.2453090355752955e-06, "loss": 0.7144, "step": 14220 }, { "epoch": 1.90171168761701, "grad_norm": 1.403782606124878, "learning_rate": 6.243970959933572e-06, "loss": 0.7134, "step": 14221 }, { "epoch": 1.9018454132120888, "grad_norm": 1.1963376998901367, "learning_rate": 6.24263296258268e-06, "loss": 0.7193, "step": 14222 }, { "epoch": 1.9019791388071678, "grad_norm": 1.184970736503601, "learning_rate": 6.241295043550506e-06, "loss": 0.6593, "step": 14223 }, { "epoch": 1.9021128644022465, "grad_norm": 1.2098369598388672, "learning_rate": 6.239957202864943e-06, "loss": 0.5852, "step": 14224 }, { "epoch": 1.9022465899973255, "grad_norm": 1.4391251802444458, "learning_rate": 6.23861944055387e-06, "loss": 0.6346, "step": 14225 }, { "epoch": 1.9023803155924044, "grad_norm": 1.334114909172058, "learning_rate": 6.237281756645178e-06, "loss": 0.7033, "step": 14226 }, { "epoch": 1.9025140411874832, "grad_norm": 1.2246087789535522, "learning_rate": 6.23594415116675e-06, "loss": 0.6968, "step": 14227 }, { "epoch": 1.9026477667825623, "grad_norm": 1.216705083847046, "learning_rate": 6.2346066241464595e-06, "loss": 0.6795, "step": 14228 }, { "epoch": 1.902781492377641, "grad_norm": 1.2645127773284912, "learning_rate": 6.233269175612195e-06, "loss": 0.7201, "step": 14229 }, { "epoch": 1.90291521797272, "grad_norm": 1.363527774810791, "learning_rate": 6.23193180559183e-06, "loss": 0.6423, "step": 14230 }, { "epoch": 1.9030489435677989, "grad_norm": 1.2733242511749268, "learning_rate": 6.230594514113238e-06, "loss": 0.6162, "step": 14231 }, { "epoch": 1.9031826691628777, "grad_norm": 1.2674627304077148, "learning_rate": 6.2292573012042965e-06, "loss": 0.6803, "step": 14232 }, { "epoch": 1.9033163947579568, "grad_norm": 1.3361784219741821, "learning_rate": 6.22792016689288e-06, "loss": 0.6471, "step": 14233 }, { "epoch": 1.9034501203530356, "grad_norm": 1.2567089796066284, "learning_rate": 6.2265831112068565e-06, "loss": 0.6775, "step": 14234 }, { "epoch": 1.9035838459481145, "grad_norm": 1.3277584314346313, "learning_rate": 6.225246134174101e-06, "loss": 0.6346, "step": 14235 }, { "epoch": 1.9037175715431933, "grad_norm": 1.1782984733581543, "learning_rate": 6.223909235822472e-06, "loss": 0.6258, "step": 14236 }, { "epoch": 1.9038512971382722, "grad_norm": 1.2829620838165283, "learning_rate": 6.222572416179847e-06, "loss": 0.6008, "step": 14237 }, { "epoch": 1.9039850227333512, "grad_norm": 1.2130955457687378, "learning_rate": 6.2212356752740835e-06, "loss": 0.7016, "step": 14238 }, { "epoch": 1.90411874832843, "grad_norm": 1.3055992126464844, "learning_rate": 6.219899013133046e-06, "loss": 0.633, "step": 14239 }, { "epoch": 1.904252473923509, "grad_norm": 1.3640965223312378, "learning_rate": 6.218562429784596e-06, "loss": 0.7483, "step": 14240 }, { "epoch": 1.904386199518588, "grad_norm": 1.367092490196228, "learning_rate": 6.217225925256593e-06, "loss": 0.7042, "step": 14241 }, { "epoch": 1.9045199251136666, "grad_norm": 1.2731029987335205, "learning_rate": 6.215889499576898e-06, "loss": 0.6742, "step": 14242 }, { "epoch": 1.9046536507087457, "grad_norm": 1.4902068376541138, "learning_rate": 6.214553152773366e-06, "loss": 0.6201, "step": 14243 }, { "epoch": 1.9047873763038246, "grad_norm": 1.3631356954574585, "learning_rate": 6.213216884873848e-06, "loss": 0.6913, "step": 14244 }, { "epoch": 1.9049211018989034, "grad_norm": 1.3377296924591064, "learning_rate": 6.211880695906203e-06, "loss": 0.6877, "step": 14245 }, { "epoch": 1.9050548274939825, "grad_norm": 1.255954623222351, "learning_rate": 6.2105445858982805e-06, "loss": 0.6708, "step": 14246 }, { "epoch": 1.905188553089061, "grad_norm": 1.1267220973968506, "learning_rate": 6.209208554877927e-06, "loss": 0.6654, "step": 14247 }, { "epoch": 1.9053222786841402, "grad_norm": 1.1582626104354858, "learning_rate": 6.207872602872998e-06, "loss": 0.6396, "step": 14248 }, { "epoch": 1.905456004279219, "grad_norm": 1.210599422454834, "learning_rate": 6.20653672991133e-06, "loss": 0.6549, "step": 14249 }, { "epoch": 1.9055897298742979, "grad_norm": 1.1410456895828247, "learning_rate": 6.20520093602078e-06, "loss": 0.5714, "step": 14250 }, { "epoch": 1.905723455469377, "grad_norm": 1.0925973653793335, "learning_rate": 6.203865221229182e-06, "loss": 0.5963, "step": 14251 }, { "epoch": 1.9058571810644558, "grad_norm": 1.3466782569885254, "learning_rate": 6.202529585564382e-06, "loss": 0.7082, "step": 14252 }, { "epoch": 1.9059909066595346, "grad_norm": 1.3894317150115967, "learning_rate": 6.201194029054218e-06, "loss": 0.7011, "step": 14253 }, { "epoch": 1.9061246322546135, "grad_norm": 1.3058750629425049, "learning_rate": 6.199858551726532e-06, "loss": 0.6732, "step": 14254 }, { "epoch": 1.9062583578496923, "grad_norm": 1.3069320917129517, "learning_rate": 6.1985231536091535e-06, "loss": 0.7017, "step": 14255 }, { "epoch": 1.9063920834447714, "grad_norm": 1.2842769622802734, "learning_rate": 6.1971878347299275e-06, "loss": 0.6798, "step": 14256 }, { "epoch": 1.9065258090398502, "grad_norm": 1.3049827814102173, "learning_rate": 6.195852595116678e-06, "loss": 0.6991, "step": 14257 }, { "epoch": 1.906659534634929, "grad_norm": 1.1584053039550781, "learning_rate": 6.194517434797243e-06, "loss": 0.6637, "step": 14258 }, { "epoch": 1.9067932602300082, "grad_norm": 1.3488049507141113, "learning_rate": 6.193182353799451e-06, "loss": 0.7048, "step": 14259 }, { "epoch": 1.9069269858250868, "grad_norm": 1.160932183265686, "learning_rate": 6.191847352151127e-06, "loss": 0.6652, "step": 14260 }, { "epoch": 1.9070607114201659, "grad_norm": 1.2118254899978638, "learning_rate": 6.190512429880105e-06, "loss": 0.641, "step": 14261 }, { "epoch": 1.9071944370152447, "grad_norm": 1.290051817893982, "learning_rate": 6.189177587014206e-06, "loss": 0.6369, "step": 14262 }, { "epoch": 1.9073281626103236, "grad_norm": 1.3270457983016968, "learning_rate": 6.18784282358125e-06, "loss": 0.7822, "step": 14263 }, { "epoch": 1.9074618882054026, "grad_norm": 1.1998343467712402, "learning_rate": 6.186508139609064e-06, "loss": 0.6411, "step": 14264 }, { "epoch": 1.9075956138004813, "grad_norm": 1.2813255786895752, "learning_rate": 6.185173535125468e-06, "loss": 0.704, "step": 14265 }, { "epoch": 1.9077293393955603, "grad_norm": 1.312684416770935, "learning_rate": 6.183839010158278e-06, "loss": 0.6897, "step": 14266 }, { "epoch": 1.9078630649906392, "grad_norm": 1.323920726776123, "learning_rate": 6.182504564735314e-06, "loss": 0.6371, "step": 14267 }, { "epoch": 1.907996790585718, "grad_norm": 1.3595277070999146, "learning_rate": 6.181170198884386e-06, "loss": 0.6701, "step": 14268 }, { "epoch": 1.908130516180797, "grad_norm": 1.1455808877944946, "learning_rate": 6.179835912633315e-06, "loss": 0.6417, "step": 14269 }, { "epoch": 1.908264241775876, "grad_norm": 1.218246579170227, "learning_rate": 6.178501706009907e-06, "loss": 0.6784, "step": 14270 }, { "epoch": 1.9083979673709548, "grad_norm": 1.2500081062316895, "learning_rate": 6.177167579041974e-06, "loss": 0.6286, "step": 14271 }, { "epoch": 1.9085316929660336, "grad_norm": 1.306410789489746, "learning_rate": 6.1758335317573245e-06, "loss": 0.6401, "step": 14272 }, { "epoch": 1.9086654185611125, "grad_norm": 1.4075771570205688, "learning_rate": 6.174499564183764e-06, "loss": 0.7212, "step": 14273 }, { "epoch": 1.9087991441561916, "grad_norm": 1.341709852218628, "learning_rate": 6.173165676349103e-06, "loss": 0.6642, "step": 14274 }, { "epoch": 1.9089328697512704, "grad_norm": 1.3228802680969238, "learning_rate": 6.171831868281142e-06, "loss": 0.6927, "step": 14275 }, { "epoch": 1.9090665953463493, "grad_norm": 1.2037936449050903, "learning_rate": 6.170498140007679e-06, "loss": 0.6216, "step": 14276 }, { "epoch": 1.9092003209414283, "grad_norm": 1.2791647911071777, "learning_rate": 6.169164491556519e-06, "loss": 0.6834, "step": 14277 }, { "epoch": 1.909334046536507, "grad_norm": 1.2625590562820435, "learning_rate": 6.16783092295546e-06, "loss": 0.6874, "step": 14278 }, { "epoch": 1.909467772131586, "grad_norm": 1.151929497718811, "learning_rate": 6.1664974342323e-06, "loss": 0.6162, "step": 14279 }, { "epoch": 1.9096014977266649, "grad_norm": 1.3370460271835327, "learning_rate": 6.165164025414831e-06, "loss": 0.7386, "step": 14280 }, { "epoch": 1.9097352233217437, "grad_norm": 1.2010351419448853, "learning_rate": 6.163830696530846e-06, "loss": 0.6399, "step": 14281 }, { "epoch": 1.9098689489168228, "grad_norm": 1.2746011018753052, "learning_rate": 6.162497447608145e-06, "loss": 0.7026, "step": 14282 }, { "epoch": 1.9100026745119014, "grad_norm": 1.315746784210205, "learning_rate": 6.161164278674508e-06, "loss": 0.7485, "step": 14283 }, { "epoch": 1.9101364001069805, "grad_norm": 1.157317876815796, "learning_rate": 6.15983118975773e-06, "loss": 0.6024, "step": 14284 }, { "epoch": 1.9102701257020593, "grad_norm": 1.5498894453048706, "learning_rate": 6.158498180885596e-06, "loss": 0.7697, "step": 14285 }, { "epoch": 1.9104038512971382, "grad_norm": 1.2785818576812744, "learning_rate": 6.157165252085888e-06, "loss": 0.642, "step": 14286 }, { "epoch": 1.9105375768922173, "grad_norm": 1.3187330961227417, "learning_rate": 6.155832403386399e-06, "loss": 0.6547, "step": 14287 }, { "epoch": 1.910671302487296, "grad_norm": 1.5496586561203003, "learning_rate": 6.154499634814905e-06, "loss": 0.7219, "step": 14288 }, { "epoch": 1.910805028082375, "grad_norm": 1.2809336185455322, "learning_rate": 6.153166946399182e-06, "loss": 0.6731, "step": 14289 }, { "epoch": 1.910938753677454, "grad_norm": 1.2342798709869385, "learning_rate": 6.151834338167016e-06, "loss": 0.6302, "step": 14290 }, { "epoch": 1.9110724792725327, "grad_norm": 1.3253809213638306, "learning_rate": 6.15050181014618e-06, "loss": 0.6674, "step": 14291 }, { "epoch": 1.9112062048676117, "grad_norm": 1.1328574419021606, "learning_rate": 6.149169362364448e-06, "loss": 0.5979, "step": 14292 }, { "epoch": 1.9113399304626906, "grad_norm": 1.3538148403167725, "learning_rate": 6.1478369948495994e-06, "loss": 0.7809, "step": 14293 }, { "epoch": 1.9114736560577694, "grad_norm": 1.2945058345794678, "learning_rate": 6.1465047076293994e-06, "loss": 0.6838, "step": 14294 }, { "epoch": 1.9116073816528485, "grad_norm": 1.2333205938339233, "learning_rate": 6.1451725007316245e-06, "loss": 0.6511, "step": 14295 }, { "epoch": 1.9117411072479271, "grad_norm": 1.2917035818099976, "learning_rate": 6.143840374184038e-06, "loss": 0.6985, "step": 14296 }, { "epoch": 1.9118748328430062, "grad_norm": 1.4599846601486206, "learning_rate": 6.1425083280144095e-06, "loss": 0.7036, "step": 14297 }, { "epoch": 1.912008558438085, "grad_norm": 1.3134015798568726, "learning_rate": 6.141176362250504e-06, "loss": 0.6042, "step": 14298 }, { "epoch": 1.9121422840331639, "grad_norm": 1.1629736423492432, "learning_rate": 6.139844476920086e-06, "loss": 0.6894, "step": 14299 }, { "epoch": 1.912276009628243, "grad_norm": 1.3049935102462769, "learning_rate": 6.138512672050913e-06, "loss": 0.7221, "step": 14300 }, { "epoch": 1.9124097352233216, "grad_norm": 1.249802589416504, "learning_rate": 6.137180947670751e-06, "loss": 0.6635, "step": 14301 }, { "epoch": 1.9125434608184007, "grad_norm": 1.3167529106140137, "learning_rate": 6.135849303807353e-06, "loss": 0.6424, "step": 14302 }, { "epoch": 1.9126771864134795, "grad_norm": 1.1499838829040527, "learning_rate": 6.134517740488481e-06, "loss": 0.5879, "step": 14303 }, { "epoch": 1.9128109120085584, "grad_norm": 1.2181648015975952, "learning_rate": 6.133186257741888e-06, "loss": 0.6631, "step": 14304 }, { "epoch": 1.9129446376036374, "grad_norm": 1.232839584350586, "learning_rate": 6.1318548555953235e-06, "loss": 0.5954, "step": 14305 }, { "epoch": 1.9130783631987163, "grad_norm": 1.2607297897338867, "learning_rate": 6.130523534076549e-06, "loss": 0.6514, "step": 14306 }, { "epoch": 1.9132120887937951, "grad_norm": 1.2343944311141968, "learning_rate": 6.129192293213307e-06, "loss": 0.5875, "step": 14307 }, { "epoch": 1.9133458143888742, "grad_norm": 1.2563674449920654, "learning_rate": 6.127861133033345e-06, "loss": 0.7707, "step": 14308 }, { "epoch": 1.9134795399839528, "grad_norm": 1.5288641452789307, "learning_rate": 6.126530053564414e-06, "loss": 0.6887, "step": 14309 }, { "epoch": 1.9136132655790319, "grad_norm": 1.389078974723816, "learning_rate": 6.125199054834257e-06, "loss": 0.7357, "step": 14310 }, { "epoch": 1.9137469911741107, "grad_norm": 1.224880337715149, "learning_rate": 6.123868136870619e-06, "loss": 0.6976, "step": 14311 }, { "epoch": 1.9138807167691896, "grad_norm": 1.2137125730514526, "learning_rate": 6.122537299701241e-06, "loss": 0.6143, "step": 14312 }, { "epoch": 1.9140144423642687, "grad_norm": 1.338866949081421, "learning_rate": 6.1212065433538595e-06, "loss": 0.7598, "step": 14313 }, { "epoch": 1.9141481679593473, "grad_norm": 1.2913086414337158, "learning_rate": 6.11987586785622e-06, "loss": 0.6477, "step": 14314 }, { "epoch": 1.9142818935544264, "grad_norm": 1.15380859375, "learning_rate": 6.118545273236054e-06, "loss": 0.6436, "step": 14315 }, { "epoch": 1.9144156191495052, "grad_norm": 1.2949401140213013, "learning_rate": 6.1172147595210976e-06, "loss": 0.5933, "step": 14316 }, { "epoch": 1.914549344744584, "grad_norm": 1.1877398490905762, "learning_rate": 6.115884326739083e-06, "loss": 0.5922, "step": 14317 }, { "epoch": 1.9146830703396631, "grad_norm": 1.3242037296295166, "learning_rate": 6.114553974917741e-06, "loss": 0.7254, "step": 14318 }, { "epoch": 1.9148167959347417, "grad_norm": 1.3087332248687744, "learning_rate": 6.113223704084807e-06, "loss": 0.7583, "step": 14319 }, { "epoch": 1.9149505215298208, "grad_norm": 1.1986819505691528, "learning_rate": 6.111893514268007e-06, "loss": 0.7493, "step": 14320 }, { "epoch": 1.9150842471248997, "grad_norm": 1.1840780973434448, "learning_rate": 6.110563405495062e-06, "loss": 0.6304, "step": 14321 }, { "epoch": 1.9152179727199785, "grad_norm": 1.281714916229248, "learning_rate": 6.109233377793704e-06, "loss": 0.731, "step": 14322 }, { "epoch": 1.9153516983150576, "grad_norm": 1.229513168334961, "learning_rate": 6.107903431191652e-06, "loss": 0.6925, "step": 14323 }, { "epoch": 1.9154854239101364, "grad_norm": 1.3632463216781616, "learning_rate": 6.106573565716627e-06, "loss": 0.7201, "step": 14324 }, { "epoch": 1.9156191495052153, "grad_norm": 1.1269325017929077, "learning_rate": 6.105243781396353e-06, "loss": 0.6239, "step": 14325 }, { "epoch": 1.9157528751002944, "grad_norm": 1.2974936962127686, "learning_rate": 6.103914078258543e-06, "loss": 0.7661, "step": 14326 }, { "epoch": 1.915886600695373, "grad_norm": 1.3269054889678955, "learning_rate": 6.102584456330919e-06, "loss": 0.6952, "step": 14327 }, { "epoch": 1.916020326290452, "grad_norm": 1.2491848468780518, "learning_rate": 6.101254915641191e-06, "loss": 0.6751, "step": 14328 }, { "epoch": 1.916154051885531, "grad_norm": 1.2820974588394165, "learning_rate": 6.099925456217073e-06, "loss": 0.6795, "step": 14329 }, { "epoch": 1.9162877774806097, "grad_norm": 1.239327073097229, "learning_rate": 6.098596078086278e-06, "loss": 0.6885, "step": 14330 }, { "epoch": 1.9164215030756888, "grad_norm": 1.396090030670166, "learning_rate": 6.097266781276515e-06, "loss": 0.6929, "step": 14331 }, { "epoch": 1.9165552286707674, "grad_norm": 1.2558794021606445, "learning_rate": 6.095937565815489e-06, "loss": 0.6845, "step": 14332 }, { "epoch": 1.9166889542658465, "grad_norm": 1.2332311868667603, "learning_rate": 6.0946084317309105e-06, "loss": 0.6837, "step": 14333 }, { "epoch": 1.9168226798609254, "grad_norm": 1.2708615064620972, "learning_rate": 6.093279379050481e-06, "loss": 0.6875, "step": 14334 }, { "epoch": 1.9169564054560042, "grad_norm": 1.261328101158142, "learning_rate": 6.091950407801907e-06, "loss": 0.7211, "step": 14335 }, { "epoch": 1.9170901310510833, "grad_norm": 1.0668758153915405, "learning_rate": 6.090621518012884e-06, "loss": 0.6164, "step": 14336 }, { "epoch": 1.9172238566461621, "grad_norm": 1.2561827898025513, "learning_rate": 6.089292709711115e-06, "loss": 0.7058, "step": 14337 }, { "epoch": 1.917357582241241, "grad_norm": 1.3284103870391846, "learning_rate": 6.0879639829243e-06, "loss": 0.6982, "step": 14338 }, { "epoch": 1.9174913078363198, "grad_norm": 1.295108437538147, "learning_rate": 6.086635337680133e-06, "loss": 0.6631, "step": 14339 }, { "epoch": 1.9176250334313987, "grad_norm": 1.2891427278518677, "learning_rate": 6.085306774006303e-06, "loss": 0.7155, "step": 14340 }, { "epoch": 1.9177587590264777, "grad_norm": 1.2271307706832886, "learning_rate": 6.083978291930511e-06, "loss": 0.6499, "step": 14341 }, { "epoch": 1.9178924846215566, "grad_norm": 1.2307151556015015, "learning_rate": 6.082649891480441e-06, "loss": 0.6818, "step": 14342 }, { "epoch": 1.9180262102166354, "grad_norm": 1.301474928855896, "learning_rate": 6.081321572683787e-06, "loss": 0.7433, "step": 14343 }, { "epoch": 1.9181599358117145, "grad_norm": 1.254326343536377, "learning_rate": 6.0799933355682374e-06, "loss": 0.7086, "step": 14344 }, { "epoch": 1.9182936614067931, "grad_norm": 1.415197730064392, "learning_rate": 6.078665180161472e-06, "loss": 0.6127, "step": 14345 }, { "epoch": 1.9184273870018722, "grad_norm": 1.3525371551513672, "learning_rate": 6.0773371064911825e-06, "loss": 0.6824, "step": 14346 }, { "epoch": 1.918561112596951, "grad_norm": 1.2345032691955566, "learning_rate": 6.076009114585045e-06, "loss": 0.6523, "step": 14347 }, { "epoch": 1.91869483819203, "grad_norm": 1.1721729040145874, "learning_rate": 6.074681204470742e-06, "loss": 0.6789, "step": 14348 }, { "epoch": 1.918828563787109, "grad_norm": 1.307623028755188, "learning_rate": 6.073353376175955e-06, "loss": 0.683, "step": 14349 }, { "epoch": 1.9189622893821876, "grad_norm": 1.2120999097824097, "learning_rate": 6.072025629728356e-06, "loss": 0.6918, "step": 14350 }, { "epoch": 1.9190960149772667, "grad_norm": 1.310998558998108, "learning_rate": 6.07069796515563e-06, "loss": 0.6746, "step": 14351 }, { "epoch": 1.9192297405723455, "grad_norm": 1.2244346141815186, "learning_rate": 6.069370382485442e-06, "loss": 0.622, "step": 14352 }, { "epoch": 1.9193634661674244, "grad_norm": 1.2747117280960083, "learning_rate": 6.068042881745466e-06, "loss": 0.6241, "step": 14353 }, { "epoch": 1.9194971917625034, "grad_norm": 1.4055944681167603, "learning_rate": 6.0667154629633766e-06, "loss": 0.7352, "step": 14354 }, { "epoch": 1.9196309173575823, "grad_norm": 1.3048895597457886, "learning_rate": 6.065388126166837e-06, "loss": 0.6208, "step": 14355 }, { "epoch": 1.9197646429526611, "grad_norm": 1.3057315349578857, "learning_rate": 6.064060871383515e-06, "loss": 0.6981, "step": 14356 }, { "epoch": 1.91989836854774, "grad_norm": 1.365171194076538, "learning_rate": 6.062733698641083e-06, "loss": 0.7013, "step": 14357 }, { "epoch": 1.9200320941428188, "grad_norm": 1.3311299085617065, "learning_rate": 6.061406607967194e-06, "loss": 0.6939, "step": 14358 }, { "epoch": 1.920165819737898, "grad_norm": 1.3191055059432983, "learning_rate": 6.060079599389521e-06, "loss": 0.7193, "step": 14359 }, { "epoch": 1.9202995453329768, "grad_norm": 1.2935361862182617, "learning_rate": 6.0587526729357145e-06, "loss": 0.7014, "step": 14360 }, { "epoch": 1.9204332709280556, "grad_norm": 1.255576491355896, "learning_rate": 6.057425828633438e-06, "loss": 0.7022, "step": 14361 }, { "epoch": 1.9205669965231347, "grad_norm": 1.2457056045532227, "learning_rate": 6.056099066510349e-06, "loss": 0.65, "step": 14362 }, { "epoch": 1.9207007221182133, "grad_norm": 1.2485390901565552, "learning_rate": 6.054772386594099e-06, "loss": 0.7043, "step": 14363 }, { "epoch": 1.9208344477132924, "grad_norm": 1.2033710479736328, "learning_rate": 6.053445788912345e-06, "loss": 0.6785, "step": 14364 }, { "epoch": 1.9209681733083712, "grad_norm": 1.0446795225143433, "learning_rate": 6.052119273492739e-06, "loss": 0.6058, "step": 14365 }, { "epoch": 1.92110189890345, "grad_norm": 1.247416377067566, "learning_rate": 6.050792840362925e-06, "loss": 0.6328, "step": 14366 }, { "epoch": 1.9212356244985291, "grad_norm": 1.2555475234985352, "learning_rate": 6.049466489550558e-06, "loss": 0.7175, "step": 14367 }, { "epoch": 1.9213693500936078, "grad_norm": 1.2473998069763184, "learning_rate": 6.048140221083281e-06, "loss": 0.682, "step": 14368 }, { "epoch": 1.9215030756886868, "grad_norm": 1.4996132850646973, "learning_rate": 6.0468140349887375e-06, "loss": 0.7543, "step": 14369 }, { "epoch": 1.9216368012837657, "grad_norm": 1.2484712600708008, "learning_rate": 6.0454879312945755e-06, "loss": 0.6427, "step": 14370 }, { "epoch": 1.9217705268788445, "grad_norm": 1.1269407272338867, "learning_rate": 6.044161910028431e-06, "loss": 0.6127, "step": 14371 }, { "epoch": 1.9219042524739236, "grad_norm": 1.1464072465896606, "learning_rate": 6.0428359712179485e-06, "loss": 0.6385, "step": 14372 }, { "epoch": 1.9220379780690025, "grad_norm": 1.3393694162368774, "learning_rate": 6.041510114890765e-06, "loss": 0.721, "step": 14373 }, { "epoch": 1.9221717036640813, "grad_norm": 1.2954378128051758, "learning_rate": 6.040184341074511e-06, "loss": 0.7169, "step": 14374 }, { "epoch": 1.9223054292591604, "grad_norm": 1.1914572715759277, "learning_rate": 6.038858649796827e-06, "loss": 0.5934, "step": 14375 }, { "epoch": 1.922439154854239, "grad_norm": 1.3840868473052979, "learning_rate": 6.037533041085346e-06, "loss": 0.7007, "step": 14376 }, { "epoch": 1.922572880449318, "grad_norm": 1.3793399333953857, "learning_rate": 6.0362075149676935e-06, "loss": 0.7508, "step": 14377 }, { "epoch": 1.922706606044397, "grad_norm": 1.292108178138733, "learning_rate": 6.034882071471506e-06, "loss": 0.7011, "step": 14378 }, { "epoch": 1.9228403316394758, "grad_norm": 1.2115254402160645, "learning_rate": 6.033556710624404e-06, "loss": 0.6947, "step": 14379 }, { "epoch": 1.9229740572345548, "grad_norm": 1.3485713005065918, "learning_rate": 6.032231432454021e-06, "loss": 0.7072, "step": 14380 }, { "epoch": 1.9231077828296335, "grad_norm": 1.2306231260299683, "learning_rate": 6.0309062369879745e-06, "loss": 0.7167, "step": 14381 }, { "epoch": 1.9232415084247125, "grad_norm": 1.4424158334732056, "learning_rate": 6.029581124253887e-06, "loss": 0.7788, "step": 14382 }, { "epoch": 1.9233752340197914, "grad_norm": 1.469840168952942, "learning_rate": 6.028256094279387e-06, "loss": 0.7956, "step": 14383 }, { "epoch": 1.9235089596148702, "grad_norm": 1.354666829109192, "learning_rate": 6.026931147092088e-06, "loss": 0.753, "step": 14384 }, { "epoch": 1.9236426852099493, "grad_norm": 1.2361111640930176, "learning_rate": 6.025606282719603e-06, "loss": 0.6157, "step": 14385 }, { "epoch": 1.923776410805028, "grad_norm": 1.274280071258545, "learning_rate": 6.024281501189555e-06, "loss": 0.6902, "step": 14386 }, { "epoch": 1.923910136400107, "grad_norm": 1.297232747077942, "learning_rate": 6.022956802529552e-06, "loss": 0.7104, "step": 14387 }, { "epoch": 1.9240438619951858, "grad_norm": 1.2780399322509766, "learning_rate": 6.02163218676721e-06, "loss": 0.6887, "step": 14388 }, { "epoch": 1.9241775875902647, "grad_norm": 1.2703601121902466, "learning_rate": 6.020307653930141e-06, "loss": 0.6967, "step": 14389 }, { "epoch": 1.9243113131853438, "grad_norm": 1.2578415870666504, "learning_rate": 6.018983204045946e-06, "loss": 0.6301, "step": 14390 }, { "epoch": 1.9244450387804226, "grad_norm": 1.1512683629989624, "learning_rate": 6.017658837142242e-06, "loss": 0.6736, "step": 14391 }, { "epoch": 1.9245787643755015, "grad_norm": 1.0372085571289062, "learning_rate": 6.016334553246628e-06, "loss": 0.5888, "step": 14392 }, { "epoch": 1.9247124899705805, "grad_norm": 1.293728232383728, "learning_rate": 6.015010352386703e-06, "loss": 0.7056, "step": 14393 }, { "epoch": 1.9248462155656592, "grad_norm": 1.252553105354309, "learning_rate": 6.013686234590077e-06, "loss": 0.7313, "step": 14394 }, { "epoch": 1.9249799411607382, "grad_norm": 1.3527165651321411, "learning_rate": 6.012362199884345e-06, "loss": 0.6276, "step": 14395 }, { "epoch": 1.925113666755817, "grad_norm": 1.0362600088119507, "learning_rate": 6.011038248297112e-06, "loss": 0.5849, "step": 14396 }, { "epoch": 1.925247392350896, "grad_norm": 1.331507921218872, "learning_rate": 6.009714379855969e-06, "loss": 0.7018, "step": 14397 }, { "epoch": 1.925381117945975, "grad_norm": 1.1518824100494385, "learning_rate": 6.008390594588508e-06, "loss": 0.6201, "step": 14398 }, { "epoch": 1.9255148435410536, "grad_norm": 1.2447253465652466, "learning_rate": 6.007066892522328e-06, "loss": 0.6928, "step": 14399 }, { "epoch": 1.9256485691361327, "grad_norm": 1.1865956783294678, "learning_rate": 6.005743273685017e-06, "loss": 0.64, "step": 14400 }, { "epoch": 1.9257822947312115, "grad_norm": 1.4203073978424072, "learning_rate": 6.004419738104164e-06, "loss": 0.7649, "step": 14401 }, { "epoch": 1.9259160203262904, "grad_norm": 1.372672438621521, "learning_rate": 6.0030962858073615e-06, "loss": 0.7404, "step": 14402 }, { "epoch": 1.9260497459213695, "grad_norm": 1.2716954946517944, "learning_rate": 6.001772916822188e-06, "loss": 0.6997, "step": 14403 }, { "epoch": 1.926183471516448, "grad_norm": 1.1688146591186523, "learning_rate": 6.0004496311762365e-06, "loss": 0.6312, "step": 14404 }, { "epoch": 1.9263171971115272, "grad_norm": 1.327919602394104, "learning_rate": 5.999126428897085e-06, "loss": 0.6903, "step": 14405 }, { "epoch": 1.926450922706606, "grad_norm": 1.2755934000015259, "learning_rate": 5.9978033100123115e-06, "loss": 0.6997, "step": 14406 }, { "epoch": 1.9265846483016849, "grad_norm": 1.2396315336227417, "learning_rate": 5.9964802745494986e-06, "loss": 0.7452, "step": 14407 }, { "epoch": 1.926718373896764, "grad_norm": 1.3341940641403198, "learning_rate": 5.995157322536227e-06, "loss": 0.6824, "step": 14408 }, { "epoch": 1.9268520994918428, "grad_norm": 1.1795096397399902, "learning_rate": 5.993834454000065e-06, "loss": 0.6071, "step": 14409 }, { "epoch": 1.9269858250869216, "grad_norm": 1.1885406970977783, "learning_rate": 5.9925116689685925e-06, "loss": 0.6722, "step": 14410 }, { "epoch": 1.9271195506820007, "grad_norm": 1.2883967161178589, "learning_rate": 5.991188967469377e-06, "loss": 0.6556, "step": 14411 }, { "epoch": 1.9272532762770793, "grad_norm": 1.4747883081436157, "learning_rate": 5.989866349529994e-06, "loss": 0.7229, "step": 14412 }, { "epoch": 1.9273870018721584, "grad_norm": 1.461599588394165, "learning_rate": 5.98854381517801e-06, "loss": 0.7761, "step": 14413 }, { "epoch": 1.9275207274672372, "grad_norm": 1.4269999265670776, "learning_rate": 5.987221364440987e-06, "loss": 0.7585, "step": 14414 }, { "epoch": 1.927654453062316, "grad_norm": 1.0483715534210205, "learning_rate": 5.985898997346501e-06, "loss": 0.5657, "step": 14415 }, { "epoch": 1.9277881786573952, "grad_norm": 1.1475498676300049, "learning_rate": 5.984576713922108e-06, "loss": 0.6534, "step": 14416 }, { "epoch": 1.9279219042524738, "grad_norm": 1.126628041267395, "learning_rate": 5.983254514195368e-06, "loss": 0.6485, "step": 14417 }, { "epoch": 1.9280556298475529, "grad_norm": 1.2385354042053223, "learning_rate": 5.981932398193848e-06, "loss": 0.6342, "step": 14418 }, { "epoch": 1.9281893554426317, "grad_norm": 1.3059688806533813, "learning_rate": 5.9806103659450975e-06, "loss": 0.7523, "step": 14419 }, { "epoch": 1.9283230810377106, "grad_norm": 1.2351291179656982, "learning_rate": 5.979288417476681e-06, "loss": 0.6714, "step": 14420 }, { "epoch": 1.9284568066327896, "grad_norm": 1.3163154125213623, "learning_rate": 5.97796655281615e-06, "loss": 0.6966, "step": 14421 }, { "epoch": 1.9285905322278682, "grad_norm": 1.292440414428711, "learning_rate": 5.976644771991054e-06, "loss": 0.6204, "step": 14422 }, { "epoch": 1.9287242578229473, "grad_norm": 1.1648979187011719, "learning_rate": 5.9753230750289534e-06, "loss": 0.6743, "step": 14423 }, { "epoch": 1.9288579834180262, "grad_norm": 1.3234107494354248, "learning_rate": 5.974001461957392e-06, "loss": 0.6957, "step": 14424 }, { "epoch": 1.928991709013105, "grad_norm": 1.2569841146469116, "learning_rate": 5.972679932803912e-06, "loss": 0.6947, "step": 14425 }, { "epoch": 1.929125434608184, "grad_norm": 1.206868052482605, "learning_rate": 5.971358487596068e-06, "loss": 0.6446, "step": 14426 }, { "epoch": 1.929259160203263, "grad_norm": 1.3475213050842285, "learning_rate": 5.970037126361399e-06, "loss": 0.6709, "step": 14427 }, { "epoch": 1.9293928857983418, "grad_norm": 1.2687031030654907, "learning_rate": 5.968715849127454e-06, "loss": 0.6257, "step": 14428 }, { "epoch": 1.9295266113934209, "grad_norm": 1.2240290641784668, "learning_rate": 5.96739465592177e-06, "loss": 0.6668, "step": 14429 }, { "epoch": 1.9296603369884995, "grad_norm": 1.268046498298645, "learning_rate": 5.966073546771882e-06, "loss": 0.6723, "step": 14430 }, { "epoch": 1.9297940625835786, "grad_norm": 1.284865379333496, "learning_rate": 5.964752521705335e-06, "loss": 0.6934, "step": 14431 }, { "epoch": 1.9299277881786574, "grad_norm": 1.1946617364883423, "learning_rate": 5.9634315807496565e-06, "loss": 0.7276, "step": 14432 }, { "epoch": 1.9300615137737362, "grad_norm": 1.286138653755188, "learning_rate": 5.9621107239323835e-06, "loss": 0.6448, "step": 14433 }, { "epoch": 1.9301952393688153, "grad_norm": 1.2671115398406982, "learning_rate": 5.960789951281052e-06, "loss": 0.6584, "step": 14434 }, { "epoch": 1.930328964963894, "grad_norm": 1.3619554042816162, "learning_rate": 5.9594692628231855e-06, "loss": 0.6847, "step": 14435 }, { "epoch": 1.930462690558973, "grad_norm": 1.2801129817962646, "learning_rate": 5.95814865858632e-06, "loss": 0.6617, "step": 14436 }, { "epoch": 1.9305964161540519, "grad_norm": 1.3092200756072998, "learning_rate": 5.956828138597976e-06, "loss": 0.6717, "step": 14437 }, { "epoch": 1.9307301417491307, "grad_norm": 1.3832833766937256, "learning_rate": 5.955507702885679e-06, "loss": 0.7211, "step": 14438 }, { "epoch": 1.9308638673442098, "grad_norm": 1.3016250133514404, "learning_rate": 5.954187351476954e-06, "loss": 0.6866, "step": 14439 }, { "epoch": 1.9309975929392886, "grad_norm": 1.1444220542907715, "learning_rate": 5.952867084399327e-06, "loss": 0.5802, "step": 14440 }, { "epoch": 1.9311313185343675, "grad_norm": 1.3566572666168213, "learning_rate": 5.951546901680306e-06, "loss": 0.7755, "step": 14441 }, { "epoch": 1.9312650441294463, "grad_norm": 1.1164309978485107, "learning_rate": 5.950226803347421e-06, "loss": 0.6144, "step": 14442 }, { "epoch": 1.9313987697245252, "grad_norm": 1.231952428817749, "learning_rate": 5.948906789428179e-06, "loss": 0.6854, "step": 14443 }, { "epoch": 1.9315324953196042, "grad_norm": 1.2558611631393433, "learning_rate": 5.947586859950103e-06, "loss": 0.683, "step": 14444 }, { "epoch": 1.931666220914683, "grad_norm": 1.2705365419387817, "learning_rate": 5.946267014940699e-06, "loss": 0.7012, "step": 14445 }, { "epoch": 1.931799946509762, "grad_norm": 1.2902936935424805, "learning_rate": 5.944947254427478e-06, "loss": 0.7129, "step": 14446 }, { "epoch": 1.931933672104841, "grad_norm": 1.2105505466461182, "learning_rate": 5.943627578437955e-06, "loss": 0.6558, "step": 14447 }, { "epoch": 1.9320673976999196, "grad_norm": 1.1718028783798218, "learning_rate": 5.942307986999629e-06, "loss": 0.6234, "step": 14448 }, { "epoch": 1.9322011232949987, "grad_norm": 1.2239034175872803, "learning_rate": 5.9409884801400155e-06, "loss": 0.7149, "step": 14449 }, { "epoch": 1.9323348488900776, "grad_norm": 1.214671015739441, "learning_rate": 5.939669057886612e-06, "loss": 0.6828, "step": 14450 }, { "epoch": 1.9324685744851564, "grad_norm": 1.1674202680587769, "learning_rate": 5.938349720266918e-06, "loss": 0.615, "step": 14451 }, { "epoch": 1.9326023000802355, "grad_norm": 1.2422764301300049, "learning_rate": 5.93703046730844e-06, "loss": 0.6554, "step": 14452 }, { "epoch": 1.932736025675314, "grad_norm": 1.1725634336471558, "learning_rate": 5.935711299038676e-06, "loss": 0.6075, "step": 14453 }, { "epoch": 1.9328697512703932, "grad_norm": 1.3666969537734985, "learning_rate": 5.934392215485117e-06, "loss": 0.688, "step": 14454 }, { "epoch": 1.933003476865472, "grad_norm": 1.3080909252166748, "learning_rate": 5.933073216675265e-06, "loss": 0.6646, "step": 14455 }, { "epoch": 1.9331372024605509, "grad_norm": 1.3700485229492188, "learning_rate": 5.931754302636606e-06, "loss": 0.6627, "step": 14456 }, { "epoch": 1.93327092805563, "grad_norm": 1.3341292142868042, "learning_rate": 5.93043547339664e-06, "loss": 0.7792, "step": 14457 }, { "epoch": 1.9334046536507088, "grad_norm": 1.1330444812774658, "learning_rate": 5.929116728982851e-06, "loss": 0.5837, "step": 14458 }, { "epoch": 1.9335383792457876, "grad_norm": 1.271798014640808, "learning_rate": 5.927798069422727e-06, "loss": 0.6645, "step": 14459 }, { "epoch": 1.9336721048408665, "grad_norm": 1.2712351083755493, "learning_rate": 5.926479494743758e-06, "loss": 0.6097, "step": 14460 }, { "epoch": 1.9338058304359453, "grad_norm": 1.3827751874923706, "learning_rate": 5.925161004973427e-06, "loss": 0.7183, "step": 14461 }, { "epoch": 1.9339395560310244, "grad_norm": 1.1603165864944458, "learning_rate": 5.923842600139211e-06, "loss": 0.6447, "step": 14462 }, { "epoch": 1.9340732816261033, "grad_norm": 1.1967827081680298, "learning_rate": 5.9225242802686e-06, "loss": 0.6516, "step": 14463 }, { "epoch": 1.934207007221182, "grad_norm": 1.2776082754135132, "learning_rate": 5.921206045389065e-06, "loss": 0.6366, "step": 14464 }, { "epoch": 1.9343407328162612, "grad_norm": 1.192592740058899, "learning_rate": 5.919887895528088e-06, "loss": 0.6482, "step": 14465 }, { "epoch": 1.9344744584113398, "grad_norm": 1.3361238241195679, "learning_rate": 5.918569830713145e-06, "loss": 0.6839, "step": 14466 }, { "epoch": 1.9346081840064189, "grad_norm": 1.2640334367752075, "learning_rate": 5.917251850971706e-06, "loss": 0.7188, "step": 14467 }, { "epoch": 1.9347419096014977, "grad_norm": 1.2613823413848877, "learning_rate": 5.91593395633125e-06, "loss": 0.6783, "step": 14468 }, { "epoch": 1.9348756351965766, "grad_norm": 1.1773933172225952, "learning_rate": 5.914616146819241e-06, "loss": 0.6711, "step": 14469 }, { "epoch": 1.9350093607916556, "grad_norm": 1.2209922075271606, "learning_rate": 5.913298422463145e-06, "loss": 0.6484, "step": 14470 }, { "epoch": 1.9351430863867343, "grad_norm": 1.3920403718948364, "learning_rate": 5.911980783290436e-06, "loss": 0.6639, "step": 14471 }, { "epoch": 1.9352768119818133, "grad_norm": 1.2942211627960205, "learning_rate": 5.910663229328573e-06, "loss": 0.7608, "step": 14472 }, { "epoch": 1.9354105375768922, "grad_norm": 1.1851541996002197, "learning_rate": 5.909345760605027e-06, "loss": 0.7274, "step": 14473 }, { "epoch": 1.935544263171971, "grad_norm": 1.3126583099365234, "learning_rate": 5.908028377147252e-06, "loss": 0.686, "step": 14474 }, { "epoch": 1.93567798876705, "grad_norm": 1.5827136039733887, "learning_rate": 5.906711078982708e-06, "loss": 0.7552, "step": 14475 }, { "epoch": 1.935811714362129, "grad_norm": 1.377055048942566, "learning_rate": 5.905393866138857e-06, "loss": 0.6839, "step": 14476 }, { "epoch": 1.9359454399572078, "grad_norm": 1.3584359884262085, "learning_rate": 5.904076738643153e-06, "loss": 0.6314, "step": 14477 }, { "epoch": 1.9360791655522869, "grad_norm": 1.2355737686157227, "learning_rate": 5.902759696523046e-06, "loss": 0.642, "step": 14478 }, { "epoch": 1.9362128911473655, "grad_norm": 1.1691131591796875, "learning_rate": 5.9014427398059985e-06, "loss": 0.6349, "step": 14479 }, { "epoch": 1.9363466167424446, "grad_norm": 1.10879647731781, "learning_rate": 5.90012586851945e-06, "loss": 0.5618, "step": 14480 }, { "epoch": 1.9364803423375234, "grad_norm": 1.255393385887146, "learning_rate": 5.898809082690857e-06, "loss": 0.6133, "step": 14481 }, { "epoch": 1.9366140679326023, "grad_norm": 1.4464377164840698, "learning_rate": 5.897492382347667e-06, "loss": 0.6866, "step": 14482 }, { "epoch": 1.9367477935276813, "grad_norm": 1.1603502035140991, "learning_rate": 5.896175767517318e-06, "loss": 0.57, "step": 14483 }, { "epoch": 1.93688151912276, "grad_norm": 1.1326144933700562, "learning_rate": 5.89485923822726e-06, "loss": 0.685, "step": 14484 }, { "epoch": 1.937015244717839, "grad_norm": 1.2566906213760376, "learning_rate": 5.893542794504934e-06, "loss": 0.6764, "step": 14485 }, { "epoch": 1.9371489703129179, "grad_norm": 1.2922167778015137, "learning_rate": 5.892226436377775e-06, "loss": 0.6971, "step": 14486 }, { "epoch": 1.9372826959079967, "grad_norm": 1.1553624868392944, "learning_rate": 5.89091016387323e-06, "loss": 0.615, "step": 14487 }, { "epoch": 1.9374164215030758, "grad_norm": 1.3865890502929688, "learning_rate": 5.889593977018726e-06, "loss": 0.6967, "step": 14488 }, { "epoch": 1.9375501470981544, "grad_norm": 1.203579068183899, "learning_rate": 5.888277875841708e-06, "loss": 0.6682, "step": 14489 }, { "epoch": 1.9376838726932335, "grad_norm": 1.4297423362731934, "learning_rate": 5.8869618603696e-06, "loss": 0.6998, "step": 14490 }, { "epoch": 1.9378175982883123, "grad_norm": 1.2040075063705444, "learning_rate": 5.885645930629833e-06, "loss": 0.6937, "step": 14491 }, { "epoch": 1.9379513238833912, "grad_norm": 1.2505990266799927, "learning_rate": 5.884330086649845e-06, "loss": 0.6348, "step": 14492 }, { "epoch": 1.9380850494784703, "grad_norm": 1.346511721611023, "learning_rate": 5.883014328457059e-06, "loss": 0.7285, "step": 14493 }, { "epoch": 1.9382187750735491, "grad_norm": 1.3473567962646484, "learning_rate": 5.881698656078894e-06, "loss": 0.7286, "step": 14494 }, { "epoch": 1.938352500668628, "grad_norm": 1.2619190216064453, "learning_rate": 5.8803830695427854e-06, "loss": 0.707, "step": 14495 }, { "epoch": 1.938486226263707, "grad_norm": 1.281182050704956, "learning_rate": 5.879067568876145e-06, "loss": 0.6853, "step": 14496 }, { "epoch": 1.9386199518587857, "grad_norm": 1.2203370332717896, "learning_rate": 5.877752154106399e-06, "loss": 0.6536, "step": 14497 }, { "epoch": 1.9387536774538647, "grad_norm": 1.2782256603240967, "learning_rate": 5.876436825260967e-06, "loss": 0.6809, "step": 14498 }, { "epoch": 1.9388874030489436, "grad_norm": 1.2935810089111328, "learning_rate": 5.87512158236726e-06, "loss": 0.625, "step": 14499 }, { "epoch": 1.9390211286440224, "grad_norm": 1.1280409097671509, "learning_rate": 5.8738064254527e-06, "loss": 0.5785, "step": 14500 }, { "epoch": 1.9391548542391015, "grad_norm": 1.3321349620819092, "learning_rate": 5.872491354544698e-06, "loss": 0.7636, "step": 14501 }, { "epoch": 1.9392885798341801, "grad_norm": 1.1474483013153076, "learning_rate": 5.8711763696706595e-06, "loss": 0.6445, "step": 14502 }, { "epoch": 1.9394223054292592, "grad_norm": 1.193880558013916, "learning_rate": 5.869861470858e-06, "loss": 0.6973, "step": 14503 }, { "epoch": 1.939556031024338, "grad_norm": 1.3732945919036865, "learning_rate": 5.8685466581341246e-06, "loss": 0.7527, "step": 14504 }, { "epoch": 1.939689756619417, "grad_norm": 1.2913517951965332, "learning_rate": 5.867231931526445e-06, "loss": 0.7157, "step": 14505 }, { "epoch": 1.939823482214496, "grad_norm": 1.3383525609970093, "learning_rate": 5.86591729106236e-06, "loss": 0.755, "step": 14506 }, { "epoch": 1.9399572078095746, "grad_norm": 1.3321714401245117, "learning_rate": 5.864602736769269e-06, "loss": 0.6872, "step": 14507 }, { "epoch": 1.9400909334046537, "grad_norm": 1.2463963031768799, "learning_rate": 5.863288268674583e-06, "loss": 0.6559, "step": 14508 }, { "epoch": 1.9402246589997325, "grad_norm": 1.1296796798706055, "learning_rate": 5.861973886805692e-06, "loss": 0.6393, "step": 14509 }, { "epoch": 1.9403583845948114, "grad_norm": 1.4072933197021484, "learning_rate": 5.860659591189992e-06, "loss": 0.667, "step": 14510 }, { "epoch": 1.9404921101898904, "grad_norm": 1.1853086948394775, "learning_rate": 5.859345381854888e-06, "loss": 0.688, "step": 14511 }, { "epoch": 1.9406258357849693, "grad_norm": 1.287976861000061, "learning_rate": 5.858031258827761e-06, "loss": 0.6782, "step": 14512 }, { "epoch": 1.9407595613800481, "grad_norm": 1.2892423868179321, "learning_rate": 5.856717222136015e-06, "loss": 0.7069, "step": 14513 }, { "epoch": 1.9408932869751272, "grad_norm": 1.137879490852356, "learning_rate": 5.855403271807033e-06, "loss": 0.6661, "step": 14514 }, { "epoch": 1.9410270125702058, "grad_norm": 1.3065146207809448, "learning_rate": 5.8540894078682e-06, "loss": 0.656, "step": 14515 }, { "epoch": 1.941160738165285, "grad_norm": 1.1426358222961426, "learning_rate": 5.8527756303469074e-06, "loss": 0.5722, "step": 14516 }, { "epoch": 1.9412944637603637, "grad_norm": 1.3579005002975464, "learning_rate": 5.851461939270542e-06, "loss": 0.7527, "step": 14517 }, { "epoch": 1.9414281893554426, "grad_norm": 1.223244309425354, "learning_rate": 5.850148334666476e-06, "loss": 0.6481, "step": 14518 }, { "epoch": 1.9415619149505217, "grad_norm": 1.231117844581604, "learning_rate": 5.848834816562104e-06, "loss": 0.6353, "step": 14519 }, { "epoch": 1.9416956405456003, "grad_norm": 1.32968270778656, "learning_rate": 5.8475213849847935e-06, "loss": 0.6779, "step": 14520 }, { "epoch": 1.9418293661406794, "grad_norm": 1.3732610940933228, "learning_rate": 5.846208039961929e-06, "loss": 0.6623, "step": 14521 }, { "epoch": 1.9419630917357582, "grad_norm": 1.217100977897644, "learning_rate": 5.844894781520881e-06, "loss": 0.63, "step": 14522 }, { "epoch": 1.942096817330837, "grad_norm": 1.2675803899765015, "learning_rate": 5.843581609689024e-06, "loss": 0.7087, "step": 14523 }, { "epoch": 1.9422305429259161, "grad_norm": 1.2878421545028687, "learning_rate": 5.842268524493735e-06, "loss": 0.7007, "step": 14524 }, { "epoch": 1.9423642685209948, "grad_norm": 1.336093783378601, "learning_rate": 5.840955525962381e-06, "loss": 0.6629, "step": 14525 }, { "epoch": 1.9424979941160738, "grad_norm": 1.180262804031372, "learning_rate": 5.839642614122324e-06, "loss": 0.6274, "step": 14526 }, { "epoch": 1.9426317197111527, "grad_norm": 1.2009515762329102, "learning_rate": 5.83832978900094e-06, "loss": 0.5642, "step": 14527 }, { "epoch": 1.9427654453062315, "grad_norm": 1.3272777795791626, "learning_rate": 5.837017050625583e-06, "loss": 0.7129, "step": 14528 }, { "epoch": 1.9428991709013106, "grad_norm": 1.5426565408706665, "learning_rate": 5.835704399023631e-06, "loss": 0.7122, "step": 14529 }, { "epoch": 1.9430328964963894, "grad_norm": 1.2776893377304077, "learning_rate": 5.83439183422243e-06, "loss": 0.7302, "step": 14530 }, { "epoch": 1.9431666220914683, "grad_norm": 1.315960168838501, "learning_rate": 5.833079356249347e-06, "loss": 0.6828, "step": 14531 }, { "epoch": 1.9433003476865474, "grad_norm": 1.2247551679611206, "learning_rate": 5.8317669651317375e-06, "loss": 0.6473, "step": 14532 }, { "epoch": 1.943434073281626, "grad_norm": 1.3713740110397339, "learning_rate": 5.830454660896956e-06, "loss": 0.7823, "step": 14533 }, { "epoch": 1.943567798876705, "grad_norm": 1.3235563039779663, "learning_rate": 5.829142443572358e-06, "loss": 0.7127, "step": 14534 }, { "epoch": 1.943701524471784, "grad_norm": 1.2662243843078613, "learning_rate": 5.827830313185294e-06, "loss": 0.664, "step": 14535 }, { "epoch": 1.9438352500668628, "grad_norm": 1.3169081211090088, "learning_rate": 5.826518269763116e-06, "loss": 0.719, "step": 14536 }, { "epoch": 1.9439689756619418, "grad_norm": 1.3273957967758179, "learning_rate": 5.82520631333317e-06, "loss": 0.7934, "step": 14537 }, { "epoch": 1.9441027012570204, "grad_norm": 1.237271785736084, "learning_rate": 5.823894443922804e-06, "loss": 0.632, "step": 14538 }, { "epoch": 1.9442364268520995, "grad_norm": 1.2337758541107178, "learning_rate": 5.822582661559362e-06, "loss": 0.6629, "step": 14539 }, { "epoch": 1.9443701524471784, "grad_norm": 1.3367664813995361, "learning_rate": 5.821270966270187e-06, "loss": 0.5989, "step": 14540 }, { "epoch": 1.9445038780422572, "grad_norm": 1.465135097503662, "learning_rate": 5.819959358082621e-06, "loss": 0.7285, "step": 14541 }, { "epoch": 1.9446376036373363, "grad_norm": 1.2941207885742188, "learning_rate": 5.818647837024002e-06, "loss": 0.6793, "step": 14542 }, { "epoch": 1.9447713292324151, "grad_norm": 1.3215018510818481, "learning_rate": 5.817336403121671e-06, "loss": 0.6993, "step": 14543 }, { "epoch": 1.944905054827494, "grad_norm": 1.3031100034713745, "learning_rate": 5.816025056402953e-06, "loss": 0.7426, "step": 14544 }, { "epoch": 1.9450387804225728, "grad_norm": 1.2978556156158447, "learning_rate": 5.814713796895193e-06, "loss": 0.6849, "step": 14545 }, { "epoch": 1.9451725060176517, "grad_norm": 1.25335693359375, "learning_rate": 5.813402624625722e-06, "loss": 0.6421, "step": 14546 }, { "epoch": 1.9453062316127308, "grad_norm": 1.2016547918319702, "learning_rate": 5.81209153962186e-06, "loss": 0.612, "step": 14547 }, { "epoch": 1.9454399572078096, "grad_norm": 1.1580238342285156, "learning_rate": 5.810780541910951e-06, "loss": 0.5703, "step": 14548 }, { "epoch": 1.9455736828028884, "grad_norm": 1.2101776599884033, "learning_rate": 5.809469631520304e-06, "loss": 0.7102, "step": 14549 }, { "epoch": 1.9457074083979675, "grad_norm": 1.2439404726028442, "learning_rate": 5.808158808477261e-06, "loss": 0.6384, "step": 14550 }, { "epoch": 1.9458411339930461, "grad_norm": 1.3315669298171997, "learning_rate": 5.806848072809132e-06, "loss": 0.7239, "step": 14551 }, { "epoch": 1.9459748595881252, "grad_norm": 1.3043615818023682, "learning_rate": 5.805537424543244e-06, "loss": 0.6621, "step": 14552 }, { "epoch": 1.946108585183204, "grad_norm": 1.3136605024337769, "learning_rate": 5.8042268637069125e-06, "loss": 0.6922, "step": 14553 }, { "epoch": 1.946242310778283, "grad_norm": 1.2590259313583374, "learning_rate": 5.802916390327459e-06, "loss": 0.5407, "step": 14554 }, { "epoch": 1.946376036373362, "grad_norm": 1.2732837200164795, "learning_rate": 5.801606004432197e-06, "loss": 0.7444, "step": 14555 }, { "epoch": 1.9465097619684406, "grad_norm": 1.252768635749817, "learning_rate": 5.800295706048439e-06, "loss": 0.6423, "step": 14556 }, { "epoch": 1.9466434875635197, "grad_norm": 1.1760319471359253, "learning_rate": 5.7989854952035e-06, "loss": 0.6832, "step": 14557 }, { "epoch": 1.9467772131585985, "grad_norm": 1.1559561491012573, "learning_rate": 5.797675371924687e-06, "loss": 0.6703, "step": 14558 }, { "epoch": 1.9469109387536774, "grad_norm": 1.2067400217056274, "learning_rate": 5.79636533623931e-06, "loss": 0.6302, "step": 14559 }, { "epoch": 1.9470446643487564, "grad_norm": 1.1841014623641968, "learning_rate": 5.795055388174675e-06, "loss": 0.6236, "step": 14560 }, { "epoch": 1.9471783899438353, "grad_norm": 1.3379091024398804, "learning_rate": 5.7937455277580875e-06, "loss": 0.6851, "step": 14561 }, { "epoch": 1.9473121155389141, "grad_norm": 1.334794044494629, "learning_rate": 5.7924357550168534e-06, "loss": 0.6841, "step": 14562 }, { "epoch": 1.947445841133993, "grad_norm": 1.3697844743728638, "learning_rate": 5.791126069978261e-06, "loss": 0.7293, "step": 14563 }, { "epoch": 1.9475795667290718, "grad_norm": 1.3431954383850098, "learning_rate": 5.789816472669622e-06, "loss": 0.6898, "step": 14564 }, { "epoch": 1.947713292324151, "grad_norm": 1.3829270601272583, "learning_rate": 5.788506963118232e-06, "loss": 0.6781, "step": 14565 }, { "epoch": 1.9478470179192298, "grad_norm": 1.2572550773620605, "learning_rate": 5.787197541351383e-06, "loss": 0.6945, "step": 14566 }, { "epoch": 1.9479807435143086, "grad_norm": 1.2475545406341553, "learning_rate": 5.785888207396374e-06, "loss": 0.6362, "step": 14567 }, { "epoch": 1.9481144691093877, "grad_norm": 1.12861967086792, "learning_rate": 5.784578961280485e-06, "loss": 0.5718, "step": 14568 }, { "epoch": 1.9482481947044663, "grad_norm": 1.3583524227142334, "learning_rate": 5.783269803031022e-06, "loss": 0.7706, "step": 14569 }, { "epoch": 1.9483819202995454, "grad_norm": 1.3273788690567017, "learning_rate": 5.78196073267526e-06, "loss": 0.6147, "step": 14570 }, { "epoch": 1.9485156458946242, "grad_norm": 1.2622047662734985, "learning_rate": 5.780651750240491e-06, "loss": 0.6911, "step": 14571 }, { "epoch": 1.948649371489703, "grad_norm": 1.318591833114624, "learning_rate": 5.779342855754e-06, "loss": 0.5865, "step": 14572 }, { "epoch": 1.9487830970847821, "grad_norm": 1.2917972803115845, "learning_rate": 5.778034049243062e-06, "loss": 0.613, "step": 14573 }, { "epoch": 1.9489168226798608, "grad_norm": 1.19225013256073, "learning_rate": 5.776725330734973e-06, "loss": 0.652, "step": 14574 }, { "epoch": 1.9490505482749398, "grad_norm": 1.2270605564117432, "learning_rate": 5.7754167002570015e-06, "loss": 0.6307, "step": 14575 }, { "epoch": 1.9491842738700187, "grad_norm": 1.4167894124984741, "learning_rate": 5.774108157836424e-06, "loss": 0.6809, "step": 14576 }, { "epoch": 1.9493179994650975, "grad_norm": 1.2539727687835693, "learning_rate": 5.772799703500519e-06, "loss": 0.6394, "step": 14577 }, { "epoch": 1.9494517250601766, "grad_norm": 1.4108961820602417, "learning_rate": 5.771491337276559e-06, "loss": 0.7197, "step": 14578 }, { "epoch": 1.9495854506552555, "grad_norm": 1.239606499671936, "learning_rate": 5.7701830591918164e-06, "loss": 0.6709, "step": 14579 }, { "epoch": 1.9497191762503343, "grad_norm": 1.4313056468963623, "learning_rate": 5.76887486927356e-06, "loss": 0.7008, "step": 14580 }, { "epoch": 1.9498529018454134, "grad_norm": 1.2129759788513184, "learning_rate": 5.767566767549058e-06, "loss": 0.7019, "step": 14581 }, { "epoch": 1.949986627440492, "grad_norm": 1.1411004066467285, "learning_rate": 5.766258754045577e-06, "loss": 0.5381, "step": 14582 }, { "epoch": 1.950120353035571, "grad_norm": 1.271362066268921, "learning_rate": 5.764950828790381e-06, "loss": 0.6141, "step": 14583 }, { "epoch": 1.95025407863065, "grad_norm": 1.1945751905441284, "learning_rate": 5.763642991810732e-06, "loss": 0.6496, "step": 14584 }, { "epoch": 1.9503878042257288, "grad_norm": 1.2459964752197266, "learning_rate": 5.762335243133892e-06, "loss": 0.6351, "step": 14585 }, { "epoch": 1.9505215298208078, "grad_norm": 1.202523112297058, "learning_rate": 5.761027582787122e-06, "loss": 0.6306, "step": 14586 }, { "epoch": 1.9506552554158865, "grad_norm": 1.437219262123108, "learning_rate": 5.759720010797668e-06, "loss": 0.736, "step": 14587 }, { "epoch": 1.9507889810109655, "grad_norm": 1.0865683555603027, "learning_rate": 5.758412527192801e-06, "loss": 0.6181, "step": 14588 }, { "epoch": 1.9509227066060444, "grad_norm": 1.2649890184402466, "learning_rate": 5.7571051319997585e-06, "loss": 0.7191, "step": 14589 }, { "epoch": 1.9510564322011232, "grad_norm": 1.2200759649276733, "learning_rate": 5.755797825245802e-06, "loss": 0.6957, "step": 14590 }, { "epoch": 1.9511901577962023, "grad_norm": 1.2296539545059204, "learning_rate": 5.754490606958185e-06, "loss": 0.638, "step": 14591 }, { "epoch": 1.951323883391281, "grad_norm": 1.3819276094436646, "learning_rate": 5.753183477164139e-06, "loss": 0.7047, "step": 14592 }, { "epoch": 1.95145760898636, "grad_norm": 1.2433072328567505, "learning_rate": 5.751876435890929e-06, "loss": 0.6435, "step": 14593 }, { "epoch": 1.9515913345814389, "grad_norm": 1.2099626064300537, "learning_rate": 5.750569483165785e-06, "loss": 0.6793, "step": 14594 }, { "epoch": 1.9517250601765177, "grad_norm": 1.026604413986206, "learning_rate": 5.7492626190159515e-06, "loss": 0.5591, "step": 14595 }, { "epoch": 1.9518587857715968, "grad_norm": 1.3085094690322876, "learning_rate": 5.747955843468674e-06, "loss": 0.6698, "step": 14596 }, { "epoch": 1.9519925113666756, "grad_norm": 1.235945463180542, "learning_rate": 5.746649156551187e-06, "loss": 0.6139, "step": 14597 }, { "epoch": 1.9521262369617545, "grad_norm": 1.449135661125183, "learning_rate": 5.74534255829073e-06, "loss": 0.6993, "step": 14598 }, { "epoch": 1.9522599625568335, "grad_norm": 1.2851158380508423, "learning_rate": 5.744036048714534e-06, "loss": 0.703, "step": 14599 }, { "epoch": 1.9523936881519122, "grad_norm": 1.1898199319839478, "learning_rate": 5.742729627849836e-06, "loss": 0.6717, "step": 14600 }, { "epoch": 1.9525274137469912, "grad_norm": 1.21040940284729, "learning_rate": 5.7414232957238635e-06, "loss": 0.681, "step": 14601 }, { "epoch": 1.95266113934207, "grad_norm": 1.1966288089752197, "learning_rate": 5.740117052363848e-06, "loss": 0.6697, "step": 14602 }, { "epoch": 1.952794864937149, "grad_norm": 1.3730822801589966, "learning_rate": 5.738810897797016e-06, "loss": 0.7008, "step": 14603 }, { "epoch": 1.952928590532228, "grad_norm": 1.1394495964050293, "learning_rate": 5.737504832050594e-06, "loss": 0.6007, "step": 14604 }, { "epoch": 1.9530623161273066, "grad_norm": 1.4064130783081055, "learning_rate": 5.736198855151804e-06, "loss": 0.6924, "step": 14605 }, { "epoch": 1.9531960417223857, "grad_norm": 1.408184289932251, "learning_rate": 5.734892967127869e-06, "loss": 0.7476, "step": 14606 }, { "epoch": 1.9533297673174645, "grad_norm": 1.194059133529663, "learning_rate": 5.733587168006014e-06, "loss": 0.6505, "step": 14607 }, { "epoch": 1.9534634929125434, "grad_norm": 1.3690651655197144, "learning_rate": 5.732281457813445e-06, "loss": 0.7018, "step": 14608 }, { "epoch": 1.9535972185076225, "grad_norm": 1.1940110921859741, "learning_rate": 5.730975836577386e-06, "loss": 0.6149, "step": 14609 }, { "epoch": 1.953730944102701, "grad_norm": 1.268286108970642, "learning_rate": 5.729670304325057e-06, "loss": 0.6151, "step": 14610 }, { "epoch": 1.9538646696977802, "grad_norm": 1.3428348302841187, "learning_rate": 5.728364861083655e-06, "loss": 0.6842, "step": 14611 }, { "epoch": 1.953998395292859, "grad_norm": 1.073595404624939, "learning_rate": 5.727059506880408e-06, "loss": 0.6316, "step": 14612 }, { "epoch": 1.9541321208879379, "grad_norm": 1.206764817237854, "learning_rate": 5.72575424174251e-06, "loss": 0.6534, "step": 14613 }, { "epoch": 1.954265846483017, "grad_norm": 1.2491192817687988, "learning_rate": 5.724449065697182e-06, "loss": 0.682, "step": 14614 }, { "epoch": 1.9543995720780958, "grad_norm": 1.157507061958313, "learning_rate": 5.723143978771617e-06, "loss": 0.6271, "step": 14615 }, { "epoch": 1.9545332976731746, "grad_norm": 1.2509205341339111, "learning_rate": 5.721838980993025e-06, "loss": 0.7223, "step": 14616 }, { "epoch": 1.9546670232682537, "grad_norm": 1.2894445657730103, "learning_rate": 5.720534072388605e-06, "loss": 0.7047, "step": 14617 }, { "epoch": 1.9548007488633323, "grad_norm": 1.226746678352356, "learning_rate": 5.719229252985553e-06, "loss": 0.6554, "step": 14618 }, { "epoch": 1.9549344744584114, "grad_norm": 1.3642951250076294, "learning_rate": 5.7179245228110795e-06, "loss": 0.6613, "step": 14619 }, { "epoch": 1.9550682000534902, "grad_norm": 1.3229955434799194, "learning_rate": 5.716619881892367e-06, "loss": 0.685, "step": 14620 }, { "epoch": 1.955201925648569, "grad_norm": 1.3023368120193481, "learning_rate": 5.715315330256614e-06, "loss": 0.6266, "step": 14621 }, { "epoch": 1.9553356512436482, "grad_norm": 1.2533899545669556, "learning_rate": 5.714010867931015e-06, "loss": 0.6259, "step": 14622 }, { "epoch": 1.9554693768387268, "grad_norm": 1.314460039138794, "learning_rate": 5.7127064949427566e-06, "loss": 0.6825, "step": 14623 }, { "epoch": 1.9556031024338059, "grad_norm": 1.182695984840393, "learning_rate": 5.71140221131903e-06, "loss": 0.6088, "step": 14624 }, { "epoch": 1.9557368280288847, "grad_norm": 1.3603765964508057, "learning_rate": 5.710098017087019e-06, "loss": 0.6714, "step": 14625 }, { "epoch": 1.9558705536239636, "grad_norm": 1.1404730081558228, "learning_rate": 5.708793912273911e-06, "loss": 0.626, "step": 14626 }, { "epoch": 1.9560042792190426, "grad_norm": 1.2869254350662231, "learning_rate": 5.7074898969068874e-06, "loss": 0.681, "step": 14627 }, { "epoch": 1.9561380048141215, "grad_norm": 1.269412875175476, "learning_rate": 5.7061859710131296e-06, "loss": 0.6575, "step": 14628 }, { "epoch": 1.9562717304092003, "grad_norm": 1.2813360691070557, "learning_rate": 5.7048821346198155e-06, "loss": 0.6241, "step": 14629 }, { "epoch": 1.9564054560042792, "grad_norm": 1.332628846168518, "learning_rate": 5.703578387754124e-06, "loss": 0.6185, "step": 14630 }, { "epoch": 1.956539181599358, "grad_norm": 1.459277868270874, "learning_rate": 5.702274730443234e-06, "loss": 0.677, "step": 14631 }, { "epoch": 1.956672907194437, "grad_norm": 1.2983884811401367, "learning_rate": 5.700971162714306e-06, "loss": 0.7439, "step": 14632 }, { "epoch": 1.956806632789516, "grad_norm": 1.2364405393600464, "learning_rate": 5.69966768459453e-06, "loss": 0.6815, "step": 14633 }, { "epoch": 1.9569403583845948, "grad_norm": 1.215437650680542, "learning_rate": 5.698364296111057e-06, "loss": 0.6297, "step": 14634 }, { "epoch": 1.9570740839796739, "grad_norm": 1.315403938293457, "learning_rate": 5.697060997291071e-06, "loss": 0.685, "step": 14635 }, { "epoch": 1.9572078095747525, "grad_norm": 1.1407335996627808, "learning_rate": 5.695757788161729e-06, "loss": 0.5997, "step": 14636 }, { "epoch": 1.9573415351698316, "grad_norm": 1.3310550451278687, "learning_rate": 5.694454668750191e-06, "loss": 0.7771, "step": 14637 }, { "epoch": 1.9574752607649104, "grad_norm": 1.2347928285598755, "learning_rate": 5.6931516390836364e-06, "loss": 0.6863, "step": 14638 }, { "epoch": 1.9576089863599893, "grad_norm": 1.363997220993042, "learning_rate": 5.6918486991892085e-06, "loss": 0.5978, "step": 14639 }, { "epoch": 1.9577427119550683, "grad_norm": 1.293451189994812, "learning_rate": 5.690545849094072e-06, "loss": 0.661, "step": 14640 }, { "epoch": 1.957876437550147, "grad_norm": 1.3363726139068604, "learning_rate": 5.689243088825385e-06, "loss": 0.7209, "step": 14641 }, { "epoch": 1.958010163145226, "grad_norm": 1.381197452545166, "learning_rate": 5.6879404184102994e-06, "loss": 0.7761, "step": 14642 }, { "epoch": 1.9581438887403049, "grad_norm": 1.255351185798645, "learning_rate": 5.68663783787597e-06, "loss": 0.6871, "step": 14643 }, { "epoch": 1.9582776143353837, "grad_norm": 1.3195786476135254, "learning_rate": 5.685335347249548e-06, "loss": 0.7137, "step": 14644 }, { "epoch": 1.9584113399304628, "grad_norm": 1.316573977470398, "learning_rate": 5.684032946558182e-06, "loss": 0.617, "step": 14645 }, { "epoch": 1.9585450655255416, "grad_norm": 1.3466285467147827, "learning_rate": 5.682730635829019e-06, "loss": 0.6937, "step": 14646 }, { "epoch": 1.9586787911206205, "grad_norm": 1.3530833721160889, "learning_rate": 5.681428415089204e-06, "loss": 0.7281, "step": 14647 }, { "epoch": 1.9588125167156993, "grad_norm": 1.2846466302871704, "learning_rate": 5.680126284365882e-06, "loss": 0.7142, "step": 14648 }, { "epoch": 1.9589462423107782, "grad_norm": 1.1907652616500854, "learning_rate": 5.678824243686194e-06, "loss": 0.6648, "step": 14649 }, { "epoch": 1.9590799679058573, "grad_norm": 1.2312815189361572, "learning_rate": 5.67752229307728e-06, "loss": 0.6205, "step": 14650 }, { "epoch": 1.959213693500936, "grad_norm": 1.5501290559768677, "learning_rate": 5.6762204325662775e-06, "loss": 0.6899, "step": 14651 }, { "epoch": 1.959347419096015, "grad_norm": 1.1593133211135864, "learning_rate": 5.674918662180326e-06, "loss": 0.6055, "step": 14652 }, { "epoch": 1.959481144691094, "grad_norm": 1.3351491689682007, "learning_rate": 5.673616981946548e-06, "loss": 0.706, "step": 14653 }, { "epoch": 1.9596148702861726, "grad_norm": 1.1090319156646729, "learning_rate": 5.672315391892094e-06, "loss": 0.5792, "step": 14654 }, { "epoch": 1.9597485958812517, "grad_norm": 1.4159257411956787, "learning_rate": 5.671013892044079e-06, "loss": 0.6802, "step": 14655 }, { "epoch": 1.9598823214763306, "grad_norm": 1.16078519821167, "learning_rate": 5.669712482429632e-06, "loss": 0.6262, "step": 14656 }, { "epoch": 1.9600160470714094, "grad_norm": 1.2462307214736938, "learning_rate": 5.668411163075896e-06, "loss": 0.7067, "step": 14657 }, { "epoch": 1.9601497726664885, "grad_norm": 1.354002594947815, "learning_rate": 5.667109934009973e-06, "loss": 0.6703, "step": 14658 }, { "epoch": 1.9602834982615671, "grad_norm": 1.3994309902191162, "learning_rate": 5.6658087952590064e-06, "loss": 0.7714, "step": 14659 }, { "epoch": 1.9604172238566462, "grad_norm": 1.1637780666351318, "learning_rate": 5.664507746850106e-06, "loss": 0.6872, "step": 14660 }, { "epoch": 1.960550949451725, "grad_norm": 1.1955482959747314, "learning_rate": 5.663206788810391e-06, "loss": 0.622, "step": 14661 }, { "epoch": 1.9606846750468039, "grad_norm": 1.3589155673980713, "learning_rate": 5.661905921166981e-06, "loss": 0.6395, "step": 14662 }, { "epoch": 1.960818400641883, "grad_norm": 1.1530872583389282, "learning_rate": 5.6606051439469915e-06, "loss": 0.6191, "step": 14663 }, { "epoch": 1.9609521262369618, "grad_norm": 1.3186378479003906, "learning_rate": 5.6593044571775344e-06, "loss": 0.8032, "step": 14664 }, { "epoch": 1.9610858518320406, "grad_norm": 1.1950606107711792, "learning_rate": 5.658003860885724e-06, "loss": 0.6306, "step": 14665 }, { "epoch": 1.9612195774271195, "grad_norm": 1.3002429008483887, "learning_rate": 5.656703355098666e-06, "loss": 0.6399, "step": 14666 }, { "epoch": 1.9613533030221983, "grad_norm": 1.3470947742462158, "learning_rate": 5.655402939843472e-06, "loss": 0.7687, "step": 14667 }, { "epoch": 1.9614870286172774, "grad_norm": 1.3797457218170166, "learning_rate": 5.654102615147245e-06, "loss": 0.7361, "step": 14668 }, { "epoch": 1.9616207542123563, "grad_norm": 1.3496390581130981, "learning_rate": 5.652802381037093e-06, "loss": 0.6731, "step": 14669 }, { "epoch": 1.9617544798074351, "grad_norm": 1.1988033056259155, "learning_rate": 5.651502237540113e-06, "loss": 0.6217, "step": 14670 }, { "epoch": 1.9618882054025142, "grad_norm": 1.2463001012802124, "learning_rate": 5.650202184683413e-06, "loss": 0.676, "step": 14671 }, { "epoch": 1.9620219309975928, "grad_norm": 1.1167430877685547, "learning_rate": 5.648902222494077e-06, "loss": 0.6124, "step": 14672 }, { "epoch": 1.9621556565926719, "grad_norm": 1.3045426607131958, "learning_rate": 5.64760235099922e-06, "loss": 0.6356, "step": 14673 }, { "epoch": 1.9622893821877507, "grad_norm": 1.257029414176941, "learning_rate": 5.646302570225919e-06, "loss": 0.6672, "step": 14674 }, { "epoch": 1.9624231077828296, "grad_norm": 1.2020256519317627, "learning_rate": 5.645002880201278e-06, "loss": 0.6295, "step": 14675 }, { "epoch": 1.9625568333779086, "grad_norm": 1.2027219533920288, "learning_rate": 5.643703280952391e-06, "loss": 0.6613, "step": 14676 }, { "epoch": 1.9626905589729873, "grad_norm": 1.353958249092102, "learning_rate": 5.642403772506331e-06, "loss": 0.6726, "step": 14677 }, { "epoch": 1.9628242845680663, "grad_norm": 1.4640628099441528, "learning_rate": 5.6411043548902016e-06, "loss": 0.7723, "step": 14678 }, { "epoch": 1.9629580101631452, "grad_norm": 1.2663509845733643, "learning_rate": 5.639805028131078e-06, "loss": 0.6843, "step": 14679 }, { "epoch": 1.963091735758224, "grad_norm": 1.2475420236587524, "learning_rate": 5.638505792256046e-06, "loss": 0.6768, "step": 14680 }, { "epoch": 1.9632254613533031, "grad_norm": 1.2259981632232666, "learning_rate": 5.6372066472921875e-06, "loss": 0.6207, "step": 14681 }, { "epoch": 1.963359186948382, "grad_norm": 1.2565579414367676, "learning_rate": 5.635907593266578e-06, "loss": 0.681, "step": 14682 }, { "epoch": 1.9634929125434608, "grad_norm": 1.2950533628463745, "learning_rate": 5.634608630206306e-06, "loss": 0.6373, "step": 14683 }, { "epoch": 1.9636266381385399, "grad_norm": 1.2803237438201904, "learning_rate": 5.6333097581384365e-06, "loss": 0.6578, "step": 14684 }, { "epoch": 1.9637603637336185, "grad_norm": 1.499576210975647, "learning_rate": 5.6320109770900455e-06, "loss": 0.7063, "step": 14685 }, { "epoch": 1.9638940893286976, "grad_norm": 1.2894600629806519, "learning_rate": 5.630712287088207e-06, "loss": 0.6045, "step": 14686 }, { "epoch": 1.9640278149237764, "grad_norm": 1.3914014101028442, "learning_rate": 5.6294136881599905e-06, "loss": 0.7184, "step": 14687 }, { "epoch": 1.9641615405188553, "grad_norm": 1.0767074823379517, "learning_rate": 5.628115180332463e-06, "loss": 0.5928, "step": 14688 }, { "epoch": 1.9642952661139343, "grad_norm": 1.3659939765930176, "learning_rate": 5.6268167636326896e-06, "loss": 0.665, "step": 14689 }, { "epoch": 1.964428991709013, "grad_norm": 1.4366748332977295, "learning_rate": 5.625518438087738e-06, "loss": 0.7519, "step": 14690 }, { "epoch": 1.964562717304092, "grad_norm": 1.1853920221328735, "learning_rate": 5.624220203724669e-06, "loss": 0.6127, "step": 14691 }, { "epoch": 1.964696442899171, "grad_norm": 1.2590110301971436, "learning_rate": 5.62292206057054e-06, "loss": 0.7031, "step": 14692 }, { "epoch": 1.9648301684942497, "grad_norm": 1.350253939628601, "learning_rate": 5.621624008652414e-06, "loss": 0.7354, "step": 14693 }, { "epoch": 1.9649638940893288, "grad_norm": 1.364099383354187, "learning_rate": 5.620326047997346e-06, "loss": 0.6178, "step": 14694 }, { "epoch": 1.9650976196844074, "grad_norm": 1.3688173294067383, "learning_rate": 5.619028178632394e-06, "loss": 0.7489, "step": 14695 }, { "epoch": 1.9652313452794865, "grad_norm": 1.261811375617981, "learning_rate": 5.6177304005846e-06, "loss": 0.6803, "step": 14696 }, { "epoch": 1.9653650708745654, "grad_norm": 1.2925554513931274, "learning_rate": 5.61643271388103e-06, "loss": 0.682, "step": 14697 }, { "epoch": 1.9654987964696442, "grad_norm": 1.1903830766677856, "learning_rate": 5.615135118548718e-06, "loss": 0.6622, "step": 14698 }, { "epoch": 1.9656325220647233, "grad_norm": 1.3450969457626343, "learning_rate": 5.613837614614726e-06, "loss": 0.6756, "step": 14699 }, { "epoch": 1.9657662476598021, "grad_norm": 1.2793998718261719, "learning_rate": 5.612540202106089e-06, "loss": 0.6788, "step": 14700 }, { "epoch": 1.965899973254881, "grad_norm": 1.2261356115341187, "learning_rate": 5.611242881049848e-06, "loss": 0.6447, "step": 14701 }, { "epoch": 1.96603369884996, "grad_norm": 1.2503198385238647, "learning_rate": 5.6099456514730585e-06, "loss": 0.6521, "step": 14702 }, { "epoch": 1.9661674244450387, "grad_norm": 1.2863826751708984, "learning_rate": 5.608648513402741e-06, "loss": 0.6257, "step": 14703 }, { "epoch": 1.9663011500401177, "grad_norm": 1.2316803932189941, "learning_rate": 5.607351466865954e-06, "loss": 0.6021, "step": 14704 }, { "epoch": 1.9664348756351966, "grad_norm": 1.310901403427124, "learning_rate": 5.606054511889716e-06, "loss": 0.6859, "step": 14705 }, { "epoch": 1.9665686012302754, "grad_norm": 1.3242027759552002, "learning_rate": 5.604757648501069e-06, "loss": 0.6668, "step": 14706 }, { "epoch": 1.9667023268253545, "grad_norm": 1.116053581237793, "learning_rate": 5.603460876727043e-06, "loss": 0.5948, "step": 14707 }, { "epoch": 1.9668360524204331, "grad_norm": 1.280141830444336, "learning_rate": 5.602164196594666e-06, "loss": 0.6268, "step": 14708 }, { "epoch": 1.9669697780155122, "grad_norm": 1.2817654609680176, "learning_rate": 5.6008676081309685e-06, "loss": 0.6673, "step": 14709 }, { "epoch": 1.967103503610591, "grad_norm": 1.247922658920288, "learning_rate": 5.599571111362978e-06, "loss": 0.7094, "step": 14710 }, { "epoch": 1.96723722920567, "grad_norm": 1.2625484466552734, "learning_rate": 5.598274706317716e-06, "loss": 0.652, "step": 14711 }, { "epoch": 1.967370954800749, "grad_norm": 1.242927074432373, "learning_rate": 5.596978393022206e-06, "loss": 0.6861, "step": 14712 }, { "epoch": 1.9675046803958276, "grad_norm": 1.2137401103973389, "learning_rate": 5.595682171503467e-06, "loss": 0.5897, "step": 14713 }, { "epoch": 1.9676384059909067, "grad_norm": 1.3626763820648193, "learning_rate": 5.59438604178852e-06, "loss": 0.6935, "step": 14714 }, { "epoch": 1.9677721315859855, "grad_norm": 1.3026707172393799, "learning_rate": 5.593090003904379e-06, "loss": 0.7677, "step": 14715 }, { "epoch": 1.9679058571810644, "grad_norm": 1.3383845090866089, "learning_rate": 5.5917940578780635e-06, "loss": 0.6696, "step": 14716 }, { "epoch": 1.9680395827761434, "grad_norm": 1.3258661031723022, "learning_rate": 5.590498203736576e-06, "loss": 0.6921, "step": 14717 }, { "epoch": 1.9681733083712223, "grad_norm": 1.2359272241592407, "learning_rate": 5.589202441506942e-06, "loss": 0.6463, "step": 14718 }, { "epoch": 1.9683070339663011, "grad_norm": 1.4394315481185913, "learning_rate": 5.587906771216154e-06, "loss": 0.712, "step": 14719 }, { "epoch": 1.9684407595613802, "grad_norm": 1.289727807044983, "learning_rate": 5.586611192891231e-06, "loss": 0.6514, "step": 14720 }, { "epoch": 1.9685744851564588, "grad_norm": 1.2624475955963135, "learning_rate": 5.58531570655918e-06, "loss": 0.6619, "step": 14721 }, { "epoch": 1.968708210751538, "grad_norm": 1.345966100692749, "learning_rate": 5.584020312246991e-06, "loss": 0.716, "step": 14722 }, { "epoch": 1.9688419363466167, "grad_norm": 1.1887273788452148, "learning_rate": 5.5827250099816785e-06, "loss": 0.6311, "step": 14723 }, { "epoch": 1.9689756619416956, "grad_norm": 1.2655175924301147, "learning_rate": 5.581429799790234e-06, "loss": 0.6616, "step": 14724 }, { "epoch": 1.9691093875367747, "grad_norm": 1.19561767578125, "learning_rate": 5.580134681699657e-06, "loss": 0.6543, "step": 14725 }, { "epoch": 1.9692431131318533, "grad_norm": 1.42328679561615, "learning_rate": 5.578839655736943e-06, "loss": 0.6546, "step": 14726 }, { "epoch": 1.9693768387269324, "grad_norm": 1.1691291332244873, "learning_rate": 5.577544721929082e-06, "loss": 0.6298, "step": 14727 }, { "epoch": 1.9695105643220112, "grad_norm": 1.2448904514312744, "learning_rate": 5.5762498803030775e-06, "loss": 0.616, "step": 14728 }, { "epoch": 1.96964428991709, "grad_norm": 1.184157133102417, "learning_rate": 5.574955130885906e-06, "loss": 0.6898, "step": 14729 }, { "epoch": 1.9697780155121691, "grad_norm": 1.4148709774017334, "learning_rate": 5.573660473704562e-06, "loss": 0.7199, "step": 14730 }, { "epoch": 1.969911741107248, "grad_norm": 1.2132543325424194, "learning_rate": 5.572365908786029e-06, "loss": 0.6144, "step": 14731 }, { "epoch": 1.9700454667023268, "grad_norm": 1.2301424741744995, "learning_rate": 5.5710714361572915e-06, "loss": 0.6831, "step": 14732 }, { "epoch": 1.9701791922974057, "grad_norm": 1.3183348178863525, "learning_rate": 5.569777055845334e-06, "loss": 0.7309, "step": 14733 }, { "epoch": 1.9703129178924845, "grad_norm": 1.2518537044525146, "learning_rate": 5.568482767877132e-06, "loss": 0.6097, "step": 14734 }, { "epoch": 1.9704466434875636, "grad_norm": 1.1447440385818481, "learning_rate": 5.567188572279667e-06, "loss": 0.6439, "step": 14735 }, { "epoch": 1.9705803690826424, "grad_norm": 1.379079818725586, "learning_rate": 5.5658944690799155e-06, "loss": 0.7021, "step": 14736 }, { "epoch": 1.9707140946777213, "grad_norm": 1.2161476612091064, "learning_rate": 5.564600458304854e-06, "loss": 0.5934, "step": 14737 }, { "epoch": 1.9708478202728004, "grad_norm": 1.1162919998168945, "learning_rate": 5.563306539981443e-06, "loss": 0.6021, "step": 14738 }, { "epoch": 1.970981545867879, "grad_norm": 1.2312591075897217, "learning_rate": 5.562012714136667e-06, "loss": 0.6245, "step": 14739 }, { "epoch": 1.971115271462958, "grad_norm": 1.217163324356079, "learning_rate": 5.560718980797492e-06, "loss": 0.6366, "step": 14740 }, { "epoch": 1.971248997058037, "grad_norm": 1.269691824913025, "learning_rate": 5.559425339990876e-06, "loss": 0.6743, "step": 14741 }, { "epoch": 1.9713827226531158, "grad_norm": 1.1983734369277954, "learning_rate": 5.558131791743795e-06, "loss": 0.6685, "step": 14742 }, { "epoch": 1.9715164482481948, "grad_norm": 1.4491660594940186, "learning_rate": 5.5568383360832e-06, "loss": 0.8242, "step": 14743 }, { "epoch": 1.9716501738432735, "grad_norm": 1.262616515159607, "learning_rate": 5.555544973036067e-06, "loss": 0.6692, "step": 14744 }, { "epoch": 1.9717838994383525, "grad_norm": 1.3029879331588745, "learning_rate": 5.554251702629341e-06, "loss": 0.6851, "step": 14745 }, { "epoch": 1.9719176250334314, "grad_norm": 1.2365121841430664, "learning_rate": 5.55295852488998e-06, "loss": 0.6593, "step": 14746 }, { "epoch": 1.9720513506285102, "grad_norm": 1.3223450183868408, "learning_rate": 5.551665439844951e-06, "loss": 0.7862, "step": 14747 }, { "epoch": 1.9721850762235893, "grad_norm": 1.212733268737793, "learning_rate": 5.550372447521195e-06, "loss": 0.6303, "step": 14748 }, { "epoch": 1.9723188018186681, "grad_norm": 1.1284390687942505, "learning_rate": 5.549079547945669e-06, "loss": 0.6094, "step": 14749 }, { "epoch": 1.972452527413747, "grad_norm": 1.2222892045974731, "learning_rate": 5.54778674114532e-06, "loss": 0.6576, "step": 14750 }, { "epoch": 1.9725862530088258, "grad_norm": 1.393254280090332, "learning_rate": 5.5464940271470955e-06, "loss": 0.7018, "step": 14751 }, { "epoch": 1.9727199786039047, "grad_norm": 1.2778618335723877, "learning_rate": 5.5452014059779425e-06, "loss": 0.6703, "step": 14752 }, { "epoch": 1.9728537041989838, "grad_norm": 1.1624325513839722, "learning_rate": 5.5439088776648034e-06, "loss": 0.6344, "step": 14753 }, { "epoch": 1.9729874297940626, "grad_norm": 1.3268336057662964, "learning_rate": 5.542616442234618e-06, "loss": 0.744, "step": 14754 }, { "epoch": 1.9731211553891415, "grad_norm": 1.1663570404052734, "learning_rate": 5.541324099714329e-06, "loss": 0.6251, "step": 14755 }, { "epoch": 1.9732548809842205, "grad_norm": 1.077268362045288, "learning_rate": 5.5400318501308755e-06, "loss": 0.6068, "step": 14756 }, { "epoch": 1.9733886065792992, "grad_norm": 1.253084421157837, "learning_rate": 5.5387396935111834e-06, "loss": 0.6578, "step": 14757 }, { "epoch": 1.9735223321743782, "grad_norm": 1.3760347366333008, "learning_rate": 5.537447629882198e-06, "loss": 0.8143, "step": 14758 }, { "epoch": 1.973656057769457, "grad_norm": 1.219498872756958, "learning_rate": 5.536155659270846e-06, "loss": 0.7175, "step": 14759 }, { "epoch": 1.973789783364536, "grad_norm": 1.291443943977356, "learning_rate": 5.534863781704059e-06, "loss": 0.6802, "step": 14760 }, { "epoch": 1.973923508959615, "grad_norm": 1.2158610820770264, "learning_rate": 5.533571997208766e-06, "loss": 0.6179, "step": 14761 }, { "epoch": 1.9740572345546936, "grad_norm": 1.2621403932571411, "learning_rate": 5.532280305811883e-06, "loss": 0.6941, "step": 14762 }, { "epoch": 1.9741909601497727, "grad_norm": 1.195396065711975, "learning_rate": 5.53098870754035e-06, "loss": 0.6943, "step": 14763 }, { "epoch": 1.9743246857448515, "grad_norm": 1.2404215335845947, "learning_rate": 5.529697202421078e-06, "loss": 0.6528, "step": 14764 }, { "epoch": 1.9744584113399304, "grad_norm": 1.2457860708236694, "learning_rate": 5.5284057904809855e-06, "loss": 0.7154, "step": 14765 }, { "epoch": 1.9745921369350095, "grad_norm": 1.2416030168533325, "learning_rate": 5.527114471747004e-06, "loss": 0.6563, "step": 14766 }, { "epoch": 1.9747258625300883, "grad_norm": 1.2817267179489136, "learning_rate": 5.525823246246031e-06, "loss": 0.6922, "step": 14767 }, { "epoch": 1.9748595881251672, "grad_norm": 1.340245246887207, "learning_rate": 5.524532114005001e-06, "loss": 0.6941, "step": 14768 }, { "epoch": 1.974993313720246, "grad_norm": 1.265772819519043, "learning_rate": 5.523241075050813e-06, "loss": 0.6966, "step": 14769 }, { "epoch": 1.9751270393153249, "grad_norm": 1.3466570377349854, "learning_rate": 5.52195012941038e-06, "loss": 0.7058, "step": 14770 }, { "epoch": 1.975260764910404, "grad_norm": 1.3354321718215942, "learning_rate": 5.520659277110611e-06, "loss": 0.6437, "step": 14771 }, { "epoch": 1.9753944905054828, "grad_norm": 1.1858508586883545, "learning_rate": 5.519368518178414e-06, "loss": 0.7199, "step": 14772 }, { "epoch": 1.9755282161005616, "grad_norm": 1.2448784112930298, "learning_rate": 5.5180778526406935e-06, "loss": 0.6592, "step": 14773 }, { "epoch": 1.9756619416956407, "grad_norm": 1.2904632091522217, "learning_rate": 5.5167872805243505e-06, "loss": 0.6896, "step": 14774 }, { "epoch": 1.9757956672907193, "grad_norm": 1.241255521774292, "learning_rate": 5.515496801856287e-06, "loss": 0.5726, "step": 14775 }, { "epoch": 1.9759293928857984, "grad_norm": 1.1411305665969849, "learning_rate": 5.514206416663401e-06, "loss": 0.6089, "step": 14776 }, { "epoch": 1.9760631184808772, "grad_norm": 1.1187050342559814, "learning_rate": 5.512916124972589e-06, "loss": 0.6086, "step": 14777 }, { "epoch": 1.976196844075956, "grad_norm": 1.3878928422927856, "learning_rate": 5.511625926810749e-06, "loss": 0.7315, "step": 14778 }, { "epoch": 1.9763305696710352, "grad_norm": 1.0977685451507568, "learning_rate": 5.510335822204771e-06, "loss": 0.654, "step": 14779 }, { "epoch": 1.9764642952661138, "grad_norm": 1.2233381271362305, "learning_rate": 5.509045811181549e-06, "loss": 0.5597, "step": 14780 }, { "epoch": 1.9765980208611929, "grad_norm": 1.3761019706726074, "learning_rate": 5.507755893767963e-06, "loss": 0.6529, "step": 14781 }, { "epoch": 1.9767317464562717, "grad_norm": 1.2496211528778076, "learning_rate": 5.506466069990914e-06, "loss": 0.6308, "step": 14782 }, { "epoch": 1.9768654720513505, "grad_norm": 1.2499384880065918, "learning_rate": 5.505176339877273e-06, "loss": 0.6501, "step": 14783 }, { "epoch": 1.9769991976464296, "grad_norm": 1.2473065853118896, "learning_rate": 5.503886703453933e-06, "loss": 0.6659, "step": 14784 }, { "epoch": 1.9771329232415085, "grad_norm": 1.2127883434295654, "learning_rate": 5.502597160747778e-06, "loss": 0.6842, "step": 14785 }, { "epoch": 1.9772666488365873, "grad_norm": 1.3580031394958496, "learning_rate": 5.501307711785672e-06, "loss": 0.6791, "step": 14786 }, { "epoch": 1.9774003744316664, "grad_norm": 1.3264230489730835, "learning_rate": 5.5000183565945095e-06, "loss": 0.7295, "step": 14787 }, { "epoch": 1.977534100026745, "grad_norm": 1.2811975479125977, "learning_rate": 5.4987290952011514e-06, "loss": 0.5818, "step": 14788 }, { "epoch": 1.977667825621824, "grad_norm": 1.3609604835510254, "learning_rate": 5.497439927632486e-06, "loss": 0.6836, "step": 14789 }, { "epoch": 1.977801551216903, "grad_norm": 1.3904787302017212, "learning_rate": 5.4961508539153744e-06, "loss": 0.7534, "step": 14790 }, { "epoch": 1.9779352768119818, "grad_norm": 1.1995903253555298, "learning_rate": 5.494861874076682e-06, "loss": 0.6065, "step": 14791 }, { "epoch": 1.9780690024070609, "grad_norm": 1.2256760597229004, "learning_rate": 5.493572988143292e-06, "loss": 0.6627, "step": 14792 }, { "epoch": 1.9782027280021395, "grad_norm": 1.3130468130111694, "learning_rate": 5.492284196142057e-06, "loss": 0.655, "step": 14793 }, { "epoch": 1.9783364535972185, "grad_norm": 1.383592128753662, "learning_rate": 5.490995498099844e-06, "loss": 0.6497, "step": 14794 }, { "epoch": 1.9784701791922974, "grad_norm": 1.2162625789642334, "learning_rate": 5.489706894043516e-06, "loss": 0.7338, "step": 14795 }, { "epoch": 1.9786039047873762, "grad_norm": 1.4042145013809204, "learning_rate": 5.48841838399993e-06, "loss": 0.7157, "step": 14796 }, { "epoch": 1.9787376303824553, "grad_norm": 1.1914047002792358, "learning_rate": 5.487129967995948e-06, "loss": 0.6003, "step": 14797 }, { "epoch": 1.978871355977534, "grad_norm": 1.2774121761322021, "learning_rate": 5.485841646058423e-06, "loss": 0.6363, "step": 14798 }, { "epoch": 1.979005081572613, "grad_norm": 1.3322339057922363, "learning_rate": 5.484553418214208e-06, "loss": 0.7308, "step": 14799 }, { "epoch": 1.9791388071676919, "grad_norm": 1.2799160480499268, "learning_rate": 5.483265284490157e-06, "loss": 0.6417, "step": 14800 }, { "epoch": 1.9792725327627707, "grad_norm": 1.2640044689178467, "learning_rate": 5.481977244913124e-06, "loss": 0.6334, "step": 14801 }, { "epoch": 1.9794062583578498, "grad_norm": 1.1758586168289185, "learning_rate": 5.480689299509943e-06, "loss": 0.6986, "step": 14802 }, { "epoch": 1.9795399839529286, "grad_norm": 1.2020176649093628, "learning_rate": 5.479401448307473e-06, "loss": 0.6203, "step": 14803 }, { "epoch": 1.9796737095480075, "grad_norm": 1.2386404275894165, "learning_rate": 5.4781136913325535e-06, "loss": 0.6073, "step": 14804 }, { "epoch": 1.9798074351430865, "grad_norm": 1.4168819189071655, "learning_rate": 5.476826028612028e-06, "loss": 0.645, "step": 14805 }, { "epoch": 1.9799411607381652, "grad_norm": 1.450361967086792, "learning_rate": 5.47553846017274e-06, "loss": 0.6272, "step": 14806 }, { "epoch": 1.9800748863332442, "grad_norm": 1.3986823558807373, "learning_rate": 5.474250986041514e-06, "loss": 0.6878, "step": 14807 }, { "epoch": 1.980208611928323, "grad_norm": 1.2959864139556885, "learning_rate": 5.472963606245205e-06, "loss": 0.6541, "step": 14808 }, { "epoch": 1.980342337523402, "grad_norm": 1.452620506286621, "learning_rate": 5.471676320810633e-06, "loss": 0.7539, "step": 14809 }, { "epoch": 1.980476063118481, "grad_norm": 1.189140796661377, "learning_rate": 5.47038912976463e-06, "loss": 0.6901, "step": 14810 }, { "epoch": 1.9806097887135596, "grad_norm": 1.3543273210525513, "learning_rate": 5.469102033134042e-06, "loss": 0.7385, "step": 14811 }, { "epoch": 1.9807435143086387, "grad_norm": 1.3062105178833008, "learning_rate": 5.467815030945676e-06, "loss": 0.6865, "step": 14812 }, { "epoch": 1.9808772399037176, "grad_norm": 1.210077166557312, "learning_rate": 5.466528123226378e-06, "loss": 0.5618, "step": 14813 }, { "epoch": 1.9810109654987964, "grad_norm": 1.2392358779907227, "learning_rate": 5.465241310002959e-06, "loss": 0.6422, "step": 14814 }, { "epoch": 1.9811446910938755, "grad_norm": 1.2217706441879272, "learning_rate": 5.463954591302245e-06, "loss": 0.6629, "step": 14815 }, { "epoch": 1.981278416688954, "grad_norm": 1.1721479892730713, "learning_rate": 5.462667967151059e-06, "loss": 0.647, "step": 14816 }, { "epoch": 1.9814121422840332, "grad_norm": 1.3137712478637695, "learning_rate": 5.461381437576216e-06, "loss": 0.608, "step": 14817 }, { "epoch": 1.981545867879112, "grad_norm": 1.1816153526306152, "learning_rate": 5.460095002604533e-06, "loss": 0.6296, "step": 14818 }, { "epoch": 1.9816795934741909, "grad_norm": 1.509032130241394, "learning_rate": 5.458808662262826e-06, "loss": 0.7559, "step": 14819 }, { "epoch": 1.98181331906927, "grad_norm": 1.3604809045791626, "learning_rate": 5.4575224165779075e-06, "loss": 0.7419, "step": 14820 }, { "epoch": 1.9819470446643488, "grad_norm": 1.3283542394638062, "learning_rate": 5.456236265576589e-06, "loss": 0.659, "step": 14821 }, { "epoch": 1.9820807702594276, "grad_norm": 1.3595868349075317, "learning_rate": 5.454950209285676e-06, "loss": 0.6865, "step": 14822 }, { "epoch": 1.9822144958545067, "grad_norm": 1.1654398441314697, "learning_rate": 5.453664247731976e-06, "loss": 0.6198, "step": 14823 }, { "epoch": 1.9823482214495853, "grad_norm": 1.2143925428390503, "learning_rate": 5.452378380942296e-06, "loss": 0.6252, "step": 14824 }, { "epoch": 1.9824819470446644, "grad_norm": 1.405228614807129, "learning_rate": 5.45109260894344e-06, "loss": 0.7553, "step": 14825 }, { "epoch": 1.9826156726397433, "grad_norm": 1.2356865406036377, "learning_rate": 5.449806931762198e-06, "loss": 0.6689, "step": 14826 }, { "epoch": 1.982749398234822, "grad_norm": 1.3537129163742065, "learning_rate": 5.448521349425384e-06, "loss": 0.6512, "step": 14827 }, { "epoch": 1.9828831238299012, "grad_norm": 1.2031548023223877, "learning_rate": 5.4472358619597795e-06, "loss": 0.6803, "step": 14828 }, { "epoch": 1.9830168494249798, "grad_norm": 1.1823501586914062, "learning_rate": 5.445950469392191e-06, "loss": 0.6824, "step": 14829 }, { "epoch": 1.9831505750200589, "grad_norm": 1.3379372358322144, "learning_rate": 5.444665171749411e-06, "loss": 0.6957, "step": 14830 }, { "epoch": 1.9832843006151377, "grad_norm": 1.3275970220565796, "learning_rate": 5.44337996905822e-06, "loss": 0.6398, "step": 14831 }, { "epoch": 1.9834180262102166, "grad_norm": 1.230839490890503, "learning_rate": 5.442094861345419e-06, "loss": 0.6339, "step": 14832 }, { "epoch": 1.9835517518052956, "grad_norm": 1.278613805770874, "learning_rate": 5.440809848637787e-06, "loss": 0.6064, "step": 14833 }, { "epoch": 1.9836854774003745, "grad_norm": 1.1381815671920776, "learning_rate": 5.43952493096211e-06, "loss": 0.6236, "step": 14834 }, { "epoch": 1.9838192029954533, "grad_norm": 1.218299150466919, "learning_rate": 5.438240108345172e-06, "loss": 0.6195, "step": 14835 }, { "epoch": 1.9839529285905322, "grad_norm": 1.262445330619812, "learning_rate": 5.436955380813751e-06, "loss": 0.6985, "step": 14836 }, { "epoch": 1.984086654185611, "grad_norm": 1.302369236946106, "learning_rate": 5.435670748394635e-06, "loss": 0.644, "step": 14837 }, { "epoch": 1.98422037978069, "grad_norm": 1.18966543674469, "learning_rate": 5.434386211114592e-06, "loss": 0.6044, "step": 14838 }, { "epoch": 1.984354105375769, "grad_norm": 1.2184501886367798, "learning_rate": 5.433101769000399e-06, "loss": 0.6485, "step": 14839 }, { "epoch": 1.9844878309708478, "grad_norm": 1.1978563070297241, "learning_rate": 5.431817422078829e-06, "loss": 0.6575, "step": 14840 }, { "epoch": 1.9846215565659269, "grad_norm": 1.3085737228393555, "learning_rate": 5.430533170376655e-06, "loss": 0.7216, "step": 14841 }, { "epoch": 1.9847552821610055, "grad_norm": 1.2358304262161255, "learning_rate": 5.429249013920643e-06, "loss": 0.6817, "step": 14842 }, { "epoch": 1.9848890077560846, "grad_norm": 1.3739274740219116, "learning_rate": 5.4279649527375636e-06, "loss": 0.7368, "step": 14843 }, { "epoch": 1.9850227333511634, "grad_norm": 1.2815834283828735, "learning_rate": 5.426680986854178e-06, "loss": 0.6999, "step": 14844 }, { "epoch": 1.9851564589462423, "grad_norm": 1.0874109268188477, "learning_rate": 5.425397116297251e-06, "loss": 0.5736, "step": 14845 }, { "epoch": 1.9852901845413213, "grad_norm": 1.2515881061553955, "learning_rate": 5.424113341093548e-06, "loss": 0.6671, "step": 14846 }, { "epoch": 1.9854239101364, "grad_norm": 1.3371175527572632, "learning_rate": 5.422829661269816e-06, "loss": 0.6746, "step": 14847 }, { "epoch": 1.985557635731479, "grad_norm": 1.3259339332580566, "learning_rate": 5.421546076852824e-06, "loss": 0.6685, "step": 14848 }, { "epoch": 1.9856913613265579, "grad_norm": 1.3031377792358398, "learning_rate": 5.420262587869327e-06, "loss": 0.7014, "step": 14849 }, { "epoch": 1.9858250869216367, "grad_norm": 1.377580165863037, "learning_rate": 5.418979194346065e-06, "loss": 0.7865, "step": 14850 }, { "epoch": 1.9859588125167158, "grad_norm": 1.3640551567077637, "learning_rate": 5.417695896309807e-06, "loss": 0.71, "step": 14851 }, { "epoch": 1.9860925381117946, "grad_norm": 1.4848729372024536, "learning_rate": 5.4164126937872855e-06, "loss": 0.7335, "step": 14852 }, { "epoch": 1.9862262637068735, "grad_norm": 1.2218044996261597, "learning_rate": 5.415129586805264e-06, "loss": 0.6266, "step": 14853 }, { "epoch": 1.9863599893019523, "grad_norm": 1.5918920040130615, "learning_rate": 5.4138465753904735e-06, "loss": 0.8394, "step": 14854 }, { "epoch": 1.9864937148970312, "grad_norm": 1.2429171800613403, "learning_rate": 5.4125636595696585e-06, "loss": 0.6964, "step": 14855 }, { "epoch": 1.9866274404921103, "grad_norm": 1.303707480430603, "learning_rate": 5.411280839369574e-06, "loss": 0.6946, "step": 14856 }, { "epoch": 1.9867611660871891, "grad_norm": 1.2284363508224487, "learning_rate": 5.409998114816943e-06, "loss": 0.6214, "step": 14857 }, { "epoch": 1.986894891682268, "grad_norm": 1.5470633506774902, "learning_rate": 5.408715485938511e-06, "loss": 0.7401, "step": 14858 }, { "epoch": 1.987028617277347, "grad_norm": 1.3907623291015625, "learning_rate": 5.407432952761011e-06, "loss": 0.7107, "step": 14859 }, { "epoch": 1.9871623428724257, "grad_norm": 1.2641938924789429, "learning_rate": 5.406150515311177e-06, "loss": 0.6768, "step": 14860 }, { "epoch": 1.9872960684675047, "grad_norm": 1.2861313819885254, "learning_rate": 5.404868173615739e-06, "loss": 0.6201, "step": 14861 }, { "epoch": 1.9874297940625836, "grad_norm": 1.2859594821929932, "learning_rate": 5.403585927701427e-06, "loss": 0.6577, "step": 14862 }, { "epoch": 1.9875635196576624, "grad_norm": 1.1363396644592285, "learning_rate": 5.402303777594968e-06, "loss": 0.6407, "step": 14863 }, { "epoch": 1.9876972452527415, "grad_norm": 1.1651362180709839, "learning_rate": 5.401021723323088e-06, "loss": 0.6151, "step": 14864 }, { "epoch": 1.9878309708478201, "grad_norm": 1.205941915512085, "learning_rate": 5.399739764912513e-06, "loss": 0.5937, "step": 14865 }, { "epoch": 1.9879646964428992, "grad_norm": 1.287140965461731, "learning_rate": 5.398457902389952e-06, "loss": 0.6449, "step": 14866 }, { "epoch": 1.988098422037978, "grad_norm": 1.230329155921936, "learning_rate": 5.397176135782136e-06, "loss": 0.7012, "step": 14867 }, { "epoch": 1.988232147633057, "grad_norm": 1.1349071264266968, "learning_rate": 5.395894465115781e-06, "loss": 0.7155, "step": 14868 }, { "epoch": 1.988365873228136, "grad_norm": 1.3649296760559082, "learning_rate": 5.3946128904176e-06, "loss": 0.6827, "step": 14869 }, { "epoch": 1.9884995988232148, "grad_norm": 1.2703038454055786, "learning_rate": 5.393331411714309e-06, "loss": 0.7285, "step": 14870 }, { "epoch": 1.9886333244182937, "grad_norm": 1.2619279623031616, "learning_rate": 5.392050029032609e-06, "loss": 0.6675, "step": 14871 }, { "epoch": 1.9887670500133725, "grad_norm": 1.2635893821716309, "learning_rate": 5.390768742399226e-06, "loss": 0.6365, "step": 14872 }, { "epoch": 1.9889007756084514, "grad_norm": 1.3888193368911743, "learning_rate": 5.38948755184085e-06, "loss": 0.6993, "step": 14873 }, { "epoch": 1.9890345012035304, "grad_norm": 1.21269690990448, "learning_rate": 5.388206457384198e-06, "loss": 0.6206, "step": 14874 }, { "epoch": 1.9891682267986093, "grad_norm": 1.234320878982544, "learning_rate": 5.386925459055971e-06, "loss": 0.6093, "step": 14875 }, { "epoch": 1.9893019523936881, "grad_norm": 1.2551288604736328, "learning_rate": 5.385644556882863e-06, "loss": 0.6275, "step": 14876 }, { "epoch": 1.9894356779887672, "grad_norm": 1.2301275730133057, "learning_rate": 5.384363750891586e-06, "loss": 0.7088, "step": 14877 }, { "epoch": 1.9895694035838458, "grad_norm": 1.1993392705917358, "learning_rate": 5.383083041108827e-06, "loss": 0.6432, "step": 14878 }, { "epoch": 1.989703129178925, "grad_norm": 1.418544888496399, "learning_rate": 5.3818024275612825e-06, "loss": 0.7441, "step": 14879 }, { "epoch": 1.9898368547740037, "grad_norm": 1.2797012329101562, "learning_rate": 5.380521910275649e-06, "loss": 0.6576, "step": 14880 }, { "epoch": 1.9899705803690826, "grad_norm": 1.2851200103759766, "learning_rate": 5.379241489278615e-06, "loss": 0.6826, "step": 14881 }, { "epoch": 1.9901043059641617, "grad_norm": 1.3117268085479736, "learning_rate": 5.3779611645968696e-06, "loss": 0.6506, "step": 14882 }, { "epoch": 1.9902380315592403, "grad_norm": 1.235428810119629, "learning_rate": 5.376680936257102e-06, "loss": 0.6771, "step": 14883 }, { "epoch": 1.9903717571543194, "grad_norm": 1.2995370626449585, "learning_rate": 5.375400804285995e-06, "loss": 0.6154, "step": 14884 }, { "epoch": 1.9905054827493982, "grad_norm": 1.2795130014419556, "learning_rate": 5.3741207687102345e-06, "loss": 0.6909, "step": 14885 }, { "epoch": 1.990639208344477, "grad_norm": 1.3259156942367554, "learning_rate": 5.3728408295565e-06, "loss": 0.6616, "step": 14886 }, { "epoch": 1.9907729339395561, "grad_norm": 1.428341031074524, "learning_rate": 5.37156098685147e-06, "loss": 0.686, "step": 14887 }, { "epoch": 1.990906659534635, "grad_norm": 1.3400788307189941, "learning_rate": 5.370281240621823e-06, "loss": 0.7091, "step": 14888 }, { "epoch": 1.9910403851297138, "grad_norm": 1.2306163311004639, "learning_rate": 5.369001590894233e-06, "loss": 0.662, "step": 14889 }, { "epoch": 1.991174110724793, "grad_norm": 1.2499032020568848, "learning_rate": 5.367722037695373e-06, "loss": 0.6803, "step": 14890 }, { "epoch": 1.9913078363198715, "grad_norm": 1.3928347826004028, "learning_rate": 5.366442581051918e-06, "loss": 0.6526, "step": 14891 }, { "epoch": 1.9914415619149506, "grad_norm": 1.28783118724823, "learning_rate": 5.365163220990528e-06, "loss": 0.5943, "step": 14892 }, { "epoch": 1.9915752875100294, "grad_norm": 1.2658665180206299, "learning_rate": 5.3638839575378775e-06, "loss": 0.659, "step": 14893 }, { "epoch": 1.9917090131051083, "grad_norm": 1.265547275543213, "learning_rate": 5.3626047907206335e-06, "loss": 0.6672, "step": 14894 }, { "epoch": 1.9918427387001874, "grad_norm": 1.28840970993042, "learning_rate": 5.361325720565449e-06, "loss": 0.7005, "step": 14895 }, { "epoch": 1.991976464295266, "grad_norm": 1.5279120206832886, "learning_rate": 5.360046747098997e-06, "loss": 0.743, "step": 14896 }, { "epoch": 1.992110189890345, "grad_norm": 1.2827222347259521, "learning_rate": 5.358767870347924e-06, "loss": 0.6857, "step": 14897 }, { "epoch": 1.992243915485424, "grad_norm": 1.1647934913635254, "learning_rate": 5.357489090338901e-06, "loss": 0.7034, "step": 14898 }, { "epoch": 1.9923776410805027, "grad_norm": 1.4435542821884155, "learning_rate": 5.356210407098572e-06, "loss": 0.6631, "step": 14899 }, { "epoch": 1.9925113666755818, "grad_norm": 1.2394779920578003, "learning_rate": 5.354931820653593e-06, "loss": 0.5966, "step": 14900 }, { "epoch": 1.9926450922706604, "grad_norm": 1.2509815692901611, "learning_rate": 5.353653331030615e-06, "loss": 0.7604, "step": 14901 }, { "epoch": 1.9927788178657395, "grad_norm": 1.192550539970398, "learning_rate": 5.352374938256289e-06, "loss": 0.7249, "step": 14902 }, { "epoch": 1.9929125434608184, "grad_norm": 1.2485854625701904, "learning_rate": 5.351096642357259e-06, "loss": 0.6759, "step": 14903 }, { "epoch": 1.9930462690558972, "grad_norm": 1.384131908416748, "learning_rate": 5.3498184433601695e-06, "loss": 0.7277, "step": 14904 }, { "epoch": 1.9931799946509763, "grad_norm": 1.3492255210876465, "learning_rate": 5.348540341291666e-06, "loss": 0.7991, "step": 14905 }, { "epoch": 1.9933137202460551, "grad_norm": 1.3712635040283203, "learning_rate": 5.3472623361783896e-06, "loss": 0.6984, "step": 14906 }, { "epoch": 1.993447445841134, "grad_norm": 1.2760930061340332, "learning_rate": 5.345984428046976e-06, "loss": 0.6803, "step": 14907 }, { "epoch": 1.993581171436213, "grad_norm": 1.2948359251022339, "learning_rate": 5.344706616924062e-06, "loss": 0.6497, "step": 14908 }, { "epoch": 1.9937148970312917, "grad_norm": 1.233685851097107, "learning_rate": 5.343428902836287e-06, "loss": 0.618, "step": 14909 }, { "epoch": 1.9938486226263707, "grad_norm": 1.3347445726394653, "learning_rate": 5.342151285810283e-06, "loss": 0.6374, "step": 14910 }, { "epoch": 1.9939823482214496, "grad_norm": 1.4459260702133179, "learning_rate": 5.340873765872671e-06, "loss": 0.743, "step": 14911 }, { "epoch": 1.9941160738165284, "grad_norm": 1.2689650058746338, "learning_rate": 5.339596343050091e-06, "loss": 0.7001, "step": 14912 }, { "epoch": 1.9942497994116075, "grad_norm": 1.2226638793945312, "learning_rate": 5.338319017369165e-06, "loss": 0.7303, "step": 14913 }, { "epoch": 1.9943835250066861, "grad_norm": 1.3546885251998901, "learning_rate": 5.337041788856518e-06, "loss": 0.6491, "step": 14914 }, { "epoch": 1.9945172506017652, "grad_norm": 1.4291191101074219, "learning_rate": 5.335764657538779e-06, "loss": 0.6996, "step": 14915 }, { "epoch": 1.994650976196844, "grad_norm": 1.2857189178466797, "learning_rate": 5.3344876234425536e-06, "loss": 0.6476, "step": 14916 }, { "epoch": 1.994784701791923, "grad_norm": 1.2670665979385376, "learning_rate": 5.3332106865944766e-06, "loss": 0.7331, "step": 14917 }, { "epoch": 1.994918427387002, "grad_norm": 1.2989870309829712, "learning_rate": 5.331933847021153e-06, "loss": 0.7309, "step": 14918 }, { "epoch": 1.9950521529820806, "grad_norm": 1.2674791812896729, "learning_rate": 5.330657104749203e-06, "loss": 0.667, "step": 14919 }, { "epoch": 1.9951858785771597, "grad_norm": 1.1993584632873535, "learning_rate": 5.329380459805237e-06, "loss": 0.5557, "step": 14920 }, { "epoch": 1.9953196041722385, "grad_norm": 1.2351102828979492, "learning_rate": 5.328103912215861e-06, "loss": 0.6876, "step": 14921 }, { "epoch": 1.9954533297673174, "grad_norm": 1.303617000579834, "learning_rate": 5.326827462007697e-06, "loss": 0.6627, "step": 14922 }, { "epoch": 1.9955870553623964, "grad_norm": 1.3228851556777954, "learning_rate": 5.32555110920734e-06, "loss": 0.6713, "step": 14923 }, { "epoch": 1.9957207809574753, "grad_norm": 1.2968569993972778, "learning_rate": 5.324274853841396e-06, "loss": 0.6675, "step": 14924 }, { "epoch": 1.9958545065525541, "grad_norm": 1.4967141151428223, "learning_rate": 5.3229986959364675e-06, "loss": 0.8057, "step": 14925 }, { "epoch": 1.9959882321476332, "grad_norm": 1.2397456169128418, "learning_rate": 5.321722635519158e-06, "loss": 0.681, "step": 14926 }, { "epoch": 1.9961219577427118, "grad_norm": 1.261259913444519, "learning_rate": 5.320446672616062e-06, "loss": 0.6317, "step": 14927 }, { "epoch": 1.996255683337791, "grad_norm": 1.3671162128448486, "learning_rate": 5.319170807253777e-06, "loss": 0.7377, "step": 14928 }, { "epoch": 1.9963894089328698, "grad_norm": 1.292900562286377, "learning_rate": 5.317895039458899e-06, "loss": 0.6293, "step": 14929 }, { "epoch": 1.9965231345279486, "grad_norm": 1.154637098312378, "learning_rate": 5.316619369258018e-06, "loss": 0.6445, "step": 14930 }, { "epoch": 1.9966568601230277, "grad_norm": 1.3124198913574219, "learning_rate": 5.315343796677724e-06, "loss": 0.6586, "step": 14931 }, { "epoch": 1.9967905857181063, "grad_norm": 1.4545519351959229, "learning_rate": 5.314068321744607e-06, "loss": 0.7663, "step": 14932 }, { "epoch": 1.9969243113131854, "grad_norm": 1.226535439491272, "learning_rate": 5.312792944485251e-06, "loss": 0.7021, "step": 14933 }, { "epoch": 1.9970580369082642, "grad_norm": 1.2827050685882568, "learning_rate": 5.3115176649262445e-06, "loss": 0.6182, "step": 14934 }, { "epoch": 1.997191762503343, "grad_norm": 1.33535897731781, "learning_rate": 5.310242483094159e-06, "loss": 0.6808, "step": 14935 }, { "epoch": 1.9973254880984221, "grad_norm": 1.3951321840286255, "learning_rate": 5.308967399015589e-06, "loss": 0.6582, "step": 14936 }, { "epoch": 1.997459213693501, "grad_norm": 1.1756454706192017, "learning_rate": 5.3076924127170956e-06, "loss": 0.6532, "step": 14937 }, { "epoch": 1.9975929392885798, "grad_norm": 1.3676700592041016, "learning_rate": 5.3064175242252694e-06, "loss": 0.6959, "step": 14938 }, { "epoch": 1.9977266648836587, "grad_norm": 1.1414225101470947, "learning_rate": 5.305142733566681e-06, "loss": 0.6707, "step": 14939 }, { "epoch": 1.9978603904787375, "grad_norm": 1.3546677827835083, "learning_rate": 5.303868040767894e-06, "loss": 0.7027, "step": 14940 }, { "epoch": 1.9979941160738166, "grad_norm": 1.2713035345077515, "learning_rate": 5.30259344585549e-06, "loss": 0.6753, "step": 14941 }, { "epoch": 1.9981278416688955, "grad_norm": 1.209706425666809, "learning_rate": 5.301318948856029e-06, "loss": 0.647, "step": 14942 }, { "epoch": 1.9982615672639743, "grad_norm": 1.2872382402420044, "learning_rate": 5.300044549796076e-06, "loss": 0.7555, "step": 14943 }, { "epoch": 1.9983952928590534, "grad_norm": 1.3546504974365234, "learning_rate": 5.298770248702198e-06, "loss": 0.6505, "step": 14944 }, { "epoch": 1.998529018454132, "grad_norm": 1.2530288696289062, "learning_rate": 5.297496045600956e-06, "loss": 0.6236, "step": 14945 }, { "epoch": 1.998662744049211, "grad_norm": 1.3733775615692139, "learning_rate": 5.296221940518908e-06, "loss": 0.7452, "step": 14946 }, { "epoch": 1.99879646964429, "grad_norm": 1.3594934940338135, "learning_rate": 5.294947933482612e-06, "loss": 0.7002, "step": 14947 }, { "epoch": 1.9989301952393688, "grad_norm": 1.2966232299804688, "learning_rate": 5.293674024518627e-06, "loss": 0.6553, "step": 14948 }, { "epoch": 1.9990639208344478, "grad_norm": 1.2556822299957275, "learning_rate": 5.292400213653501e-06, "loss": 0.6879, "step": 14949 }, { "epoch": 1.9991976464295265, "grad_norm": 1.1856272220611572, "learning_rate": 5.291126500913788e-06, "loss": 0.7289, "step": 14950 }, { "epoch": 1.9993313720246055, "grad_norm": 1.301573634147644, "learning_rate": 5.289852886326039e-06, "loss": 0.6823, "step": 14951 }, { "epoch": 1.9994650976196844, "grad_norm": 1.2661771774291992, "learning_rate": 5.288579369916798e-06, "loss": 0.6418, "step": 14952 }, { "epoch": 1.9995988232147632, "grad_norm": 1.390884280204773, "learning_rate": 5.287305951712612e-06, "loss": 0.5722, "step": 14953 }, { "epoch": 1.9997325488098423, "grad_norm": 1.2112319469451904, "learning_rate": 5.286032631740023e-06, "loss": 0.6523, "step": 14954 }, { "epoch": 1.9998662744049212, "grad_norm": 1.3114471435546875, "learning_rate": 5.284759410025578e-06, "loss": 0.677, "step": 14955 }, { "epoch": 2.0, "grad_norm": 1.050843358039856, "learning_rate": 5.283486286595804e-06, "loss": 0.4815, "step": 14956 }, { "epoch": 2.000133725595079, "grad_norm": 1.055700421333313, "learning_rate": 5.282213261477247e-06, "loss": 0.455, "step": 14957 }, { "epoch": 2.0002674511901577, "grad_norm": 0.974917471408844, "learning_rate": 5.280940334696442e-06, "loss": 0.4176, "step": 14958 }, { "epoch": 2.0004011767852368, "grad_norm": 1.0393953323364258, "learning_rate": 5.27966750627992e-06, "loss": 0.4543, "step": 14959 }, { "epoch": 2.0005349023803154, "grad_norm": 1.2081223726272583, "learning_rate": 5.278394776254214e-06, "loss": 0.456, "step": 14960 }, { "epoch": 2.0006686279753945, "grad_norm": 1.1563613414764404, "learning_rate": 5.2771221446458445e-06, "loss": 0.4558, "step": 14961 }, { "epoch": 2.0008023535704735, "grad_norm": 1.101528286933899, "learning_rate": 5.275849611481352e-06, "loss": 0.4938, "step": 14962 }, { "epoch": 2.000936079165552, "grad_norm": 1.0396020412445068, "learning_rate": 5.27457717678725e-06, "loss": 0.4511, "step": 14963 }, { "epoch": 2.0010698047606312, "grad_norm": 1.2446961402893066, "learning_rate": 5.273304840590066e-06, "loss": 0.455, "step": 14964 }, { "epoch": 2.0012035303557103, "grad_norm": 1.0518479347229004, "learning_rate": 5.272032602916317e-06, "loss": 0.4274, "step": 14965 }, { "epoch": 2.001337255950789, "grad_norm": 1.1066879034042358, "learning_rate": 5.270760463792523e-06, "loss": 0.4736, "step": 14966 }, { "epoch": 2.001470981545868, "grad_norm": 1.1960071325302124, "learning_rate": 5.2694884232452086e-06, "loss": 0.4654, "step": 14967 }, { "epoch": 2.0016047071409466, "grad_norm": 1.0354878902435303, "learning_rate": 5.268216481300876e-06, "loss": 0.423, "step": 14968 }, { "epoch": 2.0017384327360257, "grad_norm": 1.1417587995529175, "learning_rate": 5.266944637986046e-06, "loss": 0.4263, "step": 14969 }, { "epoch": 2.0018721583311048, "grad_norm": 1.0643304586410522, "learning_rate": 5.265672893327224e-06, "loss": 0.4161, "step": 14970 }, { "epoch": 2.0020058839261834, "grad_norm": 1.1227037906646729, "learning_rate": 5.264401247350921e-06, "loss": 0.4201, "step": 14971 }, { "epoch": 2.0021396095212625, "grad_norm": 1.253049373626709, "learning_rate": 5.263129700083642e-06, "loss": 0.4266, "step": 14972 }, { "epoch": 2.002273335116341, "grad_norm": 1.1163498163223267, "learning_rate": 5.261858251551893e-06, "loss": 0.4141, "step": 14973 }, { "epoch": 2.00240706071142, "grad_norm": 1.1221153736114502, "learning_rate": 5.260586901782172e-06, "loss": 0.4066, "step": 14974 }, { "epoch": 2.0025407863064992, "grad_norm": 1.1791040897369385, "learning_rate": 5.2593156508009844e-06, "loss": 0.4544, "step": 14975 }, { "epoch": 2.002674511901578, "grad_norm": 1.2274538278579712, "learning_rate": 5.258044498634825e-06, "loss": 0.4169, "step": 14976 }, { "epoch": 2.002808237496657, "grad_norm": 1.2820711135864258, "learning_rate": 5.256773445310191e-06, "loss": 0.4114, "step": 14977 }, { "epoch": 2.0029419630917356, "grad_norm": 1.4803552627563477, "learning_rate": 5.255502490853575e-06, "loss": 0.4383, "step": 14978 }, { "epoch": 2.0030756886868146, "grad_norm": 1.3164548873901367, "learning_rate": 5.2542316352914735e-06, "loss": 0.4196, "step": 14979 }, { "epoch": 2.0032094142818937, "grad_norm": 1.2315852642059326, "learning_rate": 5.252960878650364e-06, "loss": 0.4117, "step": 14980 }, { "epoch": 2.0033431398769723, "grad_norm": 1.208964467048645, "learning_rate": 5.251690220956751e-06, "loss": 0.3659, "step": 14981 }, { "epoch": 2.0034768654720514, "grad_norm": 1.4122995138168335, "learning_rate": 5.250419662237104e-06, "loss": 0.4023, "step": 14982 }, { "epoch": 2.0036105910671305, "grad_norm": 1.3758465051651, "learning_rate": 5.249149202517922e-06, "loss": 0.4082, "step": 14983 }, { "epoch": 2.003744316662209, "grad_norm": 1.3981959819793701, "learning_rate": 5.247878841825676e-06, "loss": 0.4118, "step": 14984 }, { "epoch": 2.003878042257288, "grad_norm": 1.4900596141815186, "learning_rate": 5.246608580186843e-06, "loss": 0.443, "step": 14985 }, { "epoch": 2.004011767852367, "grad_norm": 1.3102511167526245, "learning_rate": 5.2453384176279135e-06, "loss": 0.3698, "step": 14986 }, { "epoch": 2.004145493447446, "grad_norm": 1.2923847436904907, "learning_rate": 5.244068354175352e-06, "loss": 0.3596, "step": 14987 }, { "epoch": 2.004279219042525, "grad_norm": 1.3789196014404297, "learning_rate": 5.242798389855634e-06, "loss": 0.3656, "step": 14988 }, { "epoch": 2.0044129446376036, "grad_norm": 1.4346433877944946, "learning_rate": 5.2415285246952305e-06, "loss": 0.4069, "step": 14989 }, { "epoch": 2.0045466702326826, "grad_norm": 1.5303571224212646, "learning_rate": 5.2402587587206134e-06, "loss": 0.4206, "step": 14990 }, { "epoch": 2.0046803958277613, "grad_norm": 1.2710036039352417, "learning_rate": 5.238989091958246e-06, "loss": 0.3709, "step": 14991 }, { "epoch": 2.0048141214228403, "grad_norm": 1.3869820833206177, "learning_rate": 5.2377195244345965e-06, "loss": 0.4041, "step": 14992 }, { "epoch": 2.0049478470179194, "grad_norm": 1.4926518201828003, "learning_rate": 5.236450056176127e-06, "loss": 0.4351, "step": 14993 }, { "epoch": 2.005081572612998, "grad_norm": 1.495334506034851, "learning_rate": 5.235180687209296e-06, "loss": 0.4313, "step": 14994 }, { "epoch": 2.005215298208077, "grad_norm": 1.3399914503097534, "learning_rate": 5.233911417560567e-06, "loss": 0.3925, "step": 14995 }, { "epoch": 2.0053490238031557, "grad_norm": 1.5036503076553345, "learning_rate": 5.232642247256391e-06, "loss": 0.3731, "step": 14996 }, { "epoch": 2.005482749398235, "grad_norm": 1.3649390935897827, "learning_rate": 5.231373176323227e-06, "loss": 0.3917, "step": 14997 }, { "epoch": 2.005616474993314, "grad_norm": 1.3976646661758423, "learning_rate": 5.230104204787525e-06, "loss": 0.433, "step": 14998 }, { "epoch": 2.0057502005883925, "grad_norm": 1.3757050037384033, "learning_rate": 5.228835332675737e-06, "loss": 0.3808, "step": 14999 }, { "epoch": 2.0058839261834716, "grad_norm": 1.4187084436416626, "learning_rate": 5.227566560014315e-06, "loss": 0.3927, "step": 15000 }, { "epoch": 2.0060176517785506, "grad_norm": 1.1158243417739868, "learning_rate": 5.226297886829695e-06, "loss": 0.3527, "step": 15001 }, { "epoch": 2.0061513773736293, "grad_norm": 1.4159162044525146, "learning_rate": 5.225029313148333e-06, "loss": 0.4278, "step": 15002 }, { "epoch": 2.0062851029687083, "grad_norm": 1.4480334520339966, "learning_rate": 5.223760838996663e-06, "loss": 0.4133, "step": 15003 }, { "epoch": 2.006418828563787, "grad_norm": 1.2829251289367676, "learning_rate": 5.222492464401124e-06, "loss": 0.3787, "step": 15004 }, { "epoch": 2.006552554158866, "grad_norm": 1.2952005863189697, "learning_rate": 5.221224189388165e-06, "loss": 0.4105, "step": 15005 }, { "epoch": 2.006686279753945, "grad_norm": 1.4485445022583008, "learning_rate": 5.219956013984209e-06, "loss": 0.3936, "step": 15006 }, { "epoch": 2.0068200053490237, "grad_norm": 1.0720840692520142, "learning_rate": 5.218687938215702e-06, "loss": 0.3392, "step": 15007 }, { "epoch": 2.006953730944103, "grad_norm": 1.2559359073638916, "learning_rate": 5.217419962109067e-06, "loss": 0.401, "step": 15008 }, { "epoch": 2.0070874565391814, "grad_norm": 1.3435348272323608, "learning_rate": 5.216152085690736e-06, "loss": 0.4332, "step": 15009 }, { "epoch": 2.0072211821342605, "grad_norm": 1.2701259851455688, "learning_rate": 5.214884308987136e-06, "loss": 0.3864, "step": 15010 }, { "epoch": 2.0073549077293396, "grad_norm": 1.2609282732009888, "learning_rate": 5.213616632024695e-06, "loss": 0.3922, "step": 15011 }, { "epoch": 2.007488633324418, "grad_norm": 1.2839031219482422, "learning_rate": 5.212349054829835e-06, "loss": 0.3634, "step": 15012 }, { "epoch": 2.0076223589194973, "grad_norm": 1.4811216592788696, "learning_rate": 5.211081577428978e-06, "loss": 0.447, "step": 15013 }, { "epoch": 2.007756084514576, "grad_norm": 1.2900382280349731, "learning_rate": 5.2098141998485415e-06, "loss": 0.3676, "step": 15014 }, { "epoch": 2.007889810109655, "grad_norm": 1.4951683282852173, "learning_rate": 5.2085469221149465e-06, "loss": 0.4176, "step": 15015 }, { "epoch": 2.008023535704734, "grad_norm": 1.4090937376022339, "learning_rate": 5.207279744254605e-06, "loss": 0.4075, "step": 15016 }, { "epoch": 2.0081572612998126, "grad_norm": 1.286105751991272, "learning_rate": 5.206012666293931e-06, "loss": 0.3766, "step": 15017 }, { "epoch": 2.0082909868948917, "grad_norm": 1.4553841352462769, "learning_rate": 5.204745688259336e-06, "loss": 0.4432, "step": 15018 }, { "epoch": 2.008424712489971, "grad_norm": 1.419102430343628, "learning_rate": 5.203478810177232e-06, "loss": 0.4203, "step": 15019 }, { "epoch": 2.0085584380850494, "grad_norm": 1.311758279800415, "learning_rate": 5.202212032074014e-06, "loss": 0.3801, "step": 15020 }, { "epoch": 2.0086921636801285, "grad_norm": 1.3151917457580566, "learning_rate": 5.200945353976103e-06, "loss": 0.34, "step": 15021 }, { "epoch": 2.008825889275207, "grad_norm": 1.1964595317840576, "learning_rate": 5.199678775909889e-06, "loss": 0.3734, "step": 15022 }, { "epoch": 2.008959614870286, "grad_norm": 1.3623753786087036, "learning_rate": 5.1984122979017785e-06, "loss": 0.3894, "step": 15023 }, { "epoch": 2.0090933404653653, "grad_norm": 1.955280065536499, "learning_rate": 5.197145919978172e-06, "loss": 0.3986, "step": 15024 }, { "epoch": 2.009227066060444, "grad_norm": 1.5207499265670776, "learning_rate": 5.195879642165458e-06, "loss": 0.3948, "step": 15025 }, { "epoch": 2.009360791655523, "grad_norm": 1.4051802158355713, "learning_rate": 5.194613464490042e-06, "loss": 0.3861, "step": 15026 }, { "epoch": 2.0094945172506016, "grad_norm": 1.5008959770202637, "learning_rate": 5.193347386978307e-06, "loss": 0.4041, "step": 15027 }, { "epoch": 2.0096282428456806, "grad_norm": 1.4927411079406738, "learning_rate": 5.192081409656647e-06, "loss": 0.4146, "step": 15028 }, { "epoch": 2.0097619684407597, "grad_norm": 1.3970462083816528, "learning_rate": 5.190815532551448e-06, "loss": 0.3855, "step": 15029 }, { "epoch": 2.0098956940358383, "grad_norm": 1.30995512008667, "learning_rate": 5.189549755689094e-06, "loss": 0.3789, "step": 15030 }, { "epoch": 2.0100294196309174, "grad_norm": 1.4284340143203735, "learning_rate": 5.1882840790959785e-06, "loss": 0.3917, "step": 15031 }, { "epoch": 2.010163145225996, "grad_norm": 1.3816999197006226, "learning_rate": 5.187018502798475e-06, "loss": 0.3771, "step": 15032 }, { "epoch": 2.010296870821075, "grad_norm": 1.4078904390335083, "learning_rate": 5.185753026822964e-06, "loss": 0.4026, "step": 15033 }, { "epoch": 2.010430596416154, "grad_norm": 1.5465421676635742, "learning_rate": 5.184487651195825e-06, "loss": 0.4271, "step": 15034 }, { "epoch": 2.010564322011233, "grad_norm": 1.3871711492538452, "learning_rate": 5.183222375943433e-06, "loss": 0.3976, "step": 15035 }, { "epoch": 2.010698047606312, "grad_norm": 1.3923689126968384, "learning_rate": 5.181957201092163e-06, "loss": 0.4013, "step": 15036 }, { "epoch": 2.010831773201391, "grad_norm": 1.4931987524032593, "learning_rate": 5.180692126668383e-06, "loss": 0.3975, "step": 15037 }, { "epoch": 2.0109654987964696, "grad_norm": 1.36726713180542, "learning_rate": 5.179427152698464e-06, "loss": 0.3377, "step": 15038 }, { "epoch": 2.0110992243915486, "grad_norm": 1.2424111366271973, "learning_rate": 5.178162279208774e-06, "loss": 0.3712, "step": 15039 }, { "epoch": 2.0112329499866273, "grad_norm": 1.4048572778701782, "learning_rate": 5.176897506225675e-06, "loss": 0.3726, "step": 15040 }, { "epoch": 2.0113666755817063, "grad_norm": 1.5942720174789429, "learning_rate": 5.175632833775535e-06, "loss": 0.4217, "step": 15041 }, { "epoch": 2.0115004011767854, "grad_norm": 1.4521111249923706, "learning_rate": 5.1743682618847114e-06, "loss": 0.4482, "step": 15042 }, { "epoch": 2.011634126771864, "grad_norm": 1.3126685619354248, "learning_rate": 5.173103790579564e-06, "loss": 0.3804, "step": 15043 }, { "epoch": 2.011767852366943, "grad_norm": 1.5449199676513672, "learning_rate": 5.171839419886449e-06, "loss": 0.4443, "step": 15044 }, { "epoch": 2.0119015779620217, "grad_norm": 1.2719964981079102, "learning_rate": 5.170575149831725e-06, "loss": 0.3859, "step": 15045 }, { "epoch": 2.012035303557101, "grad_norm": 1.4632441997528076, "learning_rate": 5.169310980441732e-06, "loss": 0.3851, "step": 15046 }, { "epoch": 2.01216902915218, "grad_norm": 1.3705885410308838, "learning_rate": 5.168046911742838e-06, "loss": 0.4124, "step": 15047 }, { "epoch": 2.0123027547472585, "grad_norm": 1.4565726518630981, "learning_rate": 5.166782943761378e-06, "loss": 0.4218, "step": 15048 }, { "epoch": 2.0124364803423376, "grad_norm": 1.4194146394729614, "learning_rate": 5.165519076523699e-06, "loss": 0.4291, "step": 15049 }, { "epoch": 2.012570205937416, "grad_norm": 1.4706757068634033, "learning_rate": 5.164255310056156e-06, "loss": 0.354, "step": 15050 }, { "epoch": 2.0127039315324953, "grad_norm": 1.5115705728530884, "learning_rate": 5.162991644385078e-06, "loss": 0.4107, "step": 15051 }, { "epoch": 2.0128376571275743, "grad_norm": 1.493889570236206, "learning_rate": 5.161728079536816e-06, "loss": 0.412, "step": 15052 }, { "epoch": 2.012971382722653, "grad_norm": 1.5621106624603271, "learning_rate": 5.1604646155377e-06, "loss": 0.4375, "step": 15053 }, { "epoch": 2.013105108317732, "grad_norm": 1.4189461469650269, "learning_rate": 5.159201252414067e-06, "loss": 0.3783, "step": 15054 }, { "epoch": 2.013238833912811, "grad_norm": 1.5454238653182983, "learning_rate": 5.157937990192255e-06, "loss": 0.4137, "step": 15055 }, { "epoch": 2.0133725595078897, "grad_norm": 1.3968615531921387, "learning_rate": 5.156674828898589e-06, "loss": 0.3953, "step": 15056 }, { "epoch": 2.013506285102969, "grad_norm": 1.3716275691986084, "learning_rate": 5.155411768559402e-06, "loss": 0.3713, "step": 15057 }, { "epoch": 2.0136400106980474, "grad_norm": 1.471168875694275, "learning_rate": 5.154148809201022e-06, "loss": 0.3872, "step": 15058 }, { "epoch": 2.0137737362931265, "grad_norm": 1.5357636213302612, "learning_rate": 5.152885950849772e-06, "loss": 0.3968, "step": 15059 }, { "epoch": 2.0139074618882056, "grad_norm": 1.3124295473098755, "learning_rate": 5.151623193531976e-06, "loss": 0.3725, "step": 15060 }, { "epoch": 2.014041187483284, "grad_norm": 1.2504169940948486, "learning_rate": 5.150360537273956e-06, "loss": 0.3893, "step": 15061 }, { "epoch": 2.0141749130783633, "grad_norm": 1.4942512512207031, "learning_rate": 5.14909798210203e-06, "loss": 0.431, "step": 15062 }, { "epoch": 2.014308638673442, "grad_norm": 1.5264604091644287, "learning_rate": 5.147835528042515e-06, "loss": 0.4108, "step": 15063 }, { "epoch": 2.014442364268521, "grad_norm": 1.3152052164077759, "learning_rate": 5.1465731751217286e-06, "loss": 0.3496, "step": 15064 }, { "epoch": 2.0145760898636, "grad_norm": 1.823743224143982, "learning_rate": 5.145310923365973e-06, "loss": 0.4622, "step": 15065 }, { "epoch": 2.0147098154586787, "grad_norm": 1.6228814125061035, "learning_rate": 5.144048772801573e-06, "loss": 0.4324, "step": 15066 }, { "epoch": 2.0148435410537577, "grad_norm": 1.441481351852417, "learning_rate": 5.142786723454822e-06, "loss": 0.4244, "step": 15067 }, { "epoch": 2.014977266648837, "grad_norm": 1.5807491540908813, "learning_rate": 5.141524775352038e-06, "loss": 0.468, "step": 15068 }, { "epoch": 2.0151109922439154, "grad_norm": 1.5947625637054443, "learning_rate": 5.140262928519524e-06, "loss": 0.3984, "step": 15069 }, { "epoch": 2.0152447178389945, "grad_norm": 1.3195481300354004, "learning_rate": 5.139001182983572e-06, "loss": 0.404, "step": 15070 }, { "epoch": 2.015378443434073, "grad_norm": 1.1909089088439941, "learning_rate": 5.137739538770497e-06, "loss": 0.3756, "step": 15071 }, { "epoch": 2.015512169029152, "grad_norm": 1.643643856048584, "learning_rate": 5.136477995906583e-06, "loss": 0.441, "step": 15072 }, { "epoch": 2.0156458946242313, "grad_norm": 1.382297396659851, "learning_rate": 5.1352165544181345e-06, "loss": 0.407, "step": 15073 }, { "epoch": 2.01577962021931, "grad_norm": 1.5611246824264526, "learning_rate": 5.133955214331439e-06, "loss": 0.3946, "step": 15074 }, { "epoch": 2.015913345814389, "grad_norm": 1.4899537563323975, "learning_rate": 5.132693975672788e-06, "loss": 0.4056, "step": 15075 }, { "epoch": 2.0160470714094676, "grad_norm": 1.2148845195770264, "learning_rate": 5.131432838468482e-06, "loss": 0.3168, "step": 15076 }, { "epoch": 2.0161807970045467, "grad_norm": 1.3161561489105225, "learning_rate": 5.130171802744795e-06, "loss": 0.3733, "step": 15077 }, { "epoch": 2.0163145225996257, "grad_norm": 1.4447343349456787, "learning_rate": 5.128910868528017e-06, "loss": 0.3838, "step": 15078 }, { "epoch": 2.0164482481947044, "grad_norm": 1.3097301721572876, "learning_rate": 5.127650035844429e-06, "loss": 0.4112, "step": 15079 }, { "epoch": 2.0165819737897834, "grad_norm": 1.4044361114501953, "learning_rate": 5.126389304720316e-06, "loss": 0.374, "step": 15080 }, { "epoch": 2.016715699384862, "grad_norm": 1.3084218502044678, "learning_rate": 5.125128675181954e-06, "loss": 0.3568, "step": 15081 }, { "epoch": 2.016849424979941, "grad_norm": 1.3356610536575317, "learning_rate": 5.123868147255619e-06, "loss": 0.4064, "step": 15082 }, { "epoch": 2.01698315057502, "grad_norm": 1.3402231931686401, "learning_rate": 5.122607720967588e-06, "loss": 0.3645, "step": 15083 }, { "epoch": 2.017116876170099, "grad_norm": 1.5002535581588745, "learning_rate": 5.121347396344132e-06, "loss": 0.373, "step": 15084 }, { "epoch": 2.017250601765178, "grad_norm": 1.558423399925232, "learning_rate": 5.120087173411523e-06, "loss": 0.4311, "step": 15085 }, { "epoch": 2.017384327360257, "grad_norm": 1.3377227783203125, "learning_rate": 5.1188270521960215e-06, "loss": 0.3741, "step": 15086 }, { "epoch": 2.0175180529553356, "grad_norm": 1.5051350593566895, "learning_rate": 5.117567032723902e-06, "loss": 0.4075, "step": 15087 }, { "epoch": 2.0176517785504147, "grad_norm": 1.3303165435791016, "learning_rate": 5.116307115021431e-06, "loss": 0.3654, "step": 15088 }, { "epoch": 2.0177855041454933, "grad_norm": 1.4619866609573364, "learning_rate": 5.115047299114856e-06, "loss": 0.3862, "step": 15089 }, { "epoch": 2.0179192297405724, "grad_norm": 1.3716658353805542, "learning_rate": 5.1137875850304545e-06, "loss": 0.3797, "step": 15090 }, { "epoch": 2.0180529553356514, "grad_norm": 1.338131308555603, "learning_rate": 5.112527972794465e-06, "loss": 0.3506, "step": 15091 }, { "epoch": 2.01818668093073, "grad_norm": 1.294083833694458, "learning_rate": 5.111268462433163e-06, "loss": 0.3564, "step": 15092 }, { "epoch": 2.018320406525809, "grad_norm": 1.4195901155471802, "learning_rate": 5.1100090539727884e-06, "loss": 0.3472, "step": 15093 }, { "epoch": 2.0184541321208878, "grad_norm": 1.349548101425171, "learning_rate": 5.108749747439591e-06, "loss": 0.4129, "step": 15094 }, { "epoch": 2.018587857715967, "grad_norm": 1.545592188835144, "learning_rate": 5.107490542859832e-06, "loss": 0.3982, "step": 15095 }, { "epoch": 2.018721583311046, "grad_norm": 1.4069312810897827, "learning_rate": 5.106231440259748e-06, "loss": 0.3563, "step": 15096 }, { "epoch": 2.0188553089061245, "grad_norm": 1.2842289209365845, "learning_rate": 5.1049724396655865e-06, "loss": 0.3757, "step": 15097 }, { "epoch": 2.0189890345012036, "grad_norm": 1.4287010431289673, "learning_rate": 5.10371354110359e-06, "loss": 0.3895, "step": 15098 }, { "epoch": 2.019122760096282, "grad_norm": 1.3400799036026, "learning_rate": 5.102454744600001e-06, "loss": 0.3941, "step": 15099 }, { "epoch": 2.0192564856913613, "grad_norm": 1.3937876224517822, "learning_rate": 5.101196050181054e-06, "loss": 0.3917, "step": 15100 }, { "epoch": 2.0193902112864404, "grad_norm": 1.411228060722351, "learning_rate": 5.09993745787299e-06, "loss": 0.391, "step": 15101 }, { "epoch": 2.019523936881519, "grad_norm": 1.4551266431808472, "learning_rate": 5.09867896770204e-06, "loss": 0.3998, "step": 15102 }, { "epoch": 2.019657662476598, "grad_norm": 1.4065072536468506, "learning_rate": 5.0974205796944365e-06, "loss": 0.3749, "step": 15103 }, { "epoch": 2.019791388071677, "grad_norm": 1.5160589218139648, "learning_rate": 5.096162293876415e-06, "loss": 0.4125, "step": 15104 }, { "epoch": 2.0199251136667558, "grad_norm": 1.4706947803497314, "learning_rate": 5.094904110274188e-06, "loss": 0.3688, "step": 15105 }, { "epoch": 2.020058839261835, "grad_norm": 1.2299705743789673, "learning_rate": 5.093646028913996e-06, "loss": 0.3363, "step": 15106 }, { "epoch": 2.0201925648569135, "grad_norm": 1.449506163597107, "learning_rate": 5.092388049822059e-06, "loss": 0.3676, "step": 15107 }, { "epoch": 2.0203262904519925, "grad_norm": 1.5085084438323975, "learning_rate": 5.091130173024596e-06, "loss": 0.4028, "step": 15108 }, { "epoch": 2.0204600160470716, "grad_norm": 1.4426831007003784, "learning_rate": 5.089872398547831e-06, "loss": 0.3453, "step": 15109 }, { "epoch": 2.02059374164215, "grad_norm": 1.2821072340011597, "learning_rate": 5.0886147264179685e-06, "loss": 0.3505, "step": 15110 }, { "epoch": 2.0207274672372293, "grad_norm": 1.4536360502243042, "learning_rate": 5.087357156661241e-06, "loss": 0.3792, "step": 15111 }, { "epoch": 2.020861192832308, "grad_norm": 1.4036517143249512, "learning_rate": 5.08609968930385e-06, "loss": 0.4042, "step": 15112 }, { "epoch": 2.020994918427387, "grad_norm": 1.261043906211853, "learning_rate": 5.084842324372003e-06, "loss": 0.3825, "step": 15113 }, { "epoch": 2.021128644022466, "grad_norm": 1.376478672027588, "learning_rate": 5.083585061891925e-06, "loss": 0.3515, "step": 15114 }, { "epoch": 2.0212623696175447, "grad_norm": 1.3559823036193848, "learning_rate": 5.082327901889801e-06, "loss": 0.3622, "step": 15115 }, { "epoch": 2.0213960952126238, "grad_norm": 1.4550917148590088, "learning_rate": 5.081070844391855e-06, "loss": 0.3691, "step": 15116 }, { "epoch": 2.0215298208077024, "grad_norm": 1.371978998184204, "learning_rate": 5.079813889424278e-06, "loss": 0.3435, "step": 15117 }, { "epoch": 2.0216635464027815, "grad_norm": 1.2908315658569336, "learning_rate": 5.078557037013271e-06, "loss": 0.3149, "step": 15118 }, { "epoch": 2.0217972719978605, "grad_norm": 1.5395832061767578, "learning_rate": 5.077300287185034e-06, "loss": 0.3877, "step": 15119 }, { "epoch": 2.021930997592939, "grad_norm": 1.379804015159607, "learning_rate": 5.0760436399657605e-06, "loss": 0.3857, "step": 15120 }, { "epoch": 2.022064723188018, "grad_norm": 1.4979171752929688, "learning_rate": 5.074787095381647e-06, "loss": 0.3934, "step": 15121 }, { "epoch": 2.0221984487830973, "grad_norm": 1.5418989658355713, "learning_rate": 5.0735306534588826e-06, "loss": 0.425, "step": 15122 }, { "epoch": 2.022332174378176, "grad_norm": 1.236465334892273, "learning_rate": 5.0722743142236585e-06, "loss": 0.3758, "step": 15123 }, { "epoch": 2.022465899973255, "grad_norm": 1.3377609252929688, "learning_rate": 5.071018077702161e-06, "loss": 0.355, "step": 15124 }, { "epoch": 2.0225996255683336, "grad_norm": 1.722356915473938, "learning_rate": 5.069761943920575e-06, "loss": 0.4262, "step": 15125 }, { "epoch": 2.0227333511634127, "grad_norm": 1.3811054229736328, "learning_rate": 5.068505912905083e-06, "loss": 0.3874, "step": 15126 }, { "epoch": 2.0228670767584918, "grad_norm": 1.5753281116485596, "learning_rate": 5.067249984681865e-06, "loss": 0.4458, "step": 15127 }, { "epoch": 2.0230008023535704, "grad_norm": 1.3950414657592773, "learning_rate": 5.065994159277103e-06, "loss": 0.3857, "step": 15128 }, { "epoch": 2.0231345279486495, "grad_norm": 1.4190549850463867, "learning_rate": 5.064738436716972e-06, "loss": 0.375, "step": 15129 }, { "epoch": 2.023268253543728, "grad_norm": 1.3524816036224365, "learning_rate": 5.0634828170276486e-06, "loss": 0.3866, "step": 15130 }, { "epoch": 2.023401979138807, "grad_norm": 1.4363137483596802, "learning_rate": 5.062227300235294e-06, "loss": 0.3781, "step": 15131 }, { "epoch": 2.023535704733886, "grad_norm": 1.4599323272705078, "learning_rate": 5.06097188636609e-06, "loss": 0.4079, "step": 15132 }, { "epoch": 2.023669430328965, "grad_norm": 1.4136043787002563, "learning_rate": 5.0597165754462065e-06, "loss": 0.3718, "step": 15133 }, { "epoch": 2.023803155924044, "grad_norm": 1.4858431816101074, "learning_rate": 5.058461367501794e-06, "loss": 0.3986, "step": 15134 }, { "epoch": 2.0239368815191225, "grad_norm": 1.6382603645324707, "learning_rate": 5.0572062625590355e-06, "loss": 0.3948, "step": 15135 }, { "epoch": 2.0240706071142016, "grad_norm": 1.5846140384674072, "learning_rate": 5.055951260644074e-06, "loss": 0.3915, "step": 15136 }, { "epoch": 2.0242043327092807, "grad_norm": 1.5171692371368408, "learning_rate": 5.054696361783084e-06, "loss": 0.4071, "step": 15137 }, { "epoch": 2.0243380583043593, "grad_norm": 1.3030234575271606, "learning_rate": 5.053441566002214e-06, "loss": 0.3592, "step": 15138 }, { "epoch": 2.0244717838994384, "grad_norm": 1.3448652029037476, "learning_rate": 5.052186873327617e-06, "loss": 0.3638, "step": 15139 }, { "epoch": 2.0246055094945175, "grad_norm": 1.5697062015533447, "learning_rate": 5.050932283785457e-06, "loss": 0.4285, "step": 15140 }, { "epoch": 2.024739235089596, "grad_norm": 1.2767164707183838, "learning_rate": 5.049677797401875e-06, "loss": 0.3493, "step": 15141 }, { "epoch": 2.024872960684675, "grad_norm": 1.2968182563781738, "learning_rate": 5.048423414203022e-06, "loss": 0.3675, "step": 15142 }, { "epoch": 2.0250066862797538, "grad_norm": 1.3948919773101807, "learning_rate": 5.0471691342150445e-06, "loss": 0.4235, "step": 15143 }, { "epoch": 2.025140411874833, "grad_norm": 1.3990373611450195, "learning_rate": 5.045914957464086e-06, "loss": 0.4022, "step": 15144 }, { "epoch": 2.025274137469912, "grad_norm": 1.3211435079574585, "learning_rate": 5.0446608839762925e-06, "loss": 0.4302, "step": 15145 }, { "epoch": 2.0254078630649905, "grad_norm": 1.4555833339691162, "learning_rate": 5.0434069137778e-06, "loss": 0.3844, "step": 15146 }, { "epoch": 2.0255415886600696, "grad_norm": 1.3638324737548828, "learning_rate": 5.042153046894746e-06, "loss": 0.3565, "step": 15147 }, { "epoch": 2.0256753142551482, "grad_norm": 1.3377161026000977, "learning_rate": 5.040899283353269e-06, "loss": 0.3833, "step": 15148 }, { "epoch": 2.0258090398502273, "grad_norm": 1.2978307008743286, "learning_rate": 5.039645623179503e-06, "loss": 0.359, "step": 15149 }, { "epoch": 2.0259427654453064, "grad_norm": 1.5284180641174316, "learning_rate": 5.038392066399572e-06, "loss": 0.3644, "step": 15150 }, { "epoch": 2.026076491040385, "grad_norm": 1.2375569343566895, "learning_rate": 5.037138613039614e-06, "loss": 0.324, "step": 15151 }, { "epoch": 2.026210216635464, "grad_norm": 1.4792269468307495, "learning_rate": 5.035885263125753e-06, "loss": 0.4198, "step": 15152 }, { "epoch": 2.026343942230543, "grad_norm": 1.5818573236465454, "learning_rate": 5.034632016684112e-06, "loss": 0.3535, "step": 15153 }, { "epoch": 2.0264776678256218, "grad_norm": 1.479024887084961, "learning_rate": 5.03337887374082e-06, "loss": 0.3818, "step": 15154 }, { "epoch": 2.026611393420701, "grad_norm": 1.453445315361023, "learning_rate": 5.032125834321986e-06, "loss": 0.413, "step": 15155 }, { "epoch": 2.0267451190157795, "grad_norm": 1.3083502054214478, "learning_rate": 5.030872898453742e-06, "loss": 0.3598, "step": 15156 }, { "epoch": 2.0268788446108585, "grad_norm": 1.2808568477630615, "learning_rate": 5.029620066162193e-06, "loss": 0.3362, "step": 15157 }, { "epoch": 2.0270125702059376, "grad_norm": 1.4797894954681396, "learning_rate": 5.0283673374734546e-06, "loss": 0.3931, "step": 15158 }, { "epoch": 2.0271462958010162, "grad_norm": 1.4074105024337769, "learning_rate": 5.02711471241365e-06, "loss": 0.3807, "step": 15159 }, { "epoch": 2.0272800213960953, "grad_norm": 1.571441888809204, "learning_rate": 5.025862191008872e-06, "loss": 0.3806, "step": 15160 }, { "epoch": 2.027413746991174, "grad_norm": 1.4238935708999634, "learning_rate": 5.024609773285245e-06, "loss": 0.4401, "step": 15161 }, { "epoch": 2.027547472586253, "grad_norm": 1.5721715688705444, "learning_rate": 5.023357459268863e-06, "loss": 0.4243, "step": 15162 }, { "epoch": 2.027681198181332, "grad_norm": 1.3580513000488281, "learning_rate": 5.022105248985831e-06, "loss": 0.3738, "step": 15163 }, { "epoch": 2.0278149237764107, "grad_norm": 1.5814497470855713, "learning_rate": 5.020853142462253e-06, "loss": 0.4318, "step": 15164 }, { "epoch": 2.0279486493714898, "grad_norm": 1.4298213720321655, "learning_rate": 5.019601139724226e-06, "loss": 0.4226, "step": 15165 }, { "epoch": 2.0280823749665684, "grad_norm": 1.4232758283615112, "learning_rate": 5.018349240797848e-06, "loss": 0.3809, "step": 15166 }, { "epoch": 2.0282161005616475, "grad_norm": 1.3235522508621216, "learning_rate": 5.017097445709214e-06, "loss": 0.3192, "step": 15167 }, { "epoch": 2.0283498261567265, "grad_norm": 1.3963795900344849, "learning_rate": 5.015845754484414e-06, "loss": 0.3937, "step": 15168 }, { "epoch": 2.028483551751805, "grad_norm": 1.6338738203048706, "learning_rate": 5.014594167149541e-06, "loss": 0.4205, "step": 15169 }, { "epoch": 2.0286172773468842, "grad_norm": 1.3646982908248901, "learning_rate": 5.013342683730682e-06, "loss": 0.3539, "step": 15170 }, { "epoch": 2.0287510029419633, "grad_norm": 1.546512484550476, "learning_rate": 5.012091304253923e-06, "loss": 0.3964, "step": 15171 }, { "epoch": 2.028884728537042, "grad_norm": 1.3893542289733887, "learning_rate": 5.010840028745347e-06, "loss": 0.3952, "step": 15172 }, { "epoch": 2.029018454132121, "grad_norm": 1.4347014427185059, "learning_rate": 5.009588857231043e-06, "loss": 0.3797, "step": 15173 }, { "epoch": 2.0291521797271996, "grad_norm": 1.3542146682739258, "learning_rate": 5.008337789737073e-06, "loss": 0.3678, "step": 15174 }, { "epoch": 2.0292859053222787, "grad_norm": 1.3876335620880127, "learning_rate": 5.007086826289535e-06, "loss": 0.3886, "step": 15175 }, { "epoch": 2.0294196309173578, "grad_norm": 1.85788094997406, "learning_rate": 5.005835966914485e-06, "loss": 0.3778, "step": 15176 }, { "epoch": 2.0295533565124364, "grad_norm": 1.4473538398742676, "learning_rate": 5.004585211638011e-06, "loss": 0.3645, "step": 15177 }, { "epoch": 2.0296870821075155, "grad_norm": 1.495301365852356, "learning_rate": 5.003334560486181e-06, "loss": 0.4184, "step": 15178 }, { "epoch": 2.029820807702594, "grad_norm": 1.7206151485443115, "learning_rate": 5.002084013485054e-06, "loss": 0.4531, "step": 15179 }, { "epoch": 2.029954533297673, "grad_norm": 1.387464165687561, "learning_rate": 5.0008335706607095e-06, "loss": 0.3831, "step": 15180 }, { "epoch": 2.0300882588927522, "grad_norm": 1.582840919494629, "learning_rate": 4.999583232039202e-06, "loss": 0.3691, "step": 15181 }, { "epoch": 2.030221984487831, "grad_norm": 1.3650336265563965, "learning_rate": 4.998332997646598e-06, "loss": 0.3599, "step": 15182 }, { "epoch": 2.03035571008291, "grad_norm": 1.5347820520401, "learning_rate": 4.997082867508956e-06, "loss": 0.4252, "step": 15183 }, { "epoch": 2.0304894356779886, "grad_norm": 1.456618070602417, "learning_rate": 4.99583284165233e-06, "loss": 0.3722, "step": 15184 }, { "epoch": 2.0306231612730676, "grad_norm": 1.3619157075881958, "learning_rate": 4.9945829201027894e-06, "loss": 0.3729, "step": 15185 }, { "epoch": 2.0307568868681467, "grad_norm": 1.5675910711288452, "learning_rate": 4.993333102886373e-06, "loss": 0.3584, "step": 15186 }, { "epoch": 2.0308906124632253, "grad_norm": 1.4434353113174438, "learning_rate": 4.992083390029138e-06, "loss": 0.4009, "step": 15187 }, { "epoch": 2.0310243380583044, "grad_norm": 1.5061726570129395, "learning_rate": 4.990833781557132e-06, "loss": 0.3879, "step": 15188 }, { "epoch": 2.0311580636533835, "grad_norm": 1.4095494747161865, "learning_rate": 4.989584277496402e-06, "loss": 0.3881, "step": 15189 }, { "epoch": 2.031291789248462, "grad_norm": 1.506284475326538, "learning_rate": 4.988334877872995e-06, "loss": 0.354, "step": 15190 }, { "epoch": 2.031425514843541, "grad_norm": 1.6584863662719727, "learning_rate": 4.987085582712951e-06, "loss": 0.4011, "step": 15191 }, { "epoch": 2.03155924043862, "grad_norm": 1.3552623987197876, "learning_rate": 4.985836392042311e-06, "loss": 0.3884, "step": 15192 }, { "epoch": 2.031692966033699, "grad_norm": 1.555468201637268, "learning_rate": 4.984587305887113e-06, "loss": 0.3966, "step": 15193 }, { "epoch": 2.031826691628778, "grad_norm": 1.5172396898269653, "learning_rate": 4.983338324273397e-06, "loss": 0.4263, "step": 15194 }, { "epoch": 2.0319604172238566, "grad_norm": 1.3437168598175049, "learning_rate": 4.982089447227187e-06, "loss": 0.3672, "step": 15195 }, { "epoch": 2.0320941428189356, "grad_norm": 1.3788877725601196, "learning_rate": 4.980840674774523e-06, "loss": 0.3858, "step": 15196 }, { "epoch": 2.0322278684140143, "grad_norm": 1.3980292081832886, "learning_rate": 4.979592006941437e-06, "loss": 0.3747, "step": 15197 }, { "epoch": 2.0323615940090933, "grad_norm": 1.3463141918182373, "learning_rate": 4.9783434437539444e-06, "loss": 0.3945, "step": 15198 }, { "epoch": 2.0324953196041724, "grad_norm": 1.9077330827713013, "learning_rate": 4.977094985238085e-06, "loss": 0.3421, "step": 15199 }, { "epoch": 2.032629045199251, "grad_norm": 1.4432581663131714, "learning_rate": 4.975846631419866e-06, "loss": 0.3582, "step": 15200 }, { "epoch": 2.03276277079433, "grad_norm": 1.4345471858978271, "learning_rate": 4.974598382325324e-06, "loss": 0.3917, "step": 15201 }, { "epoch": 2.0328964963894087, "grad_norm": 1.4581286907196045, "learning_rate": 4.973350237980466e-06, "loss": 0.4144, "step": 15202 }, { "epoch": 2.033030221984488, "grad_norm": 1.3915935754776, "learning_rate": 4.972102198411309e-06, "loss": 0.3656, "step": 15203 }, { "epoch": 2.033163947579567, "grad_norm": 1.5647811889648438, "learning_rate": 4.970854263643878e-06, "loss": 0.4091, "step": 15204 }, { "epoch": 2.0332976731746455, "grad_norm": 1.313264012336731, "learning_rate": 4.969606433704174e-06, "loss": 0.3244, "step": 15205 }, { "epoch": 2.0334313987697246, "grad_norm": 1.398871660232544, "learning_rate": 4.968358708618211e-06, "loss": 0.4075, "step": 15206 }, { "epoch": 2.0335651243648036, "grad_norm": 1.4235507249832153, "learning_rate": 4.967111088411994e-06, "loss": 0.4017, "step": 15207 }, { "epoch": 2.0336988499598823, "grad_norm": 1.48894202709198, "learning_rate": 4.9658635731115314e-06, "loss": 0.3981, "step": 15208 }, { "epoch": 2.0338325755549613, "grad_norm": 1.3534085750579834, "learning_rate": 4.964616162742826e-06, "loss": 0.3455, "step": 15209 }, { "epoch": 2.03396630115004, "grad_norm": 1.5894485712051392, "learning_rate": 4.9633688573318775e-06, "loss": 0.4114, "step": 15210 }, { "epoch": 2.034100026745119, "grad_norm": 1.669345736503601, "learning_rate": 4.962121656904686e-06, "loss": 0.4125, "step": 15211 }, { "epoch": 2.034233752340198, "grad_norm": 1.3715846538543701, "learning_rate": 4.960874561487248e-06, "loss": 0.4081, "step": 15212 }, { "epoch": 2.0343674779352767, "grad_norm": 1.3820350170135498, "learning_rate": 4.959627571105557e-06, "loss": 0.3695, "step": 15213 }, { "epoch": 2.034501203530356, "grad_norm": 1.37521493434906, "learning_rate": 4.958380685785608e-06, "loss": 0.3592, "step": 15214 }, { "epoch": 2.0346349291254344, "grad_norm": 1.5038729906082153, "learning_rate": 4.957133905553387e-06, "loss": 0.3825, "step": 15215 }, { "epoch": 2.0347686547205135, "grad_norm": 1.4716589450836182, "learning_rate": 4.955887230434886e-06, "loss": 0.4044, "step": 15216 }, { "epoch": 2.0349023803155926, "grad_norm": 1.2435904741287231, "learning_rate": 4.954640660456088e-06, "loss": 0.3209, "step": 15217 }, { "epoch": 2.035036105910671, "grad_norm": 1.3445383310317993, "learning_rate": 4.953394195642982e-06, "loss": 0.3838, "step": 15218 }, { "epoch": 2.0351698315057503, "grad_norm": 1.5189684629440308, "learning_rate": 4.9521478360215365e-06, "loss": 0.3945, "step": 15219 }, { "epoch": 2.035303557100829, "grad_norm": 1.4853973388671875, "learning_rate": 4.950901581617747e-06, "loss": 0.3694, "step": 15220 }, { "epoch": 2.035437282695908, "grad_norm": 1.3932093381881714, "learning_rate": 4.949655432457575e-06, "loss": 0.3337, "step": 15221 }, { "epoch": 2.035571008290987, "grad_norm": 1.4687823057174683, "learning_rate": 4.948409388567007e-06, "loss": 0.3707, "step": 15222 }, { "epoch": 2.0357047338860657, "grad_norm": 1.3845065832138062, "learning_rate": 4.947163449972016e-06, "loss": 0.3826, "step": 15223 }, { "epoch": 2.0358384594811447, "grad_norm": 1.439473032951355, "learning_rate": 4.945917616698559e-06, "loss": 0.3736, "step": 15224 }, { "epoch": 2.035972185076224, "grad_norm": 1.596977710723877, "learning_rate": 4.944671888772621e-06, "loss": 0.449, "step": 15225 }, { "epoch": 2.0361059106713024, "grad_norm": 1.590625524520874, "learning_rate": 4.943426266220156e-06, "loss": 0.4011, "step": 15226 }, { "epoch": 2.0362396362663815, "grad_norm": 1.3335574865341187, "learning_rate": 4.942180749067133e-06, "loss": 0.4002, "step": 15227 }, { "epoch": 2.03637336186146, "grad_norm": 1.3558980226516724, "learning_rate": 4.9409353373395105e-06, "loss": 0.3659, "step": 15228 }, { "epoch": 2.036507087456539, "grad_norm": 1.6549838781356812, "learning_rate": 4.939690031063251e-06, "loss": 0.3966, "step": 15229 }, { "epoch": 2.0366408130516183, "grad_norm": 1.2646405696868896, "learning_rate": 4.938444830264311e-06, "loss": 0.3413, "step": 15230 }, { "epoch": 2.036774538646697, "grad_norm": 1.458613395690918, "learning_rate": 4.937199734968644e-06, "loss": 0.4247, "step": 15231 }, { "epoch": 2.036908264241776, "grad_norm": 1.5858837366104126, "learning_rate": 4.935954745202205e-06, "loss": 0.4322, "step": 15232 }, { "epoch": 2.0370419898368546, "grad_norm": 1.6323989629745483, "learning_rate": 4.934709860990944e-06, "loss": 0.4228, "step": 15233 }, { "epoch": 2.0371757154319337, "grad_norm": 1.547903060913086, "learning_rate": 4.933465082360808e-06, "loss": 0.386, "step": 15234 }, { "epoch": 2.0373094410270127, "grad_norm": 1.4644842147827148, "learning_rate": 4.932220409337743e-06, "loss": 0.3745, "step": 15235 }, { "epoch": 2.0374431666220914, "grad_norm": 1.3342421054840088, "learning_rate": 4.930975841947696e-06, "loss": 0.4071, "step": 15236 }, { "epoch": 2.0375768922171704, "grad_norm": 1.4508602619171143, "learning_rate": 4.929731380216607e-06, "loss": 0.4127, "step": 15237 }, { "epoch": 2.037710617812249, "grad_norm": 1.3649914264678955, "learning_rate": 4.928487024170415e-06, "loss": 0.4485, "step": 15238 }, { "epoch": 2.037844343407328, "grad_norm": 1.564220666885376, "learning_rate": 4.927242773835063e-06, "loss": 0.4209, "step": 15239 }, { "epoch": 2.037978069002407, "grad_norm": 1.3991637229919434, "learning_rate": 4.925998629236473e-06, "loss": 0.3891, "step": 15240 }, { "epoch": 2.038111794597486, "grad_norm": 1.4363185167312622, "learning_rate": 4.92475459040059e-06, "loss": 0.3879, "step": 15241 }, { "epoch": 2.038245520192565, "grad_norm": 1.3756963014602661, "learning_rate": 4.923510657353344e-06, "loss": 0.351, "step": 15242 }, { "epoch": 2.038379245787644, "grad_norm": 1.4408073425292969, "learning_rate": 4.922266830120654e-06, "loss": 0.3716, "step": 15243 }, { "epoch": 2.0385129713827226, "grad_norm": 1.4855787754058838, "learning_rate": 4.921023108728461e-06, "loss": 0.3949, "step": 15244 }, { "epoch": 2.0386466969778017, "grad_norm": 1.571374773979187, "learning_rate": 4.919779493202673e-06, "loss": 0.3869, "step": 15245 }, { "epoch": 2.0387804225728803, "grad_norm": 1.3049968481063843, "learning_rate": 4.918535983569228e-06, "loss": 0.3796, "step": 15246 }, { "epoch": 2.0389141481679594, "grad_norm": 1.493226408958435, "learning_rate": 4.917292579854035e-06, "loss": 0.4057, "step": 15247 }, { "epoch": 2.0390478737630384, "grad_norm": 1.2981209754943848, "learning_rate": 4.916049282083013e-06, "loss": 0.3436, "step": 15248 }, { "epoch": 2.039181599358117, "grad_norm": 1.9203742742538452, "learning_rate": 4.91480609028208e-06, "loss": 0.4649, "step": 15249 }, { "epoch": 2.039315324953196, "grad_norm": 1.6046770811080933, "learning_rate": 4.913563004477148e-06, "loss": 0.394, "step": 15250 }, { "epoch": 2.0394490505482747, "grad_norm": 1.5132733583450317, "learning_rate": 4.912320024694128e-06, "loss": 0.3911, "step": 15251 }, { "epoch": 2.039582776143354, "grad_norm": 1.256216287612915, "learning_rate": 4.911077150958928e-06, "loss": 0.3762, "step": 15252 }, { "epoch": 2.039716501738433, "grad_norm": 1.7052435874938965, "learning_rate": 4.909834383297456e-06, "loss": 0.3758, "step": 15253 }, { "epoch": 2.0398502273335115, "grad_norm": 1.400803565979004, "learning_rate": 4.908591721735615e-06, "loss": 0.3905, "step": 15254 }, { "epoch": 2.0399839529285906, "grad_norm": 1.473936915397644, "learning_rate": 4.907349166299308e-06, "loss": 0.3609, "step": 15255 }, { "epoch": 2.0401176785236697, "grad_norm": 1.4464904069900513, "learning_rate": 4.9061067170144335e-06, "loss": 0.386, "step": 15256 }, { "epoch": 2.0402514041187483, "grad_norm": 1.4845995903015137, "learning_rate": 4.904864373906892e-06, "loss": 0.3776, "step": 15257 }, { "epoch": 2.0403851297138274, "grad_norm": 1.3443933725357056, "learning_rate": 4.903622137002579e-06, "loss": 0.3691, "step": 15258 }, { "epoch": 2.040518855308906, "grad_norm": 1.3063358068466187, "learning_rate": 4.9023800063273795e-06, "loss": 0.3458, "step": 15259 }, { "epoch": 2.040652580903985, "grad_norm": 1.3413947820663452, "learning_rate": 4.9011379819071935e-06, "loss": 0.3771, "step": 15260 }, { "epoch": 2.040786306499064, "grad_norm": 1.543323278427124, "learning_rate": 4.899896063767908e-06, "loss": 0.4247, "step": 15261 }, { "epoch": 2.0409200320941427, "grad_norm": 1.4663028717041016, "learning_rate": 4.898654251935409e-06, "loss": 0.3917, "step": 15262 }, { "epoch": 2.041053757689222, "grad_norm": 1.6112192869186401, "learning_rate": 4.8974125464355845e-06, "loss": 0.4333, "step": 15263 }, { "epoch": 2.0411874832843004, "grad_norm": 1.3861055374145508, "learning_rate": 4.8961709472943045e-06, "loss": 0.3634, "step": 15264 }, { "epoch": 2.0413212088793795, "grad_norm": 1.3533494472503662, "learning_rate": 4.894929454537466e-06, "loss": 0.3607, "step": 15265 }, { "epoch": 2.0414549344744586, "grad_norm": 1.5087858438491821, "learning_rate": 4.893688068190933e-06, "loss": 0.4189, "step": 15266 }, { "epoch": 2.041588660069537, "grad_norm": 1.486737847328186, "learning_rate": 4.892446788280587e-06, "loss": 0.3882, "step": 15267 }, { "epoch": 2.0417223856646163, "grad_norm": 1.3685330152511597, "learning_rate": 4.8912056148323e-06, "loss": 0.3735, "step": 15268 }, { "epoch": 2.041856111259695, "grad_norm": 1.2882962226867676, "learning_rate": 4.889964547871938e-06, "loss": 0.3422, "step": 15269 }, { "epoch": 2.041989836854774, "grad_norm": 1.476643443107605, "learning_rate": 4.888723587425385e-06, "loss": 0.3588, "step": 15270 }, { "epoch": 2.042123562449853, "grad_norm": 1.3776965141296387, "learning_rate": 4.887482733518493e-06, "loss": 0.3786, "step": 15271 }, { "epoch": 2.0422572880449317, "grad_norm": 1.3672350645065308, "learning_rate": 4.886241986177132e-06, "loss": 0.362, "step": 15272 }, { "epoch": 2.0423910136400107, "grad_norm": 1.4884644746780396, "learning_rate": 4.885001345427163e-06, "loss": 0.3524, "step": 15273 }, { "epoch": 2.04252473923509, "grad_norm": 1.5446165800094604, "learning_rate": 4.8837608112944456e-06, "loss": 0.4219, "step": 15274 }, { "epoch": 2.0426584648301684, "grad_norm": 1.1544125080108643, "learning_rate": 4.88252038380484e-06, "loss": 0.293, "step": 15275 }, { "epoch": 2.0427921904252475, "grad_norm": 1.2423200607299805, "learning_rate": 4.881280062984198e-06, "loss": 0.3475, "step": 15276 }, { "epoch": 2.042925916020326, "grad_norm": 1.3745707273483276, "learning_rate": 4.880039848858377e-06, "loss": 0.4141, "step": 15277 }, { "epoch": 2.043059641615405, "grad_norm": 1.3408139944076538, "learning_rate": 4.878799741453225e-06, "loss": 0.3768, "step": 15278 }, { "epoch": 2.0431933672104843, "grad_norm": 1.4538769721984863, "learning_rate": 4.877559740794593e-06, "loss": 0.3642, "step": 15279 }, { "epoch": 2.043327092805563, "grad_norm": 1.4617369174957275, "learning_rate": 4.876319846908326e-06, "loss": 0.3734, "step": 15280 }, { "epoch": 2.043460818400642, "grad_norm": 1.7942547798156738, "learning_rate": 4.875080059820268e-06, "loss": 0.4604, "step": 15281 }, { "epoch": 2.0435945439957206, "grad_norm": 1.6429582834243774, "learning_rate": 4.873840379556268e-06, "loss": 0.4038, "step": 15282 }, { "epoch": 2.0437282695907997, "grad_norm": 1.4804496765136719, "learning_rate": 4.87260080614215e-06, "loss": 0.4507, "step": 15283 }, { "epoch": 2.0438619951858787, "grad_norm": 1.5194768905639648, "learning_rate": 4.87136133960377e-06, "loss": 0.4226, "step": 15284 }, { "epoch": 2.0439957207809574, "grad_norm": 1.3231741189956665, "learning_rate": 4.8701219799669495e-06, "loss": 0.3851, "step": 15285 }, { "epoch": 2.0441294463760364, "grad_norm": 1.431269884109497, "learning_rate": 4.86888272725753e-06, "loss": 0.3693, "step": 15286 }, { "epoch": 2.044263171971115, "grad_norm": 1.5591132640838623, "learning_rate": 4.867643581501345e-06, "loss": 0.4273, "step": 15287 }, { "epoch": 2.044396897566194, "grad_norm": 1.5449163913726807, "learning_rate": 4.866404542724209e-06, "loss": 0.4169, "step": 15288 }, { "epoch": 2.044530623161273, "grad_norm": 1.5689740180969238, "learning_rate": 4.865165610951966e-06, "loss": 0.3809, "step": 15289 }, { "epoch": 2.044664348756352, "grad_norm": 1.4284613132476807, "learning_rate": 4.86392678621043e-06, "loss": 0.4197, "step": 15290 }, { "epoch": 2.044798074351431, "grad_norm": 1.4720640182495117, "learning_rate": 4.862688068525424e-06, "loss": 0.4081, "step": 15291 }, { "epoch": 2.04493179994651, "grad_norm": 1.5082443952560425, "learning_rate": 4.86144945792277e-06, "loss": 0.4029, "step": 15292 }, { "epoch": 2.0450655255415886, "grad_norm": 1.4518718719482422, "learning_rate": 4.860210954428285e-06, "loss": 0.3803, "step": 15293 }, { "epoch": 2.0451992511366677, "grad_norm": 1.3054760694503784, "learning_rate": 4.858972558067784e-06, "loss": 0.3502, "step": 15294 }, { "epoch": 2.0453329767317463, "grad_norm": 1.5468804836273193, "learning_rate": 4.857734268867082e-06, "loss": 0.42, "step": 15295 }, { "epoch": 2.0454667023268254, "grad_norm": 1.467820167541504, "learning_rate": 4.856496086851986e-06, "loss": 0.3736, "step": 15296 }, { "epoch": 2.0456004279219044, "grad_norm": 1.5120972394943237, "learning_rate": 4.855258012048309e-06, "loss": 0.3558, "step": 15297 }, { "epoch": 2.045734153516983, "grad_norm": 1.479880452156067, "learning_rate": 4.854020044481855e-06, "loss": 0.4203, "step": 15298 }, { "epoch": 2.045867879112062, "grad_norm": 1.5339252948760986, "learning_rate": 4.852782184178431e-06, "loss": 0.3882, "step": 15299 }, { "epoch": 2.0460016047071408, "grad_norm": 1.3302209377288818, "learning_rate": 4.851544431163835e-06, "loss": 0.3336, "step": 15300 }, { "epoch": 2.04613533030222, "grad_norm": 1.3409371376037598, "learning_rate": 4.850306785463869e-06, "loss": 0.3656, "step": 15301 }, { "epoch": 2.046269055897299, "grad_norm": 1.5098565816879272, "learning_rate": 4.84906924710433e-06, "loss": 0.3877, "step": 15302 }, { "epoch": 2.0464027814923775, "grad_norm": 1.410828709602356, "learning_rate": 4.847831816111019e-06, "loss": 0.3682, "step": 15303 }, { "epoch": 2.0465365070874566, "grad_norm": 1.2830333709716797, "learning_rate": 4.846594492509714e-06, "loss": 0.3522, "step": 15304 }, { "epoch": 2.0466702326825352, "grad_norm": 1.3533754348754883, "learning_rate": 4.845357276326221e-06, "loss": 0.3606, "step": 15305 }, { "epoch": 2.0468039582776143, "grad_norm": 1.5009660720825195, "learning_rate": 4.844120167586323e-06, "loss": 0.3761, "step": 15306 }, { "epoch": 2.0469376838726934, "grad_norm": 1.3547630310058594, "learning_rate": 4.842883166315806e-06, "loss": 0.4152, "step": 15307 }, { "epoch": 2.047071409467772, "grad_norm": 1.7151812314987183, "learning_rate": 4.8416462725404575e-06, "loss": 0.4448, "step": 15308 }, { "epoch": 2.047205135062851, "grad_norm": 1.3633064031600952, "learning_rate": 4.840409486286051e-06, "loss": 0.3704, "step": 15309 }, { "epoch": 2.04733886065793, "grad_norm": 1.3008447885513306, "learning_rate": 4.839172807578377e-06, "loss": 0.3466, "step": 15310 }, { "epoch": 2.0474725862530088, "grad_norm": 1.6451970338821411, "learning_rate": 4.8379362364432045e-06, "loss": 0.4071, "step": 15311 }, { "epoch": 2.047606311848088, "grad_norm": 1.5467127561569214, "learning_rate": 4.836699772906311e-06, "loss": 0.3838, "step": 15312 }, { "epoch": 2.0477400374431665, "grad_norm": 1.5761569738388062, "learning_rate": 4.835463416993471e-06, "loss": 0.4, "step": 15313 }, { "epoch": 2.0478737630382455, "grad_norm": 1.417314052581787, "learning_rate": 4.834227168730451e-06, "loss": 0.3457, "step": 15314 }, { "epoch": 2.0480074886333246, "grad_norm": 1.5240907669067383, "learning_rate": 4.8329910281430285e-06, "loss": 0.3965, "step": 15315 }, { "epoch": 2.0481412142284032, "grad_norm": 1.4235204458236694, "learning_rate": 4.8317549952569605e-06, "loss": 0.3546, "step": 15316 }, { "epoch": 2.0482749398234823, "grad_norm": 1.474574089050293, "learning_rate": 4.830519070098014e-06, "loss": 0.3561, "step": 15317 }, { "epoch": 2.048408665418561, "grad_norm": 1.5900211334228516, "learning_rate": 4.829283252691951e-06, "loss": 0.4176, "step": 15318 }, { "epoch": 2.04854239101364, "grad_norm": 1.361810326576233, "learning_rate": 4.828047543064532e-06, "loss": 0.376, "step": 15319 }, { "epoch": 2.048676116608719, "grad_norm": 1.494605541229248, "learning_rate": 4.82681194124151e-06, "loss": 0.3982, "step": 15320 }, { "epoch": 2.0488098422037977, "grad_norm": 1.5423048734664917, "learning_rate": 4.8255764472486455e-06, "loss": 0.3804, "step": 15321 }, { "epoch": 2.0489435677988768, "grad_norm": 1.5912147760391235, "learning_rate": 4.824341061111688e-06, "loss": 0.3965, "step": 15322 }, { "epoch": 2.0490772933939554, "grad_norm": 1.405929684638977, "learning_rate": 4.823105782856388e-06, "loss": 0.3882, "step": 15323 }, { "epoch": 2.0492110189890345, "grad_norm": 1.4059332609176636, "learning_rate": 4.821870612508494e-06, "loss": 0.398, "step": 15324 }, { "epoch": 2.0493447445841135, "grad_norm": 1.460900902748108, "learning_rate": 4.820635550093753e-06, "loss": 0.412, "step": 15325 }, { "epoch": 2.049478470179192, "grad_norm": 1.3327229022979736, "learning_rate": 4.819400595637908e-06, "loss": 0.3349, "step": 15326 }, { "epoch": 2.0496121957742712, "grad_norm": 1.4798040390014648, "learning_rate": 4.818165749166703e-06, "loss": 0.3996, "step": 15327 }, { "epoch": 2.0497459213693503, "grad_norm": 1.2080055475234985, "learning_rate": 4.816931010705867e-06, "loss": 0.3163, "step": 15328 }, { "epoch": 2.049879646964429, "grad_norm": 1.32821786403656, "learning_rate": 4.815696380281153e-06, "loss": 0.3402, "step": 15329 }, { "epoch": 2.050013372559508, "grad_norm": 1.53327476978302, "learning_rate": 4.814461857918279e-06, "loss": 0.4132, "step": 15330 }, { "epoch": 2.0501470981545866, "grad_norm": 1.4783756732940674, "learning_rate": 4.8132274436429925e-06, "loss": 0.3839, "step": 15331 }, { "epoch": 2.0502808237496657, "grad_norm": 1.5153189897537231, "learning_rate": 4.811993137481014e-06, "loss": 0.3669, "step": 15332 }, { "epoch": 2.0504145493447448, "grad_norm": 1.3218958377838135, "learning_rate": 4.81075893945807e-06, "loss": 0.4032, "step": 15333 }, { "epoch": 2.0505482749398234, "grad_norm": 1.4127275943756104, "learning_rate": 4.809524849599897e-06, "loss": 0.3625, "step": 15334 }, { "epoch": 2.0506820005349025, "grad_norm": 1.3549495935440063, "learning_rate": 4.808290867932209e-06, "loss": 0.3601, "step": 15335 }, { "epoch": 2.050815726129981, "grad_norm": 1.3982123136520386, "learning_rate": 4.80705699448073e-06, "loss": 0.376, "step": 15336 }, { "epoch": 2.05094945172506, "grad_norm": 1.5429255962371826, "learning_rate": 4.8058232292711785e-06, "loss": 0.3432, "step": 15337 }, { "epoch": 2.0510831773201392, "grad_norm": 1.6331851482391357, "learning_rate": 4.804589572329271e-06, "loss": 0.4328, "step": 15338 }, { "epoch": 2.051216902915218, "grad_norm": 1.5125499963760376, "learning_rate": 4.803356023680723e-06, "loss": 0.3859, "step": 15339 }, { "epoch": 2.051350628510297, "grad_norm": 1.4796911478042603, "learning_rate": 4.802122583351246e-06, "loss": 0.3794, "step": 15340 }, { "epoch": 2.0514843541053756, "grad_norm": 1.316720724105835, "learning_rate": 4.80088925136655e-06, "loss": 0.412, "step": 15341 }, { "epoch": 2.0516180797004546, "grad_norm": 1.40620756149292, "learning_rate": 4.799656027752343e-06, "loss": 0.3939, "step": 15342 }, { "epoch": 2.0517518052955337, "grad_norm": 1.6071455478668213, "learning_rate": 4.798422912534329e-06, "loss": 0.389, "step": 15343 }, { "epoch": 2.0518855308906123, "grad_norm": 1.5946511030197144, "learning_rate": 4.797189905738212e-06, "loss": 0.3992, "step": 15344 }, { "epoch": 2.0520192564856914, "grad_norm": 1.4388788938522339, "learning_rate": 4.7959570073896935e-06, "loss": 0.3575, "step": 15345 }, { "epoch": 2.0521529820807705, "grad_norm": 1.3409440517425537, "learning_rate": 4.794724217514472e-06, "loss": 0.3401, "step": 15346 }, { "epoch": 2.052286707675849, "grad_norm": 1.6862062215805054, "learning_rate": 4.7934915361382414e-06, "loss": 0.4237, "step": 15347 }, { "epoch": 2.052420433270928, "grad_norm": 1.679821252822876, "learning_rate": 4.792258963286703e-06, "loss": 0.4306, "step": 15348 }, { "epoch": 2.052554158866007, "grad_norm": 1.4185497760772705, "learning_rate": 4.791026498985535e-06, "loss": 0.3337, "step": 15349 }, { "epoch": 2.052687884461086, "grad_norm": 1.4347972869873047, "learning_rate": 4.789794143260443e-06, "loss": 0.4081, "step": 15350 }, { "epoch": 2.052821610056165, "grad_norm": 1.4867558479309082, "learning_rate": 4.7885618961371025e-06, "loss": 0.3738, "step": 15351 }, { "epoch": 2.0529553356512436, "grad_norm": 1.5347763299942017, "learning_rate": 4.787329757641199e-06, "loss": 0.3621, "step": 15352 }, { "epoch": 2.0530890612463226, "grad_norm": 1.5248593091964722, "learning_rate": 4.7860977277984265e-06, "loss": 0.3582, "step": 15353 }, { "epoch": 2.0532227868414012, "grad_norm": 1.5093733072280884, "learning_rate": 4.784865806634449e-06, "loss": 0.378, "step": 15354 }, { "epoch": 2.0533565124364803, "grad_norm": 1.4741685390472412, "learning_rate": 4.783633994174962e-06, "loss": 0.3582, "step": 15355 }, { "epoch": 2.0534902380315594, "grad_norm": 1.480236530303955, "learning_rate": 4.782402290445629e-06, "loss": 0.3911, "step": 15356 }, { "epoch": 2.053623963626638, "grad_norm": 1.3714247941970825, "learning_rate": 4.781170695472127e-06, "loss": 0.3769, "step": 15357 }, { "epoch": 2.053757689221717, "grad_norm": 1.6008464097976685, "learning_rate": 4.779939209280129e-06, "loss": 0.3953, "step": 15358 }, { "epoch": 2.053891414816796, "grad_norm": 1.5429996252059937, "learning_rate": 4.778707831895302e-06, "loss": 0.3623, "step": 15359 }, { "epoch": 2.054025140411875, "grad_norm": 1.5035858154296875, "learning_rate": 4.777476563343314e-06, "loss": 0.3542, "step": 15360 }, { "epoch": 2.054158866006954, "grad_norm": 1.4074007272720337, "learning_rate": 4.776245403649831e-06, "loss": 0.3619, "step": 15361 }, { "epoch": 2.0542925916020325, "grad_norm": 1.4304219484329224, "learning_rate": 4.775014352840512e-06, "loss": 0.3642, "step": 15362 }, { "epoch": 2.0544263171971116, "grad_norm": 1.3462783098220825, "learning_rate": 4.773783410941021e-06, "loss": 0.3583, "step": 15363 }, { "epoch": 2.0545600427921906, "grad_norm": 1.4243463277816772, "learning_rate": 4.772552577977012e-06, "loss": 0.3796, "step": 15364 }, { "epoch": 2.0546937683872692, "grad_norm": 1.3190523386001587, "learning_rate": 4.771321853974144e-06, "loss": 0.4011, "step": 15365 }, { "epoch": 2.0548274939823483, "grad_norm": 1.4821985960006714, "learning_rate": 4.770091238958068e-06, "loss": 0.4381, "step": 15366 }, { "epoch": 2.054961219577427, "grad_norm": 1.3100749254226685, "learning_rate": 4.768860732954439e-06, "loss": 0.3692, "step": 15367 }, { "epoch": 2.055094945172506, "grad_norm": 1.4381048679351807, "learning_rate": 4.767630335988895e-06, "loss": 0.3713, "step": 15368 }, { "epoch": 2.055228670767585, "grad_norm": 1.4290353059768677, "learning_rate": 4.766400048087098e-06, "loss": 0.3603, "step": 15369 }, { "epoch": 2.0553623963626637, "grad_norm": 1.7387784719467163, "learning_rate": 4.765169869274676e-06, "loss": 0.4505, "step": 15370 }, { "epoch": 2.055496121957743, "grad_norm": 1.6746313571929932, "learning_rate": 4.763939799577283e-06, "loss": 0.4423, "step": 15371 }, { "epoch": 2.0556298475528214, "grad_norm": 1.3991596698760986, "learning_rate": 4.7627098390205574e-06, "loss": 0.3837, "step": 15372 }, { "epoch": 2.0557635731479005, "grad_norm": 1.4219862222671509, "learning_rate": 4.761479987630127e-06, "loss": 0.3342, "step": 15373 }, { "epoch": 2.0558972987429796, "grad_norm": 1.6099721193313599, "learning_rate": 4.76025024543164e-06, "loss": 0.3804, "step": 15374 }, { "epoch": 2.056031024338058, "grad_norm": 1.6572116613388062, "learning_rate": 4.75902061245072e-06, "loss": 0.4138, "step": 15375 }, { "epoch": 2.0561647499331372, "grad_norm": 1.5832918882369995, "learning_rate": 4.7577910887130004e-06, "loss": 0.3771, "step": 15376 }, { "epoch": 2.0562984755282163, "grad_norm": 1.4484951496124268, "learning_rate": 4.756561674244109e-06, "loss": 0.3726, "step": 15377 }, { "epoch": 2.056432201123295, "grad_norm": 1.4732202291488647, "learning_rate": 4.7553323690696685e-06, "loss": 0.3988, "step": 15378 }, { "epoch": 2.056565926718374, "grad_norm": 1.6103055477142334, "learning_rate": 4.754103173215313e-06, "loss": 0.4041, "step": 15379 }, { "epoch": 2.0566996523134526, "grad_norm": 1.2619105577468872, "learning_rate": 4.752874086706653e-06, "loss": 0.3517, "step": 15380 }, { "epoch": 2.0568333779085317, "grad_norm": 1.503366470336914, "learning_rate": 4.7516451095693125e-06, "loss": 0.4198, "step": 15381 }, { "epoch": 2.056967103503611, "grad_norm": 1.1841264963150024, "learning_rate": 4.7504162418289075e-06, "loss": 0.3487, "step": 15382 }, { "epoch": 2.0571008290986894, "grad_norm": 1.4515161514282227, "learning_rate": 4.749187483511053e-06, "loss": 0.3817, "step": 15383 }, { "epoch": 2.0572345546937685, "grad_norm": 1.376715064048767, "learning_rate": 4.747958834641361e-06, "loss": 0.4159, "step": 15384 }, { "epoch": 2.057368280288847, "grad_norm": 1.4098161458969116, "learning_rate": 4.746730295245441e-06, "loss": 0.3558, "step": 15385 }, { "epoch": 2.057502005883926, "grad_norm": 1.3264961242675781, "learning_rate": 4.7455018653489005e-06, "loss": 0.3744, "step": 15386 }, { "epoch": 2.0576357314790052, "grad_norm": 1.3074675798416138, "learning_rate": 4.744273544977346e-06, "loss": 0.342, "step": 15387 }, { "epoch": 2.057769457074084, "grad_norm": 1.5180000066757202, "learning_rate": 4.7430453341563806e-06, "loss": 0.3916, "step": 15388 }, { "epoch": 2.057903182669163, "grad_norm": 1.4633170366287231, "learning_rate": 4.7418172329116056e-06, "loss": 0.3741, "step": 15389 }, { "epoch": 2.0580369082642416, "grad_norm": 1.378891944885254, "learning_rate": 4.740589241268617e-06, "loss": 0.4013, "step": 15390 }, { "epoch": 2.0581706338593206, "grad_norm": 1.5611132383346558, "learning_rate": 4.739361359253014e-06, "loss": 0.3818, "step": 15391 }, { "epoch": 2.0583043594543997, "grad_norm": 1.3337607383728027, "learning_rate": 4.73813358689039e-06, "loss": 0.3919, "step": 15392 }, { "epoch": 2.0584380850494783, "grad_norm": 1.3864563703536987, "learning_rate": 4.73690592420634e-06, "loss": 0.3058, "step": 15393 }, { "epoch": 2.0585718106445574, "grad_norm": 1.673964023590088, "learning_rate": 4.7356783712264405e-06, "loss": 0.4276, "step": 15394 }, { "epoch": 2.0587055362396365, "grad_norm": 1.4214125871658325, "learning_rate": 4.7344509279762975e-06, "loss": 0.3592, "step": 15395 }, { "epoch": 2.058839261834715, "grad_norm": 1.6643668413162231, "learning_rate": 4.733223594481482e-06, "loss": 0.3514, "step": 15396 }, { "epoch": 2.058972987429794, "grad_norm": 1.4524213075637817, "learning_rate": 4.731996370767578e-06, "loss": 0.449, "step": 15397 }, { "epoch": 2.059106713024873, "grad_norm": 1.4068831205368042, "learning_rate": 4.730769256860175e-06, "loss": 0.3586, "step": 15398 }, { "epoch": 2.059240438619952, "grad_norm": 1.5733909606933594, "learning_rate": 4.729542252784837e-06, "loss": 0.4277, "step": 15399 }, { "epoch": 2.059374164215031, "grad_norm": 1.3529168367385864, "learning_rate": 4.728315358567155e-06, "loss": 0.3661, "step": 15400 }, { "epoch": 2.0595078898101096, "grad_norm": 1.4157673120498657, "learning_rate": 4.727088574232692e-06, "loss": 0.3594, "step": 15401 }, { "epoch": 2.0596416154051886, "grad_norm": 1.3042711019515991, "learning_rate": 4.7258618998070215e-06, "loss": 0.3721, "step": 15402 }, { "epoch": 2.0597753410002673, "grad_norm": 1.447706699371338, "learning_rate": 4.7246353353157125e-06, "loss": 0.4001, "step": 15403 }, { "epoch": 2.0599090665953463, "grad_norm": 1.6153004169464111, "learning_rate": 4.7234088807843334e-06, "loss": 0.4021, "step": 15404 }, { "epoch": 2.0600427921904254, "grad_norm": 1.5024189949035645, "learning_rate": 4.722182536238445e-06, "loss": 0.3561, "step": 15405 }, { "epoch": 2.060176517785504, "grad_norm": 1.4377762079238892, "learning_rate": 4.720956301703613e-06, "loss": 0.4167, "step": 15406 }, { "epoch": 2.060310243380583, "grad_norm": 1.46294105052948, "learning_rate": 4.719730177205395e-06, "loss": 0.3442, "step": 15407 }, { "epoch": 2.0604439689756617, "grad_norm": 1.506975769996643, "learning_rate": 4.7185041627693485e-06, "loss": 0.3877, "step": 15408 }, { "epoch": 2.060577694570741, "grad_norm": 1.5477209091186523, "learning_rate": 4.71727825842103e-06, "loss": 0.413, "step": 15409 }, { "epoch": 2.06071142016582, "grad_norm": 1.5747721195220947, "learning_rate": 4.71605246418599e-06, "loss": 0.3968, "step": 15410 }, { "epoch": 2.0608451457608985, "grad_norm": 1.5606242418289185, "learning_rate": 4.71482678008978e-06, "loss": 0.3785, "step": 15411 }, { "epoch": 2.0609788713559776, "grad_norm": 1.3939623832702637, "learning_rate": 4.713601206157953e-06, "loss": 0.3879, "step": 15412 }, { "epoch": 2.0611125969510566, "grad_norm": 1.3177127838134766, "learning_rate": 4.7123757424160425e-06, "loss": 0.3207, "step": 15413 }, { "epoch": 2.0612463225461353, "grad_norm": 1.5481077432632446, "learning_rate": 4.711150388889607e-06, "loss": 0.3892, "step": 15414 }, { "epoch": 2.0613800481412143, "grad_norm": 1.4694411754608154, "learning_rate": 4.709925145604173e-06, "loss": 0.409, "step": 15415 }, { "epoch": 2.061513773736293, "grad_norm": 1.406741976737976, "learning_rate": 4.708700012585292e-06, "loss": 0.3447, "step": 15416 }, { "epoch": 2.061647499331372, "grad_norm": 1.3609262704849243, "learning_rate": 4.707474989858499e-06, "loss": 0.3741, "step": 15417 }, { "epoch": 2.061781224926451, "grad_norm": 1.4467686414718628, "learning_rate": 4.706250077449318e-06, "loss": 0.4058, "step": 15418 }, { "epoch": 2.0619149505215297, "grad_norm": 1.4460002183914185, "learning_rate": 4.705025275383297e-06, "loss": 0.3858, "step": 15419 }, { "epoch": 2.062048676116609, "grad_norm": 1.3948044776916504, "learning_rate": 4.7038005836859525e-06, "loss": 0.3894, "step": 15420 }, { "epoch": 2.0621824017116874, "grad_norm": 1.5428670644760132, "learning_rate": 4.702576002382818e-06, "loss": 0.4011, "step": 15421 }, { "epoch": 2.0623161273067665, "grad_norm": 1.483926773071289, "learning_rate": 4.7013515314994174e-06, "loss": 0.3949, "step": 15422 }, { "epoch": 2.0624498529018456, "grad_norm": 1.4130859375, "learning_rate": 4.70012717106127e-06, "loss": 0.3747, "step": 15423 }, { "epoch": 2.062583578496924, "grad_norm": 1.3366918563842773, "learning_rate": 4.698902921093907e-06, "loss": 0.3625, "step": 15424 }, { "epoch": 2.0627173040920033, "grad_norm": 1.504630446434021, "learning_rate": 4.697678781622837e-06, "loss": 0.4103, "step": 15425 }, { "epoch": 2.062851029687082, "grad_norm": 1.4659929275512695, "learning_rate": 4.696454752673578e-06, "loss": 0.36, "step": 15426 }, { "epoch": 2.062984755282161, "grad_norm": 1.4550950527191162, "learning_rate": 4.695230834271647e-06, "loss": 0.403, "step": 15427 }, { "epoch": 2.06311848087724, "grad_norm": 1.4081318378448486, "learning_rate": 4.694007026442551e-06, "loss": 0.3468, "step": 15428 }, { "epoch": 2.0632522064723187, "grad_norm": 1.5238710641860962, "learning_rate": 4.692783329211802e-06, "loss": 0.4071, "step": 15429 }, { "epoch": 2.0633859320673977, "grad_norm": 1.3430339097976685, "learning_rate": 4.691559742604906e-06, "loss": 0.3464, "step": 15430 }, { "epoch": 2.063519657662477, "grad_norm": 1.5392705202102661, "learning_rate": 4.690336266647368e-06, "loss": 0.4052, "step": 15431 }, { "epoch": 2.0636533832575554, "grad_norm": 1.469671607017517, "learning_rate": 4.68911290136469e-06, "loss": 0.3903, "step": 15432 }, { "epoch": 2.0637871088526345, "grad_norm": 1.3256806135177612, "learning_rate": 4.687889646782374e-06, "loss": 0.3561, "step": 15433 }, { "epoch": 2.063920834447713, "grad_norm": 1.5027660131454468, "learning_rate": 4.686666502925908e-06, "loss": 0.3892, "step": 15434 }, { "epoch": 2.064054560042792, "grad_norm": 1.4907779693603516, "learning_rate": 4.685443469820799e-06, "loss": 0.3932, "step": 15435 }, { "epoch": 2.0641882856378713, "grad_norm": 1.6684945821762085, "learning_rate": 4.684220547492539e-06, "loss": 0.4415, "step": 15436 }, { "epoch": 2.06432201123295, "grad_norm": 1.4465241432189941, "learning_rate": 4.682997735966607e-06, "loss": 0.3496, "step": 15437 }, { "epoch": 2.064455736828029, "grad_norm": 1.2934316396713257, "learning_rate": 4.681775035268507e-06, "loss": 0.3857, "step": 15438 }, { "epoch": 2.0645894624231076, "grad_norm": 1.372052550315857, "learning_rate": 4.6805524454237095e-06, "loss": 0.3723, "step": 15439 }, { "epoch": 2.0647231880181867, "grad_norm": 1.4609370231628418, "learning_rate": 4.6793299664577145e-06, "loss": 0.3546, "step": 15440 }, { "epoch": 2.0648569136132657, "grad_norm": 1.4659334421157837, "learning_rate": 4.678107598395991e-06, "loss": 0.3685, "step": 15441 }, { "epoch": 2.0649906392083444, "grad_norm": 1.42794668674469, "learning_rate": 4.676885341264018e-06, "loss": 0.4092, "step": 15442 }, { "epoch": 2.0651243648034234, "grad_norm": 1.466582179069519, "learning_rate": 4.675663195087285e-06, "loss": 0.3961, "step": 15443 }, { "epoch": 2.065258090398502, "grad_norm": 1.6180777549743652, "learning_rate": 4.674441159891252e-06, "loss": 0.3839, "step": 15444 }, { "epoch": 2.065391815993581, "grad_norm": 1.452977180480957, "learning_rate": 4.673219235701398e-06, "loss": 0.4032, "step": 15445 }, { "epoch": 2.06552554158866, "grad_norm": 1.3510925769805908, "learning_rate": 4.6719974225431926e-06, "loss": 0.3724, "step": 15446 }, { "epoch": 2.065659267183739, "grad_norm": 1.3907662630081177, "learning_rate": 4.670775720442102e-06, "loss": 0.3478, "step": 15447 }, { "epoch": 2.065792992778818, "grad_norm": 1.6616202592849731, "learning_rate": 4.669554129423593e-06, "loss": 0.4248, "step": 15448 }, { "epoch": 2.065926718373897, "grad_norm": 1.3344568014144897, "learning_rate": 4.668332649513127e-06, "loss": 0.3925, "step": 15449 }, { "epoch": 2.0660604439689756, "grad_norm": 1.4247746467590332, "learning_rate": 4.667111280736164e-06, "loss": 0.4016, "step": 15450 }, { "epoch": 2.0661941695640547, "grad_norm": 1.465731143951416, "learning_rate": 4.665890023118164e-06, "loss": 0.4014, "step": 15451 }, { "epoch": 2.0663278951591333, "grad_norm": 1.4717645645141602, "learning_rate": 4.664668876684586e-06, "loss": 0.3552, "step": 15452 }, { "epoch": 2.0664616207542124, "grad_norm": 1.4359855651855469, "learning_rate": 4.663447841460872e-06, "loss": 0.3903, "step": 15453 }, { "epoch": 2.0665953463492914, "grad_norm": 1.3743728399276733, "learning_rate": 4.662226917472485e-06, "loss": 0.378, "step": 15454 }, { "epoch": 2.06672907194437, "grad_norm": 1.4159470796585083, "learning_rate": 4.661006104744871e-06, "loss": 0.416, "step": 15455 }, { "epoch": 2.066862797539449, "grad_norm": 1.5950192213058472, "learning_rate": 4.659785403303476e-06, "loss": 0.4044, "step": 15456 }, { "epoch": 2.0669965231345278, "grad_norm": 1.3533879518508911, "learning_rate": 4.658564813173747e-06, "loss": 0.3399, "step": 15457 }, { "epoch": 2.067130248729607, "grad_norm": 1.539728045463562, "learning_rate": 4.657344334381116e-06, "loss": 0.3432, "step": 15458 }, { "epoch": 2.067263974324686, "grad_norm": 1.3842591047286987, "learning_rate": 4.6561239669510385e-06, "loss": 0.3896, "step": 15459 }, { "epoch": 2.0673976999197645, "grad_norm": 1.5253359079360962, "learning_rate": 4.654903710908938e-06, "loss": 0.4117, "step": 15460 }, { "epoch": 2.0675314255148436, "grad_norm": 1.5704386234283447, "learning_rate": 4.653683566280253e-06, "loss": 0.4018, "step": 15461 }, { "epoch": 2.0676651511099227, "grad_norm": 1.3722317218780518, "learning_rate": 4.652463533090425e-06, "loss": 0.3447, "step": 15462 }, { "epoch": 2.0677988767050013, "grad_norm": 1.3908131122589111, "learning_rate": 4.65124361136487e-06, "loss": 0.342, "step": 15463 }, { "epoch": 2.0679326023000804, "grad_norm": 1.599563479423523, "learning_rate": 4.65002380112903e-06, "loss": 0.4318, "step": 15464 }, { "epoch": 2.068066327895159, "grad_norm": 1.3315554857254028, "learning_rate": 4.648804102408322e-06, "loss": 0.3428, "step": 15465 }, { "epoch": 2.068200053490238, "grad_norm": 1.4610991477966309, "learning_rate": 4.647584515228172e-06, "loss": 0.4078, "step": 15466 }, { "epoch": 2.068333779085317, "grad_norm": 1.3391027450561523, "learning_rate": 4.646365039614001e-06, "loss": 0.3362, "step": 15467 }, { "epoch": 2.0684675046803958, "grad_norm": 1.5410743951797485, "learning_rate": 4.6451456755912235e-06, "loss": 0.4062, "step": 15468 }, { "epoch": 2.068601230275475, "grad_norm": 1.4280060529708862, "learning_rate": 4.6439264231852685e-06, "loss": 0.384, "step": 15469 }, { "epoch": 2.0687349558705534, "grad_norm": 1.5747367143630981, "learning_rate": 4.642707282421538e-06, "loss": 0.3836, "step": 15470 }, { "epoch": 2.0688686814656325, "grad_norm": 1.3540974855422974, "learning_rate": 4.641488253325448e-06, "loss": 0.3465, "step": 15471 }, { "epoch": 2.0690024070607116, "grad_norm": 1.3723469972610474, "learning_rate": 4.6402693359224076e-06, "loss": 0.3493, "step": 15472 }, { "epoch": 2.06913613265579, "grad_norm": 1.2425537109375, "learning_rate": 4.639050530237824e-06, "loss": 0.352, "step": 15473 }, { "epoch": 2.0692698582508693, "grad_norm": 1.4034665822982788, "learning_rate": 4.637831836297103e-06, "loss": 0.4277, "step": 15474 }, { "epoch": 2.069403583845948, "grad_norm": 1.4398534297943115, "learning_rate": 4.636613254125646e-06, "loss": 0.3844, "step": 15475 }, { "epoch": 2.069537309441027, "grad_norm": 1.7757490873336792, "learning_rate": 4.635394783748853e-06, "loss": 0.4555, "step": 15476 }, { "epoch": 2.069671035036106, "grad_norm": 1.5073790550231934, "learning_rate": 4.634176425192123e-06, "loss": 0.3962, "step": 15477 }, { "epoch": 2.0698047606311847, "grad_norm": 1.4159979820251465, "learning_rate": 4.632958178480854e-06, "loss": 0.3639, "step": 15478 }, { "epoch": 2.0699384862262638, "grad_norm": 1.5473610162734985, "learning_rate": 4.6317400436404295e-06, "loss": 0.3332, "step": 15479 }, { "epoch": 2.0700722118213424, "grad_norm": 1.4713704586029053, "learning_rate": 4.63052202069625e-06, "loss": 0.3856, "step": 15480 }, { "epoch": 2.0702059374164214, "grad_norm": 1.3772010803222656, "learning_rate": 4.629304109673705e-06, "loss": 0.3616, "step": 15481 }, { "epoch": 2.0703396630115005, "grad_norm": 1.3994207382202148, "learning_rate": 4.628086310598169e-06, "loss": 0.3799, "step": 15482 }, { "epoch": 2.070473388606579, "grad_norm": 1.4276278018951416, "learning_rate": 4.62686862349504e-06, "loss": 0.3902, "step": 15483 }, { "epoch": 2.070607114201658, "grad_norm": 1.5051707029342651, "learning_rate": 4.625651048389687e-06, "loss": 0.3681, "step": 15484 }, { "epoch": 2.0707408397967373, "grad_norm": 1.5453317165374756, "learning_rate": 4.624433585307502e-06, "loss": 0.3946, "step": 15485 }, { "epoch": 2.070874565391816, "grad_norm": 1.5410877466201782, "learning_rate": 4.623216234273852e-06, "loss": 0.391, "step": 15486 }, { "epoch": 2.071008290986895, "grad_norm": 1.4690775871276855, "learning_rate": 4.62199899531411e-06, "loss": 0.4128, "step": 15487 }, { "epoch": 2.0711420165819736, "grad_norm": 1.3076168298721313, "learning_rate": 4.62078186845366e-06, "loss": 0.3263, "step": 15488 }, { "epoch": 2.0712757421770527, "grad_norm": 1.4047549962997437, "learning_rate": 4.619564853717861e-06, "loss": 0.4139, "step": 15489 }, { "epoch": 2.0714094677721318, "grad_norm": 1.686294436454773, "learning_rate": 4.618347951132085e-06, "loss": 0.454, "step": 15490 }, { "epoch": 2.0715431933672104, "grad_norm": 1.4207226037979126, "learning_rate": 4.617131160721696e-06, "loss": 0.4187, "step": 15491 }, { "epoch": 2.0716769189622894, "grad_norm": 1.4855901002883911, "learning_rate": 4.615914482512056e-06, "loss": 0.391, "step": 15492 }, { "epoch": 2.071810644557368, "grad_norm": 1.712656021118164, "learning_rate": 4.614697916528528e-06, "loss": 0.4107, "step": 15493 }, { "epoch": 2.071944370152447, "grad_norm": 1.6097272634506226, "learning_rate": 4.613481462796468e-06, "loss": 0.385, "step": 15494 }, { "epoch": 2.072078095747526, "grad_norm": 1.4192359447479248, "learning_rate": 4.612265121341233e-06, "loss": 0.3336, "step": 15495 }, { "epoch": 2.072211821342605, "grad_norm": 1.4438153505325317, "learning_rate": 4.6110488921881755e-06, "loss": 0.3494, "step": 15496 }, { "epoch": 2.072345546937684, "grad_norm": 1.5269197225570679, "learning_rate": 4.6098327753626515e-06, "loss": 0.4079, "step": 15497 }, { "epoch": 2.072479272532763, "grad_norm": 1.3769514560699463, "learning_rate": 4.608616770889998e-06, "loss": 0.3555, "step": 15498 }, { "epoch": 2.0726129981278416, "grad_norm": 1.4006940126419067, "learning_rate": 4.6074008787955725e-06, "loss": 0.3608, "step": 15499 }, { "epoch": 2.0727467237229207, "grad_norm": 1.4371217489242554, "learning_rate": 4.606185099104716e-06, "loss": 0.3772, "step": 15500 }, { "epoch": 2.0728804493179993, "grad_norm": 1.6114728450775146, "learning_rate": 4.604969431842769e-06, "loss": 0.3816, "step": 15501 }, { "epoch": 2.0730141749130784, "grad_norm": 1.3759255409240723, "learning_rate": 4.603753877035075e-06, "loss": 0.389, "step": 15502 }, { "epoch": 2.0731479005081574, "grad_norm": 1.2989004850387573, "learning_rate": 4.6025384347069615e-06, "loss": 0.368, "step": 15503 }, { "epoch": 2.073281626103236, "grad_norm": 1.4718331098556519, "learning_rate": 4.601323104883776e-06, "loss": 0.418, "step": 15504 }, { "epoch": 2.073415351698315, "grad_norm": 1.5887699127197266, "learning_rate": 4.600107887590841e-06, "loss": 0.432, "step": 15505 }, { "epoch": 2.0735490772933938, "grad_norm": 1.3867260217666626, "learning_rate": 4.598892782853487e-06, "loss": 0.3642, "step": 15506 }, { "epoch": 2.073682802888473, "grad_norm": 1.6015316247940063, "learning_rate": 4.597677790697051e-06, "loss": 0.4023, "step": 15507 }, { "epoch": 2.073816528483552, "grad_norm": 1.3856736421585083, "learning_rate": 4.596462911146845e-06, "loss": 0.4079, "step": 15508 }, { "epoch": 2.0739502540786305, "grad_norm": 1.439468502998352, "learning_rate": 4.595248144228206e-06, "loss": 0.4067, "step": 15509 }, { "epoch": 2.0740839796737096, "grad_norm": 1.6168230772018433, "learning_rate": 4.594033489966444e-06, "loss": 0.4147, "step": 15510 }, { "epoch": 2.0742177052687882, "grad_norm": 1.4911526441574097, "learning_rate": 4.592818948386882e-06, "loss": 0.3979, "step": 15511 }, { "epoch": 2.0743514308638673, "grad_norm": 1.4144484996795654, "learning_rate": 4.591604519514834e-06, "loss": 0.3672, "step": 15512 }, { "epoch": 2.0744851564589464, "grad_norm": 1.4156376123428345, "learning_rate": 4.5903902033756145e-06, "loss": 0.378, "step": 15513 }, { "epoch": 2.074618882054025, "grad_norm": 1.3800048828125, "learning_rate": 4.589175999994535e-06, "loss": 0.4055, "step": 15514 }, { "epoch": 2.074752607649104, "grad_norm": 1.5969502925872803, "learning_rate": 4.587961909396904e-06, "loss": 0.3754, "step": 15515 }, { "epoch": 2.074886333244183, "grad_norm": 1.4718846082687378, "learning_rate": 4.586747931608029e-06, "loss": 0.3701, "step": 15516 }, { "epoch": 2.0750200588392618, "grad_norm": 1.396140217781067, "learning_rate": 4.585534066653212e-06, "loss": 0.3939, "step": 15517 }, { "epoch": 2.075153784434341, "grad_norm": 1.3933732509613037, "learning_rate": 4.584320314557758e-06, "loss": 0.3096, "step": 15518 }, { "epoch": 2.0752875100294195, "grad_norm": 1.5565464496612549, "learning_rate": 4.583106675346964e-06, "loss": 0.4378, "step": 15519 }, { "epoch": 2.0754212356244985, "grad_norm": 1.356546401977539, "learning_rate": 4.581893149046128e-06, "loss": 0.3546, "step": 15520 }, { "epoch": 2.0755549612195776, "grad_norm": 1.4793492555618286, "learning_rate": 4.580679735680548e-06, "loss": 0.3828, "step": 15521 }, { "epoch": 2.0756886868146562, "grad_norm": 1.3967177867889404, "learning_rate": 4.579466435275506e-06, "loss": 0.3432, "step": 15522 }, { "epoch": 2.0758224124097353, "grad_norm": 1.6533570289611816, "learning_rate": 4.5782532478563065e-06, "loss": 0.383, "step": 15523 }, { "epoch": 2.075956138004814, "grad_norm": 1.3016936779022217, "learning_rate": 4.577040173448224e-06, "loss": 0.387, "step": 15524 }, { "epoch": 2.076089863599893, "grad_norm": 1.3832767009735107, "learning_rate": 4.575827212076553e-06, "loss": 0.407, "step": 15525 }, { "epoch": 2.076223589194972, "grad_norm": 1.5316741466522217, "learning_rate": 4.574614363766575e-06, "loss": 0.3565, "step": 15526 }, { "epoch": 2.0763573147900507, "grad_norm": 1.793339490890503, "learning_rate": 4.573401628543564e-06, "loss": 0.4005, "step": 15527 }, { "epoch": 2.0764910403851298, "grad_norm": 1.4098761081695557, "learning_rate": 4.57218900643281e-06, "loss": 0.3456, "step": 15528 }, { "epoch": 2.0766247659802084, "grad_norm": 1.622809886932373, "learning_rate": 4.570976497459579e-06, "loss": 0.4269, "step": 15529 }, { "epoch": 2.0767584915752875, "grad_norm": 1.3706339597702026, "learning_rate": 4.5697641016491465e-06, "loss": 0.4211, "step": 15530 }, { "epoch": 2.0768922171703665, "grad_norm": 1.5597443580627441, "learning_rate": 4.568551819026786e-06, "loss": 0.4087, "step": 15531 }, { "epoch": 2.077025942765445, "grad_norm": 1.3971006870269775, "learning_rate": 4.567339649617763e-06, "loss": 0.3465, "step": 15532 }, { "epoch": 2.0771596683605242, "grad_norm": 1.444258689880371, "learning_rate": 4.566127593447353e-06, "loss": 0.365, "step": 15533 }, { "epoch": 2.0772933939556033, "grad_norm": 1.5460172891616821, "learning_rate": 4.5649156505408084e-06, "loss": 0.4015, "step": 15534 }, { "epoch": 2.077427119550682, "grad_norm": 1.307062029838562, "learning_rate": 4.563703820923399e-06, "loss": 0.3839, "step": 15535 }, { "epoch": 2.077560845145761, "grad_norm": 1.4445074796676636, "learning_rate": 4.56249210462038e-06, "loss": 0.377, "step": 15536 }, { "epoch": 2.0776945707408396, "grad_norm": 1.470812201499939, "learning_rate": 4.56128050165701e-06, "loss": 0.4017, "step": 15537 }, { "epoch": 2.0778282963359187, "grad_norm": 1.636654019355774, "learning_rate": 4.560069012058543e-06, "loss": 0.4301, "step": 15538 }, { "epoch": 2.0779620219309978, "grad_norm": 1.4901503324508667, "learning_rate": 4.558857635850233e-06, "loss": 0.3584, "step": 15539 }, { "epoch": 2.0780957475260764, "grad_norm": 1.41392183303833, "learning_rate": 4.557646373057329e-06, "loss": 0.3564, "step": 15540 }, { "epoch": 2.0782294731211555, "grad_norm": 1.3927148580551147, "learning_rate": 4.556435223705078e-06, "loss": 0.3758, "step": 15541 }, { "epoch": 2.078363198716234, "grad_norm": 1.6424471139907837, "learning_rate": 4.55522418781873e-06, "loss": 0.3916, "step": 15542 }, { "epoch": 2.078496924311313, "grad_norm": 1.497828483581543, "learning_rate": 4.554013265423516e-06, "loss": 0.3418, "step": 15543 }, { "epoch": 2.0786306499063922, "grad_norm": 1.3883613348007202, "learning_rate": 4.552802456544688e-06, "loss": 0.3662, "step": 15544 }, { "epoch": 2.078764375501471, "grad_norm": 1.487144112586975, "learning_rate": 4.551591761207485e-06, "loss": 0.3777, "step": 15545 }, { "epoch": 2.07889810109655, "grad_norm": 1.60309636592865, "learning_rate": 4.550381179437129e-06, "loss": 0.3903, "step": 15546 }, { "epoch": 2.0790318266916286, "grad_norm": 1.3909451961517334, "learning_rate": 4.549170711258872e-06, "loss": 0.3926, "step": 15547 }, { "epoch": 2.0791655522867076, "grad_norm": 1.4441571235656738, "learning_rate": 4.547960356697927e-06, "loss": 0.3914, "step": 15548 }, { "epoch": 2.0792992778817867, "grad_norm": 1.4510213136672974, "learning_rate": 4.546750115779538e-06, "loss": 0.378, "step": 15549 }, { "epoch": 2.0794330034768653, "grad_norm": 1.4373650550842285, "learning_rate": 4.545539988528922e-06, "loss": 0.3843, "step": 15550 }, { "epoch": 2.0795667290719444, "grad_norm": 1.447117567062378, "learning_rate": 4.544329974971302e-06, "loss": 0.4021, "step": 15551 }, { "epoch": 2.0797004546670235, "grad_norm": 1.6633490324020386, "learning_rate": 4.543120075131911e-06, "loss": 0.4068, "step": 15552 }, { "epoch": 2.079834180262102, "grad_norm": 1.3152557611465454, "learning_rate": 4.5419102890359515e-06, "loss": 0.3379, "step": 15553 }, { "epoch": 2.079967905857181, "grad_norm": 1.5810796022415161, "learning_rate": 4.5407006167086575e-06, "loss": 0.3816, "step": 15554 }, { "epoch": 2.08010163145226, "grad_norm": 1.4355103969573975, "learning_rate": 4.5394910581752315e-06, "loss": 0.3462, "step": 15555 }, { "epoch": 2.080235357047339, "grad_norm": 1.437821388244629, "learning_rate": 4.538281613460889e-06, "loss": 0.3969, "step": 15556 }, { "epoch": 2.080369082642418, "grad_norm": 1.5658038854599, "learning_rate": 4.5370722825908395e-06, "loss": 0.4516, "step": 15557 }, { "epoch": 2.0805028082374966, "grad_norm": 1.5167653560638428, "learning_rate": 4.5358630655902916e-06, "loss": 0.3763, "step": 15558 }, { "epoch": 2.0806365338325756, "grad_norm": 1.448601484298706, "learning_rate": 4.53465396248445e-06, "loss": 0.3306, "step": 15559 }, { "epoch": 2.0807702594276543, "grad_norm": 1.5941975116729736, "learning_rate": 4.533444973298516e-06, "loss": 0.421, "step": 15560 }, { "epoch": 2.0809039850227333, "grad_norm": 1.4270976781845093, "learning_rate": 4.5322360980576904e-06, "loss": 0.3797, "step": 15561 }, { "epoch": 2.0810377106178124, "grad_norm": 1.5342450141906738, "learning_rate": 4.531027336787172e-06, "loss": 0.392, "step": 15562 }, { "epoch": 2.081171436212891, "grad_norm": 1.5524028539657593, "learning_rate": 4.529818689512154e-06, "loss": 0.4221, "step": 15563 }, { "epoch": 2.08130516180797, "grad_norm": 1.3768445253372192, "learning_rate": 4.528610156257832e-06, "loss": 0.3553, "step": 15564 }, { "epoch": 2.081438887403049, "grad_norm": 1.478058934211731, "learning_rate": 4.527401737049396e-06, "loss": 0.4038, "step": 15565 }, { "epoch": 2.081572612998128, "grad_norm": 1.5652023553848267, "learning_rate": 4.526193431912038e-06, "loss": 0.3762, "step": 15566 }, { "epoch": 2.081706338593207, "grad_norm": 1.4810761213302612, "learning_rate": 4.524985240870932e-06, "loss": 0.3679, "step": 15567 }, { "epoch": 2.0818400641882855, "grad_norm": 1.6437420845031738, "learning_rate": 4.523777163951277e-06, "loss": 0.3823, "step": 15568 }, { "epoch": 2.0819737897833646, "grad_norm": 1.3160046339035034, "learning_rate": 4.5225692011782395e-06, "loss": 0.3412, "step": 15569 }, { "epoch": 2.0821075153784436, "grad_norm": 1.2827880382537842, "learning_rate": 4.521361352577011e-06, "loss": 0.391, "step": 15570 }, { "epoch": 2.0822412409735223, "grad_norm": 1.5145409107208252, "learning_rate": 4.520153618172764e-06, "loss": 0.4538, "step": 15571 }, { "epoch": 2.0823749665686013, "grad_norm": 1.621311902999878, "learning_rate": 4.518945997990665e-06, "loss": 0.4002, "step": 15572 }, { "epoch": 2.08250869216368, "grad_norm": 1.3657732009887695, "learning_rate": 4.5177384920558985e-06, "loss": 0.3904, "step": 15573 }, { "epoch": 2.082642417758759, "grad_norm": 1.5254472494125366, "learning_rate": 4.516531100393624e-06, "loss": 0.3843, "step": 15574 }, { "epoch": 2.082776143353838, "grad_norm": 1.6675376892089844, "learning_rate": 4.515323823029012e-06, "loss": 0.4055, "step": 15575 }, { "epoch": 2.0829098689489167, "grad_norm": 1.5225484371185303, "learning_rate": 4.5141166599872255e-06, "loss": 0.3979, "step": 15576 }, { "epoch": 2.083043594543996, "grad_norm": 1.336268663406372, "learning_rate": 4.512909611293429e-06, "loss": 0.3485, "step": 15577 }, { "epoch": 2.0831773201390744, "grad_norm": 1.3677226305007935, "learning_rate": 4.51170267697278e-06, "loss": 0.3734, "step": 15578 }, { "epoch": 2.0833110457341535, "grad_norm": 1.5330201387405396, "learning_rate": 4.510495857050437e-06, "loss": 0.3776, "step": 15579 }, { "epoch": 2.0834447713292326, "grad_norm": 1.454996943473816, "learning_rate": 4.509289151551556e-06, "loss": 0.355, "step": 15580 }, { "epoch": 2.083578496924311, "grad_norm": 1.347825288772583, "learning_rate": 4.508082560501288e-06, "loss": 0.388, "step": 15581 }, { "epoch": 2.0837122225193903, "grad_norm": 1.611128330230713, "learning_rate": 4.5068760839247835e-06, "loss": 0.3684, "step": 15582 }, { "epoch": 2.083845948114469, "grad_norm": 1.219836711883545, "learning_rate": 4.505669721847193e-06, "loss": 0.3251, "step": 15583 }, { "epoch": 2.083979673709548, "grad_norm": 1.643996000289917, "learning_rate": 4.504463474293656e-06, "loss": 0.4321, "step": 15584 }, { "epoch": 2.084113399304627, "grad_norm": 1.5773465633392334, "learning_rate": 4.503257341289321e-06, "loss": 0.3991, "step": 15585 }, { "epoch": 2.0842471248997056, "grad_norm": 1.5046992301940918, "learning_rate": 4.5020513228593275e-06, "loss": 0.3778, "step": 15586 }, { "epoch": 2.0843808504947847, "grad_norm": 1.5279245376586914, "learning_rate": 4.500845419028817e-06, "loss": 0.4339, "step": 15587 }, { "epoch": 2.084514576089864, "grad_norm": 1.5555322170257568, "learning_rate": 4.4996396298229126e-06, "loss": 0.3951, "step": 15588 }, { "epoch": 2.0846483016849424, "grad_norm": 1.4350197315216064, "learning_rate": 4.498433955266761e-06, "loss": 0.3801, "step": 15589 }, { "epoch": 2.0847820272800215, "grad_norm": 1.5810738801956177, "learning_rate": 4.497228395385494e-06, "loss": 0.415, "step": 15590 }, { "epoch": 2.0849157528751, "grad_norm": 1.4832266569137573, "learning_rate": 4.4960229502042275e-06, "loss": 0.3777, "step": 15591 }, { "epoch": 2.085049478470179, "grad_norm": 1.4933520555496216, "learning_rate": 4.494817619748103e-06, "loss": 0.351, "step": 15592 }, { "epoch": 2.0851832040652583, "grad_norm": 1.5231611728668213, "learning_rate": 4.49361240404223e-06, "loss": 0.4139, "step": 15593 }, { "epoch": 2.085316929660337, "grad_norm": 1.6450469493865967, "learning_rate": 4.492407303111745e-06, "loss": 0.3934, "step": 15594 }, { "epoch": 2.085450655255416, "grad_norm": 1.3872545957565308, "learning_rate": 4.491202316981755e-06, "loss": 0.3643, "step": 15595 }, { "epoch": 2.0855843808504946, "grad_norm": 1.570786714553833, "learning_rate": 4.489997445677383e-06, "loss": 0.4259, "step": 15596 }, { "epoch": 2.0857181064455736, "grad_norm": 1.5542056560516357, "learning_rate": 4.488792689223741e-06, "loss": 0.4404, "step": 15597 }, { "epoch": 2.0858518320406527, "grad_norm": 1.400649905204773, "learning_rate": 4.487588047645941e-06, "loss": 0.3832, "step": 15598 }, { "epoch": 2.0859855576357313, "grad_norm": 1.40584397315979, "learning_rate": 4.486383520969094e-06, "loss": 0.3444, "step": 15599 }, { "epoch": 2.0861192832308104, "grad_norm": 1.3746213912963867, "learning_rate": 4.485179109218307e-06, "loss": 0.3836, "step": 15600 }, { "epoch": 2.0862530088258895, "grad_norm": 1.5158931016921997, "learning_rate": 4.483974812418684e-06, "loss": 0.3888, "step": 15601 }, { "epoch": 2.086386734420968, "grad_norm": 1.37674081325531, "learning_rate": 4.482770630595328e-06, "loss": 0.3603, "step": 15602 }, { "epoch": 2.086520460016047, "grad_norm": 1.4940325021743774, "learning_rate": 4.481566563773337e-06, "loss": 0.4089, "step": 15603 }, { "epoch": 2.086654185611126, "grad_norm": 1.4464157819747925, "learning_rate": 4.4803626119778135e-06, "loss": 0.3748, "step": 15604 }, { "epoch": 2.086787911206205, "grad_norm": 1.3560158014297485, "learning_rate": 4.4791587752338475e-06, "loss": 0.4024, "step": 15605 }, { "epoch": 2.086921636801284, "grad_norm": 1.2261706590652466, "learning_rate": 4.4779550535665385e-06, "loss": 0.33, "step": 15606 }, { "epoch": 2.0870553623963626, "grad_norm": 1.3653289079666138, "learning_rate": 4.4767514470009646e-06, "loss": 0.3791, "step": 15607 }, { "epoch": 2.0871890879914416, "grad_norm": 1.5935239791870117, "learning_rate": 4.475547955562225e-06, "loss": 0.3943, "step": 15608 }, { "epoch": 2.0873228135865203, "grad_norm": 1.4235332012176514, "learning_rate": 4.4743445792754014e-06, "loss": 0.3857, "step": 15609 }, { "epoch": 2.0874565391815993, "grad_norm": 1.3664047718048096, "learning_rate": 4.4731413181655794e-06, "loss": 0.3442, "step": 15610 }, { "epoch": 2.0875902647766784, "grad_norm": 1.5194188356399536, "learning_rate": 4.4719381722578405e-06, "loss": 0.3633, "step": 15611 }, { "epoch": 2.087723990371757, "grad_norm": 1.4515771865844727, "learning_rate": 4.4707351415772535e-06, "loss": 0.3701, "step": 15612 }, { "epoch": 2.087857715966836, "grad_norm": 1.5529335737228394, "learning_rate": 4.469532226148908e-06, "loss": 0.369, "step": 15613 }, { "epoch": 2.0879914415619147, "grad_norm": 1.6439588069915771, "learning_rate": 4.46832942599787e-06, "loss": 0.4455, "step": 15614 }, { "epoch": 2.088125167156994, "grad_norm": 1.5345412492752075, "learning_rate": 4.467126741149209e-06, "loss": 0.407, "step": 15615 }, { "epoch": 2.088258892752073, "grad_norm": 1.5773178339004517, "learning_rate": 4.4659241716279974e-06, "loss": 0.393, "step": 15616 }, { "epoch": 2.0883926183471515, "grad_norm": 1.4791076183319092, "learning_rate": 4.464721717459298e-06, "loss": 0.3722, "step": 15617 }, { "epoch": 2.0885263439422306, "grad_norm": 1.3979734182357788, "learning_rate": 4.463519378668185e-06, "loss": 0.3855, "step": 15618 }, { "epoch": 2.0886600695373096, "grad_norm": 1.4221725463867188, "learning_rate": 4.46231715527971e-06, "loss": 0.3582, "step": 15619 }, { "epoch": 2.0887937951323883, "grad_norm": 1.3345770835876465, "learning_rate": 4.461115047318934e-06, "loss": 0.3412, "step": 15620 }, { "epoch": 2.0889275207274673, "grad_norm": 1.418308973312378, "learning_rate": 4.459913054810913e-06, "loss": 0.3604, "step": 15621 }, { "epoch": 2.089061246322546, "grad_norm": 1.4533021450042725, "learning_rate": 4.458711177780705e-06, "loss": 0.3926, "step": 15622 }, { "epoch": 2.089194971917625, "grad_norm": 1.4725196361541748, "learning_rate": 4.45750941625336e-06, "loss": 0.396, "step": 15623 }, { "epoch": 2.089328697512704, "grad_norm": 1.516769289970398, "learning_rate": 4.456307770253927e-06, "loss": 0.3647, "step": 15624 }, { "epoch": 2.0894624231077827, "grad_norm": 1.5384849309921265, "learning_rate": 4.455106239807454e-06, "loss": 0.3895, "step": 15625 }, { "epoch": 2.089596148702862, "grad_norm": 1.4518753290176392, "learning_rate": 4.453904824938986e-06, "loss": 0.3968, "step": 15626 }, { "epoch": 2.0897298742979404, "grad_norm": 1.6094948053359985, "learning_rate": 4.452703525673564e-06, "loss": 0.3951, "step": 15627 }, { "epoch": 2.0898635998930195, "grad_norm": 1.6361199617385864, "learning_rate": 4.451502342036229e-06, "loss": 0.3637, "step": 15628 }, { "epoch": 2.0899973254880986, "grad_norm": 1.4584499597549438, "learning_rate": 4.450301274052019e-06, "loss": 0.3641, "step": 15629 }, { "epoch": 2.090131051083177, "grad_norm": 1.5477406978607178, "learning_rate": 4.449100321745972e-06, "loss": 0.387, "step": 15630 }, { "epoch": 2.0902647766782563, "grad_norm": 1.3772072792053223, "learning_rate": 4.447899485143109e-06, "loss": 0.3568, "step": 15631 }, { "epoch": 2.090398502273335, "grad_norm": 1.5331861972808838, "learning_rate": 4.446698764268477e-06, "loss": 0.3704, "step": 15632 }, { "epoch": 2.090532227868414, "grad_norm": 1.4482604265213013, "learning_rate": 4.445498159147087e-06, "loss": 0.3911, "step": 15633 }, { "epoch": 2.090665953463493, "grad_norm": 1.5205113887786865, "learning_rate": 4.444297669803981e-06, "loss": 0.3844, "step": 15634 }, { "epoch": 2.0907996790585717, "grad_norm": 1.4746811389923096, "learning_rate": 4.4430972962641695e-06, "loss": 0.352, "step": 15635 }, { "epoch": 2.0909334046536507, "grad_norm": 1.596834421157837, "learning_rate": 4.441897038552674e-06, "loss": 0.4215, "step": 15636 }, { "epoch": 2.09106713024873, "grad_norm": 1.5580360889434814, "learning_rate": 4.440696896694523e-06, "loss": 0.4239, "step": 15637 }, { "epoch": 2.0912008558438084, "grad_norm": 1.6279077529907227, "learning_rate": 4.439496870714719e-06, "loss": 0.4042, "step": 15638 }, { "epoch": 2.0913345814388875, "grad_norm": 1.5757710933685303, "learning_rate": 4.438296960638289e-06, "loss": 0.412, "step": 15639 }, { "epoch": 2.091468307033966, "grad_norm": 1.3488764762878418, "learning_rate": 4.4370971664902325e-06, "loss": 0.4166, "step": 15640 }, { "epoch": 2.091602032629045, "grad_norm": 1.4164925813674927, "learning_rate": 4.435897488295564e-06, "loss": 0.36, "step": 15641 }, { "epoch": 2.0917357582241243, "grad_norm": 1.3988877534866333, "learning_rate": 4.434697926079287e-06, "loss": 0.3393, "step": 15642 }, { "epoch": 2.091869483819203, "grad_norm": 1.4428128004074097, "learning_rate": 4.433498479866406e-06, "loss": 0.3741, "step": 15643 }, { "epoch": 2.092003209414282, "grad_norm": 1.4864366054534912, "learning_rate": 4.4322991496819234e-06, "loss": 0.3669, "step": 15644 }, { "epoch": 2.0921369350093606, "grad_norm": 1.3318320512771606, "learning_rate": 4.431099935550837e-06, "loss": 0.3697, "step": 15645 }, { "epoch": 2.0922706606044397, "grad_norm": 1.7070696353912354, "learning_rate": 4.4299008374981436e-06, "loss": 0.4413, "step": 15646 }, { "epoch": 2.0924043861995187, "grad_norm": 1.3534314632415771, "learning_rate": 4.428701855548837e-06, "loss": 0.3784, "step": 15647 }, { "epoch": 2.0925381117945974, "grad_norm": 1.478061556816101, "learning_rate": 4.42750298972791e-06, "loss": 0.3809, "step": 15648 }, { "epoch": 2.0926718373896764, "grad_norm": 1.4423024654388428, "learning_rate": 4.42630424006035e-06, "loss": 0.371, "step": 15649 }, { "epoch": 2.092805562984755, "grad_norm": 1.2507692575454712, "learning_rate": 4.425105606571145e-06, "loss": 0.3904, "step": 15650 }, { "epoch": 2.092939288579834, "grad_norm": 1.4497268199920654, "learning_rate": 4.423907089285282e-06, "loss": 0.367, "step": 15651 }, { "epoch": 2.093073014174913, "grad_norm": 1.4894993305206299, "learning_rate": 4.4227086882277335e-06, "loss": 0.365, "step": 15652 }, { "epoch": 2.093206739769992, "grad_norm": 1.5055650472640991, "learning_rate": 4.421510403423489e-06, "loss": 0.3694, "step": 15653 }, { "epoch": 2.093340465365071, "grad_norm": 1.3029303550720215, "learning_rate": 4.420312234897521e-06, "loss": 0.3859, "step": 15654 }, { "epoch": 2.09347419096015, "grad_norm": 1.4466160535812378, "learning_rate": 4.419114182674807e-06, "loss": 0.3899, "step": 15655 }, { "epoch": 2.0936079165552286, "grad_norm": 1.3878724575042725, "learning_rate": 4.41791624678032e-06, "loss": 0.3613, "step": 15656 }, { "epoch": 2.0937416421503077, "grad_norm": 1.3854899406433105, "learning_rate": 4.4167184272390204e-06, "loss": 0.3889, "step": 15657 }, { "epoch": 2.0938753677453863, "grad_norm": 1.5768896341323853, "learning_rate": 4.415520724075891e-06, "loss": 0.3966, "step": 15658 }, { "epoch": 2.0940090933404654, "grad_norm": 1.3193234205245972, "learning_rate": 4.414323137315884e-06, "loss": 0.3417, "step": 15659 }, { "epoch": 2.0941428189355444, "grad_norm": 1.2497140169143677, "learning_rate": 4.413125666983965e-06, "loss": 0.3215, "step": 15660 }, { "epoch": 2.094276544530623, "grad_norm": 1.4555151462554932, "learning_rate": 4.411928313105097e-06, "loss": 0.3607, "step": 15661 }, { "epoch": 2.094410270125702, "grad_norm": 1.5373482704162598, "learning_rate": 4.410731075704232e-06, "loss": 0.3729, "step": 15662 }, { "epoch": 2.0945439957207808, "grad_norm": 1.6334055662155151, "learning_rate": 4.409533954806336e-06, "loss": 0.4102, "step": 15663 }, { "epoch": 2.09467772131586, "grad_norm": 1.3893389701843262, "learning_rate": 4.408336950436353e-06, "loss": 0.4114, "step": 15664 }, { "epoch": 2.094811446910939, "grad_norm": 1.5049036741256714, "learning_rate": 4.407140062619234e-06, "loss": 0.3435, "step": 15665 }, { "epoch": 2.0949451725060175, "grad_norm": 1.5738027095794678, "learning_rate": 4.405943291379929e-06, "loss": 0.3901, "step": 15666 }, { "epoch": 2.0950788981010966, "grad_norm": 1.4291270971298218, "learning_rate": 4.404746636743383e-06, "loss": 0.4539, "step": 15667 }, { "epoch": 2.0952126236961757, "grad_norm": 1.340965986251831, "learning_rate": 4.403550098734541e-06, "loss": 0.376, "step": 15668 }, { "epoch": 2.0953463492912543, "grad_norm": 1.5585929155349731, "learning_rate": 4.402353677378341e-06, "loss": 0.3868, "step": 15669 }, { "epoch": 2.0954800748863334, "grad_norm": 1.5626426935195923, "learning_rate": 4.4011573726997215e-06, "loss": 0.4088, "step": 15670 }, { "epoch": 2.095613800481412, "grad_norm": 1.3515079021453857, "learning_rate": 4.399961184723619e-06, "loss": 0.3843, "step": 15671 }, { "epoch": 2.095747526076491, "grad_norm": 1.1834638118743896, "learning_rate": 4.398765113474968e-06, "loss": 0.338, "step": 15672 }, { "epoch": 2.09588125167157, "grad_norm": 1.3986002206802368, "learning_rate": 4.397569158978698e-06, "loss": 0.3148, "step": 15673 }, { "epoch": 2.0960149772666488, "grad_norm": 1.4963678121566772, "learning_rate": 4.396373321259737e-06, "loss": 0.3738, "step": 15674 }, { "epoch": 2.096148702861728, "grad_norm": 1.4160876274108887, "learning_rate": 4.395177600343017e-06, "loss": 0.3595, "step": 15675 }, { "epoch": 2.0962824284568065, "grad_norm": 1.526122808456421, "learning_rate": 4.393981996253448e-06, "loss": 0.3906, "step": 15676 }, { "epoch": 2.0964161540518855, "grad_norm": 1.182529330253601, "learning_rate": 4.392786509015968e-06, "loss": 0.3147, "step": 15677 }, { "epoch": 2.0965498796469646, "grad_norm": 1.4445598125457764, "learning_rate": 4.391591138655481e-06, "loss": 0.4224, "step": 15678 }, { "epoch": 2.0966836052420432, "grad_norm": 1.4032464027404785, "learning_rate": 4.390395885196916e-06, "loss": 0.3841, "step": 15679 }, { "epoch": 2.0968173308371223, "grad_norm": 1.5276345014572144, "learning_rate": 4.389200748665179e-06, "loss": 0.3992, "step": 15680 }, { "epoch": 2.096951056432201, "grad_norm": 1.3111323118209839, "learning_rate": 4.3880057290851786e-06, "loss": 0.351, "step": 15681 }, { "epoch": 2.09708478202728, "grad_norm": 1.6265418529510498, "learning_rate": 4.3868108264818366e-06, "loss": 0.3981, "step": 15682 }, { "epoch": 2.097218507622359, "grad_norm": 1.591089129447937, "learning_rate": 4.3856160408800475e-06, "loss": 0.3703, "step": 15683 }, { "epoch": 2.0973522332174377, "grad_norm": 1.4167088270187378, "learning_rate": 4.38442137230472e-06, "loss": 0.4126, "step": 15684 }, { "epoch": 2.0974859588125168, "grad_norm": 1.4229716062545776, "learning_rate": 4.383226820780756e-06, "loss": 0.3687, "step": 15685 }, { "epoch": 2.0976196844075954, "grad_norm": 1.6457507610321045, "learning_rate": 4.382032386333053e-06, "loss": 0.4445, "step": 15686 }, { "epoch": 2.0977534100026745, "grad_norm": 1.6720408201217651, "learning_rate": 4.3808380689865106e-06, "loss": 0.4007, "step": 15687 }, { "epoch": 2.0978871355977535, "grad_norm": 1.3859692811965942, "learning_rate": 4.37964386876602e-06, "loss": 0.3906, "step": 15688 }, { "epoch": 2.098020861192832, "grad_norm": 1.4062530994415283, "learning_rate": 4.378449785696476e-06, "loss": 0.3411, "step": 15689 }, { "epoch": 2.0981545867879112, "grad_norm": 1.5429835319519043, "learning_rate": 4.377255819802766e-06, "loss": 0.3993, "step": 15690 }, { "epoch": 2.0982883123829903, "grad_norm": 1.537819266319275, "learning_rate": 4.376061971109779e-06, "loss": 0.3503, "step": 15691 }, { "epoch": 2.098422037978069, "grad_norm": 1.6518570184707642, "learning_rate": 4.374868239642398e-06, "loss": 0.4036, "step": 15692 }, { "epoch": 2.098555763573148, "grad_norm": 1.4917223453521729, "learning_rate": 4.373674625425507e-06, "loss": 0.3587, "step": 15693 }, { "epoch": 2.0986894891682266, "grad_norm": 1.4152390956878662, "learning_rate": 4.372481128483984e-06, "loss": 0.384, "step": 15694 }, { "epoch": 2.0988232147633057, "grad_norm": 1.3284255266189575, "learning_rate": 4.371287748842706e-06, "loss": 0.3864, "step": 15695 }, { "epoch": 2.0989569403583848, "grad_norm": 1.5194693803787231, "learning_rate": 4.370094486526553e-06, "loss": 0.3826, "step": 15696 }, { "epoch": 2.0990906659534634, "grad_norm": 1.6204891204833984, "learning_rate": 4.368901341560386e-06, "loss": 0.4239, "step": 15697 }, { "epoch": 2.0992243915485425, "grad_norm": 1.5124558210372925, "learning_rate": 4.36770831396909e-06, "loss": 0.4265, "step": 15698 }, { "epoch": 2.099358117143621, "grad_norm": 1.375800371170044, "learning_rate": 4.366515403777522e-06, "loss": 0.3407, "step": 15699 }, { "epoch": 2.0994918427387, "grad_norm": 1.6754341125488281, "learning_rate": 4.365322611010544e-06, "loss": 0.4077, "step": 15700 }, { "epoch": 2.0996255683337792, "grad_norm": 1.392293930053711, "learning_rate": 4.364129935693032e-06, "loss": 0.371, "step": 15701 }, { "epoch": 2.099759293928858, "grad_norm": 1.5114208459854126, "learning_rate": 4.362937377849832e-06, "loss": 0.3947, "step": 15702 }, { "epoch": 2.099893019523937, "grad_norm": 1.526084303855896, "learning_rate": 4.361744937505815e-06, "loss": 0.4077, "step": 15703 }, { "epoch": 2.100026745119016, "grad_norm": 1.6001319885253906, "learning_rate": 4.360552614685825e-06, "loss": 0.4082, "step": 15704 }, { "epoch": 2.1001604707140946, "grad_norm": 1.4394702911376953, "learning_rate": 4.359360409414721e-06, "loss": 0.3342, "step": 15705 }, { "epoch": 2.1002941963091737, "grad_norm": 1.4712375402450562, "learning_rate": 4.358168321717352e-06, "loss": 0.3948, "step": 15706 }, { "epoch": 2.1004279219042523, "grad_norm": 1.5200183391571045, "learning_rate": 4.356976351618565e-06, "loss": 0.4021, "step": 15707 }, { "epoch": 2.1005616474993314, "grad_norm": 1.5098627805709839, "learning_rate": 4.355784499143207e-06, "loss": 0.3706, "step": 15708 }, { "epoch": 2.1006953730944105, "grad_norm": 1.4118281602859497, "learning_rate": 4.354592764316118e-06, "loss": 0.3649, "step": 15709 }, { "epoch": 2.100829098689489, "grad_norm": 1.18292236328125, "learning_rate": 4.353401147162142e-06, "loss": 0.357, "step": 15710 }, { "epoch": 2.100962824284568, "grad_norm": 1.5326002836227417, "learning_rate": 4.352209647706116e-06, "loss": 0.3604, "step": 15711 }, { "epoch": 2.101096549879647, "grad_norm": 1.590121865272522, "learning_rate": 4.351018265972875e-06, "loss": 0.4002, "step": 15712 }, { "epoch": 2.101230275474726, "grad_norm": 1.536358118057251, "learning_rate": 4.349827001987254e-06, "loss": 0.3993, "step": 15713 }, { "epoch": 2.101364001069805, "grad_norm": 1.589345097541809, "learning_rate": 4.348635855774082e-06, "loss": 0.4087, "step": 15714 }, { "epoch": 2.1014977266648835, "grad_norm": 1.3861310482025146, "learning_rate": 4.34744482735819e-06, "loss": 0.3454, "step": 15715 }, { "epoch": 2.1016314522599626, "grad_norm": 1.3468713760375977, "learning_rate": 4.346253916764396e-06, "loss": 0.3476, "step": 15716 }, { "epoch": 2.1017651778550412, "grad_norm": 1.2690790891647339, "learning_rate": 4.345063124017537e-06, "loss": 0.3661, "step": 15717 }, { "epoch": 2.1018989034501203, "grad_norm": 1.5111621618270874, "learning_rate": 4.343872449142417e-06, "loss": 0.3915, "step": 15718 }, { "epoch": 2.1020326290451994, "grad_norm": 1.4507535696029663, "learning_rate": 4.342681892163868e-06, "loss": 0.3789, "step": 15719 }, { "epoch": 2.102166354640278, "grad_norm": 1.4821652173995972, "learning_rate": 4.341491453106704e-06, "loss": 0.3735, "step": 15720 }, { "epoch": 2.102300080235357, "grad_norm": 1.5352351665496826, "learning_rate": 4.34030113199573e-06, "loss": 0.3519, "step": 15721 }, { "epoch": 2.102433805830436, "grad_norm": 1.6089775562286377, "learning_rate": 4.33911092885577e-06, "loss": 0.401, "step": 15722 }, { "epoch": 2.102567531425515, "grad_norm": 1.595123291015625, "learning_rate": 4.337920843711619e-06, "loss": 0.3904, "step": 15723 }, { "epoch": 2.102701257020594, "grad_norm": 1.480790138244629, "learning_rate": 4.336730876588097e-06, "loss": 0.411, "step": 15724 }, { "epoch": 2.1028349826156725, "grad_norm": 1.5942189693450928, "learning_rate": 4.335541027509996e-06, "loss": 0.3746, "step": 15725 }, { "epoch": 2.1029687082107515, "grad_norm": 1.5302094221115112, "learning_rate": 4.334351296502119e-06, "loss": 0.4216, "step": 15726 }, { "epoch": 2.1031024338058306, "grad_norm": 1.3961538076400757, "learning_rate": 4.333161683589276e-06, "loss": 0.3446, "step": 15727 }, { "epoch": 2.1032361594009092, "grad_norm": 1.3041951656341553, "learning_rate": 4.3319721887962505e-06, "loss": 0.3548, "step": 15728 }, { "epoch": 2.1033698849959883, "grad_norm": 1.5044530630111694, "learning_rate": 4.330782812147842e-06, "loss": 0.4238, "step": 15729 }, { "epoch": 2.103503610591067, "grad_norm": 1.5571688413619995, "learning_rate": 4.329593553668841e-06, "loss": 0.3605, "step": 15730 }, { "epoch": 2.103637336186146, "grad_norm": 1.449092984199524, "learning_rate": 4.328404413384035e-06, "loss": 0.3702, "step": 15731 }, { "epoch": 2.103771061781225, "grad_norm": 1.6132605075836182, "learning_rate": 4.327215391318213e-06, "loss": 0.4227, "step": 15732 }, { "epoch": 2.1039047873763037, "grad_norm": 1.3593202829360962, "learning_rate": 4.326026487496157e-06, "loss": 0.3935, "step": 15733 }, { "epoch": 2.104038512971383, "grad_norm": 1.5615870952606201, "learning_rate": 4.32483770194265e-06, "loss": 0.4088, "step": 15734 }, { "epoch": 2.1041722385664614, "grad_norm": 1.528801441192627, "learning_rate": 4.32364903468247e-06, "loss": 0.4129, "step": 15735 }, { "epoch": 2.1043059641615405, "grad_norm": 1.6716378927230835, "learning_rate": 4.3224604857403985e-06, "loss": 0.4703, "step": 15736 }, { "epoch": 2.1044396897566195, "grad_norm": 1.494335412979126, "learning_rate": 4.321272055141198e-06, "loss": 0.4057, "step": 15737 }, { "epoch": 2.104573415351698, "grad_norm": 1.373849868774414, "learning_rate": 4.320083742909651e-06, "loss": 0.3808, "step": 15738 }, { "epoch": 2.1047071409467772, "grad_norm": 1.413920521736145, "learning_rate": 4.318895549070524e-06, "loss": 0.3489, "step": 15739 }, { "epoch": 2.1048408665418563, "grad_norm": 1.4368923902511597, "learning_rate": 4.317707473648582e-06, "loss": 0.3586, "step": 15740 }, { "epoch": 2.104974592136935, "grad_norm": 1.1731454133987427, "learning_rate": 4.316519516668595e-06, "loss": 0.333, "step": 15741 }, { "epoch": 2.105108317732014, "grad_norm": 1.3359408378601074, "learning_rate": 4.315331678155312e-06, "loss": 0.3248, "step": 15742 }, { "epoch": 2.1052420433270926, "grad_norm": 1.4907485246658325, "learning_rate": 4.314143958133508e-06, "loss": 0.3814, "step": 15743 }, { "epoch": 2.1053757689221717, "grad_norm": 1.5934419631958008, "learning_rate": 4.312956356627929e-06, "loss": 0.3672, "step": 15744 }, { "epoch": 2.105509494517251, "grad_norm": 1.4008458852767944, "learning_rate": 4.311768873663329e-06, "loss": 0.3738, "step": 15745 }, { "epoch": 2.1056432201123294, "grad_norm": 1.2961705923080444, "learning_rate": 4.310581509264471e-06, "loss": 0.3568, "step": 15746 }, { "epoch": 2.1057769457074085, "grad_norm": 1.6165697574615479, "learning_rate": 4.309394263456091e-06, "loss": 0.4176, "step": 15747 }, { "epoch": 2.105910671302487, "grad_norm": 1.6913940906524658, "learning_rate": 4.308207136262949e-06, "loss": 0.4002, "step": 15748 }, { "epoch": 2.106044396897566, "grad_norm": 1.4439914226531982, "learning_rate": 4.3070201277097775e-06, "loss": 0.3488, "step": 15749 }, { "epoch": 2.1061781224926452, "grad_norm": 1.5959389209747314, "learning_rate": 4.305833237821325e-06, "loss": 0.3905, "step": 15750 }, { "epoch": 2.106311848087724, "grad_norm": 1.5053926706314087, "learning_rate": 4.304646466622331e-06, "loss": 0.3839, "step": 15751 }, { "epoch": 2.106445573682803, "grad_norm": 1.4584319591522217, "learning_rate": 4.303459814137531e-06, "loss": 0.3869, "step": 15752 }, { "epoch": 2.1065792992778816, "grad_norm": 1.4074989557266235, "learning_rate": 4.302273280391659e-06, "loss": 0.3914, "step": 15753 }, { "epoch": 2.1067130248729606, "grad_norm": 1.507177472114563, "learning_rate": 4.301086865409449e-06, "loss": 0.3795, "step": 15754 }, { "epoch": 2.1068467504680397, "grad_norm": 1.577232837677002, "learning_rate": 4.29990056921563e-06, "loss": 0.4047, "step": 15755 }, { "epoch": 2.1069804760631183, "grad_norm": 1.534798264503479, "learning_rate": 4.298714391834929e-06, "loss": 0.384, "step": 15756 }, { "epoch": 2.1071142016581974, "grad_norm": 1.3766248226165771, "learning_rate": 4.297528333292072e-06, "loss": 0.3727, "step": 15757 }, { "epoch": 2.1072479272532765, "grad_norm": 1.5357187986373901, "learning_rate": 4.2963423936117795e-06, "loss": 0.365, "step": 15758 }, { "epoch": 2.107381652848355, "grad_norm": 1.4902327060699463, "learning_rate": 4.295156572818773e-06, "loss": 0.3753, "step": 15759 }, { "epoch": 2.107515378443434, "grad_norm": 1.4230576753616333, "learning_rate": 4.293970870937772e-06, "loss": 0.3817, "step": 15760 }, { "epoch": 2.107649104038513, "grad_norm": 1.448320984840393, "learning_rate": 4.292785287993479e-06, "loss": 0.3791, "step": 15761 }, { "epoch": 2.107782829633592, "grad_norm": 1.5622847080230713, "learning_rate": 4.291599824010625e-06, "loss": 0.3967, "step": 15762 }, { "epoch": 2.107916555228671, "grad_norm": 1.4429446458816528, "learning_rate": 4.290414479013902e-06, "loss": 0.3853, "step": 15763 }, { "epoch": 2.1080502808237496, "grad_norm": 1.6376534700393677, "learning_rate": 4.289229253028029e-06, "loss": 0.4045, "step": 15764 }, { "epoch": 2.1081840064188286, "grad_norm": 1.5807442665100098, "learning_rate": 4.288044146077712e-06, "loss": 0.3984, "step": 15765 }, { "epoch": 2.1083177320139073, "grad_norm": 1.2993814945220947, "learning_rate": 4.286859158187641e-06, "loss": 0.3575, "step": 15766 }, { "epoch": 2.1084514576089863, "grad_norm": 1.313005805015564, "learning_rate": 4.285674289382532e-06, "loss": 0.3521, "step": 15767 }, { "epoch": 2.1085851832040654, "grad_norm": 1.5080761909484863, "learning_rate": 4.2844895396870704e-06, "loss": 0.3748, "step": 15768 }, { "epoch": 2.108718908799144, "grad_norm": 1.5081191062927246, "learning_rate": 4.283304909125956e-06, "loss": 0.3729, "step": 15769 }, { "epoch": 2.108852634394223, "grad_norm": 1.6054719686508179, "learning_rate": 4.282120397723879e-06, "loss": 0.4176, "step": 15770 }, { "epoch": 2.108986359989302, "grad_norm": 1.5559393167495728, "learning_rate": 4.280936005505528e-06, "loss": 0.4011, "step": 15771 }, { "epoch": 2.109120085584381, "grad_norm": 1.3938302993774414, "learning_rate": 4.279751732495601e-06, "loss": 0.3278, "step": 15772 }, { "epoch": 2.10925381117946, "grad_norm": 1.3618437051773071, "learning_rate": 4.278567578718772e-06, "loss": 0.3693, "step": 15773 }, { "epoch": 2.1093875367745385, "grad_norm": 1.4044644832611084, "learning_rate": 4.277383544199726e-06, "loss": 0.3662, "step": 15774 }, { "epoch": 2.1095212623696176, "grad_norm": 1.4152419567108154, "learning_rate": 4.276199628963145e-06, "loss": 0.4061, "step": 15775 }, { "epoch": 2.1096549879646966, "grad_norm": 1.4973400831222534, "learning_rate": 4.275015833033706e-06, "loss": 0.3776, "step": 15776 }, { "epoch": 2.1097887135597753, "grad_norm": 1.4519171714782715, "learning_rate": 4.273832156436082e-06, "loss": 0.3685, "step": 15777 }, { "epoch": 2.1099224391548543, "grad_norm": 1.3978184461593628, "learning_rate": 4.272648599194948e-06, "loss": 0.3681, "step": 15778 }, { "epoch": 2.110056164749933, "grad_norm": 1.284856915473938, "learning_rate": 4.271465161334974e-06, "loss": 0.3746, "step": 15779 }, { "epoch": 2.110189890345012, "grad_norm": 1.6457892656326294, "learning_rate": 4.270281842880827e-06, "loss": 0.457, "step": 15780 }, { "epoch": 2.110323615940091, "grad_norm": 1.372888207435608, "learning_rate": 4.269098643857176e-06, "loss": 0.3346, "step": 15781 }, { "epoch": 2.1104573415351697, "grad_norm": 1.6197234392166138, "learning_rate": 4.267915564288673e-06, "loss": 0.3978, "step": 15782 }, { "epoch": 2.110591067130249, "grad_norm": 1.6297394037246704, "learning_rate": 4.266732604199988e-06, "loss": 0.4306, "step": 15783 }, { "epoch": 2.1107247927253274, "grad_norm": 1.6843125820159912, "learning_rate": 4.26554976361578e-06, "loss": 0.404, "step": 15784 }, { "epoch": 2.1108585183204065, "grad_norm": 1.6915265321731567, "learning_rate": 4.264367042560691e-06, "loss": 0.3825, "step": 15785 }, { "epoch": 2.1109922439154856, "grad_norm": 1.5842605829238892, "learning_rate": 4.263184441059391e-06, "loss": 0.3899, "step": 15786 }, { "epoch": 2.111125969510564, "grad_norm": 1.4372575283050537, "learning_rate": 4.262001959136515e-06, "loss": 0.3644, "step": 15787 }, { "epoch": 2.1112596951056433, "grad_norm": 1.5107115507125854, "learning_rate": 4.260819596816725e-06, "loss": 0.3622, "step": 15788 }, { "epoch": 2.111393420700722, "grad_norm": 1.4474635124206543, "learning_rate": 4.259637354124654e-06, "loss": 0.3537, "step": 15789 }, { "epoch": 2.111527146295801, "grad_norm": 1.4111926555633545, "learning_rate": 4.2584552310849454e-06, "loss": 0.3836, "step": 15790 }, { "epoch": 2.11166087189088, "grad_norm": 1.4600623846054077, "learning_rate": 4.257273227722252e-06, "loss": 0.3388, "step": 15791 }, { "epoch": 2.1117945974859587, "grad_norm": 1.5950621366500854, "learning_rate": 4.256091344061199e-06, "loss": 0.388, "step": 15792 }, { "epoch": 2.1119283230810377, "grad_norm": 1.574222445487976, "learning_rate": 4.254909580126425e-06, "loss": 0.3906, "step": 15793 }, { "epoch": 2.112062048676117, "grad_norm": 1.4009393453598022, "learning_rate": 4.253727935942563e-06, "loss": 0.3662, "step": 15794 }, { "epoch": 2.1121957742711954, "grad_norm": 1.309697151184082, "learning_rate": 4.252546411534245e-06, "loss": 0.357, "step": 15795 }, { "epoch": 2.1123294998662745, "grad_norm": 1.3598058223724365, "learning_rate": 4.251365006926096e-06, "loss": 0.3205, "step": 15796 }, { "epoch": 2.112463225461353, "grad_norm": 1.447260856628418, "learning_rate": 4.250183722142743e-06, "loss": 0.3126, "step": 15797 }, { "epoch": 2.112596951056432, "grad_norm": 1.5538804531097412, "learning_rate": 4.249002557208809e-06, "loss": 0.4169, "step": 15798 }, { "epoch": 2.1127306766515113, "grad_norm": 1.5057202577590942, "learning_rate": 4.247821512148913e-06, "loss": 0.3827, "step": 15799 }, { "epoch": 2.11286440224659, "grad_norm": 1.4938865900039673, "learning_rate": 4.246640586987677e-06, "loss": 0.3818, "step": 15800 }, { "epoch": 2.112998127841669, "grad_norm": 1.5976083278656006, "learning_rate": 4.2454597817497054e-06, "loss": 0.4118, "step": 15801 }, { "epoch": 2.1131318534367476, "grad_norm": 1.5542221069335938, "learning_rate": 4.244279096459623e-06, "loss": 0.3708, "step": 15802 }, { "epoch": 2.1132655790318267, "grad_norm": 1.5483275651931763, "learning_rate": 4.243098531142034e-06, "loss": 0.3455, "step": 15803 }, { "epoch": 2.1133993046269057, "grad_norm": 1.409231424331665, "learning_rate": 4.241918085821547e-06, "loss": 0.3581, "step": 15804 }, { "epoch": 2.1135330302219844, "grad_norm": 1.7263003587722778, "learning_rate": 4.2407377605227715e-06, "loss": 0.4254, "step": 15805 }, { "epoch": 2.1136667558170634, "grad_norm": 1.403637170791626, "learning_rate": 4.2395575552702996e-06, "loss": 0.3382, "step": 15806 }, { "epoch": 2.1138004814121425, "grad_norm": 1.482252836227417, "learning_rate": 4.238377470088745e-06, "loss": 0.3248, "step": 15807 }, { "epoch": 2.113934207007221, "grad_norm": 1.4706225395202637, "learning_rate": 4.2371975050026915e-06, "loss": 0.3911, "step": 15808 }, { "epoch": 2.1140679326023, "grad_norm": 1.3683924674987793, "learning_rate": 4.236017660036745e-06, "loss": 0.3618, "step": 15809 }, { "epoch": 2.114201658197379, "grad_norm": 1.46933114528656, "learning_rate": 4.2348379352155e-06, "loss": 0.3786, "step": 15810 }, { "epoch": 2.114335383792458, "grad_norm": 1.4610997438430786, "learning_rate": 4.233658330563533e-06, "loss": 0.359, "step": 15811 }, { "epoch": 2.114469109387537, "grad_norm": 1.5270748138427734, "learning_rate": 4.232478846105447e-06, "loss": 0.4046, "step": 15812 }, { "epoch": 2.1146028349826156, "grad_norm": 1.4847596883773804, "learning_rate": 4.231299481865818e-06, "loss": 0.3528, "step": 15813 }, { "epoch": 2.1147365605776947, "grad_norm": 1.5384604930877686, "learning_rate": 4.230120237869232e-06, "loss": 0.3816, "step": 15814 }, { "epoch": 2.1148702861727733, "grad_norm": 1.4346331357955933, "learning_rate": 4.228941114140267e-06, "loss": 0.3923, "step": 15815 }, { "epoch": 2.1150040117678524, "grad_norm": 1.6083632707595825, "learning_rate": 4.227762110703499e-06, "loss": 0.3625, "step": 15816 }, { "epoch": 2.1151377373629314, "grad_norm": 1.4010009765625, "learning_rate": 4.226583227583514e-06, "loss": 0.3771, "step": 15817 }, { "epoch": 2.11527146295801, "grad_norm": 1.3030776977539062, "learning_rate": 4.225404464804873e-06, "loss": 0.3904, "step": 15818 }, { "epoch": 2.115405188553089, "grad_norm": 1.5039422512054443, "learning_rate": 4.224225822392149e-06, "loss": 0.3919, "step": 15819 }, { "epoch": 2.1155389141481677, "grad_norm": 1.7316020727157593, "learning_rate": 4.223047300369914e-06, "loss": 0.4022, "step": 15820 }, { "epoch": 2.115672639743247, "grad_norm": 1.443379282951355, "learning_rate": 4.2218688987627276e-06, "loss": 0.3761, "step": 15821 }, { "epoch": 2.115806365338326, "grad_norm": 1.3161598443984985, "learning_rate": 4.220690617595155e-06, "loss": 0.3698, "step": 15822 }, { "epoch": 2.1159400909334045, "grad_norm": 1.494821310043335, "learning_rate": 4.2195124568917574e-06, "loss": 0.3589, "step": 15823 }, { "epoch": 2.1160738165284836, "grad_norm": 1.4729195833206177, "learning_rate": 4.218334416677091e-06, "loss": 0.3794, "step": 15824 }, { "epoch": 2.1162075421235627, "grad_norm": 1.4879413843154907, "learning_rate": 4.217156496975711e-06, "loss": 0.3542, "step": 15825 }, { "epoch": 2.1163412677186413, "grad_norm": 1.3683109283447266, "learning_rate": 4.215978697812174e-06, "loss": 0.384, "step": 15826 }, { "epoch": 2.1164749933137204, "grad_norm": 1.455397129058838, "learning_rate": 4.214801019211019e-06, "loss": 0.396, "step": 15827 }, { "epoch": 2.116608718908799, "grad_norm": 1.5102944374084473, "learning_rate": 4.213623461196804e-06, "loss": 0.3665, "step": 15828 }, { "epoch": 2.116742444503878, "grad_norm": 1.746572732925415, "learning_rate": 4.212446023794076e-06, "loss": 0.4199, "step": 15829 }, { "epoch": 2.116876170098957, "grad_norm": 1.533099889755249, "learning_rate": 4.211268707027364e-06, "loss": 0.3453, "step": 15830 }, { "epoch": 2.1170098956940357, "grad_norm": 1.5608463287353516, "learning_rate": 4.210091510921225e-06, "loss": 0.3598, "step": 15831 }, { "epoch": 2.117143621289115, "grad_norm": 1.8351783752441406, "learning_rate": 4.20891443550018e-06, "loss": 0.3585, "step": 15832 }, { "epoch": 2.1172773468841934, "grad_norm": 1.517020583152771, "learning_rate": 4.207737480788779e-06, "loss": 0.3446, "step": 15833 }, { "epoch": 2.1174110724792725, "grad_norm": 1.4805076122283936, "learning_rate": 4.206560646811545e-06, "loss": 0.3777, "step": 15834 }, { "epoch": 2.1175447980743516, "grad_norm": 1.7181388139724731, "learning_rate": 4.205383933593006e-06, "loss": 0.4432, "step": 15835 }, { "epoch": 2.11767852366943, "grad_norm": 1.6780328750610352, "learning_rate": 4.204207341157702e-06, "loss": 0.3933, "step": 15836 }, { "epoch": 2.1178122492645093, "grad_norm": 1.4277105331420898, "learning_rate": 4.2030308695301455e-06, "loss": 0.3864, "step": 15837 }, { "epoch": 2.1179459748595884, "grad_norm": 1.689159631729126, "learning_rate": 4.2018545187348645e-06, "loss": 0.3941, "step": 15838 }, { "epoch": 2.118079700454667, "grad_norm": 1.4892306327819824, "learning_rate": 4.200678288796378e-06, "loss": 0.3813, "step": 15839 }, { "epoch": 2.118213426049746, "grad_norm": 1.466098666191101, "learning_rate": 4.199502179739202e-06, "loss": 0.3655, "step": 15840 }, { "epoch": 2.1183471516448247, "grad_norm": 1.473799467086792, "learning_rate": 4.1983261915878535e-06, "loss": 0.4329, "step": 15841 }, { "epoch": 2.1184808772399037, "grad_norm": 1.4501913785934448, "learning_rate": 4.197150324366844e-06, "loss": 0.3504, "step": 15842 }, { "epoch": 2.118614602834983, "grad_norm": 1.538856863975525, "learning_rate": 4.1959745781006835e-06, "loss": 0.4147, "step": 15843 }, { "epoch": 2.1187483284300614, "grad_norm": 1.2100977897644043, "learning_rate": 4.194798952813878e-06, "loss": 0.3535, "step": 15844 }, { "epoch": 2.1188820540251405, "grad_norm": 1.507686734199524, "learning_rate": 4.193623448530937e-06, "loss": 0.3707, "step": 15845 }, { "epoch": 2.119015779620219, "grad_norm": 1.5177654027938843, "learning_rate": 4.192448065276352e-06, "loss": 0.4113, "step": 15846 }, { "epoch": 2.119149505215298, "grad_norm": 1.3500256538391113, "learning_rate": 4.191272803074634e-06, "loss": 0.3759, "step": 15847 }, { "epoch": 2.1192832308103773, "grad_norm": 1.655120611190796, "learning_rate": 4.190097661950277e-06, "loss": 0.4211, "step": 15848 }, { "epoch": 2.119416956405456, "grad_norm": 1.3412137031555176, "learning_rate": 4.188922641927773e-06, "loss": 0.3963, "step": 15849 }, { "epoch": 2.119550682000535, "grad_norm": 1.4473352432250977, "learning_rate": 4.18774774303162e-06, "loss": 0.3918, "step": 15850 }, { "epoch": 2.1196844075956136, "grad_norm": 1.572608232498169, "learning_rate": 4.186572965286297e-06, "loss": 0.3822, "step": 15851 }, { "epoch": 2.1198181331906927, "grad_norm": 1.4750779867172241, "learning_rate": 4.185398308716304e-06, "loss": 0.4115, "step": 15852 }, { "epoch": 2.1199518587857717, "grad_norm": 1.395971417427063, "learning_rate": 4.1842237733461166e-06, "loss": 0.3829, "step": 15853 }, { "epoch": 2.1200855843808504, "grad_norm": 1.5216400623321533, "learning_rate": 4.183049359200215e-06, "loss": 0.3927, "step": 15854 }, { "epoch": 2.1202193099759294, "grad_norm": 1.5628246068954468, "learning_rate": 4.181875066303092e-06, "loss": 0.3848, "step": 15855 }, { "epoch": 2.120353035571008, "grad_norm": 1.364039659500122, "learning_rate": 4.1807008946792075e-06, "loss": 0.3793, "step": 15856 }, { "epoch": 2.120486761166087, "grad_norm": 1.2929723262786865, "learning_rate": 4.179526844353051e-06, "loss": 0.3906, "step": 15857 }, { "epoch": 2.120620486761166, "grad_norm": 1.3052074909210205, "learning_rate": 4.178352915349085e-06, "loss": 0.3667, "step": 15858 }, { "epoch": 2.120754212356245, "grad_norm": 1.6489425897598267, "learning_rate": 4.177179107691782e-06, "loss": 0.3991, "step": 15859 }, { "epoch": 2.120887937951324, "grad_norm": 1.7088359594345093, "learning_rate": 4.176005421405609e-06, "loss": 0.4271, "step": 15860 }, { "epoch": 2.121021663546403, "grad_norm": 1.4561560153961182, "learning_rate": 4.174831856515029e-06, "loss": 0.3842, "step": 15861 }, { "epoch": 2.1211553891414816, "grad_norm": 1.3666518926620483, "learning_rate": 4.173658413044506e-06, "loss": 0.3748, "step": 15862 }, { "epoch": 2.1212891147365607, "grad_norm": 1.4363411664962769, "learning_rate": 4.172485091018498e-06, "loss": 0.3643, "step": 15863 }, { "epoch": 2.1214228403316393, "grad_norm": 1.3198888301849365, "learning_rate": 4.171311890461461e-06, "loss": 0.3992, "step": 15864 }, { "epoch": 2.1215565659267184, "grad_norm": 1.4109219312667847, "learning_rate": 4.17013881139785e-06, "loss": 0.4146, "step": 15865 }, { "epoch": 2.1216902915217974, "grad_norm": 1.5402634143829346, "learning_rate": 4.1689658538521185e-06, "loss": 0.3713, "step": 15866 }, { "epoch": 2.121824017116876, "grad_norm": 1.6806546449661255, "learning_rate": 4.167793017848712e-06, "loss": 0.3915, "step": 15867 }, { "epoch": 2.121957742711955, "grad_norm": 1.5720099210739136, "learning_rate": 4.166620303412081e-06, "loss": 0.4215, "step": 15868 }, { "epoch": 2.1220914683070338, "grad_norm": 1.5742303133010864, "learning_rate": 4.165447710566671e-06, "loss": 0.4155, "step": 15869 }, { "epoch": 2.122225193902113, "grad_norm": 1.4051637649536133, "learning_rate": 4.164275239336914e-06, "loss": 0.3651, "step": 15870 }, { "epoch": 2.122358919497192, "grad_norm": 1.4366191625595093, "learning_rate": 4.16310288974726e-06, "loss": 0.3961, "step": 15871 }, { "epoch": 2.1224926450922705, "grad_norm": 1.4073134660720825, "learning_rate": 4.161930661822137e-06, "loss": 0.3556, "step": 15872 }, { "epoch": 2.1226263706873496, "grad_norm": 1.5554659366607666, "learning_rate": 4.160758555585984e-06, "loss": 0.3813, "step": 15873 }, { "epoch": 2.1227600962824287, "grad_norm": 1.6783851385116577, "learning_rate": 4.1595865710632366e-06, "loss": 0.4434, "step": 15874 }, { "epoch": 2.1228938218775073, "grad_norm": 1.5492866039276123, "learning_rate": 4.15841470827831e-06, "loss": 0.4342, "step": 15875 }, { "epoch": 2.1230275474725864, "grad_norm": 1.425931692123413, "learning_rate": 4.157242967255647e-06, "loss": 0.3399, "step": 15876 }, { "epoch": 2.123161273067665, "grad_norm": 1.5937637090682983, "learning_rate": 4.15607134801966e-06, "loss": 0.4187, "step": 15877 }, { "epoch": 2.123294998662744, "grad_norm": 1.7412713766098022, "learning_rate": 4.154899850594774e-06, "loss": 0.4214, "step": 15878 }, { "epoch": 2.123428724257823, "grad_norm": 1.5094362497329712, "learning_rate": 4.153728475005406e-06, "loss": 0.3837, "step": 15879 }, { "epoch": 2.1235624498529018, "grad_norm": 1.645103096961975, "learning_rate": 4.152557221275975e-06, "loss": 0.3512, "step": 15880 }, { "epoch": 2.123696175447981, "grad_norm": 1.4615051746368408, "learning_rate": 4.151386089430892e-06, "loss": 0.388, "step": 15881 }, { "epoch": 2.1238299010430595, "grad_norm": 1.3701961040496826, "learning_rate": 4.1502150794945705e-06, "loss": 0.363, "step": 15882 }, { "epoch": 2.1239636266381385, "grad_norm": 1.322082281112671, "learning_rate": 4.149044191491418e-06, "loss": 0.4058, "step": 15883 }, { "epoch": 2.1240973522332176, "grad_norm": 1.5117310285568237, "learning_rate": 4.147873425445839e-06, "loss": 0.3722, "step": 15884 }, { "epoch": 2.1242310778282962, "grad_norm": 1.3842909336090088, "learning_rate": 4.146702781382242e-06, "loss": 0.3531, "step": 15885 }, { "epoch": 2.1243648034233753, "grad_norm": 1.3393534421920776, "learning_rate": 4.1455322593250216e-06, "loss": 0.3705, "step": 15886 }, { "epoch": 2.124498529018454, "grad_norm": 1.5686941146850586, "learning_rate": 4.14436185929858e-06, "loss": 0.4062, "step": 15887 }, { "epoch": 2.124632254613533, "grad_norm": 1.5218119621276855, "learning_rate": 4.1431915813273124e-06, "loss": 0.3976, "step": 15888 }, { "epoch": 2.124765980208612, "grad_norm": 1.502213478088379, "learning_rate": 4.142021425435612e-06, "loss": 0.3997, "step": 15889 }, { "epoch": 2.1248997058036907, "grad_norm": 1.6279336214065552, "learning_rate": 4.140851391647872e-06, "loss": 0.3855, "step": 15890 }, { "epoch": 2.1250334313987698, "grad_norm": 1.5856924057006836, "learning_rate": 4.139681479988472e-06, "loss": 0.39, "step": 15891 }, { "epoch": 2.1251671569938484, "grad_norm": 1.6946572065353394, "learning_rate": 4.138511690481808e-06, "loss": 0.4107, "step": 15892 }, { "epoch": 2.1253008825889275, "grad_norm": 1.4954934120178223, "learning_rate": 4.137342023152257e-06, "loss": 0.3469, "step": 15893 }, { "epoch": 2.1254346081840065, "grad_norm": 1.484686017036438, "learning_rate": 4.136172478024203e-06, "loss": 0.3377, "step": 15894 }, { "epoch": 2.125568333779085, "grad_norm": 1.4771796464920044, "learning_rate": 4.135003055122027e-06, "loss": 0.409, "step": 15895 }, { "epoch": 2.1257020593741642, "grad_norm": 1.5144153833389282, "learning_rate": 4.133833754470091e-06, "loss": 0.3643, "step": 15896 }, { "epoch": 2.1258357849692433, "grad_norm": 1.5525412559509277, "learning_rate": 4.132664576092785e-06, "loss": 0.3769, "step": 15897 }, { "epoch": 2.125969510564322, "grad_norm": 1.3438653945922852, "learning_rate": 4.131495520014469e-06, "loss": 0.345, "step": 15898 }, { "epoch": 2.126103236159401, "grad_norm": 1.4278593063354492, "learning_rate": 4.130326586259509e-06, "loss": 0.3691, "step": 15899 }, { "epoch": 2.1262369617544796, "grad_norm": 1.6261132955551147, "learning_rate": 4.129157774852282e-06, "loss": 0.3717, "step": 15900 }, { "epoch": 2.1263706873495587, "grad_norm": 1.4129383563995361, "learning_rate": 4.127989085817135e-06, "loss": 0.3659, "step": 15901 }, { "epoch": 2.1265044129446378, "grad_norm": 1.6368474960327148, "learning_rate": 4.126820519178445e-06, "loss": 0.425, "step": 15902 }, { "epoch": 2.1266381385397164, "grad_norm": 1.5429695844650269, "learning_rate": 4.125652074960556e-06, "loss": 0.3861, "step": 15903 }, { "epoch": 2.1267718641347955, "grad_norm": 1.3731828927993774, "learning_rate": 4.124483753187831e-06, "loss": 0.3443, "step": 15904 }, { "epoch": 2.126905589729874, "grad_norm": 1.4839287996292114, "learning_rate": 4.123315553884618e-06, "loss": 0.4191, "step": 15905 }, { "epoch": 2.127039315324953, "grad_norm": 1.428155779838562, "learning_rate": 4.12214747707527e-06, "loss": 0.3862, "step": 15906 }, { "epoch": 2.1271730409200322, "grad_norm": 1.480543613433838, "learning_rate": 4.120979522784132e-06, "loss": 0.3636, "step": 15907 }, { "epoch": 2.127306766515111, "grad_norm": 1.5258797407150269, "learning_rate": 4.119811691035551e-06, "loss": 0.4109, "step": 15908 }, { "epoch": 2.12744049211019, "grad_norm": 1.6359429359436035, "learning_rate": 4.118643981853869e-06, "loss": 0.4021, "step": 15909 }, { "epoch": 2.127574217705269, "grad_norm": 1.7595237493515015, "learning_rate": 4.1174763952634255e-06, "loss": 0.3884, "step": 15910 }, { "epoch": 2.1277079433003476, "grad_norm": 1.4064629077911377, "learning_rate": 4.116308931288556e-06, "loss": 0.3718, "step": 15911 }, { "epoch": 2.1278416688954267, "grad_norm": 1.576346755027771, "learning_rate": 4.115141589953599e-06, "loss": 0.3656, "step": 15912 }, { "epoch": 2.1279753944905053, "grad_norm": 1.5196313858032227, "learning_rate": 4.113974371282883e-06, "loss": 0.4112, "step": 15913 }, { "epoch": 2.1281091200855844, "grad_norm": 1.350335955619812, "learning_rate": 4.112807275300742e-06, "loss": 0.3765, "step": 15914 }, { "epoch": 2.1282428456806635, "grad_norm": 1.6362286806106567, "learning_rate": 4.111640302031494e-06, "loss": 0.3773, "step": 15915 }, { "epoch": 2.128376571275742, "grad_norm": 1.516675591468811, "learning_rate": 4.110473451499476e-06, "loss": 0.3983, "step": 15916 }, { "epoch": 2.128510296870821, "grad_norm": 1.464879035949707, "learning_rate": 4.109306723728995e-06, "loss": 0.3917, "step": 15917 }, { "epoch": 2.1286440224659, "grad_norm": 1.5611447095870972, "learning_rate": 4.108140118744383e-06, "loss": 0.3972, "step": 15918 }, { "epoch": 2.128777748060979, "grad_norm": 1.5515780448913574, "learning_rate": 4.106973636569956e-06, "loss": 0.3664, "step": 15919 }, { "epoch": 2.128911473656058, "grad_norm": 1.392643690109253, "learning_rate": 4.105807277230018e-06, "loss": 0.3729, "step": 15920 }, { "epoch": 2.1290451992511366, "grad_norm": 1.5708081722259521, "learning_rate": 4.104641040748894e-06, "loss": 0.3749, "step": 15921 }, { "epoch": 2.1291789248462156, "grad_norm": 1.4403773546218872, "learning_rate": 4.103474927150882e-06, "loss": 0.3493, "step": 15922 }, { "epoch": 2.1293126504412943, "grad_norm": 1.3526792526245117, "learning_rate": 4.1023089364602945e-06, "loss": 0.3799, "step": 15923 }, { "epoch": 2.1294463760363733, "grad_norm": 1.5631285905838013, "learning_rate": 4.101143068701432e-06, "loss": 0.3512, "step": 15924 }, { "epoch": 2.1295801016314524, "grad_norm": 1.5933613777160645, "learning_rate": 4.0999773238985975e-06, "loss": 0.4104, "step": 15925 }, { "epoch": 2.129713827226531, "grad_norm": 1.5095676183700562, "learning_rate": 4.098811702076091e-06, "loss": 0.3925, "step": 15926 }, { "epoch": 2.12984755282161, "grad_norm": 1.5459033250808716, "learning_rate": 4.097646203258207e-06, "loss": 0.3601, "step": 15927 }, { "epoch": 2.129981278416689, "grad_norm": 1.650770664215088, "learning_rate": 4.09648082746924e-06, "loss": 0.4173, "step": 15928 }, { "epoch": 2.130115004011768, "grad_norm": 1.4838576316833496, "learning_rate": 4.095315574733482e-06, "loss": 0.3926, "step": 15929 }, { "epoch": 2.130248729606847, "grad_norm": 1.3735183477401733, "learning_rate": 4.09415044507522e-06, "loss": 0.327, "step": 15930 }, { "epoch": 2.1303824552019255, "grad_norm": 1.4151078462600708, "learning_rate": 4.09298543851874e-06, "loss": 0.3366, "step": 15931 }, { "epoch": 2.1305161807970046, "grad_norm": 1.3822365999221802, "learning_rate": 4.091820555088327e-06, "loss": 0.373, "step": 15932 }, { "epoch": 2.1306499063920836, "grad_norm": 1.5044782161712646, "learning_rate": 4.090655794808262e-06, "loss": 0.3796, "step": 15933 }, { "epoch": 2.1307836319871623, "grad_norm": 1.5967580080032349, "learning_rate": 4.089491157702821e-06, "loss": 0.3798, "step": 15934 }, { "epoch": 2.1309173575822413, "grad_norm": 1.3753914833068848, "learning_rate": 4.088326643796284e-06, "loss": 0.3762, "step": 15935 }, { "epoch": 2.13105108317732, "grad_norm": 1.4694678783416748, "learning_rate": 4.087162253112915e-06, "loss": 0.378, "step": 15936 }, { "epoch": 2.131184808772399, "grad_norm": 1.3661811351776123, "learning_rate": 4.085997985676995e-06, "loss": 0.3489, "step": 15937 }, { "epoch": 2.131318534367478, "grad_norm": 1.537551999092102, "learning_rate": 4.084833841512791e-06, "loss": 0.3752, "step": 15938 }, { "epoch": 2.1314522599625567, "grad_norm": 1.6699230670928955, "learning_rate": 4.083669820644558e-06, "loss": 0.3701, "step": 15939 }, { "epoch": 2.131585985557636, "grad_norm": 1.5585919618606567, "learning_rate": 4.0825059230965735e-06, "loss": 0.4136, "step": 15940 }, { "epoch": 2.131719711152715, "grad_norm": 1.7295082807540894, "learning_rate": 4.081342148893083e-06, "loss": 0.4014, "step": 15941 }, { "epoch": 2.1318534367477935, "grad_norm": 1.4812641143798828, "learning_rate": 4.080178498058359e-06, "loss": 0.3696, "step": 15942 }, { "epoch": 2.1319871623428726, "grad_norm": 1.396743655204773, "learning_rate": 4.079014970616647e-06, "loss": 0.3354, "step": 15943 }, { "epoch": 2.132120887937951, "grad_norm": 1.6025176048278809, "learning_rate": 4.077851566592202e-06, "loss": 0.383, "step": 15944 }, { "epoch": 2.1322546135330303, "grad_norm": 1.7655713558197021, "learning_rate": 4.076688286009274e-06, "loss": 0.4239, "step": 15945 }, { "epoch": 2.1323883391281093, "grad_norm": 1.4127881526947021, "learning_rate": 4.07552512889211e-06, "loss": 0.362, "step": 15946 }, { "epoch": 2.132522064723188, "grad_norm": 1.595535397529602, "learning_rate": 4.074362095264957e-06, "loss": 0.3722, "step": 15947 }, { "epoch": 2.132655790318267, "grad_norm": 1.55341637134552, "learning_rate": 4.073199185152054e-06, "loss": 0.3444, "step": 15948 }, { "epoch": 2.1327895159133456, "grad_norm": 1.4043346643447876, "learning_rate": 4.072036398577644e-06, "loss": 0.3723, "step": 15949 }, { "epoch": 2.1329232415084247, "grad_norm": 1.4377882480621338, "learning_rate": 4.070873735565962e-06, "loss": 0.338, "step": 15950 }, { "epoch": 2.133056967103504, "grad_norm": 1.513719916343689, "learning_rate": 4.069711196141244e-06, "loss": 0.3718, "step": 15951 }, { "epoch": 2.1331906926985824, "grad_norm": 1.6297292709350586, "learning_rate": 4.068548780327721e-06, "loss": 0.3596, "step": 15952 }, { "epoch": 2.1333244182936615, "grad_norm": 1.4297336339950562, "learning_rate": 4.067386488149624e-06, "loss": 0.3722, "step": 15953 }, { "epoch": 2.13345814388874, "grad_norm": 1.4753037691116333, "learning_rate": 4.066224319631181e-06, "loss": 0.3853, "step": 15954 }, { "epoch": 2.133591869483819, "grad_norm": 1.5886751413345337, "learning_rate": 4.065062274796609e-06, "loss": 0.3918, "step": 15955 }, { "epoch": 2.1337255950788983, "grad_norm": 1.56856369972229, "learning_rate": 4.063900353670136e-06, "loss": 0.3619, "step": 15956 }, { "epoch": 2.133859320673977, "grad_norm": 1.4538838863372803, "learning_rate": 4.06273855627598e-06, "loss": 0.4007, "step": 15957 }, { "epoch": 2.133993046269056, "grad_norm": 1.376729130744934, "learning_rate": 4.061576882638359e-06, "loss": 0.3656, "step": 15958 }, { "epoch": 2.1341267718641346, "grad_norm": 1.6132502555847168, "learning_rate": 4.060415332781488e-06, "loss": 0.4181, "step": 15959 }, { "epoch": 2.1342604974592136, "grad_norm": 1.1769644021987915, "learning_rate": 4.059253906729569e-06, "loss": 0.3602, "step": 15960 }, { "epoch": 2.1343942230542927, "grad_norm": 1.4853334426879883, "learning_rate": 4.058092604506825e-06, "loss": 0.3866, "step": 15961 }, { "epoch": 2.1345279486493713, "grad_norm": 1.291886568069458, "learning_rate": 4.05693142613745e-06, "loss": 0.3443, "step": 15962 }, { "epoch": 2.1346616742444504, "grad_norm": 1.522615671157837, "learning_rate": 4.055770371645655e-06, "loss": 0.3927, "step": 15963 }, { "epoch": 2.1347953998395295, "grad_norm": 1.3327890634536743, "learning_rate": 4.054609441055636e-06, "loss": 0.3401, "step": 15964 }, { "epoch": 2.134929125434608, "grad_norm": 1.3329821825027466, "learning_rate": 4.053448634391591e-06, "loss": 0.346, "step": 15965 }, { "epoch": 2.135062851029687, "grad_norm": 1.4154276847839355, "learning_rate": 4.052287951677727e-06, "loss": 0.3798, "step": 15966 }, { "epoch": 2.135196576624766, "grad_norm": 1.3937063217163086, "learning_rate": 4.051127392938226e-06, "loss": 0.3686, "step": 15967 }, { "epoch": 2.135330302219845, "grad_norm": 1.2747223377227783, "learning_rate": 4.049966958197281e-06, "loss": 0.3545, "step": 15968 }, { "epoch": 2.135464027814924, "grad_norm": 1.3360567092895508, "learning_rate": 4.048806647479082e-06, "loss": 0.3498, "step": 15969 }, { "epoch": 2.1355977534100026, "grad_norm": 1.5092837810516357, "learning_rate": 4.047646460807814e-06, "loss": 0.3826, "step": 15970 }, { "epoch": 2.1357314790050816, "grad_norm": 1.5105440616607666, "learning_rate": 4.046486398207659e-06, "loss": 0.3947, "step": 15971 }, { "epoch": 2.1358652046001603, "grad_norm": 1.5411834716796875, "learning_rate": 4.045326459702797e-06, "loss": 0.3859, "step": 15972 }, { "epoch": 2.1359989301952393, "grad_norm": 1.4459211826324463, "learning_rate": 4.044166645317409e-06, "loss": 0.4044, "step": 15973 }, { "epoch": 2.1361326557903184, "grad_norm": 1.424497365951538, "learning_rate": 4.043006955075667e-06, "loss": 0.3936, "step": 15974 }, { "epoch": 2.136266381385397, "grad_norm": 1.5023434162139893, "learning_rate": 4.041847389001745e-06, "loss": 0.3632, "step": 15975 }, { "epoch": 2.136400106980476, "grad_norm": 1.7268065214157104, "learning_rate": 4.040687947119813e-06, "loss": 0.4304, "step": 15976 }, { "epoch": 2.136533832575555, "grad_norm": 1.5523854494094849, "learning_rate": 4.039528629454039e-06, "loss": 0.3546, "step": 15977 }, { "epoch": 2.136667558170634, "grad_norm": 1.670316219329834, "learning_rate": 4.038369436028586e-06, "loss": 0.379, "step": 15978 }, { "epoch": 2.136801283765713, "grad_norm": 1.4367728233337402, "learning_rate": 4.037210366867617e-06, "loss": 0.386, "step": 15979 }, { "epoch": 2.1369350093607915, "grad_norm": 1.7735098600387573, "learning_rate": 4.036051421995298e-06, "loss": 0.4593, "step": 15980 }, { "epoch": 2.1370687349558706, "grad_norm": 1.5288199186325073, "learning_rate": 4.034892601435771e-06, "loss": 0.3839, "step": 15981 }, { "epoch": 2.1372024605509496, "grad_norm": 1.5304921865463257, "learning_rate": 4.033733905213209e-06, "loss": 0.3776, "step": 15982 }, { "epoch": 2.1373361861460283, "grad_norm": 1.5764700174331665, "learning_rate": 4.032575333351749e-06, "loss": 0.3603, "step": 15983 }, { "epoch": 2.1374699117411073, "grad_norm": 1.4649922847747803, "learning_rate": 4.0314168858755434e-06, "loss": 0.3668, "step": 15984 }, { "epoch": 2.137603637336186, "grad_norm": 1.4266655445098877, "learning_rate": 4.0302585628087475e-06, "loss": 0.3375, "step": 15985 }, { "epoch": 2.137737362931265, "grad_norm": 1.335291862487793, "learning_rate": 4.0291003641754935e-06, "loss": 0.359, "step": 15986 }, { "epoch": 2.137871088526344, "grad_norm": 1.6448557376861572, "learning_rate": 4.0279422899999355e-06, "loss": 0.4122, "step": 15987 }, { "epoch": 2.1380048141214227, "grad_norm": 1.4767085313796997, "learning_rate": 4.026784340306202e-06, "loss": 0.3877, "step": 15988 }, { "epoch": 2.138138539716502, "grad_norm": 1.6708890199661255, "learning_rate": 4.025626515118434e-06, "loss": 0.4423, "step": 15989 }, { "epoch": 2.1382722653115804, "grad_norm": 1.5185105800628662, "learning_rate": 4.024468814460764e-06, "loss": 0.3602, "step": 15990 }, { "epoch": 2.1384059909066595, "grad_norm": 1.6793361902236938, "learning_rate": 4.023311238357324e-06, "loss": 0.4378, "step": 15991 }, { "epoch": 2.1385397165017386, "grad_norm": 1.6090344190597534, "learning_rate": 4.022153786832241e-06, "loss": 0.3605, "step": 15992 }, { "epoch": 2.138673442096817, "grad_norm": 1.4538154602050781, "learning_rate": 4.020996459909643e-06, "loss": 0.3485, "step": 15993 }, { "epoch": 2.1388071676918963, "grad_norm": 1.4623823165893555, "learning_rate": 4.019839257613652e-06, "loss": 0.3501, "step": 15994 }, { "epoch": 2.138940893286975, "grad_norm": 1.383408546447754, "learning_rate": 4.018682179968391e-06, "loss": 0.3324, "step": 15995 }, { "epoch": 2.139074618882054, "grad_norm": 1.5650684833526611, "learning_rate": 4.017525226997975e-06, "loss": 0.4141, "step": 15996 }, { "epoch": 2.139208344477133, "grad_norm": 1.5420795679092407, "learning_rate": 4.0163683987265215e-06, "loss": 0.4166, "step": 15997 }, { "epoch": 2.1393420700722117, "grad_norm": 1.5818982124328613, "learning_rate": 4.015211695178142e-06, "loss": 0.4164, "step": 15998 }, { "epoch": 2.1394757956672907, "grad_norm": 1.6897526979446411, "learning_rate": 4.014055116376952e-06, "loss": 0.4304, "step": 15999 }, { "epoch": 2.13960952126237, "grad_norm": 1.4433164596557617, "learning_rate": 4.012898662347048e-06, "loss": 0.412, "step": 16000 }, { "epoch": 2.1397432468574484, "grad_norm": 1.5308454036712646, "learning_rate": 4.011742333112546e-06, "loss": 0.4005, "step": 16001 }, { "epoch": 2.1398769724525275, "grad_norm": 1.6341255903244019, "learning_rate": 4.010586128697546e-06, "loss": 0.4279, "step": 16002 }, { "epoch": 2.140010698047606, "grad_norm": 1.383974552154541, "learning_rate": 4.009430049126145e-06, "loss": 0.3765, "step": 16003 }, { "epoch": 2.140144423642685, "grad_norm": 1.560968041419983, "learning_rate": 4.008274094422447e-06, "loss": 0.365, "step": 16004 }, { "epoch": 2.1402781492377643, "grad_norm": 1.5451240539550781, "learning_rate": 4.007118264610534e-06, "loss": 0.4146, "step": 16005 }, { "epoch": 2.140411874832843, "grad_norm": 1.596616268157959, "learning_rate": 4.005962559714514e-06, "loss": 0.4031, "step": 16006 }, { "epoch": 2.140545600427922, "grad_norm": 1.489897608757019, "learning_rate": 4.0048069797584665e-06, "loss": 0.3924, "step": 16007 }, { "epoch": 2.1406793260230006, "grad_norm": 1.6411100625991821, "learning_rate": 4.003651524766479e-06, "loss": 0.3718, "step": 16008 }, { "epoch": 2.1408130516180797, "grad_norm": 1.479434609413147, "learning_rate": 4.0024961947626386e-06, "loss": 0.3894, "step": 16009 }, { "epoch": 2.1409467772131587, "grad_norm": 1.4158761501312256, "learning_rate": 4.001340989771022e-06, "loss": 0.3949, "step": 16010 }, { "epoch": 2.1410805028082374, "grad_norm": 1.5351736545562744, "learning_rate": 4.000185909815719e-06, "loss": 0.4204, "step": 16011 }, { "epoch": 2.1412142284033164, "grad_norm": 1.5570470094680786, "learning_rate": 3.999030954920796e-06, "loss": 0.3677, "step": 16012 }, { "epoch": 2.1413479539983955, "grad_norm": 1.712088942527771, "learning_rate": 3.997876125110331e-06, "loss": 0.3747, "step": 16013 }, { "epoch": 2.141481679593474, "grad_norm": 1.4223662614822388, "learning_rate": 3.996721420408395e-06, "loss": 0.382, "step": 16014 }, { "epoch": 2.141615405188553, "grad_norm": 1.4365901947021484, "learning_rate": 3.995566840839056e-06, "loss": 0.3654, "step": 16015 }, { "epoch": 2.141749130783632, "grad_norm": 1.6464719772338867, "learning_rate": 3.99441238642638e-06, "loss": 0.4045, "step": 16016 }, { "epoch": 2.141882856378711, "grad_norm": 1.2602894306182861, "learning_rate": 3.993258057194432e-06, "loss": 0.3644, "step": 16017 }, { "epoch": 2.14201658197379, "grad_norm": 1.3909038305282593, "learning_rate": 3.992103853167272e-06, "loss": 0.3824, "step": 16018 }, { "epoch": 2.1421503075688686, "grad_norm": 1.496744155883789, "learning_rate": 3.990949774368957e-06, "loss": 0.3846, "step": 16019 }, { "epoch": 2.1422840331639477, "grad_norm": 1.3516961336135864, "learning_rate": 3.9897958208235456e-06, "loss": 0.3386, "step": 16020 }, { "epoch": 2.1424177587590263, "grad_norm": 1.5119680166244507, "learning_rate": 3.988641992555088e-06, "loss": 0.4065, "step": 16021 }, { "epoch": 2.1425514843541054, "grad_norm": 1.5900371074676514, "learning_rate": 3.9874882895876364e-06, "loss": 0.3887, "step": 16022 }, { "epoch": 2.1426852099491844, "grad_norm": 1.4662861824035645, "learning_rate": 3.986334711945241e-06, "loss": 0.4152, "step": 16023 }, { "epoch": 2.142818935544263, "grad_norm": 1.3891469240188599, "learning_rate": 3.985181259651938e-06, "loss": 0.3696, "step": 16024 }, { "epoch": 2.142952661139342, "grad_norm": 1.524953842163086, "learning_rate": 3.984027932731782e-06, "loss": 0.3718, "step": 16025 }, { "epoch": 2.1430863867344208, "grad_norm": 1.5173137187957764, "learning_rate": 3.982874731208802e-06, "loss": 0.3986, "step": 16026 }, { "epoch": 2.1432201123295, "grad_norm": 1.4850375652313232, "learning_rate": 3.981721655107046e-06, "loss": 0.3967, "step": 16027 }, { "epoch": 2.143353837924579, "grad_norm": 1.6878807544708252, "learning_rate": 3.980568704450539e-06, "loss": 0.4272, "step": 16028 }, { "epoch": 2.1434875635196575, "grad_norm": 1.55438232421875, "learning_rate": 3.9794158792633155e-06, "loss": 0.4002, "step": 16029 }, { "epoch": 2.1436212891147366, "grad_norm": 1.4437282085418701, "learning_rate": 3.978263179569413e-06, "loss": 0.3531, "step": 16030 }, { "epoch": 2.1437550147098157, "grad_norm": 1.4001771211624146, "learning_rate": 3.977110605392849e-06, "loss": 0.3254, "step": 16031 }, { "epoch": 2.1438887403048943, "grad_norm": 1.435258150100708, "learning_rate": 3.9759581567576515e-06, "loss": 0.4046, "step": 16032 }, { "epoch": 2.1440224658999734, "grad_norm": 1.528347134590149, "learning_rate": 3.974805833687841e-06, "loss": 0.4144, "step": 16033 }, { "epoch": 2.144156191495052, "grad_norm": 1.3722405433654785, "learning_rate": 3.973653636207437e-06, "loss": 0.3465, "step": 16034 }, { "epoch": 2.144289917090131, "grad_norm": 1.4615352153778076, "learning_rate": 3.972501564340457e-06, "loss": 0.3907, "step": 16035 }, { "epoch": 2.14442364268521, "grad_norm": 1.461098313331604, "learning_rate": 3.971349618110915e-06, "loss": 0.4037, "step": 16036 }, { "epoch": 2.1445573682802888, "grad_norm": 1.44745934009552, "learning_rate": 3.970197797542821e-06, "loss": 0.3542, "step": 16037 }, { "epoch": 2.144691093875368, "grad_norm": 1.4748221635818481, "learning_rate": 3.9690461026601844e-06, "loss": 0.376, "step": 16038 }, { "epoch": 2.1448248194704465, "grad_norm": 1.5287187099456787, "learning_rate": 3.96789453348701e-06, "loss": 0.3953, "step": 16039 }, { "epoch": 2.1449585450655255, "grad_norm": 1.5448416471481323, "learning_rate": 3.9667430900473024e-06, "loss": 0.4073, "step": 16040 }, { "epoch": 2.1450922706606046, "grad_norm": 1.4126704931259155, "learning_rate": 3.965591772365062e-06, "loss": 0.3792, "step": 16041 }, { "epoch": 2.145225996255683, "grad_norm": 1.2463871240615845, "learning_rate": 3.964440580464286e-06, "loss": 0.301, "step": 16042 }, { "epoch": 2.1453597218507623, "grad_norm": 1.5668666362762451, "learning_rate": 3.963289514368971e-06, "loss": 0.4059, "step": 16043 }, { "epoch": 2.1454934474458414, "grad_norm": 1.3873860836029053, "learning_rate": 3.962138574103114e-06, "loss": 0.4057, "step": 16044 }, { "epoch": 2.14562717304092, "grad_norm": 1.3748260736465454, "learning_rate": 3.960987759690692e-06, "loss": 0.368, "step": 16045 }, { "epoch": 2.145760898635999, "grad_norm": 1.4905911684036255, "learning_rate": 3.95983707115571e-06, "loss": 0.4075, "step": 16046 }, { "epoch": 2.1458946242310777, "grad_norm": 1.492329478263855, "learning_rate": 3.95868650852214e-06, "loss": 0.3585, "step": 16047 }, { "epoch": 2.1460283498261568, "grad_norm": 1.4288463592529297, "learning_rate": 3.957536071813966e-06, "loss": 0.3547, "step": 16048 }, { "epoch": 2.146162075421236, "grad_norm": 1.5469757318496704, "learning_rate": 3.9563857610551785e-06, "loss": 0.3854, "step": 16049 }, { "epoch": 2.1462958010163145, "grad_norm": 1.484440565109253, "learning_rate": 3.955235576269738e-06, "loss": 0.3686, "step": 16050 }, { "epoch": 2.1464295266113935, "grad_norm": 1.7135186195373535, "learning_rate": 3.954085517481635e-06, "loss": 0.4091, "step": 16051 }, { "epoch": 2.146563252206472, "grad_norm": 1.592209815979004, "learning_rate": 3.952935584714831e-06, "loss": 0.3724, "step": 16052 }, { "epoch": 2.146696977801551, "grad_norm": 1.8524796962738037, "learning_rate": 3.951785777993298e-06, "loss": 0.4118, "step": 16053 }, { "epoch": 2.1468307033966303, "grad_norm": 1.4964635372161865, "learning_rate": 3.950636097341003e-06, "loss": 0.332, "step": 16054 }, { "epoch": 2.146964428991709, "grad_norm": 1.5913512706756592, "learning_rate": 3.949486542781911e-06, "loss": 0.3505, "step": 16055 }, { "epoch": 2.147098154586788, "grad_norm": 1.5682556629180908, "learning_rate": 3.948337114339981e-06, "loss": 0.4297, "step": 16056 }, { "epoch": 2.1472318801818666, "grad_norm": 1.5334926843643188, "learning_rate": 3.947187812039173e-06, "loss": 0.3933, "step": 16057 }, { "epoch": 2.1473656057769457, "grad_norm": 1.5958219766616821, "learning_rate": 3.946038635903443e-06, "loss": 0.4209, "step": 16058 }, { "epoch": 2.1474993313720248, "grad_norm": 1.4068228006362915, "learning_rate": 3.944889585956746e-06, "loss": 0.3928, "step": 16059 }, { "epoch": 2.1476330569671034, "grad_norm": 1.4997811317443848, "learning_rate": 3.94374066222303e-06, "loss": 0.4255, "step": 16060 }, { "epoch": 2.1477667825621825, "grad_norm": 1.6097463369369507, "learning_rate": 3.942591864726246e-06, "loss": 0.3911, "step": 16061 }, { "epoch": 2.147900508157261, "grad_norm": 1.5123066902160645, "learning_rate": 3.941443193490338e-06, "loss": 0.4154, "step": 16062 }, { "epoch": 2.14803423375234, "grad_norm": 1.60038161277771, "learning_rate": 3.940294648539248e-06, "loss": 0.3822, "step": 16063 }, { "epoch": 2.148167959347419, "grad_norm": 1.4714967012405396, "learning_rate": 3.939146229896919e-06, "loss": 0.4009, "step": 16064 }, { "epoch": 2.148301684942498, "grad_norm": 1.302838683128357, "learning_rate": 3.93799793758729e-06, "loss": 0.3492, "step": 16065 }, { "epoch": 2.148435410537577, "grad_norm": 1.4940990209579468, "learning_rate": 3.936849771634286e-06, "loss": 0.3762, "step": 16066 }, { "epoch": 2.148569136132656, "grad_norm": 1.8734276294708252, "learning_rate": 3.9357017320618506e-06, "loss": 0.4117, "step": 16067 }, { "epoch": 2.1487028617277346, "grad_norm": 1.591796875, "learning_rate": 3.934553818893912e-06, "loss": 0.3859, "step": 16068 }, { "epoch": 2.1488365873228137, "grad_norm": 1.4919301271438599, "learning_rate": 3.93340603215439e-06, "loss": 0.3979, "step": 16069 }, { "epoch": 2.1489703129178923, "grad_norm": 1.593639850616455, "learning_rate": 3.932258371867221e-06, "loss": 0.353, "step": 16070 }, { "epoch": 2.1491040385129714, "grad_norm": 1.4452128410339355, "learning_rate": 3.9311108380563125e-06, "loss": 0.387, "step": 16071 }, { "epoch": 2.1492377641080505, "grad_norm": 1.685669183731079, "learning_rate": 3.929963430745598e-06, "loss": 0.4416, "step": 16072 }, { "epoch": 2.149371489703129, "grad_norm": 1.4676814079284668, "learning_rate": 3.928816149958984e-06, "loss": 0.3905, "step": 16073 }, { "epoch": 2.149505215298208, "grad_norm": 1.3602242469787598, "learning_rate": 3.927668995720384e-06, "loss": 0.3444, "step": 16074 }, { "epoch": 2.1496389408932868, "grad_norm": 1.5395230054855347, "learning_rate": 3.92652196805372e-06, "loss": 0.3791, "step": 16075 }, { "epoch": 2.149772666488366, "grad_norm": 1.542179822921753, "learning_rate": 3.925375066982892e-06, "loss": 0.3969, "step": 16076 }, { "epoch": 2.149906392083445, "grad_norm": 1.5008248090744019, "learning_rate": 3.9242282925318064e-06, "loss": 0.363, "step": 16077 }, { "epoch": 2.1500401176785235, "grad_norm": 1.4808636903762817, "learning_rate": 3.9230816447243695e-06, "loss": 0.3764, "step": 16078 }, { "epoch": 2.1501738432736026, "grad_norm": 1.516959547996521, "learning_rate": 3.921935123584479e-06, "loss": 0.372, "step": 16079 }, { "epoch": 2.1503075688686817, "grad_norm": 1.6373462677001953, "learning_rate": 3.920788729136036e-06, "loss": 0.4529, "step": 16080 }, { "epoch": 2.1504412944637603, "grad_norm": 1.5589298009872437, "learning_rate": 3.919642461402935e-06, "loss": 0.4203, "step": 16081 }, { "epoch": 2.1505750200588394, "grad_norm": 1.6543482542037964, "learning_rate": 3.918496320409068e-06, "loss": 0.4422, "step": 16082 }, { "epoch": 2.150708745653918, "grad_norm": 1.6424309015274048, "learning_rate": 3.917350306178326e-06, "loss": 0.4419, "step": 16083 }, { "epoch": 2.150842471248997, "grad_norm": 1.3029494285583496, "learning_rate": 3.916204418734599e-06, "loss": 0.3444, "step": 16084 }, { "epoch": 2.150976196844076, "grad_norm": 1.6073129177093506, "learning_rate": 3.915058658101763e-06, "loss": 0.44, "step": 16085 }, { "epoch": 2.1511099224391548, "grad_norm": 1.4433947801589966, "learning_rate": 3.913913024303712e-06, "loss": 0.3589, "step": 16086 }, { "epoch": 2.151243648034234, "grad_norm": 1.5924313068389893, "learning_rate": 3.912767517364317e-06, "loss": 0.3968, "step": 16087 }, { "epoch": 2.1513773736293125, "grad_norm": 1.6250706911087036, "learning_rate": 3.91162213730746e-06, "loss": 0.3952, "step": 16088 }, { "epoch": 2.1515110992243915, "grad_norm": 1.4148329496383667, "learning_rate": 3.9104768841570175e-06, "loss": 0.3653, "step": 16089 }, { "epoch": 2.1516448248194706, "grad_norm": 1.40345299243927, "learning_rate": 3.90933175793685e-06, "loss": 0.3497, "step": 16090 }, { "epoch": 2.1517785504145492, "grad_norm": 1.456261396408081, "learning_rate": 3.90818675867084e-06, "loss": 0.3875, "step": 16091 }, { "epoch": 2.1519122760096283, "grad_norm": 1.454257845878601, "learning_rate": 3.907041886382845e-06, "loss": 0.3765, "step": 16092 }, { "epoch": 2.152046001604707, "grad_norm": 1.67599618434906, "learning_rate": 3.9058971410967285e-06, "loss": 0.4213, "step": 16093 }, { "epoch": 2.152179727199786, "grad_norm": 1.4291666746139526, "learning_rate": 3.90475252283636e-06, "loss": 0.3876, "step": 16094 }, { "epoch": 2.152313452794865, "grad_norm": 1.539754867553711, "learning_rate": 3.903608031625587e-06, "loss": 0.3934, "step": 16095 }, { "epoch": 2.1524471783899437, "grad_norm": 1.4383267164230347, "learning_rate": 3.902463667488278e-06, "loss": 0.3759, "step": 16096 }, { "epoch": 2.1525809039850228, "grad_norm": 1.4488979578018188, "learning_rate": 3.901319430448276e-06, "loss": 0.3963, "step": 16097 }, { "epoch": 2.1527146295801014, "grad_norm": 1.5035040378570557, "learning_rate": 3.9001753205294335e-06, "loss": 0.3454, "step": 16098 }, { "epoch": 2.1528483551751805, "grad_norm": 1.335659384727478, "learning_rate": 3.8990313377556e-06, "loss": 0.3287, "step": 16099 }, { "epoch": 2.1529820807702595, "grad_norm": 1.6142460107803345, "learning_rate": 3.897887482150621e-06, "loss": 0.4122, "step": 16100 }, { "epoch": 2.153115806365338, "grad_norm": 1.5206695795059204, "learning_rate": 3.896743753738337e-06, "loss": 0.3992, "step": 16101 }, { "epoch": 2.1532495319604172, "grad_norm": 1.4361813068389893, "learning_rate": 3.89560015254259e-06, "loss": 0.3772, "step": 16102 }, { "epoch": 2.1533832575554963, "grad_norm": 1.5769808292388916, "learning_rate": 3.894456678587216e-06, "loss": 0.3854, "step": 16103 }, { "epoch": 2.153516983150575, "grad_norm": 1.4655263423919678, "learning_rate": 3.893313331896051e-06, "loss": 0.408, "step": 16104 }, { "epoch": 2.153650708745654, "grad_norm": 1.3872959613800049, "learning_rate": 3.8921701124929255e-06, "loss": 0.3675, "step": 16105 }, { "epoch": 2.1537844343407326, "grad_norm": 1.3545231819152832, "learning_rate": 3.89102702040167e-06, "loss": 0.3791, "step": 16106 }, { "epoch": 2.1539181599358117, "grad_norm": 1.4708161354064941, "learning_rate": 3.88988405564611e-06, "loss": 0.3674, "step": 16107 }, { "epoch": 2.1540518855308908, "grad_norm": 1.4483819007873535, "learning_rate": 3.888741218250074e-06, "loss": 0.3351, "step": 16108 }, { "epoch": 2.1541856111259694, "grad_norm": 1.435144305229187, "learning_rate": 3.8875985082373725e-06, "loss": 0.3736, "step": 16109 }, { "epoch": 2.1543193367210485, "grad_norm": 1.4609761238098145, "learning_rate": 3.8864559256318375e-06, "loss": 0.3989, "step": 16110 }, { "epoch": 2.154453062316127, "grad_norm": 1.5012121200561523, "learning_rate": 3.885313470457272e-06, "loss": 0.4283, "step": 16111 }, { "epoch": 2.154586787911206, "grad_norm": 1.4494997262954712, "learning_rate": 3.8841711427375e-06, "loss": 0.3684, "step": 16112 }, { "epoch": 2.1547205135062852, "grad_norm": 1.6195967197418213, "learning_rate": 3.883028942496333e-06, "loss": 0.3848, "step": 16113 }, { "epoch": 2.154854239101364, "grad_norm": 1.5485037565231323, "learning_rate": 3.881886869757565e-06, "loss": 0.4225, "step": 16114 }, { "epoch": 2.154987964696443, "grad_norm": 1.429679036140442, "learning_rate": 3.880744924545019e-06, "loss": 0.4064, "step": 16115 }, { "epoch": 2.155121690291522, "grad_norm": 1.559616208076477, "learning_rate": 3.8796031068824865e-06, "loss": 0.3955, "step": 16116 }, { "epoch": 2.1552554158866006, "grad_norm": 1.8206707239151, "learning_rate": 3.87846141679377e-06, "loss": 0.4716, "step": 16117 }, { "epoch": 2.1553891414816797, "grad_norm": 1.3845725059509277, "learning_rate": 3.877319854302668e-06, "loss": 0.3853, "step": 16118 }, { "epoch": 2.1555228670767583, "grad_norm": 1.4974833726882935, "learning_rate": 3.876178419432971e-06, "loss": 0.3889, "step": 16119 }, { "epoch": 2.1556565926718374, "grad_norm": 1.488113522529602, "learning_rate": 3.875037112208482e-06, "loss": 0.3961, "step": 16120 }, { "epoch": 2.1557903182669165, "grad_norm": 1.4846007823944092, "learning_rate": 3.87389593265298e-06, "loss": 0.3768, "step": 16121 }, { "epoch": 2.155924043861995, "grad_norm": 1.6884653568267822, "learning_rate": 3.872754880790255e-06, "loss": 0.4116, "step": 16122 }, { "epoch": 2.156057769457074, "grad_norm": 1.4158353805541992, "learning_rate": 3.871613956644091e-06, "loss": 0.373, "step": 16123 }, { "epoch": 2.156191495052153, "grad_norm": 1.4970380067825317, "learning_rate": 3.870473160238271e-06, "loss": 0.4165, "step": 16124 }, { "epoch": 2.156325220647232, "grad_norm": 1.563650369644165, "learning_rate": 3.869332491596573e-06, "loss": 0.3876, "step": 16125 }, { "epoch": 2.156458946242311, "grad_norm": 1.581353783607483, "learning_rate": 3.868191950742771e-06, "loss": 0.4018, "step": 16126 }, { "epoch": 2.1565926718373896, "grad_norm": 1.5553282499313354, "learning_rate": 3.867051537700642e-06, "loss": 0.4135, "step": 16127 }, { "epoch": 2.1567263974324686, "grad_norm": 1.5583549737930298, "learning_rate": 3.8659112524939535e-06, "loss": 0.3476, "step": 16128 }, { "epoch": 2.1568601230275473, "grad_norm": 1.486255407333374, "learning_rate": 3.864771095146479e-06, "loss": 0.403, "step": 16129 }, { "epoch": 2.1569938486226263, "grad_norm": 1.3971513509750366, "learning_rate": 3.863631065681974e-06, "loss": 0.4273, "step": 16130 }, { "epoch": 2.1571275742177054, "grad_norm": 1.4773471355438232, "learning_rate": 3.862491164124211e-06, "loss": 0.3563, "step": 16131 }, { "epoch": 2.157261299812784, "grad_norm": 1.6563184261322021, "learning_rate": 3.86135139049695e-06, "loss": 0.3698, "step": 16132 }, { "epoch": 2.157395025407863, "grad_norm": 1.5103743076324463, "learning_rate": 3.860211744823939e-06, "loss": 0.4156, "step": 16133 }, { "epoch": 2.157528751002942, "grad_norm": 1.5286197662353516, "learning_rate": 3.859072227128945e-06, "loss": 0.3726, "step": 16134 }, { "epoch": 2.157662476598021, "grad_norm": 1.5182170867919922, "learning_rate": 3.857932837435707e-06, "loss": 0.368, "step": 16135 }, { "epoch": 2.1577962021931, "grad_norm": 1.495816946029663, "learning_rate": 3.856793575767989e-06, "loss": 0.3741, "step": 16136 }, { "epoch": 2.1579299277881785, "grad_norm": 1.4636350870132446, "learning_rate": 3.855654442149527e-06, "loss": 0.3726, "step": 16137 }, { "epoch": 2.1580636533832576, "grad_norm": 1.3995403051376343, "learning_rate": 3.854515436604066e-06, "loss": 0.3781, "step": 16138 }, { "epoch": 2.1581973789783366, "grad_norm": 1.4839344024658203, "learning_rate": 3.8533765591553564e-06, "loss": 0.4089, "step": 16139 }, { "epoch": 2.1583311045734153, "grad_norm": 1.4802722930908203, "learning_rate": 3.852237809827127e-06, "loss": 0.358, "step": 16140 }, { "epoch": 2.1584648301684943, "grad_norm": 1.516821026802063, "learning_rate": 3.8510991886431185e-06, "loss": 0.3632, "step": 16141 }, { "epoch": 2.158598555763573, "grad_norm": 1.5478018522262573, "learning_rate": 3.849960695627063e-06, "loss": 0.3748, "step": 16142 }, { "epoch": 2.158732281358652, "grad_norm": 1.4458105564117432, "learning_rate": 3.848822330802691e-06, "loss": 0.3691, "step": 16143 }, { "epoch": 2.158866006953731, "grad_norm": 1.4049608707427979, "learning_rate": 3.847684094193733e-06, "loss": 0.3678, "step": 16144 }, { "epoch": 2.1589997325488097, "grad_norm": 1.47234046459198, "learning_rate": 3.846545985823912e-06, "loss": 0.4045, "step": 16145 }, { "epoch": 2.159133458143889, "grad_norm": 1.5856846570968628, "learning_rate": 3.845408005716952e-06, "loss": 0.3814, "step": 16146 }, { "epoch": 2.159267183738968, "grad_norm": 1.4797228574752808, "learning_rate": 3.844270153896574e-06, "loss": 0.3788, "step": 16147 }, { "epoch": 2.1594009093340465, "grad_norm": 1.2931476831436157, "learning_rate": 3.843132430386492e-06, "loss": 0.3232, "step": 16148 }, { "epoch": 2.1595346349291256, "grad_norm": 1.532896876335144, "learning_rate": 3.841994835210424e-06, "loss": 0.4106, "step": 16149 }, { "epoch": 2.159668360524204, "grad_norm": 1.380008578300476, "learning_rate": 3.840857368392082e-06, "loss": 0.3204, "step": 16150 }, { "epoch": 2.1598020861192833, "grad_norm": 1.321828007698059, "learning_rate": 3.839720029955173e-06, "loss": 0.3441, "step": 16151 }, { "epoch": 2.1599358117143623, "grad_norm": 1.3661874532699585, "learning_rate": 3.838582819923405e-06, "loss": 0.3727, "step": 16152 }, { "epoch": 2.160069537309441, "grad_norm": 1.6206506490707397, "learning_rate": 3.837445738320488e-06, "loss": 0.4039, "step": 16153 }, { "epoch": 2.16020326290452, "grad_norm": 1.478460431098938, "learning_rate": 3.836308785170109e-06, "loss": 0.3766, "step": 16154 }, { "epoch": 2.1603369884995987, "grad_norm": 1.7232661247253418, "learning_rate": 3.835171960495983e-06, "loss": 0.4125, "step": 16155 }, { "epoch": 2.1604707140946777, "grad_norm": 1.5069818496704102, "learning_rate": 3.8340352643217904e-06, "loss": 0.3732, "step": 16156 }, { "epoch": 2.160604439689757, "grad_norm": 1.5332118272781372, "learning_rate": 3.832898696671237e-06, "loss": 0.3765, "step": 16157 }, { "epoch": 2.1607381652848354, "grad_norm": 1.6705482006072998, "learning_rate": 3.831762257568013e-06, "loss": 0.3618, "step": 16158 }, { "epoch": 2.1608718908799145, "grad_norm": 1.3945168256759644, "learning_rate": 3.8306259470357935e-06, "loss": 0.316, "step": 16159 }, { "epoch": 2.161005616474993, "grad_norm": 1.4786723852157593, "learning_rate": 3.829489765098281e-06, "loss": 0.4065, "step": 16160 }, { "epoch": 2.161139342070072, "grad_norm": 1.5336272716522217, "learning_rate": 3.828353711779146e-06, "loss": 0.4358, "step": 16161 }, { "epoch": 2.1612730676651513, "grad_norm": 1.4567012786865234, "learning_rate": 3.827217787102072e-06, "loss": 0.4005, "step": 16162 }, { "epoch": 2.16140679326023, "grad_norm": 1.6541118621826172, "learning_rate": 3.826081991090737e-06, "loss": 0.4013, "step": 16163 }, { "epoch": 2.161540518855309, "grad_norm": 1.4277007579803467, "learning_rate": 3.824946323768811e-06, "loss": 0.3855, "step": 16164 }, { "epoch": 2.1616742444503876, "grad_norm": 1.5351969003677368, "learning_rate": 3.8238107851599785e-06, "loss": 0.438, "step": 16165 }, { "epoch": 2.1618079700454667, "grad_norm": 1.5175426006317139, "learning_rate": 3.8226753752878955e-06, "loss": 0.398, "step": 16166 }, { "epoch": 2.1619416956405457, "grad_norm": 1.3230775594711304, "learning_rate": 3.8215400941762325e-06, "loss": 0.3421, "step": 16167 }, { "epoch": 2.1620754212356244, "grad_norm": 1.35922372341156, "learning_rate": 3.820404941848656e-06, "loss": 0.3404, "step": 16168 }, { "epoch": 2.1622091468307034, "grad_norm": 1.5555135011672974, "learning_rate": 3.819269918328824e-06, "loss": 0.4134, "step": 16169 }, { "epoch": 2.1623428724257825, "grad_norm": 1.5486416816711426, "learning_rate": 3.8181350236403955e-06, "loss": 0.3892, "step": 16170 }, { "epoch": 2.162476598020861, "grad_norm": 1.6113145351409912, "learning_rate": 3.817000257807029e-06, "loss": 0.4141, "step": 16171 }, { "epoch": 2.16261032361594, "grad_norm": 1.5268046855926514, "learning_rate": 3.815865620852375e-06, "loss": 0.3824, "step": 16172 }, { "epoch": 2.162744049211019, "grad_norm": 1.4876846075057983, "learning_rate": 3.814731112800083e-06, "loss": 0.3818, "step": 16173 }, { "epoch": 2.162877774806098, "grad_norm": 1.521790862083435, "learning_rate": 3.8135967336738076e-06, "loss": 0.37, "step": 16174 }, { "epoch": 2.163011500401177, "grad_norm": 1.456770896911621, "learning_rate": 3.8124624834971803e-06, "loss": 0.3762, "step": 16175 }, { "epoch": 2.1631452259962556, "grad_norm": 1.26850163936615, "learning_rate": 3.8113283622938556e-06, "loss": 0.3257, "step": 16176 }, { "epoch": 2.1632789515913347, "grad_norm": 1.5819414854049683, "learning_rate": 3.810194370087473e-06, "loss": 0.4165, "step": 16177 }, { "epoch": 2.1634126771864133, "grad_norm": 1.5373594760894775, "learning_rate": 3.8090605069016596e-06, "loss": 0.3783, "step": 16178 }, { "epoch": 2.1635464027814924, "grad_norm": 1.3819327354431152, "learning_rate": 3.8079267727600623e-06, "loss": 0.344, "step": 16179 }, { "epoch": 2.1636801283765714, "grad_norm": 1.460359811782837, "learning_rate": 3.806793167686298e-06, "loss": 0.3882, "step": 16180 }, { "epoch": 2.16381385397165, "grad_norm": 1.501381278038025, "learning_rate": 3.805659691704012e-06, "loss": 0.37, "step": 16181 }, { "epoch": 2.163947579566729, "grad_norm": 1.392208218574524, "learning_rate": 3.8045263448368186e-06, "loss": 0.3502, "step": 16182 }, { "epoch": 2.164081305161808, "grad_norm": 1.6806392669677734, "learning_rate": 3.8033931271083423e-06, "loss": 0.3831, "step": 16183 }, { "epoch": 2.164215030756887, "grad_norm": 1.4217662811279297, "learning_rate": 3.8022600385422126e-06, "loss": 0.3744, "step": 16184 }, { "epoch": 2.164348756351966, "grad_norm": 1.557889461517334, "learning_rate": 3.801127079162039e-06, "loss": 0.4237, "step": 16185 }, { "epoch": 2.1644824819470445, "grad_norm": 1.4533740282058716, "learning_rate": 3.7999942489914397e-06, "loss": 0.3994, "step": 16186 }, { "epoch": 2.1646162075421236, "grad_norm": 1.6821880340576172, "learning_rate": 3.798861548054028e-06, "loss": 0.4034, "step": 16187 }, { "epoch": 2.1647499331372027, "grad_norm": 1.5353648662567139, "learning_rate": 3.7977289763734125e-06, "loss": 0.3988, "step": 16188 }, { "epoch": 2.1648836587322813, "grad_norm": 1.403009057044983, "learning_rate": 3.7965965339732025e-06, "loss": 0.3631, "step": 16189 }, { "epoch": 2.1650173843273604, "grad_norm": 1.4433552026748657, "learning_rate": 3.795464220877001e-06, "loss": 0.3907, "step": 16190 }, { "epoch": 2.165151109922439, "grad_norm": 1.570870041847229, "learning_rate": 3.7943320371084104e-06, "loss": 0.3582, "step": 16191 }, { "epoch": 2.165284835517518, "grad_norm": 1.4943965673446655, "learning_rate": 3.7931999826910316e-06, "loss": 0.3556, "step": 16192 }, { "epoch": 2.165418561112597, "grad_norm": 1.3177591562271118, "learning_rate": 3.7920680576484627e-06, "loss": 0.3639, "step": 16193 }, { "epoch": 2.1655522867076757, "grad_norm": 1.419259786605835, "learning_rate": 3.790936262004287e-06, "loss": 0.342, "step": 16194 }, { "epoch": 2.165686012302755, "grad_norm": 1.5744701623916626, "learning_rate": 3.7898045957821082e-06, "loss": 0.4005, "step": 16195 }, { "epoch": 2.1658197378978334, "grad_norm": 1.3720170259475708, "learning_rate": 3.78867305900551e-06, "loss": 0.3538, "step": 16196 }, { "epoch": 2.1659534634929125, "grad_norm": 1.5274364948272705, "learning_rate": 3.787541651698077e-06, "loss": 0.3957, "step": 16197 }, { "epoch": 2.1660871890879916, "grad_norm": 1.5357438325881958, "learning_rate": 3.786410373883398e-06, "loss": 0.3787, "step": 16198 }, { "epoch": 2.16622091468307, "grad_norm": 1.5135211944580078, "learning_rate": 3.785279225585042e-06, "loss": 0.3898, "step": 16199 }, { "epoch": 2.1663546402781493, "grad_norm": 1.4882596731185913, "learning_rate": 3.7841482068266013e-06, "loss": 0.3367, "step": 16200 }, { "epoch": 2.166488365873228, "grad_norm": 1.4050239324569702, "learning_rate": 3.783017317631639e-06, "loss": 0.3303, "step": 16201 }, { "epoch": 2.166622091468307, "grad_norm": 1.5266227722167969, "learning_rate": 3.7818865580237287e-06, "loss": 0.3932, "step": 16202 }, { "epoch": 2.166755817063386, "grad_norm": 1.5692311525344849, "learning_rate": 3.7807559280264495e-06, "loss": 0.4141, "step": 16203 }, { "epoch": 2.1668895426584647, "grad_norm": 1.45881986618042, "learning_rate": 3.779625427663355e-06, "loss": 0.4009, "step": 16204 }, { "epoch": 2.1670232682535437, "grad_norm": 1.5354901552200317, "learning_rate": 3.7784950569580224e-06, "loss": 0.3943, "step": 16205 }, { "epoch": 2.167156993848623, "grad_norm": 1.4313868284225464, "learning_rate": 3.777364815934005e-06, "loss": 0.3268, "step": 16206 }, { "epoch": 2.1672907194437014, "grad_norm": 1.69831120967865, "learning_rate": 3.776234704614863e-06, "loss": 0.3721, "step": 16207 }, { "epoch": 2.1674244450387805, "grad_norm": 1.4901002645492554, "learning_rate": 3.7751047230241535e-06, "loss": 0.3441, "step": 16208 }, { "epoch": 2.167558170633859, "grad_norm": 1.7743366956710815, "learning_rate": 3.7739748711854284e-06, "loss": 0.4466, "step": 16209 }, { "epoch": 2.167691896228938, "grad_norm": 1.3814276456832886, "learning_rate": 3.7728451491222394e-06, "loss": 0.3717, "step": 16210 }, { "epoch": 2.1678256218240173, "grad_norm": 1.6186026334762573, "learning_rate": 3.7717155568581354e-06, "loss": 0.3418, "step": 16211 }, { "epoch": 2.167959347419096, "grad_norm": 1.744234561920166, "learning_rate": 3.7705860944166607e-06, "loss": 0.4086, "step": 16212 }, { "epoch": 2.168093073014175, "grad_norm": 1.6753228902816772, "learning_rate": 3.7694567618213584e-06, "loss": 0.4351, "step": 16213 }, { "epoch": 2.168226798609254, "grad_norm": 1.3432114124298096, "learning_rate": 3.768327559095767e-06, "loss": 0.3689, "step": 16214 }, { "epoch": 2.1683605242043327, "grad_norm": 1.5963348150253296, "learning_rate": 3.7671984862634246e-06, "loss": 0.346, "step": 16215 }, { "epoch": 2.1684942497994117, "grad_norm": 1.4214552640914917, "learning_rate": 3.7660695433478667e-06, "loss": 0.363, "step": 16216 }, { "epoch": 2.1686279753944904, "grad_norm": 1.4677363634109497, "learning_rate": 3.7649407303726258e-06, "loss": 0.3511, "step": 16217 }, { "epoch": 2.1687617009895694, "grad_norm": 1.4466300010681152, "learning_rate": 3.7638120473612228e-06, "loss": 0.3695, "step": 16218 }, { "epoch": 2.1688954265846485, "grad_norm": 1.2779343128204346, "learning_rate": 3.7626834943371984e-06, "loss": 0.308, "step": 16219 }, { "epoch": 2.169029152179727, "grad_norm": 1.4937680959701538, "learning_rate": 3.76155507132406e-06, "loss": 0.3706, "step": 16220 }, { "epoch": 2.169162877774806, "grad_norm": 1.2822743654251099, "learning_rate": 3.7604267783453395e-06, "loss": 0.3574, "step": 16221 }, { "epoch": 2.169296603369885, "grad_norm": 1.6081180572509766, "learning_rate": 3.759298615424557e-06, "loss": 0.4078, "step": 16222 }, { "epoch": 2.169430328964964, "grad_norm": 1.4695504903793335, "learning_rate": 3.7581705825852156e-06, "loss": 0.3687, "step": 16223 }, { "epoch": 2.169564054560043, "grad_norm": 1.583430528640747, "learning_rate": 3.7570426798508417e-06, "loss": 0.4166, "step": 16224 }, { "epoch": 2.1696977801551216, "grad_norm": 1.5401413440704346, "learning_rate": 3.7559149072449377e-06, "loss": 0.3882, "step": 16225 }, { "epoch": 2.1698315057502007, "grad_norm": 1.3912434577941895, "learning_rate": 3.754787264791011e-06, "loss": 0.3487, "step": 16226 }, { "epoch": 2.1699652313452793, "grad_norm": 1.2680352926254272, "learning_rate": 3.7536597525125683e-06, "loss": 0.3146, "step": 16227 }, { "epoch": 2.1700989569403584, "grad_norm": 1.4927648305892944, "learning_rate": 3.7525323704331108e-06, "loss": 0.3703, "step": 16228 }, { "epoch": 2.1702326825354374, "grad_norm": 1.3060998916625977, "learning_rate": 3.751405118576138e-06, "loss": 0.3528, "step": 16229 }, { "epoch": 2.170366408130516, "grad_norm": 1.6163612604141235, "learning_rate": 3.750277996965146e-06, "loss": 0.3726, "step": 16230 }, { "epoch": 2.170500133725595, "grad_norm": 1.566009759902954, "learning_rate": 3.749151005623629e-06, "loss": 0.364, "step": 16231 }, { "epoch": 2.1706338593206738, "grad_norm": 1.4719799757003784, "learning_rate": 3.7480241445750776e-06, "loss": 0.3866, "step": 16232 }, { "epoch": 2.170767584915753, "grad_norm": 1.4604594707489014, "learning_rate": 3.7468974138429802e-06, "loss": 0.3642, "step": 16233 }, { "epoch": 2.170901310510832, "grad_norm": 1.3394873142242432, "learning_rate": 3.745770813450824e-06, "loss": 0.3721, "step": 16234 }, { "epoch": 2.1710350361059105, "grad_norm": 1.6297065019607544, "learning_rate": 3.7446443434220894e-06, "loss": 0.4034, "step": 16235 }, { "epoch": 2.1711687617009896, "grad_norm": 1.4894779920578003, "learning_rate": 3.7435180037802575e-06, "loss": 0.3924, "step": 16236 }, { "epoch": 2.1713024872960687, "grad_norm": 1.478907823562622, "learning_rate": 3.7423917945488075e-06, "loss": 0.396, "step": 16237 }, { "epoch": 2.1714362128911473, "grad_norm": 1.7704520225524902, "learning_rate": 3.7412657157512144e-06, "loss": 0.4439, "step": 16238 }, { "epoch": 2.1715699384862264, "grad_norm": 1.4853854179382324, "learning_rate": 3.740139767410943e-06, "loss": 0.3659, "step": 16239 }, { "epoch": 2.171703664081305, "grad_norm": 1.542970061302185, "learning_rate": 3.739013949551471e-06, "loss": 0.3781, "step": 16240 }, { "epoch": 2.171837389676384, "grad_norm": 1.3319801092147827, "learning_rate": 3.737888262196262e-06, "loss": 0.3425, "step": 16241 }, { "epoch": 2.171971115271463, "grad_norm": 1.6695350408554077, "learning_rate": 3.7367627053687796e-06, "loss": 0.3873, "step": 16242 }, { "epoch": 2.1721048408665418, "grad_norm": 1.4810701608657837, "learning_rate": 3.735637279092489e-06, "loss": 0.4105, "step": 16243 }, { "epoch": 2.172238566461621, "grad_norm": 1.6019119024276733, "learning_rate": 3.7345119833908383e-06, "loss": 0.4257, "step": 16244 }, { "epoch": 2.1723722920566995, "grad_norm": 1.475785493850708, "learning_rate": 3.7333868182872966e-06, "loss": 0.3971, "step": 16245 }, { "epoch": 2.1725060176517785, "grad_norm": 1.5395865440368652, "learning_rate": 3.7322617838053066e-06, "loss": 0.3769, "step": 16246 }, { "epoch": 2.1726397432468576, "grad_norm": 1.5096434354782104, "learning_rate": 3.731136879968319e-06, "loss": 0.3619, "step": 16247 }, { "epoch": 2.1727734688419362, "grad_norm": 1.428429365158081, "learning_rate": 3.7300121067997917e-06, "loss": 0.3615, "step": 16248 }, { "epoch": 2.1729071944370153, "grad_norm": 1.5296647548675537, "learning_rate": 3.7288874643231543e-06, "loss": 0.3681, "step": 16249 }, { "epoch": 2.1730409200320944, "grad_norm": 1.5431780815124512, "learning_rate": 3.7277629525618653e-06, "loss": 0.4133, "step": 16250 }, { "epoch": 2.173174645627173, "grad_norm": 1.2569829225540161, "learning_rate": 3.7266385715393515e-06, "loss": 0.3667, "step": 16251 }, { "epoch": 2.173308371222252, "grad_norm": 1.3211477994918823, "learning_rate": 3.7255143212790536e-06, "loss": 0.3418, "step": 16252 }, { "epoch": 2.1734420968173307, "grad_norm": 1.6510108709335327, "learning_rate": 3.7243902018044054e-06, "loss": 0.4489, "step": 16253 }, { "epoch": 2.1735758224124098, "grad_norm": 1.542637825012207, "learning_rate": 3.7232662131388386e-06, "loss": 0.3869, "step": 16254 }, { "epoch": 2.173709548007489, "grad_norm": 1.6173707246780396, "learning_rate": 3.7221423553057814e-06, "loss": 0.4046, "step": 16255 }, { "epoch": 2.1738432736025675, "grad_norm": 1.5031521320343018, "learning_rate": 3.7210186283286596e-06, "loss": 0.3857, "step": 16256 }, { "epoch": 2.1739769991976465, "grad_norm": 1.5874103307724, "learning_rate": 3.7198950322308956e-06, "loss": 0.3814, "step": 16257 }, { "epoch": 2.174110724792725, "grad_norm": 1.6553881168365479, "learning_rate": 3.7187715670359114e-06, "loss": 0.3822, "step": 16258 }, { "epoch": 2.1742444503878042, "grad_norm": 1.4726183414459229, "learning_rate": 3.7176482327671224e-06, "loss": 0.3603, "step": 16259 }, { "epoch": 2.1743781759828833, "grad_norm": 1.512617826461792, "learning_rate": 3.716525029447945e-06, "loss": 0.3632, "step": 16260 }, { "epoch": 2.174511901577962, "grad_norm": 1.5675050020217896, "learning_rate": 3.7154019571017907e-06, "loss": 0.3585, "step": 16261 }, { "epoch": 2.174645627173041, "grad_norm": 1.5499221086502075, "learning_rate": 3.7142790157520725e-06, "loss": 0.3647, "step": 16262 }, { "epoch": 2.1747793527681196, "grad_norm": 1.5489075183868408, "learning_rate": 3.713156205422186e-06, "loss": 0.3605, "step": 16263 }, { "epoch": 2.1749130783631987, "grad_norm": 1.3635002374649048, "learning_rate": 3.71203352613555e-06, "loss": 0.3447, "step": 16264 }, { "epoch": 2.1750468039582778, "grad_norm": 1.5593349933624268, "learning_rate": 3.7109109779155505e-06, "loss": 0.375, "step": 16265 }, { "epoch": 2.1751805295533564, "grad_norm": 1.4230684041976929, "learning_rate": 3.7097885607855977e-06, "loss": 0.3502, "step": 16266 }, { "epoch": 2.1753142551484355, "grad_norm": 1.5980064868927002, "learning_rate": 3.7086662747690873e-06, "loss": 0.3866, "step": 16267 }, { "epoch": 2.175447980743514, "grad_norm": 1.4956570863723755, "learning_rate": 3.7075441198894004e-06, "loss": 0.3973, "step": 16268 }, { "epoch": 2.175581706338593, "grad_norm": 1.4807687997817993, "learning_rate": 3.7064220961699427e-06, "loss": 0.3796, "step": 16269 }, { "epoch": 2.1757154319336722, "grad_norm": 1.5164546966552734, "learning_rate": 3.70530020363409e-06, "loss": 0.3885, "step": 16270 }, { "epoch": 2.175849157528751, "grad_norm": 1.6316159963607788, "learning_rate": 3.704178442305231e-06, "loss": 0.363, "step": 16271 }, { "epoch": 2.17598288312383, "grad_norm": 1.6896103620529175, "learning_rate": 3.703056812206748e-06, "loss": 0.3904, "step": 16272 }, { "epoch": 2.176116608718909, "grad_norm": 1.6158279180526733, "learning_rate": 3.7019353133620208e-06, "loss": 0.4389, "step": 16273 }, { "epoch": 2.1762503343139876, "grad_norm": 1.7111865282058716, "learning_rate": 3.700813945794425e-06, "loss": 0.4283, "step": 16274 }, { "epoch": 2.1763840599090667, "grad_norm": 1.4147570133209229, "learning_rate": 3.699692709527335e-06, "loss": 0.3559, "step": 16275 }, { "epoch": 2.1765177855041453, "grad_norm": 1.4546010494232178, "learning_rate": 3.6985716045841223e-06, "loss": 0.342, "step": 16276 }, { "epoch": 2.1766515110992244, "grad_norm": 1.431753396987915, "learning_rate": 3.697450630988154e-06, "loss": 0.3446, "step": 16277 }, { "epoch": 2.1767852366943035, "grad_norm": 1.2828805446624756, "learning_rate": 3.6963297887627957e-06, "loss": 0.3381, "step": 16278 }, { "epoch": 2.176918962289382, "grad_norm": 1.6131726503372192, "learning_rate": 3.695209077931412e-06, "loss": 0.3991, "step": 16279 }, { "epoch": 2.177052687884461, "grad_norm": 1.7734261751174927, "learning_rate": 3.694088498517362e-06, "loss": 0.411, "step": 16280 }, { "epoch": 2.17718641347954, "grad_norm": 1.5452563762664795, "learning_rate": 3.6929680505440035e-06, "loss": 0.3809, "step": 16281 }, { "epoch": 2.177320139074619, "grad_norm": 1.6094892024993896, "learning_rate": 3.6918477340346903e-06, "loss": 0.3521, "step": 16282 }, { "epoch": 2.177453864669698, "grad_norm": 1.2754590511322021, "learning_rate": 3.690727549012778e-06, "loss": 0.3219, "step": 16283 }, { "epoch": 2.1775875902647766, "grad_norm": 1.4750466346740723, "learning_rate": 3.689607495501606e-06, "loss": 0.36, "step": 16284 }, { "epoch": 2.1777213158598556, "grad_norm": 1.6493852138519287, "learning_rate": 3.6884875735245307e-06, "loss": 0.3958, "step": 16285 }, { "epoch": 2.1778550414549347, "grad_norm": 1.732311725616455, "learning_rate": 3.687367783104896e-06, "loss": 0.4202, "step": 16286 }, { "epoch": 2.1779887670500133, "grad_norm": 1.4855539798736572, "learning_rate": 3.686248124266033e-06, "loss": 0.3675, "step": 16287 }, { "epoch": 2.1781224926450924, "grad_norm": 1.514873743057251, "learning_rate": 3.6851285970312923e-06, "loss": 0.3733, "step": 16288 }, { "epoch": 2.178256218240171, "grad_norm": 1.529582142829895, "learning_rate": 3.6840092014239968e-06, "loss": 0.4069, "step": 16289 }, { "epoch": 2.17838994383525, "grad_norm": 1.5395400524139404, "learning_rate": 3.6828899374674933e-06, "loss": 0.3649, "step": 16290 }, { "epoch": 2.178523669430329, "grad_norm": 1.4626466035842896, "learning_rate": 3.6817708051851e-06, "loss": 0.3781, "step": 16291 }, { "epoch": 2.178657395025408, "grad_norm": 1.6334456205368042, "learning_rate": 3.680651804600148e-06, "loss": 0.3779, "step": 16292 }, { "epoch": 2.178791120620487, "grad_norm": 1.5947253704071045, "learning_rate": 3.679532935735962e-06, "loss": 0.3763, "step": 16293 }, { "epoch": 2.1789248462155655, "grad_norm": 1.5445412397384644, "learning_rate": 3.6784141986158652e-06, "loss": 0.3776, "step": 16294 }, { "epoch": 2.1790585718106446, "grad_norm": 1.4923593997955322, "learning_rate": 3.6772955932631748e-06, "loss": 0.3955, "step": 16295 }, { "epoch": 2.1791922974057236, "grad_norm": 1.6122815608978271, "learning_rate": 3.6761771197012075e-06, "loss": 0.4, "step": 16296 }, { "epoch": 2.1793260230008022, "grad_norm": 1.423462986946106, "learning_rate": 3.6750587779532763e-06, "loss": 0.3627, "step": 16297 }, { "epoch": 2.1794597485958813, "grad_norm": 1.3249611854553223, "learning_rate": 3.6739405680426933e-06, "loss": 0.3556, "step": 16298 }, { "epoch": 2.17959347419096, "grad_norm": 1.5497000217437744, "learning_rate": 3.6728224899927658e-06, "loss": 0.4328, "step": 16299 }, { "epoch": 2.179727199786039, "grad_norm": 1.2785531282424927, "learning_rate": 3.6717045438267986e-06, "loss": 0.3453, "step": 16300 }, { "epoch": 2.179860925381118, "grad_norm": 1.3955689668655396, "learning_rate": 3.6705867295680954e-06, "loss": 0.3468, "step": 16301 }, { "epoch": 2.1799946509761967, "grad_norm": 1.4439014196395874, "learning_rate": 3.6694690472399575e-06, "loss": 0.413, "step": 16302 }, { "epoch": 2.180128376571276, "grad_norm": 1.4956010580062866, "learning_rate": 3.668351496865674e-06, "loss": 0.3763, "step": 16303 }, { "epoch": 2.1802621021663544, "grad_norm": 1.4027496576309204, "learning_rate": 3.6672340784685477e-06, "loss": 0.3798, "step": 16304 }, { "epoch": 2.1803958277614335, "grad_norm": 1.3655768632888794, "learning_rate": 3.6661167920718664e-06, "loss": 0.3297, "step": 16305 }, { "epoch": 2.1805295533565126, "grad_norm": 1.4732098579406738, "learning_rate": 3.6649996376989215e-06, "loss": 0.4158, "step": 16306 }, { "epoch": 2.180663278951591, "grad_norm": 1.5541791915893555, "learning_rate": 3.663882615372999e-06, "loss": 0.4175, "step": 16307 }, { "epoch": 2.1807970045466702, "grad_norm": 1.58713960647583, "learning_rate": 3.662765725117374e-06, "loss": 0.3975, "step": 16308 }, { "epoch": 2.1809307301417493, "grad_norm": 1.4791971445083618, "learning_rate": 3.661648966955341e-06, "loss": 0.3502, "step": 16309 }, { "epoch": 2.181064455736828, "grad_norm": 1.5218161344528198, "learning_rate": 3.6605323409101656e-06, "loss": 0.3589, "step": 16310 }, { "epoch": 2.181198181331907, "grad_norm": 1.2320057153701782, "learning_rate": 3.659415847005129e-06, "loss": 0.3706, "step": 16311 }, { "epoch": 2.1813319069269856, "grad_norm": 1.3070148229599, "learning_rate": 3.6582994852635e-06, "loss": 0.3479, "step": 16312 }, { "epoch": 2.1814656325220647, "grad_norm": 1.6972564458847046, "learning_rate": 3.6571832557085475e-06, "loss": 0.3816, "step": 16313 }, { "epoch": 2.181599358117144, "grad_norm": 1.5121333599090576, "learning_rate": 3.6560671583635467e-06, "loss": 0.3351, "step": 16314 }, { "epoch": 2.1817330837122224, "grad_norm": 1.4832582473754883, "learning_rate": 3.654951193251752e-06, "loss": 0.3833, "step": 16315 }, { "epoch": 2.1818668093073015, "grad_norm": 1.519750714302063, "learning_rate": 3.6538353603964292e-06, "loss": 0.3672, "step": 16316 }, { "epoch": 2.1820005349023806, "grad_norm": 1.718870997428894, "learning_rate": 3.6527196598208347e-06, "loss": 0.393, "step": 16317 }, { "epoch": 2.182134260497459, "grad_norm": 1.33301842212677, "learning_rate": 3.6516040915482264e-06, "loss": 0.3749, "step": 16318 }, { "epoch": 2.1822679860925382, "grad_norm": 1.322077989578247, "learning_rate": 3.6504886556018547e-06, "loss": 0.3346, "step": 16319 }, { "epoch": 2.182401711687617, "grad_norm": 1.4060719013214111, "learning_rate": 3.649373352004972e-06, "loss": 0.3292, "step": 16320 }, { "epoch": 2.182535437282696, "grad_norm": 1.501711368560791, "learning_rate": 3.648258180780825e-06, "loss": 0.345, "step": 16321 }, { "epoch": 2.182669162877775, "grad_norm": 1.4843274354934692, "learning_rate": 3.647143141952657e-06, "loss": 0.3703, "step": 16322 }, { "epoch": 2.1828028884728536, "grad_norm": 1.4530706405639648, "learning_rate": 3.6460282355437125e-06, "loss": 0.3869, "step": 16323 }, { "epoch": 2.1829366140679327, "grad_norm": 1.5931384563446045, "learning_rate": 3.6449134615772284e-06, "loss": 0.3771, "step": 16324 }, { "epoch": 2.1830703396630113, "grad_norm": 1.6148937940597534, "learning_rate": 3.6437988200764427e-06, "loss": 0.3932, "step": 16325 }, { "epoch": 2.1832040652580904, "grad_norm": 1.5775597095489502, "learning_rate": 3.642684311064588e-06, "loss": 0.4149, "step": 16326 }, { "epoch": 2.1833377908531695, "grad_norm": 1.7508631944656372, "learning_rate": 3.641569934564896e-06, "loss": 0.3497, "step": 16327 }, { "epoch": 2.183471516448248, "grad_norm": 1.4091308116912842, "learning_rate": 3.6404556906005973e-06, "loss": 0.3549, "step": 16328 }, { "epoch": 2.183605242043327, "grad_norm": 1.5087890625, "learning_rate": 3.6393415791949084e-06, "loss": 0.378, "step": 16329 }, { "epoch": 2.183738967638406, "grad_norm": 1.6268309354782104, "learning_rate": 3.638227600371064e-06, "loss": 0.3606, "step": 16330 }, { "epoch": 2.183872693233485, "grad_norm": 1.5602035522460938, "learning_rate": 3.6371137541522737e-06, "loss": 0.3669, "step": 16331 }, { "epoch": 2.184006418828564, "grad_norm": 1.4617384672164917, "learning_rate": 3.6360000405617558e-06, "loss": 0.3662, "step": 16332 }, { "epoch": 2.1841401444236426, "grad_norm": 1.5935776233673096, "learning_rate": 3.634886459622734e-06, "loss": 0.4133, "step": 16333 }, { "epoch": 2.1842738700187216, "grad_norm": 1.5108132362365723, "learning_rate": 3.6337730113584058e-06, "loss": 0.3497, "step": 16334 }, { "epoch": 2.1844075956138003, "grad_norm": 1.5082652568817139, "learning_rate": 3.6326596957919957e-06, "loss": 0.4285, "step": 16335 }, { "epoch": 2.1845413212088793, "grad_norm": 1.4768787622451782, "learning_rate": 3.6315465129466966e-06, "loss": 0.3981, "step": 16336 }, { "epoch": 2.1846750468039584, "grad_norm": 1.4968199729919434, "learning_rate": 3.630433462845717e-06, "loss": 0.3607, "step": 16337 }, { "epoch": 2.184808772399037, "grad_norm": 1.2967222929000854, "learning_rate": 3.629320545512257e-06, "loss": 0.3578, "step": 16338 }, { "epoch": 2.184942497994116, "grad_norm": 1.4939343929290771, "learning_rate": 3.628207760969513e-06, "loss": 0.3684, "step": 16339 }, { "epoch": 2.185076223589195, "grad_norm": 1.4628163576126099, "learning_rate": 3.6270951092406826e-06, "loss": 0.3444, "step": 16340 }, { "epoch": 2.185209949184274, "grad_norm": 1.6034502983093262, "learning_rate": 3.6259825903489567e-06, "loss": 0.4487, "step": 16341 }, { "epoch": 2.185343674779353, "grad_norm": 1.4875776767730713, "learning_rate": 3.624870204317523e-06, "loss": 0.3484, "step": 16342 }, { "epoch": 2.1854774003744315, "grad_norm": 1.6573498249053955, "learning_rate": 3.6237579511695696e-06, "loss": 0.4063, "step": 16343 }, { "epoch": 2.1856111259695106, "grad_norm": 1.6375935077667236, "learning_rate": 3.6226458309282806e-06, "loss": 0.4037, "step": 16344 }, { "epoch": 2.1857448515645896, "grad_norm": 1.424809217453003, "learning_rate": 3.621533843616838e-06, "loss": 0.3948, "step": 16345 }, { "epoch": 2.1858785771596683, "grad_norm": 1.5334001779556274, "learning_rate": 3.620421989258418e-06, "loss": 0.3986, "step": 16346 }, { "epoch": 2.1860123027547473, "grad_norm": 1.8028336763381958, "learning_rate": 3.6193102678762004e-06, "loss": 0.4054, "step": 16347 }, { "epoch": 2.186146028349826, "grad_norm": 1.5019755363464355, "learning_rate": 3.618198679493348e-06, "loss": 0.4064, "step": 16348 }, { "epoch": 2.186279753944905, "grad_norm": 1.7856967449188232, "learning_rate": 3.61708722413304e-06, "loss": 0.3923, "step": 16349 }, { "epoch": 2.186413479539984, "grad_norm": 1.3765339851379395, "learning_rate": 3.6159759018184417e-06, "loss": 0.3521, "step": 16350 }, { "epoch": 2.1865472051350627, "grad_norm": 1.4637154340744019, "learning_rate": 3.6148647125727165e-06, "loss": 0.4091, "step": 16351 }, { "epoch": 2.186680930730142, "grad_norm": 1.6549135446548462, "learning_rate": 3.6137536564190302e-06, "loss": 0.434, "step": 16352 }, { "epoch": 2.186814656325221, "grad_norm": 1.460453748703003, "learning_rate": 3.6126427333805315e-06, "loss": 0.3412, "step": 16353 }, { "epoch": 2.1869483819202995, "grad_norm": 1.5194929838180542, "learning_rate": 3.6115319434803897e-06, "loss": 0.3408, "step": 16354 }, { "epoch": 2.1870821075153786, "grad_norm": 1.5000133514404297, "learning_rate": 3.6104212867417477e-06, "loss": 0.396, "step": 16355 }, { "epoch": 2.187215833110457, "grad_norm": 1.9321099519729614, "learning_rate": 3.609310763187759e-06, "loss": 0.3954, "step": 16356 }, { "epoch": 2.1873495587055363, "grad_norm": 1.4440604448318481, "learning_rate": 3.608200372841574e-06, "loss": 0.3903, "step": 16357 }, { "epoch": 2.1874832843006153, "grad_norm": 1.4573160409927368, "learning_rate": 3.6070901157263303e-06, "loss": 0.3618, "step": 16358 }, { "epoch": 2.187617009895694, "grad_norm": 1.5695223808288574, "learning_rate": 3.605979991865185e-06, "loss": 0.4484, "step": 16359 }, { "epoch": 2.187750735490773, "grad_norm": 1.3229132890701294, "learning_rate": 3.604870001281263e-06, "loss": 0.3687, "step": 16360 }, { "epoch": 2.1878844610858517, "grad_norm": 1.441109538078308, "learning_rate": 3.603760143997708e-06, "loss": 0.3786, "step": 16361 }, { "epoch": 2.1880181866809307, "grad_norm": 1.3831636905670166, "learning_rate": 3.602650420037651e-06, "loss": 0.3344, "step": 16362 }, { "epoch": 2.18815191227601, "grad_norm": 1.381396770477295, "learning_rate": 3.601540829424225e-06, "loss": 0.4099, "step": 16363 }, { "epoch": 2.1882856378710884, "grad_norm": 1.5415884256362915, "learning_rate": 3.600431372180557e-06, "loss": 0.3303, "step": 16364 }, { "epoch": 2.1884193634661675, "grad_norm": 1.4183281660079956, "learning_rate": 3.599322048329774e-06, "loss": 0.3458, "step": 16365 }, { "epoch": 2.188553089061246, "grad_norm": 1.5219459533691406, "learning_rate": 3.5982128578949984e-06, "loss": 0.3977, "step": 16366 }, { "epoch": 2.188686814656325, "grad_norm": 1.6582280397415161, "learning_rate": 3.5971038008993496e-06, "loss": 0.4177, "step": 16367 }, { "epoch": 2.1888205402514043, "grad_norm": 1.4540014266967773, "learning_rate": 3.595994877365945e-06, "loss": 0.4223, "step": 16368 }, { "epoch": 2.188954265846483, "grad_norm": 1.694222331047058, "learning_rate": 3.5948860873178992e-06, "loss": 0.3705, "step": 16369 }, { "epoch": 2.189087991441562, "grad_norm": 1.4442307949066162, "learning_rate": 3.5937774307783245e-06, "loss": 0.35, "step": 16370 }, { "epoch": 2.1892217170366406, "grad_norm": 1.4792312383651733, "learning_rate": 3.5926689077703323e-06, "loss": 0.4117, "step": 16371 }, { "epoch": 2.1893554426317197, "grad_norm": 1.5308893918991089, "learning_rate": 3.591560518317019e-06, "loss": 0.4331, "step": 16372 }, { "epoch": 2.1894891682267987, "grad_norm": 1.6259902715682983, "learning_rate": 3.5904522624415007e-06, "loss": 0.4114, "step": 16373 }, { "epoch": 2.1896228938218774, "grad_norm": 1.8770668506622314, "learning_rate": 3.5893441401668648e-06, "loss": 0.3601, "step": 16374 }, { "epoch": 2.1897566194169564, "grad_norm": 1.5259467363357544, "learning_rate": 3.5882361515162223e-06, "loss": 0.3998, "step": 16375 }, { "epoch": 2.1898903450120355, "grad_norm": 1.4136395454406738, "learning_rate": 3.5871282965126596e-06, "loss": 0.3936, "step": 16376 }, { "epoch": 2.190024070607114, "grad_norm": 1.4673441648483276, "learning_rate": 3.5860205751792676e-06, "loss": 0.3995, "step": 16377 }, { "epoch": 2.190157796202193, "grad_norm": 1.5149905681610107, "learning_rate": 3.5849129875391453e-06, "loss": 0.3774, "step": 16378 }, { "epoch": 2.190291521797272, "grad_norm": 1.5662513971328735, "learning_rate": 3.58380553361537e-06, "loss": 0.3829, "step": 16379 }, { "epoch": 2.190425247392351, "grad_norm": 1.5141760110855103, "learning_rate": 3.5826982134310294e-06, "loss": 0.3978, "step": 16380 }, { "epoch": 2.19055897298743, "grad_norm": 1.3788865804672241, "learning_rate": 3.5815910270092025e-06, "loss": 0.3558, "step": 16381 }, { "epoch": 2.1906926985825086, "grad_norm": 1.5146018266677856, "learning_rate": 3.58048397437297e-06, "loss": 0.3533, "step": 16382 }, { "epoch": 2.1908264241775877, "grad_norm": 1.3292251825332642, "learning_rate": 3.5793770555454065e-06, "loss": 0.3036, "step": 16383 }, { "epoch": 2.1909601497726663, "grad_norm": 1.4756734371185303, "learning_rate": 3.578270270549583e-06, "loss": 0.3854, "step": 16384 }, { "epoch": 2.1910938753677454, "grad_norm": 1.5888948440551758, "learning_rate": 3.5771636194085724e-06, "loss": 0.3658, "step": 16385 }, { "epoch": 2.1912276009628244, "grad_norm": 1.4345057010650635, "learning_rate": 3.5760571021454393e-06, "loss": 0.3858, "step": 16386 }, { "epoch": 2.191361326557903, "grad_norm": 1.687107801437378, "learning_rate": 3.5749507187832486e-06, "loss": 0.4428, "step": 16387 }, { "epoch": 2.191495052152982, "grad_norm": 1.433917760848999, "learning_rate": 3.5738444693450624e-06, "loss": 0.3698, "step": 16388 }, { "epoch": 2.191628777748061, "grad_norm": 1.42483651638031, "learning_rate": 3.5727383538539395e-06, "loss": 0.3514, "step": 16389 }, { "epoch": 2.19176250334314, "grad_norm": 1.6001818180084229, "learning_rate": 3.5716323723329347e-06, "loss": 0.4188, "step": 16390 }, { "epoch": 2.191896228938219, "grad_norm": 1.507879376411438, "learning_rate": 3.5705265248051023e-06, "loss": 0.4128, "step": 16391 }, { "epoch": 2.1920299545332975, "grad_norm": 1.405267596244812, "learning_rate": 3.569420811293496e-06, "loss": 0.365, "step": 16392 }, { "epoch": 2.1921636801283766, "grad_norm": 1.49809730052948, "learning_rate": 3.568315231821151e-06, "loss": 0.4108, "step": 16393 }, { "epoch": 2.1922974057234557, "grad_norm": 1.510206937789917, "learning_rate": 3.5672097864111287e-06, "loss": 0.3964, "step": 16394 }, { "epoch": 2.1924311313185343, "grad_norm": 1.4786604642868042, "learning_rate": 3.5661044750864595e-06, "loss": 0.4022, "step": 16395 }, { "epoch": 2.1925648569136134, "grad_norm": 1.5953116416931152, "learning_rate": 3.564999297870182e-06, "loss": 0.3744, "step": 16396 }, { "epoch": 2.192698582508692, "grad_norm": 1.654759168624878, "learning_rate": 3.563894254785344e-06, "loss": 0.4323, "step": 16397 }, { "epoch": 2.192832308103771, "grad_norm": 1.2955964803695679, "learning_rate": 3.5627893458549644e-06, "loss": 0.3363, "step": 16398 }, { "epoch": 2.19296603369885, "grad_norm": 1.3139053583145142, "learning_rate": 3.5616845711020876e-06, "loss": 0.3229, "step": 16399 }, { "epoch": 2.1930997592939288, "grad_norm": 1.6418778896331787, "learning_rate": 3.5605799305497325e-06, "loss": 0.4172, "step": 16400 }, { "epoch": 2.193233484889008, "grad_norm": 1.4203234910964966, "learning_rate": 3.5594754242209263e-06, "loss": 0.3519, "step": 16401 }, { "epoch": 2.1933672104840864, "grad_norm": 1.3634294271469116, "learning_rate": 3.5583710521386916e-06, "loss": 0.3992, "step": 16402 }, { "epoch": 2.1935009360791655, "grad_norm": 1.5628266334533691, "learning_rate": 3.5572668143260458e-06, "loss": 0.3657, "step": 16403 }, { "epoch": 2.1936346616742446, "grad_norm": 1.4561527967453003, "learning_rate": 3.5561627108060137e-06, "loss": 0.3682, "step": 16404 }, { "epoch": 2.193768387269323, "grad_norm": 1.735776662826538, "learning_rate": 3.5550587416016016e-06, "loss": 0.4276, "step": 16405 }, { "epoch": 2.1939021128644023, "grad_norm": 1.4480133056640625, "learning_rate": 3.5539549067358225e-06, "loss": 0.351, "step": 16406 }, { "epoch": 2.194035838459481, "grad_norm": 1.4428770542144775, "learning_rate": 3.5528512062316857e-06, "loss": 0.3852, "step": 16407 }, { "epoch": 2.19416956405456, "grad_norm": 1.696537733078003, "learning_rate": 3.5517476401121953e-06, "loss": 0.4368, "step": 16408 }, { "epoch": 2.194303289649639, "grad_norm": 1.6042019128799438, "learning_rate": 3.5506442084003554e-06, "loss": 0.3758, "step": 16409 }, { "epoch": 2.1944370152447177, "grad_norm": 1.5025063753128052, "learning_rate": 3.549540911119166e-06, "loss": 0.3797, "step": 16410 }, { "epoch": 2.1945707408397968, "grad_norm": 1.5232625007629395, "learning_rate": 3.5484377482916245e-06, "loss": 0.3962, "step": 16411 }, { "epoch": 2.194704466434876, "grad_norm": 1.4286611080169678, "learning_rate": 3.547334719940724e-06, "loss": 0.3499, "step": 16412 }, { "epoch": 2.1948381920299544, "grad_norm": 1.2526613473892212, "learning_rate": 3.546231826089459e-06, "loss": 0.3669, "step": 16413 }, { "epoch": 2.1949719176250335, "grad_norm": 1.7012066841125488, "learning_rate": 3.545129066760811e-06, "loss": 0.4387, "step": 16414 }, { "epoch": 2.195105643220112, "grad_norm": 1.4367096424102783, "learning_rate": 3.5440264419777724e-06, "loss": 0.3853, "step": 16415 }, { "epoch": 2.195239368815191, "grad_norm": 1.608490228652954, "learning_rate": 3.5429239517633297e-06, "loss": 0.3997, "step": 16416 }, { "epoch": 2.1953730944102703, "grad_norm": 1.570743441581726, "learning_rate": 3.541821596140452e-06, "loss": 0.3825, "step": 16417 }, { "epoch": 2.195506820005349, "grad_norm": 1.6516621112823486, "learning_rate": 3.540719375132129e-06, "loss": 0.3988, "step": 16418 }, { "epoch": 2.195640545600428, "grad_norm": 1.6899850368499756, "learning_rate": 3.5396172887613246e-06, "loss": 0.38, "step": 16419 }, { "epoch": 2.195774271195507, "grad_norm": 1.3151986598968506, "learning_rate": 3.5385153370510207e-06, "loss": 0.3512, "step": 16420 }, { "epoch": 2.1959079967905857, "grad_norm": 1.562608003616333, "learning_rate": 3.53741352002418e-06, "loss": 0.4111, "step": 16421 }, { "epoch": 2.1960417223856648, "grad_norm": 1.5565778017044067, "learning_rate": 3.5363118377037654e-06, "loss": 0.3704, "step": 16422 }, { "epoch": 2.1961754479807434, "grad_norm": 1.5046974420547485, "learning_rate": 3.5352102901127527e-06, "loss": 0.3403, "step": 16423 }, { "epoch": 2.1963091735758224, "grad_norm": 1.3644211292266846, "learning_rate": 3.5341088772740928e-06, "loss": 0.3685, "step": 16424 }, { "epoch": 2.1964428991709015, "grad_norm": 1.3171924352645874, "learning_rate": 3.533007599210746e-06, "loss": 0.3546, "step": 16425 }, { "epoch": 2.19657662476598, "grad_norm": 1.5270090103149414, "learning_rate": 3.5319064559456672e-06, "loss": 0.3822, "step": 16426 }, { "epoch": 2.196710350361059, "grad_norm": 1.4101202487945557, "learning_rate": 3.5308054475018095e-06, "loss": 0.3684, "step": 16427 }, { "epoch": 2.196844075956138, "grad_norm": 1.6080455780029297, "learning_rate": 3.529704573902121e-06, "loss": 0.4063, "step": 16428 }, { "epoch": 2.196977801551217, "grad_norm": 1.4410773515701294, "learning_rate": 3.5286038351695493e-06, "loss": 0.3802, "step": 16429 }, { "epoch": 2.197111527146296, "grad_norm": 1.4366222620010376, "learning_rate": 3.5275032313270386e-06, "loss": 0.3702, "step": 16430 }, { "epoch": 2.1972452527413746, "grad_norm": 1.5443081855773926, "learning_rate": 3.5264027623975294e-06, "loss": 0.4458, "step": 16431 }, { "epoch": 2.1973789783364537, "grad_norm": 1.4828189611434937, "learning_rate": 3.525302428403964e-06, "loss": 0.384, "step": 16432 }, { "epoch": 2.1975127039315323, "grad_norm": 1.5275070667266846, "learning_rate": 3.524202229369267e-06, "loss": 0.3824, "step": 16433 }, { "epoch": 2.1976464295266114, "grad_norm": 1.4760372638702393, "learning_rate": 3.523102165316381e-06, "loss": 0.3952, "step": 16434 }, { "epoch": 2.1977801551216904, "grad_norm": 1.7516838312149048, "learning_rate": 3.522002236268233e-06, "loss": 0.424, "step": 16435 }, { "epoch": 2.197913880716769, "grad_norm": 1.491129755973816, "learning_rate": 3.520902442247749e-06, "loss": 0.3578, "step": 16436 }, { "epoch": 2.198047606311848, "grad_norm": 1.3396703004837036, "learning_rate": 3.519802783277857e-06, "loss": 0.3633, "step": 16437 }, { "epoch": 2.1981813319069268, "grad_norm": 1.4581927061080933, "learning_rate": 3.5187032593814684e-06, "loss": 0.3626, "step": 16438 }, { "epoch": 2.198315057502006, "grad_norm": 1.549901008605957, "learning_rate": 3.5176038705815163e-06, "loss": 0.3759, "step": 16439 }, { "epoch": 2.198448783097085, "grad_norm": 1.4372854232788086, "learning_rate": 3.516504616900904e-06, "loss": 0.3788, "step": 16440 }, { "epoch": 2.1985825086921635, "grad_norm": 1.3589279651641846, "learning_rate": 3.5154054983625463e-06, "loss": 0.368, "step": 16441 }, { "epoch": 2.1987162342872426, "grad_norm": 1.4434417486190796, "learning_rate": 3.5143065149893617e-06, "loss": 0.3754, "step": 16442 }, { "epoch": 2.1988499598823217, "grad_norm": 1.4635580778121948, "learning_rate": 3.5132076668042457e-06, "loss": 0.385, "step": 16443 }, { "epoch": 2.1989836854774003, "grad_norm": 1.4728752374649048, "learning_rate": 3.5121089538301156e-06, "loss": 0.374, "step": 16444 }, { "epoch": 2.1991174110724794, "grad_norm": 1.4978705644607544, "learning_rate": 3.5110103760898616e-06, "loss": 0.4, "step": 16445 }, { "epoch": 2.199251136667558, "grad_norm": 1.5517399311065674, "learning_rate": 3.509911933606388e-06, "loss": 0.3652, "step": 16446 }, { "epoch": 2.199384862262637, "grad_norm": 1.557432770729065, "learning_rate": 3.5088136264025895e-06, "loss": 0.3842, "step": 16447 }, { "epoch": 2.199518587857716, "grad_norm": 1.476466178894043, "learning_rate": 3.5077154545013603e-06, "loss": 0.3914, "step": 16448 }, { "epoch": 2.1996523134527948, "grad_norm": 1.2703248262405396, "learning_rate": 3.5066174179255885e-06, "loss": 0.3346, "step": 16449 }, { "epoch": 2.199786039047874, "grad_norm": 1.552782416343689, "learning_rate": 3.505519516698165e-06, "loss": 0.3794, "step": 16450 }, { "epoch": 2.1999197646429525, "grad_norm": 1.293184757232666, "learning_rate": 3.504421750841971e-06, "loss": 0.3647, "step": 16451 }, { "epoch": 2.2000534902380315, "grad_norm": 1.3890290260314941, "learning_rate": 3.5033241203798907e-06, "loss": 0.3592, "step": 16452 }, { "epoch": 2.2001872158331106, "grad_norm": 1.3625593185424805, "learning_rate": 3.5022266253348025e-06, "loss": 0.3826, "step": 16453 }, { "epoch": 2.2003209414281892, "grad_norm": 1.7006300687789917, "learning_rate": 3.5011292657295825e-06, "loss": 0.4091, "step": 16454 }, { "epoch": 2.2004546670232683, "grad_norm": 1.5066580772399902, "learning_rate": 3.5000320415871035e-06, "loss": 0.3587, "step": 16455 }, { "epoch": 2.2005883926183474, "grad_norm": 1.374006748199463, "learning_rate": 3.498934952930242e-06, "loss": 0.3418, "step": 16456 }, { "epoch": 2.200722118213426, "grad_norm": 1.4527384042739868, "learning_rate": 3.497837999781852e-06, "loss": 0.3474, "step": 16457 }, { "epoch": 2.200855843808505, "grad_norm": 1.581458330154419, "learning_rate": 3.4967411821648144e-06, "loss": 0.3934, "step": 16458 }, { "epoch": 2.2009895694035837, "grad_norm": 1.4305740594863892, "learning_rate": 3.495644500101978e-06, "loss": 0.3831, "step": 16459 }, { "epoch": 2.2011232949986628, "grad_norm": 1.5581700801849365, "learning_rate": 3.4945479536162096e-06, "loss": 0.3848, "step": 16460 }, { "epoch": 2.201257020593742, "grad_norm": 1.552886962890625, "learning_rate": 3.4934515427303684e-06, "loss": 0.4187, "step": 16461 }, { "epoch": 2.2013907461888205, "grad_norm": 1.541314959526062, "learning_rate": 3.4923552674672978e-06, "loss": 0.4161, "step": 16462 }, { "epoch": 2.2015244717838995, "grad_norm": 1.5983177423477173, "learning_rate": 3.49125912784986e-06, "loss": 0.3628, "step": 16463 }, { "epoch": 2.201658197378978, "grad_norm": 1.635797381401062, "learning_rate": 3.4901631239008947e-06, "loss": 0.4056, "step": 16464 }, { "epoch": 2.2017919229740572, "grad_norm": 1.5336662530899048, "learning_rate": 3.489067255643249e-06, "loss": 0.3573, "step": 16465 }, { "epoch": 2.2019256485691363, "grad_norm": 1.4986268281936646, "learning_rate": 3.487971523099768e-06, "loss": 0.4169, "step": 16466 }, { "epoch": 2.202059374164215, "grad_norm": 1.5082272291183472, "learning_rate": 3.486875926293284e-06, "loss": 0.3921, "step": 16467 }, { "epoch": 2.202193099759294, "grad_norm": 1.5096263885498047, "learning_rate": 3.4857804652466466e-06, "loss": 0.3922, "step": 16468 }, { "epoch": 2.2023268253543726, "grad_norm": 1.3791091442108154, "learning_rate": 3.4846851399826788e-06, "loss": 0.3467, "step": 16469 }, { "epoch": 2.2024605509494517, "grad_norm": 1.6764763593673706, "learning_rate": 3.483589950524213e-06, "loss": 0.381, "step": 16470 }, { "epoch": 2.2025942765445308, "grad_norm": 1.7501657009124756, "learning_rate": 3.4824948968940808e-06, "loss": 0.408, "step": 16471 }, { "epoch": 2.2027280021396094, "grad_norm": 1.4173920154571533, "learning_rate": 3.4813999791151065e-06, "loss": 0.3914, "step": 16472 }, { "epoch": 2.2028617277346885, "grad_norm": 1.6346664428710938, "learning_rate": 3.480305197210111e-06, "loss": 0.4193, "step": 16473 }, { "epoch": 2.202995453329767, "grad_norm": 1.500555396080017, "learning_rate": 3.4792105512019148e-06, "loss": 0.3794, "step": 16474 }, { "epoch": 2.203129178924846, "grad_norm": 1.4445401430130005, "learning_rate": 3.4781160411133354e-06, "loss": 0.3572, "step": 16475 }, { "epoch": 2.2032629045199252, "grad_norm": 1.4952434301376343, "learning_rate": 3.477021666967186e-06, "loss": 0.348, "step": 16476 }, { "epoch": 2.203396630115004, "grad_norm": 1.6164652109146118, "learning_rate": 3.475927428786281e-06, "loss": 0.3446, "step": 16477 }, { "epoch": 2.203530355710083, "grad_norm": 1.5771899223327637, "learning_rate": 3.474833326593421e-06, "loss": 0.4037, "step": 16478 }, { "epoch": 2.203664081305162, "grad_norm": 1.3571815490722656, "learning_rate": 3.473739360411418e-06, "loss": 0.3781, "step": 16479 }, { "epoch": 2.2037978069002406, "grad_norm": 1.5566506385803223, "learning_rate": 3.4726455302630768e-06, "loss": 0.39, "step": 16480 }, { "epoch": 2.2039315324953197, "grad_norm": 1.6009420156478882, "learning_rate": 3.4715518361711876e-06, "loss": 0.3966, "step": 16481 }, { "epoch": 2.2040652580903983, "grad_norm": 1.5089601278305054, "learning_rate": 3.4704582781585596e-06, "loss": 0.396, "step": 16482 }, { "epoch": 2.2041989836854774, "grad_norm": 1.5041552782058716, "learning_rate": 3.4693648562479733e-06, "loss": 0.3674, "step": 16483 }, { "epoch": 2.2043327092805565, "grad_norm": 1.5039492845535278, "learning_rate": 3.468271570462235e-06, "loss": 0.3397, "step": 16484 }, { "epoch": 2.204466434875635, "grad_norm": 1.5863938331604004, "learning_rate": 3.467178420824122e-06, "loss": 0.4163, "step": 16485 }, { "epoch": 2.204600160470714, "grad_norm": 1.6247525215148926, "learning_rate": 3.46608540735642e-06, "loss": 0.3812, "step": 16486 }, { "epoch": 2.204733886065793, "grad_norm": 1.5923283100128174, "learning_rate": 3.464992530081922e-06, "loss": 0.4138, "step": 16487 }, { "epoch": 2.204867611660872, "grad_norm": 1.4704937934875488, "learning_rate": 3.463899789023395e-06, "loss": 0.3805, "step": 16488 }, { "epoch": 2.205001337255951, "grad_norm": 1.522513747215271, "learning_rate": 3.462807184203629e-06, "loss": 0.3959, "step": 16489 }, { "epoch": 2.2051350628510296, "grad_norm": 1.5773521661758423, "learning_rate": 3.461714715645389e-06, "loss": 0.4472, "step": 16490 }, { "epoch": 2.2052687884461086, "grad_norm": 1.338305115699768, "learning_rate": 3.4606223833714493e-06, "loss": 0.3235, "step": 16491 }, { "epoch": 2.2054025140411877, "grad_norm": 1.6059221029281616, "learning_rate": 3.4595301874045785e-06, "loss": 0.3748, "step": 16492 }, { "epoch": 2.2055362396362663, "grad_norm": 1.7253587245941162, "learning_rate": 3.4584381277675416e-06, "loss": 0.431, "step": 16493 }, { "epoch": 2.2056699652313454, "grad_norm": 1.5903260707855225, "learning_rate": 3.457346204483103e-06, "loss": 0.4039, "step": 16494 }, { "epoch": 2.205803690826424, "grad_norm": 1.5016887187957764, "learning_rate": 3.456254417574022e-06, "loss": 0.4106, "step": 16495 }, { "epoch": 2.205937416421503, "grad_norm": 1.417677879333496, "learning_rate": 3.4551627670630562e-06, "loss": 0.3116, "step": 16496 }, { "epoch": 2.206071142016582, "grad_norm": 1.532374382019043, "learning_rate": 3.4540712529729592e-06, "loss": 0.3915, "step": 16497 }, { "epoch": 2.206204867611661, "grad_norm": 1.4879239797592163, "learning_rate": 3.452979875326483e-06, "loss": 0.3449, "step": 16498 }, { "epoch": 2.20633859320674, "grad_norm": 1.3327503204345703, "learning_rate": 3.4518886341463775e-06, "loss": 0.3601, "step": 16499 }, { "epoch": 2.2064723188018185, "grad_norm": 1.380768060684204, "learning_rate": 3.4507975294553877e-06, "loss": 0.3298, "step": 16500 }, { "epoch": 2.2066060443968976, "grad_norm": 1.4507542848587036, "learning_rate": 3.449706561276259e-06, "loss": 0.3999, "step": 16501 }, { "epoch": 2.2067397699919766, "grad_norm": 1.4084267616271973, "learning_rate": 3.4486157296317224e-06, "loss": 0.366, "step": 16502 }, { "epoch": 2.2068734955870553, "grad_norm": 1.5734275579452515, "learning_rate": 3.4475250345445287e-06, "loss": 0.3654, "step": 16503 }, { "epoch": 2.2070072211821343, "grad_norm": 1.7202084064483643, "learning_rate": 3.446434476037399e-06, "loss": 0.3861, "step": 16504 }, { "epoch": 2.207140946777213, "grad_norm": 1.573837161064148, "learning_rate": 3.445344054133075e-06, "loss": 0.4055, "step": 16505 }, { "epoch": 2.207274672372292, "grad_norm": 1.4000096321105957, "learning_rate": 3.4442537688542855e-06, "loss": 0.3463, "step": 16506 }, { "epoch": 2.207408397967371, "grad_norm": 1.6614608764648438, "learning_rate": 3.4431636202237464e-06, "loss": 0.371, "step": 16507 }, { "epoch": 2.2075421235624497, "grad_norm": 1.585769534111023, "learning_rate": 3.442073608264194e-06, "loss": 0.4082, "step": 16508 }, { "epoch": 2.207675849157529, "grad_norm": 1.5032787322998047, "learning_rate": 3.4409837329983376e-06, "loss": 0.3764, "step": 16509 }, { "epoch": 2.2078095747526074, "grad_norm": 1.4769421815872192, "learning_rate": 3.4398939944488994e-06, "loss": 0.3537, "step": 16510 }, { "epoch": 2.2079433003476865, "grad_norm": 1.464247226715088, "learning_rate": 3.438804392638595e-06, "loss": 0.4041, "step": 16511 }, { "epoch": 2.2080770259427656, "grad_norm": 1.4186909198760986, "learning_rate": 3.43771492759013e-06, "loss": 0.3487, "step": 16512 }, { "epoch": 2.208210751537844, "grad_norm": 1.361175775527954, "learning_rate": 3.4366255993262255e-06, "loss": 0.3567, "step": 16513 }, { "epoch": 2.2083444771329233, "grad_norm": 1.567903757095337, "learning_rate": 3.435536407869575e-06, "loss": 0.3775, "step": 16514 }, { "epoch": 2.2084782027280023, "grad_norm": 1.8031455278396606, "learning_rate": 3.434447353242888e-06, "loss": 0.4244, "step": 16515 }, { "epoch": 2.208611928323081, "grad_norm": 1.3213262557983398, "learning_rate": 3.4333584354688634e-06, "loss": 0.3696, "step": 16516 }, { "epoch": 2.20874565391816, "grad_norm": 1.626657247543335, "learning_rate": 3.4322696545701984e-06, "loss": 0.3741, "step": 16517 }, { "epoch": 2.2088793795132386, "grad_norm": 1.4158945083618164, "learning_rate": 3.4311810105695875e-06, "loss": 0.3485, "step": 16518 }, { "epoch": 2.2090131051083177, "grad_norm": 1.5091612339019775, "learning_rate": 3.4300925034897227e-06, "loss": 0.3758, "step": 16519 }, { "epoch": 2.209146830703397, "grad_norm": 1.700692057609558, "learning_rate": 3.429004133353293e-06, "loss": 0.4021, "step": 16520 }, { "epoch": 2.2092805562984754, "grad_norm": 1.6729439496994019, "learning_rate": 3.4279159001829844e-06, "loss": 0.382, "step": 16521 }, { "epoch": 2.2094142818935545, "grad_norm": 1.545614242553711, "learning_rate": 3.4268278040014836e-06, "loss": 0.3756, "step": 16522 }, { "epoch": 2.2095480074886336, "grad_norm": 1.4382662773132324, "learning_rate": 3.4257398448314604e-06, "loss": 0.3428, "step": 16523 }, { "epoch": 2.209681733083712, "grad_norm": 1.5453675985336304, "learning_rate": 3.4246520226956028e-06, "loss": 0.3619, "step": 16524 }, { "epoch": 2.2098154586787913, "grad_norm": 1.7147769927978516, "learning_rate": 3.423564337616585e-06, "loss": 0.444, "step": 16525 }, { "epoch": 2.20994918427387, "grad_norm": 1.5619629621505737, "learning_rate": 3.4224767896170697e-06, "loss": 0.3541, "step": 16526 }, { "epoch": 2.210082909868949, "grad_norm": 1.6263844966888428, "learning_rate": 3.4213893787197372e-06, "loss": 0.3528, "step": 16527 }, { "epoch": 2.210216635464028, "grad_norm": 1.637852430343628, "learning_rate": 3.4203021049472417e-06, "loss": 0.4085, "step": 16528 }, { "epoch": 2.2103503610591066, "grad_norm": 1.3829307556152344, "learning_rate": 3.41921496832226e-06, "loss": 0.3488, "step": 16529 }, { "epoch": 2.2104840866541857, "grad_norm": 1.7452017068862915, "learning_rate": 3.418127968867442e-06, "loss": 0.3945, "step": 16530 }, { "epoch": 2.2106178122492643, "grad_norm": 1.7482801675796509, "learning_rate": 3.4170411066054442e-06, "loss": 0.4851, "step": 16531 }, { "epoch": 2.2107515378443434, "grad_norm": 1.4223763942718506, "learning_rate": 3.4159543815589325e-06, "loss": 0.3639, "step": 16532 }, { "epoch": 2.2108852634394225, "grad_norm": 1.6544294357299805, "learning_rate": 3.414867793750547e-06, "loss": 0.3833, "step": 16533 }, { "epoch": 2.211018989034501, "grad_norm": 1.545331597328186, "learning_rate": 3.413781343202942e-06, "loss": 0.3906, "step": 16534 }, { "epoch": 2.21115271462958, "grad_norm": 1.5762994289398193, "learning_rate": 3.412695029938763e-06, "loss": 0.3987, "step": 16535 }, { "epoch": 2.211286440224659, "grad_norm": 1.5882729291915894, "learning_rate": 3.4116088539806523e-06, "loss": 0.3883, "step": 16536 }, { "epoch": 2.211420165819738, "grad_norm": 1.5356632471084595, "learning_rate": 3.4105228153512502e-06, "loss": 0.3914, "step": 16537 }, { "epoch": 2.211553891414817, "grad_norm": 1.633872628211975, "learning_rate": 3.4094369140731953e-06, "loss": 0.3648, "step": 16538 }, { "epoch": 2.2116876170098956, "grad_norm": 1.5918583869934082, "learning_rate": 3.4083511501691214e-06, "loss": 0.3819, "step": 16539 }, { "epoch": 2.2118213426049746, "grad_norm": 1.4663565158843994, "learning_rate": 3.4072655236616593e-06, "loss": 0.351, "step": 16540 }, { "epoch": 2.2119550682000533, "grad_norm": 1.2753360271453857, "learning_rate": 3.406180034573443e-06, "loss": 0.3367, "step": 16541 }, { "epoch": 2.2120887937951323, "grad_norm": 1.5873600244522095, "learning_rate": 3.405094682927087e-06, "loss": 0.3939, "step": 16542 }, { "epoch": 2.2122225193902114, "grad_norm": 1.669495701789856, "learning_rate": 3.4040094687452263e-06, "loss": 0.4158, "step": 16543 }, { "epoch": 2.21235624498529, "grad_norm": 1.4929075241088867, "learning_rate": 3.402924392050475e-06, "loss": 0.3585, "step": 16544 }, { "epoch": 2.212489970580369, "grad_norm": 1.549967885017395, "learning_rate": 3.401839452865453e-06, "loss": 0.4269, "step": 16545 }, { "epoch": 2.212623696175448, "grad_norm": 1.4322086572647095, "learning_rate": 3.4007546512127764e-06, "loss": 0.3692, "step": 16546 }, { "epoch": 2.212757421770527, "grad_norm": 1.4452801942825317, "learning_rate": 3.3996699871150486e-06, "loss": 0.3769, "step": 16547 }, { "epoch": 2.212891147365606, "grad_norm": 1.2992616891860962, "learning_rate": 3.3985854605948896e-06, "loss": 0.35, "step": 16548 }, { "epoch": 2.2130248729606845, "grad_norm": 1.4481343030929565, "learning_rate": 3.397501071674898e-06, "loss": 0.3728, "step": 16549 }, { "epoch": 2.2131585985557636, "grad_norm": 1.4488797187805176, "learning_rate": 3.396416820377675e-06, "loss": 0.3792, "step": 16550 }, { "epoch": 2.2132923241508426, "grad_norm": 1.4925519227981567, "learning_rate": 3.3953327067258303e-06, "loss": 0.3843, "step": 16551 }, { "epoch": 2.2134260497459213, "grad_norm": 1.473625898361206, "learning_rate": 3.394248730741948e-06, "loss": 0.3529, "step": 16552 }, { "epoch": 2.2135597753410003, "grad_norm": 1.5663082599639893, "learning_rate": 3.3931648924486383e-06, "loss": 0.3897, "step": 16553 }, { "epoch": 2.213693500936079, "grad_norm": 1.5980395078659058, "learning_rate": 3.3920811918684804e-06, "loss": 0.3409, "step": 16554 }, { "epoch": 2.213827226531158, "grad_norm": 1.5004271268844604, "learning_rate": 3.3909976290240663e-06, "loss": 0.3463, "step": 16555 }, { "epoch": 2.213960952126237, "grad_norm": 1.3759044408798218, "learning_rate": 3.389914203937983e-06, "loss": 0.3526, "step": 16556 }, { "epoch": 2.2140946777213157, "grad_norm": 1.4824753999710083, "learning_rate": 3.388830916632813e-06, "loss": 0.3887, "step": 16557 }, { "epoch": 2.214228403316395, "grad_norm": 1.5208218097686768, "learning_rate": 3.3877477671311363e-06, "loss": 0.3941, "step": 16558 }, { "epoch": 2.214362128911474, "grad_norm": 1.6410958766937256, "learning_rate": 3.38666475545553e-06, "loss": 0.4222, "step": 16559 }, { "epoch": 2.2144958545065525, "grad_norm": 1.4773175716400146, "learning_rate": 3.3855818816285692e-06, "loss": 0.4176, "step": 16560 }, { "epoch": 2.2146295801016316, "grad_norm": 1.5418674945831299, "learning_rate": 3.384499145672824e-06, "loss": 0.3953, "step": 16561 }, { "epoch": 2.21476330569671, "grad_norm": 1.3757091760635376, "learning_rate": 3.3834165476108637e-06, "loss": 0.3571, "step": 16562 }, { "epoch": 2.2148970312917893, "grad_norm": 1.4011141061782837, "learning_rate": 3.3823340874652543e-06, "loss": 0.3899, "step": 16563 }, { "epoch": 2.2150307568868683, "grad_norm": 1.577030062675476, "learning_rate": 3.3812517652585597e-06, "loss": 0.3839, "step": 16564 }, { "epoch": 2.215164482481947, "grad_norm": 1.3586053848266602, "learning_rate": 3.3801695810133407e-06, "loss": 0.353, "step": 16565 }, { "epoch": 2.215298208077026, "grad_norm": 1.5198086500167847, "learning_rate": 3.3790875347521456e-06, "loss": 0.4159, "step": 16566 }, { "epoch": 2.2154319336721047, "grad_norm": 1.4178853034973145, "learning_rate": 3.378005626497541e-06, "loss": 0.3552, "step": 16567 }, { "epoch": 2.2155656592671837, "grad_norm": 1.3791643381118774, "learning_rate": 3.3769238562720674e-06, "loss": 0.3932, "step": 16568 }, { "epoch": 2.215699384862263, "grad_norm": 1.512338399887085, "learning_rate": 3.3758422240982814e-06, "loss": 0.4006, "step": 16569 }, { "epoch": 2.2158331104573414, "grad_norm": 1.402131199836731, "learning_rate": 3.3747607299987294e-06, "loss": 0.3518, "step": 16570 }, { "epoch": 2.2159668360524205, "grad_norm": 1.5381319522857666, "learning_rate": 3.3736793739959426e-06, "loss": 0.3676, "step": 16571 }, { "epoch": 2.216100561647499, "grad_norm": 1.4864060878753662, "learning_rate": 3.3725981561124764e-06, "loss": 0.3737, "step": 16572 }, { "epoch": 2.216234287242578, "grad_norm": 1.2980787754058838, "learning_rate": 3.3715170763708526e-06, "loss": 0.3394, "step": 16573 }, { "epoch": 2.2163680128376573, "grad_norm": 1.280125379562378, "learning_rate": 3.3704361347936186e-06, "loss": 0.3621, "step": 16574 }, { "epoch": 2.216501738432736, "grad_norm": 1.4714363813400269, "learning_rate": 3.3693553314032967e-06, "loss": 0.3873, "step": 16575 }, { "epoch": 2.216635464027815, "grad_norm": 1.440182089805603, "learning_rate": 3.368274666222419e-06, "loss": 0.3256, "step": 16576 }, { "epoch": 2.2167691896228936, "grad_norm": 1.4203541278839111, "learning_rate": 3.367194139273509e-06, "loss": 0.3421, "step": 16577 }, { "epoch": 2.2169029152179727, "grad_norm": 1.66934335231781, "learning_rate": 3.366113750579091e-06, "loss": 0.4392, "step": 16578 }, { "epoch": 2.2170366408130517, "grad_norm": 1.6014271974563599, "learning_rate": 3.365033500161683e-06, "loss": 0.3165, "step": 16579 }, { "epoch": 2.2171703664081304, "grad_norm": 1.6330770254135132, "learning_rate": 3.3639533880438037e-06, "loss": 0.3451, "step": 16580 }, { "epoch": 2.2173040920032094, "grad_norm": 1.5846052169799805, "learning_rate": 3.3628734142479646e-06, "loss": 0.3922, "step": 16581 }, { "epoch": 2.2174378175982885, "grad_norm": 1.5264737606048584, "learning_rate": 3.3617935787966793e-06, "loss": 0.4027, "step": 16582 }, { "epoch": 2.217571543193367, "grad_norm": 1.5482114553451538, "learning_rate": 3.360713881712454e-06, "loss": 0.4016, "step": 16583 }, { "epoch": 2.217705268788446, "grad_norm": 1.6392947435379028, "learning_rate": 3.3596343230177954e-06, "loss": 0.3492, "step": 16584 }, { "epoch": 2.217838994383525, "grad_norm": 1.4725600481033325, "learning_rate": 3.3585549027352047e-06, "loss": 0.3229, "step": 16585 }, { "epoch": 2.217972719978604, "grad_norm": 1.5494537353515625, "learning_rate": 3.3574756208871862e-06, "loss": 0.394, "step": 16586 }, { "epoch": 2.218106445573683, "grad_norm": 1.4391752481460571, "learning_rate": 3.3563964774962245e-06, "loss": 0.3749, "step": 16587 }, { "epoch": 2.2182401711687616, "grad_norm": 1.4733268022537231, "learning_rate": 3.3553174725848247e-06, "loss": 0.3209, "step": 16588 }, { "epoch": 2.2183738967638407, "grad_norm": 1.531790018081665, "learning_rate": 3.354238606175474e-06, "loss": 0.4118, "step": 16589 }, { "epoch": 2.2185076223589193, "grad_norm": 1.5632377862930298, "learning_rate": 3.3531598782906605e-06, "loss": 0.354, "step": 16590 }, { "epoch": 2.2186413479539984, "grad_norm": 1.6562833786010742, "learning_rate": 3.352081288952872e-06, "loss": 0.4089, "step": 16591 }, { "epoch": 2.2187750735490774, "grad_norm": 1.6304148435592651, "learning_rate": 3.3510028381845804e-06, "loss": 0.3726, "step": 16592 }, { "epoch": 2.218908799144156, "grad_norm": 1.4776134490966797, "learning_rate": 3.3499245260082803e-06, "loss": 0.3734, "step": 16593 }, { "epoch": 2.219042524739235, "grad_norm": 1.4355220794677734, "learning_rate": 3.3488463524464355e-06, "loss": 0.3858, "step": 16594 }, { "epoch": 2.219176250334314, "grad_norm": 1.3044672012329102, "learning_rate": 3.3477683175215213e-06, "loss": 0.378, "step": 16595 }, { "epoch": 2.219309975929393, "grad_norm": 1.5870615243911743, "learning_rate": 3.346690421256017e-06, "loss": 0.4281, "step": 16596 }, { "epoch": 2.219443701524472, "grad_norm": 1.419994592666626, "learning_rate": 3.3456126636723786e-06, "loss": 0.3881, "step": 16597 }, { "epoch": 2.2195774271195505, "grad_norm": 1.3339389562606812, "learning_rate": 3.3445350447930824e-06, "loss": 0.3395, "step": 16598 }, { "epoch": 2.2197111527146296, "grad_norm": 1.4296832084655762, "learning_rate": 3.343457564640582e-06, "loss": 0.3748, "step": 16599 }, { "epoch": 2.2198448783097087, "grad_norm": 1.5512733459472656, "learning_rate": 3.342380223237338e-06, "loss": 0.3767, "step": 16600 }, { "epoch": 2.2199786039047873, "grad_norm": 1.2560606002807617, "learning_rate": 3.341303020605808e-06, "loss": 0.3686, "step": 16601 }, { "epoch": 2.2201123294998664, "grad_norm": 1.3711079359054565, "learning_rate": 3.340225956768446e-06, "loss": 0.3384, "step": 16602 }, { "epoch": 2.220246055094945, "grad_norm": 1.4956520795822144, "learning_rate": 3.3391490317477006e-06, "loss": 0.3794, "step": 16603 }, { "epoch": 2.220379780690024, "grad_norm": 1.413162350654602, "learning_rate": 3.33807224556602e-06, "loss": 0.392, "step": 16604 }, { "epoch": 2.220513506285103, "grad_norm": 1.4823578596115112, "learning_rate": 3.336995598245848e-06, "loss": 0.4032, "step": 16605 }, { "epoch": 2.2206472318801818, "grad_norm": 1.4129291772842407, "learning_rate": 3.3359190898096273e-06, "loss": 0.4034, "step": 16606 }, { "epoch": 2.220780957475261, "grad_norm": 1.4582463502883911, "learning_rate": 3.3348427202797964e-06, "loss": 0.4133, "step": 16607 }, { "epoch": 2.2209146830703395, "grad_norm": 1.6861326694488525, "learning_rate": 3.3337664896787915e-06, "loss": 0.4025, "step": 16608 }, { "epoch": 2.2210484086654185, "grad_norm": 1.3861110210418701, "learning_rate": 3.332690398029044e-06, "loss": 0.3809, "step": 16609 }, { "epoch": 2.2211821342604976, "grad_norm": 1.485183835029602, "learning_rate": 3.3316144453529897e-06, "loss": 0.3958, "step": 16610 }, { "epoch": 2.2213158598555762, "grad_norm": 1.4620007276535034, "learning_rate": 3.330538631673045e-06, "loss": 0.3335, "step": 16611 }, { "epoch": 2.2214495854506553, "grad_norm": 1.524143099784851, "learning_rate": 3.3294629570116453e-06, "loss": 0.3867, "step": 16612 }, { "epoch": 2.221583311045734, "grad_norm": 1.3652702569961548, "learning_rate": 3.3283874213912028e-06, "loss": 0.3405, "step": 16613 }, { "epoch": 2.221717036640813, "grad_norm": 1.5495275259017944, "learning_rate": 3.3273120248341427e-06, "loss": 0.3851, "step": 16614 }, { "epoch": 2.221850762235892, "grad_norm": 1.517457365989685, "learning_rate": 3.3262367673628813e-06, "loss": 0.3956, "step": 16615 }, { "epoch": 2.2219844878309707, "grad_norm": 1.4989700317382812, "learning_rate": 3.325161648999823e-06, "loss": 0.3753, "step": 16616 }, { "epoch": 2.2221182134260498, "grad_norm": 1.8849539756774902, "learning_rate": 3.324086669767388e-06, "loss": 0.4013, "step": 16617 }, { "epoch": 2.222251939021129, "grad_norm": 1.2653659582138062, "learning_rate": 3.3230118296879765e-06, "loss": 0.3315, "step": 16618 }, { "epoch": 2.2223856646162075, "grad_norm": 1.6657754182815552, "learning_rate": 3.321937128783993e-06, "loss": 0.412, "step": 16619 }, { "epoch": 2.2225193902112865, "grad_norm": 1.49312424659729, "learning_rate": 3.3208625670778403e-06, "loss": 0.376, "step": 16620 }, { "epoch": 2.222653115806365, "grad_norm": 1.355749249458313, "learning_rate": 3.3197881445919165e-06, "loss": 0.3275, "step": 16621 }, { "epoch": 2.2227868414014442, "grad_norm": 1.5757213830947876, "learning_rate": 3.318713861348617e-06, "loss": 0.3993, "step": 16622 }, { "epoch": 2.2229205669965233, "grad_norm": 1.6092207431793213, "learning_rate": 3.3176397173703323e-06, "loss": 0.3731, "step": 16623 }, { "epoch": 2.223054292591602, "grad_norm": 1.4089053869247437, "learning_rate": 3.3165657126794537e-06, "loss": 0.3595, "step": 16624 }, { "epoch": 2.223188018186681, "grad_norm": 1.3993682861328125, "learning_rate": 3.3154918472983687e-06, "loss": 0.3834, "step": 16625 }, { "epoch": 2.22332174378176, "grad_norm": 1.542460560798645, "learning_rate": 3.314418121249459e-06, "loss": 0.4148, "step": 16626 }, { "epoch": 2.2234554693768387, "grad_norm": 1.5561797618865967, "learning_rate": 3.313344534555106e-06, "loss": 0.4478, "step": 16627 }, { "epoch": 2.2235891949719178, "grad_norm": 1.5955150127410889, "learning_rate": 3.3122710872376875e-06, "loss": 0.3922, "step": 16628 }, { "epoch": 2.2237229205669964, "grad_norm": 1.4200388193130493, "learning_rate": 3.3111977793195794e-06, "loss": 0.3934, "step": 16629 }, { "epoch": 2.2238566461620755, "grad_norm": 1.5386641025543213, "learning_rate": 3.310124610823152e-06, "loss": 0.3919, "step": 16630 }, { "epoch": 2.2239903717571545, "grad_norm": 1.5449252128601074, "learning_rate": 3.3090515817707803e-06, "loss": 0.4032, "step": 16631 }, { "epoch": 2.224124097352233, "grad_norm": 1.5345039367675781, "learning_rate": 3.307978692184819e-06, "loss": 0.418, "step": 16632 }, { "epoch": 2.2242578229473122, "grad_norm": 1.3569824695587158, "learning_rate": 3.30690594208764e-06, "loss": 0.3802, "step": 16633 }, { "epoch": 2.224391548542391, "grad_norm": 1.5612645149230957, "learning_rate": 3.3058333315016066e-06, "loss": 0.3931, "step": 16634 }, { "epoch": 2.22452527413747, "grad_norm": 1.4902763366699219, "learning_rate": 3.3047608604490655e-06, "loss": 0.3992, "step": 16635 }, { "epoch": 2.224658999732549, "grad_norm": 1.4195106029510498, "learning_rate": 3.3036885289523836e-06, "loss": 0.3721, "step": 16636 }, { "epoch": 2.2247927253276276, "grad_norm": 1.5231140851974487, "learning_rate": 3.3026163370339e-06, "loss": 0.3923, "step": 16637 }, { "epoch": 2.2249264509227067, "grad_norm": 1.5485320091247559, "learning_rate": 3.3015442847159772e-06, "loss": 0.3779, "step": 16638 }, { "epoch": 2.2250601765177853, "grad_norm": 1.6018744707107544, "learning_rate": 3.3004723720209507e-06, "loss": 0.3841, "step": 16639 }, { "epoch": 2.2251939021128644, "grad_norm": 1.3405845165252686, "learning_rate": 3.2994005989711664e-06, "loss": 0.3741, "step": 16640 }, { "epoch": 2.2253276277079435, "grad_norm": 1.5225411653518677, "learning_rate": 3.298328965588966e-06, "loss": 0.4035, "step": 16641 }, { "epoch": 2.225461353303022, "grad_norm": 1.5103999376296997, "learning_rate": 3.2972574718966845e-06, "loss": 0.324, "step": 16642 }, { "epoch": 2.225595078898101, "grad_norm": 1.3894184827804565, "learning_rate": 3.2961861179166568e-06, "loss": 0.356, "step": 16643 }, { "epoch": 2.22572880449318, "grad_norm": 1.7332139015197754, "learning_rate": 3.2951149036712147e-06, "loss": 0.3557, "step": 16644 }, { "epoch": 2.225862530088259, "grad_norm": 1.7833398580551147, "learning_rate": 3.2940438291826883e-06, "loss": 0.4069, "step": 16645 }, { "epoch": 2.225996255683338, "grad_norm": 1.4716541767120361, "learning_rate": 3.2929728944733997e-06, "loss": 0.3409, "step": 16646 }, { "epoch": 2.2261299812784165, "grad_norm": 1.4921190738677979, "learning_rate": 3.2919020995656735e-06, "loss": 0.3839, "step": 16647 }, { "epoch": 2.2262637068734956, "grad_norm": 1.5928740501403809, "learning_rate": 3.290831444481829e-06, "loss": 0.3773, "step": 16648 }, { "epoch": 2.2263974324685747, "grad_norm": 1.9459527730941772, "learning_rate": 3.2897609292441834e-06, "loss": 0.516, "step": 16649 }, { "epoch": 2.2265311580636533, "grad_norm": 1.4410967826843262, "learning_rate": 3.2886905538750523e-06, "loss": 0.3862, "step": 16650 }, { "epoch": 2.2266648836587324, "grad_norm": 1.634263515472412, "learning_rate": 3.287620318396739e-06, "loss": 0.3557, "step": 16651 }, { "epoch": 2.226798609253811, "grad_norm": 1.7036445140838623, "learning_rate": 3.2865502228315615e-06, "loss": 0.397, "step": 16652 }, { "epoch": 2.22693233484889, "grad_norm": 1.5910916328430176, "learning_rate": 3.2854802672018194e-06, "loss": 0.3808, "step": 16653 }, { "epoch": 2.227066060443969, "grad_norm": 1.559190273284912, "learning_rate": 3.284410451529816e-06, "loss": 0.3961, "step": 16654 }, { "epoch": 2.227199786039048, "grad_norm": 1.596232295036316, "learning_rate": 3.2833407758378534e-06, "loss": 0.3922, "step": 16655 }, { "epoch": 2.227333511634127, "grad_norm": 1.4821006059646606, "learning_rate": 3.282271240148219e-06, "loss": 0.3737, "step": 16656 }, { "epoch": 2.2274672372292055, "grad_norm": 1.533178448677063, "learning_rate": 3.2812018444832195e-06, "loss": 0.3576, "step": 16657 }, { "epoch": 2.2276009628242845, "grad_norm": 1.377198576927185, "learning_rate": 3.2801325888651313e-06, "loss": 0.3122, "step": 16658 }, { "epoch": 2.2277346884193636, "grad_norm": 1.4968308210372925, "learning_rate": 3.2790634733162563e-06, "loss": 0.3309, "step": 16659 }, { "epoch": 2.2278684140144422, "grad_norm": 1.7413092851638794, "learning_rate": 3.2779944978588686e-06, "loss": 0.4114, "step": 16660 }, { "epoch": 2.2280021396095213, "grad_norm": 1.614399790763855, "learning_rate": 3.276925662515249e-06, "loss": 0.3782, "step": 16661 }, { "epoch": 2.2281358652046004, "grad_norm": 1.5151318311691284, "learning_rate": 3.275856967307688e-06, "loss": 0.3447, "step": 16662 }, { "epoch": 2.228269590799679, "grad_norm": 1.489237666130066, "learning_rate": 3.2747884122584504e-06, "loss": 0.378, "step": 16663 }, { "epoch": 2.228403316394758, "grad_norm": 1.902297019958496, "learning_rate": 3.2737199973898136e-06, "loss": 0.4598, "step": 16664 }, { "epoch": 2.2285370419898367, "grad_norm": 1.259997844696045, "learning_rate": 3.272651722724047e-06, "loss": 0.3169, "step": 16665 }, { "epoch": 2.228670767584916, "grad_norm": 1.5152636766433716, "learning_rate": 3.271583588283418e-06, "loss": 0.3542, "step": 16666 }, { "epoch": 2.228804493179995, "grad_norm": 1.405554175376892, "learning_rate": 3.27051559409019e-06, "loss": 0.3833, "step": 16667 }, { "epoch": 2.2289382187750735, "grad_norm": 1.508276104927063, "learning_rate": 3.2694477401666257e-06, "loss": 0.396, "step": 16668 }, { "epoch": 2.2290719443701525, "grad_norm": 1.3172376155853271, "learning_rate": 3.268380026534983e-06, "loss": 0.342, "step": 16669 }, { "epoch": 2.229205669965231, "grad_norm": 1.3303909301757812, "learning_rate": 3.267312453217517e-06, "loss": 0.298, "step": 16670 }, { "epoch": 2.2293393955603102, "grad_norm": 1.492348551750183, "learning_rate": 3.2662450202364806e-06, "loss": 0.3517, "step": 16671 }, { "epoch": 2.2294731211553893, "grad_norm": 1.4973037242889404, "learning_rate": 3.265177727614123e-06, "loss": 0.408, "step": 16672 }, { "epoch": 2.229606846750468, "grad_norm": 1.4724109172821045, "learning_rate": 3.26411057537269e-06, "loss": 0.3762, "step": 16673 }, { "epoch": 2.229740572345547, "grad_norm": 1.5360755920410156, "learning_rate": 3.2630435635344283e-06, "loss": 0.4055, "step": 16674 }, { "epoch": 2.2298742979406256, "grad_norm": 1.3943054676055908, "learning_rate": 3.2619766921215755e-06, "loss": 0.3335, "step": 16675 }, { "epoch": 2.2300080235357047, "grad_norm": 1.586645483970642, "learning_rate": 3.2609099611563754e-06, "loss": 0.4007, "step": 16676 }, { "epoch": 2.230141749130784, "grad_norm": 1.4012998342514038, "learning_rate": 3.259843370661051e-06, "loss": 0.3651, "step": 16677 }, { "epoch": 2.2302754747258624, "grad_norm": 1.3928658962249756, "learning_rate": 3.258776920657849e-06, "loss": 0.3451, "step": 16678 }, { "epoch": 2.2304092003209415, "grad_norm": 1.5631343126296997, "learning_rate": 3.2577106111689884e-06, "loss": 0.4097, "step": 16679 }, { "epoch": 2.23054292591602, "grad_norm": 1.497756838798523, "learning_rate": 3.2566444422166955e-06, "loss": 0.3993, "step": 16680 }, { "epoch": 2.230676651511099, "grad_norm": 1.5008302927017212, "learning_rate": 3.2555784138232014e-06, "loss": 0.3837, "step": 16681 }, { "epoch": 2.2308103771061782, "grad_norm": 1.3706705570220947, "learning_rate": 3.254512526010717e-06, "loss": 0.3267, "step": 16682 }, { "epoch": 2.230944102701257, "grad_norm": 1.3972560167312622, "learning_rate": 3.25344677880147e-06, "loss": 0.3412, "step": 16683 }, { "epoch": 2.231077828296336, "grad_norm": 1.5514709949493408, "learning_rate": 3.2523811722176657e-06, "loss": 0.4121, "step": 16684 }, { "epoch": 2.231211553891415, "grad_norm": 1.3561575412750244, "learning_rate": 3.251315706281519e-06, "loss": 0.3564, "step": 16685 }, { "epoch": 2.2313452794864936, "grad_norm": 1.503368854522705, "learning_rate": 3.2502503810152385e-06, "loss": 0.4117, "step": 16686 }, { "epoch": 2.2314790050815727, "grad_norm": 1.604862093925476, "learning_rate": 3.2491851964410304e-06, "loss": 0.406, "step": 16687 }, { "epoch": 2.2316127306766513, "grad_norm": 1.319469928741455, "learning_rate": 3.248120152581097e-06, "loss": 0.3753, "step": 16688 }, { "epoch": 2.2317464562717304, "grad_norm": 1.5132969617843628, "learning_rate": 3.247055249457638e-06, "loss": 0.3655, "step": 16689 }, { "epoch": 2.2318801818668095, "grad_norm": 1.4819958209991455, "learning_rate": 3.2459904870928503e-06, "loss": 0.3456, "step": 16690 }, { "epoch": 2.232013907461888, "grad_norm": 1.3600480556488037, "learning_rate": 3.244925865508929e-06, "loss": 0.3763, "step": 16691 }, { "epoch": 2.232147633056967, "grad_norm": 1.4519842863082886, "learning_rate": 3.243861384728063e-06, "loss": 0.3876, "step": 16692 }, { "epoch": 2.232281358652046, "grad_norm": 1.550400733947754, "learning_rate": 3.2427970447724424e-06, "loss": 0.4092, "step": 16693 }, { "epoch": 2.232415084247125, "grad_norm": 1.380650281906128, "learning_rate": 3.2417328456642507e-06, "loss": 0.3817, "step": 16694 }, { "epoch": 2.232548809842204, "grad_norm": 1.512605905532837, "learning_rate": 3.2406687874256736e-06, "loss": 0.3907, "step": 16695 }, { "epoch": 2.2326825354372826, "grad_norm": 1.4997767210006714, "learning_rate": 3.239604870078883e-06, "loss": 0.3337, "step": 16696 }, { "epoch": 2.2328162610323616, "grad_norm": 1.6403921842575073, "learning_rate": 3.2385410936460616e-06, "loss": 0.3941, "step": 16697 }, { "epoch": 2.2329499866274407, "grad_norm": 1.4092366695404053, "learning_rate": 3.2374774581493816e-06, "loss": 0.3175, "step": 16698 }, { "epoch": 2.2330837122225193, "grad_norm": 1.4954997301101685, "learning_rate": 3.2364139636110127e-06, "loss": 0.3804, "step": 16699 }, { "epoch": 2.2332174378175984, "grad_norm": 1.6160610914230347, "learning_rate": 3.235350610053126e-06, "loss": 0.3702, "step": 16700 }, { "epoch": 2.233351163412677, "grad_norm": 1.3948166370391846, "learning_rate": 3.234287397497877e-06, "loss": 0.3526, "step": 16701 }, { "epoch": 2.233484889007756, "grad_norm": 1.4766731262207031, "learning_rate": 3.233224325967439e-06, "loss": 0.427, "step": 16702 }, { "epoch": 2.233618614602835, "grad_norm": 1.6584209203720093, "learning_rate": 3.2321613954839616e-06, "loss": 0.4122, "step": 16703 }, { "epoch": 2.233752340197914, "grad_norm": 1.5687224864959717, "learning_rate": 3.2310986060696038e-06, "loss": 0.3879, "step": 16704 }, { "epoch": 2.233886065792993, "grad_norm": 1.6024816036224365, "learning_rate": 3.230035957746518e-06, "loss": 0.4252, "step": 16705 }, { "epoch": 2.2340197913880715, "grad_norm": 1.7430874109268188, "learning_rate": 3.228973450536852e-06, "loss": 0.3805, "step": 16706 }, { "epoch": 2.2341535169831506, "grad_norm": 1.388566493988037, "learning_rate": 3.2279110844627616e-06, "loss": 0.3516, "step": 16707 }, { "epoch": 2.2342872425782296, "grad_norm": 1.499253273010254, "learning_rate": 3.2268488595463808e-06, "loss": 0.3372, "step": 16708 }, { "epoch": 2.2344209681733083, "grad_norm": 1.3733304738998413, "learning_rate": 3.225786775809855e-06, "loss": 0.3263, "step": 16709 }, { "epoch": 2.2345546937683873, "grad_norm": 1.5888887643814087, "learning_rate": 3.2247248332753213e-06, "loss": 0.4072, "step": 16710 }, { "epoch": 2.234688419363466, "grad_norm": 1.7010387182235718, "learning_rate": 3.223663031964914e-06, "loss": 0.4005, "step": 16711 }, { "epoch": 2.234822144958545, "grad_norm": 1.3099058866500854, "learning_rate": 3.2226013719007686e-06, "loss": 0.3151, "step": 16712 }, { "epoch": 2.234955870553624, "grad_norm": 1.47629714012146, "learning_rate": 3.2215398531050114e-06, "loss": 0.4032, "step": 16713 }, { "epoch": 2.2350895961487027, "grad_norm": 1.6433742046356201, "learning_rate": 3.22047847559977e-06, "loss": 0.3688, "step": 16714 }, { "epoch": 2.235223321743782, "grad_norm": 1.5814965963363647, "learning_rate": 3.2194172394071666e-06, "loss": 0.3592, "step": 16715 }, { "epoch": 2.2353570473388604, "grad_norm": 1.609731674194336, "learning_rate": 3.2183561445493226e-06, "loss": 0.4285, "step": 16716 }, { "epoch": 2.2354907729339395, "grad_norm": 1.5654329061508179, "learning_rate": 3.2172951910483564e-06, "loss": 0.3805, "step": 16717 }, { "epoch": 2.2356244985290186, "grad_norm": 1.3929680585861206, "learning_rate": 3.2162343789263807e-06, "loss": 0.3393, "step": 16718 }, { "epoch": 2.235758224124097, "grad_norm": 1.5375103950500488, "learning_rate": 3.2151737082055123e-06, "loss": 0.3915, "step": 16719 }, { "epoch": 2.2358919497191763, "grad_norm": 1.6191916465759277, "learning_rate": 3.2141131789078482e-06, "loss": 0.4022, "step": 16720 }, { "epoch": 2.2360256753142553, "grad_norm": 1.6870583295822144, "learning_rate": 3.2130527910555088e-06, "loss": 0.3837, "step": 16721 }, { "epoch": 2.236159400909334, "grad_norm": 1.377785563468933, "learning_rate": 3.2119925446705824e-06, "loss": 0.3931, "step": 16722 }, { "epoch": 2.236293126504413, "grad_norm": 1.4136121273040771, "learning_rate": 3.2109324397751818e-06, "loss": 0.3565, "step": 16723 }, { "epoch": 2.2364268520994917, "grad_norm": 1.3534873723983765, "learning_rate": 3.2098724763913958e-06, "loss": 0.36, "step": 16724 }, { "epoch": 2.2365605776945707, "grad_norm": 1.371701955795288, "learning_rate": 3.2088126545413168e-06, "loss": 0.3272, "step": 16725 }, { "epoch": 2.23669430328965, "grad_norm": 1.512346625328064, "learning_rate": 3.2077529742470472e-06, "loss": 0.378, "step": 16726 }, { "epoch": 2.2368280288847284, "grad_norm": 1.5362614393234253, "learning_rate": 3.2066934355306633e-06, "loss": 0.3613, "step": 16727 }, { "epoch": 2.2369617544798075, "grad_norm": 1.6169233322143555, "learning_rate": 3.2056340384142536e-06, "loss": 0.4157, "step": 16728 }, { "epoch": 2.2370954800748866, "grad_norm": 1.5522016286849976, "learning_rate": 3.2045747829199015e-06, "loss": 0.3906, "step": 16729 }, { "epoch": 2.237229205669965, "grad_norm": 1.4609475135803223, "learning_rate": 3.2035156690696857e-06, "loss": 0.3807, "step": 16730 }, { "epoch": 2.2373629312650443, "grad_norm": 1.6271857023239136, "learning_rate": 3.202456696885683e-06, "loss": 0.3914, "step": 16731 }, { "epoch": 2.237496656860123, "grad_norm": 1.3557190895080566, "learning_rate": 3.2013978663899647e-06, "loss": 0.3979, "step": 16732 }, { "epoch": 2.237630382455202, "grad_norm": 1.53298819065094, "learning_rate": 3.200339177604602e-06, "loss": 0.3979, "step": 16733 }, { "epoch": 2.237764108050281, "grad_norm": 1.458052635192871, "learning_rate": 3.199280630551663e-06, "loss": 0.352, "step": 16734 }, { "epoch": 2.2378978336453597, "grad_norm": 1.5469892024993896, "learning_rate": 3.1982222252532126e-06, "loss": 0.4259, "step": 16735 }, { "epoch": 2.2380315592404387, "grad_norm": 1.3807612657546997, "learning_rate": 3.197163961731311e-06, "loss": 0.3441, "step": 16736 }, { "epoch": 2.2381652848355174, "grad_norm": 1.3742730617523193, "learning_rate": 3.1961058400080157e-06, "loss": 0.3625, "step": 16737 }, { "epoch": 2.2382990104305964, "grad_norm": 1.5196279287338257, "learning_rate": 3.1950478601053847e-06, "loss": 0.3912, "step": 16738 }, { "epoch": 2.2384327360256755, "grad_norm": 1.5679413080215454, "learning_rate": 3.19399002204547e-06, "loss": 0.3952, "step": 16739 }, { "epoch": 2.238566461620754, "grad_norm": 1.6415464878082275, "learning_rate": 3.192932325850323e-06, "loss": 0.4257, "step": 16740 }, { "epoch": 2.238700187215833, "grad_norm": 1.5730862617492676, "learning_rate": 3.1918747715419808e-06, "loss": 0.397, "step": 16741 }, { "epoch": 2.238833912810912, "grad_norm": 1.4044052362442017, "learning_rate": 3.190817359142502e-06, "loss": 0.3587, "step": 16742 }, { "epoch": 2.238967638405991, "grad_norm": 1.6410927772521973, "learning_rate": 3.1897600886739134e-06, "loss": 0.4045, "step": 16743 }, { "epoch": 2.23910136400107, "grad_norm": 1.4425358772277832, "learning_rate": 3.1887029601582607e-06, "loss": 0.3626, "step": 16744 }, { "epoch": 2.2392350895961486, "grad_norm": 1.5258649587631226, "learning_rate": 3.1876459736175815e-06, "loss": 0.3659, "step": 16745 }, { "epoch": 2.2393688151912277, "grad_norm": 1.2879316806793213, "learning_rate": 3.1865891290738972e-06, "loss": 0.3221, "step": 16746 }, { "epoch": 2.2395025407863063, "grad_norm": 1.396704077720642, "learning_rate": 3.1855324265492483e-06, "loss": 0.3736, "step": 16747 }, { "epoch": 2.2396362663813854, "grad_norm": 1.6822084188461304, "learning_rate": 3.1844758660656528e-06, "loss": 0.3955, "step": 16748 }, { "epoch": 2.2397699919764644, "grad_norm": 1.6863566637039185, "learning_rate": 3.1834194476451352e-06, "loss": 0.3774, "step": 16749 }, { "epoch": 2.239903717571543, "grad_norm": 1.536750078201294, "learning_rate": 3.182363171309717e-06, "loss": 0.3797, "step": 16750 }, { "epoch": 2.240037443166622, "grad_norm": 1.579534649848938, "learning_rate": 3.1813070370814112e-06, "loss": 0.4259, "step": 16751 }, { "epoch": 2.240171168761701, "grad_norm": 1.397848129272461, "learning_rate": 3.180251044982242e-06, "loss": 0.338, "step": 16752 }, { "epoch": 2.24030489435678, "grad_norm": 1.5751878023147583, "learning_rate": 3.1791951950342117e-06, "loss": 0.3764, "step": 16753 }, { "epoch": 2.240438619951859, "grad_norm": 1.519509196281433, "learning_rate": 3.1781394872593296e-06, "loss": 0.3767, "step": 16754 }, { "epoch": 2.2405723455469375, "grad_norm": 1.4400750398635864, "learning_rate": 3.1770839216796025e-06, "loss": 0.3477, "step": 16755 }, { "epoch": 2.2407060711420166, "grad_norm": 1.5375254154205322, "learning_rate": 3.176028498317032e-06, "loss": 0.3739, "step": 16756 }, { "epoch": 2.2408397967370957, "grad_norm": 1.5784186124801636, "learning_rate": 3.1749732171936176e-06, "loss": 0.3808, "step": 16757 }, { "epoch": 2.2409735223321743, "grad_norm": 1.6434003114700317, "learning_rate": 3.1739180783313563e-06, "loss": 0.387, "step": 16758 }, { "epoch": 2.2411072479272534, "grad_norm": 1.5441961288452148, "learning_rate": 3.1728630817522397e-06, "loss": 0.3808, "step": 16759 }, { "epoch": 2.241240973522332, "grad_norm": 1.3710834980010986, "learning_rate": 3.1718082274782604e-06, "loss": 0.3453, "step": 16760 }, { "epoch": 2.241374699117411, "grad_norm": 1.4352635145187378, "learning_rate": 3.170753515531407e-06, "loss": 0.3584, "step": 16761 }, { "epoch": 2.24150842471249, "grad_norm": 1.4013431072235107, "learning_rate": 3.169698945933656e-06, "loss": 0.3165, "step": 16762 }, { "epoch": 2.2416421503075687, "grad_norm": 1.3648494482040405, "learning_rate": 3.1686445187069968e-06, "loss": 0.3452, "step": 16763 }, { "epoch": 2.241775875902648, "grad_norm": 1.396718978881836, "learning_rate": 3.16759023387341e-06, "loss": 0.3354, "step": 16764 }, { "epoch": 2.241909601497727, "grad_norm": 1.4493862390518188, "learning_rate": 3.1665360914548603e-06, "loss": 0.38, "step": 16765 }, { "epoch": 2.2420433270928055, "grad_norm": 1.2586445808410645, "learning_rate": 3.165482091473333e-06, "loss": 0.3326, "step": 16766 }, { "epoch": 2.2421770526878846, "grad_norm": 1.4625751972198486, "learning_rate": 3.1644282339507847e-06, "loss": 0.3638, "step": 16767 }, { "epoch": 2.242310778282963, "grad_norm": 1.3744359016418457, "learning_rate": 3.163374518909197e-06, "loss": 0.3301, "step": 16768 }, { "epoch": 2.2424445038780423, "grad_norm": 1.4949232339859009, "learning_rate": 3.1623209463705207e-06, "loss": 0.3748, "step": 16769 }, { "epoch": 2.2425782294731214, "grad_norm": 1.4209668636322021, "learning_rate": 3.1612675163567186e-06, "loss": 0.3752, "step": 16770 }, { "epoch": 2.2427119550682, "grad_norm": 1.4952473640441895, "learning_rate": 3.1602142288897575e-06, "loss": 0.3348, "step": 16771 }, { "epoch": 2.242845680663279, "grad_norm": 1.5211883783340454, "learning_rate": 3.1591610839915822e-06, "loss": 0.3615, "step": 16772 }, { "epoch": 2.2429794062583577, "grad_norm": 1.39066743850708, "learning_rate": 3.1581080816841492e-06, "loss": 0.3432, "step": 16773 }, { "epoch": 2.2431131318534367, "grad_norm": 1.3620537519454956, "learning_rate": 3.1570552219894055e-06, "loss": 0.3018, "step": 16774 }, { "epoch": 2.243246857448516, "grad_norm": 1.355678677558899, "learning_rate": 3.1560025049292973e-06, "loss": 0.3263, "step": 16775 }, { "epoch": 2.2433805830435944, "grad_norm": 1.6843209266662598, "learning_rate": 3.154949930525769e-06, "loss": 0.3909, "step": 16776 }, { "epoch": 2.2435143086386735, "grad_norm": 1.4969115257263184, "learning_rate": 3.1538974988007587e-06, "loss": 0.3595, "step": 16777 }, { "epoch": 2.243648034233752, "grad_norm": 1.4056955575942993, "learning_rate": 3.152845209776204e-06, "loss": 0.356, "step": 16778 }, { "epoch": 2.243781759828831, "grad_norm": 1.7109324932098389, "learning_rate": 3.151793063474039e-06, "loss": 0.4431, "step": 16779 }, { "epoch": 2.2439154854239103, "grad_norm": 1.5168843269348145, "learning_rate": 3.150741059916198e-06, "loss": 0.4179, "step": 16780 }, { "epoch": 2.244049211018989, "grad_norm": 1.3413865566253662, "learning_rate": 3.1496891991245994e-06, "loss": 0.3371, "step": 16781 }, { "epoch": 2.244182936614068, "grad_norm": 1.4487597942352295, "learning_rate": 3.148637481121177e-06, "loss": 0.3486, "step": 16782 }, { "epoch": 2.2443166622091466, "grad_norm": 1.455621361732483, "learning_rate": 3.1475859059278502e-06, "loss": 0.33, "step": 16783 }, { "epoch": 2.2444503878042257, "grad_norm": 1.5780792236328125, "learning_rate": 3.146534473566539e-06, "loss": 0.3956, "step": 16784 }, { "epoch": 2.2445841133993047, "grad_norm": 1.7076690196990967, "learning_rate": 3.1454831840591616e-06, "loss": 0.404, "step": 16785 }, { "epoch": 2.2447178389943834, "grad_norm": 1.6057820320129395, "learning_rate": 3.1444320374276203e-06, "loss": 0.3566, "step": 16786 }, { "epoch": 2.2448515645894624, "grad_norm": 1.6587820053100586, "learning_rate": 3.143381033693842e-06, "loss": 0.4041, "step": 16787 }, { "epoch": 2.2449852901845415, "grad_norm": 1.6360174417495728, "learning_rate": 3.1423301728797197e-06, "loss": 0.3964, "step": 16788 }, { "epoch": 2.24511901577962, "grad_norm": 1.3965563774108887, "learning_rate": 3.14127945500716e-06, "loss": 0.3468, "step": 16789 }, { "epoch": 2.245252741374699, "grad_norm": 1.6767451763153076, "learning_rate": 3.140228880098074e-06, "loss": 0.4598, "step": 16790 }, { "epoch": 2.245386466969778, "grad_norm": 1.6556202173233032, "learning_rate": 3.139178448174347e-06, "loss": 0.4531, "step": 16791 }, { "epoch": 2.245520192564857, "grad_norm": 1.63777756690979, "learning_rate": 3.138128159257885e-06, "loss": 0.3827, "step": 16792 }, { "epoch": 2.245653918159936, "grad_norm": 1.5401463508605957, "learning_rate": 3.1370780133705737e-06, "loss": 0.3309, "step": 16793 }, { "epoch": 2.2457876437550146, "grad_norm": 1.5661907196044922, "learning_rate": 3.136028010534303e-06, "loss": 0.4333, "step": 16794 }, { "epoch": 2.2459213693500937, "grad_norm": 1.525539517402649, "learning_rate": 3.1349781507709607e-06, "loss": 0.324, "step": 16795 }, { "epoch": 2.2460550949451723, "grad_norm": 1.4739433526992798, "learning_rate": 3.13392843410243e-06, "loss": 0.3478, "step": 16796 }, { "epoch": 2.2461888205402514, "grad_norm": 1.343876600265503, "learning_rate": 3.132878860550591e-06, "loss": 0.3575, "step": 16797 }, { "epoch": 2.2463225461353304, "grad_norm": 1.5756762027740479, "learning_rate": 3.131829430137321e-06, "loss": 0.3847, "step": 16798 }, { "epoch": 2.246456271730409, "grad_norm": 1.4930800199508667, "learning_rate": 3.130780142884494e-06, "loss": 0.3734, "step": 16799 }, { "epoch": 2.246589997325488, "grad_norm": 1.5194464921951294, "learning_rate": 3.1297309988139824e-06, "loss": 0.357, "step": 16800 }, { "epoch": 2.246723722920567, "grad_norm": 1.6453365087509155, "learning_rate": 3.1286819979476533e-06, "loss": 0.3771, "step": 16801 }, { "epoch": 2.246857448515646, "grad_norm": 1.3833181858062744, "learning_rate": 3.1276331403073733e-06, "loss": 0.3763, "step": 16802 }, { "epoch": 2.246991174110725, "grad_norm": 1.5450055599212646, "learning_rate": 3.1265844259150035e-06, "loss": 0.3516, "step": 16803 }, { "epoch": 2.2471248997058035, "grad_norm": 1.8055185079574585, "learning_rate": 3.1255358547924084e-06, "loss": 0.4233, "step": 16804 }, { "epoch": 2.2472586253008826, "grad_norm": 1.5006890296936035, "learning_rate": 3.1244874269614335e-06, "loss": 0.3726, "step": 16805 }, { "epoch": 2.2473923508959617, "grad_norm": 1.4413305521011353, "learning_rate": 3.123439142443946e-06, "loss": 0.3382, "step": 16806 }, { "epoch": 2.2475260764910403, "grad_norm": 1.3707932233810425, "learning_rate": 3.122391001261782e-06, "loss": 0.3805, "step": 16807 }, { "epoch": 2.2476598020861194, "grad_norm": 1.3762987852096558, "learning_rate": 3.1213430034367995e-06, "loss": 0.3413, "step": 16808 }, { "epoch": 2.247793527681198, "grad_norm": 1.5467716455459595, "learning_rate": 3.120295148990845e-06, "loss": 0.4057, "step": 16809 }, { "epoch": 2.247927253276277, "grad_norm": 1.6736401319503784, "learning_rate": 3.119247437945747e-06, "loss": 0.3914, "step": 16810 }, { "epoch": 2.248060978871356, "grad_norm": 1.400375485420227, "learning_rate": 3.1181998703233584e-06, "loss": 0.35, "step": 16811 }, { "epoch": 2.2481947044664348, "grad_norm": 1.4999583959579468, "learning_rate": 3.117152446145506e-06, "loss": 0.3876, "step": 16812 }, { "epoch": 2.248328430061514, "grad_norm": 1.5084211826324463, "learning_rate": 3.1161051654340236e-06, "loss": 0.3768, "step": 16813 }, { "epoch": 2.2484621556565925, "grad_norm": 1.5979585647583008, "learning_rate": 3.1150580282107425e-06, "loss": 0.3922, "step": 16814 }, { "epoch": 2.2485958812516715, "grad_norm": 1.420744776725769, "learning_rate": 3.114011034497485e-06, "loss": 0.3581, "step": 16815 }, { "epoch": 2.2487296068467506, "grad_norm": 1.466162919998169, "learning_rate": 3.1129641843160854e-06, "loss": 0.3758, "step": 16816 }, { "epoch": 2.2488633324418292, "grad_norm": 1.5573830604553223, "learning_rate": 3.111917477688353e-06, "loss": 0.376, "step": 16817 }, { "epoch": 2.2489970580369083, "grad_norm": 1.4667078256607056, "learning_rate": 3.1108709146361106e-06, "loss": 0.3821, "step": 16818 }, { "epoch": 2.249130783631987, "grad_norm": 1.5942108631134033, "learning_rate": 3.1098244951811718e-06, "loss": 0.3951, "step": 16819 }, { "epoch": 2.249264509227066, "grad_norm": 1.6193267107009888, "learning_rate": 3.1087782193453477e-06, "loss": 0.3846, "step": 16820 }, { "epoch": 2.249398234822145, "grad_norm": 1.5907152891159058, "learning_rate": 3.107732087150447e-06, "loss": 0.408, "step": 16821 }, { "epoch": 2.2495319604172237, "grad_norm": 1.5456286668777466, "learning_rate": 3.106686098618277e-06, "loss": 0.4067, "step": 16822 }, { "epoch": 2.2496656860123028, "grad_norm": 1.4454903602600098, "learning_rate": 3.1056402537706375e-06, "loss": 0.3455, "step": 16823 }, { "epoch": 2.249799411607382, "grad_norm": 1.3326643705368042, "learning_rate": 3.1045945526293307e-06, "loss": 0.3482, "step": 16824 }, { "epoch": 2.2499331372024605, "grad_norm": 1.5931422710418701, "learning_rate": 3.1035489952161556e-06, "loss": 0.3728, "step": 16825 }, { "epoch": 2.2500668627975395, "grad_norm": 1.6935925483703613, "learning_rate": 3.102503581552896e-06, "loss": 0.3908, "step": 16826 }, { "epoch": 2.250200588392618, "grad_norm": 1.5537736415863037, "learning_rate": 3.101458311661352e-06, "loss": 0.3751, "step": 16827 }, { "epoch": 2.2503343139876972, "grad_norm": 1.660982370376587, "learning_rate": 3.100413185563309e-06, "loss": 0.3937, "step": 16828 }, { "epoch": 2.2504680395827763, "grad_norm": 1.6677594184875488, "learning_rate": 3.0993682032805507e-06, "loss": 0.3872, "step": 16829 }, { "epoch": 2.250601765177855, "grad_norm": 1.5561074018478394, "learning_rate": 3.0983233648348608e-06, "loss": 0.3426, "step": 16830 }, { "epoch": 2.250735490772934, "grad_norm": 1.4261614084243774, "learning_rate": 3.0972786702480116e-06, "loss": 0.3578, "step": 16831 }, { "epoch": 2.250869216368013, "grad_norm": 1.276711106300354, "learning_rate": 3.096234119541789e-06, "loss": 0.3258, "step": 16832 }, { "epoch": 2.2510029419630917, "grad_norm": 1.5506370067596436, "learning_rate": 3.095189712737957e-06, "loss": 0.4213, "step": 16833 }, { "epoch": 2.2511366675581708, "grad_norm": 1.4372632503509521, "learning_rate": 3.0941454498582847e-06, "loss": 0.3456, "step": 16834 }, { "epoch": 2.2512703931532494, "grad_norm": 1.4412713050842285, "learning_rate": 3.0931013309245484e-06, "loss": 0.3939, "step": 16835 }, { "epoch": 2.2514041187483285, "grad_norm": 1.3859285116195679, "learning_rate": 3.0920573559585e-06, "loss": 0.3687, "step": 16836 }, { "epoch": 2.2515378443434075, "grad_norm": 1.4443210363388062, "learning_rate": 3.0910135249819116e-06, "loss": 0.3856, "step": 16837 }, { "epoch": 2.251671569938486, "grad_norm": 1.4229679107666016, "learning_rate": 3.089969838016532e-06, "loss": 0.3545, "step": 16838 }, { "epoch": 2.2518052955335652, "grad_norm": 1.564966082572937, "learning_rate": 3.0889262950841205e-06, "loss": 0.3865, "step": 16839 }, { "epoch": 2.251939021128644, "grad_norm": 1.4747819900512695, "learning_rate": 3.0878828962064256e-06, "loss": 0.368, "step": 16840 }, { "epoch": 2.252072746723723, "grad_norm": 1.4413813352584839, "learning_rate": 3.086839641405197e-06, "loss": 0.374, "step": 16841 }, { "epoch": 2.252206472318802, "grad_norm": 1.4476912021636963, "learning_rate": 3.085796530702182e-06, "loss": 0.3595, "step": 16842 }, { "epoch": 2.2523401979138806, "grad_norm": 1.414391040802002, "learning_rate": 3.084753564119122e-06, "loss": 0.3709, "step": 16843 }, { "epoch": 2.2524739235089597, "grad_norm": 1.5013394355773926, "learning_rate": 3.083710741677757e-06, "loss": 0.3413, "step": 16844 }, { "epoch": 2.2526076491040383, "grad_norm": 1.5913958549499512, "learning_rate": 3.082668063399823e-06, "loss": 0.366, "step": 16845 }, { "epoch": 2.2527413746991174, "grad_norm": 1.4901087284088135, "learning_rate": 3.081625529307054e-06, "loss": 0.3804, "step": 16846 }, { "epoch": 2.2528751002941965, "grad_norm": 1.5260628461837769, "learning_rate": 3.0805831394211805e-06, "loss": 0.377, "step": 16847 }, { "epoch": 2.253008825889275, "grad_norm": 1.7693425416946411, "learning_rate": 3.0795408937639313e-06, "loss": 0.4362, "step": 16848 }, { "epoch": 2.253142551484354, "grad_norm": 1.5618743896484375, "learning_rate": 3.078498792357032e-06, "loss": 0.382, "step": 16849 }, { "epoch": 2.253276277079433, "grad_norm": 1.6441594362258911, "learning_rate": 3.0774568352221966e-06, "loss": 0.3931, "step": 16850 }, { "epoch": 2.253410002674512, "grad_norm": 1.617983341217041, "learning_rate": 3.076415022381155e-06, "loss": 0.4092, "step": 16851 }, { "epoch": 2.253543728269591, "grad_norm": 1.704642653465271, "learning_rate": 3.0753733538556117e-06, "loss": 0.4183, "step": 16852 }, { "epoch": 2.2536774538646696, "grad_norm": 1.4673752784729004, "learning_rate": 3.0743318296672876e-06, "loss": 0.3547, "step": 16853 }, { "epoch": 2.2538111794597486, "grad_norm": 1.397048830986023, "learning_rate": 3.0732904498378925e-06, "loss": 0.3578, "step": 16854 }, { "epoch": 2.2539449050548273, "grad_norm": 1.3881217241287231, "learning_rate": 3.0722492143891223e-06, "loss": 0.331, "step": 16855 }, { "epoch": 2.2540786306499063, "grad_norm": 1.498417615890503, "learning_rate": 3.071208123342696e-06, "loss": 0.3995, "step": 16856 }, { "epoch": 2.2542123562449854, "grad_norm": 1.47802734375, "learning_rate": 3.070167176720302e-06, "loss": 0.3771, "step": 16857 }, { "epoch": 2.254346081840064, "grad_norm": 1.3491392135620117, "learning_rate": 3.069126374543643e-06, "loss": 0.3529, "step": 16858 }, { "epoch": 2.254479807435143, "grad_norm": 1.5695017576217651, "learning_rate": 3.0680857168344123e-06, "loss": 0.3975, "step": 16859 }, { "epoch": 2.254613533030222, "grad_norm": 1.3904234170913696, "learning_rate": 3.0670452036142986e-06, "loss": 0.3522, "step": 16860 }, { "epoch": 2.254747258625301, "grad_norm": 1.569503664970398, "learning_rate": 3.066004834905e-06, "loss": 0.3703, "step": 16861 }, { "epoch": 2.25488098422038, "grad_norm": 1.488072156906128, "learning_rate": 3.0649646107281917e-06, "loss": 0.3657, "step": 16862 }, { "epoch": 2.2550147098154585, "grad_norm": 1.5880361795425415, "learning_rate": 3.06392453110556e-06, "loss": 0.3761, "step": 16863 }, { "epoch": 2.2551484354105376, "grad_norm": 1.4313229322433472, "learning_rate": 3.062884596058784e-06, "loss": 0.3255, "step": 16864 }, { "epoch": 2.2552821610056166, "grad_norm": 1.5292876958847046, "learning_rate": 3.0618448056095417e-06, "loss": 0.3902, "step": 16865 }, { "epoch": 2.2554158866006953, "grad_norm": 1.6403310298919678, "learning_rate": 3.0608051597795043e-06, "loss": 0.3997, "step": 16866 }, { "epoch": 2.2555496121957743, "grad_norm": 1.4868853092193604, "learning_rate": 3.0597656585903435e-06, "loss": 0.3567, "step": 16867 }, { "epoch": 2.2556833377908534, "grad_norm": 1.5466123819351196, "learning_rate": 3.058726302063727e-06, "loss": 0.3745, "step": 16868 }, { "epoch": 2.255817063385932, "grad_norm": 1.6676883697509766, "learning_rate": 3.0576870902213186e-06, "loss": 0.4401, "step": 16869 }, { "epoch": 2.255950788981011, "grad_norm": 1.7351393699645996, "learning_rate": 3.056648023084783e-06, "loss": 0.4133, "step": 16870 }, { "epoch": 2.2560845145760897, "grad_norm": 1.3027445077896118, "learning_rate": 3.0556091006757684e-06, "loss": 0.3135, "step": 16871 }, { "epoch": 2.256218240171169, "grad_norm": 1.5714423656463623, "learning_rate": 3.0545703230159394e-06, "loss": 0.3948, "step": 16872 }, { "epoch": 2.256351965766248, "grad_norm": 1.6864912509918213, "learning_rate": 3.053531690126951e-06, "loss": 0.3792, "step": 16873 }, { "epoch": 2.2564856913613265, "grad_norm": 1.6888189315795898, "learning_rate": 3.05249320203044e-06, "loss": 0.3768, "step": 16874 }, { "epoch": 2.2566194169564056, "grad_norm": 1.7319388389587402, "learning_rate": 3.0514548587480663e-06, "loss": 0.408, "step": 16875 }, { "epoch": 2.256753142551484, "grad_norm": 1.47159743309021, "learning_rate": 3.050416660301462e-06, "loss": 0.3692, "step": 16876 }, { "epoch": 2.2568868681465633, "grad_norm": 1.4730385541915894, "learning_rate": 3.0493786067122764e-06, "loss": 0.3749, "step": 16877 }, { "epoch": 2.2570205937416423, "grad_norm": 1.6782777309417725, "learning_rate": 3.0483406980021414e-06, "loss": 0.3657, "step": 16878 }, { "epoch": 2.257154319336721, "grad_norm": 1.7455781698226929, "learning_rate": 3.0473029341926897e-06, "loss": 0.3915, "step": 16879 }, { "epoch": 2.2572880449318, "grad_norm": 1.4600164890289307, "learning_rate": 3.0462653153055612e-06, "loss": 0.3631, "step": 16880 }, { "epoch": 2.2574217705268786, "grad_norm": 1.6327508687973022, "learning_rate": 3.0452278413623736e-06, "loss": 0.4067, "step": 16881 }, { "epoch": 2.2575554961219577, "grad_norm": 1.8765074014663696, "learning_rate": 3.0441905123847583e-06, "loss": 0.4633, "step": 16882 }, { "epoch": 2.257689221717037, "grad_norm": 1.325674057006836, "learning_rate": 3.043153328394335e-06, "loss": 0.356, "step": 16883 }, { "epoch": 2.2578229473121154, "grad_norm": 1.666165828704834, "learning_rate": 3.042116289412724e-06, "loss": 0.4178, "step": 16884 }, { "epoch": 2.2579566729071945, "grad_norm": 1.5565783977508545, "learning_rate": 3.0410793954615414e-06, "loss": 0.356, "step": 16885 }, { "epoch": 2.258090398502273, "grad_norm": 1.5822184085845947, "learning_rate": 3.040042646562399e-06, "loss": 0.3531, "step": 16886 }, { "epoch": 2.258224124097352, "grad_norm": 1.6373484134674072, "learning_rate": 3.0390060427369074e-06, "loss": 0.3938, "step": 16887 }, { "epoch": 2.2583578496924313, "grad_norm": 1.5109983682632446, "learning_rate": 3.037969584006675e-06, "loss": 0.3627, "step": 16888 }, { "epoch": 2.25849157528751, "grad_norm": 1.4306840896606445, "learning_rate": 3.0369332703933073e-06, "loss": 0.3952, "step": 16889 }, { "epoch": 2.258625300882589, "grad_norm": 1.3334693908691406, "learning_rate": 3.035897101918396e-06, "loss": 0.342, "step": 16890 }, { "epoch": 2.258759026477668, "grad_norm": 1.5721312761306763, "learning_rate": 3.034861078603549e-06, "loss": 0.366, "step": 16891 }, { "epoch": 2.2588927520727466, "grad_norm": 1.294703722000122, "learning_rate": 3.0338252004703583e-06, "loss": 0.3022, "step": 16892 }, { "epoch": 2.2590264776678257, "grad_norm": 1.5428558588027954, "learning_rate": 3.0327894675404155e-06, "loss": 0.39, "step": 16893 }, { "epoch": 2.2591602032629043, "grad_norm": 1.7050325870513916, "learning_rate": 3.0317538798353117e-06, "loss": 0.3884, "step": 16894 }, { "epoch": 2.2592939288579834, "grad_norm": 1.4307669401168823, "learning_rate": 3.030718437376625e-06, "loss": 0.3884, "step": 16895 }, { "epoch": 2.2594276544530625, "grad_norm": 1.4387240409851074, "learning_rate": 3.0296831401859494e-06, "loss": 0.3512, "step": 16896 }, { "epoch": 2.259561380048141, "grad_norm": 1.3711886405944824, "learning_rate": 3.028647988284855e-06, "loss": 0.3344, "step": 16897 }, { "epoch": 2.25969510564322, "grad_norm": 1.5028516054153442, "learning_rate": 3.0276129816949207e-06, "loss": 0.363, "step": 16898 }, { "epoch": 2.2598288312382993, "grad_norm": 1.4393095970153809, "learning_rate": 3.0265781204377278e-06, "loss": 0.379, "step": 16899 }, { "epoch": 2.259962556833378, "grad_norm": 1.4832910299301147, "learning_rate": 3.0255434045348344e-06, "loss": 0.3781, "step": 16900 }, { "epoch": 2.260096282428457, "grad_norm": 1.5236473083496094, "learning_rate": 3.024508834007821e-06, "loss": 0.4021, "step": 16901 }, { "epoch": 2.2602300080235356, "grad_norm": 1.5917166471481323, "learning_rate": 3.0234744088782443e-06, "loss": 0.3763, "step": 16902 }, { "epoch": 2.2603637336186146, "grad_norm": 1.438387393951416, "learning_rate": 3.022440129167666e-06, "loss": 0.3655, "step": 16903 }, { "epoch": 2.2604974592136937, "grad_norm": 1.4679757356643677, "learning_rate": 3.021405994897647e-06, "loss": 0.3502, "step": 16904 }, { "epoch": 2.2606311848087723, "grad_norm": 1.3614897727966309, "learning_rate": 3.0203720060897434e-06, "loss": 0.3595, "step": 16905 }, { "epoch": 2.2607649104038514, "grad_norm": 1.4986467361450195, "learning_rate": 3.019338162765505e-06, "loss": 0.3754, "step": 16906 }, { "epoch": 2.26089863599893, "grad_norm": 1.7912352085113525, "learning_rate": 3.018304464946483e-06, "loss": 0.4413, "step": 16907 }, { "epoch": 2.261032361594009, "grad_norm": 1.600490927696228, "learning_rate": 3.0172709126542244e-06, "loss": 0.4243, "step": 16908 }, { "epoch": 2.261166087189088, "grad_norm": 1.5026960372924805, "learning_rate": 3.016237505910272e-06, "loss": 0.4, "step": 16909 }, { "epoch": 2.261299812784167, "grad_norm": 1.5268440246582031, "learning_rate": 3.015204244736166e-06, "loss": 0.3742, "step": 16910 }, { "epoch": 2.261433538379246, "grad_norm": 1.6370006799697876, "learning_rate": 3.0141711291534435e-06, "loss": 0.3794, "step": 16911 }, { "epoch": 2.2615672639743245, "grad_norm": 1.5226500034332275, "learning_rate": 3.0131381591836385e-06, "loss": 0.3957, "step": 16912 }, { "epoch": 2.2617009895694036, "grad_norm": 1.3329479694366455, "learning_rate": 3.0121053348482844e-06, "loss": 0.3461, "step": 16913 }, { "epoch": 2.2618347151644826, "grad_norm": 1.4048888683319092, "learning_rate": 3.011072656168906e-06, "loss": 0.3324, "step": 16914 }, { "epoch": 2.2619684407595613, "grad_norm": 1.4553167819976807, "learning_rate": 3.0100401231670353e-06, "loss": 0.3552, "step": 16915 }, { "epoch": 2.2621021663546403, "grad_norm": 1.495451807975769, "learning_rate": 3.009007735864182e-06, "loss": 0.3481, "step": 16916 }, { "epoch": 2.262235891949719, "grad_norm": 1.4222584962844849, "learning_rate": 3.007975494281876e-06, "loss": 0.3523, "step": 16917 }, { "epoch": 2.262369617544798, "grad_norm": 1.4308780431747437, "learning_rate": 3.006943398441634e-06, "loss": 0.384, "step": 16918 }, { "epoch": 2.262503343139877, "grad_norm": 1.3845402002334595, "learning_rate": 3.005911448364959e-06, "loss": 0.3769, "step": 16919 }, { "epoch": 2.2626370687349557, "grad_norm": 1.6162265539169312, "learning_rate": 3.004879644073373e-06, "loss": 0.411, "step": 16920 }, { "epoch": 2.262770794330035, "grad_norm": 1.5653401613235474, "learning_rate": 3.0038479855883705e-06, "loss": 0.3771, "step": 16921 }, { "epoch": 2.2629045199251134, "grad_norm": 1.6274349689483643, "learning_rate": 3.00281647293147e-06, "loss": 0.3873, "step": 16922 }, { "epoch": 2.2630382455201925, "grad_norm": 1.4817800521850586, "learning_rate": 3.00178510612416e-06, "loss": 0.4002, "step": 16923 }, { "epoch": 2.2631719711152716, "grad_norm": 1.4545910358428955, "learning_rate": 3.0007538851879435e-06, "loss": 0.3798, "step": 16924 }, { "epoch": 2.26330569671035, "grad_norm": 1.478630542755127, "learning_rate": 2.9997228101443143e-06, "loss": 0.3851, "step": 16925 }, { "epoch": 2.2634394223054293, "grad_norm": 1.4287770986557007, "learning_rate": 2.998691881014765e-06, "loss": 0.3808, "step": 16926 }, { "epoch": 2.2635731479005083, "grad_norm": 1.604519248008728, "learning_rate": 2.997661097820784e-06, "loss": 0.3803, "step": 16927 }, { "epoch": 2.263706873495587, "grad_norm": 1.423415184020996, "learning_rate": 2.996630460583857e-06, "loss": 0.3694, "step": 16928 }, { "epoch": 2.263840599090666, "grad_norm": 1.5680296421051025, "learning_rate": 2.9955999693254656e-06, "loss": 0.4067, "step": 16929 }, { "epoch": 2.2639743246857447, "grad_norm": 1.3366303443908691, "learning_rate": 2.9945696240670905e-06, "loss": 0.3478, "step": 16930 }, { "epoch": 2.2641080502808237, "grad_norm": 1.4197465181350708, "learning_rate": 2.9935394248302097e-06, "loss": 0.3239, "step": 16931 }, { "epoch": 2.264241775875903, "grad_norm": 1.289496660232544, "learning_rate": 2.992509371636294e-06, "loss": 0.319, "step": 16932 }, { "epoch": 2.2643755014709814, "grad_norm": 1.4601109027862549, "learning_rate": 2.9914794645068147e-06, "loss": 0.3488, "step": 16933 }, { "epoch": 2.2645092270660605, "grad_norm": 1.7249987125396729, "learning_rate": 2.990449703463243e-06, "loss": 0.4339, "step": 16934 }, { "epoch": 2.2646429526611396, "grad_norm": 1.475365400314331, "learning_rate": 2.9894200885270342e-06, "loss": 0.4152, "step": 16935 }, { "epoch": 2.264776678256218, "grad_norm": 1.727491021156311, "learning_rate": 2.988390619719658e-06, "loss": 0.3644, "step": 16936 }, { "epoch": 2.2649104038512973, "grad_norm": 1.5721575021743774, "learning_rate": 2.9873612970625687e-06, "loss": 0.3673, "step": 16937 }, { "epoch": 2.265044129446376, "grad_norm": 1.7165000438690186, "learning_rate": 2.9863321205772243e-06, "loss": 0.4023, "step": 16938 }, { "epoch": 2.265177855041455, "grad_norm": 1.4183181524276733, "learning_rate": 2.985303090285078e-06, "loss": 0.3731, "step": 16939 }, { "epoch": 2.265311580636534, "grad_norm": 1.267835021018982, "learning_rate": 2.9842742062075703e-06, "loss": 0.3599, "step": 16940 }, { "epoch": 2.2654453062316127, "grad_norm": 1.4425122737884521, "learning_rate": 2.9832454683661595e-06, "loss": 0.3084, "step": 16941 }, { "epoch": 2.2655790318266917, "grad_norm": 1.434779167175293, "learning_rate": 2.98221687678228e-06, "loss": 0.3928, "step": 16942 }, { "epoch": 2.2657127574217704, "grad_norm": 1.5460067987442017, "learning_rate": 2.981188431477371e-06, "loss": 0.3749, "step": 16943 }, { "epoch": 2.2658464830168494, "grad_norm": 1.5429136753082275, "learning_rate": 2.980160132472879e-06, "loss": 0.3626, "step": 16944 }, { "epoch": 2.2659802086119285, "grad_norm": 1.5830225944519043, "learning_rate": 2.979131979790225e-06, "loss": 0.3821, "step": 16945 }, { "epoch": 2.266113934207007, "grad_norm": 1.5474716424942017, "learning_rate": 2.9781039734508543e-06, "loss": 0.3506, "step": 16946 }, { "epoch": 2.266247659802086, "grad_norm": 1.5434056520462036, "learning_rate": 2.9770761134761828e-06, "loss": 0.3898, "step": 16947 }, { "epoch": 2.266381385397165, "grad_norm": 1.436893343925476, "learning_rate": 2.97604839988764e-06, "loss": 0.3967, "step": 16948 }, { "epoch": 2.266515110992244, "grad_norm": 1.6894923448562622, "learning_rate": 2.9750208327066466e-06, "loss": 0.368, "step": 16949 }, { "epoch": 2.266648836587323, "grad_norm": 1.4925389289855957, "learning_rate": 2.973993411954622e-06, "loss": 0.3575, "step": 16950 }, { "epoch": 2.2667825621824016, "grad_norm": 1.543095588684082, "learning_rate": 2.972966137652983e-06, "loss": 0.37, "step": 16951 }, { "epoch": 2.2669162877774807, "grad_norm": 1.6135847568511963, "learning_rate": 2.9719390098231384e-06, "loss": 0.3478, "step": 16952 }, { "epoch": 2.2670500133725593, "grad_norm": 1.347380518913269, "learning_rate": 2.9709120284865012e-06, "loss": 0.3335, "step": 16953 }, { "epoch": 2.2671837389676384, "grad_norm": 1.5861824750900269, "learning_rate": 2.9698851936644767e-06, "loss": 0.4158, "step": 16954 }, { "epoch": 2.2673174645627174, "grad_norm": 1.5730758905410767, "learning_rate": 2.968858505378468e-06, "loss": 0.4128, "step": 16955 }, { "epoch": 2.267451190157796, "grad_norm": 1.415982961654663, "learning_rate": 2.9678319636498752e-06, "loss": 0.3957, "step": 16956 }, { "epoch": 2.267584915752875, "grad_norm": 1.449568748474121, "learning_rate": 2.9668055685000976e-06, "loss": 0.3615, "step": 16957 }, { "epoch": 2.2677186413479538, "grad_norm": 1.7115237712860107, "learning_rate": 2.965779319950529e-06, "loss": 0.391, "step": 16958 }, { "epoch": 2.267852366943033, "grad_norm": 1.32489812374115, "learning_rate": 2.9647532180225547e-06, "loss": 0.3554, "step": 16959 }, { "epoch": 2.267986092538112, "grad_norm": 1.6141796112060547, "learning_rate": 2.9637272627375735e-06, "loss": 0.3521, "step": 16960 }, { "epoch": 2.2681198181331905, "grad_norm": 1.497443437576294, "learning_rate": 2.9627014541169575e-06, "loss": 0.3798, "step": 16961 }, { "epoch": 2.2682535437282696, "grad_norm": 1.6728148460388184, "learning_rate": 2.9616757921821005e-06, "loss": 0.3709, "step": 16962 }, { "epoch": 2.2683872693233487, "grad_norm": 1.6707217693328857, "learning_rate": 2.9606502769543778e-06, "loss": 0.3738, "step": 16963 }, { "epoch": 2.2685209949184273, "grad_norm": 1.5550148487091064, "learning_rate": 2.959624908455159e-06, "loss": 0.3601, "step": 16964 }, { "epoch": 2.2686547205135064, "grad_norm": 1.6925963163375854, "learning_rate": 2.9585996867058286e-06, "loss": 0.4088, "step": 16965 }, { "epoch": 2.2687884461085854, "grad_norm": 1.395007610321045, "learning_rate": 2.957574611727746e-06, "loss": 0.3967, "step": 16966 }, { "epoch": 2.268922171703664, "grad_norm": 1.3553985357284546, "learning_rate": 2.9565496835422822e-06, "loss": 0.3809, "step": 16967 }, { "epoch": 2.269055897298743, "grad_norm": 1.5196080207824707, "learning_rate": 2.9555249021707998e-06, "loss": 0.3671, "step": 16968 }, { "epoch": 2.2691896228938218, "grad_norm": 1.2926737070083618, "learning_rate": 2.954500267634661e-06, "loss": 0.3067, "step": 16969 }, { "epoch": 2.269323348488901, "grad_norm": 1.5860449075698853, "learning_rate": 2.9534757799552216e-06, "loss": 0.4214, "step": 16970 }, { "epoch": 2.26945707408398, "grad_norm": 1.631088137626648, "learning_rate": 2.952451439153837e-06, "loss": 0.4049, "step": 16971 }, { "epoch": 2.2695907996790585, "grad_norm": 1.5856449604034424, "learning_rate": 2.951427245251858e-06, "loss": 0.3861, "step": 16972 }, { "epoch": 2.2697245252741376, "grad_norm": 1.5612843036651611, "learning_rate": 2.950403198270634e-06, "loss": 0.3718, "step": 16973 }, { "epoch": 2.269858250869216, "grad_norm": 1.4442483186721802, "learning_rate": 2.9493792982315082e-06, "loss": 0.3694, "step": 16974 }, { "epoch": 2.2699919764642953, "grad_norm": 1.5850260257720947, "learning_rate": 2.9483555451558253e-06, "loss": 0.3848, "step": 16975 }, { "epoch": 2.2701257020593744, "grad_norm": 1.5037930011749268, "learning_rate": 2.9473319390649234e-06, "loss": 0.3639, "step": 16976 }, { "epoch": 2.270259427654453, "grad_norm": 1.4450018405914307, "learning_rate": 2.946308479980139e-06, "loss": 0.3593, "step": 16977 }, { "epoch": 2.270393153249532, "grad_norm": 1.5997565984725952, "learning_rate": 2.9452851679228044e-06, "loss": 0.3855, "step": 16978 }, { "epoch": 2.2705268788446107, "grad_norm": 1.4115175008773804, "learning_rate": 2.944262002914252e-06, "loss": 0.3214, "step": 16979 }, { "epoch": 2.2706606044396898, "grad_norm": 1.6623889207839966, "learning_rate": 2.9432389849758014e-06, "loss": 0.4379, "step": 16980 }, { "epoch": 2.270794330034769, "grad_norm": 1.5614867210388184, "learning_rate": 2.9422161141287843e-06, "loss": 0.3762, "step": 16981 }, { "epoch": 2.2709280556298475, "grad_norm": 1.3938965797424316, "learning_rate": 2.9411933903945224e-06, "loss": 0.3446, "step": 16982 }, { "epoch": 2.2710617812249265, "grad_norm": 1.5526942014694214, "learning_rate": 2.940170813794322e-06, "loss": 0.3859, "step": 16983 }, { "epoch": 2.271195506820005, "grad_norm": 1.4636236429214478, "learning_rate": 2.9391483843495126e-06, "loss": 0.3578, "step": 16984 }, { "epoch": 2.271329232415084, "grad_norm": 1.679840326309204, "learning_rate": 2.938126102081392e-06, "loss": 0.4148, "step": 16985 }, { "epoch": 2.2714629580101633, "grad_norm": 1.5810012817382812, "learning_rate": 2.9371039670112832e-06, "loss": 0.3611, "step": 16986 }, { "epoch": 2.271596683605242, "grad_norm": 1.582472801208496, "learning_rate": 2.936081979160479e-06, "loss": 0.3749, "step": 16987 }, { "epoch": 2.271730409200321, "grad_norm": 1.51668381690979, "learning_rate": 2.9350601385502865e-06, "loss": 0.393, "step": 16988 }, { "epoch": 2.2718641347953996, "grad_norm": 1.632483720779419, "learning_rate": 2.9340384452020053e-06, "loss": 0.3778, "step": 16989 }, { "epoch": 2.2719978603904787, "grad_norm": 1.3301669359207153, "learning_rate": 2.9330168991369323e-06, "loss": 0.3367, "step": 16990 }, { "epoch": 2.2721315859855578, "grad_norm": 1.4877718687057495, "learning_rate": 2.931995500376359e-06, "loss": 0.3705, "step": 16991 }, { "epoch": 2.2722653115806364, "grad_norm": 1.4969431161880493, "learning_rate": 2.9309742489415747e-06, "loss": 0.3873, "step": 16992 }, { "epoch": 2.2723990371757155, "grad_norm": 1.4712682962417603, "learning_rate": 2.92995314485387e-06, "loss": 0.3251, "step": 16993 }, { "epoch": 2.2725327627707945, "grad_norm": 1.4961767196655273, "learning_rate": 2.9289321881345257e-06, "loss": 0.3676, "step": 16994 }, { "epoch": 2.272666488365873, "grad_norm": 1.567962884902954, "learning_rate": 2.927911378804824e-06, "loss": 0.3616, "step": 16995 }, { "epoch": 2.272800213960952, "grad_norm": 1.7414886951446533, "learning_rate": 2.926890716886042e-06, "loss": 0.4515, "step": 16996 }, { "epoch": 2.272933939556031, "grad_norm": 1.419416069984436, "learning_rate": 2.9258702023994547e-06, "loss": 0.3576, "step": 16997 }, { "epoch": 2.27306766515111, "grad_norm": 1.4319350719451904, "learning_rate": 2.9248498353663337e-06, "loss": 0.3747, "step": 16998 }, { "epoch": 2.273201390746189, "grad_norm": 1.385412335395813, "learning_rate": 2.923829615807948e-06, "loss": 0.3758, "step": 16999 }, { "epoch": 2.2733351163412676, "grad_norm": 1.4646358489990234, "learning_rate": 2.922809543745563e-06, "loss": 0.3535, "step": 17000 }, { "epoch": 2.2734688419363467, "grad_norm": 1.560086965560913, "learning_rate": 2.9217896192004413e-06, "loss": 0.3414, "step": 17001 }, { "epoch": 2.2736025675314258, "grad_norm": 1.6190581321716309, "learning_rate": 2.9207698421938415e-06, "loss": 0.4007, "step": 17002 }, { "epoch": 2.2737362931265044, "grad_norm": 1.555875301361084, "learning_rate": 2.9197502127470223e-06, "loss": 0.3734, "step": 17003 }, { "epoch": 2.2738700187215835, "grad_norm": 1.4572465419769287, "learning_rate": 2.9187307308812298e-06, "loss": 0.3574, "step": 17004 }, { "epoch": 2.274003744316662, "grad_norm": 1.464336633682251, "learning_rate": 2.917711396617725e-06, "loss": 0.3834, "step": 17005 }, { "epoch": 2.274137469911741, "grad_norm": 1.6059752702713013, "learning_rate": 2.916692209977743e-06, "loss": 0.3705, "step": 17006 }, { "epoch": 2.27427119550682, "grad_norm": 1.513388752937317, "learning_rate": 2.91567317098254e-06, "loss": 0.3777, "step": 17007 }, { "epoch": 2.274404921101899, "grad_norm": 1.5431640148162842, "learning_rate": 2.9146542796533484e-06, "loss": 0.3404, "step": 17008 }, { "epoch": 2.274538646696978, "grad_norm": 1.6186972856521606, "learning_rate": 2.9136355360114045e-06, "loss": 0.4268, "step": 17009 }, { "epoch": 2.2746723722920565, "grad_norm": 1.4681708812713623, "learning_rate": 2.9126169400779536e-06, "loss": 0.3735, "step": 17010 }, { "epoch": 2.2748060978871356, "grad_norm": 1.64728844165802, "learning_rate": 2.9115984918742167e-06, "loss": 0.3917, "step": 17011 }, { "epoch": 2.2749398234822147, "grad_norm": 1.5776786804199219, "learning_rate": 2.9105801914214272e-06, "loss": 0.4061, "step": 17012 }, { "epoch": 2.2750735490772933, "grad_norm": 1.7696211338043213, "learning_rate": 2.9095620387408097e-06, "loss": 0.4438, "step": 17013 }, { "epoch": 2.2752072746723724, "grad_norm": 1.3879187107086182, "learning_rate": 2.9085440338535866e-06, "loss": 0.3329, "step": 17014 }, { "epoch": 2.275341000267451, "grad_norm": 1.3968441486358643, "learning_rate": 2.907526176780977e-06, "loss": 0.3148, "step": 17015 }, { "epoch": 2.27547472586253, "grad_norm": 1.6089133024215698, "learning_rate": 2.906508467544198e-06, "loss": 0.3593, "step": 17016 }, { "epoch": 2.275608451457609, "grad_norm": 1.6451023817062378, "learning_rate": 2.9054909061644623e-06, "loss": 0.4011, "step": 17017 }, { "epoch": 2.2757421770526878, "grad_norm": 1.6566888093948364, "learning_rate": 2.9044734926629793e-06, "loss": 0.4077, "step": 17018 }, { "epoch": 2.275875902647767, "grad_norm": 1.550437569618225, "learning_rate": 2.9034562270609567e-06, "loss": 0.3473, "step": 17019 }, { "epoch": 2.2760096282428455, "grad_norm": 1.5410364866256714, "learning_rate": 2.902439109379599e-06, "loss": 0.3571, "step": 17020 }, { "epoch": 2.2761433538379245, "grad_norm": 1.4992727041244507, "learning_rate": 2.9014221396401064e-06, "loss": 0.3645, "step": 17021 }, { "epoch": 2.2762770794330036, "grad_norm": 1.5268008708953857, "learning_rate": 2.900405317863676e-06, "loss": 0.3658, "step": 17022 }, { "epoch": 2.2764108050280822, "grad_norm": 1.3154749870300293, "learning_rate": 2.8993886440715036e-06, "loss": 0.3304, "step": 17023 }, { "epoch": 2.2765445306231613, "grad_norm": 1.2949903011322021, "learning_rate": 2.8983721182847834e-06, "loss": 0.35, "step": 17024 }, { "epoch": 2.27667825621824, "grad_norm": 1.5786867141723633, "learning_rate": 2.8973557405246954e-06, "loss": 0.4079, "step": 17025 }, { "epoch": 2.276811981813319, "grad_norm": 1.5485320091247559, "learning_rate": 2.896339510812436e-06, "loss": 0.3732, "step": 17026 }, { "epoch": 2.276945707408398, "grad_norm": 1.2919182777404785, "learning_rate": 2.895323429169179e-06, "loss": 0.3409, "step": 17027 }, { "epoch": 2.2770794330034767, "grad_norm": 1.6092054843902588, "learning_rate": 2.894307495616103e-06, "loss": 0.4003, "step": 17028 }, { "epoch": 2.2772131585985558, "grad_norm": 1.2670623064041138, "learning_rate": 2.8932917101743953e-06, "loss": 0.3444, "step": 17029 }, { "epoch": 2.277346884193635, "grad_norm": 1.5616655349731445, "learning_rate": 2.8922760728652144e-06, "loss": 0.4051, "step": 17030 }, { "epoch": 2.2774806097887135, "grad_norm": 1.3748559951782227, "learning_rate": 2.891260583709744e-06, "loss": 0.3411, "step": 17031 }, { "epoch": 2.2776143353837925, "grad_norm": 1.5091861486434937, "learning_rate": 2.8902452427291407e-06, "loss": 0.3718, "step": 17032 }, { "epoch": 2.277748060978871, "grad_norm": 1.4395629167556763, "learning_rate": 2.8892300499445725e-06, "loss": 0.3736, "step": 17033 }, { "epoch": 2.2778817865739502, "grad_norm": 1.3729289770126343, "learning_rate": 2.8882150053771997e-06, "loss": 0.4225, "step": 17034 }, { "epoch": 2.2780155121690293, "grad_norm": 1.5264701843261719, "learning_rate": 2.8872001090481804e-06, "loss": 0.3819, "step": 17035 }, { "epoch": 2.278149237764108, "grad_norm": 1.4509196281433105, "learning_rate": 2.886185360978667e-06, "loss": 0.3819, "step": 17036 }, { "epoch": 2.278282963359187, "grad_norm": 1.5283418893814087, "learning_rate": 2.8851707611898138e-06, "loss": 0.3386, "step": 17037 }, { "epoch": 2.278416688954266, "grad_norm": 1.734653353691101, "learning_rate": 2.884156309702768e-06, "loss": 0.3907, "step": 17038 }, { "epoch": 2.2785504145493447, "grad_norm": 1.8386884927749634, "learning_rate": 2.883142006538675e-06, "loss": 0.3685, "step": 17039 }, { "epoch": 2.2786841401444238, "grad_norm": 1.3719942569732666, "learning_rate": 2.8821278517186755e-06, "loss": 0.3512, "step": 17040 }, { "epoch": 2.2788178657395024, "grad_norm": 1.417429804801941, "learning_rate": 2.881113845263911e-06, "loss": 0.3388, "step": 17041 }, { "epoch": 2.2789515913345815, "grad_norm": 1.5438954830169678, "learning_rate": 2.880099987195516e-06, "loss": 0.3837, "step": 17042 }, { "epoch": 2.2790853169296605, "grad_norm": 1.5103967189788818, "learning_rate": 2.8790862775346275e-06, "loss": 0.3461, "step": 17043 }, { "epoch": 2.279219042524739, "grad_norm": 1.492672324180603, "learning_rate": 2.878072716302364e-06, "loss": 0.3629, "step": 17044 }, { "epoch": 2.2793527681198182, "grad_norm": 1.3144148588180542, "learning_rate": 2.8770593035198667e-06, "loss": 0.3211, "step": 17045 }, { "epoch": 2.279486493714897, "grad_norm": 1.648748755455017, "learning_rate": 2.8760460392082468e-06, "loss": 0.3748, "step": 17046 }, { "epoch": 2.279620219309976, "grad_norm": 1.631947636604309, "learning_rate": 2.875032923388632e-06, "loss": 0.3879, "step": 17047 }, { "epoch": 2.279753944905055, "grad_norm": 1.4238033294677734, "learning_rate": 2.8740199560821426e-06, "loss": 0.3436, "step": 17048 }, { "epoch": 2.2798876705001336, "grad_norm": 1.3393027782440186, "learning_rate": 2.8730071373098813e-06, "loss": 0.3528, "step": 17049 }, { "epoch": 2.2800213960952127, "grad_norm": 1.377915620803833, "learning_rate": 2.871994467092972e-06, "loss": 0.3564, "step": 17050 }, { "epoch": 2.2801551216902913, "grad_norm": 1.3234686851501465, "learning_rate": 2.8709819454525157e-06, "loss": 0.352, "step": 17051 }, { "epoch": 2.2802888472853704, "grad_norm": 1.5339642763137817, "learning_rate": 2.8699695724096177e-06, "loss": 0.3543, "step": 17052 }, { "epoch": 2.2804225728804495, "grad_norm": 1.5116121768951416, "learning_rate": 2.8689573479853826e-06, "loss": 0.3846, "step": 17053 }, { "epoch": 2.280556298475528, "grad_norm": 1.4378588199615479, "learning_rate": 2.867945272200904e-06, "loss": 0.333, "step": 17054 }, { "epoch": 2.280690024070607, "grad_norm": 1.3883188962936401, "learning_rate": 2.8669333450772873e-06, "loss": 0.3411, "step": 17055 }, { "epoch": 2.280823749665686, "grad_norm": 1.4780216217041016, "learning_rate": 2.865921566635618e-06, "loss": 0.329, "step": 17056 }, { "epoch": 2.280957475260765, "grad_norm": 1.2886308431625366, "learning_rate": 2.864909936896986e-06, "loss": 0.3305, "step": 17057 }, { "epoch": 2.281091200855844, "grad_norm": 1.562170147895813, "learning_rate": 2.8638984558824777e-06, "loss": 0.4124, "step": 17058 }, { "epoch": 2.2812249264509226, "grad_norm": 1.4724701642990112, "learning_rate": 2.8628871236131796e-06, "loss": 0.366, "step": 17059 }, { "epoch": 2.2813586520460016, "grad_norm": 1.5228806734085083, "learning_rate": 2.861875940110168e-06, "loss": 0.347, "step": 17060 }, { "epoch": 2.2814923776410803, "grad_norm": 1.6004219055175781, "learning_rate": 2.8608649053945235e-06, "loss": 0.3733, "step": 17061 }, { "epoch": 2.2816261032361593, "grad_norm": 1.432561993598938, "learning_rate": 2.859854019487318e-06, "loss": 0.3876, "step": 17062 }, { "epoch": 2.2817598288312384, "grad_norm": 1.5826714038848877, "learning_rate": 2.8588432824096236e-06, "loss": 0.3816, "step": 17063 }, { "epoch": 2.281893554426317, "grad_norm": 1.628354549407959, "learning_rate": 2.8578326941825074e-06, "loss": 0.4108, "step": 17064 }, { "epoch": 2.282027280021396, "grad_norm": 1.4790418148040771, "learning_rate": 2.856822254827034e-06, "loss": 0.3424, "step": 17065 }, { "epoch": 2.282161005616475, "grad_norm": 1.4843205213546753, "learning_rate": 2.8558119643642657e-06, "loss": 0.3788, "step": 17066 }, { "epoch": 2.282294731211554, "grad_norm": 1.7294301986694336, "learning_rate": 2.854801822815263e-06, "loss": 0.408, "step": 17067 }, { "epoch": 2.282428456806633, "grad_norm": 1.4788068532943726, "learning_rate": 2.8537918302010737e-06, "loss": 0.3524, "step": 17068 }, { "epoch": 2.282562182401712, "grad_norm": 1.5772935152053833, "learning_rate": 2.852781986542762e-06, "loss": 0.4045, "step": 17069 }, { "epoch": 2.2826959079967906, "grad_norm": 1.4873038530349731, "learning_rate": 2.8517722918613642e-06, "loss": 0.3663, "step": 17070 }, { "epoch": 2.2828296335918696, "grad_norm": 1.5206979513168335, "learning_rate": 2.8507627461779384e-06, "loss": 0.3662, "step": 17071 }, { "epoch": 2.2829633591869483, "grad_norm": 1.6808936595916748, "learning_rate": 2.84975334951352e-06, "loss": 0.3631, "step": 17072 }, { "epoch": 2.2830970847820273, "grad_norm": 1.3503527641296387, "learning_rate": 2.848744101889148e-06, "loss": 0.3714, "step": 17073 }, { "epoch": 2.2832308103771064, "grad_norm": 1.3562567234039307, "learning_rate": 2.847735003325868e-06, "loss": 0.3137, "step": 17074 }, { "epoch": 2.283364535972185, "grad_norm": 1.439693570137024, "learning_rate": 2.8467260538447038e-06, "loss": 0.3151, "step": 17075 }, { "epoch": 2.283498261567264, "grad_norm": 1.7823055982589722, "learning_rate": 2.845717253466691e-06, "loss": 0.4178, "step": 17076 }, { "epoch": 2.2836319871623427, "grad_norm": 1.6346759796142578, "learning_rate": 2.8447086022128565e-06, "loss": 0.4156, "step": 17077 }, { "epoch": 2.283765712757422, "grad_norm": 1.2736523151397705, "learning_rate": 2.8437001001042244e-06, "loss": 0.3355, "step": 17078 }, { "epoch": 2.283899438352501, "grad_norm": 1.707222819328308, "learning_rate": 2.8426917471618144e-06, "loss": 0.4395, "step": 17079 }, { "epoch": 2.2840331639475795, "grad_norm": 1.5472502708435059, "learning_rate": 2.841683543406647e-06, "loss": 0.4029, "step": 17080 }, { "epoch": 2.2841668895426586, "grad_norm": 1.539623498916626, "learning_rate": 2.8406754888597365e-06, "loss": 0.4062, "step": 17081 }, { "epoch": 2.284300615137737, "grad_norm": 1.5965551137924194, "learning_rate": 2.839667583542095e-06, "loss": 0.3919, "step": 17082 }, { "epoch": 2.2844343407328163, "grad_norm": 1.4609220027923584, "learning_rate": 2.8386598274747303e-06, "loss": 0.3665, "step": 17083 }, { "epoch": 2.2845680663278953, "grad_norm": 1.5263363122940063, "learning_rate": 2.8376522206786494e-06, "loss": 0.3822, "step": 17084 }, { "epoch": 2.284701791922974, "grad_norm": 1.6540801525115967, "learning_rate": 2.836644763174854e-06, "loss": 0.4484, "step": 17085 }, { "epoch": 2.284835517518053, "grad_norm": 1.5076287984848022, "learning_rate": 2.8356374549843447e-06, "loss": 0.39, "step": 17086 }, { "epoch": 2.2849692431131317, "grad_norm": 1.4015815258026123, "learning_rate": 2.834630296128116e-06, "loss": 0.3515, "step": 17087 }, { "epoch": 2.2851029687082107, "grad_norm": 1.5057001113891602, "learning_rate": 2.8336232866271663e-06, "loss": 0.4021, "step": 17088 }, { "epoch": 2.28523669430329, "grad_norm": 1.4895102977752686, "learning_rate": 2.8326164265024746e-06, "loss": 0.363, "step": 17089 }, { "epoch": 2.2853704198983684, "grad_norm": 1.730924129486084, "learning_rate": 2.8316097157750422e-06, "loss": 0.4047, "step": 17090 }, { "epoch": 2.2855041454934475, "grad_norm": 1.4394056797027588, "learning_rate": 2.8306031544658387e-06, "loss": 0.3764, "step": 17091 }, { "epoch": 2.285637871088526, "grad_norm": 1.4548566341400146, "learning_rate": 2.8295967425958557e-06, "loss": 0.3679, "step": 17092 }, { "epoch": 2.285771596683605, "grad_norm": 1.4451931715011597, "learning_rate": 2.82859048018607e-06, "loss": 0.361, "step": 17093 }, { "epoch": 2.2859053222786843, "grad_norm": 1.5823029279708862, "learning_rate": 2.8275843672574476e-06, "loss": 0.368, "step": 17094 }, { "epoch": 2.286039047873763, "grad_norm": 1.3878077268600464, "learning_rate": 2.826578403830972e-06, "loss": 0.374, "step": 17095 }, { "epoch": 2.286172773468842, "grad_norm": 1.5558421611785889, "learning_rate": 2.825572589927602e-06, "loss": 0.37, "step": 17096 }, { "epoch": 2.286306499063921, "grad_norm": 1.6921919584274292, "learning_rate": 2.8245669255683072e-06, "loss": 0.4234, "step": 17097 }, { "epoch": 2.2864402246589997, "grad_norm": 1.505511999130249, "learning_rate": 2.823561410774047e-06, "loss": 0.3643, "step": 17098 }, { "epoch": 2.2865739502540787, "grad_norm": 1.4950768947601318, "learning_rate": 2.8225560455657807e-06, "loss": 0.3384, "step": 17099 }, { "epoch": 2.2867076758491574, "grad_norm": 1.5675758123397827, "learning_rate": 2.82155082996447e-06, "loss": 0.3782, "step": 17100 }, { "epoch": 2.2868414014442364, "grad_norm": 1.3418360948562622, "learning_rate": 2.8205457639910616e-06, "loss": 0.3459, "step": 17101 }, { "epoch": 2.2869751270393155, "grad_norm": 1.5419352054595947, "learning_rate": 2.8195408476665064e-06, "loss": 0.3544, "step": 17102 }, { "epoch": 2.287108852634394, "grad_norm": 1.5953378677368164, "learning_rate": 2.8185360810117514e-06, "loss": 0.4009, "step": 17103 }, { "epoch": 2.287242578229473, "grad_norm": 1.4761632680892944, "learning_rate": 2.817531464047739e-06, "loss": 0.3927, "step": 17104 }, { "epoch": 2.2873763038245523, "grad_norm": 1.496482491493225, "learning_rate": 2.816526996795411e-06, "loss": 0.3681, "step": 17105 }, { "epoch": 2.287510029419631, "grad_norm": 1.5692352056503296, "learning_rate": 2.815522679275704e-06, "loss": 0.4087, "step": 17106 }, { "epoch": 2.28764375501471, "grad_norm": 1.6782152652740479, "learning_rate": 2.814518511509552e-06, "loss": 0.4057, "step": 17107 }, { "epoch": 2.2877774806097886, "grad_norm": 1.4950534105300903, "learning_rate": 2.813514493517885e-06, "loss": 0.3216, "step": 17108 }, { "epoch": 2.2879112062048677, "grad_norm": 1.3602277040481567, "learning_rate": 2.8125106253216363e-06, "loss": 0.316, "step": 17109 }, { "epoch": 2.2880449317999467, "grad_norm": 1.5474023818969727, "learning_rate": 2.8115069069417176e-06, "loss": 0.3558, "step": 17110 }, { "epoch": 2.2881786573950254, "grad_norm": 1.333630084991455, "learning_rate": 2.810503338399063e-06, "loss": 0.3345, "step": 17111 }, { "epoch": 2.2883123829901044, "grad_norm": 1.4353400468826294, "learning_rate": 2.8094999197145902e-06, "loss": 0.381, "step": 17112 }, { "epoch": 2.288446108585183, "grad_norm": 1.6984935998916626, "learning_rate": 2.808496650909205e-06, "loss": 0.4281, "step": 17113 }, { "epoch": 2.288579834180262, "grad_norm": 1.4357613325119019, "learning_rate": 2.807493532003831e-06, "loss": 0.3825, "step": 17114 }, { "epoch": 2.288713559775341, "grad_norm": 1.4283082485198975, "learning_rate": 2.806490563019366e-06, "loss": 0.394, "step": 17115 }, { "epoch": 2.28884728537042, "grad_norm": 1.4725817441940308, "learning_rate": 2.8054877439767283e-06, "loss": 0.3966, "step": 17116 }, { "epoch": 2.288981010965499, "grad_norm": 1.763079285621643, "learning_rate": 2.8044850748968112e-06, "loss": 0.424, "step": 17117 }, { "epoch": 2.2891147365605775, "grad_norm": 1.4673564434051514, "learning_rate": 2.803482555800513e-06, "loss": 0.3698, "step": 17118 }, { "epoch": 2.2892484621556566, "grad_norm": 1.3144391775131226, "learning_rate": 2.8024801867087414e-06, "loss": 0.3908, "step": 17119 }, { "epoch": 2.2893821877507357, "grad_norm": 1.4826347827911377, "learning_rate": 2.801477967642381e-06, "loss": 0.3733, "step": 17120 }, { "epoch": 2.2895159133458143, "grad_norm": 1.4246609210968018, "learning_rate": 2.8004758986223225e-06, "loss": 0.3728, "step": 17121 }, { "epoch": 2.2896496389408934, "grad_norm": 1.4616708755493164, "learning_rate": 2.799473979669456e-06, "loss": 0.4015, "step": 17122 }, { "epoch": 2.289783364535972, "grad_norm": 1.6043283939361572, "learning_rate": 2.7984722108046637e-06, "loss": 0.3643, "step": 17123 }, { "epoch": 2.289917090131051, "grad_norm": 1.555106282234192, "learning_rate": 2.7974705920488267e-06, "loss": 0.3645, "step": 17124 }, { "epoch": 2.29005081572613, "grad_norm": 1.6657214164733887, "learning_rate": 2.7964691234228238e-06, "loss": 0.3945, "step": 17125 }, { "epoch": 2.2901845413212087, "grad_norm": 1.3964290618896484, "learning_rate": 2.795467804947528e-06, "loss": 0.3732, "step": 17126 }, { "epoch": 2.290318266916288, "grad_norm": 1.507797360420227, "learning_rate": 2.794466636643812e-06, "loss": 0.4029, "step": 17127 }, { "epoch": 2.2904519925113664, "grad_norm": 1.6231237649917603, "learning_rate": 2.7934656185325483e-06, "loss": 0.3741, "step": 17128 }, { "epoch": 2.2905857181064455, "grad_norm": 1.6168947219848633, "learning_rate": 2.7924647506345913e-06, "loss": 0.4103, "step": 17129 }, { "epoch": 2.2907194437015246, "grad_norm": 1.4203189611434937, "learning_rate": 2.791464032970812e-06, "loss": 0.385, "step": 17130 }, { "epoch": 2.290853169296603, "grad_norm": 1.4013820886611938, "learning_rate": 2.790463465562068e-06, "loss": 0.3256, "step": 17131 }, { "epoch": 2.2909868948916823, "grad_norm": 1.4253093004226685, "learning_rate": 2.789463048429214e-06, "loss": 0.3776, "step": 17132 }, { "epoch": 2.2911206204867614, "grad_norm": 1.5374804735183716, "learning_rate": 2.7884627815931052e-06, "loss": 0.3946, "step": 17133 }, { "epoch": 2.29125434608184, "grad_norm": 1.5190311670303345, "learning_rate": 2.7874626650745838e-06, "loss": 0.3761, "step": 17134 }, { "epoch": 2.291388071676919, "grad_norm": 1.5384248495101929, "learning_rate": 2.786462698894508e-06, "loss": 0.3555, "step": 17135 }, { "epoch": 2.2915217972719977, "grad_norm": 1.5339213609695435, "learning_rate": 2.785462883073711e-06, "loss": 0.3792, "step": 17136 }, { "epoch": 2.2916555228670767, "grad_norm": 1.5710184574127197, "learning_rate": 2.784463217633033e-06, "loss": 0.4061, "step": 17137 }, { "epoch": 2.291789248462156, "grad_norm": 1.4566221237182617, "learning_rate": 2.783463702593322e-06, "loss": 0.3638, "step": 17138 }, { "epoch": 2.2919229740572344, "grad_norm": 1.3668036460876465, "learning_rate": 2.782464337975398e-06, "loss": 0.3671, "step": 17139 }, { "epoch": 2.2920566996523135, "grad_norm": 1.782456398010254, "learning_rate": 2.7814651238001045e-06, "loss": 0.4277, "step": 17140 }, { "epoch": 2.2921904252473926, "grad_norm": 1.4327895641326904, "learning_rate": 2.780466060088259e-06, "loss": 0.4144, "step": 17141 }, { "epoch": 2.292324150842471, "grad_norm": 1.4891985654830933, "learning_rate": 2.7794671468606916e-06, "loss": 0.3983, "step": 17142 }, { "epoch": 2.2924578764375503, "grad_norm": 1.5933634042739868, "learning_rate": 2.778468384138222e-06, "loss": 0.3939, "step": 17143 }, { "epoch": 2.292591602032629, "grad_norm": 1.682861328125, "learning_rate": 2.7774697719416688e-06, "loss": 0.3731, "step": 17144 }, { "epoch": 2.292725327627708, "grad_norm": 1.4285898208618164, "learning_rate": 2.776471310291846e-06, "loss": 0.3717, "step": 17145 }, { "epoch": 2.292859053222787, "grad_norm": 1.418239951133728, "learning_rate": 2.7754729992095673e-06, "loss": 0.3506, "step": 17146 }, { "epoch": 2.2929927788178657, "grad_norm": 1.4235265254974365, "learning_rate": 2.774474838715642e-06, "loss": 0.3399, "step": 17147 }, { "epoch": 2.2931265044129447, "grad_norm": 1.4237998723983765, "learning_rate": 2.7734768288308724e-06, "loss": 0.3926, "step": 17148 }, { "epoch": 2.2932602300080234, "grad_norm": 1.3857581615447998, "learning_rate": 2.7724789695760645e-06, "loss": 0.3215, "step": 17149 }, { "epoch": 2.2933939556031024, "grad_norm": 1.6971173286437988, "learning_rate": 2.7714812609720167e-06, "loss": 0.4083, "step": 17150 }, { "epoch": 2.2935276811981815, "grad_norm": 1.4783122539520264, "learning_rate": 2.7704837030395237e-06, "loss": 0.3921, "step": 17151 }, { "epoch": 2.29366140679326, "grad_norm": 1.4801981449127197, "learning_rate": 2.769486295799385e-06, "loss": 0.3453, "step": 17152 }, { "epoch": 2.293795132388339, "grad_norm": 1.4771761894226074, "learning_rate": 2.7684890392723783e-06, "loss": 0.4077, "step": 17153 }, { "epoch": 2.293928857983418, "grad_norm": 1.6517611742019653, "learning_rate": 2.767491933479304e-06, "loss": 0.4003, "step": 17154 }, { "epoch": 2.294062583578497, "grad_norm": 1.4087947607040405, "learning_rate": 2.7664949784409335e-06, "loss": 0.3263, "step": 17155 }, { "epoch": 2.294196309173576, "grad_norm": 1.5638269186019897, "learning_rate": 2.765498174178056e-06, "loss": 0.3867, "step": 17156 }, { "epoch": 2.2943300347686546, "grad_norm": 1.510145664215088, "learning_rate": 2.76450152071145e-06, "loss": 0.368, "step": 17157 }, { "epoch": 2.2944637603637337, "grad_norm": 1.495118260383606, "learning_rate": 2.7635050180618805e-06, "loss": 0.3875, "step": 17158 }, { "epoch": 2.2945974859588123, "grad_norm": 1.5410338640213013, "learning_rate": 2.76250866625013e-06, "loss": 0.3824, "step": 17159 }, { "epoch": 2.2947312115538914, "grad_norm": 1.4344508647918701, "learning_rate": 2.7615124652969583e-06, "loss": 0.3636, "step": 17160 }, { "epoch": 2.2948649371489704, "grad_norm": 1.505493402481079, "learning_rate": 2.7605164152231322e-06, "loss": 0.3498, "step": 17161 }, { "epoch": 2.294998662744049, "grad_norm": 1.4173133373260498, "learning_rate": 2.7595205160494133e-06, "loss": 0.363, "step": 17162 }, { "epoch": 2.295132388339128, "grad_norm": 1.509009838104248, "learning_rate": 2.7585247677965588e-06, "loss": 0.391, "step": 17163 }, { "epoch": 2.2952661139342068, "grad_norm": 1.8681782484054565, "learning_rate": 2.7575291704853325e-06, "loss": 0.4129, "step": 17164 }, { "epoch": 2.295399839529286, "grad_norm": 1.4409300088882446, "learning_rate": 2.7565337241364766e-06, "loss": 0.3271, "step": 17165 }, { "epoch": 2.295533565124365, "grad_norm": 1.390662431716919, "learning_rate": 2.7555384287707443e-06, "loss": 0.3299, "step": 17166 }, { "epoch": 2.2956672907194435, "grad_norm": 1.6760491132736206, "learning_rate": 2.7545432844088814e-06, "loss": 0.3982, "step": 17167 }, { "epoch": 2.2958010163145226, "grad_norm": 1.4453706741333008, "learning_rate": 2.7535482910716305e-06, "loss": 0.3777, "step": 17168 }, { "epoch": 2.2959347419096017, "grad_norm": 1.4513747692108154, "learning_rate": 2.7525534487797313e-06, "loss": 0.3452, "step": 17169 }, { "epoch": 2.2960684675046803, "grad_norm": 1.4528206586837769, "learning_rate": 2.751558757553919e-06, "loss": 0.3478, "step": 17170 }, { "epoch": 2.2962021930997594, "grad_norm": 1.6733758449554443, "learning_rate": 2.7505642174149306e-06, "loss": 0.4297, "step": 17171 }, { "epoch": 2.2963359186948384, "grad_norm": 1.7016123533248901, "learning_rate": 2.7495698283834926e-06, "loss": 0.4558, "step": 17172 }, { "epoch": 2.296469644289917, "grad_norm": 1.4338643550872803, "learning_rate": 2.748575590480338e-06, "loss": 0.3422, "step": 17173 }, { "epoch": 2.296603369884996, "grad_norm": 1.4443522691726685, "learning_rate": 2.74758150372618e-06, "loss": 0.3697, "step": 17174 }, { "epoch": 2.2967370954800748, "grad_norm": 1.5457407236099243, "learning_rate": 2.7465875681417475e-06, "loss": 0.3812, "step": 17175 }, { "epoch": 2.296870821075154, "grad_norm": 1.337968111038208, "learning_rate": 2.7455937837477577e-06, "loss": 0.3654, "step": 17176 }, { "epoch": 2.297004546670233, "grad_norm": 1.4253863096237183, "learning_rate": 2.7446001505649234e-06, "loss": 0.3874, "step": 17177 }, { "epoch": 2.2971382722653115, "grad_norm": 1.4307308197021484, "learning_rate": 2.7436066686139595e-06, "loss": 0.3765, "step": 17178 }, { "epoch": 2.2972719978603906, "grad_norm": 1.6833326816558838, "learning_rate": 2.742613337915564e-06, "loss": 0.4199, "step": 17179 }, { "epoch": 2.2974057234554692, "grad_norm": 1.552001714706421, "learning_rate": 2.7416201584904556e-06, "loss": 0.3581, "step": 17180 }, { "epoch": 2.2975394490505483, "grad_norm": 1.3154784440994263, "learning_rate": 2.7406271303593266e-06, "loss": 0.3485, "step": 17181 }, { "epoch": 2.2976731746456274, "grad_norm": 1.4997074604034424, "learning_rate": 2.7396342535428753e-06, "loss": 0.3953, "step": 17182 }, { "epoch": 2.297806900240706, "grad_norm": 1.3807638883590698, "learning_rate": 2.7386415280618074e-06, "loss": 0.3987, "step": 17183 }, { "epoch": 2.297940625835785, "grad_norm": 1.716066837310791, "learning_rate": 2.7376489539368014e-06, "loss": 0.4329, "step": 17184 }, { "epoch": 2.2980743514308637, "grad_norm": 1.3511594533920288, "learning_rate": 2.7366565311885605e-06, "loss": 0.3533, "step": 17185 }, { "epoch": 2.2982080770259428, "grad_norm": 1.5046862363815308, "learning_rate": 2.7356642598377604e-06, "loss": 0.4061, "step": 17186 }, { "epoch": 2.298341802621022, "grad_norm": 1.5308281183242798, "learning_rate": 2.734672139905088e-06, "loss": 0.3459, "step": 17187 }, { "epoch": 2.2984755282161005, "grad_norm": 1.618495225906372, "learning_rate": 2.7336801714112217e-06, "loss": 0.4146, "step": 17188 }, { "epoch": 2.2986092538111795, "grad_norm": 1.4642754793167114, "learning_rate": 2.7326883543768403e-06, "loss": 0.3742, "step": 17189 }, { "epoch": 2.298742979406258, "grad_norm": 1.4755795001983643, "learning_rate": 2.731696688822615e-06, "loss": 0.3519, "step": 17190 }, { "epoch": 2.2988767050013372, "grad_norm": 1.3965767621994019, "learning_rate": 2.730705174769218e-06, "loss": 0.3263, "step": 17191 }, { "epoch": 2.2990104305964163, "grad_norm": 1.6315058469772339, "learning_rate": 2.7297138122373158e-06, "loss": 0.4365, "step": 17192 }, { "epoch": 2.299144156191495, "grad_norm": 1.4860994815826416, "learning_rate": 2.728722601247572e-06, "loss": 0.3673, "step": 17193 }, { "epoch": 2.299277881786574, "grad_norm": 1.5704338550567627, "learning_rate": 2.7277315418206476e-06, "loss": 0.4365, "step": 17194 }, { "epoch": 2.2994116073816526, "grad_norm": 1.4059998989105225, "learning_rate": 2.7267406339771995e-06, "loss": 0.3541, "step": 17195 }, { "epoch": 2.2995453329767317, "grad_norm": 1.5870018005371094, "learning_rate": 2.7257498777378843e-06, "loss": 0.3387, "step": 17196 }, { "epoch": 2.2996790585718108, "grad_norm": 1.4791349172592163, "learning_rate": 2.7247592731233552e-06, "loss": 0.3506, "step": 17197 }, { "epoch": 2.2998127841668894, "grad_norm": 1.7134732007980347, "learning_rate": 2.723768820154251e-06, "loss": 0.4395, "step": 17198 }, { "epoch": 2.2999465097619685, "grad_norm": 1.6738784313201904, "learning_rate": 2.72277851885123e-06, "loss": 0.42, "step": 17199 }, { "epoch": 2.3000802353570475, "grad_norm": 1.5754859447479248, "learning_rate": 2.72178836923492e-06, "loss": 0.3898, "step": 17200 }, { "epoch": 2.300213960952126, "grad_norm": 1.5172200202941895, "learning_rate": 2.7207983713259713e-06, "loss": 0.3671, "step": 17201 }, { "epoch": 2.3003476865472052, "grad_norm": 1.784548044204712, "learning_rate": 2.719808525145017e-06, "loss": 0.3796, "step": 17202 }, { "epoch": 2.300481412142284, "grad_norm": 1.2046096324920654, "learning_rate": 2.7188188307126817e-06, "loss": 0.3149, "step": 17203 }, { "epoch": 2.300615137737363, "grad_norm": 1.6806238889694214, "learning_rate": 2.717829288049607e-06, "loss": 0.426, "step": 17204 }, { "epoch": 2.300748863332442, "grad_norm": 1.468948483467102, "learning_rate": 2.7168398971764088e-06, "loss": 0.3518, "step": 17205 }, { "epoch": 2.3008825889275206, "grad_norm": 1.4999831914901733, "learning_rate": 2.7158506581137147e-06, "loss": 0.3991, "step": 17206 }, { "epoch": 2.3010163145225997, "grad_norm": 1.8045616149902344, "learning_rate": 2.7148615708821422e-06, "loss": 0.4184, "step": 17207 }, { "epoch": 2.3011500401176788, "grad_norm": 1.570469617843628, "learning_rate": 2.713872635502307e-06, "loss": 0.4067, "step": 17208 }, { "epoch": 2.3012837657127574, "grad_norm": 1.5693238973617554, "learning_rate": 2.7128838519948307e-06, "loss": 0.3736, "step": 17209 }, { "epoch": 2.3014174913078365, "grad_norm": 1.5478546619415283, "learning_rate": 2.711895220380315e-06, "loss": 0.3934, "step": 17210 }, { "epoch": 2.301551216902915, "grad_norm": 1.564774751663208, "learning_rate": 2.7109067406793688e-06, "loss": 0.3455, "step": 17211 }, { "epoch": 2.301684942497994, "grad_norm": 1.3911818265914917, "learning_rate": 2.7099184129125967e-06, "loss": 0.3866, "step": 17212 }, { "epoch": 2.3018186680930732, "grad_norm": 1.2479983568191528, "learning_rate": 2.7089302371005986e-06, "loss": 0.3294, "step": 17213 }, { "epoch": 2.301952393688152, "grad_norm": 1.4475657939910889, "learning_rate": 2.7079422132639745e-06, "loss": 0.4193, "step": 17214 }, { "epoch": 2.302086119283231, "grad_norm": 1.4607634544372559, "learning_rate": 2.7069543414233157e-06, "loss": 0.3894, "step": 17215 }, { "epoch": 2.3022198448783096, "grad_norm": 1.547248363494873, "learning_rate": 2.7059666215992165e-06, "loss": 0.3502, "step": 17216 }, { "epoch": 2.3023535704733886, "grad_norm": 1.5162708759307861, "learning_rate": 2.7049790538122623e-06, "loss": 0.361, "step": 17217 }, { "epoch": 2.3024872960684677, "grad_norm": 1.582796335220337, "learning_rate": 2.703991638083042e-06, "loss": 0.3743, "step": 17218 }, { "epoch": 2.3026210216635463, "grad_norm": 1.6232115030288696, "learning_rate": 2.703004374432129e-06, "loss": 0.421, "step": 17219 }, { "epoch": 2.3027547472586254, "grad_norm": 1.662674069404602, "learning_rate": 2.702017262880111e-06, "loss": 0.4009, "step": 17220 }, { "epoch": 2.302888472853704, "grad_norm": 1.574709177017212, "learning_rate": 2.7010303034475616e-06, "loss": 0.4057, "step": 17221 }, { "epoch": 2.303022198448783, "grad_norm": 1.4702725410461426, "learning_rate": 2.7000434961550458e-06, "loss": 0.3542, "step": 17222 }, { "epoch": 2.303155924043862, "grad_norm": 1.8665411472320557, "learning_rate": 2.6990568410231432e-06, "loss": 0.3845, "step": 17223 }, { "epoch": 2.303289649638941, "grad_norm": 1.6615039110183716, "learning_rate": 2.6980703380724093e-06, "loss": 0.4137, "step": 17224 }, { "epoch": 2.30342337523402, "grad_norm": 1.4534879922866821, "learning_rate": 2.697083987323418e-06, "loss": 0.3597, "step": 17225 }, { "epoch": 2.3035571008290985, "grad_norm": 1.6492722034454346, "learning_rate": 2.69609778879672e-06, "loss": 0.3954, "step": 17226 }, { "epoch": 2.3036908264241776, "grad_norm": 1.6218595504760742, "learning_rate": 2.6951117425128715e-06, "loss": 0.424, "step": 17227 }, { "epoch": 2.3038245520192566, "grad_norm": 1.7219208478927612, "learning_rate": 2.694125848492434e-06, "loss": 0.3775, "step": 17228 }, { "epoch": 2.3039582776143352, "grad_norm": 1.6455587148666382, "learning_rate": 2.6931401067559503e-06, "loss": 0.342, "step": 17229 }, { "epoch": 2.3040920032094143, "grad_norm": 1.525317668914795, "learning_rate": 2.6921545173239684e-06, "loss": 0.3536, "step": 17230 }, { "epoch": 2.304225728804493, "grad_norm": 1.5554383993148804, "learning_rate": 2.691169080217032e-06, "loss": 0.3933, "step": 17231 }, { "epoch": 2.304359454399572, "grad_norm": 1.5328584909439087, "learning_rate": 2.690183795455684e-06, "loss": 0.3483, "step": 17232 }, { "epoch": 2.304493179994651, "grad_norm": 1.4847791194915771, "learning_rate": 2.6891986630604595e-06, "loss": 0.3311, "step": 17233 }, { "epoch": 2.3046269055897297, "grad_norm": 1.468964695930481, "learning_rate": 2.6882136830518923e-06, "loss": 0.3361, "step": 17234 }, { "epoch": 2.304760631184809, "grad_norm": 1.5205678939819336, "learning_rate": 2.6872288554505157e-06, "loss": 0.3384, "step": 17235 }, { "epoch": 2.304894356779888, "grad_norm": 1.5176019668579102, "learning_rate": 2.686244180276855e-06, "loss": 0.3739, "step": 17236 }, { "epoch": 2.3050280823749665, "grad_norm": 1.576094627380371, "learning_rate": 2.685259657551439e-06, "loss": 0.3426, "step": 17237 }, { "epoch": 2.3051618079700456, "grad_norm": 1.527039885520935, "learning_rate": 2.68427528729478e-06, "loss": 0.4112, "step": 17238 }, { "epoch": 2.305295533565124, "grad_norm": 1.5439746379852295, "learning_rate": 2.683291069527405e-06, "loss": 0.3263, "step": 17239 }, { "epoch": 2.3054292591602032, "grad_norm": 1.3784844875335693, "learning_rate": 2.6823070042698276e-06, "loss": 0.3478, "step": 17240 }, { "epoch": 2.3055629847552823, "grad_norm": 1.3741300106048584, "learning_rate": 2.681323091542557e-06, "loss": 0.3683, "step": 17241 }, { "epoch": 2.305696710350361, "grad_norm": 1.692230463027954, "learning_rate": 2.6803393313661063e-06, "loss": 0.4138, "step": 17242 }, { "epoch": 2.30583043594544, "grad_norm": 1.4910967350006104, "learning_rate": 2.6793557237609724e-06, "loss": 0.3371, "step": 17243 }, { "epoch": 2.305964161540519, "grad_norm": 1.342836618423462, "learning_rate": 2.67837226874767e-06, "loss": 0.3539, "step": 17244 }, { "epoch": 2.3060978871355977, "grad_norm": 1.5732731819152832, "learning_rate": 2.677388966346688e-06, "loss": 0.4028, "step": 17245 }, { "epoch": 2.306231612730677, "grad_norm": 1.5863515138626099, "learning_rate": 2.6764058165785233e-06, "loss": 0.4016, "step": 17246 }, { "epoch": 2.3063653383257554, "grad_norm": 1.295082688331604, "learning_rate": 2.675422819463678e-06, "loss": 0.3503, "step": 17247 }, { "epoch": 2.3064990639208345, "grad_norm": 1.596990704536438, "learning_rate": 2.674439975022628e-06, "loss": 0.3734, "step": 17248 }, { "epoch": 2.3066327895159136, "grad_norm": 1.6416265964508057, "learning_rate": 2.673457283275873e-06, "loss": 0.354, "step": 17249 }, { "epoch": 2.306766515110992, "grad_norm": 1.4452191591262817, "learning_rate": 2.672474744243888e-06, "loss": 0.3637, "step": 17250 }, { "epoch": 2.3069002407060712, "grad_norm": 1.4098650217056274, "learning_rate": 2.671492357947155e-06, "loss": 0.329, "step": 17251 }, { "epoch": 2.30703396630115, "grad_norm": 1.637052059173584, "learning_rate": 2.6705101244061506e-06, "loss": 0.3822, "step": 17252 }, { "epoch": 2.307167691896229, "grad_norm": 1.3661260604858398, "learning_rate": 2.6695280436413494e-06, "loss": 0.3706, "step": 17253 }, { "epoch": 2.307301417491308, "grad_norm": 1.4715721607208252, "learning_rate": 2.668546115673222e-06, "loss": 0.3478, "step": 17254 }, { "epoch": 2.3074351430863866, "grad_norm": 1.7912428379058838, "learning_rate": 2.667564340522235e-06, "loss": 0.4301, "step": 17255 }, { "epoch": 2.3075688686814657, "grad_norm": 1.503482699394226, "learning_rate": 2.666582718208853e-06, "loss": 0.3682, "step": 17256 }, { "epoch": 2.3077025942765443, "grad_norm": 1.5292028188705444, "learning_rate": 2.6656012487535377e-06, "loss": 0.3912, "step": 17257 }, { "epoch": 2.3078363198716234, "grad_norm": 1.6456142663955688, "learning_rate": 2.664619932176745e-06, "loss": 0.3761, "step": 17258 }, { "epoch": 2.3079700454667025, "grad_norm": 1.4305490255355835, "learning_rate": 2.663638768498932e-06, "loss": 0.3452, "step": 17259 }, { "epoch": 2.308103771061781, "grad_norm": 1.388782262802124, "learning_rate": 2.6626577577405464e-06, "loss": 0.3213, "step": 17260 }, { "epoch": 2.30823749665686, "grad_norm": 1.6446101665496826, "learning_rate": 2.661676899922041e-06, "loss": 0.367, "step": 17261 }, { "epoch": 2.308371222251939, "grad_norm": 1.4010463953018188, "learning_rate": 2.660696195063858e-06, "loss": 0.3904, "step": 17262 }, { "epoch": 2.308504947847018, "grad_norm": 1.6406043767929077, "learning_rate": 2.6597156431864423e-06, "loss": 0.3642, "step": 17263 }, { "epoch": 2.308638673442097, "grad_norm": 1.5354620218276978, "learning_rate": 2.6587352443102245e-06, "loss": 0.3697, "step": 17264 }, { "epoch": 2.3087723990371756, "grad_norm": 1.508113980293274, "learning_rate": 2.6577549984556485e-06, "loss": 0.3638, "step": 17265 }, { "epoch": 2.3089061246322546, "grad_norm": 1.43109929561615, "learning_rate": 2.656774905643147e-06, "loss": 0.406, "step": 17266 }, { "epoch": 2.3090398502273333, "grad_norm": 1.6870267391204834, "learning_rate": 2.6557949658931402e-06, "loss": 0.4078, "step": 17267 }, { "epoch": 2.3091735758224123, "grad_norm": 1.964639663696289, "learning_rate": 2.6548151792260647e-06, "loss": 0.451, "step": 17268 }, { "epoch": 2.3093073014174914, "grad_norm": 1.4153599739074707, "learning_rate": 2.653835545662333e-06, "loss": 0.3788, "step": 17269 }, { "epoch": 2.30944102701257, "grad_norm": 1.5784382820129395, "learning_rate": 2.6528560652223756e-06, "loss": 0.357, "step": 17270 }, { "epoch": 2.309574752607649, "grad_norm": 1.3733164072036743, "learning_rate": 2.651876737926601e-06, "loss": 0.3555, "step": 17271 }, { "epoch": 2.309708478202728, "grad_norm": 1.3948289155960083, "learning_rate": 2.6508975637954224e-06, "loss": 0.3954, "step": 17272 }, { "epoch": 2.309842203797807, "grad_norm": 1.6088441610336304, "learning_rate": 2.6499185428492534e-06, "loss": 0.3765, "step": 17273 }, { "epoch": 2.309975929392886, "grad_norm": 1.3912757635116577, "learning_rate": 2.6489396751084983e-06, "loss": 0.3559, "step": 17274 }, { "epoch": 2.310109654987965, "grad_norm": 1.437946081161499, "learning_rate": 2.647960960593562e-06, "loss": 0.3927, "step": 17275 }, { "epoch": 2.3102433805830436, "grad_norm": 1.6099984645843506, "learning_rate": 2.6469823993248444e-06, "loss": 0.3702, "step": 17276 }, { "epoch": 2.3103771061781226, "grad_norm": 1.662084937095642, "learning_rate": 2.646003991322742e-06, "loss": 0.3641, "step": 17277 }, { "epoch": 2.3105108317732013, "grad_norm": 1.3653630018234253, "learning_rate": 2.6450257366076494e-06, "loss": 0.3421, "step": 17278 }, { "epoch": 2.3106445573682803, "grad_norm": 1.5453976392745972, "learning_rate": 2.644047635199958e-06, "loss": 0.3257, "step": 17279 }, { "epoch": 2.3107782829633594, "grad_norm": 1.4139389991760254, "learning_rate": 2.6430696871200546e-06, "loss": 0.3879, "step": 17280 }, { "epoch": 2.310912008558438, "grad_norm": 1.6965724229812622, "learning_rate": 2.642091892388323e-06, "loss": 0.4165, "step": 17281 }, { "epoch": 2.311045734153517, "grad_norm": 1.4413119554519653, "learning_rate": 2.64111425102515e-06, "loss": 0.3115, "step": 17282 }, { "epoch": 2.3111794597485957, "grad_norm": 1.5952370166778564, "learning_rate": 2.640136763050901e-06, "loss": 0.3623, "step": 17283 }, { "epoch": 2.311313185343675, "grad_norm": 1.43675696849823, "learning_rate": 2.639159428485962e-06, "loss": 0.3826, "step": 17284 }, { "epoch": 2.311446910938754, "grad_norm": 1.7120444774627686, "learning_rate": 2.6381822473507014e-06, "loss": 0.3921, "step": 17285 }, { "epoch": 2.3115806365338325, "grad_norm": 1.4572577476501465, "learning_rate": 2.637205219665486e-06, "loss": 0.3505, "step": 17286 }, { "epoch": 2.3117143621289116, "grad_norm": 1.5276273488998413, "learning_rate": 2.6362283454506877e-06, "loss": 0.3362, "step": 17287 }, { "epoch": 2.31184808772399, "grad_norm": 1.5447605848312378, "learning_rate": 2.635251624726656e-06, "loss": 0.3949, "step": 17288 }, { "epoch": 2.3119818133190693, "grad_norm": 1.6203325986862183, "learning_rate": 2.6342750575137623e-06, "loss": 0.4039, "step": 17289 }, { "epoch": 2.3121155389141483, "grad_norm": 1.449604868888855, "learning_rate": 2.633298643832355e-06, "loss": 0.4081, "step": 17290 }, { "epoch": 2.312249264509227, "grad_norm": 1.6105260848999023, "learning_rate": 2.6323223837027876e-06, "loss": 0.4135, "step": 17291 }, { "epoch": 2.312382990104306, "grad_norm": 1.4969819784164429, "learning_rate": 2.6313462771454103e-06, "loss": 0.3325, "step": 17292 }, { "epoch": 2.3125167156993847, "grad_norm": 1.3541268110275269, "learning_rate": 2.6303703241805656e-06, "loss": 0.3699, "step": 17293 }, { "epoch": 2.3126504412944637, "grad_norm": 1.722935438156128, "learning_rate": 2.6293945248286047e-06, "loss": 0.3968, "step": 17294 }, { "epoch": 2.312784166889543, "grad_norm": 1.579074740409851, "learning_rate": 2.62841887910986e-06, "loss": 0.4123, "step": 17295 }, { "epoch": 2.3129178924846214, "grad_norm": 1.4995322227478027, "learning_rate": 2.6274433870446704e-06, "loss": 0.3655, "step": 17296 }, { "epoch": 2.3130516180797005, "grad_norm": 1.4410686492919922, "learning_rate": 2.6264680486533677e-06, "loss": 0.3953, "step": 17297 }, { "epoch": 2.313185343674779, "grad_norm": 1.307084560394287, "learning_rate": 2.6254928639562826e-06, "loss": 0.3558, "step": 17298 }, { "epoch": 2.313319069269858, "grad_norm": 1.5430079698562622, "learning_rate": 2.624517832973743e-06, "loss": 0.3798, "step": 17299 }, { "epoch": 2.3134527948649373, "grad_norm": 1.6198211908340454, "learning_rate": 2.6235429557260716e-06, "loss": 0.3956, "step": 17300 }, { "epoch": 2.313586520460016, "grad_norm": 1.4721360206604004, "learning_rate": 2.6225682322335876e-06, "loss": 0.3706, "step": 17301 }, { "epoch": 2.313720246055095, "grad_norm": 1.5926934480667114, "learning_rate": 2.6215936625166106e-06, "loss": 0.3769, "step": 17302 }, { "epoch": 2.313853971650174, "grad_norm": 1.701040267944336, "learning_rate": 2.620619246595453e-06, "loss": 0.4516, "step": 17303 }, { "epoch": 2.3139876972452527, "grad_norm": 1.6770532131195068, "learning_rate": 2.6196449844904257e-06, "loss": 0.4197, "step": 17304 }, { "epoch": 2.3141214228403317, "grad_norm": 1.6526716947555542, "learning_rate": 2.6186708762218373e-06, "loss": 0.4208, "step": 17305 }, { "epoch": 2.3142551484354104, "grad_norm": 1.3465416431427002, "learning_rate": 2.6176969218099936e-06, "loss": 0.323, "step": 17306 }, { "epoch": 2.3143888740304894, "grad_norm": 1.4111219644546509, "learning_rate": 2.6167231212751864e-06, "loss": 0.3783, "step": 17307 }, { "epoch": 2.3145225996255685, "grad_norm": 1.658780574798584, "learning_rate": 2.6157494746377276e-06, "loss": 0.3872, "step": 17308 }, { "epoch": 2.314656325220647, "grad_norm": 1.5630604028701782, "learning_rate": 2.6147759819179e-06, "loss": 0.3953, "step": 17309 }, { "epoch": 2.314790050815726, "grad_norm": 1.5395259857177734, "learning_rate": 2.613802643136002e-06, "loss": 0.3471, "step": 17310 }, { "epoch": 2.3149237764108053, "grad_norm": 1.3887840509414673, "learning_rate": 2.6128294583123236e-06, "loss": 0.4009, "step": 17311 }, { "epoch": 2.315057502005884, "grad_norm": 1.3718063831329346, "learning_rate": 2.61185642746714e-06, "loss": 0.3525, "step": 17312 }, { "epoch": 2.315191227600963, "grad_norm": 1.4196175336837769, "learning_rate": 2.6108835506207465e-06, "loss": 0.3757, "step": 17313 }, { "epoch": 2.3153249531960416, "grad_norm": 1.5935378074645996, "learning_rate": 2.6099108277934105e-06, "loss": 0.4224, "step": 17314 }, { "epoch": 2.3154586787911207, "grad_norm": 1.4283748865127563, "learning_rate": 2.6089382590054122e-06, "loss": 0.3574, "step": 17315 }, { "epoch": 2.3155924043861997, "grad_norm": 1.3659788370132446, "learning_rate": 2.607965844277024e-06, "loss": 0.3669, "step": 17316 }, { "epoch": 2.3157261299812784, "grad_norm": 1.509053111076355, "learning_rate": 2.606993583628513e-06, "loss": 0.3626, "step": 17317 }, { "epoch": 2.3158598555763574, "grad_norm": 1.476450800895691, "learning_rate": 2.606021477080147e-06, "loss": 0.3718, "step": 17318 }, { "epoch": 2.315993581171436, "grad_norm": 1.5358840227127075, "learning_rate": 2.605049524652189e-06, "loss": 0.3752, "step": 17319 }, { "epoch": 2.316127306766515, "grad_norm": 1.5286520719528198, "learning_rate": 2.6040777263648964e-06, "loss": 0.3665, "step": 17320 }, { "epoch": 2.316261032361594, "grad_norm": 1.4930964708328247, "learning_rate": 2.603106082238527e-06, "loss": 0.3717, "step": 17321 }, { "epoch": 2.316394757956673, "grad_norm": 1.5236842632293701, "learning_rate": 2.6021345922933328e-06, "loss": 0.384, "step": 17322 }, { "epoch": 2.316528483551752, "grad_norm": 1.575071096420288, "learning_rate": 2.6011632565495646e-06, "loss": 0.3714, "step": 17323 }, { "epoch": 2.3166622091468305, "grad_norm": 1.4129579067230225, "learning_rate": 2.600192075027468e-06, "loss": 0.3595, "step": 17324 }, { "epoch": 2.3167959347419096, "grad_norm": 1.6925137042999268, "learning_rate": 2.5992210477472866e-06, "loss": 0.3785, "step": 17325 }, { "epoch": 2.3169296603369887, "grad_norm": 1.6678059101104736, "learning_rate": 2.598250174729261e-06, "loss": 0.4124, "step": 17326 }, { "epoch": 2.3170633859320673, "grad_norm": 1.4436016082763672, "learning_rate": 2.597279455993631e-06, "loss": 0.3724, "step": 17327 }, { "epoch": 2.3171971115271464, "grad_norm": 1.6795214414596558, "learning_rate": 2.5963088915606204e-06, "loss": 0.4053, "step": 17328 }, { "epoch": 2.317330837122225, "grad_norm": 1.7218916416168213, "learning_rate": 2.59533848145047e-06, "loss": 0.3873, "step": 17329 }, { "epoch": 2.317464562717304, "grad_norm": 1.385292410850525, "learning_rate": 2.594368225683407e-06, "loss": 0.3536, "step": 17330 }, { "epoch": 2.317598288312383, "grad_norm": 1.4175090789794922, "learning_rate": 2.5933981242796445e-06, "loss": 0.3684, "step": 17331 }, { "epoch": 2.3177320139074618, "grad_norm": 1.4795702695846558, "learning_rate": 2.5924281772594174e-06, "loss": 0.3959, "step": 17332 }, { "epoch": 2.317865739502541, "grad_norm": 1.567068338394165, "learning_rate": 2.591458384642931e-06, "loss": 0.4085, "step": 17333 }, { "epoch": 2.3179994650976194, "grad_norm": 1.549087643623352, "learning_rate": 2.5904887464504115e-06, "loss": 0.3724, "step": 17334 }, { "epoch": 2.3181331906926985, "grad_norm": 1.4080950021743774, "learning_rate": 2.5895192627020604e-06, "loss": 0.4163, "step": 17335 }, { "epoch": 2.3182669162877776, "grad_norm": 1.4772248268127441, "learning_rate": 2.5885499334180887e-06, "loss": 0.4135, "step": 17336 }, { "epoch": 2.318400641882856, "grad_norm": 1.6864362955093384, "learning_rate": 2.587580758618703e-06, "loss": 0.4347, "step": 17337 }, { "epoch": 2.3185343674779353, "grad_norm": 1.459546685218811, "learning_rate": 2.5866117383240997e-06, "loss": 0.353, "step": 17338 }, { "epoch": 2.3186680930730144, "grad_norm": 1.487243890762329, "learning_rate": 2.5856428725544868e-06, "loss": 0.371, "step": 17339 }, { "epoch": 2.318801818668093, "grad_norm": 1.519048810005188, "learning_rate": 2.584674161330051e-06, "loss": 0.368, "step": 17340 }, { "epoch": 2.318935544263172, "grad_norm": 1.3728705644607544, "learning_rate": 2.583705604670985e-06, "loss": 0.3849, "step": 17341 }, { "epoch": 2.3190692698582507, "grad_norm": 1.6111341714859009, "learning_rate": 2.5827372025974804e-06, "loss": 0.4151, "step": 17342 }, { "epoch": 2.3192029954533298, "grad_norm": 1.4583547115325928, "learning_rate": 2.581768955129722e-06, "loss": 0.3496, "step": 17343 }, { "epoch": 2.319336721048409, "grad_norm": 1.3331198692321777, "learning_rate": 2.58080086228789e-06, "loss": 0.3686, "step": 17344 }, { "epoch": 2.3194704466434874, "grad_norm": 1.408850908279419, "learning_rate": 2.579832924092165e-06, "loss": 0.3727, "step": 17345 }, { "epoch": 2.3196041722385665, "grad_norm": 1.5345115661621094, "learning_rate": 2.578865140562722e-06, "loss": 0.3468, "step": 17346 }, { "epoch": 2.3197378978336456, "grad_norm": 1.5069886445999146, "learning_rate": 2.577897511719735e-06, "loss": 0.3853, "step": 17347 }, { "epoch": 2.319871623428724, "grad_norm": 1.452288269996643, "learning_rate": 2.5769300375833705e-06, "loss": 0.3782, "step": 17348 }, { "epoch": 2.3200053490238033, "grad_norm": 1.5234006643295288, "learning_rate": 2.5759627181737977e-06, "loss": 0.4271, "step": 17349 }, { "epoch": 2.320139074618882, "grad_norm": 1.442626953125, "learning_rate": 2.574995553511177e-06, "loss": 0.3326, "step": 17350 }, { "epoch": 2.320272800213961, "grad_norm": 1.3990323543548584, "learning_rate": 2.5740285436156732e-06, "loss": 0.352, "step": 17351 }, { "epoch": 2.32040652580904, "grad_norm": 1.4189847707748413, "learning_rate": 2.573061688507431e-06, "loss": 0.3578, "step": 17352 }, { "epoch": 2.3205402514041187, "grad_norm": 1.3224866390228271, "learning_rate": 2.5720949882066184e-06, "loss": 0.3613, "step": 17353 }, { "epoch": 2.3206739769991978, "grad_norm": 1.6355680227279663, "learning_rate": 2.5711284427333716e-06, "loss": 0.3903, "step": 17354 }, { "epoch": 2.3208077025942764, "grad_norm": 1.4673792123794556, "learning_rate": 2.5701620521078497e-06, "loss": 0.3676, "step": 17355 }, { "epoch": 2.3209414281893554, "grad_norm": 1.5565831661224365, "learning_rate": 2.5691958163501875e-06, "loss": 0.3669, "step": 17356 }, { "epoch": 2.3210751537844345, "grad_norm": 1.6876822710037231, "learning_rate": 2.568229735480524e-06, "loss": 0.3789, "step": 17357 }, { "epoch": 2.321208879379513, "grad_norm": 1.5254268646240234, "learning_rate": 2.567263809519007e-06, "loss": 0.339, "step": 17358 }, { "epoch": 2.321342604974592, "grad_norm": 1.4397848844528198, "learning_rate": 2.5662980384857605e-06, "loss": 0.3452, "step": 17359 }, { "epoch": 2.321476330569671, "grad_norm": 1.6071133613586426, "learning_rate": 2.5653324224009192e-06, "loss": 0.3539, "step": 17360 }, { "epoch": 2.32161005616475, "grad_norm": 1.607040286064148, "learning_rate": 2.564366961284608e-06, "loss": 0.3885, "step": 17361 }, { "epoch": 2.321743781759829, "grad_norm": 1.6239193677902222, "learning_rate": 2.563401655156952e-06, "loss": 0.4151, "step": 17362 }, { "epoch": 2.3218775073549076, "grad_norm": 1.3694560527801514, "learning_rate": 2.562436504038074e-06, "loss": 0.3529, "step": 17363 }, { "epoch": 2.3220112329499867, "grad_norm": 1.5949527025222778, "learning_rate": 2.561471507948089e-06, "loss": 0.3779, "step": 17364 }, { "epoch": 2.3221449585450653, "grad_norm": 1.5379475355148315, "learning_rate": 2.5605066669071123e-06, "loss": 0.3697, "step": 17365 }, { "epoch": 2.3222786841401444, "grad_norm": 1.5675251483917236, "learning_rate": 2.559541980935256e-06, "loss": 0.3417, "step": 17366 }, { "epoch": 2.3224124097352234, "grad_norm": 1.6319044828414917, "learning_rate": 2.558577450052627e-06, "loss": 0.3592, "step": 17367 }, { "epoch": 2.322546135330302, "grad_norm": 1.318451166152954, "learning_rate": 2.5576130742793304e-06, "loss": 0.3307, "step": 17368 }, { "epoch": 2.322679860925381, "grad_norm": 1.297761082649231, "learning_rate": 2.5566488536354673e-06, "loss": 0.3111, "step": 17369 }, { "epoch": 2.3228135865204598, "grad_norm": 1.619277834892273, "learning_rate": 2.555684788141137e-06, "loss": 0.2983, "step": 17370 }, { "epoch": 2.322947312115539, "grad_norm": 1.3799242973327637, "learning_rate": 2.5547208778164336e-06, "loss": 0.3385, "step": 17371 }, { "epoch": 2.323081037710618, "grad_norm": 1.5364391803741455, "learning_rate": 2.5537571226814517e-06, "loss": 0.3603, "step": 17372 }, { "epoch": 2.3232147633056965, "grad_norm": 1.5701355934143066, "learning_rate": 2.5527935227562716e-06, "loss": 0.3675, "step": 17373 }, { "epoch": 2.3233484889007756, "grad_norm": 1.7673043012619019, "learning_rate": 2.5518300780609905e-06, "loss": 0.4049, "step": 17374 }, { "epoch": 2.3234822144958547, "grad_norm": 1.6083894968032837, "learning_rate": 2.5508667886156814e-06, "loss": 0.3571, "step": 17375 }, { "epoch": 2.3236159400909333, "grad_norm": 1.6863664388656616, "learning_rate": 2.549903654440423e-06, "loss": 0.4013, "step": 17376 }, { "epoch": 2.3237496656860124, "grad_norm": 1.5348397493362427, "learning_rate": 2.5489406755553005e-06, "loss": 0.34, "step": 17377 }, { "epoch": 2.3238833912810914, "grad_norm": 1.376990556716919, "learning_rate": 2.547977851980373e-06, "loss": 0.3528, "step": 17378 }, { "epoch": 2.32401711687617, "grad_norm": 1.4662714004516602, "learning_rate": 2.5470151837357227e-06, "loss": 0.3391, "step": 17379 }, { "epoch": 2.324150842471249, "grad_norm": 1.522168755531311, "learning_rate": 2.546052670841406e-06, "loss": 0.377, "step": 17380 }, { "epoch": 2.3242845680663278, "grad_norm": 1.5200400352478027, "learning_rate": 2.5450903133174878e-06, "loss": 0.3398, "step": 17381 }, { "epoch": 2.324418293661407, "grad_norm": 1.5238368511199951, "learning_rate": 2.54412811118403e-06, "loss": 0.3516, "step": 17382 }, { "epoch": 2.324552019256486, "grad_norm": 1.58733069896698, "learning_rate": 2.5431660644610856e-06, "loss": 0.3385, "step": 17383 }, { "epoch": 2.3246857448515645, "grad_norm": 1.3430794477462769, "learning_rate": 2.542204173168711e-06, "loss": 0.3129, "step": 17384 }, { "epoch": 2.3248194704466436, "grad_norm": 1.43308687210083, "learning_rate": 2.541242437326953e-06, "loss": 0.3484, "step": 17385 }, { "epoch": 2.3249531960417222, "grad_norm": 1.5380980968475342, "learning_rate": 2.540280856955859e-06, "loss": 0.3646, "step": 17386 }, { "epoch": 2.3250869216368013, "grad_norm": 1.5723624229431152, "learning_rate": 2.539319432075472e-06, "loss": 0.3502, "step": 17387 }, { "epoch": 2.3252206472318804, "grad_norm": 1.6219971179962158, "learning_rate": 2.538358162705834e-06, "loss": 0.3596, "step": 17388 }, { "epoch": 2.325354372826959, "grad_norm": 1.5307294130325317, "learning_rate": 2.5373970488669784e-06, "loss": 0.3404, "step": 17389 }, { "epoch": 2.325488098422038, "grad_norm": 1.7119563817977905, "learning_rate": 2.536436090578941e-06, "loss": 0.4327, "step": 17390 }, { "epoch": 2.3256218240171167, "grad_norm": 1.5795284509658813, "learning_rate": 2.535475287861755e-06, "loss": 0.3429, "step": 17391 }, { "epoch": 2.3257555496121958, "grad_norm": 1.4756495952606201, "learning_rate": 2.534514640735437e-06, "loss": 0.3267, "step": 17392 }, { "epoch": 2.325889275207275, "grad_norm": 1.5753260850906372, "learning_rate": 2.533554149220024e-06, "loss": 0.3654, "step": 17393 }, { "epoch": 2.3260230008023535, "grad_norm": 1.6040136814117432, "learning_rate": 2.532593813335524e-06, "loss": 0.3742, "step": 17394 }, { "epoch": 2.3261567263974325, "grad_norm": 1.6881283521652222, "learning_rate": 2.531633633101964e-06, "loss": 0.3991, "step": 17395 }, { "epoch": 2.326290451992511, "grad_norm": 1.508731722831726, "learning_rate": 2.530673608539357e-06, "loss": 0.3627, "step": 17396 }, { "epoch": 2.3264241775875902, "grad_norm": 1.5773926973342896, "learning_rate": 2.529713739667705e-06, "loss": 0.3423, "step": 17397 }, { "epoch": 2.3265579031826693, "grad_norm": 1.667004942893982, "learning_rate": 2.5287540265070277e-06, "loss": 0.3494, "step": 17398 }, { "epoch": 2.326691628777748, "grad_norm": 1.5701279640197754, "learning_rate": 2.5277944690773213e-06, "loss": 0.4043, "step": 17399 }, { "epoch": 2.326825354372827, "grad_norm": 1.41645085811615, "learning_rate": 2.5268350673985887e-06, "loss": 0.3364, "step": 17400 }, { "epoch": 2.3269590799679056, "grad_norm": 1.483176827430725, "learning_rate": 2.5258758214908273e-06, "loss": 0.3774, "step": 17401 }, { "epoch": 2.3270928055629847, "grad_norm": 1.4493509531021118, "learning_rate": 2.5249167313740307e-06, "loss": 0.3623, "step": 17402 }, { "epoch": 2.3272265311580638, "grad_norm": 1.496437668800354, "learning_rate": 2.523957797068197e-06, "loss": 0.3894, "step": 17403 }, { "epoch": 2.3273602567531424, "grad_norm": 1.7576886415481567, "learning_rate": 2.5229990185933075e-06, "loss": 0.439, "step": 17404 }, { "epoch": 2.3274939823482215, "grad_norm": 1.4831782579421997, "learning_rate": 2.5220403959693473e-06, "loss": 0.3662, "step": 17405 }, { "epoch": 2.3276277079433005, "grad_norm": 1.5239614248275757, "learning_rate": 2.5210819292163003e-06, "loss": 0.3381, "step": 17406 }, { "epoch": 2.327761433538379, "grad_norm": 1.4576964378356934, "learning_rate": 2.5201236183541433e-06, "loss": 0.3383, "step": 17407 }, { "epoch": 2.3278951591334582, "grad_norm": 1.570426106452942, "learning_rate": 2.519165463402853e-06, "loss": 0.3873, "step": 17408 }, { "epoch": 2.328028884728537, "grad_norm": 1.5089309215545654, "learning_rate": 2.5182074643823996e-06, "loss": 0.3694, "step": 17409 }, { "epoch": 2.328162610323616, "grad_norm": 1.7286098003387451, "learning_rate": 2.517249621312752e-06, "loss": 0.4302, "step": 17410 }, { "epoch": 2.328296335918695, "grad_norm": 1.3437933921813965, "learning_rate": 2.516291934213876e-06, "loss": 0.3413, "step": 17411 }, { "epoch": 2.3284300615137736, "grad_norm": 1.4931237697601318, "learning_rate": 2.5153344031057337e-06, "loss": 0.377, "step": 17412 }, { "epoch": 2.3285637871088527, "grad_norm": 1.443722128868103, "learning_rate": 2.5143770280082837e-06, "loss": 0.3786, "step": 17413 }, { "epoch": 2.3286975127039318, "grad_norm": 1.553514838218689, "learning_rate": 2.513419808941482e-06, "loss": 0.3514, "step": 17414 }, { "epoch": 2.3288312382990104, "grad_norm": 1.6258881092071533, "learning_rate": 2.5124627459252826e-06, "loss": 0.3721, "step": 17415 }, { "epoch": 2.3289649638940895, "grad_norm": 1.626522421836853, "learning_rate": 2.5115058389796264e-06, "loss": 0.3513, "step": 17416 }, { "epoch": 2.329098689489168, "grad_norm": 1.4538726806640625, "learning_rate": 2.510549088124472e-06, "loss": 0.3595, "step": 17417 }, { "epoch": 2.329232415084247, "grad_norm": 1.4378339052200317, "learning_rate": 2.509592493379749e-06, "loss": 0.3397, "step": 17418 }, { "epoch": 2.3293661406793262, "grad_norm": 1.31521475315094, "learning_rate": 2.5086360547654088e-06, "loss": 0.306, "step": 17419 }, { "epoch": 2.329499866274405, "grad_norm": 1.5718178749084473, "learning_rate": 2.507679772301379e-06, "loss": 0.3651, "step": 17420 }, { "epoch": 2.329633591869484, "grad_norm": 1.7533091306686401, "learning_rate": 2.5067236460075916e-06, "loss": 0.4231, "step": 17421 }, { "epoch": 2.3297673174645626, "grad_norm": 1.6867796182632446, "learning_rate": 2.505767675903985e-06, "loss": 0.3965, "step": 17422 }, { "epoch": 2.3299010430596416, "grad_norm": 1.5020016431808472, "learning_rate": 2.5048118620104754e-06, "loss": 0.3993, "step": 17423 }, { "epoch": 2.3300347686547207, "grad_norm": 1.4248294830322266, "learning_rate": 2.503856204346995e-06, "loss": 0.3557, "step": 17424 }, { "epoch": 2.3301684942497993, "grad_norm": 1.6056840419769287, "learning_rate": 2.5029007029334574e-06, "loss": 0.3938, "step": 17425 }, { "epoch": 2.3303022198448784, "grad_norm": 1.6165626049041748, "learning_rate": 2.501945357789779e-06, "loss": 0.3733, "step": 17426 }, { "epoch": 2.330435945439957, "grad_norm": 1.730675458908081, "learning_rate": 2.5009901689358763e-06, "loss": 0.3617, "step": 17427 }, { "epoch": 2.330569671035036, "grad_norm": 1.538291573524475, "learning_rate": 2.5000351363916564e-06, "loss": 0.344, "step": 17428 }, { "epoch": 2.330703396630115, "grad_norm": 1.405634880065918, "learning_rate": 2.499080260177028e-06, "loss": 0.3671, "step": 17429 }, { "epoch": 2.330837122225194, "grad_norm": 1.5386171340942383, "learning_rate": 2.4981255403118942e-06, "loss": 0.3292, "step": 17430 }, { "epoch": 2.330970847820273, "grad_norm": 1.6686047315597534, "learning_rate": 2.497170976816156e-06, "loss": 0.3713, "step": 17431 }, { "epoch": 2.3311045734153515, "grad_norm": 1.7270348072052002, "learning_rate": 2.4962165697097075e-06, "loss": 0.3855, "step": 17432 }, { "epoch": 2.3312382990104306, "grad_norm": 1.5836387872695923, "learning_rate": 2.495262319012445e-06, "loss": 0.3959, "step": 17433 }, { "epoch": 2.3313720246055096, "grad_norm": 1.5012779235839844, "learning_rate": 2.4943082247442584e-06, "loss": 0.3392, "step": 17434 }, { "epoch": 2.3315057502005883, "grad_norm": 1.649025321006775, "learning_rate": 2.493354286925035e-06, "loss": 0.4016, "step": 17435 }, { "epoch": 2.3316394757956673, "grad_norm": 1.3496390581130981, "learning_rate": 2.4924005055746603e-06, "loss": 0.3365, "step": 17436 }, { "epoch": 2.331773201390746, "grad_norm": 1.4612303972244263, "learning_rate": 2.4914468807130076e-06, "loss": 0.3335, "step": 17437 }, { "epoch": 2.331906926985825, "grad_norm": 1.624683141708374, "learning_rate": 2.4904934123599657e-06, "loss": 0.3241, "step": 17438 }, { "epoch": 2.332040652580904, "grad_norm": 1.5036067962646484, "learning_rate": 2.489540100535397e-06, "loss": 0.3434, "step": 17439 }, { "epoch": 2.3321743781759827, "grad_norm": 1.6572527885437012, "learning_rate": 2.4885869452591817e-06, "loss": 0.387, "step": 17440 }, { "epoch": 2.332308103771062, "grad_norm": 1.388824701309204, "learning_rate": 2.4876339465511857e-06, "loss": 0.3879, "step": 17441 }, { "epoch": 2.332441829366141, "grad_norm": 1.5635526180267334, "learning_rate": 2.4866811044312667e-06, "loss": 0.3748, "step": 17442 }, { "epoch": 2.3325755549612195, "grad_norm": 1.5039589405059814, "learning_rate": 2.4857284189192956e-06, "loss": 0.4148, "step": 17443 }, { "epoch": 2.3327092805562986, "grad_norm": 1.4200513362884521, "learning_rate": 2.4847758900351226e-06, "loss": 0.3494, "step": 17444 }, { "epoch": 2.332843006151377, "grad_norm": 1.5599287748336792, "learning_rate": 2.4838235177986046e-06, "loss": 0.3536, "step": 17445 }, { "epoch": 2.3329767317464563, "grad_norm": 1.4237825870513916, "learning_rate": 2.4828713022295936e-06, "loss": 0.3501, "step": 17446 }, { "epoch": 2.3331104573415353, "grad_norm": 1.455592155456543, "learning_rate": 2.4819192433479344e-06, "loss": 0.3555, "step": 17447 }, { "epoch": 2.333244182936614, "grad_norm": 1.4499908685684204, "learning_rate": 2.4809673411734805e-06, "loss": 0.3917, "step": 17448 }, { "epoch": 2.333377908531693, "grad_norm": 1.3951843976974487, "learning_rate": 2.4800155957260643e-06, "loss": 0.3358, "step": 17449 }, { "epoch": 2.333511634126772, "grad_norm": 1.8905631303787231, "learning_rate": 2.4790640070255267e-06, "loss": 0.4297, "step": 17450 }, { "epoch": 2.3336453597218507, "grad_norm": 1.5401147603988647, "learning_rate": 2.4781125750917036e-06, "loss": 0.3962, "step": 17451 }, { "epoch": 2.33377908531693, "grad_norm": 1.3732661008834839, "learning_rate": 2.477161299944426e-06, "loss": 0.3055, "step": 17452 }, { "epoch": 2.3339128109120084, "grad_norm": 1.3883804082870483, "learning_rate": 2.476210181603522e-06, "loss": 0.3277, "step": 17453 }, { "epoch": 2.3340465365070875, "grad_norm": 1.5273683071136475, "learning_rate": 2.4752592200888183e-06, "loss": 0.4022, "step": 17454 }, { "epoch": 2.3341802621021666, "grad_norm": 1.4578170776367188, "learning_rate": 2.474308415420136e-06, "loss": 0.3448, "step": 17455 }, { "epoch": 2.334313987697245, "grad_norm": 1.3809643983840942, "learning_rate": 2.4733577676172927e-06, "loss": 0.3651, "step": 17456 }, { "epoch": 2.3344477132923243, "grad_norm": 1.4942042827606201, "learning_rate": 2.4724072767001074e-06, "loss": 0.3591, "step": 17457 }, { "epoch": 2.334581438887403, "grad_norm": 1.4241713285446167, "learning_rate": 2.471456942688384e-06, "loss": 0.3398, "step": 17458 }, { "epoch": 2.334715164482482, "grad_norm": 1.5271642208099365, "learning_rate": 2.4705067656019386e-06, "loss": 0.3442, "step": 17459 }, { "epoch": 2.334848890077561, "grad_norm": 1.6252468824386597, "learning_rate": 2.4695567454605785e-06, "loss": 0.3968, "step": 17460 }, { "epoch": 2.3349826156726396, "grad_norm": 1.5473562479019165, "learning_rate": 2.468606882284096e-06, "loss": 0.3754, "step": 17461 }, { "epoch": 2.3351163412677187, "grad_norm": 1.491461157798767, "learning_rate": 2.467657176092302e-06, "loss": 0.3582, "step": 17462 }, { "epoch": 2.3352500668627973, "grad_norm": 1.4178305864334106, "learning_rate": 2.4667076269049805e-06, "loss": 0.3944, "step": 17463 }, { "epoch": 2.3353837924578764, "grad_norm": 1.3842869997024536, "learning_rate": 2.465758234741936e-06, "loss": 0.3278, "step": 17464 }, { "epoch": 2.3355175180529555, "grad_norm": 1.6831704378128052, "learning_rate": 2.4648089996229485e-06, "loss": 0.4049, "step": 17465 }, { "epoch": 2.335651243648034, "grad_norm": 1.4243803024291992, "learning_rate": 2.463859921567805e-06, "loss": 0.3538, "step": 17466 }, { "epoch": 2.335784969243113, "grad_norm": 1.5287054777145386, "learning_rate": 2.4629110005962954e-06, "loss": 0.3754, "step": 17467 }, { "epoch": 2.335918694838192, "grad_norm": 1.5389456748962402, "learning_rate": 2.4619622367281905e-06, "loss": 0.3491, "step": 17468 }, { "epoch": 2.336052420433271, "grad_norm": 1.6213796138763428, "learning_rate": 2.4610136299832697e-06, "loss": 0.3921, "step": 17469 }, { "epoch": 2.33618614602835, "grad_norm": 1.5653772354125977, "learning_rate": 2.4600651803813057e-06, "loss": 0.3447, "step": 17470 }, { "epoch": 2.3363198716234286, "grad_norm": 1.449273705482483, "learning_rate": 2.459116887942069e-06, "loss": 0.3518, "step": 17471 }, { "epoch": 2.3364535972185076, "grad_norm": 1.8648433685302734, "learning_rate": 2.4581687526853235e-06, "loss": 0.3961, "step": 17472 }, { "epoch": 2.3365873228135863, "grad_norm": 1.5202324390411377, "learning_rate": 2.457220774630835e-06, "loss": 0.3725, "step": 17473 }, { "epoch": 2.3367210484086653, "grad_norm": 1.4705729484558105, "learning_rate": 2.456272953798361e-06, "loss": 0.3641, "step": 17474 }, { "epoch": 2.3368547740037444, "grad_norm": 1.4698258638381958, "learning_rate": 2.4553252902076595e-06, "loss": 0.3726, "step": 17475 }, { "epoch": 2.336988499598823, "grad_norm": 1.645731806755066, "learning_rate": 2.4543777838784855e-06, "loss": 0.4108, "step": 17476 }, { "epoch": 2.337122225193902, "grad_norm": 1.755331039428711, "learning_rate": 2.4534304348305795e-06, "loss": 0.3869, "step": 17477 }, { "epoch": 2.337255950788981, "grad_norm": 1.6362115144729614, "learning_rate": 2.452483243083699e-06, "loss": 0.4211, "step": 17478 }, { "epoch": 2.33738967638406, "grad_norm": 1.6000616550445557, "learning_rate": 2.4515362086575824e-06, "loss": 0.41, "step": 17479 }, { "epoch": 2.337523401979139, "grad_norm": 1.4886176586151123, "learning_rate": 2.45058933157197e-06, "loss": 0.3665, "step": 17480 }, { "epoch": 2.337657127574218, "grad_norm": 1.780479907989502, "learning_rate": 2.449642611846602e-06, "loss": 0.3826, "step": 17481 }, { "epoch": 2.3377908531692966, "grad_norm": 1.5574089288711548, "learning_rate": 2.4486960495012037e-06, "loss": 0.352, "step": 17482 }, { "epoch": 2.3379245787643756, "grad_norm": 1.6260024309158325, "learning_rate": 2.447749644555516e-06, "loss": 0.4245, "step": 17483 }, { "epoch": 2.3380583043594543, "grad_norm": 1.5757054090499878, "learning_rate": 2.446803397029257e-06, "loss": 0.3281, "step": 17484 }, { "epoch": 2.3381920299545333, "grad_norm": 1.5461342334747314, "learning_rate": 2.445857306942151e-06, "loss": 0.3612, "step": 17485 }, { "epoch": 2.3383257555496124, "grad_norm": 1.5894041061401367, "learning_rate": 2.444911374313926e-06, "loss": 0.3776, "step": 17486 }, { "epoch": 2.338459481144691, "grad_norm": 1.483266830444336, "learning_rate": 2.4439655991642897e-06, "loss": 0.3763, "step": 17487 }, { "epoch": 2.33859320673977, "grad_norm": 1.5486618280410767, "learning_rate": 2.443019981512964e-06, "loss": 0.4308, "step": 17488 }, { "epoch": 2.3387269323348487, "grad_norm": 1.3916655778884888, "learning_rate": 2.442074521379654e-06, "loss": 0.3087, "step": 17489 }, { "epoch": 2.338860657929928, "grad_norm": 1.835715889930725, "learning_rate": 2.4411292187840685e-06, "loss": 0.4419, "step": 17490 }, { "epoch": 2.338994383525007, "grad_norm": 1.6722612380981445, "learning_rate": 2.4401840737459104e-06, "loss": 0.3871, "step": 17491 }, { "epoch": 2.3391281091200855, "grad_norm": 1.677459478378296, "learning_rate": 2.4392390862848826e-06, "loss": 0.3671, "step": 17492 }, { "epoch": 2.3392618347151646, "grad_norm": 1.6830699443817139, "learning_rate": 2.43829425642068e-06, "loss": 0.4144, "step": 17493 }, { "epoch": 2.339395560310243, "grad_norm": 1.5476511716842651, "learning_rate": 2.4373495841729987e-06, "loss": 0.3736, "step": 17494 }, { "epoch": 2.3395292859053223, "grad_norm": 1.6017037630081177, "learning_rate": 2.4364050695615284e-06, "loss": 0.3506, "step": 17495 }, { "epoch": 2.3396630115004013, "grad_norm": 1.462943434715271, "learning_rate": 2.435460712605956e-06, "loss": 0.3494, "step": 17496 }, { "epoch": 2.33979673709548, "grad_norm": 1.6367404460906982, "learning_rate": 2.4345165133259673e-06, "loss": 0.4114, "step": 17497 }, { "epoch": 2.339930462690559, "grad_norm": 1.4398554563522339, "learning_rate": 2.4335724717412433e-06, "loss": 0.3429, "step": 17498 }, { "epoch": 2.3400641882856377, "grad_norm": 1.4657833576202393, "learning_rate": 2.4326285878714595e-06, "loss": 0.3271, "step": 17499 }, { "epoch": 2.3401979138807167, "grad_norm": 1.2831934690475464, "learning_rate": 2.4316848617362952e-06, "loss": 0.3376, "step": 17500 }, { "epoch": 2.340331639475796, "grad_norm": 1.6171060800552368, "learning_rate": 2.430741293355412e-06, "loss": 0.3608, "step": 17501 }, { "epoch": 2.3404653650708744, "grad_norm": 1.6490013599395752, "learning_rate": 2.4297978827484893e-06, "loss": 0.4112, "step": 17502 }, { "epoch": 2.3405990906659535, "grad_norm": 1.5419689416885376, "learning_rate": 2.42885462993518e-06, "loss": 0.3597, "step": 17503 }, { "epoch": 2.340732816261032, "grad_norm": 1.4847825765609741, "learning_rate": 2.4279115349351546e-06, "loss": 0.3592, "step": 17504 }, { "epoch": 2.340866541856111, "grad_norm": 1.53132164478302, "learning_rate": 2.426968597768069e-06, "loss": 0.3704, "step": 17505 }, { "epoch": 2.3410002674511903, "grad_norm": 1.7561753988265991, "learning_rate": 2.426025818453572e-06, "loss": 0.3668, "step": 17506 }, { "epoch": 2.341133993046269, "grad_norm": 1.456761121749878, "learning_rate": 2.425083197011324e-06, "loss": 0.385, "step": 17507 }, { "epoch": 2.341267718641348, "grad_norm": 1.5619450807571411, "learning_rate": 2.4241407334609634e-06, "loss": 0.4037, "step": 17508 }, { "epoch": 2.341401444236427, "grad_norm": 1.4734280109405518, "learning_rate": 2.4231984278221453e-06, "loss": 0.3403, "step": 17509 }, { "epoch": 2.3415351698315057, "grad_norm": 1.5037074089050293, "learning_rate": 2.4222562801145035e-06, "loss": 0.3531, "step": 17510 }, { "epoch": 2.3416688954265847, "grad_norm": 1.4884778261184692, "learning_rate": 2.421314290357675e-06, "loss": 0.3782, "step": 17511 }, { "epoch": 2.3418026210216634, "grad_norm": 1.478894829750061, "learning_rate": 2.420372458571304e-06, "loss": 0.3674, "step": 17512 }, { "epoch": 2.3419363466167424, "grad_norm": 1.4484221935272217, "learning_rate": 2.419430784775013e-06, "loss": 0.3232, "step": 17513 }, { "epoch": 2.3420700722118215, "grad_norm": 1.373468279838562, "learning_rate": 2.418489268988433e-06, "loss": 0.3663, "step": 17514 }, { "epoch": 2.3422037978069, "grad_norm": 1.6089197397232056, "learning_rate": 2.4175479112311904e-06, "loss": 0.37, "step": 17515 }, { "epoch": 2.342337523401979, "grad_norm": 1.5382609367370605, "learning_rate": 2.4166067115229062e-06, "loss": 0.3965, "step": 17516 }, { "epoch": 2.3424712489970583, "grad_norm": 1.325708031654358, "learning_rate": 2.415665669883198e-06, "loss": 0.365, "step": 17517 }, { "epoch": 2.342604974592137, "grad_norm": 1.5110138654708862, "learning_rate": 2.4147247863316814e-06, "loss": 0.3673, "step": 17518 }, { "epoch": 2.342738700187216, "grad_norm": 1.4987272024154663, "learning_rate": 2.4137840608879682e-06, "loss": 0.4058, "step": 17519 }, { "epoch": 2.3428724257822946, "grad_norm": 1.4979009628295898, "learning_rate": 2.4128434935716673e-06, "loss": 0.3942, "step": 17520 }, { "epoch": 2.3430061513773737, "grad_norm": 1.388946771621704, "learning_rate": 2.411903084402387e-06, "loss": 0.3399, "step": 17521 }, { "epoch": 2.3431398769724527, "grad_norm": 1.5249682664871216, "learning_rate": 2.410962833399719e-06, "loss": 0.3723, "step": 17522 }, { "epoch": 2.3432736025675314, "grad_norm": 1.6147184371948242, "learning_rate": 2.4100227405832734e-06, "loss": 0.4015, "step": 17523 }, { "epoch": 2.3434073281626104, "grad_norm": 1.434480905532837, "learning_rate": 2.409082805972639e-06, "loss": 0.3244, "step": 17524 }, { "epoch": 2.343541053757689, "grad_norm": 1.5144776105880737, "learning_rate": 2.408143029587411e-06, "loss": 0.319, "step": 17525 }, { "epoch": 2.343674779352768, "grad_norm": 1.3578449487686157, "learning_rate": 2.40720341144718e-06, "loss": 0.3393, "step": 17526 }, { "epoch": 2.343808504947847, "grad_norm": 1.6287689208984375, "learning_rate": 2.4062639515715214e-06, "loss": 0.4123, "step": 17527 }, { "epoch": 2.343942230542926, "grad_norm": 1.6070001125335693, "learning_rate": 2.4053246499800307e-06, "loss": 0.3875, "step": 17528 }, { "epoch": 2.344075956138005, "grad_norm": 1.6061204671859741, "learning_rate": 2.4043855066922783e-06, "loss": 0.3831, "step": 17529 }, { "epoch": 2.3442096817330835, "grad_norm": 1.358227252960205, "learning_rate": 2.403446521727838e-06, "loss": 0.3779, "step": 17530 }, { "epoch": 2.3443434073281626, "grad_norm": 1.5503959655761719, "learning_rate": 2.402507695106292e-06, "loss": 0.3562, "step": 17531 }, { "epoch": 2.3444771329232417, "grad_norm": 1.5253574848175049, "learning_rate": 2.401569026847197e-06, "loss": 0.3923, "step": 17532 }, { "epoch": 2.3446108585183203, "grad_norm": 1.517142415046692, "learning_rate": 2.4006305169701306e-06, "loss": 0.3841, "step": 17533 }, { "epoch": 2.3447445841133994, "grad_norm": 1.5566586256027222, "learning_rate": 2.399692165494646e-06, "loss": 0.3989, "step": 17534 }, { "epoch": 2.344878309708478, "grad_norm": 1.4871102571487427, "learning_rate": 2.3987539724403065e-06, "loss": 0.3466, "step": 17535 }, { "epoch": 2.345012035303557, "grad_norm": 1.4861352443695068, "learning_rate": 2.3978159378266663e-06, "loss": 0.3713, "step": 17536 }, { "epoch": 2.345145760898636, "grad_norm": 1.6022893190383911, "learning_rate": 2.396878061673278e-06, "loss": 0.4356, "step": 17537 }, { "epoch": 2.3452794864937148, "grad_norm": 1.3883713483810425, "learning_rate": 2.395940343999691e-06, "loss": 0.3855, "step": 17538 }, { "epoch": 2.345413212088794, "grad_norm": 1.8230940103530884, "learning_rate": 2.395002784825452e-06, "loss": 0.3878, "step": 17539 }, { "epoch": 2.3455469376838725, "grad_norm": 1.3361262083053589, "learning_rate": 2.3940653841701023e-06, "loss": 0.3512, "step": 17540 }, { "epoch": 2.3456806632789515, "grad_norm": 1.5380806922912598, "learning_rate": 2.3931281420531816e-06, "loss": 0.3503, "step": 17541 }, { "epoch": 2.3458143888740306, "grad_norm": 1.5438785552978516, "learning_rate": 2.3921910584942265e-06, "loss": 0.3575, "step": 17542 }, { "epoch": 2.3459481144691092, "grad_norm": 1.6624984741210938, "learning_rate": 2.391254133512768e-06, "loss": 0.3894, "step": 17543 }, { "epoch": 2.3460818400641883, "grad_norm": 1.4388582706451416, "learning_rate": 2.3903173671283363e-06, "loss": 0.3513, "step": 17544 }, { "epoch": 2.3462155656592674, "grad_norm": 1.3611366748809814, "learning_rate": 2.3893807593604614e-06, "loss": 0.3374, "step": 17545 }, { "epoch": 2.346349291254346, "grad_norm": 1.441688895225525, "learning_rate": 2.3884443102286547e-06, "loss": 0.332, "step": 17546 }, { "epoch": 2.346483016849425, "grad_norm": 1.708808422088623, "learning_rate": 2.387508019752449e-06, "loss": 0.374, "step": 17547 }, { "epoch": 2.3466167424445037, "grad_norm": 1.4082392454147339, "learning_rate": 2.386571887951349e-06, "loss": 0.3341, "step": 17548 }, { "epoch": 2.3467504680395828, "grad_norm": 1.3861737251281738, "learning_rate": 2.385635914844876e-06, "loss": 0.3446, "step": 17549 }, { "epoch": 2.346884193634662, "grad_norm": 1.306904673576355, "learning_rate": 2.384700100452538e-06, "loss": 0.3222, "step": 17550 }, { "epoch": 2.3470179192297405, "grad_norm": 1.538404107093811, "learning_rate": 2.3837644447938348e-06, "loss": 0.3756, "step": 17551 }, { "epoch": 2.3471516448248195, "grad_norm": 1.691231369972229, "learning_rate": 2.3828289478882783e-06, "loss": 0.3761, "step": 17552 }, { "epoch": 2.3472853704198986, "grad_norm": 1.5625964403152466, "learning_rate": 2.381893609755361e-06, "loss": 0.3561, "step": 17553 }, { "epoch": 2.3474190960149772, "grad_norm": 1.4818755388259888, "learning_rate": 2.3809584304145827e-06, "loss": 0.4171, "step": 17554 }, { "epoch": 2.3475528216100563, "grad_norm": 1.4765480756759644, "learning_rate": 2.3800234098854346e-06, "loss": 0.3732, "step": 17555 }, { "epoch": 2.347686547205135, "grad_norm": 1.5711544752120972, "learning_rate": 2.3790885481874037e-06, "loss": 0.3587, "step": 17556 }, { "epoch": 2.347820272800214, "grad_norm": 1.466008186340332, "learning_rate": 2.3781538453399856e-06, "loss": 0.366, "step": 17557 }, { "epoch": 2.347953998395293, "grad_norm": 1.5146269798278809, "learning_rate": 2.3772193013626545e-06, "loss": 0.3657, "step": 17558 }, { "epoch": 2.3480877239903717, "grad_norm": 1.6002072095870972, "learning_rate": 2.3762849162748935e-06, "loss": 0.3136, "step": 17559 }, { "epoch": 2.3482214495854508, "grad_norm": 1.549189567565918, "learning_rate": 2.3753506900961774e-06, "loss": 0.3917, "step": 17560 }, { "epoch": 2.3483551751805294, "grad_norm": 1.4733622074127197, "learning_rate": 2.374416622845981e-06, "loss": 0.3629, "step": 17561 }, { "epoch": 2.3484889007756085, "grad_norm": 1.8149288892745972, "learning_rate": 2.3734827145437723e-06, "loss": 0.3327, "step": 17562 }, { "epoch": 2.3486226263706875, "grad_norm": 1.657407522201538, "learning_rate": 2.3725489652090183e-06, "loss": 0.4353, "step": 17563 }, { "epoch": 2.348756351965766, "grad_norm": 1.443459153175354, "learning_rate": 2.371615374861184e-06, "loss": 0.3339, "step": 17564 }, { "epoch": 2.3488900775608452, "grad_norm": 1.5707156658172607, "learning_rate": 2.3706819435197257e-06, "loss": 0.3918, "step": 17565 }, { "epoch": 2.349023803155924, "grad_norm": 1.5400866270065308, "learning_rate": 2.369748671204106e-06, "loss": 0.4153, "step": 17566 }, { "epoch": 2.349157528751003, "grad_norm": 1.4629552364349365, "learning_rate": 2.368815557933768e-06, "loss": 0.3654, "step": 17567 }, { "epoch": 2.349291254346082, "grad_norm": 1.4154753684997559, "learning_rate": 2.36788260372817e-06, "loss": 0.361, "step": 17568 }, { "epoch": 2.3494249799411606, "grad_norm": 1.4788978099822998, "learning_rate": 2.366949808606759e-06, "loss": 0.4102, "step": 17569 }, { "epoch": 2.3495587055362397, "grad_norm": 1.4997624158859253, "learning_rate": 2.3660171725889703e-06, "loss": 0.359, "step": 17570 }, { "epoch": 2.3496924311313183, "grad_norm": 1.4077261686325073, "learning_rate": 2.365084695694253e-06, "loss": 0.3637, "step": 17571 }, { "epoch": 2.3498261567263974, "grad_norm": 1.4685546159744263, "learning_rate": 2.364152377942035e-06, "loss": 0.387, "step": 17572 }, { "epoch": 2.3499598823214765, "grad_norm": 1.5082988739013672, "learning_rate": 2.3632202193517582e-06, "loss": 0.4012, "step": 17573 }, { "epoch": 2.350093607916555, "grad_norm": 1.3603029251098633, "learning_rate": 2.3622882199428463e-06, "loss": 0.3772, "step": 17574 }, { "epoch": 2.350227333511634, "grad_norm": 1.3584884405136108, "learning_rate": 2.361356379734725e-06, "loss": 0.362, "step": 17575 }, { "epoch": 2.350361059106713, "grad_norm": 1.573844313621521, "learning_rate": 2.360424698746827e-06, "loss": 0.3739, "step": 17576 }, { "epoch": 2.350494784701792, "grad_norm": 1.5637869834899902, "learning_rate": 2.359493176998562e-06, "loss": 0.3571, "step": 17577 }, { "epoch": 2.350628510296871, "grad_norm": 1.678982138633728, "learning_rate": 2.3585618145093513e-06, "loss": 0.3794, "step": 17578 }, { "epoch": 2.3507622358919495, "grad_norm": 1.5881446599960327, "learning_rate": 2.357630611298607e-06, "loss": 0.3793, "step": 17579 }, { "epoch": 2.3508959614870286, "grad_norm": 1.5485620498657227, "learning_rate": 2.3566995673857397e-06, "loss": 0.3736, "step": 17580 }, { "epoch": 2.3510296870821077, "grad_norm": 1.4549474716186523, "learning_rate": 2.355768682790156e-06, "loss": 0.3496, "step": 17581 }, { "epoch": 2.3511634126771863, "grad_norm": 1.5404152870178223, "learning_rate": 2.3548379575312597e-06, "loss": 0.3361, "step": 17582 }, { "epoch": 2.3512971382722654, "grad_norm": 1.4260433912277222, "learning_rate": 2.3539073916284504e-06, "loss": 0.3521, "step": 17583 }, { "epoch": 2.3514308638673445, "grad_norm": 1.447527527809143, "learning_rate": 2.352976985101125e-06, "loss": 0.3715, "step": 17584 }, { "epoch": 2.351564589462423, "grad_norm": 1.5347251892089844, "learning_rate": 2.3520467379686797e-06, "loss": 0.3794, "step": 17585 }, { "epoch": 2.351698315057502, "grad_norm": 1.4257363080978394, "learning_rate": 2.3511166502504967e-06, "loss": 0.3454, "step": 17586 }, { "epoch": 2.351832040652581, "grad_norm": 1.6945871114730835, "learning_rate": 2.3501867219659703e-06, "loss": 0.355, "step": 17587 }, { "epoch": 2.35196576624766, "grad_norm": 1.4235073328018188, "learning_rate": 2.349256953134481e-06, "loss": 0.3855, "step": 17588 }, { "epoch": 2.352099491842739, "grad_norm": 1.6472917795181274, "learning_rate": 2.3483273437754106e-06, "loss": 0.3516, "step": 17589 }, { "epoch": 2.3522332174378175, "grad_norm": 1.6385244131088257, "learning_rate": 2.3473978939081375e-06, "loss": 0.3861, "step": 17590 }, { "epoch": 2.3523669430328966, "grad_norm": 1.4511382579803467, "learning_rate": 2.3464686035520267e-06, "loss": 0.3333, "step": 17591 }, { "epoch": 2.3525006686279752, "grad_norm": 1.6987982988357544, "learning_rate": 2.345539472726459e-06, "loss": 0.4102, "step": 17592 }, { "epoch": 2.3526343942230543, "grad_norm": 1.5320804119110107, "learning_rate": 2.3446105014507925e-06, "loss": 0.3489, "step": 17593 }, { "epoch": 2.3527681198181334, "grad_norm": 1.5359894037246704, "learning_rate": 2.343681689744396e-06, "loss": 0.3784, "step": 17594 }, { "epoch": 2.352901845413212, "grad_norm": 1.3519891500473022, "learning_rate": 2.342753037626633e-06, "loss": 0.3181, "step": 17595 }, { "epoch": 2.353035571008291, "grad_norm": 1.3641998767852783, "learning_rate": 2.341824545116849e-06, "loss": 0.3465, "step": 17596 }, { "epoch": 2.3531692966033697, "grad_norm": 1.6393500566482544, "learning_rate": 2.3408962122344093e-06, "loss": 0.3721, "step": 17597 }, { "epoch": 2.353303022198449, "grad_norm": 1.371472954750061, "learning_rate": 2.339968038998657e-06, "loss": 0.3335, "step": 17598 }, { "epoch": 2.353436747793528, "grad_norm": 1.7660419940948486, "learning_rate": 2.3390400254289402e-06, "loss": 0.3856, "step": 17599 }, { "epoch": 2.3535704733886065, "grad_norm": 1.535967230796814, "learning_rate": 2.3381121715446044e-06, "loss": 0.3634, "step": 17600 }, { "epoch": 2.3537041989836855, "grad_norm": 1.6346659660339355, "learning_rate": 2.3371844773649888e-06, "loss": 0.3641, "step": 17601 }, { "epoch": 2.353837924578764, "grad_norm": 1.409423589706421, "learning_rate": 2.3362569429094295e-06, "loss": 0.3799, "step": 17602 }, { "epoch": 2.3539716501738432, "grad_norm": 1.621748924255371, "learning_rate": 2.335329568197261e-06, "loss": 0.344, "step": 17603 }, { "epoch": 2.3541053757689223, "grad_norm": 1.5207022428512573, "learning_rate": 2.3344023532478135e-06, "loss": 0.351, "step": 17604 }, { "epoch": 2.354239101364001, "grad_norm": 1.4854915142059326, "learning_rate": 2.333475298080414e-06, "loss": 0.3551, "step": 17605 }, { "epoch": 2.35437282695908, "grad_norm": 1.4008948802947998, "learning_rate": 2.332548402714385e-06, "loss": 0.3489, "step": 17606 }, { "epoch": 2.3545065525541586, "grad_norm": 1.619930386543274, "learning_rate": 2.3316216671690485e-06, "loss": 0.3605, "step": 17607 }, { "epoch": 2.3546402781492377, "grad_norm": 1.6028120517730713, "learning_rate": 2.3306950914637205e-06, "loss": 0.3953, "step": 17608 }, { "epoch": 2.354774003744317, "grad_norm": 1.6198242902755737, "learning_rate": 2.329768675617714e-06, "loss": 0.3943, "step": 17609 }, { "epoch": 2.3549077293393954, "grad_norm": 1.7245213985443115, "learning_rate": 2.32884241965034e-06, "loss": 0.4087, "step": 17610 }, { "epoch": 2.3550414549344745, "grad_norm": 1.3707060813903809, "learning_rate": 2.327916323580909e-06, "loss": 0.3764, "step": 17611 }, { "epoch": 2.3551751805295535, "grad_norm": 1.5521405935287476, "learning_rate": 2.3269903874287146e-06, "loss": 0.3669, "step": 17612 }, { "epoch": 2.355308906124632, "grad_norm": 1.464019536972046, "learning_rate": 2.3260646112130657e-06, "loss": 0.3493, "step": 17613 }, { "epoch": 2.3554426317197112, "grad_norm": 1.2734593152999878, "learning_rate": 2.32513899495326e-06, "loss": 0.3238, "step": 17614 }, { "epoch": 2.35557635731479, "grad_norm": 1.6263285875320435, "learning_rate": 2.3242135386685816e-06, "loss": 0.3889, "step": 17615 }, { "epoch": 2.355710082909869, "grad_norm": 1.7689213752746582, "learning_rate": 2.3232882423783342e-06, "loss": 0.4187, "step": 17616 }, { "epoch": 2.355843808504948, "grad_norm": 1.5880107879638672, "learning_rate": 2.3223631061017903e-06, "loss": 0.4012, "step": 17617 }, { "epoch": 2.3559775341000266, "grad_norm": 1.4149914979934692, "learning_rate": 2.3214381298582477e-06, "loss": 0.3433, "step": 17618 }, { "epoch": 2.3561112596951057, "grad_norm": 1.5423498153686523, "learning_rate": 2.3205133136669757e-06, "loss": 0.3686, "step": 17619 }, { "epoch": 2.356244985290185, "grad_norm": 1.7391676902770996, "learning_rate": 2.3195886575472557e-06, "loss": 0.4466, "step": 17620 }, { "epoch": 2.3563787108852634, "grad_norm": 1.5171104669570923, "learning_rate": 2.3186641615183615e-06, "loss": 0.3816, "step": 17621 }, { "epoch": 2.3565124364803425, "grad_norm": 1.5710216760635376, "learning_rate": 2.317739825599562e-06, "loss": 0.377, "step": 17622 }, { "epoch": 2.356646162075421, "grad_norm": 1.7492246627807617, "learning_rate": 2.3168156498101247e-06, "loss": 0.4088, "step": 17623 }, { "epoch": 2.3567798876705, "grad_norm": 1.6864070892333984, "learning_rate": 2.3158916341693126e-06, "loss": 0.4073, "step": 17624 }, { "epoch": 2.3569136132655792, "grad_norm": 1.572609782218933, "learning_rate": 2.3149677786963874e-06, "loss": 0.3674, "step": 17625 }, { "epoch": 2.357047338860658, "grad_norm": 1.4225693941116333, "learning_rate": 2.314044083410605e-06, "loss": 0.3393, "step": 17626 }, { "epoch": 2.357181064455737, "grad_norm": 1.512403130531311, "learning_rate": 2.313120548331218e-06, "loss": 0.39, "step": 17627 }, { "epoch": 2.3573147900508156, "grad_norm": 1.6987284421920776, "learning_rate": 2.3121971734774783e-06, "loss": 0.3984, "step": 17628 }, { "epoch": 2.3574485156458946, "grad_norm": 1.4325984716415405, "learning_rate": 2.3112739588686327e-06, "loss": 0.3244, "step": 17629 }, { "epoch": 2.3575822412409737, "grad_norm": 1.5761210918426514, "learning_rate": 2.310350904523926e-06, "loss": 0.3391, "step": 17630 }, { "epoch": 2.3577159668360523, "grad_norm": 1.4827806949615479, "learning_rate": 2.309428010462591e-06, "loss": 0.376, "step": 17631 }, { "epoch": 2.3578496924311314, "grad_norm": 1.7346501350402832, "learning_rate": 2.308505276703874e-06, "loss": 0.4202, "step": 17632 }, { "epoch": 2.35798341802621, "grad_norm": 1.4199753999710083, "learning_rate": 2.3075827032670028e-06, "loss": 0.3802, "step": 17633 }, { "epoch": 2.358117143621289, "grad_norm": 1.7224870920181274, "learning_rate": 2.306660290171211e-06, "loss": 0.4244, "step": 17634 }, { "epoch": 2.358250869216368, "grad_norm": 1.4850341081619263, "learning_rate": 2.305738037435725e-06, "loss": 0.3619, "step": 17635 }, { "epoch": 2.358384594811447, "grad_norm": 1.5659384727478027, "learning_rate": 2.3048159450797626e-06, "loss": 0.4409, "step": 17636 }, { "epoch": 2.358518320406526, "grad_norm": 1.4847172498703003, "learning_rate": 2.303894013122553e-06, "loss": 0.3762, "step": 17637 }, { "epoch": 2.3586520460016045, "grad_norm": 1.4024910926818848, "learning_rate": 2.3029722415833057e-06, "loss": 0.3736, "step": 17638 }, { "epoch": 2.3587857715966836, "grad_norm": 1.5001801252365112, "learning_rate": 2.3020506304812373e-06, "loss": 0.373, "step": 17639 }, { "epoch": 2.3589194971917626, "grad_norm": 1.439276099205017, "learning_rate": 2.3011291798355573e-06, "loss": 0.3589, "step": 17640 }, { "epoch": 2.3590532227868413, "grad_norm": 1.6095563173294067, "learning_rate": 2.300207889665469e-06, "loss": 0.3454, "step": 17641 }, { "epoch": 2.3591869483819203, "grad_norm": 1.5512473583221436, "learning_rate": 2.299286759990186e-06, "loss": 0.3547, "step": 17642 }, { "epoch": 2.359320673976999, "grad_norm": 1.5021485090255737, "learning_rate": 2.298365790828898e-06, "loss": 0.3568, "step": 17643 }, { "epoch": 2.359454399572078, "grad_norm": 1.490556001663208, "learning_rate": 2.2974449822008062e-06, "loss": 0.3301, "step": 17644 }, { "epoch": 2.359588125167157, "grad_norm": 1.4124581813812256, "learning_rate": 2.296524334125102e-06, "loss": 0.3594, "step": 17645 }, { "epoch": 2.3597218507622357, "grad_norm": 1.4436336755752563, "learning_rate": 2.2956038466209775e-06, "loss": 0.3569, "step": 17646 }, { "epoch": 2.359855576357315, "grad_norm": 1.6325069665908813, "learning_rate": 2.294683519707619e-06, "loss": 0.3803, "step": 17647 }, { "epoch": 2.359989301952394, "grad_norm": 1.4365670680999756, "learning_rate": 2.2937633534042083e-06, "loss": 0.3505, "step": 17648 }, { "epoch": 2.3601230275474725, "grad_norm": 1.5861457586288452, "learning_rate": 2.2928433477299274e-06, "loss": 0.3847, "step": 17649 }, { "epoch": 2.3602567531425516, "grad_norm": 1.5435835123062134, "learning_rate": 2.2919235027039512e-06, "loss": 0.3663, "step": 17650 }, { "epoch": 2.36039047873763, "grad_norm": 1.3797398805618286, "learning_rate": 2.291003818345454e-06, "loss": 0.3047, "step": 17651 }, { "epoch": 2.3605242043327093, "grad_norm": 1.4556787014007568, "learning_rate": 2.290084294673606e-06, "loss": 0.3538, "step": 17652 }, { "epoch": 2.3606579299277883, "grad_norm": 1.5313482284545898, "learning_rate": 2.2891649317075728e-06, "loss": 0.3925, "step": 17653 }, { "epoch": 2.360791655522867, "grad_norm": 1.8698794841766357, "learning_rate": 2.2882457294665205e-06, "loss": 0.4318, "step": 17654 }, { "epoch": 2.360925381117946, "grad_norm": 1.4722373485565186, "learning_rate": 2.287326687969601e-06, "loss": 0.3767, "step": 17655 }, { "epoch": 2.361059106713025, "grad_norm": 1.4357688426971436, "learning_rate": 2.286407807235983e-06, "loss": 0.3639, "step": 17656 }, { "epoch": 2.3611928323081037, "grad_norm": 1.4033628702163696, "learning_rate": 2.2854890872848067e-06, "loss": 0.3329, "step": 17657 }, { "epoch": 2.361326557903183, "grad_norm": 1.4588919878005981, "learning_rate": 2.2845705281352317e-06, "loss": 0.3183, "step": 17658 }, { "epoch": 2.3614602834982614, "grad_norm": 1.474920392036438, "learning_rate": 2.283652129806404e-06, "loss": 0.3981, "step": 17659 }, { "epoch": 2.3615940090933405, "grad_norm": 1.6830304861068726, "learning_rate": 2.282733892317458e-06, "loss": 0.3746, "step": 17660 }, { "epoch": 2.3617277346884196, "grad_norm": 1.6151689291000366, "learning_rate": 2.281815815687545e-06, "loss": 0.3548, "step": 17661 }, { "epoch": 2.361861460283498, "grad_norm": 1.464469075202942, "learning_rate": 2.2808978999357933e-06, "loss": 0.4145, "step": 17662 }, { "epoch": 2.3619951858785773, "grad_norm": 1.539075255393982, "learning_rate": 2.2799801450813385e-06, "loss": 0.3747, "step": 17663 }, { "epoch": 2.362128911473656, "grad_norm": 1.3395589590072632, "learning_rate": 2.2790625511433096e-06, "loss": 0.324, "step": 17664 }, { "epoch": 2.362262637068735, "grad_norm": 1.4639531373977661, "learning_rate": 2.2781451181408343e-06, "loss": 0.3776, "step": 17665 }, { "epoch": 2.362396362663814, "grad_norm": 1.4742915630340576, "learning_rate": 2.277227846093035e-06, "loss": 0.3615, "step": 17666 }, { "epoch": 2.3625300882588927, "grad_norm": 1.5962618589401245, "learning_rate": 2.2763107350190318e-06, "loss": 0.4345, "step": 17667 }, { "epoch": 2.3626638138539717, "grad_norm": 1.6713985204696655, "learning_rate": 2.2753937849379392e-06, "loss": 0.3632, "step": 17668 }, { "epoch": 2.3627975394490504, "grad_norm": 1.3686665296554565, "learning_rate": 2.274476995868873e-06, "loss": 0.3419, "step": 17669 }, { "epoch": 2.3629312650441294, "grad_norm": 1.44289231300354, "learning_rate": 2.2735603678309402e-06, "loss": 0.3688, "step": 17670 }, { "epoch": 2.3630649906392085, "grad_norm": 1.7823231220245361, "learning_rate": 2.272643900843249e-06, "loss": 0.4065, "step": 17671 }, { "epoch": 2.363198716234287, "grad_norm": 1.4176145792007446, "learning_rate": 2.271727594924901e-06, "loss": 0.3096, "step": 17672 }, { "epoch": 2.363332441829366, "grad_norm": 1.5126402378082275, "learning_rate": 2.270811450094996e-06, "loss": 0.3906, "step": 17673 }, { "epoch": 2.363466167424445, "grad_norm": 1.5010432004928589, "learning_rate": 2.26989546637263e-06, "loss": 0.3594, "step": 17674 }, { "epoch": 2.363599893019524, "grad_norm": 1.445062518119812, "learning_rate": 2.2689796437768996e-06, "loss": 0.3566, "step": 17675 }, { "epoch": 2.363733618614603, "grad_norm": 1.6571155786514282, "learning_rate": 2.2680639823268848e-06, "loss": 0.3954, "step": 17676 }, { "epoch": 2.3638673442096816, "grad_norm": 1.512982726097107, "learning_rate": 2.267148482041681e-06, "loss": 0.3898, "step": 17677 }, { "epoch": 2.3640010698047607, "grad_norm": 1.2868047952651978, "learning_rate": 2.2662331429403672e-06, "loss": 0.3292, "step": 17678 }, { "epoch": 2.3641347953998393, "grad_norm": 1.406975269317627, "learning_rate": 2.265317965042022e-06, "loss": 0.362, "step": 17679 }, { "epoch": 2.3642685209949184, "grad_norm": 1.5612964630126953, "learning_rate": 2.264402948365727e-06, "loss": 0.3681, "step": 17680 }, { "epoch": 2.3644022465899974, "grad_norm": 1.7238435745239258, "learning_rate": 2.2634880929305436e-06, "loss": 0.393, "step": 17681 }, { "epoch": 2.364535972185076, "grad_norm": 1.6193193197250366, "learning_rate": 2.2625733987555542e-06, "loss": 0.436, "step": 17682 }, { "epoch": 2.364669697780155, "grad_norm": 1.524370789527893, "learning_rate": 2.2616588658598147e-06, "loss": 0.4031, "step": 17683 }, { "epoch": 2.364803423375234, "grad_norm": 1.5803323984146118, "learning_rate": 2.2607444942623922e-06, "loss": 0.4038, "step": 17684 }, { "epoch": 2.364937148970313, "grad_norm": 1.7810205221176147, "learning_rate": 2.259830283982345e-06, "loss": 0.4262, "step": 17685 }, { "epoch": 2.365070874565392, "grad_norm": 1.472284197807312, "learning_rate": 2.258916235038726e-06, "loss": 0.3536, "step": 17686 }, { "epoch": 2.365204600160471, "grad_norm": 1.613823413848877, "learning_rate": 2.2580023474505965e-06, "loss": 0.3932, "step": 17687 }, { "epoch": 2.3653383257555496, "grad_norm": 1.7042313814163208, "learning_rate": 2.257088621236997e-06, "loss": 0.4041, "step": 17688 }, { "epoch": 2.3654720513506287, "grad_norm": 1.4048388004302979, "learning_rate": 2.256175056416976e-06, "loss": 0.3402, "step": 17689 }, { "epoch": 2.3656057769457073, "grad_norm": 1.6079317331314087, "learning_rate": 2.255261653009575e-06, "loss": 0.3643, "step": 17690 }, { "epoch": 2.3657395025407864, "grad_norm": 1.4293856620788574, "learning_rate": 2.2543484110338353e-06, "loss": 0.3352, "step": 17691 }, { "epoch": 2.3658732281358654, "grad_norm": 1.4527764320373535, "learning_rate": 2.253435330508791e-06, "loss": 0.3691, "step": 17692 }, { "epoch": 2.366006953730944, "grad_norm": 1.732912540435791, "learning_rate": 2.252522411453474e-06, "loss": 0.4022, "step": 17693 }, { "epoch": 2.366140679326023, "grad_norm": 1.4639184474945068, "learning_rate": 2.2516096538869137e-06, "loss": 0.3697, "step": 17694 }, { "epoch": 2.3662744049211017, "grad_norm": 1.4273416996002197, "learning_rate": 2.250697057828135e-06, "loss": 0.3377, "step": 17695 }, { "epoch": 2.366408130516181, "grad_norm": 1.5685405731201172, "learning_rate": 2.249784623296163e-06, "loss": 0.3739, "step": 17696 }, { "epoch": 2.36654185611126, "grad_norm": 1.6673862934112549, "learning_rate": 2.248872350310013e-06, "loss": 0.4104, "step": 17697 }, { "epoch": 2.3666755817063385, "grad_norm": 1.288824200630188, "learning_rate": 2.2479602388887013e-06, "loss": 0.3308, "step": 17698 }, { "epoch": 2.3668093073014176, "grad_norm": 1.3809102773666382, "learning_rate": 2.2470482890512446e-06, "loss": 0.3927, "step": 17699 }, { "epoch": 2.366943032896496, "grad_norm": 1.379055380821228, "learning_rate": 2.2461365008166412e-06, "loss": 0.3275, "step": 17700 }, { "epoch": 2.3670767584915753, "grad_norm": 1.4392231702804565, "learning_rate": 2.2452248742039083e-06, "loss": 0.3495, "step": 17701 }, { "epoch": 2.3672104840866544, "grad_norm": 1.4698125123977661, "learning_rate": 2.244313409232037e-06, "loss": 0.3541, "step": 17702 }, { "epoch": 2.367344209681733, "grad_norm": 1.4300315380096436, "learning_rate": 2.2434021059200373e-06, "loss": 0.3673, "step": 17703 }, { "epoch": 2.367477935276812, "grad_norm": 1.4911444187164307, "learning_rate": 2.242490964286895e-06, "loss": 0.3693, "step": 17704 }, { "epoch": 2.3676116608718907, "grad_norm": 1.3211544752120972, "learning_rate": 2.241579984351603e-06, "loss": 0.3236, "step": 17705 }, { "epoch": 2.3677453864669697, "grad_norm": 1.5822662115097046, "learning_rate": 2.240669166133158e-06, "loss": 0.4003, "step": 17706 }, { "epoch": 2.367879112062049, "grad_norm": 1.4565086364746094, "learning_rate": 2.239758509650536e-06, "loss": 0.3566, "step": 17707 }, { "epoch": 2.3680128376571274, "grad_norm": 1.5679067373275757, "learning_rate": 2.2388480149227233e-06, "loss": 0.3291, "step": 17708 }, { "epoch": 2.3681465632522065, "grad_norm": 1.473365306854248, "learning_rate": 2.237937681968696e-06, "loss": 0.3649, "step": 17709 }, { "epoch": 2.368280288847285, "grad_norm": 1.6807719469070435, "learning_rate": 2.2370275108074303e-06, "loss": 0.3947, "step": 17710 }, { "epoch": 2.368414014442364, "grad_norm": 1.4028116464614868, "learning_rate": 2.2361175014578983e-06, "loss": 0.3301, "step": 17711 }, { "epoch": 2.3685477400374433, "grad_norm": 1.4825043678283691, "learning_rate": 2.2352076539390664e-06, "loss": 0.3329, "step": 17712 }, { "epoch": 2.368681465632522, "grad_norm": 1.5437074899673462, "learning_rate": 2.234297968269903e-06, "loss": 0.3619, "step": 17713 }, { "epoch": 2.368815191227601, "grad_norm": 1.6066052913665771, "learning_rate": 2.2333884444693656e-06, "loss": 0.4132, "step": 17714 }, { "epoch": 2.36894891682268, "grad_norm": 1.476467251777649, "learning_rate": 2.2324790825564146e-06, "loss": 0.376, "step": 17715 }, { "epoch": 2.3690826424177587, "grad_norm": 1.8732562065124512, "learning_rate": 2.2315698825500053e-06, "loss": 0.3977, "step": 17716 }, { "epoch": 2.3692163680128377, "grad_norm": 1.6135532855987549, "learning_rate": 2.230660844469088e-06, "loss": 0.3657, "step": 17717 }, { "epoch": 2.3693500936079164, "grad_norm": 1.5239770412445068, "learning_rate": 2.229751968332611e-06, "loss": 0.368, "step": 17718 }, { "epoch": 2.3694838192029954, "grad_norm": 1.6480637788772583, "learning_rate": 2.2288432541595185e-06, "loss": 0.3903, "step": 17719 }, { "epoch": 2.3696175447980745, "grad_norm": 1.7169145345687866, "learning_rate": 2.227934701968755e-06, "loss": 0.3827, "step": 17720 }, { "epoch": 2.369751270393153, "grad_norm": 1.447026252746582, "learning_rate": 2.227026311779249e-06, "loss": 0.3929, "step": 17721 }, { "epoch": 2.369884995988232, "grad_norm": 1.5903888940811157, "learning_rate": 2.2261180836099482e-06, "loss": 0.3856, "step": 17722 }, { "epoch": 2.3700187215833113, "grad_norm": 1.4336998462677002, "learning_rate": 2.2252100174797753e-06, "loss": 0.4015, "step": 17723 }, { "epoch": 2.37015244717839, "grad_norm": 1.4801890850067139, "learning_rate": 2.2243021134076557e-06, "loss": 0.3936, "step": 17724 }, { "epoch": 2.370286172773469, "grad_norm": 1.5349730253219604, "learning_rate": 2.223394371412524e-06, "loss": 0.3902, "step": 17725 }, { "epoch": 2.3704198983685476, "grad_norm": 1.7089978456497192, "learning_rate": 2.2224867915132896e-06, "loss": 0.4404, "step": 17726 }, { "epoch": 2.3705536239636267, "grad_norm": 1.6646735668182373, "learning_rate": 2.2215793737288817e-06, "loss": 0.3939, "step": 17727 }, { "epoch": 2.3706873495587057, "grad_norm": 1.4868961572647095, "learning_rate": 2.2206721180782053e-06, "loss": 0.3971, "step": 17728 }, { "epoch": 2.3708210751537844, "grad_norm": 1.3605149984359741, "learning_rate": 2.219765024580175e-06, "loss": 0.36, "step": 17729 }, { "epoch": 2.3709548007488634, "grad_norm": 1.5987104177474976, "learning_rate": 2.2188580932536986e-06, "loss": 0.3748, "step": 17730 }, { "epoch": 2.371088526343942, "grad_norm": 1.6482453346252441, "learning_rate": 2.2179513241176777e-06, "loss": 0.3621, "step": 17731 }, { "epoch": 2.371222251939021, "grad_norm": 1.4732414484024048, "learning_rate": 2.2170447171910157e-06, "loss": 0.3677, "step": 17732 }, { "epoch": 2.3713559775341, "grad_norm": 1.6557761430740356, "learning_rate": 2.2161382724926096e-06, "loss": 0.4107, "step": 17733 }, { "epoch": 2.371489703129179, "grad_norm": 1.6231448650360107, "learning_rate": 2.2152319900413523e-06, "loss": 0.371, "step": 17734 }, { "epoch": 2.371623428724258, "grad_norm": 1.500584363937378, "learning_rate": 2.2143258698561354e-06, "loss": 0.3339, "step": 17735 }, { "epoch": 2.3717571543193365, "grad_norm": 1.3678990602493286, "learning_rate": 2.213419911955845e-06, "loss": 0.3373, "step": 17736 }, { "epoch": 2.3718908799144156, "grad_norm": 1.640110969543457, "learning_rate": 2.212514116359367e-06, "loss": 0.3606, "step": 17737 }, { "epoch": 2.3720246055094947, "grad_norm": 1.3266701698303223, "learning_rate": 2.211608483085579e-06, "loss": 0.3415, "step": 17738 }, { "epoch": 2.3721583311045733, "grad_norm": 1.520799160003662, "learning_rate": 2.2107030121533623e-06, "loss": 0.3821, "step": 17739 }, { "epoch": 2.3722920566996524, "grad_norm": 1.433852195739746, "learning_rate": 2.209797703581582e-06, "loss": 0.3504, "step": 17740 }, { "epoch": 2.372425782294731, "grad_norm": 1.5438417196273804, "learning_rate": 2.2088925573891207e-06, "loss": 0.3223, "step": 17741 }, { "epoch": 2.37255950788981, "grad_norm": 1.7594188451766968, "learning_rate": 2.207987573594833e-06, "loss": 0.4181, "step": 17742 }, { "epoch": 2.372693233484889, "grad_norm": 1.3919388055801392, "learning_rate": 2.207082752217591e-06, "loss": 0.3013, "step": 17743 }, { "epoch": 2.3728269590799678, "grad_norm": 1.5057038068771362, "learning_rate": 2.2061780932762545e-06, "loss": 0.3277, "step": 17744 }, { "epoch": 2.372960684675047, "grad_norm": 1.6068222522735596, "learning_rate": 2.205273596789672e-06, "loss": 0.3601, "step": 17745 }, { "epoch": 2.3730944102701255, "grad_norm": 1.587902307510376, "learning_rate": 2.2043692627767077e-06, "loss": 0.3758, "step": 17746 }, { "epoch": 2.3732281358652045, "grad_norm": 1.5797476768493652, "learning_rate": 2.203465091256205e-06, "loss": 0.3321, "step": 17747 }, { "epoch": 2.3733618614602836, "grad_norm": 1.4322763681411743, "learning_rate": 2.2025610822470113e-06, "loss": 0.325, "step": 17748 }, { "epoch": 2.3734955870553622, "grad_norm": 1.4042689800262451, "learning_rate": 2.201657235767971e-06, "loss": 0.3464, "step": 17749 }, { "epoch": 2.3736293126504413, "grad_norm": 1.5412917137145996, "learning_rate": 2.2007535518379196e-06, "loss": 0.3625, "step": 17750 }, { "epoch": 2.3737630382455204, "grad_norm": 1.758390188217163, "learning_rate": 2.1998500304757044e-06, "loss": 0.4536, "step": 17751 }, { "epoch": 2.373896763840599, "grad_norm": 1.7407138347625732, "learning_rate": 2.1989466717001475e-06, "loss": 0.4084, "step": 17752 }, { "epoch": 2.374030489435678, "grad_norm": 1.5752229690551758, "learning_rate": 2.1980434755300828e-06, "loss": 0.4003, "step": 17753 }, { "epoch": 2.3741642150307567, "grad_norm": 1.4256819486618042, "learning_rate": 2.1971404419843355e-06, "loss": 0.375, "step": 17754 }, { "epoch": 2.3742979406258358, "grad_norm": 1.5044407844543457, "learning_rate": 2.1962375710817296e-06, "loss": 0.3681, "step": 17755 }, { "epoch": 2.374431666220915, "grad_norm": 1.2612330913543701, "learning_rate": 2.1953348628410855e-06, "loss": 0.3285, "step": 17756 }, { "epoch": 2.3745653918159935, "grad_norm": 1.3040237426757812, "learning_rate": 2.1944323172812166e-06, "loss": 0.3313, "step": 17757 }, { "epoch": 2.3746991174110725, "grad_norm": 1.4450445175170898, "learning_rate": 2.193529934420937e-06, "loss": 0.3827, "step": 17758 }, { "epoch": 2.3748328430061516, "grad_norm": 1.4951387643814087, "learning_rate": 2.1926277142790554e-06, "loss": 0.3338, "step": 17759 }, { "epoch": 2.3749665686012302, "grad_norm": 1.4737999439239502, "learning_rate": 2.1917256568743794e-06, "loss": 0.333, "step": 17760 }, { "epoch": 2.3751002941963093, "grad_norm": 1.5859688520431519, "learning_rate": 2.1908237622257087e-06, "loss": 0.3623, "step": 17761 }, { "epoch": 2.375234019791388, "grad_norm": 1.68455171585083, "learning_rate": 2.1899220303518465e-06, "loss": 0.3677, "step": 17762 }, { "epoch": 2.375367745386467, "grad_norm": 1.478835940361023, "learning_rate": 2.1890204612715847e-06, "loss": 0.3836, "step": 17763 }, { "epoch": 2.375501470981546, "grad_norm": 1.4816006422042847, "learning_rate": 2.188119055003717e-06, "loss": 0.356, "step": 17764 }, { "epoch": 2.3756351965766247, "grad_norm": 1.6837992668151855, "learning_rate": 2.187217811567035e-06, "loss": 0.3827, "step": 17765 }, { "epoch": 2.3757689221717038, "grad_norm": 1.482251524925232, "learning_rate": 2.186316730980317e-06, "loss": 0.3699, "step": 17766 }, { "epoch": 2.3759026477667824, "grad_norm": 1.431774616241455, "learning_rate": 2.185415813262355e-06, "loss": 0.3415, "step": 17767 }, { "epoch": 2.3760363733618615, "grad_norm": 2.1552860736846924, "learning_rate": 2.1845150584319197e-06, "loss": 0.39, "step": 17768 }, { "epoch": 2.3761700989569405, "grad_norm": 1.6339871883392334, "learning_rate": 2.1836144665077873e-06, "loss": 0.3825, "step": 17769 }, { "epoch": 2.376303824552019, "grad_norm": 1.5598790645599365, "learning_rate": 2.1827140375087363e-06, "loss": 0.3976, "step": 17770 }, { "epoch": 2.3764375501470982, "grad_norm": 1.6280962228775024, "learning_rate": 2.181813771453526e-06, "loss": 0.4169, "step": 17771 }, { "epoch": 2.376571275742177, "grad_norm": 1.6548141241073608, "learning_rate": 2.1809136683609324e-06, "loss": 0.398, "step": 17772 }, { "epoch": 2.376705001337256, "grad_norm": 1.705264687538147, "learning_rate": 2.180013728249708e-06, "loss": 0.4104, "step": 17773 }, { "epoch": 2.376838726932335, "grad_norm": 1.4637964963912964, "learning_rate": 2.179113951138615e-06, "loss": 0.3515, "step": 17774 }, { "epoch": 2.3769724525274136, "grad_norm": 1.6449227333068848, "learning_rate": 2.1782143370464072e-06, "loss": 0.3665, "step": 17775 }, { "epoch": 2.3771061781224927, "grad_norm": 1.4319182634353638, "learning_rate": 2.177314885991837e-06, "loss": 0.3859, "step": 17776 }, { "epoch": 2.3772399037175713, "grad_norm": 1.6064527034759521, "learning_rate": 2.176415597993653e-06, "loss": 0.3923, "step": 17777 }, { "epoch": 2.3773736293126504, "grad_norm": 1.7177451848983765, "learning_rate": 2.175516473070599e-06, "loss": 0.4329, "step": 17778 }, { "epoch": 2.3775073549077295, "grad_norm": 1.4720944166183472, "learning_rate": 2.174617511241417e-06, "loss": 0.3316, "step": 17779 }, { "epoch": 2.377641080502808, "grad_norm": 1.4723901748657227, "learning_rate": 2.173718712524845e-06, "loss": 0.3682, "step": 17780 }, { "epoch": 2.377774806097887, "grad_norm": 1.556833028793335, "learning_rate": 2.172820076939618e-06, "loss": 0.349, "step": 17781 }, { "epoch": 2.377908531692966, "grad_norm": 1.580910325050354, "learning_rate": 2.1719216045044656e-06, "loss": 0.4273, "step": 17782 }, { "epoch": 2.378042257288045, "grad_norm": 1.4721739292144775, "learning_rate": 2.171023295238117e-06, "loss": 0.3601, "step": 17783 }, { "epoch": 2.378175982883124, "grad_norm": 1.7716712951660156, "learning_rate": 2.1701251491593e-06, "loss": 0.4344, "step": 17784 }, { "epoch": 2.3783097084782026, "grad_norm": 1.5400400161743164, "learning_rate": 2.1692271662867257e-06, "loss": 0.3606, "step": 17785 }, { "epoch": 2.3784434340732816, "grad_norm": 1.407920479774475, "learning_rate": 2.168329346639123e-06, "loss": 0.3914, "step": 17786 }, { "epoch": 2.3785771596683607, "grad_norm": 1.6963717937469482, "learning_rate": 2.1674316902351967e-06, "loss": 0.3888, "step": 17787 }, { "epoch": 2.3787108852634393, "grad_norm": 1.3053958415985107, "learning_rate": 2.166534197093664e-06, "loss": 0.3748, "step": 17788 }, { "epoch": 2.3788446108585184, "grad_norm": 1.5242102146148682, "learning_rate": 2.165636867233232e-06, "loss": 0.3394, "step": 17789 }, { "epoch": 2.3789783364535975, "grad_norm": 1.4534916877746582, "learning_rate": 2.1647397006725978e-06, "loss": 0.3449, "step": 17790 }, { "epoch": 2.379112062048676, "grad_norm": 1.4911854267120361, "learning_rate": 2.1638426974304737e-06, "loss": 0.3732, "step": 17791 }, { "epoch": 2.379245787643755, "grad_norm": 1.5047416687011719, "learning_rate": 2.1629458575255457e-06, "loss": 0.3482, "step": 17792 }, { "epoch": 2.379379513238834, "grad_norm": 1.5534480810165405, "learning_rate": 2.1620491809765133e-06, "loss": 0.4073, "step": 17793 }, { "epoch": 2.379513238833913, "grad_norm": 1.4092671871185303, "learning_rate": 2.1611526678020658e-06, "loss": 0.347, "step": 17794 }, { "epoch": 2.379646964428992, "grad_norm": 1.5560104846954346, "learning_rate": 2.1602563180208857e-06, "loss": 0.3335, "step": 17795 }, { "epoch": 2.3797806900240706, "grad_norm": 1.590920329093933, "learning_rate": 2.1593601316516677e-06, "loss": 0.3528, "step": 17796 }, { "epoch": 2.3799144156191496, "grad_norm": 1.4623390436172485, "learning_rate": 2.158464108713082e-06, "loss": 0.3353, "step": 17797 }, { "epoch": 2.3800481412142283, "grad_norm": 1.5075510740280151, "learning_rate": 2.157568249223808e-06, "loss": 0.408, "step": 17798 }, { "epoch": 2.3801818668093073, "grad_norm": 1.6794437170028687, "learning_rate": 2.156672553202519e-06, "loss": 0.4133, "step": 17799 }, { "epoch": 2.3803155924043864, "grad_norm": 1.3852394819259644, "learning_rate": 2.155777020667886e-06, "loss": 0.3405, "step": 17800 }, { "epoch": 2.380449317999465, "grad_norm": 1.5128384828567505, "learning_rate": 2.154881651638575e-06, "loss": 0.3629, "step": 17801 }, { "epoch": 2.380583043594544, "grad_norm": 1.7992634773254395, "learning_rate": 2.1539864461332495e-06, "loss": 0.4159, "step": 17802 }, { "epoch": 2.3807167691896227, "grad_norm": 1.6477489471435547, "learning_rate": 2.1530914041705686e-06, "loss": 0.3834, "step": 17803 }, { "epoch": 2.380850494784702, "grad_norm": 1.2010512351989746, "learning_rate": 2.152196525769188e-06, "loss": 0.3368, "step": 17804 }, { "epoch": 2.380984220379781, "grad_norm": 1.4801628589630127, "learning_rate": 2.1513018109477647e-06, "loss": 0.3601, "step": 17805 }, { "epoch": 2.3811179459748595, "grad_norm": 1.3441920280456543, "learning_rate": 2.150407259724938e-06, "loss": 0.3405, "step": 17806 }, { "epoch": 2.3812516715699386, "grad_norm": 1.4753878116607666, "learning_rate": 2.1495128721193648e-06, "loss": 0.3838, "step": 17807 }, { "epoch": 2.381385397165017, "grad_norm": 1.5801125764846802, "learning_rate": 2.1486186481496863e-06, "loss": 0.3543, "step": 17808 }, { "epoch": 2.3815191227600963, "grad_norm": 1.6334245204925537, "learning_rate": 2.147724587834533e-06, "loss": 0.3349, "step": 17809 }, { "epoch": 2.3816528483551753, "grad_norm": 1.6705485582351685, "learning_rate": 2.146830691192553e-06, "loss": 0.3989, "step": 17810 }, { "epoch": 2.381786573950254, "grad_norm": 1.8443785905838013, "learning_rate": 2.1459369582423663e-06, "loss": 0.3856, "step": 17811 }, { "epoch": 2.381920299545333, "grad_norm": 1.5280587673187256, "learning_rate": 2.1450433890026147e-06, "loss": 0.4108, "step": 17812 }, { "epoch": 2.3820540251404116, "grad_norm": 1.6207430362701416, "learning_rate": 2.144149983491913e-06, "loss": 0.402, "step": 17813 }, { "epoch": 2.3821877507354907, "grad_norm": 1.5400564670562744, "learning_rate": 2.1432567417288862e-06, "loss": 0.3952, "step": 17814 }, { "epoch": 2.38232147633057, "grad_norm": 1.4587730169296265, "learning_rate": 2.14236366373216e-06, "loss": 0.3711, "step": 17815 }, { "epoch": 2.3824552019256484, "grad_norm": 1.4588806629180908, "learning_rate": 2.1414707495203415e-06, "loss": 0.3526, "step": 17816 }, { "epoch": 2.3825889275207275, "grad_norm": 1.6683028936386108, "learning_rate": 2.1405779991120445e-06, "loss": 0.3606, "step": 17817 }, { "epoch": 2.3827226531158066, "grad_norm": 1.3662933111190796, "learning_rate": 2.139685412525879e-06, "loss": 0.327, "step": 17818 }, { "epoch": 2.382856378710885, "grad_norm": 1.417389988899231, "learning_rate": 2.1387929897804503e-06, "loss": 0.3658, "step": 17819 }, { "epoch": 2.3829901043059643, "grad_norm": 1.5727050304412842, "learning_rate": 2.137900730894359e-06, "loss": 0.3705, "step": 17820 }, { "epoch": 2.383123829901043, "grad_norm": 1.5236448049545288, "learning_rate": 2.137008635886203e-06, "loss": 0.3146, "step": 17821 }, { "epoch": 2.383257555496122, "grad_norm": 1.7054824829101562, "learning_rate": 2.136116704774579e-06, "loss": 0.3808, "step": 17822 }, { "epoch": 2.383391281091201, "grad_norm": 1.5645071268081665, "learning_rate": 2.1352249375780763e-06, "loss": 0.3879, "step": 17823 }, { "epoch": 2.3835250066862796, "grad_norm": 1.552512764930725, "learning_rate": 2.1343333343152873e-06, "loss": 0.3356, "step": 17824 }, { "epoch": 2.3836587322813587, "grad_norm": 1.8232992887496948, "learning_rate": 2.1334418950047885e-06, "loss": 0.4219, "step": 17825 }, { "epoch": 2.383792457876438, "grad_norm": 1.5619089603424072, "learning_rate": 2.132550619665168e-06, "loss": 0.4007, "step": 17826 }, { "epoch": 2.3839261834715164, "grad_norm": 1.3505864143371582, "learning_rate": 2.1316595083150017e-06, "loss": 0.3465, "step": 17827 }, { "epoch": 2.3840599090665955, "grad_norm": 1.7365391254425049, "learning_rate": 2.1307685609728634e-06, "loss": 0.4061, "step": 17828 }, { "epoch": 2.384193634661674, "grad_norm": 1.5226134061813354, "learning_rate": 2.1298777776573267e-06, "loss": 0.4152, "step": 17829 }, { "epoch": 2.384327360256753, "grad_norm": 1.6723659038543701, "learning_rate": 2.1289871583869527e-06, "loss": 0.3957, "step": 17830 }, { "epoch": 2.3844610858518323, "grad_norm": 1.3853586912155151, "learning_rate": 2.1280967031803134e-06, "loss": 0.3675, "step": 17831 }, { "epoch": 2.384594811446911, "grad_norm": 1.5825053453445435, "learning_rate": 2.1272064120559644e-06, "loss": 0.3682, "step": 17832 }, { "epoch": 2.38472853704199, "grad_norm": 1.5157063007354736, "learning_rate": 2.1263162850324617e-06, "loss": 0.4006, "step": 17833 }, { "epoch": 2.3848622626370686, "grad_norm": 1.5054740905761719, "learning_rate": 2.1254263221283657e-06, "loss": 0.3644, "step": 17834 }, { "epoch": 2.3849959882321476, "grad_norm": 1.7522867918014526, "learning_rate": 2.1245365233622186e-06, "loss": 0.4264, "step": 17835 }, { "epoch": 2.3851297138272267, "grad_norm": 1.6084753274917603, "learning_rate": 2.123646888752576e-06, "loss": 0.4082, "step": 17836 }, { "epoch": 2.3852634394223053, "grad_norm": 1.5315499305725098, "learning_rate": 2.1227574183179755e-06, "loss": 0.3524, "step": 17837 }, { "epoch": 2.3853971650173844, "grad_norm": 1.7481637001037598, "learning_rate": 2.121868112076959e-06, "loss": 0.4147, "step": 17838 }, { "epoch": 2.385530890612463, "grad_norm": 1.5142607688903809, "learning_rate": 2.120978970048063e-06, "loss": 0.3235, "step": 17839 }, { "epoch": 2.385664616207542, "grad_norm": 1.6345171928405762, "learning_rate": 2.120089992249821e-06, "loss": 0.3778, "step": 17840 }, { "epoch": 2.385798341802621, "grad_norm": 1.7262449264526367, "learning_rate": 2.119201178700763e-06, "loss": 0.4361, "step": 17841 }, { "epoch": 2.3859320673977, "grad_norm": 1.455633282661438, "learning_rate": 2.118312529419414e-06, "loss": 0.3584, "step": 17842 }, { "epoch": 2.386065792992779, "grad_norm": 1.5089219808578491, "learning_rate": 2.1174240444243e-06, "loss": 0.3552, "step": 17843 }, { "epoch": 2.3861995185878575, "grad_norm": 1.4888718128204346, "learning_rate": 2.116535723733938e-06, "loss": 0.397, "step": 17844 }, { "epoch": 2.3863332441829366, "grad_norm": 1.4841015338897705, "learning_rate": 2.1156475673668453e-06, "loss": 0.4196, "step": 17845 }, { "epoch": 2.3864669697780156, "grad_norm": 1.532810926437378, "learning_rate": 2.114759575341535e-06, "loss": 0.3914, "step": 17846 }, { "epoch": 2.3866006953730943, "grad_norm": 1.5386040210723877, "learning_rate": 2.113871747676516e-06, "loss": 0.3722, "step": 17847 }, { "epoch": 2.3867344209681733, "grad_norm": 1.6453512907028198, "learning_rate": 2.112984084390294e-06, "loss": 0.3675, "step": 17848 }, { "epoch": 2.386868146563252, "grad_norm": 1.5648529529571533, "learning_rate": 2.112096585501371e-06, "loss": 0.3948, "step": 17849 }, { "epoch": 2.387001872158331, "grad_norm": 1.3051241636276245, "learning_rate": 2.11120925102825e-06, "loss": 0.2982, "step": 17850 }, { "epoch": 2.38713559775341, "grad_norm": 1.5490403175354004, "learning_rate": 2.1103220809894188e-06, "loss": 0.3551, "step": 17851 }, { "epoch": 2.3872693233484887, "grad_norm": 1.7357176542282104, "learning_rate": 2.1094350754033765e-06, "loss": 0.4098, "step": 17852 }, { "epoch": 2.387403048943568, "grad_norm": 1.2691576480865479, "learning_rate": 2.108548234288612e-06, "loss": 0.355, "step": 17853 }, { "epoch": 2.387536774538647, "grad_norm": 1.554545521736145, "learning_rate": 2.107661557663603e-06, "loss": 0.3722, "step": 17854 }, { "epoch": 2.3876705001337255, "grad_norm": 1.5350016355514526, "learning_rate": 2.106775045546842e-06, "loss": 0.4025, "step": 17855 }, { "epoch": 2.3878042257288046, "grad_norm": 1.5181926488876343, "learning_rate": 2.105888697956796e-06, "loss": 0.3969, "step": 17856 }, { "epoch": 2.387937951323883, "grad_norm": 1.434273600578308, "learning_rate": 2.1050025149119523e-06, "loss": 0.3411, "step": 17857 }, { "epoch": 2.3880716769189623, "grad_norm": 1.6034188270568848, "learning_rate": 2.1041164964307747e-06, "loss": 0.3897, "step": 17858 }, { "epoch": 2.3882054025140413, "grad_norm": 1.6710104942321777, "learning_rate": 2.1032306425317296e-06, "loss": 0.4021, "step": 17859 }, { "epoch": 2.38833912810912, "grad_norm": 1.4621918201446533, "learning_rate": 2.1023449532332908e-06, "loss": 0.3617, "step": 17860 }, { "epoch": 2.388472853704199, "grad_norm": 1.3159018754959106, "learning_rate": 2.101459428553911e-06, "loss": 0.3324, "step": 17861 }, { "epoch": 2.388606579299278, "grad_norm": 1.4102952480316162, "learning_rate": 2.1005740685120524e-06, "loss": 0.3537, "step": 17862 }, { "epoch": 2.3887403048943567, "grad_norm": 1.6028289794921875, "learning_rate": 2.099688873126168e-06, "loss": 0.3953, "step": 17863 }, { "epoch": 2.388874030489436, "grad_norm": 1.6218167543411255, "learning_rate": 2.0988038424147093e-06, "loss": 0.4323, "step": 17864 }, { "epoch": 2.3890077560845144, "grad_norm": 1.5229204893112183, "learning_rate": 2.097918976396124e-06, "loss": 0.3903, "step": 17865 }, { "epoch": 2.3891414816795935, "grad_norm": 1.4938158988952637, "learning_rate": 2.097034275088855e-06, "loss": 0.3866, "step": 17866 }, { "epoch": 2.3892752072746726, "grad_norm": 1.4379466772079468, "learning_rate": 2.096149738511346e-06, "loss": 0.3695, "step": 17867 }, { "epoch": 2.389408932869751, "grad_norm": 1.4845023155212402, "learning_rate": 2.095265366682031e-06, "loss": 0.3451, "step": 17868 }, { "epoch": 2.3895426584648303, "grad_norm": 1.611345648765564, "learning_rate": 2.0943811596193485e-06, "loss": 0.369, "step": 17869 }, { "epoch": 2.389676384059909, "grad_norm": 1.597919225692749, "learning_rate": 2.093497117341722e-06, "loss": 0.3677, "step": 17870 }, { "epoch": 2.389810109654988, "grad_norm": 1.493640422821045, "learning_rate": 2.0926132398675836e-06, "loss": 0.3687, "step": 17871 }, { "epoch": 2.389943835250067, "grad_norm": 1.4551411867141724, "learning_rate": 2.091729527215356e-06, "loss": 0.3843, "step": 17872 }, { "epoch": 2.3900775608451457, "grad_norm": 1.5585696697235107, "learning_rate": 2.0908459794034587e-06, "loss": 0.3935, "step": 17873 }, { "epoch": 2.3902112864402247, "grad_norm": 1.6306896209716797, "learning_rate": 2.0899625964503113e-06, "loss": 0.3692, "step": 17874 }, { "epoch": 2.3903450120353034, "grad_norm": 1.2582402229309082, "learning_rate": 2.0890793783743204e-06, "loss": 0.3237, "step": 17875 }, { "epoch": 2.3904787376303824, "grad_norm": 1.614313006401062, "learning_rate": 2.088196325193904e-06, "loss": 0.4098, "step": 17876 }, { "epoch": 2.3906124632254615, "grad_norm": 1.5872982740402222, "learning_rate": 2.0873134369274616e-06, "loss": 0.3368, "step": 17877 }, { "epoch": 2.39074618882054, "grad_norm": 1.51145601272583, "learning_rate": 2.086430713593397e-06, "loss": 0.3771, "step": 17878 }, { "epoch": 2.390879914415619, "grad_norm": 1.7802976369857788, "learning_rate": 2.0855481552101163e-06, "loss": 0.4001, "step": 17879 }, { "epoch": 2.391013640010698, "grad_norm": 1.484453558921814, "learning_rate": 2.0846657617960063e-06, "loss": 0.3814, "step": 17880 }, { "epoch": 2.391147365605777, "grad_norm": 1.645645260810852, "learning_rate": 2.08378353336947e-06, "loss": 0.3895, "step": 17881 }, { "epoch": 2.391281091200856, "grad_norm": 1.3851213455200195, "learning_rate": 2.082901469948888e-06, "loss": 0.3029, "step": 17882 }, { "epoch": 2.3914148167959346, "grad_norm": 1.3250372409820557, "learning_rate": 2.0820195715526493e-06, "loss": 0.3724, "step": 17883 }, { "epoch": 2.3915485423910137, "grad_norm": 1.5430479049682617, "learning_rate": 2.0811378381991354e-06, "loss": 0.3558, "step": 17884 }, { "epoch": 2.3916822679860923, "grad_norm": 1.6684101819992065, "learning_rate": 2.0802562699067254e-06, "loss": 0.3722, "step": 17885 }, { "epoch": 2.3918159935811714, "grad_norm": 1.4793622493743896, "learning_rate": 2.0793748666937963e-06, "loss": 0.301, "step": 17886 }, { "epoch": 2.3919497191762504, "grad_norm": 1.4755393266677856, "learning_rate": 2.0784936285787173e-06, "loss": 0.3494, "step": 17887 }, { "epoch": 2.392083444771329, "grad_norm": 1.51986825466156, "learning_rate": 2.07761255557986e-06, "loss": 0.3667, "step": 17888 }, { "epoch": 2.392217170366408, "grad_norm": 1.6284205913543701, "learning_rate": 2.0767316477155875e-06, "loss": 0.3856, "step": 17889 }, { "epoch": 2.392350895961487, "grad_norm": 1.5278631448745728, "learning_rate": 2.075850905004262e-06, "loss": 0.3567, "step": 17890 }, { "epoch": 2.392484621556566, "grad_norm": 1.5485665798187256, "learning_rate": 2.074970327464242e-06, "loss": 0.4004, "step": 17891 }, { "epoch": 2.392618347151645, "grad_norm": 1.610215663909912, "learning_rate": 2.0740899151138816e-06, "loss": 0.3607, "step": 17892 }, { "epoch": 2.392752072746724, "grad_norm": 1.4530028104782104, "learning_rate": 2.0732096679715353e-06, "loss": 0.3818, "step": 17893 }, { "epoch": 2.3928857983418026, "grad_norm": 1.550559639930725, "learning_rate": 2.0723295860555438e-06, "loss": 0.3782, "step": 17894 }, { "epoch": 2.3930195239368817, "grad_norm": 1.5442068576812744, "learning_rate": 2.071449669384261e-06, "loss": 0.357, "step": 17895 }, { "epoch": 2.3931532495319603, "grad_norm": 1.484924554824829, "learning_rate": 2.0705699179760176e-06, "loss": 0.3785, "step": 17896 }, { "epoch": 2.3932869751270394, "grad_norm": 1.5484185218811035, "learning_rate": 2.069690331849159e-06, "loss": 0.3551, "step": 17897 }, { "epoch": 2.3934207007221184, "grad_norm": 1.5027798414230347, "learning_rate": 2.068810911022021e-06, "loss": 0.3649, "step": 17898 }, { "epoch": 2.393554426317197, "grad_norm": 1.532842993736267, "learning_rate": 2.0679316555129236e-06, "loss": 0.3576, "step": 17899 }, { "epoch": 2.393688151912276, "grad_norm": 1.3951435089111328, "learning_rate": 2.0670525653402064e-06, "loss": 0.3302, "step": 17900 }, { "epoch": 2.3938218775073548, "grad_norm": 1.3945815563201904, "learning_rate": 2.0661736405221843e-06, "loss": 0.336, "step": 17901 }, { "epoch": 2.393955603102434, "grad_norm": 1.5005645751953125, "learning_rate": 2.065294881077181e-06, "loss": 0.307, "step": 17902 }, { "epoch": 2.394089328697513, "grad_norm": 1.5576838254928589, "learning_rate": 2.064416287023514e-06, "loss": 0.3416, "step": 17903 }, { "epoch": 2.3942230542925915, "grad_norm": 1.5740407705307007, "learning_rate": 2.063537858379493e-06, "loss": 0.3831, "step": 17904 }, { "epoch": 2.3943567798876706, "grad_norm": 1.5155054330825806, "learning_rate": 2.0626595951634365e-06, "loss": 0.3648, "step": 17905 }, { "epoch": 2.394490505482749, "grad_norm": 1.3387234210968018, "learning_rate": 2.0617814973936425e-06, "loss": 0.2807, "step": 17906 }, { "epoch": 2.3946242310778283, "grad_norm": 1.616338849067688, "learning_rate": 2.060903565088417e-06, "loss": 0.3639, "step": 17907 }, { "epoch": 2.3947579566729074, "grad_norm": 1.3485593795776367, "learning_rate": 2.0600257982660598e-06, "loss": 0.3189, "step": 17908 }, { "epoch": 2.394891682267986, "grad_norm": 1.6645393371582031, "learning_rate": 2.0591481969448668e-06, "loss": 0.3966, "step": 17909 }, { "epoch": 2.395025407863065, "grad_norm": 1.4824212789535522, "learning_rate": 2.058270761143132e-06, "loss": 0.3263, "step": 17910 }, { "epoch": 2.3951591334581437, "grad_norm": 1.563370943069458, "learning_rate": 2.0573934908791426e-06, "loss": 0.3875, "step": 17911 }, { "epoch": 2.3952928590532228, "grad_norm": 1.8260524272918701, "learning_rate": 2.0565163861711867e-06, "loss": 0.4287, "step": 17912 }, { "epoch": 2.395426584648302, "grad_norm": 1.5508047342300415, "learning_rate": 2.055639447037545e-06, "loss": 0.3481, "step": 17913 }, { "epoch": 2.3955603102433805, "grad_norm": 1.6852552890777588, "learning_rate": 2.0547626734965e-06, "loss": 0.379, "step": 17914 }, { "epoch": 2.3956940358384595, "grad_norm": 1.6302062273025513, "learning_rate": 2.0538860655663183e-06, "loss": 0.3894, "step": 17915 }, { "epoch": 2.395827761433538, "grad_norm": 1.645849347114563, "learning_rate": 2.0530096232652818e-06, "loss": 0.3776, "step": 17916 }, { "epoch": 2.395961487028617, "grad_norm": 1.5102357864379883, "learning_rate": 2.0521333466116576e-06, "loss": 0.4024, "step": 17917 }, { "epoch": 2.3960952126236963, "grad_norm": 1.4829307794570923, "learning_rate": 2.0512572356237027e-06, "loss": 0.3551, "step": 17918 }, { "epoch": 2.396228938218775, "grad_norm": 1.590675711631775, "learning_rate": 2.0503812903196897e-06, "loss": 0.3606, "step": 17919 }, { "epoch": 2.396362663813854, "grad_norm": 1.4651782512664795, "learning_rate": 2.0495055107178675e-06, "loss": 0.3638, "step": 17920 }, { "epoch": 2.396496389408933, "grad_norm": 1.6999223232269287, "learning_rate": 2.0486298968364994e-06, "loss": 0.3832, "step": 17921 }, { "epoch": 2.3966301150040117, "grad_norm": 1.367639183998108, "learning_rate": 2.0477544486938306e-06, "loss": 0.3268, "step": 17922 }, { "epoch": 2.3967638405990908, "grad_norm": 1.6939268112182617, "learning_rate": 2.0468791663081077e-06, "loss": 0.413, "step": 17923 }, { "epoch": 2.3968975661941694, "grad_norm": 1.531829833984375, "learning_rate": 2.0460040496975843e-06, "loss": 0.375, "step": 17924 }, { "epoch": 2.3970312917892485, "grad_norm": 1.491853952407837, "learning_rate": 2.0451290988804916e-06, "loss": 0.3619, "step": 17925 }, { "epoch": 2.3971650173843275, "grad_norm": 1.5072453022003174, "learning_rate": 2.0442543138750713e-06, "loss": 0.3346, "step": 17926 }, { "epoch": 2.397298742979406, "grad_norm": 1.487454891204834, "learning_rate": 2.0433796946995565e-06, "loss": 0.3511, "step": 17927 }, { "epoch": 2.397432468574485, "grad_norm": 1.6014050245285034, "learning_rate": 2.0425052413721793e-06, "loss": 0.3512, "step": 17928 }, { "epoch": 2.3975661941695643, "grad_norm": 1.7423291206359863, "learning_rate": 2.0416309539111656e-06, "loss": 0.4098, "step": 17929 }, { "epoch": 2.397699919764643, "grad_norm": 1.3837201595306396, "learning_rate": 2.0407568323347395e-06, "loss": 0.3503, "step": 17930 }, { "epoch": 2.397833645359722, "grad_norm": 1.6199169158935547, "learning_rate": 2.03988287666112e-06, "loss": 0.3584, "step": 17931 }, { "epoch": 2.3979673709548006, "grad_norm": 1.5247902870178223, "learning_rate": 2.0390090869085254e-06, "loss": 0.4106, "step": 17932 }, { "epoch": 2.3981010965498797, "grad_norm": 1.3270975351333618, "learning_rate": 2.038135463095169e-06, "loss": 0.3337, "step": 17933 }, { "epoch": 2.3982348221449588, "grad_norm": 1.5214077234268188, "learning_rate": 2.03726200523926e-06, "loss": 0.3565, "step": 17934 }, { "epoch": 2.3983685477400374, "grad_norm": 1.53960120677948, "learning_rate": 2.0363887133590053e-06, "loss": 0.3327, "step": 17935 }, { "epoch": 2.3985022733351165, "grad_norm": 1.6621617078781128, "learning_rate": 2.0355155874726073e-06, "loss": 0.3711, "step": 17936 }, { "epoch": 2.398635998930195, "grad_norm": 1.5302265882492065, "learning_rate": 2.0346426275982654e-06, "loss": 0.3916, "step": 17937 }, { "epoch": 2.398769724525274, "grad_norm": 1.3897732496261597, "learning_rate": 2.0337698337541787e-06, "loss": 0.3589, "step": 17938 }, { "epoch": 2.398903450120353, "grad_norm": 1.5956616401672363, "learning_rate": 2.0328972059585317e-06, "loss": 0.4011, "step": 17939 }, { "epoch": 2.399037175715432, "grad_norm": 1.746777057647705, "learning_rate": 2.0320247442295237e-06, "loss": 0.3963, "step": 17940 }, { "epoch": 2.399170901310511, "grad_norm": 1.7709999084472656, "learning_rate": 2.0311524485853307e-06, "loss": 0.4164, "step": 17941 }, { "epoch": 2.3993046269055895, "grad_norm": 1.4809352159500122, "learning_rate": 2.0302803190441424e-06, "loss": 0.3549, "step": 17942 }, { "epoch": 2.3994383525006686, "grad_norm": 1.701432228088379, "learning_rate": 2.029408355624136e-06, "loss": 0.3992, "step": 17943 }, { "epoch": 2.3995720780957477, "grad_norm": 1.6228365898132324, "learning_rate": 2.028536558343481e-06, "loss": 0.3633, "step": 17944 }, { "epoch": 2.3997058036908263, "grad_norm": 1.543188214302063, "learning_rate": 2.0276649272203586e-06, "loss": 0.3456, "step": 17945 }, { "epoch": 2.3998395292859054, "grad_norm": 1.6711128950119019, "learning_rate": 2.02679346227293e-06, "loss": 0.3971, "step": 17946 }, { "epoch": 2.399973254880984, "grad_norm": 1.6172393560409546, "learning_rate": 2.0259221635193616e-06, "loss": 0.3755, "step": 17947 }, { "epoch": 2.400106980476063, "grad_norm": 1.6031951904296875, "learning_rate": 2.025051030977816e-06, "loss": 0.3561, "step": 17948 }, { "epoch": 2.400240706071142, "grad_norm": 1.8141647577285767, "learning_rate": 2.02418006466645e-06, "loss": 0.3939, "step": 17949 }, { "epoch": 2.4003744316662208, "grad_norm": 1.6056065559387207, "learning_rate": 2.023309264603418e-06, "loss": 0.3987, "step": 17950 }, { "epoch": 2.4005081572613, "grad_norm": 1.5413163900375366, "learning_rate": 2.022438630806872e-06, "loss": 0.4063, "step": 17951 }, { "epoch": 2.4006418828563785, "grad_norm": 1.4227293729782104, "learning_rate": 2.021568163294959e-06, "loss": 0.3295, "step": 17952 }, { "epoch": 2.4007756084514575, "grad_norm": 1.6649378538131714, "learning_rate": 2.020697862085823e-06, "loss": 0.4115, "step": 17953 }, { "epoch": 2.4009093340465366, "grad_norm": 1.6725213527679443, "learning_rate": 2.019827727197605e-06, "loss": 0.3852, "step": 17954 }, { "epoch": 2.4010430596416152, "grad_norm": 1.4428149461746216, "learning_rate": 2.018957758648442e-06, "loss": 0.3718, "step": 17955 }, { "epoch": 2.4011767852366943, "grad_norm": 1.492225170135498, "learning_rate": 2.018087956456467e-06, "loss": 0.3357, "step": 17956 }, { "epoch": 2.4013105108317734, "grad_norm": 1.4774190187454224, "learning_rate": 2.017218320639811e-06, "loss": 0.3526, "step": 17957 }, { "epoch": 2.401444236426852, "grad_norm": 1.5460723638534546, "learning_rate": 2.0163488512166007e-06, "loss": 0.338, "step": 17958 }, { "epoch": 2.401577962021931, "grad_norm": 1.7437920570373535, "learning_rate": 2.0154795482049616e-06, "loss": 0.3749, "step": 17959 }, { "epoch": 2.4017116876170097, "grad_norm": 1.440415620803833, "learning_rate": 2.014610411623005e-06, "loss": 0.3341, "step": 17960 }, { "epoch": 2.4018454132120888, "grad_norm": 1.4563069343566895, "learning_rate": 2.0137414414888555e-06, "loss": 0.3265, "step": 17961 }, { "epoch": 2.401979138807168, "grad_norm": 1.4194328784942627, "learning_rate": 2.0128726378206275e-06, "loss": 0.356, "step": 17962 }, { "epoch": 2.4021128644022465, "grad_norm": 1.5021103620529175, "learning_rate": 2.0120040006364204e-06, "loss": 0.3657, "step": 17963 }, { "epoch": 2.4022465899973255, "grad_norm": 1.604524850845337, "learning_rate": 2.011135529954352e-06, "loss": 0.3989, "step": 17964 }, { "epoch": 2.4023803155924046, "grad_norm": 1.441267490386963, "learning_rate": 2.0102672257925137e-06, "loss": 0.3469, "step": 17965 }, { "epoch": 2.4025140411874832, "grad_norm": 1.7912224531173706, "learning_rate": 2.009399088169015e-06, "loss": 0.3779, "step": 17966 }, { "epoch": 2.4026477667825623, "grad_norm": 1.6365660429000854, "learning_rate": 2.008531117101943e-06, "loss": 0.3826, "step": 17967 }, { "epoch": 2.402781492377641, "grad_norm": 1.3388489484786987, "learning_rate": 2.007663312609394e-06, "loss": 0.3122, "step": 17968 }, { "epoch": 2.40291521797272, "grad_norm": 1.5664211511611938, "learning_rate": 2.0067956747094542e-06, "loss": 0.3374, "step": 17969 }, { "epoch": 2.403048943567799, "grad_norm": 1.5719892978668213, "learning_rate": 2.0059282034202097e-06, "loss": 0.3469, "step": 17970 }, { "epoch": 2.4031826691628777, "grad_norm": 1.5805082321166992, "learning_rate": 2.005060898759743e-06, "loss": 0.3754, "step": 17971 }, { "epoch": 2.4033163947579568, "grad_norm": 1.529064655303955, "learning_rate": 2.0041937607461315e-06, "loss": 0.3975, "step": 17972 }, { "epoch": 2.4034501203530354, "grad_norm": 1.646658182144165, "learning_rate": 2.0033267893974495e-06, "loss": 0.3929, "step": 17973 }, { "epoch": 2.4035838459481145, "grad_norm": 1.5485522747039795, "learning_rate": 2.0024599847317695e-06, "loss": 0.3698, "step": 17974 }, { "epoch": 2.4037175715431935, "grad_norm": 1.4969358444213867, "learning_rate": 2.001593346767158e-06, "loss": 0.368, "step": 17975 }, { "epoch": 2.403851297138272, "grad_norm": 1.4068106412887573, "learning_rate": 2.000726875521679e-06, "loss": 0.3562, "step": 17976 }, { "epoch": 2.4039850227333512, "grad_norm": 1.4809266328811646, "learning_rate": 1.999860571013393e-06, "loss": 0.3621, "step": 17977 }, { "epoch": 2.40411874832843, "grad_norm": 1.555912733078003, "learning_rate": 1.998994433260363e-06, "loss": 0.3618, "step": 17978 }, { "epoch": 2.404252473923509, "grad_norm": 1.526281476020813, "learning_rate": 1.9981284622806306e-06, "loss": 0.3446, "step": 17979 }, { "epoch": 2.404386199518588, "grad_norm": 1.6385716199874878, "learning_rate": 1.9972626580922573e-06, "loss": 0.3707, "step": 17980 }, { "epoch": 2.4045199251136666, "grad_norm": 1.5952492952346802, "learning_rate": 1.9963970207132854e-06, "loss": 0.3503, "step": 17981 }, { "epoch": 2.4046536507087457, "grad_norm": 1.4557716846466064, "learning_rate": 1.995531550161759e-06, "loss": 0.366, "step": 17982 }, { "epoch": 2.4047873763038243, "grad_norm": 1.4788583517074585, "learning_rate": 1.994666246455721e-06, "loss": 0.3195, "step": 17983 }, { "epoch": 2.4049211018989034, "grad_norm": 1.3122265338897705, "learning_rate": 1.9938011096131993e-06, "loss": 0.3683, "step": 17984 }, { "epoch": 2.4050548274939825, "grad_norm": 1.5411807298660278, "learning_rate": 1.9929361396522386e-06, "loss": 0.3418, "step": 17985 }, { "epoch": 2.405188553089061, "grad_norm": 1.5965473651885986, "learning_rate": 1.9920713365908586e-06, "loss": 0.3975, "step": 17986 }, { "epoch": 2.40532227868414, "grad_norm": 1.73786199092865, "learning_rate": 1.9912067004470892e-06, "loss": 0.3787, "step": 17987 }, { "epoch": 2.405456004279219, "grad_norm": 1.5518689155578613, "learning_rate": 1.990342231238952e-06, "loss": 0.3749, "step": 17988 }, { "epoch": 2.405589729874298, "grad_norm": 1.6562516689300537, "learning_rate": 1.9894779289844646e-06, "loss": 0.3583, "step": 17989 }, { "epoch": 2.405723455469377, "grad_norm": 1.7126809358596802, "learning_rate": 1.9886137937016493e-06, "loss": 0.3936, "step": 17990 }, { "epoch": 2.4058571810644556, "grad_norm": 1.739372968673706, "learning_rate": 1.9877498254085103e-06, "loss": 0.4516, "step": 17991 }, { "epoch": 2.4059909066595346, "grad_norm": 1.5956242084503174, "learning_rate": 1.9868860241230604e-06, "loss": 0.418, "step": 17992 }, { "epoch": 2.4061246322546137, "grad_norm": 1.2999794483184814, "learning_rate": 1.9860223898633023e-06, "loss": 0.301, "step": 17993 }, { "epoch": 2.4062583578496923, "grad_norm": 1.4201385974884033, "learning_rate": 1.9851589226472402e-06, "loss": 0.3231, "step": 17994 }, { "epoch": 2.4063920834447714, "grad_norm": 1.7690218687057495, "learning_rate": 1.98429562249287e-06, "loss": 0.3849, "step": 17995 }, { "epoch": 2.4065258090398505, "grad_norm": 1.4726568460464478, "learning_rate": 1.983432489418189e-06, "loss": 0.3738, "step": 17996 }, { "epoch": 2.406659534634929, "grad_norm": 1.433272361755371, "learning_rate": 1.9825695234411847e-06, "loss": 0.3846, "step": 17997 }, { "epoch": 2.406793260230008, "grad_norm": 1.7957602739334106, "learning_rate": 1.981706724579848e-06, "loss": 0.4078, "step": 17998 }, { "epoch": 2.406926985825087, "grad_norm": 1.7641193866729736, "learning_rate": 1.980844092852162e-06, "loss": 0.393, "step": 17999 }, { "epoch": 2.407060711420166, "grad_norm": 1.5508638620376587, "learning_rate": 1.9799816282761064e-06, "loss": 0.3569, "step": 18000 }, { "epoch": 2.407194437015245, "grad_norm": 1.6310198307037354, "learning_rate": 1.979119330869661e-06, "loss": 0.4101, "step": 18001 }, { "epoch": 2.4073281626103236, "grad_norm": 1.676611304283142, "learning_rate": 1.9782572006507995e-06, "loss": 0.3844, "step": 18002 }, { "epoch": 2.4074618882054026, "grad_norm": 1.5769695043563843, "learning_rate": 1.977395237637485e-06, "loss": 0.3692, "step": 18003 }, { "epoch": 2.4075956138004813, "grad_norm": 1.7358254194259644, "learning_rate": 1.9765334418476967e-06, "loss": 0.3443, "step": 18004 }, { "epoch": 2.4077293393955603, "grad_norm": 1.6642705202102661, "learning_rate": 1.9756718132993848e-06, "loss": 0.4427, "step": 18005 }, { "epoch": 2.4078630649906394, "grad_norm": 1.6510920524597168, "learning_rate": 1.974810352010519e-06, "loss": 0.3807, "step": 18006 }, { "epoch": 2.407996790585718, "grad_norm": 1.6528747081756592, "learning_rate": 1.973949057999054e-06, "loss": 0.3754, "step": 18007 }, { "epoch": 2.408130516180797, "grad_norm": 1.5447001457214355, "learning_rate": 1.9730879312829354e-06, "loss": 0.3488, "step": 18008 }, { "epoch": 2.4082642417758757, "grad_norm": 1.3964084386825562, "learning_rate": 1.9722269718801236e-06, "loss": 0.3525, "step": 18009 }, { "epoch": 2.408397967370955, "grad_norm": 1.3696916103363037, "learning_rate": 1.9713661798085557e-06, "loss": 0.3765, "step": 18010 }, { "epoch": 2.408531692966034, "grad_norm": 1.7484173774719238, "learning_rate": 1.9705055550861784e-06, "loss": 0.3591, "step": 18011 }, { "epoch": 2.4086654185611125, "grad_norm": 1.5583627223968506, "learning_rate": 1.9696450977309278e-06, "loss": 0.3635, "step": 18012 }, { "epoch": 2.4087991441561916, "grad_norm": 1.4588489532470703, "learning_rate": 1.968784807760742e-06, "loss": 0.3418, "step": 18013 }, { "epoch": 2.40893286975127, "grad_norm": 1.4994537830352783, "learning_rate": 1.967924685193552e-06, "loss": 0.3505, "step": 18014 }, { "epoch": 2.4090665953463493, "grad_norm": 1.3571383953094482, "learning_rate": 1.9670647300472856e-06, "loss": 0.3921, "step": 18015 }, { "epoch": 2.4092003209414283, "grad_norm": 1.178723931312561, "learning_rate": 1.966204942339869e-06, "loss": 0.3164, "step": 18016 }, { "epoch": 2.409334046536507, "grad_norm": 1.6758408546447754, "learning_rate": 1.9653453220892217e-06, "loss": 0.3848, "step": 18017 }, { "epoch": 2.409467772131586, "grad_norm": 1.3990116119384766, "learning_rate": 1.9644858693132627e-06, "loss": 0.4024, "step": 18018 }, { "epoch": 2.4096014977266647, "grad_norm": 1.5540207624435425, "learning_rate": 1.9636265840299075e-06, "loss": 0.3369, "step": 18019 }, { "epoch": 2.4097352233217437, "grad_norm": 1.4744235277175903, "learning_rate": 1.962767466257066e-06, "loss": 0.4021, "step": 18020 }, { "epoch": 2.409868948916823, "grad_norm": 1.4892199039459229, "learning_rate": 1.961908516012646e-06, "loss": 0.3363, "step": 18021 }, { "epoch": 2.4100026745119014, "grad_norm": 1.689810037612915, "learning_rate": 1.9610497333145506e-06, "loss": 0.376, "step": 18022 }, { "epoch": 2.4101364001069805, "grad_norm": 1.4549789428710938, "learning_rate": 1.9601911181806845e-06, "loss": 0.3979, "step": 18023 }, { "epoch": 2.4102701257020596, "grad_norm": 1.3405613899230957, "learning_rate": 1.959332670628936e-06, "loss": 0.3003, "step": 18024 }, { "epoch": 2.410403851297138, "grad_norm": 1.635075569152832, "learning_rate": 1.9584743906772063e-06, "loss": 0.4037, "step": 18025 }, { "epoch": 2.4105375768922173, "grad_norm": 1.6496763229370117, "learning_rate": 1.9576162783433826e-06, "loss": 0.407, "step": 18026 }, { "epoch": 2.410671302487296, "grad_norm": 1.578217625617981, "learning_rate": 1.9567583336453523e-06, "loss": 0.3422, "step": 18027 }, { "epoch": 2.410805028082375, "grad_norm": 1.4393810033798218, "learning_rate": 1.9559005566010013e-06, "loss": 0.3659, "step": 18028 }, { "epoch": 2.410938753677454, "grad_norm": 1.659801959991455, "learning_rate": 1.9550429472281995e-06, "loss": 0.3978, "step": 18029 }, { "epoch": 2.4110724792725327, "grad_norm": 1.5541491508483887, "learning_rate": 1.9541855055448346e-06, "loss": 0.3682, "step": 18030 }, { "epoch": 2.4112062048676117, "grad_norm": 1.5560804605484009, "learning_rate": 1.9533282315687716e-06, "loss": 0.3573, "step": 18031 }, { "epoch": 2.411339930462691, "grad_norm": 1.6256047487258911, "learning_rate": 1.952471125317882e-06, "loss": 0.3698, "step": 18032 }, { "epoch": 2.4114736560577694, "grad_norm": 1.3780567646026611, "learning_rate": 1.9516141868100304e-06, "loss": 0.3433, "step": 18033 }, { "epoch": 2.4116073816528485, "grad_norm": 1.5133588314056396, "learning_rate": 1.950757416063077e-06, "loss": 0.3118, "step": 18034 }, { "epoch": 2.411741107247927, "grad_norm": 1.6051510572433472, "learning_rate": 1.9499008130948893e-06, "loss": 0.3554, "step": 18035 }, { "epoch": 2.411874832843006, "grad_norm": 1.5389469861984253, "learning_rate": 1.9490443779233127e-06, "loss": 0.3438, "step": 18036 }, { "epoch": 2.4120085584380853, "grad_norm": 1.4993011951446533, "learning_rate": 1.9481881105662027e-06, "loss": 0.3425, "step": 18037 }, { "epoch": 2.412142284033164, "grad_norm": 1.7161153554916382, "learning_rate": 1.947332011041406e-06, "loss": 0.3492, "step": 18038 }, { "epoch": 2.412276009628243, "grad_norm": 1.799184799194336, "learning_rate": 1.946476079366768e-06, "loss": 0.3969, "step": 18039 }, { "epoch": 2.4124097352233216, "grad_norm": 1.7864443063735962, "learning_rate": 1.9456203155601295e-06, "loss": 0.3695, "step": 18040 }, { "epoch": 2.4125434608184007, "grad_norm": 1.6597378253936768, "learning_rate": 1.9447647196393295e-06, "loss": 0.4108, "step": 18041 }, { "epoch": 2.4126771864134797, "grad_norm": 1.5790430307388306, "learning_rate": 1.9439092916222004e-06, "loss": 0.3718, "step": 18042 }, { "epoch": 2.4128109120085584, "grad_norm": 1.7413369417190552, "learning_rate": 1.9430540315265723e-06, "loss": 0.4087, "step": 18043 }, { "epoch": 2.4129446376036374, "grad_norm": 1.695953607559204, "learning_rate": 1.9421989393702744e-06, "loss": 0.4181, "step": 18044 }, { "epoch": 2.413078363198716, "grad_norm": 1.694677472114563, "learning_rate": 1.9413440151711282e-06, "loss": 0.3699, "step": 18045 }, { "epoch": 2.413212088793795, "grad_norm": 1.599601149559021, "learning_rate": 1.940489258946955e-06, "loss": 0.3471, "step": 18046 }, { "epoch": 2.413345814388874, "grad_norm": 1.5811517238616943, "learning_rate": 1.9396346707155745e-06, "loss": 0.3826, "step": 18047 }, { "epoch": 2.413479539983953, "grad_norm": 1.626510739326477, "learning_rate": 1.9387802504947906e-06, "loss": 0.3671, "step": 18048 }, { "epoch": 2.413613265579032, "grad_norm": 1.548584222793579, "learning_rate": 1.9379259983024236e-06, "loss": 0.3593, "step": 18049 }, { "epoch": 2.4137469911741105, "grad_norm": 1.4227983951568604, "learning_rate": 1.9370719141562687e-06, "loss": 0.3068, "step": 18050 }, { "epoch": 2.4138807167691896, "grad_norm": 1.3419324159622192, "learning_rate": 1.9362179980741413e-06, "loss": 0.3336, "step": 18051 }, { "epoch": 2.4140144423642687, "grad_norm": 1.4893600940704346, "learning_rate": 1.93536425007383e-06, "loss": 0.3544, "step": 18052 }, { "epoch": 2.4141481679593473, "grad_norm": 1.541743516921997, "learning_rate": 1.934510670173131e-06, "loss": 0.3961, "step": 18053 }, { "epoch": 2.4142818935544264, "grad_norm": 1.6864932775497437, "learning_rate": 1.9336572583898448e-06, "loss": 0.4035, "step": 18054 }, { "epoch": 2.414415619149505, "grad_norm": 1.4050863981246948, "learning_rate": 1.9328040147417513e-06, "loss": 0.3383, "step": 18055 }, { "epoch": 2.414549344744584, "grad_norm": 1.4355391263961792, "learning_rate": 1.9319509392466394e-06, "loss": 0.3136, "step": 18056 }, { "epoch": 2.414683070339663, "grad_norm": 1.4853312969207764, "learning_rate": 1.9310980319222903e-06, "loss": 0.3698, "step": 18057 }, { "epoch": 2.4148167959347417, "grad_norm": 1.5603691339492798, "learning_rate": 1.9302452927864812e-06, "loss": 0.3138, "step": 18058 }, { "epoch": 2.414950521529821, "grad_norm": 1.572059154510498, "learning_rate": 1.9293927218569863e-06, "loss": 0.3832, "step": 18059 }, { "epoch": 2.4150842471249, "grad_norm": 1.472150206565857, "learning_rate": 1.9285403191515783e-06, "loss": 0.3444, "step": 18060 }, { "epoch": 2.4152179727199785, "grad_norm": 1.553295612335205, "learning_rate": 1.927688084688023e-06, "loss": 0.3799, "step": 18061 }, { "epoch": 2.4153516983150576, "grad_norm": 1.5900689363479614, "learning_rate": 1.926836018484085e-06, "loss": 0.4046, "step": 18062 }, { "epoch": 2.415485423910136, "grad_norm": 1.525107979774475, "learning_rate": 1.925984120557526e-06, "loss": 0.3872, "step": 18063 }, { "epoch": 2.4156191495052153, "grad_norm": 1.494560718536377, "learning_rate": 1.925132390926102e-06, "loss": 0.3603, "step": 18064 }, { "epoch": 2.4157528751002944, "grad_norm": 1.452217936515808, "learning_rate": 1.9242808296075655e-06, "loss": 0.3539, "step": 18065 }, { "epoch": 2.415886600695373, "grad_norm": 1.7618021965026855, "learning_rate": 1.9234294366196683e-06, "loss": 0.4059, "step": 18066 }, { "epoch": 2.416020326290452, "grad_norm": 1.458060383796692, "learning_rate": 1.9225782119801563e-06, "loss": 0.3576, "step": 18067 }, { "epoch": 2.416154051885531, "grad_norm": 1.6456409692764282, "learning_rate": 1.921727155706774e-06, "loss": 0.3641, "step": 18068 }, { "epoch": 2.4162877774806097, "grad_norm": 1.4869545698165894, "learning_rate": 1.9208762678172543e-06, "loss": 0.3495, "step": 18069 }, { "epoch": 2.416421503075689, "grad_norm": 1.7008962631225586, "learning_rate": 1.9200255483293427e-06, "loss": 0.3769, "step": 18070 }, { "epoch": 2.4165552286707674, "grad_norm": 1.6859917640686035, "learning_rate": 1.9191749972607655e-06, "loss": 0.4341, "step": 18071 }, { "epoch": 2.4166889542658465, "grad_norm": 1.458686113357544, "learning_rate": 1.918324614629249e-06, "loss": 0.3648, "step": 18072 }, { "epoch": 2.4168226798609256, "grad_norm": 1.4641509056091309, "learning_rate": 1.917474400452528e-06, "loss": 0.3298, "step": 18073 }, { "epoch": 2.416956405456004, "grad_norm": 1.5092238187789917, "learning_rate": 1.9166243547483143e-06, "loss": 0.378, "step": 18074 }, { "epoch": 2.4170901310510833, "grad_norm": 1.576855182647705, "learning_rate": 1.9157744775343355e-06, "loss": 0.3835, "step": 18075 }, { "epoch": 2.417223856646162, "grad_norm": 1.6809840202331543, "learning_rate": 1.9149247688283e-06, "loss": 0.3509, "step": 18076 }, { "epoch": 2.417357582241241, "grad_norm": 1.5899978876113892, "learning_rate": 1.9140752286479213e-06, "loss": 0.4223, "step": 18077 }, { "epoch": 2.41749130783632, "grad_norm": 1.6565505266189575, "learning_rate": 1.9132258570109062e-06, "loss": 0.3544, "step": 18078 }, { "epoch": 2.4176250334313987, "grad_norm": 1.555350422859192, "learning_rate": 1.912376653934961e-06, "loss": 0.3623, "step": 18079 }, { "epoch": 2.4177587590264777, "grad_norm": 1.5153439044952393, "learning_rate": 1.911527619437784e-06, "loss": 0.3991, "step": 18080 }, { "epoch": 2.4178924846215564, "grad_norm": 1.470353364944458, "learning_rate": 1.9106787535370753e-06, "loss": 0.3602, "step": 18081 }, { "epoch": 2.4180262102166354, "grad_norm": 1.6296138763427734, "learning_rate": 1.9098300562505266e-06, "loss": 0.3637, "step": 18082 }, { "epoch": 2.4181599358117145, "grad_norm": 1.5523146390914917, "learning_rate": 1.908981527595829e-06, "loss": 0.3986, "step": 18083 }, { "epoch": 2.418293661406793, "grad_norm": 1.6103651523590088, "learning_rate": 1.908133167590669e-06, "loss": 0.3991, "step": 18084 }, { "epoch": 2.418427387001872, "grad_norm": 1.6443121433258057, "learning_rate": 1.9072849762527301e-06, "loss": 0.3852, "step": 18085 }, { "epoch": 2.418561112596951, "grad_norm": 1.546447515487671, "learning_rate": 1.906436953599693e-06, "loss": 0.3508, "step": 18086 }, { "epoch": 2.41869483819203, "grad_norm": 1.654009222984314, "learning_rate": 1.9055890996492344e-06, "loss": 0.3886, "step": 18087 }, { "epoch": 2.418828563787109, "grad_norm": 1.8355324268341064, "learning_rate": 1.9047414144190203e-06, "loss": 0.3874, "step": 18088 }, { "epoch": 2.4189622893821876, "grad_norm": 1.4860445261001587, "learning_rate": 1.9038938979267308e-06, "loss": 0.3381, "step": 18089 }, { "epoch": 2.4190960149772667, "grad_norm": 1.552764654159546, "learning_rate": 1.9030465501900207e-06, "loss": 0.368, "step": 18090 }, { "epoch": 2.4192297405723453, "grad_norm": 1.345727801322937, "learning_rate": 1.9021993712265596e-06, "loss": 0.3228, "step": 18091 }, { "epoch": 2.4193634661674244, "grad_norm": 1.5653833150863647, "learning_rate": 1.9013523610540064e-06, "loss": 0.3729, "step": 18092 }, { "epoch": 2.4194971917625034, "grad_norm": 1.538627028465271, "learning_rate": 1.900505519690009e-06, "loss": 0.3565, "step": 18093 }, { "epoch": 2.419630917357582, "grad_norm": 1.4516618251800537, "learning_rate": 1.8996588471522282e-06, "loss": 0.2736, "step": 18094 }, { "epoch": 2.419764642952661, "grad_norm": 1.4271422624588013, "learning_rate": 1.898812343458305e-06, "loss": 0.3368, "step": 18095 }, { "epoch": 2.41989836854774, "grad_norm": 1.3784897327423096, "learning_rate": 1.8979660086258866e-06, "loss": 0.3068, "step": 18096 }, { "epoch": 2.420032094142819, "grad_norm": 1.8402807712554932, "learning_rate": 1.8971198426726145e-06, "loss": 0.3888, "step": 18097 }, { "epoch": 2.420165819737898, "grad_norm": 1.4829161167144775, "learning_rate": 1.8962738456161223e-06, "loss": 0.3443, "step": 18098 }, { "epoch": 2.420299545332977, "grad_norm": 1.5332369804382324, "learning_rate": 1.8954280174740536e-06, "loss": 0.3504, "step": 18099 }, { "epoch": 2.4204332709280556, "grad_norm": 1.4898616075515747, "learning_rate": 1.8945823582640288e-06, "loss": 0.3826, "step": 18100 }, { "epoch": 2.4205669965231347, "grad_norm": 1.5770057439804077, "learning_rate": 1.8937368680036794e-06, "loss": 0.3601, "step": 18101 }, { "epoch": 2.4207007221182133, "grad_norm": 1.3761988878250122, "learning_rate": 1.892891546710628e-06, "loss": 0.35, "step": 18102 }, { "epoch": 2.4208344477132924, "grad_norm": 1.75946843624115, "learning_rate": 1.8920463944024948e-06, "loss": 0.4025, "step": 18103 }, { "epoch": 2.4209681733083714, "grad_norm": 1.457396149635315, "learning_rate": 1.8912014110968956e-06, "loss": 0.3405, "step": 18104 }, { "epoch": 2.42110189890345, "grad_norm": 1.507546067237854, "learning_rate": 1.8903565968114445e-06, "loss": 0.3728, "step": 18105 }, { "epoch": 2.421235624498529, "grad_norm": 1.5793941020965576, "learning_rate": 1.8895119515637495e-06, "loss": 0.3878, "step": 18106 }, { "epoch": 2.4213693500936078, "grad_norm": 1.387105941772461, "learning_rate": 1.8886674753714162e-06, "loss": 0.3375, "step": 18107 }, { "epoch": 2.421503075688687, "grad_norm": 1.4817014932632446, "learning_rate": 1.8878231682520488e-06, "loss": 0.3076, "step": 18108 }, { "epoch": 2.421636801283766, "grad_norm": 1.5437718629837036, "learning_rate": 1.886979030223245e-06, "loss": 0.3636, "step": 18109 }, { "epoch": 2.4217705268788445, "grad_norm": 1.4769299030303955, "learning_rate": 1.8861350613025996e-06, "loss": 0.346, "step": 18110 }, { "epoch": 2.4219042524739236, "grad_norm": 1.4321874380111694, "learning_rate": 1.8852912615077045e-06, "loss": 0.3626, "step": 18111 }, { "epoch": 2.4220379780690022, "grad_norm": 1.4741073846817017, "learning_rate": 1.8844476308561488e-06, "loss": 0.3613, "step": 18112 }, { "epoch": 2.4221717036640813, "grad_norm": 1.5756639242172241, "learning_rate": 1.8836041693655183e-06, "loss": 0.377, "step": 18113 }, { "epoch": 2.4223054292591604, "grad_norm": 1.4143149852752686, "learning_rate": 1.882760877053388e-06, "loss": 0.3279, "step": 18114 }, { "epoch": 2.422439154854239, "grad_norm": 1.5536344051361084, "learning_rate": 1.8819177539373445e-06, "loss": 0.3794, "step": 18115 }, { "epoch": 2.422572880449318, "grad_norm": 1.4539053440093994, "learning_rate": 1.8810748000349544e-06, "loss": 0.3503, "step": 18116 }, { "epoch": 2.4227066060443967, "grad_norm": 1.6055166721343994, "learning_rate": 1.8802320153637888e-06, "loss": 0.3885, "step": 18117 }, { "epoch": 2.4228403316394758, "grad_norm": 1.4351887702941895, "learning_rate": 1.8793893999414226e-06, "loss": 0.35, "step": 18118 }, { "epoch": 2.422974057234555, "grad_norm": 1.4355101585388184, "learning_rate": 1.8785469537854084e-06, "loss": 0.3822, "step": 18119 }, { "epoch": 2.4231077828296335, "grad_norm": 1.6233646869659424, "learning_rate": 1.8777046769133167e-06, "loss": 0.3934, "step": 18120 }, { "epoch": 2.4232415084247125, "grad_norm": 1.402411937713623, "learning_rate": 1.8768625693426956e-06, "loss": 0.3356, "step": 18121 }, { "epoch": 2.423375234019791, "grad_norm": 1.674034595489502, "learning_rate": 1.8760206310911023e-06, "loss": 0.3499, "step": 18122 }, { "epoch": 2.4235089596148702, "grad_norm": 1.4749321937561035, "learning_rate": 1.8751788621760846e-06, "loss": 0.4147, "step": 18123 }, { "epoch": 2.4236426852099493, "grad_norm": 1.5503557920455933, "learning_rate": 1.874337262615189e-06, "loss": 0.3999, "step": 18124 }, { "epoch": 2.423776410805028, "grad_norm": 1.4730955362319946, "learning_rate": 1.8734958324259577e-06, "loss": 0.3536, "step": 18125 }, { "epoch": 2.423910136400107, "grad_norm": 1.468219518661499, "learning_rate": 1.8726545716259293e-06, "loss": 0.3651, "step": 18126 }, { "epoch": 2.424043861995186, "grad_norm": 1.6197565793991089, "learning_rate": 1.8718134802326393e-06, "loss": 0.3528, "step": 18127 }, { "epoch": 2.4241775875902647, "grad_norm": 1.6689503192901611, "learning_rate": 1.8709725582636195e-06, "loss": 0.3817, "step": 18128 }, { "epoch": 2.4243113131853438, "grad_norm": 1.5541847944259644, "learning_rate": 1.8701318057363981e-06, "loss": 0.3817, "step": 18129 }, { "epoch": 2.4244450387804224, "grad_norm": 1.4522587060928345, "learning_rate": 1.8692912226685012e-06, "loss": 0.3823, "step": 18130 }, { "epoch": 2.4245787643755015, "grad_norm": 1.528070092201233, "learning_rate": 1.8684508090774467e-06, "loss": 0.3501, "step": 18131 }, { "epoch": 2.4247124899705805, "grad_norm": 1.4616628885269165, "learning_rate": 1.8676105649807573e-06, "loss": 0.3424, "step": 18132 }, { "epoch": 2.424846215565659, "grad_norm": 1.7883821725845337, "learning_rate": 1.8667704903959383e-06, "loss": 0.3875, "step": 18133 }, { "epoch": 2.4249799411607382, "grad_norm": 1.4654746055603027, "learning_rate": 1.8659305853405118e-06, "loss": 0.3796, "step": 18134 }, { "epoch": 2.4251136667558173, "grad_norm": 1.404805064201355, "learning_rate": 1.865090849831973e-06, "loss": 0.3382, "step": 18135 }, { "epoch": 2.425247392350896, "grad_norm": 1.5814189910888672, "learning_rate": 1.8642512838878335e-06, "loss": 0.4161, "step": 18136 }, { "epoch": 2.425381117945975, "grad_norm": 1.5269899368286133, "learning_rate": 1.8634118875255935e-06, "loss": 0.3219, "step": 18137 }, { "epoch": 2.4255148435410536, "grad_norm": 1.4669893980026245, "learning_rate": 1.8625726607627425e-06, "loss": 0.366, "step": 18138 }, { "epoch": 2.4256485691361327, "grad_norm": 1.62925386428833, "learning_rate": 1.8617336036167822e-06, "loss": 0.4055, "step": 18139 }, { "epoch": 2.4257822947312118, "grad_norm": 1.477181077003479, "learning_rate": 1.8608947161051949e-06, "loss": 0.3695, "step": 18140 }, { "epoch": 2.4259160203262904, "grad_norm": 1.501099944114685, "learning_rate": 1.8600559982454691e-06, "loss": 0.3501, "step": 18141 }, { "epoch": 2.4260497459213695, "grad_norm": 1.5785404443740845, "learning_rate": 1.8592174500550875e-06, "loss": 0.3937, "step": 18142 }, { "epoch": 2.426183471516448, "grad_norm": 1.5076217651367188, "learning_rate": 1.8583790715515248e-06, "loss": 0.3382, "step": 18143 }, { "epoch": 2.426317197111527, "grad_norm": 1.3558850288391113, "learning_rate": 1.857540862752265e-06, "loss": 0.355, "step": 18144 }, { "epoch": 2.4264509227066062, "grad_norm": 1.367803931236267, "learning_rate": 1.856702823674772e-06, "loss": 0.3254, "step": 18145 }, { "epoch": 2.426584648301685, "grad_norm": 1.426256537437439, "learning_rate": 1.855864954336517e-06, "loss": 0.38, "step": 18146 }, { "epoch": 2.426718373896764, "grad_norm": 1.5262291431427002, "learning_rate": 1.855027254754963e-06, "loss": 0.3599, "step": 18147 }, { "epoch": 2.4268520994918426, "grad_norm": 1.6131625175476074, "learning_rate": 1.8541897249475715e-06, "loss": 0.3719, "step": 18148 }, { "epoch": 2.4269858250869216, "grad_norm": 1.5687006711959839, "learning_rate": 1.853352364931802e-06, "loss": 0.3442, "step": 18149 }, { "epoch": 2.4271195506820007, "grad_norm": 1.5420058965682983, "learning_rate": 1.8525151747251058e-06, "loss": 0.3556, "step": 18150 }, { "epoch": 2.4272532762770793, "grad_norm": 1.5271199941635132, "learning_rate": 1.8516781543449346e-06, "loss": 0.3802, "step": 18151 }, { "epoch": 2.4273870018721584, "grad_norm": 1.672491192817688, "learning_rate": 1.8508413038087358e-06, "loss": 0.4274, "step": 18152 }, { "epoch": 2.427520727467237, "grad_norm": 1.634647250175476, "learning_rate": 1.850004623133954e-06, "loss": 0.3528, "step": 18153 }, { "epoch": 2.427654453062316, "grad_norm": 1.5605021715164185, "learning_rate": 1.8491681123380235e-06, "loss": 0.3752, "step": 18154 }, { "epoch": 2.427788178657395, "grad_norm": 1.6575759649276733, "learning_rate": 1.8483317714383852e-06, "loss": 0.4058, "step": 18155 }, { "epoch": 2.427921904252474, "grad_norm": 1.6028364896774292, "learning_rate": 1.8474956004524736e-06, "loss": 0.4298, "step": 18156 }, { "epoch": 2.428055629847553, "grad_norm": 1.501952052116394, "learning_rate": 1.8466595993977098e-06, "loss": 0.3281, "step": 18157 }, { "epoch": 2.4281893554426315, "grad_norm": 1.3506063222885132, "learning_rate": 1.8458237682915303e-06, "loss": 0.3734, "step": 18158 }, { "epoch": 2.4283230810377106, "grad_norm": 1.4719749689102173, "learning_rate": 1.8449881071513464e-06, "loss": 0.3349, "step": 18159 }, { "epoch": 2.4284568066327896, "grad_norm": 1.6349042654037476, "learning_rate": 1.8441526159945878e-06, "loss": 0.3995, "step": 18160 }, { "epoch": 2.4285905322278682, "grad_norm": 1.5119370222091675, "learning_rate": 1.84331729483866e-06, "loss": 0.3765, "step": 18161 }, { "epoch": 2.4287242578229473, "grad_norm": 1.6625653505325317, "learning_rate": 1.8424821437009766e-06, "loss": 0.3934, "step": 18162 }, { "epoch": 2.4288579834180264, "grad_norm": 1.4546360969543457, "learning_rate": 1.8416471625989506e-06, "loss": 0.3783, "step": 18163 }, { "epoch": 2.428991709013105, "grad_norm": 1.339250087738037, "learning_rate": 1.8408123515499821e-06, "loss": 0.3482, "step": 18164 }, { "epoch": 2.429125434608184, "grad_norm": 1.6681081056594849, "learning_rate": 1.839977710571471e-06, "loss": 0.4295, "step": 18165 }, { "epoch": 2.4292591602032627, "grad_norm": 1.3231414556503296, "learning_rate": 1.8391432396808173e-06, "loss": 0.3427, "step": 18166 }, { "epoch": 2.429392885798342, "grad_norm": 1.5408296585083008, "learning_rate": 1.8383089388954134e-06, "loss": 0.3932, "step": 18167 }, { "epoch": 2.429526611393421, "grad_norm": 1.5553381443023682, "learning_rate": 1.8374748082326487e-06, "loss": 0.3744, "step": 18168 }, { "epoch": 2.4296603369884995, "grad_norm": 1.5951651334762573, "learning_rate": 1.8366408477099118e-06, "loss": 0.3719, "step": 18169 }, { "epoch": 2.4297940625835786, "grad_norm": 1.461053729057312, "learning_rate": 1.8358070573445852e-06, "loss": 0.3781, "step": 18170 }, { "epoch": 2.4299277881786576, "grad_norm": 1.694732666015625, "learning_rate": 1.8349734371540485e-06, "loss": 0.4401, "step": 18171 }, { "epoch": 2.4300615137737362, "grad_norm": 1.6370354890823364, "learning_rate": 1.8341399871556786e-06, "loss": 0.3338, "step": 18172 }, { "epoch": 2.4301952393688153, "grad_norm": 1.4816865921020508, "learning_rate": 1.8333067073668432e-06, "loss": 0.3756, "step": 18173 }, { "epoch": 2.430328964963894, "grad_norm": 1.524155616760254, "learning_rate": 1.8324735978049168e-06, "loss": 0.4194, "step": 18174 }, { "epoch": 2.430462690558973, "grad_norm": 1.6262582540512085, "learning_rate": 1.8316406584872625e-06, "loss": 0.4487, "step": 18175 }, { "epoch": 2.430596416154052, "grad_norm": 1.5761114358901978, "learning_rate": 1.8308078894312431e-06, "loss": 0.3709, "step": 18176 }, { "epoch": 2.4307301417491307, "grad_norm": 1.3818485736846924, "learning_rate": 1.829975290654218e-06, "loss": 0.3556, "step": 18177 }, { "epoch": 2.43086386734421, "grad_norm": 1.6064000129699707, "learning_rate": 1.8291428621735353e-06, "loss": 0.4244, "step": 18178 }, { "epoch": 2.4309975929392884, "grad_norm": 1.4403181076049805, "learning_rate": 1.8283106040065557e-06, "loss": 0.3606, "step": 18179 }, { "epoch": 2.4311313185343675, "grad_norm": 1.559250831604004, "learning_rate": 1.8274785161706198e-06, "loss": 0.363, "step": 18180 }, { "epoch": 2.4312650441294466, "grad_norm": 1.4103502035140991, "learning_rate": 1.8266465986830718e-06, "loss": 0.3584, "step": 18181 }, { "epoch": 2.431398769724525, "grad_norm": 1.6944823265075684, "learning_rate": 1.8258148515612584e-06, "loss": 0.417, "step": 18182 }, { "epoch": 2.4315324953196042, "grad_norm": 1.6668181419372559, "learning_rate": 1.8249832748225082e-06, "loss": 0.3424, "step": 18183 }, { "epoch": 2.431666220914683, "grad_norm": 1.3947749137878418, "learning_rate": 1.8241518684841642e-06, "loss": 0.32, "step": 18184 }, { "epoch": 2.431799946509762, "grad_norm": 1.5086702108383179, "learning_rate": 1.8233206325635489e-06, "loss": 0.3582, "step": 18185 }, { "epoch": 2.431933672104841, "grad_norm": 1.5507982969284058, "learning_rate": 1.8224895670779906e-06, "loss": 0.3815, "step": 18186 }, { "epoch": 2.4320673976999196, "grad_norm": 1.6147749423980713, "learning_rate": 1.8216586720448115e-06, "loss": 0.3778, "step": 18187 }, { "epoch": 2.4322011232949987, "grad_norm": 1.6236050128936768, "learning_rate": 1.8208279474813295e-06, "loss": 0.3849, "step": 18188 }, { "epoch": 2.4323348488900773, "grad_norm": 1.5117872953414917, "learning_rate": 1.8199973934048677e-06, "loss": 0.3446, "step": 18189 }, { "epoch": 2.4324685744851564, "grad_norm": 1.6009238958358765, "learning_rate": 1.8191670098327297e-06, "loss": 0.374, "step": 18190 }, { "epoch": 2.4326023000802355, "grad_norm": 1.6920732259750366, "learning_rate": 1.8183367967822274e-06, "loss": 0.4235, "step": 18191 }, { "epoch": 2.432736025675314, "grad_norm": 1.650769591331482, "learning_rate": 1.8175067542706659e-06, "loss": 0.4341, "step": 18192 }, { "epoch": 2.432869751270393, "grad_norm": 1.4963065385818481, "learning_rate": 1.8166768823153458e-06, "loss": 0.355, "step": 18193 }, { "epoch": 2.433003476865472, "grad_norm": 1.6237667798995972, "learning_rate": 1.8158471809335653e-06, "loss": 0.4489, "step": 18194 }, { "epoch": 2.433137202460551, "grad_norm": 1.5648760795593262, "learning_rate": 1.8150176501426199e-06, "loss": 0.3603, "step": 18195 }, { "epoch": 2.43327092805563, "grad_norm": 1.7834364175796509, "learning_rate": 1.8141882899597986e-06, "loss": 0.4079, "step": 18196 }, { "epoch": 2.4334046536507086, "grad_norm": 1.4551732540130615, "learning_rate": 1.8133591004023897e-06, "loss": 0.3978, "step": 18197 }, { "epoch": 2.4335383792457876, "grad_norm": 1.5140717029571533, "learning_rate": 1.812530081487679e-06, "loss": 0.3578, "step": 18198 }, { "epoch": 2.4336721048408667, "grad_norm": 1.7185540199279785, "learning_rate": 1.8117012332329399e-06, "loss": 0.3677, "step": 18199 }, { "epoch": 2.4338058304359453, "grad_norm": 1.321104645729065, "learning_rate": 1.810872555655454e-06, "loss": 0.3359, "step": 18200 }, { "epoch": 2.4339395560310244, "grad_norm": 1.5534127950668335, "learning_rate": 1.810044048772498e-06, "loss": 0.3575, "step": 18201 }, { "epoch": 2.4340732816261035, "grad_norm": 1.535367727279663, "learning_rate": 1.809215712601331e-06, "loss": 0.3681, "step": 18202 }, { "epoch": 2.434207007221182, "grad_norm": 1.5620211362838745, "learning_rate": 1.8083875471592294e-06, "loss": 0.3617, "step": 18203 }, { "epoch": 2.434340732816261, "grad_norm": 1.4875489473342896, "learning_rate": 1.807559552463446e-06, "loss": 0.3733, "step": 18204 }, { "epoch": 2.43447445841134, "grad_norm": 1.5180929899215698, "learning_rate": 1.8067317285312503e-06, "loss": 0.3648, "step": 18205 }, { "epoch": 2.434608184006419, "grad_norm": 1.5418572425842285, "learning_rate": 1.8059040753798884e-06, "loss": 0.361, "step": 18206 }, { "epoch": 2.434741909601498, "grad_norm": 1.4747289419174194, "learning_rate": 1.8050765930266123e-06, "loss": 0.3393, "step": 18207 }, { "epoch": 2.4348756351965766, "grad_norm": 1.499009132385254, "learning_rate": 1.804249281488678e-06, "loss": 0.4091, "step": 18208 }, { "epoch": 2.4350093607916556, "grad_norm": 1.6491479873657227, "learning_rate": 1.803422140783323e-06, "loss": 0.3886, "step": 18209 }, { "epoch": 2.4351430863867343, "grad_norm": 1.5776349306106567, "learning_rate": 1.80259517092779e-06, "loss": 0.3917, "step": 18210 }, { "epoch": 2.4352768119818133, "grad_norm": 1.514560580253601, "learning_rate": 1.8017683719393163e-06, "loss": 0.3759, "step": 18211 }, { "epoch": 2.4354105375768924, "grad_norm": 1.577111005783081, "learning_rate": 1.8009417438351363e-06, "loss": 0.3621, "step": 18212 }, { "epoch": 2.435544263171971, "grad_norm": 1.7509959936141968, "learning_rate": 1.80011528663248e-06, "loss": 0.4054, "step": 18213 }, { "epoch": 2.43567798876705, "grad_norm": 1.4242032766342163, "learning_rate": 1.7992890003485742e-06, "loss": 0.3622, "step": 18214 }, { "epoch": 2.4358117143621287, "grad_norm": 1.655269742012024, "learning_rate": 1.7984628850006414e-06, "loss": 0.358, "step": 18215 }, { "epoch": 2.435945439957208, "grad_norm": 1.7394752502441406, "learning_rate": 1.7976369406059025e-06, "loss": 0.4521, "step": 18216 }, { "epoch": 2.436079165552287, "grad_norm": 1.4547960758209229, "learning_rate": 1.7968111671815747e-06, "loss": 0.3311, "step": 18217 }, { "epoch": 2.4362128911473655, "grad_norm": 1.5292388200759888, "learning_rate": 1.7959855647448642e-06, "loss": 0.3646, "step": 18218 }, { "epoch": 2.4363466167424446, "grad_norm": 1.6920738220214844, "learning_rate": 1.7951601333129864e-06, "loss": 0.3907, "step": 18219 }, { "epoch": 2.436480342337523, "grad_norm": 1.5297890901565552, "learning_rate": 1.794334872903144e-06, "loss": 0.353, "step": 18220 }, { "epoch": 2.4366140679326023, "grad_norm": 1.5986666679382324, "learning_rate": 1.7935097835325399e-06, "loss": 0.3683, "step": 18221 }, { "epoch": 2.4367477935276813, "grad_norm": 1.4229732751846313, "learning_rate": 1.7926848652183736e-06, "loss": 0.3676, "step": 18222 }, { "epoch": 2.43688151912276, "grad_norm": 1.4651970863342285, "learning_rate": 1.7918601179778328e-06, "loss": 0.3292, "step": 18223 }, { "epoch": 2.437015244717839, "grad_norm": 1.4440951347351074, "learning_rate": 1.7910355418281189e-06, "loss": 0.3596, "step": 18224 }, { "epoch": 2.4371489703129177, "grad_norm": 1.5454461574554443, "learning_rate": 1.7902111367864106e-06, "loss": 0.3555, "step": 18225 }, { "epoch": 2.4372826959079967, "grad_norm": 1.6433773040771484, "learning_rate": 1.789386902869893e-06, "loss": 0.3617, "step": 18226 }, { "epoch": 2.437416421503076, "grad_norm": 1.3851348161697388, "learning_rate": 1.7885628400957543e-06, "loss": 0.3177, "step": 18227 }, { "epoch": 2.4375501470981544, "grad_norm": 1.5309910774230957, "learning_rate": 1.7877389484811603e-06, "loss": 0.3675, "step": 18228 }, { "epoch": 2.4376838726932335, "grad_norm": 1.658008098602295, "learning_rate": 1.7869152280432944e-06, "loss": 0.4015, "step": 18229 }, { "epoch": 2.4378175982883126, "grad_norm": 1.5738810300827026, "learning_rate": 1.7860916787993198e-06, "loss": 0.334, "step": 18230 }, { "epoch": 2.437951323883391, "grad_norm": 1.5266389846801758, "learning_rate": 1.785268300766404e-06, "loss": 0.3487, "step": 18231 }, { "epoch": 2.4380850494784703, "grad_norm": 1.4081307649612427, "learning_rate": 1.7844450939617098e-06, "loss": 0.3758, "step": 18232 }, { "epoch": 2.438218775073549, "grad_norm": 1.6197218894958496, "learning_rate": 1.7836220584023956e-06, "loss": 0.373, "step": 18233 }, { "epoch": 2.438352500668628, "grad_norm": 1.5370994806289673, "learning_rate": 1.7827991941056177e-06, "loss": 0.3492, "step": 18234 }, { "epoch": 2.438486226263707, "grad_norm": 1.6212660074234009, "learning_rate": 1.7819765010885281e-06, "loss": 0.3888, "step": 18235 }, { "epoch": 2.4386199518587857, "grad_norm": 1.7444339990615845, "learning_rate": 1.781153979368274e-06, "loss": 0.3853, "step": 18236 }, { "epoch": 2.4387536774538647, "grad_norm": 1.4328597784042358, "learning_rate": 1.780331628962001e-06, "loss": 0.3613, "step": 18237 }, { "epoch": 2.438887403048944, "grad_norm": 1.5061352252960205, "learning_rate": 1.7795094498868494e-06, "loss": 0.3802, "step": 18238 }, { "epoch": 2.4390211286440224, "grad_norm": 1.4693876504898071, "learning_rate": 1.7786874421599575e-06, "loss": 0.3285, "step": 18239 }, { "epoch": 2.4391548542391015, "grad_norm": 1.5137965679168701, "learning_rate": 1.7778656057984588e-06, "loss": 0.3854, "step": 18240 }, { "epoch": 2.43928857983418, "grad_norm": 1.4213095903396606, "learning_rate": 1.7770439408194862e-06, "loss": 0.3105, "step": 18241 }, { "epoch": 2.439422305429259, "grad_norm": 1.6336909532546997, "learning_rate": 1.776222447240159e-06, "loss": 0.353, "step": 18242 }, { "epoch": 2.4395560310243383, "grad_norm": 1.6486101150512695, "learning_rate": 1.7754011250776114e-06, "loss": 0.4112, "step": 18243 }, { "epoch": 2.439689756619417, "grad_norm": 1.4699738025665283, "learning_rate": 1.7745799743489512e-06, "loss": 0.3485, "step": 18244 }, { "epoch": 2.439823482214496, "grad_norm": 1.5429487228393555, "learning_rate": 1.7737589950713042e-06, "loss": 0.3763, "step": 18245 }, { "epoch": 2.4399572078095746, "grad_norm": 1.5133074522018433, "learning_rate": 1.7729381872617812e-06, "loss": 0.3654, "step": 18246 }, { "epoch": 2.4400909334046537, "grad_norm": 1.5166326761245728, "learning_rate": 1.7721175509374832e-06, "loss": 0.3536, "step": 18247 }, { "epoch": 2.4402246589997327, "grad_norm": 1.5559535026550293, "learning_rate": 1.7712970861155276e-06, "loss": 0.4087, "step": 18248 }, { "epoch": 2.4403583845948114, "grad_norm": 1.5959115028381348, "learning_rate": 1.7704767928130084e-06, "loss": 0.3389, "step": 18249 }, { "epoch": 2.4404921101898904, "grad_norm": 1.5388987064361572, "learning_rate": 1.7696566710470254e-06, "loss": 0.3898, "step": 18250 }, { "epoch": 2.440625835784969, "grad_norm": 1.5253854990005493, "learning_rate": 1.7688367208346723e-06, "loss": 0.3611, "step": 18251 }, { "epoch": 2.440759561380048, "grad_norm": 1.5663312673568726, "learning_rate": 1.7680169421930404e-06, "loss": 0.3708, "step": 18252 }, { "epoch": 2.440893286975127, "grad_norm": 1.816049337387085, "learning_rate": 1.7671973351392223e-06, "loss": 0.37, "step": 18253 }, { "epoch": 2.441027012570206, "grad_norm": 1.4882328510284424, "learning_rate": 1.7663778996902947e-06, "loss": 0.3711, "step": 18254 }, { "epoch": 2.441160738165285, "grad_norm": 1.6296606063842773, "learning_rate": 1.7655586358633426e-06, "loss": 0.3475, "step": 18255 }, { "epoch": 2.4412944637603635, "grad_norm": 1.435652494430542, "learning_rate": 1.76473954367544e-06, "loss": 0.3636, "step": 18256 }, { "epoch": 2.4414281893554426, "grad_norm": 1.6103055477142334, "learning_rate": 1.7639206231436622e-06, "loss": 0.3989, "step": 18257 }, { "epoch": 2.4415619149505217, "grad_norm": 1.8223057985305786, "learning_rate": 1.763101874285077e-06, "loss": 0.3943, "step": 18258 }, { "epoch": 2.4416956405456003, "grad_norm": 1.508858561515808, "learning_rate": 1.7622832971167524e-06, "loss": 0.3859, "step": 18259 }, { "epoch": 2.4418293661406794, "grad_norm": 1.535022258758545, "learning_rate": 1.7614648916557486e-06, "loss": 0.3528, "step": 18260 }, { "epoch": 2.441963091735758, "grad_norm": 1.5430930852890015, "learning_rate": 1.7606466579191272e-06, "loss": 0.3464, "step": 18261 }, { "epoch": 2.442096817330837, "grad_norm": 1.557789921760559, "learning_rate": 1.7598285959239437e-06, "loss": 0.358, "step": 18262 }, { "epoch": 2.442230542925916, "grad_norm": 1.3906068801879883, "learning_rate": 1.759010705687243e-06, "loss": 0.3243, "step": 18263 }, { "epoch": 2.4423642685209948, "grad_norm": 1.385672688484192, "learning_rate": 1.7581929872260805e-06, "loss": 0.3209, "step": 18264 }, { "epoch": 2.442497994116074, "grad_norm": 1.5294710397720337, "learning_rate": 1.7573754405575029e-06, "loss": 0.3763, "step": 18265 }, { "epoch": 2.442631719711153, "grad_norm": 1.8096469640731812, "learning_rate": 1.7565580656985403e-06, "loss": 0.3588, "step": 18266 }, { "epoch": 2.4427654453062315, "grad_norm": 1.259243369102478, "learning_rate": 1.755740862666242e-06, "loss": 0.3348, "step": 18267 }, { "epoch": 2.4428991709013106, "grad_norm": 1.4385862350463867, "learning_rate": 1.7549238314776318e-06, "loss": 0.3008, "step": 18268 }, { "epoch": 2.443032896496389, "grad_norm": 1.4794098138809204, "learning_rate": 1.7541069721497494e-06, "loss": 0.3155, "step": 18269 }, { "epoch": 2.4431666220914683, "grad_norm": 1.2600165605545044, "learning_rate": 1.7532902846996136e-06, "loss": 0.283, "step": 18270 }, { "epoch": 2.4433003476865474, "grad_norm": 1.4867573976516724, "learning_rate": 1.7524737691442495e-06, "loss": 0.3864, "step": 18271 }, { "epoch": 2.443434073281626, "grad_norm": 1.5677626132965088, "learning_rate": 1.7516574255006813e-06, "loss": 0.3646, "step": 18272 }, { "epoch": 2.443567798876705, "grad_norm": 1.3933168649673462, "learning_rate": 1.7508412537859164e-06, "loss": 0.3267, "step": 18273 }, { "epoch": 2.443701524471784, "grad_norm": 1.4900708198547363, "learning_rate": 1.7500252540169782e-06, "loss": 0.3428, "step": 18274 }, { "epoch": 2.4438352500668628, "grad_norm": 1.6241878271102905, "learning_rate": 1.7492094262108661e-06, "loss": 0.3504, "step": 18275 }, { "epoch": 2.443968975661942, "grad_norm": 1.681395173072815, "learning_rate": 1.7483937703845876e-06, "loss": 0.3824, "step": 18276 }, { "epoch": 2.4441027012570204, "grad_norm": 1.5869262218475342, "learning_rate": 1.747578286555146e-06, "loss": 0.367, "step": 18277 }, { "epoch": 2.4442364268520995, "grad_norm": 1.6530287265777588, "learning_rate": 1.7467629747395376e-06, "loss": 0.3607, "step": 18278 }, { "epoch": 2.4443701524471786, "grad_norm": 1.4440975189208984, "learning_rate": 1.7459478349547577e-06, "loss": 0.367, "step": 18279 }, { "epoch": 2.444503878042257, "grad_norm": 1.3625982999801636, "learning_rate": 1.7451328672177969e-06, "loss": 0.3867, "step": 18280 }, { "epoch": 2.4446376036373363, "grad_norm": 1.5737146139144897, "learning_rate": 1.7443180715456431e-06, "loss": 0.3849, "step": 18281 }, { "epoch": 2.444771329232415, "grad_norm": 1.5660408735275269, "learning_rate": 1.743503447955278e-06, "loss": 0.3669, "step": 18282 }, { "epoch": 2.444905054827494, "grad_norm": 1.7328089475631714, "learning_rate": 1.742688996463684e-06, "loss": 0.4028, "step": 18283 }, { "epoch": 2.445038780422573, "grad_norm": 1.5653839111328125, "learning_rate": 1.741874717087836e-06, "loss": 0.3714, "step": 18284 }, { "epoch": 2.4451725060176517, "grad_norm": 1.5264708995819092, "learning_rate": 1.741060609844708e-06, "loss": 0.3788, "step": 18285 }, { "epoch": 2.4453062316127308, "grad_norm": 1.595931887626648, "learning_rate": 1.7402466747512704e-06, "loss": 0.3203, "step": 18286 }, { "epoch": 2.4454399572078094, "grad_norm": 1.446514368057251, "learning_rate": 1.7394329118244825e-06, "loss": 0.3595, "step": 18287 }, { "epoch": 2.4455736828028884, "grad_norm": 1.4631075859069824, "learning_rate": 1.7386193210813163e-06, "loss": 0.3545, "step": 18288 }, { "epoch": 2.4457074083979675, "grad_norm": 1.5443226099014282, "learning_rate": 1.7378059025387194e-06, "loss": 0.3634, "step": 18289 }, { "epoch": 2.445841133993046, "grad_norm": 1.5995323657989502, "learning_rate": 1.7369926562136553e-06, "loss": 0.4054, "step": 18290 }, { "epoch": 2.445974859588125, "grad_norm": 1.419340968132019, "learning_rate": 1.7361795821230741e-06, "loss": 0.3557, "step": 18291 }, { "epoch": 2.446108585183204, "grad_norm": 1.5237889289855957, "learning_rate": 1.7353666802839176e-06, "loss": 0.3832, "step": 18292 }, { "epoch": 2.446242310778283, "grad_norm": 1.7873990535736084, "learning_rate": 1.7345539507131392e-06, "loss": 0.3961, "step": 18293 }, { "epoch": 2.446376036373362, "grad_norm": 1.6849751472473145, "learning_rate": 1.7337413934276726e-06, "loss": 0.4115, "step": 18294 }, { "epoch": 2.4465097619684406, "grad_norm": 1.5099862813949585, "learning_rate": 1.7329290084444561e-06, "loss": 0.377, "step": 18295 }, { "epoch": 2.4466434875635197, "grad_norm": 1.6152827739715576, "learning_rate": 1.7321167957804241e-06, "loss": 0.3817, "step": 18296 }, { "epoch": 2.4467772131585983, "grad_norm": 1.8857215642929077, "learning_rate": 1.7313047554525054e-06, "loss": 0.3951, "step": 18297 }, { "epoch": 2.4469109387536774, "grad_norm": 1.658272385597229, "learning_rate": 1.7304928874776272e-06, "loss": 0.3676, "step": 18298 }, { "epoch": 2.4470446643487564, "grad_norm": 1.38486909866333, "learning_rate": 1.7296811918727107e-06, "loss": 0.3277, "step": 18299 }, { "epoch": 2.447178389943835, "grad_norm": 1.3321501016616821, "learning_rate": 1.7288696686546768e-06, "loss": 0.3402, "step": 18300 }, { "epoch": 2.447312115538914, "grad_norm": 1.6780868768692017, "learning_rate": 1.7280583178404408e-06, "loss": 0.3673, "step": 18301 }, { "epoch": 2.447445841133993, "grad_norm": 1.7888216972351074, "learning_rate": 1.7272471394469125e-06, "loss": 0.3879, "step": 18302 }, { "epoch": 2.447579566729072, "grad_norm": 1.8735895156860352, "learning_rate": 1.726436133491002e-06, "loss": 0.4083, "step": 18303 }, { "epoch": 2.447713292324151, "grad_norm": 1.6659198999404907, "learning_rate": 1.725625299989614e-06, "loss": 0.4284, "step": 18304 }, { "epoch": 2.44784701791923, "grad_norm": 1.5958473682403564, "learning_rate": 1.7248146389596476e-06, "loss": 0.3802, "step": 18305 }, { "epoch": 2.4479807435143086, "grad_norm": 1.7837719917297363, "learning_rate": 1.7240041504180016e-06, "loss": 0.3973, "step": 18306 }, { "epoch": 2.4481144691093877, "grad_norm": 1.7201337814331055, "learning_rate": 1.7231938343815735e-06, "loss": 0.388, "step": 18307 }, { "epoch": 2.4482481947044663, "grad_norm": 1.5669019222259521, "learning_rate": 1.7223836908672441e-06, "loss": 0.322, "step": 18308 }, { "epoch": 2.4483819202995454, "grad_norm": 1.6596336364746094, "learning_rate": 1.721573719891908e-06, "loss": 0.3879, "step": 18309 }, { "epoch": 2.4485156458946244, "grad_norm": 1.5750923156738281, "learning_rate": 1.7207639214724491e-06, "loss": 0.3297, "step": 18310 }, { "epoch": 2.448649371489703, "grad_norm": 1.6255757808685303, "learning_rate": 1.7199542956257388e-06, "loss": 0.3721, "step": 18311 }, { "epoch": 2.448783097084782, "grad_norm": 1.5581547021865845, "learning_rate": 1.719144842368663e-06, "loss": 0.3635, "step": 18312 }, { "epoch": 2.4489168226798608, "grad_norm": 1.6339961290359497, "learning_rate": 1.718335561718084e-06, "loss": 0.3348, "step": 18313 }, { "epoch": 2.44905054827494, "grad_norm": 1.7018744945526123, "learning_rate": 1.717526453690881e-06, "loss": 0.4059, "step": 18314 }, { "epoch": 2.449184273870019, "grad_norm": 1.5955941677093506, "learning_rate": 1.7167175183039108e-06, "loss": 0.3754, "step": 18315 }, { "epoch": 2.4493179994650975, "grad_norm": 1.902502417564392, "learning_rate": 1.7159087555740383e-06, "loss": 0.42, "step": 18316 }, { "epoch": 2.4494517250601766, "grad_norm": 1.6018946170806885, "learning_rate": 1.7151001655181199e-06, "loss": 0.3823, "step": 18317 }, { "epoch": 2.4495854506552552, "grad_norm": 1.2996386289596558, "learning_rate": 1.7142917481530108e-06, "loss": 0.326, "step": 18318 }, { "epoch": 2.4497191762503343, "grad_norm": 1.5361231565475464, "learning_rate": 1.713483503495562e-06, "loss": 0.3976, "step": 18319 }, { "epoch": 2.4498529018454134, "grad_norm": 2.2248213291168213, "learning_rate": 1.7126754315626203e-06, "loss": 0.3894, "step": 18320 }, { "epoch": 2.449986627440492, "grad_norm": 1.516777515411377, "learning_rate": 1.7118675323710288e-06, "loss": 0.3616, "step": 18321 }, { "epoch": 2.450120353035571, "grad_norm": 1.6168711185455322, "learning_rate": 1.7110598059376282e-06, "loss": 0.3623, "step": 18322 }, { "epoch": 2.4502540786306497, "grad_norm": 1.5790424346923828, "learning_rate": 1.710252252279253e-06, "loss": 0.3716, "step": 18323 }, { "epoch": 2.4503878042257288, "grad_norm": 1.3731200695037842, "learning_rate": 1.7094448714127387e-06, "loss": 0.3126, "step": 18324 }, { "epoch": 2.450521529820808, "grad_norm": 1.678053617477417, "learning_rate": 1.7086376633549119e-06, "loss": 0.3991, "step": 18325 }, { "epoch": 2.4506552554158865, "grad_norm": 1.368772029876709, "learning_rate": 1.707830628122602e-06, "loss": 0.3477, "step": 18326 }, { "epoch": 2.4507889810109655, "grad_norm": 1.39094078540802, "learning_rate": 1.7070237657326228e-06, "loss": 0.3049, "step": 18327 }, { "epoch": 2.450922706606044, "grad_norm": 1.4616683721542358, "learning_rate": 1.7062170762018005e-06, "loss": 0.3822, "step": 18328 }, { "epoch": 2.4510564322011232, "grad_norm": 1.4798396825790405, "learning_rate": 1.7054105595469462e-06, "loss": 0.3733, "step": 18329 }, { "epoch": 2.4511901577962023, "grad_norm": 1.5773667097091675, "learning_rate": 1.7046042157848718e-06, "loss": 0.3709, "step": 18330 }, { "epoch": 2.451323883391281, "grad_norm": 1.5374013185501099, "learning_rate": 1.7037980449323876e-06, "loss": 0.3715, "step": 18331 }, { "epoch": 2.45145760898636, "grad_norm": 1.3983029127120972, "learning_rate": 1.70299204700629e-06, "loss": 0.3489, "step": 18332 }, { "epoch": 2.451591334581439, "grad_norm": 1.4616225957870483, "learning_rate": 1.7021862220233887e-06, "loss": 0.3279, "step": 18333 }, { "epoch": 2.4517250601765177, "grad_norm": 1.372166395187378, "learning_rate": 1.7013805700004715e-06, "loss": 0.3546, "step": 18334 }, { "epoch": 2.4518587857715968, "grad_norm": 1.5691156387329102, "learning_rate": 1.7005750909543373e-06, "loss": 0.37, "step": 18335 }, { "epoch": 2.4519925113666754, "grad_norm": 1.3386577367782593, "learning_rate": 1.6997697849017725e-06, "loss": 0.3177, "step": 18336 }, { "epoch": 2.4521262369617545, "grad_norm": 1.3635553121566772, "learning_rate": 1.6989646518595616e-06, "loss": 0.3557, "step": 18337 }, { "epoch": 2.4522599625568335, "grad_norm": 1.5327842235565186, "learning_rate": 1.6981596918444953e-06, "loss": 0.3572, "step": 18338 }, { "epoch": 2.452393688151912, "grad_norm": 1.3996065855026245, "learning_rate": 1.6973549048733428e-06, "loss": 0.3334, "step": 18339 }, { "epoch": 2.4525274137469912, "grad_norm": 1.4622074365615845, "learning_rate": 1.6965502909628828e-06, "loss": 0.3555, "step": 18340 }, { "epoch": 2.4526611393420703, "grad_norm": 1.5473166704177856, "learning_rate": 1.6957458501298862e-06, "loss": 0.3629, "step": 18341 }, { "epoch": 2.452794864937149, "grad_norm": 1.5477222204208374, "learning_rate": 1.6949415823911208e-06, "loss": 0.3389, "step": 18342 }, { "epoch": 2.452928590532228, "grad_norm": 1.4794080257415771, "learning_rate": 1.6941374877633522e-06, "loss": 0.3622, "step": 18343 }, { "epoch": 2.4530623161273066, "grad_norm": 1.677876353263855, "learning_rate": 1.6933335662633387e-06, "loss": 0.3893, "step": 18344 }, { "epoch": 2.4531960417223857, "grad_norm": 1.678351640701294, "learning_rate": 1.6925298179078386e-06, "loss": 0.4135, "step": 18345 }, { "epoch": 2.4533297673174648, "grad_norm": 1.4939243793487549, "learning_rate": 1.6917262427136049e-06, "loss": 0.3314, "step": 18346 }, { "epoch": 2.4534634929125434, "grad_norm": 1.468875765800476, "learning_rate": 1.6909228406973887e-06, "loss": 0.3472, "step": 18347 }, { "epoch": 2.4535972185076225, "grad_norm": 1.737004280090332, "learning_rate": 1.6901196118759333e-06, "loss": 0.4035, "step": 18348 }, { "epoch": 2.453730944102701, "grad_norm": 1.6445791721343994, "learning_rate": 1.6893165562659842e-06, "loss": 0.3685, "step": 18349 }, { "epoch": 2.45386466969778, "grad_norm": 1.306131362915039, "learning_rate": 1.6885136738842812e-06, "loss": 0.3652, "step": 18350 }, { "epoch": 2.4539983952928592, "grad_norm": 1.4941554069519043, "learning_rate": 1.687710964747552e-06, "loss": 0.3455, "step": 18351 }, { "epoch": 2.454132120887938, "grad_norm": 1.5097514390945435, "learning_rate": 1.686908428872539e-06, "loss": 0.3274, "step": 18352 }, { "epoch": 2.454265846483017, "grad_norm": 1.3403040170669556, "learning_rate": 1.6861060662759598e-06, "loss": 0.3295, "step": 18353 }, { "epoch": 2.4543995720780956, "grad_norm": 1.5221773386001587, "learning_rate": 1.6853038769745466e-06, "loss": 0.3481, "step": 18354 }, { "epoch": 2.4545332976731746, "grad_norm": 1.7235254049301147, "learning_rate": 1.6845018609850206e-06, "loss": 0.3462, "step": 18355 }, { "epoch": 2.4546670232682537, "grad_norm": 1.8121986389160156, "learning_rate": 1.6837000183240915e-06, "loss": 0.4338, "step": 18356 }, { "epoch": 2.4548007488633323, "grad_norm": 1.6822962760925293, "learning_rate": 1.6828983490084827e-06, "loss": 0.3471, "step": 18357 }, { "epoch": 2.4549344744584114, "grad_norm": 1.695307731628418, "learning_rate": 1.6820968530548931e-06, "loss": 0.3571, "step": 18358 }, { "epoch": 2.45506820005349, "grad_norm": 1.4447683095932007, "learning_rate": 1.6812955304800415e-06, "loss": 0.3204, "step": 18359 }, { "epoch": 2.455201925648569, "grad_norm": 1.5660394430160522, "learning_rate": 1.6804943813006214e-06, "loss": 0.4041, "step": 18360 }, { "epoch": 2.455335651243648, "grad_norm": 1.501447081565857, "learning_rate": 1.6796934055333346e-06, "loss": 0.4091, "step": 18361 }, { "epoch": 2.455469376838727, "grad_norm": 1.6945569515228271, "learning_rate": 1.6788926031948782e-06, "loss": 0.3714, "step": 18362 }, { "epoch": 2.455603102433806, "grad_norm": 1.761191725730896, "learning_rate": 1.678091974301942e-06, "loss": 0.3928, "step": 18363 }, { "epoch": 2.4557368280288845, "grad_norm": 1.5343469381332397, "learning_rate": 1.6772915188712157e-06, "loss": 0.3631, "step": 18364 }, { "epoch": 2.4558705536239636, "grad_norm": 1.5702824592590332, "learning_rate": 1.676491236919384e-06, "loss": 0.3252, "step": 18365 }, { "epoch": 2.4560042792190426, "grad_norm": 1.3933650255203247, "learning_rate": 1.6756911284631272e-06, "loss": 0.3496, "step": 18366 }, { "epoch": 2.4561380048141213, "grad_norm": 1.6416264772415161, "learning_rate": 1.6748911935191236e-06, "loss": 0.3612, "step": 18367 }, { "epoch": 2.4562717304092003, "grad_norm": 1.5850558280944824, "learning_rate": 1.6740914321040468e-06, "loss": 0.4024, "step": 18368 }, { "epoch": 2.4564054560042794, "grad_norm": 1.4282252788543701, "learning_rate": 1.673291844234568e-06, "loss": 0.3293, "step": 18369 }, { "epoch": 2.456539181599358, "grad_norm": 1.516066074371338, "learning_rate": 1.6724924299273514e-06, "loss": 0.3685, "step": 18370 }, { "epoch": 2.456672907194437, "grad_norm": 1.2862077951431274, "learning_rate": 1.671693189199065e-06, "loss": 0.3349, "step": 18371 }, { "epoch": 2.4568066327895157, "grad_norm": 1.534590721130371, "learning_rate": 1.67089412206636e-06, "loss": 0.3891, "step": 18372 }, { "epoch": 2.456940358384595, "grad_norm": 1.6412922143936157, "learning_rate": 1.6700952285458983e-06, "loss": 0.3711, "step": 18373 }, { "epoch": 2.457074083979674, "grad_norm": 1.4446101188659668, "learning_rate": 1.6692965086543311e-06, "loss": 0.3674, "step": 18374 }, { "epoch": 2.4572078095747525, "grad_norm": 1.531467080116272, "learning_rate": 1.6684979624083076e-06, "loss": 0.3513, "step": 18375 }, { "epoch": 2.4573415351698316, "grad_norm": 1.5182467699050903, "learning_rate": 1.667699589824473e-06, "loss": 0.363, "step": 18376 }, { "epoch": 2.4574752607649106, "grad_norm": 1.56467866897583, "learning_rate": 1.666901390919462e-06, "loss": 0.3555, "step": 18377 }, { "epoch": 2.4576089863599893, "grad_norm": 1.6375277042388916, "learning_rate": 1.6661033657099236e-06, "loss": 0.3836, "step": 18378 }, { "epoch": 2.4577427119550683, "grad_norm": 1.543134093284607, "learning_rate": 1.665305514212483e-06, "loss": 0.3696, "step": 18379 }, { "epoch": 2.457876437550147, "grad_norm": 1.5102978944778442, "learning_rate": 1.6645078364437739e-06, "loss": 0.3825, "step": 18380 }, { "epoch": 2.458010163145226, "grad_norm": 1.5835179090499878, "learning_rate": 1.6637103324204219e-06, "loss": 0.3558, "step": 18381 }, { "epoch": 2.458143888740305, "grad_norm": 1.3649482727050781, "learning_rate": 1.662913002159049e-06, "loss": 0.3195, "step": 18382 }, { "epoch": 2.4582776143353837, "grad_norm": 1.4722611904144287, "learning_rate": 1.662115845676282e-06, "loss": 0.3007, "step": 18383 }, { "epoch": 2.458411339930463, "grad_norm": 1.4906474351882935, "learning_rate": 1.661318862988729e-06, "loss": 0.3371, "step": 18384 }, { "epoch": 2.4585450655255414, "grad_norm": 1.5938630104064941, "learning_rate": 1.6605220541130052e-06, "loss": 0.3759, "step": 18385 }, { "epoch": 2.4586787911206205, "grad_norm": 1.6239675283432007, "learning_rate": 1.6597254190657187e-06, "loss": 0.3836, "step": 18386 }, { "epoch": 2.4588125167156996, "grad_norm": 1.5647399425506592, "learning_rate": 1.658928957863476e-06, "loss": 0.3386, "step": 18387 }, { "epoch": 2.458946242310778, "grad_norm": 1.6796379089355469, "learning_rate": 1.6581326705228772e-06, "loss": 0.3773, "step": 18388 }, { "epoch": 2.4590799679058573, "grad_norm": 1.7886395454406738, "learning_rate": 1.6573365570605204e-06, "loss": 0.4315, "step": 18389 }, { "epoch": 2.459213693500936, "grad_norm": 1.479917287826538, "learning_rate": 1.6565406174929999e-06, "loss": 0.351, "step": 18390 }, { "epoch": 2.459347419096015, "grad_norm": 1.678924560546875, "learning_rate": 1.6557448518369067e-06, "loss": 0.3555, "step": 18391 }, { "epoch": 2.459481144691094, "grad_norm": 1.5352133512496948, "learning_rate": 1.6549492601088268e-06, "loss": 0.3468, "step": 18392 }, { "epoch": 2.4596148702861726, "grad_norm": 1.7141751050949097, "learning_rate": 1.6541538423253456e-06, "loss": 0.3847, "step": 18393 }, { "epoch": 2.4597485958812517, "grad_norm": 1.6180425882339478, "learning_rate": 1.6533585985030398e-06, "loss": 0.3863, "step": 18394 }, { "epoch": 2.4598823214763303, "grad_norm": 1.4593968391418457, "learning_rate": 1.6525635286584907e-06, "loss": 0.3212, "step": 18395 }, { "epoch": 2.4600160470714094, "grad_norm": 1.4271601438522339, "learning_rate": 1.6517686328082616e-06, "loss": 0.3253, "step": 18396 }, { "epoch": 2.4601497726664885, "grad_norm": 1.650604248046875, "learning_rate": 1.6509739109689326e-06, "loss": 0.4075, "step": 18397 }, { "epoch": 2.460283498261567, "grad_norm": 1.4136661291122437, "learning_rate": 1.6501793631570584e-06, "loss": 0.3297, "step": 18398 }, { "epoch": 2.460417223856646, "grad_norm": 1.6053146123886108, "learning_rate": 1.64938498938921e-06, "loss": 0.3746, "step": 18399 }, { "epoch": 2.460550949451725, "grad_norm": 1.4433890581130981, "learning_rate": 1.6485907896819387e-06, "loss": 0.3662, "step": 18400 }, { "epoch": 2.460684675046804, "grad_norm": 1.4452996253967285, "learning_rate": 1.6477967640517978e-06, "loss": 0.3324, "step": 18401 }, { "epoch": 2.460818400641883, "grad_norm": 1.5810301303863525, "learning_rate": 1.6470029125153463e-06, "loss": 0.3824, "step": 18402 }, { "epoch": 2.4609521262369616, "grad_norm": 1.5991277694702148, "learning_rate": 1.6462092350891245e-06, "loss": 0.4011, "step": 18403 }, { "epoch": 2.4610858518320406, "grad_norm": 1.425514817237854, "learning_rate": 1.645415731789677e-06, "loss": 0.3203, "step": 18404 }, { "epoch": 2.4612195774271197, "grad_norm": 1.6018075942993164, "learning_rate": 1.6446224026335434e-06, "loss": 0.367, "step": 18405 }, { "epoch": 2.4613533030221983, "grad_norm": 1.5432902574539185, "learning_rate": 1.6438292476372607e-06, "loss": 0.366, "step": 18406 }, { "epoch": 2.4614870286172774, "grad_norm": 1.618990182876587, "learning_rate": 1.6430362668173627e-06, "loss": 0.3478, "step": 18407 }, { "epoch": 2.4616207542123565, "grad_norm": 1.3908213376998901, "learning_rate": 1.6422434601903758e-06, "loss": 0.3486, "step": 18408 }, { "epoch": 2.461754479807435, "grad_norm": 1.4298393726348877, "learning_rate": 1.6414508277728268e-06, "loss": 0.3741, "step": 18409 }, { "epoch": 2.461888205402514, "grad_norm": 1.5809578895568848, "learning_rate": 1.6406583695812362e-06, "loss": 0.4048, "step": 18410 }, { "epoch": 2.462021930997593, "grad_norm": 1.5856084823608398, "learning_rate": 1.6398660856321236e-06, "loss": 0.3803, "step": 18411 }, { "epoch": 2.462155656592672, "grad_norm": 1.5606738328933716, "learning_rate": 1.6390739759420027e-06, "loss": 0.38, "step": 18412 }, { "epoch": 2.462289382187751, "grad_norm": 1.5805355310440063, "learning_rate": 1.6382820405273846e-06, "loss": 0.4016, "step": 18413 }, { "epoch": 2.4624231077828296, "grad_norm": 1.333855152130127, "learning_rate": 1.6374902794047754e-06, "loss": 0.3203, "step": 18414 }, { "epoch": 2.4625568333779086, "grad_norm": 1.3911138772964478, "learning_rate": 1.6366986925906802e-06, "loss": 0.3316, "step": 18415 }, { "epoch": 2.4626905589729873, "grad_norm": 1.4963572025299072, "learning_rate": 1.6359072801015995e-06, "loss": 0.3592, "step": 18416 }, { "epoch": 2.4628242845680663, "grad_norm": 1.5172936916351318, "learning_rate": 1.6351160419540235e-06, "loss": 0.3389, "step": 18417 }, { "epoch": 2.4629580101631454, "grad_norm": 1.5685102939605713, "learning_rate": 1.6343249781644533e-06, "loss": 0.3459, "step": 18418 }, { "epoch": 2.463091735758224, "grad_norm": 1.6269254684448242, "learning_rate": 1.6335340887493723e-06, "loss": 0.3493, "step": 18419 }, { "epoch": 2.463225461353303, "grad_norm": 1.49678373336792, "learning_rate": 1.6327433737252651e-06, "loss": 0.3717, "step": 18420 }, { "epoch": 2.4633591869483817, "grad_norm": 1.5435237884521484, "learning_rate": 1.6319528331086198e-06, "loss": 0.361, "step": 18421 }, { "epoch": 2.463492912543461, "grad_norm": 1.757003903388977, "learning_rate": 1.6311624669159064e-06, "loss": 0.4057, "step": 18422 }, { "epoch": 2.46362663813854, "grad_norm": 1.5914117097854614, "learning_rate": 1.6303722751636076e-06, "loss": 0.4317, "step": 18423 }, { "epoch": 2.4637603637336185, "grad_norm": 1.3521523475646973, "learning_rate": 1.6295822578681875e-06, "loss": 0.3259, "step": 18424 }, { "epoch": 2.4638940893286976, "grad_norm": 1.5597515106201172, "learning_rate": 1.6287924150461153e-06, "loss": 0.3998, "step": 18425 }, { "epoch": 2.464027814923776, "grad_norm": 1.5405718088150024, "learning_rate": 1.6280027467138547e-06, "loss": 0.3695, "step": 18426 }, { "epoch": 2.4641615405188553, "grad_norm": 1.5388661623001099, "learning_rate": 1.627213252887866e-06, "loss": 0.4022, "step": 18427 }, { "epoch": 2.4642952661139343, "grad_norm": 1.5667030811309814, "learning_rate": 1.6264239335846055e-06, "loss": 0.3658, "step": 18428 }, { "epoch": 2.464428991709013, "grad_norm": 1.5254513025283813, "learning_rate": 1.6256347888205248e-06, "loss": 0.3602, "step": 18429 }, { "epoch": 2.464562717304092, "grad_norm": 1.541748046875, "learning_rate": 1.6248458186120741e-06, "loss": 0.365, "step": 18430 }, { "epoch": 2.4646964428991707, "grad_norm": 1.5242400169372559, "learning_rate": 1.624057022975698e-06, "loss": 0.3983, "step": 18431 }, { "epoch": 2.4648301684942497, "grad_norm": 1.6263666152954102, "learning_rate": 1.6232684019278389e-06, "loss": 0.3381, "step": 18432 }, { "epoch": 2.464963894089329, "grad_norm": 1.4669629335403442, "learning_rate": 1.6224799554849335e-06, "loss": 0.3805, "step": 18433 }, { "epoch": 2.4650976196844074, "grad_norm": 1.5077074766159058, "learning_rate": 1.6216916836634179e-06, "loss": 0.3769, "step": 18434 }, { "epoch": 2.4652313452794865, "grad_norm": 1.5486758947372437, "learning_rate": 1.620903586479723e-06, "loss": 0.3664, "step": 18435 }, { "epoch": 2.4653650708745656, "grad_norm": 1.4843965768814087, "learning_rate": 1.6201156639502714e-06, "loss": 0.3262, "step": 18436 }, { "epoch": 2.465498796469644, "grad_norm": 1.5757921934127808, "learning_rate": 1.6193279160914943e-06, "loss": 0.3495, "step": 18437 }, { "epoch": 2.4656325220647233, "grad_norm": 1.6165995597839355, "learning_rate": 1.618540342919802e-06, "loss": 0.367, "step": 18438 }, { "epoch": 2.465766247659802, "grad_norm": 1.6041425466537476, "learning_rate": 1.6177529444516193e-06, "loss": 0.3601, "step": 18439 }, { "epoch": 2.465899973254881, "grad_norm": 1.4997284412384033, "learning_rate": 1.6169657207033574e-06, "loss": 0.3402, "step": 18440 }, { "epoch": 2.46603369884996, "grad_norm": 1.5533074140548706, "learning_rate": 1.6161786716914196e-06, "loss": 0.3662, "step": 18441 }, { "epoch": 2.4661674244450387, "grad_norm": 1.7296086549758911, "learning_rate": 1.6153917974322187e-06, "loss": 0.4126, "step": 18442 }, { "epoch": 2.4663011500401177, "grad_norm": 1.4833669662475586, "learning_rate": 1.614605097942148e-06, "loss": 0.36, "step": 18443 }, { "epoch": 2.466434875635197, "grad_norm": 1.5846545696258545, "learning_rate": 1.6138185732376144e-06, "loss": 0.3299, "step": 18444 }, { "epoch": 2.4665686012302754, "grad_norm": 1.5114147663116455, "learning_rate": 1.613032223335007e-06, "loss": 0.3717, "step": 18445 }, { "epoch": 2.4667023268253545, "grad_norm": 1.3816806077957153, "learning_rate": 1.612246048250714e-06, "loss": 0.3559, "step": 18446 }, { "epoch": 2.466836052420433, "grad_norm": 1.5780364274978638, "learning_rate": 1.611460048001131e-06, "loss": 0.3354, "step": 18447 }, { "epoch": 2.466969778015512, "grad_norm": 1.567331314086914, "learning_rate": 1.610674222602634e-06, "loss": 0.3597, "step": 18448 }, { "epoch": 2.4671035036105913, "grad_norm": 1.38765549659729, "learning_rate": 1.609888572071604e-06, "loss": 0.3057, "step": 18449 }, { "epoch": 2.46723722920567, "grad_norm": 1.5674617290496826, "learning_rate": 1.6091030964244192e-06, "loss": 0.396, "step": 18450 }, { "epoch": 2.467370954800749, "grad_norm": 1.5536940097808838, "learning_rate": 1.608317795677451e-06, "loss": 0.3986, "step": 18451 }, { "epoch": 2.4675046803958276, "grad_norm": 1.5396480560302734, "learning_rate": 1.6075326698470695e-06, "loss": 0.3831, "step": 18452 }, { "epoch": 2.4676384059909067, "grad_norm": 1.5758980512619019, "learning_rate": 1.6067477189496371e-06, "loss": 0.3363, "step": 18453 }, { "epoch": 2.4677721315859857, "grad_norm": 1.548737645149231, "learning_rate": 1.6059629430015178e-06, "loss": 0.3908, "step": 18454 }, { "epoch": 2.4679058571810644, "grad_norm": 1.5963069200515747, "learning_rate": 1.605178342019068e-06, "loss": 0.3529, "step": 18455 }, { "epoch": 2.4680395827761434, "grad_norm": 1.7830100059509277, "learning_rate": 1.6043939160186462e-06, "loss": 0.3972, "step": 18456 }, { "epoch": 2.468173308371222, "grad_norm": 1.4239022731781006, "learning_rate": 1.6036096650165944e-06, "loss": 0.3166, "step": 18457 }, { "epoch": 2.468307033966301, "grad_norm": 1.4585000276565552, "learning_rate": 1.6028255890292666e-06, "loss": 0.3924, "step": 18458 }, { "epoch": 2.46844075956138, "grad_norm": 1.5661699771881104, "learning_rate": 1.602041688073005e-06, "loss": 0.34, "step": 18459 }, { "epoch": 2.468574485156459, "grad_norm": 1.401645541191101, "learning_rate": 1.6012579621641478e-06, "loss": 0.3331, "step": 18460 }, { "epoch": 2.468708210751538, "grad_norm": 1.423363447189331, "learning_rate": 1.6004744113190341e-06, "loss": 0.3448, "step": 18461 }, { "epoch": 2.4688419363466165, "grad_norm": 1.6676380634307861, "learning_rate": 1.5996910355539884e-06, "loss": 0.3622, "step": 18462 }, { "epoch": 2.4689756619416956, "grad_norm": 1.3877291679382324, "learning_rate": 1.5989078348853505e-06, "loss": 0.3729, "step": 18463 }, { "epoch": 2.4691093875367747, "grad_norm": 1.5548053979873657, "learning_rate": 1.5981248093294377e-06, "loss": 0.4089, "step": 18464 }, { "epoch": 2.4692431131318533, "grad_norm": 1.5234910249710083, "learning_rate": 1.5973419589025707e-06, "loss": 0.3512, "step": 18465 }, { "epoch": 2.4693768387269324, "grad_norm": 1.6505564451217651, "learning_rate": 1.596559283621074e-06, "loss": 0.3549, "step": 18466 }, { "epoch": 2.469510564322011, "grad_norm": 1.421630620956421, "learning_rate": 1.595776783501254e-06, "loss": 0.3428, "step": 18467 }, { "epoch": 2.46964428991709, "grad_norm": 1.6747018098831177, "learning_rate": 1.59499445855943e-06, "loss": 0.4221, "step": 18468 }, { "epoch": 2.469778015512169, "grad_norm": 1.4631503820419312, "learning_rate": 1.594212308811901e-06, "loss": 0.3368, "step": 18469 }, { "epoch": 2.4699117411072478, "grad_norm": 1.5792455673217773, "learning_rate": 1.5934303342749725e-06, "loss": 0.3766, "step": 18470 }, { "epoch": 2.470045466702327, "grad_norm": 1.4541759490966797, "learning_rate": 1.5926485349649457e-06, "loss": 0.3411, "step": 18471 }, { "epoch": 2.470179192297406, "grad_norm": 1.5153157711029053, "learning_rate": 1.5918669108981143e-06, "loss": 0.3721, "step": 18472 }, { "epoch": 2.4703129178924845, "grad_norm": 1.4849190711975098, "learning_rate": 1.5910854620907711e-06, "loss": 0.3772, "step": 18473 }, { "epoch": 2.4704466434875636, "grad_norm": 1.444666862487793, "learning_rate": 1.5903041885592052e-06, "loss": 0.3284, "step": 18474 }, { "epoch": 2.4705803690826422, "grad_norm": 1.3294528722763062, "learning_rate": 1.5895230903197023e-06, "loss": 0.3516, "step": 18475 }, { "epoch": 2.4707140946777213, "grad_norm": 1.9074265956878662, "learning_rate": 1.5887421673885417e-06, "loss": 0.4134, "step": 18476 }, { "epoch": 2.4708478202728004, "grad_norm": 1.619092345237732, "learning_rate": 1.5879614197820026e-06, "loss": 0.4147, "step": 18477 }, { "epoch": 2.470981545867879, "grad_norm": 1.4971381425857544, "learning_rate": 1.5871808475163575e-06, "loss": 0.355, "step": 18478 }, { "epoch": 2.471115271462958, "grad_norm": 1.5303348302841187, "learning_rate": 1.5864004506078778e-06, "loss": 0.3646, "step": 18479 }, { "epoch": 2.471248997058037, "grad_norm": 1.6977455615997314, "learning_rate": 1.5856202290728318e-06, "loss": 0.3844, "step": 18480 }, { "epoch": 2.4713827226531158, "grad_norm": 1.4846254587173462, "learning_rate": 1.5848401829274762e-06, "loss": 0.379, "step": 18481 }, { "epoch": 2.471516448248195, "grad_norm": 1.6844042539596558, "learning_rate": 1.5840603121880782e-06, "loss": 0.4062, "step": 18482 }, { "epoch": 2.4716501738432735, "grad_norm": 1.4793013334274292, "learning_rate": 1.5832806168708858e-06, "loss": 0.3837, "step": 18483 }, { "epoch": 2.4717838994383525, "grad_norm": 1.5780149698257446, "learning_rate": 1.5825010969921583e-06, "loss": 0.3868, "step": 18484 }, { "epoch": 2.4719176250334316, "grad_norm": 1.8261311054229736, "learning_rate": 1.5817217525681416e-06, "loss": 0.3968, "step": 18485 }, { "epoch": 2.4720513506285102, "grad_norm": 1.6270192861557007, "learning_rate": 1.5809425836150761e-06, "loss": 0.3716, "step": 18486 }, { "epoch": 2.4721850762235893, "grad_norm": 1.542722225189209, "learning_rate": 1.5801635901492108e-06, "loss": 0.3931, "step": 18487 }, { "epoch": 2.472318801818668, "grad_norm": 1.6362234354019165, "learning_rate": 1.5793847721867749e-06, "loss": 0.3916, "step": 18488 }, { "epoch": 2.472452527413747, "grad_norm": 1.4620044231414795, "learning_rate": 1.578606129744007e-06, "loss": 0.3425, "step": 18489 }, { "epoch": 2.472586253008826, "grad_norm": 1.4025802612304688, "learning_rate": 1.577827662837136e-06, "loss": 0.3599, "step": 18490 }, { "epoch": 2.4727199786039047, "grad_norm": 1.504228115081787, "learning_rate": 1.5770493714823854e-06, "loss": 0.3786, "step": 18491 }, { "epoch": 2.4728537041989838, "grad_norm": 1.5774524211883545, "learning_rate": 1.5762712556959859e-06, "loss": 0.3686, "step": 18492 }, { "epoch": 2.4729874297940624, "grad_norm": 1.6592427492141724, "learning_rate": 1.5754933154941488e-06, "loss": 0.3766, "step": 18493 }, { "epoch": 2.4731211553891415, "grad_norm": 1.4538761377334595, "learning_rate": 1.5747155508930912e-06, "loss": 0.3927, "step": 18494 }, { "epoch": 2.4732548809842205, "grad_norm": 1.3130501508712769, "learning_rate": 1.5739379619090267e-06, "loss": 0.3134, "step": 18495 }, { "epoch": 2.473388606579299, "grad_norm": 1.5615488290786743, "learning_rate": 1.5731605485581624e-06, "loss": 0.3766, "step": 18496 }, { "epoch": 2.4735223321743782, "grad_norm": 1.5295140743255615, "learning_rate": 1.5723833108567033e-06, "loss": 0.3742, "step": 18497 }, { "epoch": 2.473656057769457, "grad_norm": 1.683884859085083, "learning_rate": 1.5716062488208494e-06, "loss": 0.3985, "step": 18498 }, { "epoch": 2.473789783364536, "grad_norm": 1.3327797651290894, "learning_rate": 1.570829362466798e-06, "loss": 0.3347, "step": 18499 }, { "epoch": 2.473923508959615, "grad_norm": 1.536956787109375, "learning_rate": 1.5700526518107428e-06, "loss": 0.403, "step": 18500 }, { "epoch": 2.4740572345546936, "grad_norm": 1.473001480102539, "learning_rate": 1.5692761168688764e-06, "loss": 0.3597, "step": 18501 }, { "epoch": 2.4741909601497727, "grad_norm": 1.409569501876831, "learning_rate": 1.5684997576573767e-06, "loss": 0.3429, "step": 18502 }, { "epoch": 2.4743246857448513, "grad_norm": 1.454110860824585, "learning_rate": 1.5677235741924347e-06, "loss": 0.3362, "step": 18503 }, { "epoch": 2.4744584113399304, "grad_norm": 1.5229406356811523, "learning_rate": 1.5669475664902268e-06, "loss": 0.3837, "step": 18504 }, { "epoch": 2.4745921369350095, "grad_norm": 1.6541240215301514, "learning_rate": 1.5661717345669237e-06, "loss": 0.3506, "step": 18505 }, { "epoch": 2.474725862530088, "grad_norm": 1.3895009756088257, "learning_rate": 1.5653960784387047e-06, "loss": 0.3464, "step": 18506 }, { "epoch": 2.474859588125167, "grad_norm": 1.621322512626648, "learning_rate": 1.5646205981217288e-06, "loss": 0.3672, "step": 18507 }, { "epoch": 2.4749933137202462, "grad_norm": 1.517978310585022, "learning_rate": 1.5638452936321702e-06, "loss": 0.3669, "step": 18508 }, { "epoch": 2.475127039315325, "grad_norm": 1.6285312175750732, "learning_rate": 1.5630701649861802e-06, "loss": 0.4053, "step": 18509 }, { "epoch": 2.475260764910404, "grad_norm": 1.3396122455596924, "learning_rate": 1.562295212199918e-06, "loss": 0.341, "step": 18510 }, { "epoch": 2.475394490505483, "grad_norm": 1.4089446067810059, "learning_rate": 1.561520435289543e-06, "loss": 0.3438, "step": 18511 }, { "epoch": 2.4755282161005616, "grad_norm": 1.6344127655029297, "learning_rate": 1.5607458342711968e-06, "loss": 0.3863, "step": 18512 }, { "epoch": 2.4756619416956407, "grad_norm": 1.6467463970184326, "learning_rate": 1.5599714091610284e-06, "loss": 0.3807, "step": 18513 }, { "epoch": 2.4757956672907193, "grad_norm": 1.7331737279891968, "learning_rate": 1.55919715997518e-06, "loss": 0.4159, "step": 18514 }, { "epoch": 2.4759293928857984, "grad_norm": 1.5829551219940186, "learning_rate": 1.5584230867297888e-06, "loss": 0.4259, "step": 18515 }, { "epoch": 2.4760631184808775, "grad_norm": 1.6567975282669067, "learning_rate": 1.5576491894409918e-06, "loss": 0.3522, "step": 18516 }, { "epoch": 2.476196844075956, "grad_norm": 1.5065925121307373, "learning_rate": 1.5568754681249188e-06, "loss": 0.3471, "step": 18517 }, { "epoch": 2.476330569671035, "grad_norm": 1.5182218551635742, "learning_rate": 1.556101922797697e-06, "loss": 0.3495, "step": 18518 }, { "epoch": 2.476464295266114, "grad_norm": 1.5013128519058228, "learning_rate": 1.5553285534754503e-06, "loss": 0.334, "step": 18519 }, { "epoch": 2.476598020861193, "grad_norm": 1.7551946640014648, "learning_rate": 1.5545553601743024e-06, "loss": 0.396, "step": 18520 }, { "epoch": 2.476731746456272, "grad_norm": 1.5106297731399536, "learning_rate": 1.5537823429103615e-06, "loss": 0.3372, "step": 18521 }, { "epoch": 2.4768654720513505, "grad_norm": 1.5935688018798828, "learning_rate": 1.5530095016997482e-06, "loss": 0.36, "step": 18522 }, { "epoch": 2.4769991976464296, "grad_norm": 1.5946675539016724, "learning_rate": 1.5522368365585695e-06, "loss": 0.376, "step": 18523 }, { "epoch": 2.4771329232415082, "grad_norm": 1.4072651863098145, "learning_rate": 1.551464347502929e-06, "loss": 0.3567, "step": 18524 }, { "epoch": 2.4772666488365873, "grad_norm": 1.4174827337265015, "learning_rate": 1.550692034548933e-06, "loss": 0.3429, "step": 18525 }, { "epoch": 2.4774003744316664, "grad_norm": 1.7302743196487427, "learning_rate": 1.5499198977126718e-06, "loss": 0.3595, "step": 18526 }, { "epoch": 2.477534100026745, "grad_norm": 1.5431721210479736, "learning_rate": 1.549147937010248e-06, "loss": 0.3543, "step": 18527 }, { "epoch": 2.477667825621824, "grad_norm": 1.4230859279632568, "learning_rate": 1.5483761524577457e-06, "loss": 0.348, "step": 18528 }, { "epoch": 2.4778015512169027, "grad_norm": 1.4824645519256592, "learning_rate": 1.5476045440712573e-06, "loss": 0.3457, "step": 18529 }, { "epoch": 2.477935276811982, "grad_norm": 1.558699607849121, "learning_rate": 1.5468331118668655e-06, "loss": 0.3363, "step": 18530 }, { "epoch": 2.478069002407061, "grad_norm": 1.4668254852294922, "learning_rate": 1.5460618558606445e-06, "loss": 0.3723, "step": 18531 }, { "epoch": 2.4782027280021395, "grad_norm": 1.4193419218063354, "learning_rate": 1.5452907760686798e-06, "loss": 0.3809, "step": 18532 }, { "epoch": 2.4783364535972185, "grad_norm": 1.5371713638305664, "learning_rate": 1.5445198725070355e-06, "loss": 0.354, "step": 18533 }, { "epoch": 2.478470179192297, "grad_norm": 1.4579757452011108, "learning_rate": 1.5437491451917829e-06, "loss": 0.3695, "step": 18534 }, { "epoch": 2.4786039047873762, "grad_norm": 1.4357386827468872, "learning_rate": 1.5429785941389885e-06, "loss": 0.3483, "step": 18535 }, { "epoch": 2.4787376303824553, "grad_norm": 1.4298735857009888, "learning_rate": 1.5422082193647102e-06, "loss": 0.3463, "step": 18536 }, { "epoch": 2.478871355977534, "grad_norm": 1.4432660341262817, "learning_rate": 1.5414380208850133e-06, "loss": 0.3537, "step": 18537 }, { "epoch": 2.479005081572613, "grad_norm": 1.7889536619186401, "learning_rate": 1.5406679987159445e-06, "loss": 0.4375, "step": 18538 }, { "epoch": 2.479138807167692, "grad_norm": 1.532205581665039, "learning_rate": 1.5398981528735569e-06, "loss": 0.3898, "step": 18539 }, { "epoch": 2.4792725327627707, "grad_norm": 1.5441720485687256, "learning_rate": 1.5391284833738961e-06, "loss": 0.3664, "step": 18540 }, { "epoch": 2.47940625835785, "grad_norm": 1.5795656442642212, "learning_rate": 1.5383589902330065e-06, "loss": 0.369, "step": 18541 }, { "epoch": 2.4795399839529284, "grad_norm": 1.5452322959899902, "learning_rate": 1.5375896734669271e-06, "loss": 0.3778, "step": 18542 }, { "epoch": 2.4796737095480075, "grad_norm": 1.4723248481750488, "learning_rate": 1.5368205330916918e-06, "loss": 0.3739, "step": 18543 }, { "epoch": 2.4798074351430865, "grad_norm": 1.5698682069778442, "learning_rate": 1.5360515691233358e-06, "loss": 0.3441, "step": 18544 }, { "epoch": 2.479941160738165, "grad_norm": 1.5200496912002563, "learning_rate": 1.5352827815778849e-06, "loss": 0.3543, "step": 18545 }, { "epoch": 2.4800748863332442, "grad_norm": 1.5363209247589111, "learning_rate": 1.5345141704713673e-06, "loss": 0.3523, "step": 18546 }, { "epoch": 2.4802086119283233, "grad_norm": 1.6510262489318848, "learning_rate": 1.533745735819796e-06, "loss": 0.3707, "step": 18547 }, { "epoch": 2.480342337523402, "grad_norm": 1.5178200006484985, "learning_rate": 1.532977477639196e-06, "loss": 0.3788, "step": 18548 }, { "epoch": 2.480476063118481, "grad_norm": 1.3785372972488403, "learning_rate": 1.5322093959455808e-06, "loss": 0.3308, "step": 18549 }, { "epoch": 2.4806097887135596, "grad_norm": 1.5331741571426392, "learning_rate": 1.5314414907549535e-06, "loss": 0.3528, "step": 18550 }, { "epoch": 2.4807435143086387, "grad_norm": 1.4459644556045532, "learning_rate": 1.530673762083329e-06, "loss": 0.3856, "step": 18551 }, { "epoch": 2.480877239903718, "grad_norm": 1.4166239500045776, "learning_rate": 1.5299062099467011e-06, "loss": 0.333, "step": 18552 }, { "epoch": 2.4810109654987964, "grad_norm": 1.6284527778625488, "learning_rate": 1.529138834361079e-06, "loss": 0.3979, "step": 18553 }, { "epoch": 2.4811446910938755, "grad_norm": 1.464064121246338, "learning_rate": 1.5283716353424482e-06, "loss": 0.333, "step": 18554 }, { "epoch": 2.481278416688954, "grad_norm": 1.6437480449676514, "learning_rate": 1.5276046129068034e-06, "loss": 0.3646, "step": 18555 }, { "epoch": 2.481412142284033, "grad_norm": 1.4831856489181519, "learning_rate": 1.5268377670701363e-06, "loss": 0.3301, "step": 18556 }, { "epoch": 2.4815458678791122, "grad_norm": 1.6348040103912354, "learning_rate": 1.5260710978484271e-06, "loss": 0.3829, "step": 18557 }, { "epoch": 2.481679593474191, "grad_norm": 1.657033085823059, "learning_rate": 1.5253046052576559e-06, "loss": 0.3984, "step": 18558 }, { "epoch": 2.48181331906927, "grad_norm": 1.5436540842056274, "learning_rate": 1.5245382893138016e-06, "loss": 0.3968, "step": 18559 }, { "epoch": 2.4819470446643486, "grad_norm": 1.4588854312896729, "learning_rate": 1.5237721500328373e-06, "loss": 0.3483, "step": 18560 }, { "epoch": 2.4820807702594276, "grad_norm": 1.589267373085022, "learning_rate": 1.52300618743073e-06, "loss": 0.3344, "step": 18561 }, { "epoch": 2.4822144958545067, "grad_norm": 1.5676326751708984, "learning_rate": 1.5222404015234483e-06, "loss": 0.378, "step": 18562 }, { "epoch": 2.4823482214495853, "grad_norm": 1.538878321647644, "learning_rate": 1.5214747923269524e-06, "loss": 0.3787, "step": 18563 }, { "epoch": 2.4824819470446644, "grad_norm": 1.4866918325424194, "learning_rate": 1.520709359857202e-06, "loss": 0.3726, "step": 18564 }, { "epoch": 2.482615672639743, "grad_norm": 1.5924081802368164, "learning_rate": 1.5199441041301533e-06, "loss": 0.3653, "step": 18565 }, { "epoch": 2.482749398234822, "grad_norm": 1.649298906326294, "learning_rate": 1.5191790251617499e-06, "loss": 0.4083, "step": 18566 }, { "epoch": 2.482883123829901, "grad_norm": 1.558677077293396, "learning_rate": 1.5184141229679472e-06, "loss": 0.3587, "step": 18567 }, { "epoch": 2.48301684942498, "grad_norm": 1.6864196062088013, "learning_rate": 1.5176493975646866e-06, "loss": 0.3242, "step": 18568 }, { "epoch": 2.483150575020059, "grad_norm": 1.3753076791763306, "learning_rate": 1.5168848489679066e-06, "loss": 0.3179, "step": 18569 }, { "epoch": 2.4832843006151375, "grad_norm": 1.5665085315704346, "learning_rate": 1.516120477193548e-06, "loss": 0.3625, "step": 18570 }, { "epoch": 2.4834180262102166, "grad_norm": 1.8492076396942139, "learning_rate": 1.5153562822575352e-06, "loss": 0.4308, "step": 18571 }, { "epoch": 2.4835517518052956, "grad_norm": 1.6114310026168823, "learning_rate": 1.5145922641758048e-06, "loss": 0.348, "step": 18572 }, { "epoch": 2.4836854774003743, "grad_norm": 1.6405280828475952, "learning_rate": 1.5138284229642786e-06, "loss": 0.3791, "step": 18573 }, { "epoch": 2.4838192029954533, "grad_norm": 1.6995488405227661, "learning_rate": 1.5130647586388746e-06, "loss": 0.3943, "step": 18574 }, { "epoch": 2.4839529285905324, "grad_norm": 1.5553573369979858, "learning_rate": 1.5123012712155205e-06, "loss": 0.337, "step": 18575 }, { "epoch": 2.484086654185611, "grad_norm": 1.5313228368759155, "learning_rate": 1.5115379607101189e-06, "loss": 0.3035, "step": 18576 }, { "epoch": 2.48422037978069, "grad_norm": 1.6485062837600708, "learning_rate": 1.5107748271385914e-06, "loss": 0.4021, "step": 18577 }, { "epoch": 2.4843541053757687, "grad_norm": 1.4323893785476685, "learning_rate": 1.5100118705168364e-06, "loss": 0.3351, "step": 18578 }, { "epoch": 2.484487830970848, "grad_norm": 1.518763542175293, "learning_rate": 1.5092490908607605e-06, "loss": 0.3848, "step": 18579 }, { "epoch": 2.484621556565927, "grad_norm": 1.476028323173523, "learning_rate": 1.5084864881862627e-06, "loss": 0.3432, "step": 18580 }, { "epoch": 2.4847552821610055, "grad_norm": 1.5826342105865479, "learning_rate": 1.507724062509237e-06, "loss": 0.3494, "step": 18581 }, { "epoch": 2.4848890077560846, "grad_norm": 1.4992865324020386, "learning_rate": 1.5069618138455788e-06, "loss": 0.3393, "step": 18582 }, { "epoch": 2.4850227333511636, "grad_norm": 1.5023773908615112, "learning_rate": 1.506199742211174e-06, "loss": 0.3246, "step": 18583 }, { "epoch": 2.4851564589462423, "grad_norm": 1.607790231704712, "learning_rate": 1.5054378476219079e-06, "loss": 0.3577, "step": 18584 }, { "epoch": 2.4852901845413213, "grad_norm": 1.612294316291809, "learning_rate": 1.5046761300936607e-06, "loss": 0.3638, "step": 18585 }, { "epoch": 2.4854239101364, "grad_norm": 1.4075566530227661, "learning_rate": 1.5039145896423112e-06, "loss": 0.3759, "step": 18586 }, { "epoch": 2.485557635731479, "grad_norm": 1.3451197147369385, "learning_rate": 1.5031532262837323e-06, "loss": 0.3129, "step": 18587 }, { "epoch": 2.485691361326558, "grad_norm": 1.7540115118026733, "learning_rate": 1.5023920400337932e-06, "loss": 0.3901, "step": 18588 }, { "epoch": 2.4858250869216367, "grad_norm": 1.434766411781311, "learning_rate": 1.5016310309083637e-06, "loss": 0.3508, "step": 18589 }, { "epoch": 2.485958812516716, "grad_norm": 1.323065161705017, "learning_rate": 1.5008701989232977e-06, "loss": 0.3037, "step": 18590 }, { "epoch": 2.4860925381117944, "grad_norm": 1.4208122491836548, "learning_rate": 1.5001095440944657e-06, "loss": 0.3482, "step": 18591 }, { "epoch": 2.4862262637068735, "grad_norm": 1.7600501775741577, "learning_rate": 1.499349066437711e-06, "loss": 0.4004, "step": 18592 }, { "epoch": 2.4863599893019526, "grad_norm": 1.603559970855713, "learning_rate": 1.4985887659688936e-06, "loss": 0.3751, "step": 18593 }, { "epoch": 2.486493714897031, "grad_norm": 1.6010901927947998, "learning_rate": 1.4978286427038602e-06, "loss": 0.4054, "step": 18594 }, { "epoch": 2.4866274404921103, "grad_norm": 1.5291130542755127, "learning_rate": 1.497068696658449e-06, "loss": 0.3701, "step": 18595 }, { "epoch": 2.486761166087189, "grad_norm": 1.6267318725585938, "learning_rate": 1.4963089278485088e-06, "loss": 0.3986, "step": 18596 }, { "epoch": 2.486894891682268, "grad_norm": 1.6186786890029907, "learning_rate": 1.4955493362898688e-06, "loss": 0.3617, "step": 18597 }, { "epoch": 2.487028617277347, "grad_norm": 1.341605305671692, "learning_rate": 1.4947899219983664e-06, "loss": 0.3401, "step": 18598 }, { "epoch": 2.4871623428724257, "grad_norm": 1.5712435245513916, "learning_rate": 1.4940306849898289e-06, "loss": 0.3781, "step": 18599 }, { "epoch": 2.4872960684675047, "grad_norm": 1.6367145776748657, "learning_rate": 1.4932716252800817e-06, "loss": 0.35, "step": 18600 }, { "epoch": 2.4874297940625834, "grad_norm": 1.6758514642715454, "learning_rate": 1.4925127428849484e-06, "loss": 0.3787, "step": 18601 }, { "epoch": 2.4875635196576624, "grad_norm": 1.500554084777832, "learning_rate": 1.4917540378202456e-06, "loss": 0.3593, "step": 18602 }, { "epoch": 2.4876972452527415, "grad_norm": 1.5667781829833984, "learning_rate": 1.4909955101017882e-06, "loss": 0.3504, "step": 18603 }, { "epoch": 2.48783097084782, "grad_norm": 1.6292206048965454, "learning_rate": 1.4902371597453879e-06, "loss": 0.3921, "step": 18604 }, { "epoch": 2.487964696442899, "grad_norm": 1.2281500101089478, "learning_rate": 1.4894789867668502e-06, "loss": 0.3387, "step": 18605 }, { "epoch": 2.488098422037978, "grad_norm": 1.7816895246505737, "learning_rate": 1.48872099118198e-06, "loss": 0.3833, "step": 18606 }, { "epoch": 2.488232147633057, "grad_norm": 1.3239103555679321, "learning_rate": 1.487963173006577e-06, "loss": 0.3108, "step": 18607 }, { "epoch": 2.488365873228136, "grad_norm": 1.4950788021087646, "learning_rate": 1.4872055322564349e-06, "loss": 0.3645, "step": 18608 }, { "epoch": 2.4884995988232146, "grad_norm": 1.6210441589355469, "learning_rate": 1.486448068947348e-06, "loss": 0.3707, "step": 18609 }, { "epoch": 2.4886333244182937, "grad_norm": 1.5311343669891357, "learning_rate": 1.4856907830951084e-06, "loss": 0.361, "step": 18610 }, { "epoch": 2.4887670500133727, "grad_norm": 1.4379596710205078, "learning_rate": 1.4849336747154908e-06, "loss": 0.4054, "step": 18611 }, { "epoch": 2.4889007756084514, "grad_norm": 1.4215463399887085, "learning_rate": 1.484176743824286e-06, "loss": 0.326, "step": 18612 }, { "epoch": 2.4890345012035304, "grad_norm": 1.5715378522872925, "learning_rate": 1.483419990437267e-06, "loss": 0.3683, "step": 18613 }, { "epoch": 2.4891682267986095, "grad_norm": 1.52204430103302, "learning_rate": 1.4826634145702102e-06, "loss": 0.3551, "step": 18614 }, { "epoch": 2.489301952393688, "grad_norm": 1.382900595664978, "learning_rate": 1.481907016238886e-06, "loss": 0.3376, "step": 18615 }, { "epoch": 2.489435677988767, "grad_norm": 1.6175472736358643, "learning_rate": 1.4811507954590542e-06, "loss": 0.4042, "step": 18616 }, { "epoch": 2.489569403583846, "grad_norm": 1.481105089187622, "learning_rate": 1.480394752246488e-06, "loss": 0.3447, "step": 18617 }, { "epoch": 2.489703129178925, "grad_norm": 1.7270575761795044, "learning_rate": 1.4796388866169375e-06, "loss": 0.4075, "step": 18618 }, { "epoch": 2.489836854774004, "grad_norm": 1.4701913595199585, "learning_rate": 1.4788831985861597e-06, "loss": 0.3603, "step": 18619 }, { "epoch": 2.4899705803690826, "grad_norm": 1.8368618488311768, "learning_rate": 1.4781276881699114e-06, "loss": 0.3784, "step": 18620 }, { "epoch": 2.4901043059641617, "grad_norm": 1.5809166431427002, "learning_rate": 1.4773723553839325e-06, "loss": 0.3802, "step": 18621 }, { "epoch": 2.4902380315592403, "grad_norm": 1.3764699697494507, "learning_rate": 1.4766172002439772e-06, "loss": 0.3266, "step": 18622 }, { "epoch": 2.4903717571543194, "grad_norm": 1.5955884456634521, "learning_rate": 1.475862222765777e-06, "loss": 0.3722, "step": 18623 }, { "epoch": 2.4905054827493984, "grad_norm": 1.4743257761001587, "learning_rate": 1.475107422965073e-06, "loss": 0.3551, "step": 18624 }, { "epoch": 2.490639208344477, "grad_norm": 1.6498315334320068, "learning_rate": 1.4743528008575968e-06, "loss": 0.3756, "step": 18625 }, { "epoch": 2.490772933939556, "grad_norm": 1.4713531732559204, "learning_rate": 1.4735983564590784e-06, "loss": 0.3748, "step": 18626 }, { "epoch": 2.4909066595346347, "grad_norm": 1.632378101348877, "learning_rate": 1.4728440897852436e-06, "loss": 0.4009, "step": 18627 }, { "epoch": 2.491040385129714, "grad_norm": 1.5066401958465576, "learning_rate": 1.4720900008518136e-06, "loss": 0.3472, "step": 18628 }, { "epoch": 2.491174110724793, "grad_norm": 1.7821909189224243, "learning_rate": 1.4713360896745077e-06, "loss": 0.4125, "step": 18629 }, { "epoch": 2.4913078363198715, "grad_norm": 1.47171950340271, "learning_rate": 1.4705823562690402e-06, "loss": 0.3191, "step": 18630 }, { "epoch": 2.4914415619149506, "grad_norm": 1.5529792308807373, "learning_rate": 1.4698288006511208e-06, "loss": 0.3592, "step": 18631 }, { "epoch": 2.491575287510029, "grad_norm": 1.7210625410079956, "learning_rate": 1.4690754228364578e-06, "loss": 0.3671, "step": 18632 }, { "epoch": 2.4917090131051083, "grad_norm": 1.4205901622772217, "learning_rate": 1.4683222228407544e-06, "loss": 0.3456, "step": 18633 }, { "epoch": 2.4918427387001874, "grad_norm": 1.579715609550476, "learning_rate": 1.4675692006797137e-06, "loss": 0.3886, "step": 18634 }, { "epoch": 2.491976464295266, "grad_norm": 1.402671456336975, "learning_rate": 1.466816356369023e-06, "loss": 0.3688, "step": 18635 }, { "epoch": 2.492110189890345, "grad_norm": 1.7316697835922241, "learning_rate": 1.4660636899243841e-06, "loss": 0.4021, "step": 18636 }, { "epoch": 2.4922439154854237, "grad_norm": 1.4928300380706787, "learning_rate": 1.465311201361478e-06, "loss": 0.3457, "step": 18637 }, { "epoch": 2.4923776410805027, "grad_norm": 1.4771106243133545, "learning_rate": 1.464558890695994e-06, "loss": 0.377, "step": 18638 }, { "epoch": 2.492511366675582, "grad_norm": 1.5786329507827759, "learning_rate": 1.4638067579436156e-06, "loss": 0.3912, "step": 18639 }, { "epoch": 2.4926450922706604, "grad_norm": 1.4679805040359497, "learning_rate": 1.463054803120012e-06, "loss": 0.3037, "step": 18640 }, { "epoch": 2.4927788178657395, "grad_norm": 1.2981265783309937, "learning_rate": 1.4623030262408677e-06, "loss": 0.2889, "step": 18641 }, { "epoch": 2.4929125434608186, "grad_norm": 1.6673259735107422, "learning_rate": 1.4615514273218435e-06, "loss": 0.4053, "step": 18642 }, { "epoch": 2.493046269055897, "grad_norm": 1.3892321586608887, "learning_rate": 1.4608000063786098e-06, "loss": 0.3463, "step": 18643 }, { "epoch": 2.4931799946509763, "grad_norm": 1.4569119215011597, "learning_rate": 1.460048763426829e-06, "loss": 0.3344, "step": 18644 }, { "epoch": 2.493313720246055, "grad_norm": 1.5483710765838623, "learning_rate": 1.4592976984821604e-06, "loss": 0.3435, "step": 18645 }, { "epoch": 2.493447445841134, "grad_norm": 1.4543675184249878, "learning_rate": 1.4585468115602574e-06, "loss": 0.3409, "step": 18646 }, { "epoch": 2.493581171436213, "grad_norm": 1.5567741394042969, "learning_rate": 1.457796102676774e-06, "loss": 0.3701, "step": 18647 }, { "epoch": 2.4937148970312917, "grad_norm": 1.6860926151275635, "learning_rate": 1.4570455718473563e-06, "loss": 0.4001, "step": 18648 }, { "epoch": 2.4938486226263707, "grad_norm": 1.771240472793579, "learning_rate": 1.456295219087649e-06, "loss": 0.4329, "step": 18649 }, { "epoch": 2.49398234822145, "grad_norm": 1.8035566806793213, "learning_rate": 1.4555450444132934e-06, "loss": 0.4223, "step": 18650 }, { "epoch": 2.4941160738165284, "grad_norm": 1.6802574396133423, "learning_rate": 1.4547950478399242e-06, "loss": 0.3724, "step": 18651 }, { "epoch": 2.4942497994116075, "grad_norm": 1.4724411964416504, "learning_rate": 1.4540452293831753e-06, "loss": 0.3455, "step": 18652 }, { "epoch": 2.494383525006686, "grad_norm": 1.5564385652542114, "learning_rate": 1.4532955890586764e-06, "loss": 0.3545, "step": 18653 }, { "epoch": 2.494517250601765, "grad_norm": 1.4613860845565796, "learning_rate": 1.4525461268820517e-06, "loss": 0.338, "step": 18654 }, { "epoch": 2.4946509761968443, "grad_norm": 1.4449515342712402, "learning_rate": 1.4517968428689277e-06, "loss": 0.3571, "step": 18655 }, { "epoch": 2.494784701791923, "grad_norm": 1.6455576419830322, "learning_rate": 1.451047737034913e-06, "loss": 0.3713, "step": 18656 }, { "epoch": 2.494918427387002, "grad_norm": 1.6345043182373047, "learning_rate": 1.4502988093956306e-06, "loss": 0.4092, "step": 18657 }, { "epoch": 2.4950521529820806, "grad_norm": 1.5034797191619873, "learning_rate": 1.44955005996669e-06, "loss": 0.3634, "step": 18658 }, { "epoch": 2.4951858785771597, "grad_norm": 1.5965651273727417, "learning_rate": 1.4488014887636926e-06, "loss": 0.3926, "step": 18659 }, { "epoch": 2.4953196041722387, "grad_norm": 1.5741041898727417, "learning_rate": 1.4480530958022498e-06, "loss": 0.3752, "step": 18660 }, { "epoch": 2.4954533297673174, "grad_norm": 1.5955390930175781, "learning_rate": 1.447304881097953e-06, "loss": 0.3512, "step": 18661 }, { "epoch": 2.4955870553623964, "grad_norm": 1.4500372409820557, "learning_rate": 1.4465568446664057e-06, "loss": 0.3786, "step": 18662 }, { "epoch": 2.495720780957475, "grad_norm": 1.5714104175567627, "learning_rate": 1.445808986523195e-06, "loss": 0.3611, "step": 18663 }, { "epoch": 2.495854506552554, "grad_norm": 1.5748096704483032, "learning_rate": 1.4450613066839092e-06, "loss": 0.3914, "step": 18664 }, { "epoch": 2.495988232147633, "grad_norm": 1.4553799629211426, "learning_rate": 1.4443138051641347e-06, "loss": 0.3054, "step": 18665 }, { "epoch": 2.496121957742712, "grad_norm": 1.525992751121521, "learning_rate": 1.4435664819794527e-06, "loss": 0.3564, "step": 18666 }, { "epoch": 2.496255683337791, "grad_norm": 1.4415743350982666, "learning_rate": 1.442819337145439e-06, "loss": 0.3683, "step": 18667 }, { "epoch": 2.4963894089328695, "grad_norm": 1.5423680543899536, "learning_rate": 1.4420723706776673e-06, "loss": 0.3672, "step": 18668 }, { "epoch": 2.4965231345279486, "grad_norm": 1.5602530241012573, "learning_rate": 1.4413255825917094e-06, "loss": 0.3483, "step": 18669 }, { "epoch": 2.4966568601230277, "grad_norm": 1.5395179986953735, "learning_rate": 1.4405789729031294e-06, "loss": 0.3449, "step": 18670 }, { "epoch": 2.4967905857181063, "grad_norm": 1.5500996112823486, "learning_rate": 1.4398325416274894e-06, "loss": 0.363, "step": 18671 }, { "epoch": 2.4969243113131854, "grad_norm": 1.4442201852798462, "learning_rate": 1.4390862887803502e-06, "loss": 0.3524, "step": 18672 }, { "epoch": 2.497058036908264, "grad_norm": 1.5078154802322388, "learning_rate": 1.4383402143772651e-06, "loss": 0.3574, "step": 18673 }, { "epoch": 2.497191762503343, "grad_norm": 1.6072710752487183, "learning_rate": 1.4375943184337871e-06, "loss": 0.3452, "step": 18674 }, { "epoch": 2.497325488098422, "grad_norm": 1.4882187843322754, "learning_rate": 1.4368486009654582e-06, "loss": 0.385, "step": 18675 }, { "epoch": 2.4974592136935008, "grad_norm": 1.5553520917892456, "learning_rate": 1.4361030619878292e-06, "loss": 0.3332, "step": 18676 }, { "epoch": 2.49759293928858, "grad_norm": 1.4280493259429932, "learning_rate": 1.4353577015164356e-06, "loss": 0.3527, "step": 18677 }, { "epoch": 2.497726664883659, "grad_norm": 1.6124001741409302, "learning_rate": 1.434612519566816e-06, "loss": 0.3693, "step": 18678 }, { "epoch": 2.4978603904787375, "grad_norm": 1.6526379585266113, "learning_rate": 1.4338675161545046e-06, "loss": 0.391, "step": 18679 }, { "epoch": 2.4979941160738166, "grad_norm": 1.56964910030365, "learning_rate": 1.4331226912950236e-06, "loss": 0.3922, "step": 18680 }, { "epoch": 2.4981278416688952, "grad_norm": 1.37174654006958, "learning_rate": 1.432378045003906e-06, "loss": 0.339, "step": 18681 }, { "epoch": 2.4982615672639743, "grad_norm": 1.5816730260849, "learning_rate": 1.4316335772966683e-06, "loss": 0.3544, "step": 18682 }, { "epoch": 2.4983952928590534, "grad_norm": 1.3611667156219482, "learning_rate": 1.4308892881888293e-06, "loss": 0.3678, "step": 18683 }, { "epoch": 2.498529018454132, "grad_norm": 1.4585638046264648, "learning_rate": 1.430145177695904e-06, "loss": 0.3478, "step": 18684 }, { "epoch": 2.498662744049211, "grad_norm": 1.6856166124343872, "learning_rate": 1.4294012458333995e-06, "loss": 0.4183, "step": 18685 }, { "epoch": 2.49879646964429, "grad_norm": 1.396251916885376, "learning_rate": 1.4286574926168284e-06, "loss": 0.2937, "step": 18686 }, { "epoch": 2.4989301952393688, "grad_norm": 1.7472703456878662, "learning_rate": 1.4279139180616886e-06, "loss": 0.3809, "step": 18687 }, { "epoch": 2.499063920834448, "grad_norm": 1.4669545888900757, "learning_rate": 1.4271705221834808e-06, "loss": 0.3291, "step": 18688 }, { "epoch": 2.4991976464295265, "grad_norm": 1.5886670351028442, "learning_rate": 1.4264273049976995e-06, "loss": 0.3243, "step": 18689 }, { "epoch": 2.4993313720246055, "grad_norm": 1.3921732902526855, "learning_rate": 1.4256842665198377e-06, "loss": 0.3106, "step": 18690 }, { "epoch": 2.4994650976196846, "grad_norm": 1.731690526008606, "learning_rate": 1.4249414067653821e-06, "loss": 0.3809, "step": 18691 }, { "epoch": 2.4995988232147632, "grad_norm": 1.7129285335540771, "learning_rate": 1.424198725749818e-06, "loss": 0.3751, "step": 18692 }, { "epoch": 2.4997325488098423, "grad_norm": 1.7014578580856323, "learning_rate": 1.423456223488625e-06, "loss": 0.4243, "step": 18693 }, { "epoch": 2.499866274404921, "grad_norm": 1.5362272262573242, "learning_rate": 1.4227138999972801e-06, "loss": 0.3555, "step": 18694 }, { "epoch": 2.5, "grad_norm": 1.6782039403915405, "learning_rate": 1.421971755291256e-06, "loss": 0.3735, "step": 18695 }, { "epoch": 2.500133725595079, "grad_norm": 1.5355224609375, "learning_rate": 1.4212297893860228e-06, "loss": 0.3752, "step": 18696 }, { "epoch": 2.5002674511901577, "grad_norm": 1.4049218893051147, "learning_rate": 1.4204880022970457e-06, "loss": 0.3239, "step": 18697 }, { "epoch": 2.5004011767852368, "grad_norm": 1.5492300987243652, "learning_rate": 1.419746394039786e-06, "loss": 0.3677, "step": 18698 }, { "epoch": 2.5005349023803154, "grad_norm": 1.6307997703552246, "learning_rate": 1.4190049646297032e-06, "loss": 0.3667, "step": 18699 }, { "epoch": 2.5006686279753945, "grad_norm": 1.423971176147461, "learning_rate": 1.418263714082252e-06, "loss": 0.3484, "step": 18700 }, { "epoch": 2.5008023535704735, "grad_norm": 1.3322798013687134, "learning_rate": 1.4175226424128775e-06, "loss": 0.335, "step": 18701 }, { "epoch": 2.500936079165552, "grad_norm": 1.680484652519226, "learning_rate": 1.4167817496370362e-06, "loss": 0.3785, "step": 18702 }, { "epoch": 2.5010698047606312, "grad_norm": 1.4885387420654297, "learning_rate": 1.4160410357701638e-06, "loss": 0.3693, "step": 18703 }, { "epoch": 2.50120353035571, "grad_norm": 1.524792194366455, "learning_rate": 1.4153005008276987e-06, "loss": 0.3617, "step": 18704 }, { "epoch": 2.501337255950789, "grad_norm": 1.346716046333313, "learning_rate": 1.4145601448250857e-06, "loss": 0.3338, "step": 18705 }, { "epoch": 2.501470981545868, "grad_norm": 1.6070681810379028, "learning_rate": 1.4138199677777465e-06, "loss": 0.3934, "step": 18706 }, { "epoch": 2.5016047071409466, "grad_norm": 1.6407783031463623, "learning_rate": 1.4130799697011177e-06, "loss": 0.3834, "step": 18707 }, { "epoch": 2.5017384327360257, "grad_norm": 1.808449387550354, "learning_rate": 1.4123401506106182e-06, "loss": 0.4059, "step": 18708 }, { "epoch": 2.5018721583311043, "grad_norm": 1.7229595184326172, "learning_rate": 1.4116005105216712e-06, "loss": 0.3975, "step": 18709 }, { "epoch": 2.5020058839261834, "grad_norm": 1.3103058338165283, "learning_rate": 1.4108610494496934e-06, "loss": 0.2707, "step": 18710 }, { "epoch": 2.5021396095212625, "grad_norm": 1.5738813877105713, "learning_rate": 1.4101217674100975e-06, "loss": 0.3697, "step": 18711 }, { "epoch": 2.5022733351163415, "grad_norm": 1.4926611185073853, "learning_rate": 1.4093826644182939e-06, "loss": 0.341, "step": 18712 }, { "epoch": 2.50240706071142, "grad_norm": 1.6358656883239746, "learning_rate": 1.408643740489688e-06, "loss": 0.3715, "step": 18713 }, { "epoch": 2.502540786306499, "grad_norm": 1.7031357288360596, "learning_rate": 1.4079049956396828e-06, "loss": 0.3621, "step": 18714 }, { "epoch": 2.502674511901578, "grad_norm": 1.6063978672027588, "learning_rate": 1.4071664298836762e-06, "loss": 0.3838, "step": 18715 }, { "epoch": 2.502808237496657, "grad_norm": 1.4091458320617676, "learning_rate": 1.4064280432370635e-06, "loss": 0.3785, "step": 18716 }, { "epoch": 2.502941963091736, "grad_norm": 1.3704969882965088, "learning_rate": 1.4056898357152338e-06, "loss": 0.354, "step": 18717 }, { "epoch": 2.5030756886868146, "grad_norm": 1.5371063947677612, "learning_rate": 1.4049518073335767e-06, "loss": 0.3657, "step": 18718 }, { "epoch": 2.5032094142818937, "grad_norm": 1.608318567276001, "learning_rate": 1.4042139581074765e-06, "loss": 0.4203, "step": 18719 }, { "epoch": 2.5033431398769723, "grad_norm": 1.4850339889526367, "learning_rate": 1.4034762880523068e-06, "loss": 0.3533, "step": 18720 }, { "epoch": 2.5034768654720514, "grad_norm": 1.4904999732971191, "learning_rate": 1.4027387971834495e-06, "loss": 0.3691, "step": 18721 }, { "epoch": 2.5036105910671305, "grad_norm": 1.6859025955200195, "learning_rate": 1.4020014855162755e-06, "loss": 0.3924, "step": 18722 }, { "epoch": 2.503744316662209, "grad_norm": 1.483705997467041, "learning_rate": 1.4012643530661529e-06, "loss": 0.3468, "step": 18723 }, { "epoch": 2.503878042257288, "grad_norm": 1.6232643127441406, "learning_rate": 1.4005273998484504e-06, "loss": 0.38, "step": 18724 }, { "epoch": 2.504011767852367, "grad_norm": 1.425099492073059, "learning_rate": 1.3997906258785188e-06, "loss": 0.3424, "step": 18725 }, { "epoch": 2.504145493447446, "grad_norm": 1.520273208618164, "learning_rate": 1.3990540311717282e-06, "loss": 0.3288, "step": 18726 }, { "epoch": 2.504279219042525, "grad_norm": 1.5543946027755737, "learning_rate": 1.398317615743423e-06, "loss": 0.3721, "step": 18727 }, { "epoch": 2.5044129446376036, "grad_norm": 1.4910900592803955, "learning_rate": 1.3975813796089566e-06, "loss": 0.3153, "step": 18728 }, { "epoch": 2.5045466702326826, "grad_norm": 1.5485210418701172, "learning_rate": 1.3968453227836753e-06, "loss": 0.3559, "step": 18729 }, { "epoch": 2.5046803958277613, "grad_norm": 1.681546926498413, "learning_rate": 1.3961094452829182e-06, "loss": 0.4147, "step": 18730 }, { "epoch": 2.5048141214228403, "grad_norm": 1.366850733757019, "learning_rate": 1.3953737471220307e-06, "loss": 0.3745, "step": 18731 }, { "epoch": 2.5049478470179194, "grad_norm": 1.7842521667480469, "learning_rate": 1.3946382283163417e-06, "loss": 0.4468, "step": 18732 }, { "epoch": 2.505081572612998, "grad_norm": 1.5375540256500244, "learning_rate": 1.3939028888811845e-06, "loss": 0.3757, "step": 18733 }, { "epoch": 2.505215298208077, "grad_norm": 1.53855299949646, "learning_rate": 1.3931677288318868e-06, "loss": 0.3455, "step": 18734 }, { "epoch": 2.5053490238031557, "grad_norm": 1.6233115196228027, "learning_rate": 1.3924327481837708e-06, "loss": 0.3927, "step": 18735 }, { "epoch": 2.505482749398235, "grad_norm": 1.5496952533721924, "learning_rate": 1.3916979469521585e-06, "loss": 0.3531, "step": 18736 }, { "epoch": 2.505616474993314, "grad_norm": 1.4044309854507446, "learning_rate": 1.3909633251523657e-06, "loss": 0.289, "step": 18737 }, { "epoch": 2.5057502005883925, "grad_norm": 1.5986356735229492, "learning_rate": 1.3902288827997035e-06, "loss": 0.3726, "step": 18738 }, { "epoch": 2.5058839261834716, "grad_norm": 1.588167667388916, "learning_rate": 1.3894946199094816e-06, "loss": 0.4141, "step": 18739 }, { "epoch": 2.50601765177855, "grad_norm": 1.4581599235534668, "learning_rate": 1.3887605364970058e-06, "loss": 0.3517, "step": 18740 }, { "epoch": 2.5061513773736293, "grad_norm": 1.5078647136688232, "learning_rate": 1.388026632577576e-06, "loss": 0.3392, "step": 18741 }, { "epoch": 2.5062851029687083, "grad_norm": 1.5856362581253052, "learning_rate": 1.387292908166491e-06, "loss": 0.344, "step": 18742 }, { "epoch": 2.506418828563787, "grad_norm": 1.5700857639312744, "learning_rate": 1.3865593632790453e-06, "loss": 0.3744, "step": 18743 }, { "epoch": 2.506552554158866, "grad_norm": 1.5993281602859497, "learning_rate": 1.3858259979305234e-06, "loss": 0.388, "step": 18744 }, { "epoch": 2.5066862797539446, "grad_norm": 1.5850213766098022, "learning_rate": 1.3850928121362195e-06, "loss": 0.4141, "step": 18745 }, { "epoch": 2.5068200053490237, "grad_norm": 1.6332415342330933, "learning_rate": 1.3843598059114083e-06, "loss": 0.4112, "step": 18746 }, { "epoch": 2.506953730944103, "grad_norm": 1.3776710033416748, "learning_rate": 1.3836269792713774e-06, "loss": 0.364, "step": 18747 }, { "epoch": 2.507087456539182, "grad_norm": 1.513225793838501, "learning_rate": 1.382894332231395e-06, "loss": 0.3742, "step": 18748 }, { "epoch": 2.5072211821342605, "grad_norm": 1.4606261253356934, "learning_rate": 1.3821618648067314e-06, "loss": 0.341, "step": 18749 }, { "epoch": 2.5073549077293396, "grad_norm": 1.4419642686843872, "learning_rate": 1.381429577012663e-06, "loss": 0.3273, "step": 18750 }, { "epoch": 2.507488633324418, "grad_norm": 1.7148438692092896, "learning_rate": 1.3806974688644449e-06, "loss": 0.4025, "step": 18751 }, { "epoch": 2.5076223589194973, "grad_norm": 1.47361159324646, "learning_rate": 1.3799655403773405e-06, "loss": 0.3458, "step": 18752 }, { "epoch": 2.5077560845145763, "grad_norm": 1.5109809637069702, "learning_rate": 1.3792337915666065e-06, "loss": 0.3662, "step": 18753 }, { "epoch": 2.507889810109655, "grad_norm": 1.3976682424545288, "learning_rate": 1.3785022224474943e-06, "loss": 0.3199, "step": 18754 }, { "epoch": 2.508023535704734, "grad_norm": 1.3809337615966797, "learning_rate": 1.3777708330352534e-06, "loss": 0.3491, "step": 18755 }, { "epoch": 2.5081572612998126, "grad_norm": 1.5658472776412964, "learning_rate": 1.3770396233451288e-06, "loss": 0.3941, "step": 18756 }, { "epoch": 2.5082909868948917, "grad_norm": 1.6389915943145752, "learning_rate": 1.3763085933923626e-06, "loss": 0.3864, "step": 18757 }, { "epoch": 2.508424712489971, "grad_norm": 1.5828673839569092, "learning_rate": 1.3755777431921912e-06, "loss": 0.3792, "step": 18758 }, { "epoch": 2.5085584380850494, "grad_norm": 1.604867935180664, "learning_rate": 1.3748470727598496e-06, "loss": 0.3857, "step": 18759 }, { "epoch": 2.5086921636801285, "grad_norm": 1.6293370723724365, "learning_rate": 1.3741165821105674e-06, "loss": 0.4127, "step": 18760 }, { "epoch": 2.508825889275207, "grad_norm": 1.5185595750808716, "learning_rate": 1.3733862712595702e-06, "loss": 0.3211, "step": 18761 }, { "epoch": 2.508959614870286, "grad_norm": 1.5716077089309692, "learning_rate": 1.3726561402220818e-06, "loss": 0.332, "step": 18762 }, { "epoch": 2.5090933404653653, "grad_norm": 1.5485892295837402, "learning_rate": 1.3719261890133206e-06, "loss": 0.3614, "step": 18763 }, { "epoch": 2.509227066060444, "grad_norm": 1.4621496200561523, "learning_rate": 1.3711964176485049e-06, "loss": 0.3266, "step": 18764 }, { "epoch": 2.509360791655523, "grad_norm": 1.573520302772522, "learning_rate": 1.3704668261428377e-06, "loss": 0.3716, "step": 18765 }, { "epoch": 2.5094945172506016, "grad_norm": 1.293465495109558, "learning_rate": 1.369737414511536e-06, "loss": 0.313, "step": 18766 }, { "epoch": 2.5096282428456806, "grad_norm": 1.499725103378296, "learning_rate": 1.3690081827697988e-06, "loss": 0.3308, "step": 18767 }, { "epoch": 2.5097619684407597, "grad_norm": 1.686970591545105, "learning_rate": 1.3682791309328236e-06, "loss": 0.4017, "step": 18768 }, { "epoch": 2.5098956940358383, "grad_norm": 1.6373852491378784, "learning_rate": 1.367550259015815e-06, "loss": 0.3788, "step": 18769 }, { "epoch": 2.5100294196309174, "grad_norm": 1.4756332635879517, "learning_rate": 1.3668215670339569e-06, "loss": 0.3121, "step": 18770 }, { "epoch": 2.510163145225996, "grad_norm": 1.5063499212265015, "learning_rate": 1.3660930550024454e-06, "loss": 0.3321, "step": 18771 }, { "epoch": 2.510296870821075, "grad_norm": 1.851346731185913, "learning_rate": 1.3653647229364619e-06, "loss": 0.4127, "step": 18772 }, { "epoch": 2.510430596416154, "grad_norm": 1.528391718864441, "learning_rate": 1.3646365708511867e-06, "loss": 0.3472, "step": 18773 }, { "epoch": 2.510564322011233, "grad_norm": 1.6154814958572388, "learning_rate": 1.3639085987618005e-06, "loss": 0.4041, "step": 18774 }, { "epoch": 2.510698047606312, "grad_norm": 1.6219675540924072, "learning_rate": 1.363180806683475e-06, "loss": 0.3969, "step": 18775 }, { "epoch": 2.5108317732013905, "grad_norm": 1.368686556816101, "learning_rate": 1.3624531946313812e-06, "loss": 0.3738, "step": 18776 }, { "epoch": 2.5109654987964696, "grad_norm": 1.381800889968872, "learning_rate": 1.3617257626206849e-06, "loss": 0.3337, "step": 18777 }, { "epoch": 2.5110992243915486, "grad_norm": 1.5714846849441528, "learning_rate": 1.3609985106665491e-06, "loss": 0.3588, "step": 18778 }, { "epoch": 2.5112329499866277, "grad_norm": 1.4829574823379517, "learning_rate": 1.3602714387841332e-06, "loss": 0.38, "step": 18779 }, { "epoch": 2.5113666755817063, "grad_norm": 1.5353392362594604, "learning_rate": 1.3595445469885915e-06, "loss": 0.3733, "step": 18780 }, { "epoch": 2.511500401176785, "grad_norm": 1.42531156539917, "learning_rate": 1.3588178352950764e-06, "loss": 0.3783, "step": 18781 }, { "epoch": 2.511634126771864, "grad_norm": 1.678500771522522, "learning_rate": 1.3580913037187338e-06, "loss": 0.3572, "step": 18782 }, { "epoch": 2.511767852366943, "grad_norm": 1.5032864809036255, "learning_rate": 1.357364952274709e-06, "loss": 0.3253, "step": 18783 }, { "epoch": 2.511901577962022, "grad_norm": 1.4216761589050293, "learning_rate": 1.3566387809781423e-06, "loss": 0.3724, "step": 18784 }, { "epoch": 2.512035303557101, "grad_norm": 1.5071951150894165, "learning_rate": 1.3559127898441703e-06, "loss": 0.3177, "step": 18785 }, { "epoch": 2.51216902915218, "grad_norm": 1.6937954425811768, "learning_rate": 1.3551869788879213e-06, "loss": 0.3584, "step": 18786 }, { "epoch": 2.5123027547472585, "grad_norm": 1.6006581783294678, "learning_rate": 1.3544613481245294e-06, "loss": 0.3517, "step": 18787 }, { "epoch": 2.5124364803423376, "grad_norm": 1.6860499382019043, "learning_rate": 1.3537358975691205e-06, "loss": 0.4051, "step": 18788 }, { "epoch": 2.5125702059374166, "grad_norm": 1.5064318180084229, "learning_rate": 1.3530106272368083e-06, "loss": 0.3696, "step": 18789 }, { "epoch": 2.5127039315324953, "grad_norm": 1.831417202949524, "learning_rate": 1.35228553714272e-06, "loss": 0.3862, "step": 18790 }, { "epoch": 2.5128376571275743, "grad_norm": 1.65804922580719, "learning_rate": 1.35156062730196e-06, "loss": 0.365, "step": 18791 }, { "epoch": 2.512971382722653, "grad_norm": 1.593489408493042, "learning_rate": 1.3508358977296477e-06, "loss": 0.4029, "step": 18792 }, { "epoch": 2.513105108317732, "grad_norm": 1.6318997144699097, "learning_rate": 1.3501113484408822e-06, "loss": 0.4142, "step": 18793 }, { "epoch": 2.513238833912811, "grad_norm": 1.4919341802597046, "learning_rate": 1.3493869794507664e-06, "loss": 0.3244, "step": 18794 }, { "epoch": 2.5133725595078897, "grad_norm": 1.4111082553863525, "learning_rate": 1.3486627907744065e-06, "loss": 0.3153, "step": 18795 }, { "epoch": 2.513506285102969, "grad_norm": 1.555806279182434, "learning_rate": 1.3479387824268897e-06, "loss": 0.4304, "step": 18796 }, { "epoch": 2.5136400106980474, "grad_norm": 1.4185792207717896, "learning_rate": 1.3472149544233092e-06, "loss": 0.3403, "step": 18797 }, { "epoch": 2.5137737362931265, "grad_norm": 1.5001932382583618, "learning_rate": 1.3464913067787534e-06, "loss": 0.3543, "step": 18798 }, { "epoch": 2.5139074618882056, "grad_norm": 1.6629923582077026, "learning_rate": 1.3457678395083062e-06, "loss": 0.3407, "step": 18799 }, { "epoch": 2.514041187483284, "grad_norm": 1.7201027870178223, "learning_rate": 1.3450445526270473e-06, "loss": 0.3956, "step": 18800 }, { "epoch": 2.5141749130783633, "grad_norm": 1.5988587141036987, "learning_rate": 1.344321446150052e-06, "loss": 0.3789, "step": 18801 }, { "epoch": 2.514308638673442, "grad_norm": 1.4937191009521484, "learning_rate": 1.343598520092394e-06, "loss": 0.3551, "step": 18802 }, { "epoch": 2.514442364268521, "grad_norm": 1.5772446393966675, "learning_rate": 1.3428757744691422e-06, "loss": 0.369, "step": 18803 }, { "epoch": 2.5145760898636, "grad_norm": 1.475939154624939, "learning_rate": 1.3421532092953625e-06, "loss": 0.3487, "step": 18804 }, { "epoch": 2.5147098154586787, "grad_norm": 1.53485107421875, "learning_rate": 1.3414308245861097e-06, "loss": 0.365, "step": 18805 }, { "epoch": 2.5148435410537577, "grad_norm": 1.580859899520874, "learning_rate": 1.340708620356449e-06, "loss": 0.3712, "step": 18806 }, { "epoch": 2.5149772666488364, "grad_norm": 1.6358156204223633, "learning_rate": 1.339986596621431e-06, "loss": 0.3569, "step": 18807 }, { "epoch": 2.5151109922439154, "grad_norm": 1.4552199840545654, "learning_rate": 1.3392647533961056e-06, "loss": 0.3497, "step": 18808 }, { "epoch": 2.5152447178389945, "grad_norm": 1.5465292930603027, "learning_rate": 1.338543090695521e-06, "loss": 0.3486, "step": 18809 }, { "epoch": 2.515378443434073, "grad_norm": 1.416013240814209, "learning_rate": 1.3378216085347128e-06, "loss": 0.3655, "step": 18810 }, { "epoch": 2.515512169029152, "grad_norm": 1.5111641883850098, "learning_rate": 1.3371003069287292e-06, "loss": 0.3288, "step": 18811 }, { "epoch": 2.515645894624231, "grad_norm": 1.5227692127227783, "learning_rate": 1.3363791858925978e-06, "loss": 0.3929, "step": 18812 }, { "epoch": 2.51577962021931, "grad_norm": 1.4305846691131592, "learning_rate": 1.3356582454413504e-06, "loss": 0.3455, "step": 18813 }, { "epoch": 2.515913345814389, "grad_norm": 1.4858986139297485, "learning_rate": 1.33493748559002e-06, "loss": 0.3487, "step": 18814 }, { "epoch": 2.516047071409468, "grad_norm": 1.4846031665802002, "learning_rate": 1.3342169063536214e-06, "loss": 0.3582, "step": 18815 }, { "epoch": 2.5161807970045467, "grad_norm": 1.7297195196151733, "learning_rate": 1.333496507747184e-06, "loss": 0.369, "step": 18816 }, { "epoch": 2.5163145225996253, "grad_norm": 1.680019736289978, "learning_rate": 1.3327762897857167e-06, "loss": 0.365, "step": 18817 }, { "epoch": 2.5164482481947044, "grad_norm": 1.7061219215393066, "learning_rate": 1.332056252484234e-06, "loss": 0.4049, "step": 18818 }, { "epoch": 2.5165819737897834, "grad_norm": 1.5936188697814941, "learning_rate": 1.3313363958577442e-06, "loss": 0.3612, "step": 18819 }, { "epoch": 2.5167156993848625, "grad_norm": 1.4679731130599976, "learning_rate": 1.3306167199212527e-06, "loss": 0.3258, "step": 18820 }, { "epoch": 2.516849424979941, "grad_norm": 1.3932182788848877, "learning_rate": 1.329897224689759e-06, "loss": 0.3415, "step": 18821 }, { "epoch": 2.51698315057502, "grad_norm": 1.4148304462432861, "learning_rate": 1.329177910178262e-06, "loss": 0.3315, "step": 18822 }, { "epoch": 2.517116876170099, "grad_norm": 1.6134790182113647, "learning_rate": 1.3284587764017543e-06, "loss": 0.3558, "step": 18823 }, { "epoch": 2.517250601765178, "grad_norm": 1.457486867904663, "learning_rate": 1.3277398233752258e-06, "loss": 0.3579, "step": 18824 }, { "epoch": 2.517384327360257, "grad_norm": 1.4696450233459473, "learning_rate": 1.3270210511136616e-06, "loss": 0.3733, "step": 18825 }, { "epoch": 2.5175180529553356, "grad_norm": 1.6368086338043213, "learning_rate": 1.326302459632045e-06, "loss": 0.3562, "step": 18826 }, { "epoch": 2.5176517785504147, "grad_norm": 1.4553279876708984, "learning_rate": 1.3255840489453542e-06, "loss": 0.3454, "step": 18827 }, { "epoch": 2.5177855041454933, "grad_norm": 1.5567313432693481, "learning_rate": 1.3248658190685648e-06, "loss": 0.3548, "step": 18828 }, { "epoch": 2.5179192297405724, "grad_norm": 1.6422576904296875, "learning_rate": 1.3241477700166427e-06, "loss": 0.4046, "step": 18829 }, { "epoch": 2.5180529553356514, "grad_norm": 1.5452489852905273, "learning_rate": 1.3234299018045615e-06, "loss": 0.3483, "step": 18830 }, { "epoch": 2.51818668093073, "grad_norm": 1.5659314393997192, "learning_rate": 1.3227122144472782e-06, "loss": 0.3924, "step": 18831 }, { "epoch": 2.518320406525809, "grad_norm": 1.5921807289123535, "learning_rate": 1.3219947079597573e-06, "loss": 0.3773, "step": 18832 }, { "epoch": 2.5184541321208878, "grad_norm": 1.5508805513381958, "learning_rate": 1.3212773823569548e-06, "loss": 0.4004, "step": 18833 }, { "epoch": 2.518587857715967, "grad_norm": 1.5954124927520752, "learning_rate": 1.3205602376538162e-06, "loss": 0.3655, "step": 18834 }, { "epoch": 2.518721583311046, "grad_norm": 1.544524073600769, "learning_rate": 1.3198432738652988e-06, "loss": 0.3341, "step": 18835 }, { "epoch": 2.5188553089061245, "grad_norm": 1.5325716733932495, "learning_rate": 1.3191264910063405e-06, "loss": 0.3964, "step": 18836 }, { "epoch": 2.5189890345012036, "grad_norm": 1.3853940963745117, "learning_rate": 1.3184098890918829e-06, "loss": 0.3574, "step": 18837 }, { "epoch": 2.519122760096282, "grad_norm": 1.5011093616485596, "learning_rate": 1.3176934681368648e-06, "loss": 0.3327, "step": 18838 }, { "epoch": 2.5192564856913613, "grad_norm": 1.4824696779251099, "learning_rate": 1.3169772281562154e-06, "loss": 0.326, "step": 18839 }, { "epoch": 2.5193902112864404, "grad_norm": 1.5798135995864868, "learning_rate": 1.3162611691648708e-06, "loss": 0.3216, "step": 18840 }, { "epoch": 2.519523936881519, "grad_norm": 1.6640273332595825, "learning_rate": 1.3155452911777511e-06, "loss": 0.3945, "step": 18841 }, { "epoch": 2.519657662476598, "grad_norm": 1.6306378841400146, "learning_rate": 1.3148295942097799e-06, "loss": 0.4206, "step": 18842 }, { "epoch": 2.5197913880716767, "grad_norm": 1.576798915863037, "learning_rate": 1.3141140782758743e-06, "loss": 0.3378, "step": 18843 }, { "epoch": 2.5199251136667558, "grad_norm": 1.5848937034606934, "learning_rate": 1.3133987433909502e-06, "loss": 0.3738, "step": 18844 }, { "epoch": 2.520058839261835, "grad_norm": 1.750974416732788, "learning_rate": 1.3126835895699164e-06, "loss": 0.3785, "step": 18845 }, { "epoch": 2.5201925648569135, "grad_norm": 1.5847290754318237, "learning_rate": 1.3119686168276812e-06, "loss": 0.3687, "step": 18846 }, { "epoch": 2.5203262904519925, "grad_norm": 1.610335111618042, "learning_rate": 1.3112538251791461e-06, "loss": 0.3779, "step": 18847 }, { "epoch": 2.520460016047071, "grad_norm": 1.6374783515930176, "learning_rate": 1.3105392146392104e-06, "loss": 0.4127, "step": 18848 }, { "epoch": 2.52059374164215, "grad_norm": 1.5574485063552856, "learning_rate": 1.309824785222772e-06, "loss": 0.3009, "step": 18849 }, { "epoch": 2.5207274672372293, "grad_norm": 1.4666000604629517, "learning_rate": 1.3091105369447166e-06, "loss": 0.3312, "step": 18850 }, { "epoch": 2.5208611928323084, "grad_norm": 1.4455159902572632, "learning_rate": 1.308396469819938e-06, "loss": 0.3367, "step": 18851 }, { "epoch": 2.520994918427387, "grad_norm": 1.4717121124267578, "learning_rate": 1.30768258386332e-06, "loss": 0.3203, "step": 18852 }, { "epoch": 2.521128644022466, "grad_norm": 1.5219894647598267, "learning_rate": 1.3069688790897362e-06, "loss": 0.3371, "step": 18853 }, { "epoch": 2.5212623696175447, "grad_norm": 1.7840780019760132, "learning_rate": 1.3062553555140722e-06, "loss": 0.4369, "step": 18854 }, { "epoch": 2.5213960952126238, "grad_norm": 1.6263341903686523, "learning_rate": 1.305542013151192e-06, "loss": 0.4057, "step": 18855 }, { "epoch": 2.521529820807703, "grad_norm": 1.6607122421264648, "learning_rate": 1.3048288520159736e-06, "loss": 0.3629, "step": 18856 }, { "epoch": 2.5216635464027815, "grad_norm": 1.5537337064743042, "learning_rate": 1.304115872123275e-06, "loss": 0.412, "step": 18857 }, { "epoch": 2.5217972719978605, "grad_norm": 1.489897608757019, "learning_rate": 1.3034030734879576e-06, "loss": 0.3513, "step": 18858 }, { "epoch": 2.521930997592939, "grad_norm": 1.659265398979187, "learning_rate": 1.3026904561248865e-06, "loss": 0.3631, "step": 18859 }, { "epoch": 2.522064723188018, "grad_norm": 1.540769338607788, "learning_rate": 1.3019780200489073e-06, "loss": 0.3753, "step": 18860 }, { "epoch": 2.5221984487830973, "grad_norm": 1.741860270500183, "learning_rate": 1.301265765274874e-06, "loss": 0.38, "step": 18861 }, { "epoch": 2.522332174378176, "grad_norm": 1.45553719997406, "learning_rate": 1.3005536918176309e-06, "loss": 0.3359, "step": 18862 }, { "epoch": 2.522465899973255, "grad_norm": 1.6767503023147583, "learning_rate": 1.299841799692023e-06, "loss": 0.4195, "step": 18863 }, { "epoch": 2.5225996255683336, "grad_norm": 1.3968744277954102, "learning_rate": 1.2991300889128867e-06, "loss": 0.3443, "step": 18864 }, { "epoch": 2.5227333511634127, "grad_norm": 1.4151256084442139, "learning_rate": 1.2984185594950582e-06, "loss": 0.3211, "step": 18865 }, { "epoch": 2.5228670767584918, "grad_norm": 1.5486268997192383, "learning_rate": 1.2977072114533683e-06, "loss": 0.3724, "step": 18866 }, { "epoch": 2.5230008023535704, "grad_norm": 1.5191481113433838, "learning_rate": 1.2969960448026443e-06, "loss": 0.3389, "step": 18867 }, { "epoch": 2.5231345279486495, "grad_norm": 1.516695499420166, "learning_rate": 1.2962850595577092e-06, "loss": 0.3724, "step": 18868 }, { "epoch": 2.523268253543728, "grad_norm": 1.7147924900054932, "learning_rate": 1.295574255733385e-06, "loss": 0.3902, "step": 18869 }, { "epoch": 2.523401979138807, "grad_norm": 1.419689655303955, "learning_rate": 1.2948636333444853e-06, "loss": 0.3332, "step": 18870 }, { "epoch": 2.523535704733886, "grad_norm": 1.519533395767212, "learning_rate": 1.2941531924058227e-06, "loss": 0.3517, "step": 18871 }, { "epoch": 2.523669430328965, "grad_norm": 1.5843182802200317, "learning_rate": 1.2934429329322073e-06, "loss": 0.3839, "step": 18872 }, { "epoch": 2.523803155924044, "grad_norm": 1.312522530555725, "learning_rate": 1.2927328549384444e-06, "loss": 0.3333, "step": 18873 }, { "epoch": 2.5239368815191225, "grad_norm": 1.5722289085388184, "learning_rate": 1.2920229584393284e-06, "loss": 0.3556, "step": 18874 }, { "epoch": 2.5240706071142016, "grad_norm": 1.5377271175384521, "learning_rate": 1.2913132434496666e-06, "loss": 0.386, "step": 18875 }, { "epoch": 2.5242043327092807, "grad_norm": 1.5642294883728027, "learning_rate": 1.2906037099842417e-06, "loss": 0.3333, "step": 18876 }, { "epoch": 2.5243380583043593, "grad_norm": 1.3723654747009277, "learning_rate": 1.2898943580578504e-06, "loss": 0.3382, "step": 18877 }, { "epoch": 2.5244717838994384, "grad_norm": 1.5495398044586182, "learning_rate": 1.2891851876852802e-06, "loss": 0.3313, "step": 18878 }, { "epoch": 2.524605509494517, "grad_norm": 1.7357217073440552, "learning_rate": 1.2884761988813034e-06, "loss": 0.3834, "step": 18879 }, { "epoch": 2.524739235089596, "grad_norm": 1.5397751331329346, "learning_rate": 1.2877673916607092e-06, "loss": 0.3741, "step": 18880 }, { "epoch": 2.524872960684675, "grad_norm": 1.5161101818084717, "learning_rate": 1.287058766038265e-06, "loss": 0.3926, "step": 18881 }, { "epoch": 2.525006686279754, "grad_norm": 1.4876248836517334, "learning_rate": 1.2863503220287433e-06, "loss": 0.3311, "step": 18882 }, { "epoch": 2.525140411874833, "grad_norm": 1.5441436767578125, "learning_rate": 1.285642059646911e-06, "loss": 0.3585, "step": 18883 }, { "epoch": 2.5252741374699115, "grad_norm": 1.4930050373077393, "learning_rate": 1.28493397890753e-06, "loss": 0.3824, "step": 18884 }, { "epoch": 2.5254078630649905, "grad_norm": 1.625380039215088, "learning_rate": 1.2842260798253637e-06, "loss": 0.3923, "step": 18885 }, { "epoch": 2.5255415886600696, "grad_norm": 1.4921505451202393, "learning_rate": 1.2835183624151637e-06, "loss": 0.3359, "step": 18886 }, { "epoch": 2.5256753142551487, "grad_norm": 1.3709782361984253, "learning_rate": 1.2828108266916817e-06, "loss": 0.3533, "step": 18887 }, { "epoch": 2.5258090398502273, "grad_norm": 1.5018943548202515, "learning_rate": 1.2821034726696669e-06, "loss": 0.3428, "step": 18888 }, { "epoch": 2.5259427654453064, "grad_norm": 1.8628551959991455, "learning_rate": 1.281396300363863e-06, "loss": 0.43, "step": 18889 }, { "epoch": 2.526076491040385, "grad_norm": 1.6328073740005493, "learning_rate": 1.2806893097890105e-06, "loss": 0.3712, "step": 18890 }, { "epoch": 2.526210216635464, "grad_norm": 1.5484539270401, "learning_rate": 1.2799825009598466e-06, "loss": 0.3549, "step": 18891 }, { "epoch": 2.526343942230543, "grad_norm": 1.4931472539901733, "learning_rate": 1.2792758738911026e-06, "loss": 0.3372, "step": 18892 }, { "epoch": 2.5264776678256218, "grad_norm": 1.4790502786636353, "learning_rate": 1.278569428597508e-06, "loss": 0.3773, "step": 18893 }, { "epoch": 2.526611393420701, "grad_norm": 1.5567066669464111, "learning_rate": 1.27786316509379e-06, "loss": 0.3383, "step": 18894 }, { "epoch": 2.5267451190157795, "grad_norm": 1.5801483392715454, "learning_rate": 1.2771570833946645e-06, "loss": 0.3358, "step": 18895 }, { "epoch": 2.5268788446108585, "grad_norm": 1.4971035718917847, "learning_rate": 1.2764511835148552e-06, "loss": 0.4023, "step": 18896 }, { "epoch": 2.5270125702059376, "grad_norm": 1.5514277219772339, "learning_rate": 1.2757454654690748e-06, "loss": 0.357, "step": 18897 }, { "epoch": 2.5271462958010162, "grad_norm": 1.6739771366119385, "learning_rate": 1.2750399292720284e-06, "loss": 0.413, "step": 18898 }, { "epoch": 2.5272800213960953, "grad_norm": 1.4762396812438965, "learning_rate": 1.2743345749384296e-06, "loss": 0.3844, "step": 18899 }, { "epoch": 2.527413746991174, "grad_norm": 1.636349081993103, "learning_rate": 1.2736294024829732e-06, "loss": 0.3619, "step": 18900 }, { "epoch": 2.527547472586253, "grad_norm": 1.557605504989624, "learning_rate": 1.2729244119203655e-06, "loss": 0.3979, "step": 18901 }, { "epoch": 2.527681198181332, "grad_norm": 1.7391892671585083, "learning_rate": 1.2722196032652955e-06, "loss": 0.3773, "step": 18902 }, { "epoch": 2.5278149237764107, "grad_norm": 1.4972485303878784, "learning_rate": 1.2715149765324542e-06, "loss": 0.3698, "step": 18903 }, { "epoch": 2.5279486493714898, "grad_norm": 1.526349663734436, "learning_rate": 1.270810531736535e-06, "loss": 0.3532, "step": 18904 }, { "epoch": 2.5280823749665684, "grad_norm": 1.4375758171081543, "learning_rate": 1.270106268892216e-06, "loss": 0.3724, "step": 18905 }, { "epoch": 2.5282161005616475, "grad_norm": 1.5881681442260742, "learning_rate": 1.2694021880141772e-06, "loss": 0.3777, "step": 18906 }, { "epoch": 2.5283498261567265, "grad_norm": 1.4318047761917114, "learning_rate": 1.2686982891170962e-06, "loss": 0.3423, "step": 18907 }, { "epoch": 2.528483551751805, "grad_norm": 1.5559161901474, "learning_rate": 1.267994572215644e-06, "loss": 0.3583, "step": 18908 }, { "epoch": 2.5286172773468842, "grad_norm": 1.701930046081543, "learning_rate": 1.2672910373244896e-06, "loss": 0.4203, "step": 18909 }, { "epoch": 2.528751002941963, "grad_norm": 1.5551903247833252, "learning_rate": 1.266587684458297e-06, "loss": 0.3768, "step": 18910 }, { "epoch": 2.528884728537042, "grad_norm": 1.3695919513702393, "learning_rate": 1.2658845136317276e-06, "loss": 0.3139, "step": 18911 }, { "epoch": 2.529018454132121, "grad_norm": 1.7634577751159668, "learning_rate": 1.2651815248594368e-06, "loss": 0.3994, "step": 18912 }, { "epoch": 2.5291521797271996, "grad_norm": 1.4687007665634155, "learning_rate": 1.2644787181560826e-06, "loss": 0.3556, "step": 18913 }, { "epoch": 2.5292859053222787, "grad_norm": 1.4918192625045776, "learning_rate": 1.2637760935363053e-06, "loss": 0.3584, "step": 18914 }, { "epoch": 2.5294196309173573, "grad_norm": 1.6515318155288696, "learning_rate": 1.2630736510147569e-06, "loss": 0.3726, "step": 18915 }, { "epoch": 2.5295533565124364, "grad_norm": 1.7606096267700195, "learning_rate": 1.2623713906060798e-06, "loss": 0.3648, "step": 18916 }, { "epoch": 2.5296870821075155, "grad_norm": 1.505626916885376, "learning_rate": 1.261669312324908e-06, "loss": 0.3066, "step": 18917 }, { "epoch": 2.5298208077025945, "grad_norm": 1.4503101110458374, "learning_rate": 1.260967416185882e-06, "loss": 0.3279, "step": 18918 }, { "epoch": 2.529954533297673, "grad_norm": 1.7868506908416748, "learning_rate": 1.2602657022036224e-06, "loss": 0.3945, "step": 18919 }, { "epoch": 2.530088258892752, "grad_norm": 1.4775365591049194, "learning_rate": 1.2595641703927652e-06, "loss": 0.3394, "step": 18920 }, { "epoch": 2.530221984487831, "grad_norm": 1.7230604887008667, "learning_rate": 1.2588628207679276e-06, "loss": 0.4013, "step": 18921 }, { "epoch": 2.53035571008291, "grad_norm": 1.548416256904602, "learning_rate": 1.2581616533437279e-06, "loss": 0.3794, "step": 18922 }, { "epoch": 2.530489435677989, "grad_norm": 1.4889628887176514, "learning_rate": 1.2574606681347878e-06, "loss": 0.3246, "step": 18923 }, { "epoch": 2.5306231612730676, "grad_norm": 1.5073761940002441, "learning_rate": 1.25675986515571e-06, "loss": 0.3332, "step": 18924 }, { "epoch": 2.5307568868681467, "grad_norm": 1.572583556175232, "learning_rate": 1.2560592444211106e-06, "loss": 0.363, "step": 18925 }, { "epoch": 2.5308906124632253, "grad_norm": 1.5006012916564941, "learning_rate": 1.2553588059455878e-06, "loss": 0.3461, "step": 18926 }, { "epoch": 2.5310243380583044, "grad_norm": 1.8597067594528198, "learning_rate": 1.2546585497437425e-06, "loss": 0.3793, "step": 18927 }, { "epoch": 2.5311580636533835, "grad_norm": 1.4208486080169678, "learning_rate": 1.2539584758301704e-06, "loss": 0.36, "step": 18928 }, { "epoch": 2.531291789248462, "grad_norm": 1.5138018131256104, "learning_rate": 1.2532585842194656e-06, "loss": 0.3554, "step": 18929 }, { "epoch": 2.531425514843541, "grad_norm": 1.3781754970550537, "learning_rate": 1.2525588749262163e-06, "loss": 0.336, "step": 18930 }, { "epoch": 2.53155924043862, "grad_norm": 1.4748908281326294, "learning_rate": 1.2518593479650065e-06, "loss": 0.3259, "step": 18931 }, { "epoch": 2.531692966033699, "grad_norm": 1.6090593338012695, "learning_rate": 1.2511600033504178e-06, "loss": 0.39, "step": 18932 }, { "epoch": 2.531826691628778, "grad_norm": 1.5167192220687866, "learning_rate": 1.2504608410970264e-06, "loss": 0.358, "step": 18933 }, { "epoch": 2.5319604172238566, "grad_norm": 1.4974462985992432, "learning_rate": 1.2497618612194073e-06, "loss": 0.3648, "step": 18934 }, { "epoch": 2.5320941428189356, "grad_norm": 1.4638608694076538, "learning_rate": 1.2490630637321289e-06, "loss": 0.3383, "step": 18935 }, { "epoch": 2.5322278684140143, "grad_norm": 1.5389032363891602, "learning_rate": 1.248364448649757e-06, "loss": 0.3777, "step": 18936 }, { "epoch": 2.5323615940090933, "grad_norm": 1.6555815935134888, "learning_rate": 1.2476660159868559e-06, "loss": 0.4132, "step": 18937 }, { "epoch": 2.5324953196041724, "grad_norm": 1.5481048822402954, "learning_rate": 1.2469677657579771e-06, "loss": 0.3578, "step": 18938 }, { "epoch": 2.532629045199251, "grad_norm": 1.7124639749526978, "learning_rate": 1.2462696979776835e-06, "loss": 0.356, "step": 18939 }, { "epoch": 2.53276277079433, "grad_norm": 1.6271015405654907, "learning_rate": 1.2455718126605176e-06, "loss": 0.3931, "step": 18940 }, { "epoch": 2.5328964963894087, "grad_norm": 1.7026063203811646, "learning_rate": 1.2448741098210326e-06, "loss": 0.3803, "step": 18941 }, { "epoch": 2.533030221984488, "grad_norm": 1.7247742414474487, "learning_rate": 1.2441765894737711e-06, "loss": 0.4584, "step": 18942 }, { "epoch": 2.533163947579567, "grad_norm": 1.5899831056594849, "learning_rate": 1.243479251633266e-06, "loss": 0.352, "step": 18943 }, { "epoch": 2.5332976731746455, "grad_norm": 1.6082937717437744, "learning_rate": 1.2427820963140612e-06, "loss": 0.4004, "step": 18944 }, { "epoch": 2.5334313987697246, "grad_norm": 1.5501461029052734, "learning_rate": 1.2420851235306819e-06, "loss": 0.3835, "step": 18945 }, { "epoch": 2.533565124364803, "grad_norm": 1.5390478372573853, "learning_rate": 1.2413883332976573e-06, "loss": 0.3305, "step": 18946 }, { "epoch": 2.5336988499598823, "grad_norm": 1.4468762874603271, "learning_rate": 1.2406917256295115e-06, "loss": 0.3827, "step": 18947 }, { "epoch": 2.5338325755549613, "grad_norm": 1.4085780382156372, "learning_rate": 1.239995300540765e-06, "loss": 0.3413, "step": 18948 }, { "epoch": 2.53396630115004, "grad_norm": 1.6610180139541626, "learning_rate": 1.2392990580459351e-06, "loss": 0.4127, "step": 18949 }, { "epoch": 2.534100026745119, "grad_norm": 1.6348180770874023, "learning_rate": 1.2386029981595327e-06, "loss": 0.4361, "step": 18950 }, { "epoch": 2.5342337523401977, "grad_norm": 1.6586226224899292, "learning_rate": 1.2379071208960669e-06, "loss": 0.3813, "step": 18951 }, { "epoch": 2.5343674779352767, "grad_norm": 1.4290226697921753, "learning_rate": 1.2372114262700419e-06, "loss": 0.3266, "step": 18952 }, { "epoch": 2.534501203530356, "grad_norm": 1.4148601293563843, "learning_rate": 1.2365159142959604e-06, "loss": 0.31, "step": 18953 }, { "epoch": 2.534634929125435, "grad_norm": 1.4793347120285034, "learning_rate": 1.2358205849883197e-06, "loss": 0.3724, "step": 18954 }, { "epoch": 2.5347686547205135, "grad_norm": 1.5342814922332764, "learning_rate": 1.235125438361612e-06, "loss": 0.3367, "step": 18955 }, { "epoch": 2.5349023803155926, "grad_norm": 1.5601845979690552, "learning_rate": 1.234430474430327e-06, "loss": 0.3565, "step": 18956 }, { "epoch": 2.535036105910671, "grad_norm": 1.6269174814224243, "learning_rate": 1.2337356932089517e-06, "loss": 0.3976, "step": 18957 }, { "epoch": 2.5351698315057503, "grad_norm": 1.8737772703170776, "learning_rate": 1.2330410947119685e-06, "loss": 0.4239, "step": 18958 }, { "epoch": 2.5353035571008293, "grad_norm": 1.744576096534729, "learning_rate": 1.2323466789538508e-06, "loss": 0.3963, "step": 18959 }, { "epoch": 2.535437282695908, "grad_norm": 1.4829845428466797, "learning_rate": 1.2316524459490796e-06, "loss": 0.328, "step": 18960 }, { "epoch": 2.535571008290987, "grad_norm": 1.599000096321106, "learning_rate": 1.230958395712123e-06, "loss": 0.3597, "step": 18961 }, { "epoch": 2.5357047338860657, "grad_norm": 1.5227789878845215, "learning_rate": 1.2302645282574465e-06, "loss": 0.3609, "step": 18962 }, { "epoch": 2.5358384594811447, "grad_norm": 1.6379772424697876, "learning_rate": 1.2295708435995168e-06, "loss": 0.3806, "step": 18963 }, { "epoch": 2.535972185076224, "grad_norm": 1.5787181854248047, "learning_rate": 1.2288773417527866e-06, "loss": 0.3813, "step": 18964 }, { "epoch": 2.5361059106713024, "grad_norm": 1.4062182903289795, "learning_rate": 1.2281840227317187e-06, "loss": 0.3524, "step": 18965 }, { "epoch": 2.5362396362663815, "grad_norm": 1.653089165687561, "learning_rate": 1.2274908865507595e-06, "loss": 0.3671, "step": 18966 }, { "epoch": 2.53637336186146, "grad_norm": 1.4121057987213135, "learning_rate": 1.2267979332243552e-06, "loss": 0.3521, "step": 18967 }, { "epoch": 2.536507087456539, "grad_norm": 1.4362971782684326, "learning_rate": 1.2261051627669584e-06, "loss": 0.3375, "step": 18968 }, { "epoch": 2.5366408130516183, "grad_norm": 1.4840314388275146, "learning_rate": 1.2254125751929991e-06, "loss": 0.3247, "step": 18969 }, { "epoch": 2.536774538646697, "grad_norm": 1.5780587196350098, "learning_rate": 1.2247201705169232e-06, "loss": 0.3474, "step": 18970 }, { "epoch": 2.536908264241776, "grad_norm": 1.3934011459350586, "learning_rate": 1.2240279487531548e-06, "loss": 0.3254, "step": 18971 }, { "epoch": 2.5370419898368546, "grad_norm": 1.4963421821594238, "learning_rate": 1.2233359099161268e-06, "loss": 0.3708, "step": 18972 }, { "epoch": 2.5371757154319337, "grad_norm": 1.4356242418289185, "learning_rate": 1.2226440540202645e-06, "loss": 0.3597, "step": 18973 }, { "epoch": 2.5373094410270127, "grad_norm": 1.3616023063659668, "learning_rate": 1.221952381079986e-06, "loss": 0.2859, "step": 18974 }, { "epoch": 2.5374431666220914, "grad_norm": 1.6241912841796875, "learning_rate": 1.2212608911097123e-06, "loss": 0.3904, "step": 18975 }, { "epoch": 2.5375768922171704, "grad_norm": 1.471322774887085, "learning_rate": 1.220569584123854e-06, "loss": 0.3649, "step": 18976 }, { "epoch": 2.537710617812249, "grad_norm": 1.5435800552368164, "learning_rate": 1.2198784601368208e-06, "loss": 0.4084, "step": 18977 }, { "epoch": 2.537844343407328, "grad_norm": 1.509615421295166, "learning_rate": 1.2191875191630209e-06, "loss": 0.3581, "step": 18978 }, { "epoch": 2.537978069002407, "grad_norm": 1.3876394033432007, "learning_rate": 1.218496761216854e-06, "loss": 0.2991, "step": 18979 }, { "epoch": 2.538111794597486, "grad_norm": 1.5642766952514648, "learning_rate": 1.21780618631272e-06, "loss": 0.344, "step": 18980 }, { "epoch": 2.538245520192565, "grad_norm": 1.4800750017166138, "learning_rate": 1.2171157944650114e-06, "loss": 0.3215, "step": 18981 }, { "epoch": 2.5383792457876435, "grad_norm": 1.552872657775879, "learning_rate": 1.2164255856881224e-06, "loss": 0.3373, "step": 18982 }, { "epoch": 2.5385129713827226, "grad_norm": 1.4098650217056274, "learning_rate": 1.2157355599964326e-06, "loss": 0.3437, "step": 18983 }, { "epoch": 2.5386466969778017, "grad_norm": 1.435842514038086, "learning_rate": 1.2150457174043339e-06, "loss": 0.329, "step": 18984 }, { "epoch": 2.5387804225728807, "grad_norm": 1.518319010734558, "learning_rate": 1.214356057926197e-06, "loss": 0.3355, "step": 18985 }, { "epoch": 2.5389141481679594, "grad_norm": 1.756947636604309, "learning_rate": 1.2136665815764027e-06, "loss": 0.405, "step": 18986 }, { "epoch": 2.539047873763038, "grad_norm": 1.588113784790039, "learning_rate": 1.2129772883693236e-06, "loss": 0.3687, "step": 18987 }, { "epoch": 2.539181599358117, "grad_norm": 1.9040706157684326, "learning_rate": 1.2122881783193197e-06, "loss": 0.3649, "step": 18988 }, { "epoch": 2.539315324953196, "grad_norm": 1.4508939981460571, "learning_rate": 1.2115992514407637e-06, "loss": 0.3213, "step": 18989 }, { "epoch": 2.539449050548275, "grad_norm": 1.6112890243530273, "learning_rate": 1.210910507748011e-06, "loss": 0.3592, "step": 18990 }, { "epoch": 2.539582776143354, "grad_norm": 1.3934227228164673, "learning_rate": 1.2102219472554177e-06, "loss": 0.3655, "step": 18991 }, { "epoch": 2.539716501738433, "grad_norm": 1.629698395729065, "learning_rate": 1.209533569977337e-06, "loss": 0.3581, "step": 18992 }, { "epoch": 2.5398502273335115, "grad_norm": 1.5746179819107056, "learning_rate": 1.2088453759281172e-06, "loss": 0.3777, "step": 18993 }, { "epoch": 2.5399839529285906, "grad_norm": 1.5221848487854004, "learning_rate": 1.2081573651221036e-06, "loss": 0.371, "step": 18994 }, { "epoch": 2.5401176785236697, "grad_norm": 1.5939481258392334, "learning_rate": 1.2074695375736368e-06, "loss": 0.4167, "step": 18995 }, { "epoch": 2.5402514041187483, "grad_norm": 1.4628301858901978, "learning_rate": 1.2067818932970543e-06, "loss": 0.3531, "step": 18996 }, { "epoch": 2.5403851297138274, "grad_norm": 1.3332810401916504, "learning_rate": 1.2060944323066891e-06, "loss": 0.3306, "step": 18997 }, { "epoch": 2.540518855308906, "grad_norm": 1.4984132051467896, "learning_rate": 1.20540715461687e-06, "loss": 0.3814, "step": 18998 }, { "epoch": 2.540652580903985, "grad_norm": 1.6222798824310303, "learning_rate": 1.204720060241924e-06, "loss": 0.3567, "step": 18999 }, { "epoch": 2.540786306499064, "grad_norm": 1.604555368423462, "learning_rate": 1.204033149196171e-06, "loss": 0.3778, "step": 19000 }, { "epoch": 2.5409200320941427, "grad_norm": 1.564584732055664, "learning_rate": 1.2033464214939317e-06, "loss": 0.3868, "step": 19001 }, { "epoch": 2.541053757689222, "grad_norm": 1.417330265045166, "learning_rate": 1.2026598771495167e-06, "loss": 0.3731, "step": 19002 }, { "epoch": 2.5411874832843004, "grad_norm": 1.4878209829330444, "learning_rate": 1.2019735161772429e-06, "loss": 0.3173, "step": 19003 }, { "epoch": 2.5413212088793795, "grad_norm": 1.6852128505706787, "learning_rate": 1.201287338591407e-06, "loss": 0.3695, "step": 19004 }, { "epoch": 2.5414549344744586, "grad_norm": 1.5643755197525024, "learning_rate": 1.2006013444063192e-06, "loss": 0.3505, "step": 19005 }, { "epoch": 2.541588660069537, "grad_norm": 1.5982167720794678, "learning_rate": 1.1999155336362779e-06, "loss": 0.435, "step": 19006 }, { "epoch": 2.5417223856646163, "grad_norm": 1.6048541069030762, "learning_rate": 1.1992299062955725e-06, "loss": 0.3804, "step": 19007 }, { "epoch": 2.541856111259695, "grad_norm": 1.4650464057922363, "learning_rate": 1.1985444623985031e-06, "loss": 0.3472, "step": 19008 }, { "epoch": 2.541989836854774, "grad_norm": 1.5495638847351074, "learning_rate": 1.1978592019593482e-06, "loss": 0.3544, "step": 19009 }, { "epoch": 2.542123562449853, "grad_norm": 1.5978690385818481, "learning_rate": 1.1971741249923985e-06, "loss": 0.3652, "step": 19010 }, { "epoch": 2.5422572880449317, "grad_norm": 1.6080735921859741, "learning_rate": 1.1964892315119292e-06, "loss": 0.3367, "step": 19011 }, { "epoch": 2.5423910136400107, "grad_norm": 1.582105040550232, "learning_rate": 1.195804521532219e-06, "loss": 0.3773, "step": 19012 }, { "epoch": 2.5425247392350894, "grad_norm": 1.6652531623840332, "learning_rate": 1.1951199950675373e-06, "loss": 0.3642, "step": 19013 }, { "epoch": 2.5426584648301684, "grad_norm": 1.5084993839263916, "learning_rate": 1.1944356521321542e-06, "loss": 0.3733, "step": 19014 }, { "epoch": 2.5427921904252475, "grad_norm": 1.4647800922393799, "learning_rate": 1.1937514927403349e-06, "loss": 0.3416, "step": 19015 }, { "epoch": 2.542925916020326, "grad_norm": 1.4628969430923462, "learning_rate": 1.1930675169063388e-06, "loss": 0.3275, "step": 19016 }, { "epoch": 2.543059641615405, "grad_norm": 1.5793293714523315, "learning_rate": 1.1923837246444225e-06, "loss": 0.3856, "step": 19017 }, { "epoch": 2.543193367210484, "grad_norm": 1.590939998626709, "learning_rate": 1.191700115968839e-06, "loss": 0.3654, "step": 19018 }, { "epoch": 2.543327092805563, "grad_norm": 1.5299972295761108, "learning_rate": 1.1910166908938392e-06, "loss": 0.3644, "step": 19019 }, { "epoch": 2.543460818400642, "grad_norm": 1.5773290395736694, "learning_rate": 1.190333449433666e-06, "loss": 0.3528, "step": 19020 }, { "epoch": 2.543594543995721, "grad_norm": 1.285900354385376, "learning_rate": 1.1896503916025627e-06, "loss": 0.3576, "step": 19021 }, { "epoch": 2.5437282695907997, "grad_norm": 1.4042330980300903, "learning_rate": 1.1889675174147685e-06, "loss": 0.3465, "step": 19022 }, { "epoch": 2.5438619951858783, "grad_norm": 1.5448381900787354, "learning_rate": 1.1882848268845115e-06, "loss": 0.3592, "step": 19023 }, { "epoch": 2.5439957207809574, "grad_norm": 1.4879765510559082, "learning_rate": 1.1876023200260268e-06, "loss": 0.3461, "step": 19024 }, { "epoch": 2.5441294463760364, "grad_norm": 1.812675952911377, "learning_rate": 1.1869199968535394e-06, "loss": 0.4068, "step": 19025 }, { "epoch": 2.5442631719711155, "grad_norm": 1.5145901441574097, "learning_rate": 1.1862378573812715e-06, "loss": 0.3704, "step": 19026 }, { "epoch": 2.544396897566194, "grad_norm": 1.530989646911621, "learning_rate": 1.185555901623443e-06, "loss": 0.3292, "step": 19027 }, { "epoch": 2.544530623161273, "grad_norm": 1.4953234195709229, "learning_rate": 1.1848741295942634e-06, "loss": 0.3569, "step": 19028 }, { "epoch": 2.544664348756352, "grad_norm": 1.5294607877731323, "learning_rate": 1.1841925413079526e-06, "loss": 0.3689, "step": 19029 }, { "epoch": 2.544798074351431, "grad_norm": 1.5358182191848755, "learning_rate": 1.1835111367787089e-06, "loss": 0.3674, "step": 19030 }, { "epoch": 2.54493179994651, "grad_norm": 1.594738245010376, "learning_rate": 1.18282991602074e-06, "loss": 0.3927, "step": 19031 }, { "epoch": 2.5450655255415886, "grad_norm": 1.5686489343643188, "learning_rate": 1.1821488790482439e-06, "loss": 0.3761, "step": 19032 }, { "epoch": 2.5451992511366677, "grad_norm": 1.6287424564361572, "learning_rate": 1.181468025875415e-06, "loss": 0.409, "step": 19033 }, { "epoch": 2.5453329767317463, "grad_norm": 1.470693826675415, "learning_rate": 1.1807873565164507e-06, "loss": 0.3646, "step": 19034 }, { "epoch": 2.5454667023268254, "grad_norm": 1.3940705060958862, "learning_rate": 1.1801068709855324e-06, "loss": 0.326, "step": 19035 }, { "epoch": 2.5456004279219044, "grad_norm": 1.5084551572799683, "learning_rate": 1.1794265692968476e-06, "loss": 0.3577, "step": 19036 }, { "epoch": 2.545734153516983, "grad_norm": 1.598075270652771, "learning_rate": 1.1787464514645752e-06, "loss": 0.3809, "step": 19037 }, { "epoch": 2.545867879112062, "grad_norm": 1.6303542852401733, "learning_rate": 1.1780665175028915e-06, "loss": 0.3902, "step": 19038 }, { "epoch": 2.5460016047071408, "grad_norm": 1.535286784172058, "learning_rate": 1.1773867674259698e-06, "loss": 0.35, "step": 19039 }, { "epoch": 2.54613533030222, "grad_norm": 1.5834107398986816, "learning_rate": 1.1767072012479785e-06, "loss": 0.3597, "step": 19040 }, { "epoch": 2.546269055897299, "grad_norm": 1.6079164743423462, "learning_rate": 1.1760278189830831e-06, "loss": 0.379, "step": 19041 }, { "epoch": 2.5464027814923775, "grad_norm": 1.6829663515090942, "learning_rate": 1.1753486206454433e-06, "loss": 0.385, "step": 19042 }, { "epoch": 2.5465365070874566, "grad_norm": 1.3769675493240356, "learning_rate": 1.174669606249218e-06, "loss": 0.3235, "step": 19043 }, { "epoch": 2.5466702326825352, "grad_norm": 1.6818662881851196, "learning_rate": 1.17399077580856e-06, "loss": 0.4019, "step": 19044 }, { "epoch": 2.5468039582776143, "grad_norm": 1.6505677700042725, "learning_rate": 1.1733121293376181e-06, "loss": 0.3885, "step": 19045 }, { "epoch": 2.5469376838726934, "grad_norm": 1.627119541168213, "learning_rate": 1.172633666850539e-06, "loss": 0.3704, "step": 19046 }, { "epoch": 2.547071409467772, "grad_norm": 1.6951234340667725, "learning_rate": 1.1719553883614642e-06, "loss": 0.4095, "step": 19047 }, { "epoch": 2.547205135062851, "grad_norm": 1.491919994354248, "learning_rate": 1.171277293884534e-06, "loss": 0.3072, "step": 19048 }, { "epoch": 2.5473388606579297, "grad_norm": 1.7445472478866577, "learning_rate": 1.1705993834338757e-06, "loss": 0.3841, "step": 19049 }, { "epoch": 2.5474725862530088, "grad_norm": 1.4695738554000854, "learning_rate": 1.1699216570236294e-06, "loss": 0.3486, "step": 19050 }, { "epoch": 2.547606311848088, "grad_norm": 1.5522472858428955, "learning_rate": 1.1692441146679135e-06, "loss": 0.3586, "step": 19051 }, { "epoch": 2.5477400374431665, "grad_norm": 1.6767175197601318, "learning_rate": 1.1685667563808534e-06, "loss": 0.3937, "step": 19052 }, { "epoch": 2.5478737630382455, "grad_norm": 1.6503719091415405, "learning_rate": 1.1678895821765712e-06, "loss": 0.3713, "step": 19053 }, { "epoch": 2.548007488633324, "grad_norm": 1.4516584873199463, "learning_rate": 1.1672125920691757e-06, "loss": 0.3538, "step": 19054 }, { "epoch": 2.5481412142284032, "grad_norm": 1.6738231182098389, "learning_rate": 1.1665357860727855e-06, "loss": 0.3831, "step": 19055 }, { "epoch": 2.5482749398234823, "grad_norm": 1.8212946653366089, "learning_rate": 1.1658591642015026e-06, "loss": 0.4117, "step": 19056 }, { "epoch": 2.5484086654185614, "grad_norm": 1.4859428405761719, "learning_rate": 1.1651827264694315e-06, "loss": 0.4018, "step": 19057 }, { "epoch": 2.54854239101364, "grad_norm": 1.5382407903671265, "learning_rate": 1.164506472890673e-06, "loss": 0.3891, "step": 19058 }, { "epoch": 2.548676116608719, "grad_norm": 1.5199946165084839, "learning_rate": 1.1638304034793224e-06, "loss": 0.3485, "step": 19059 }, { "epoch": 2.5488098422037977, "grad_norm": 1.433212161064148, "learning_rate": 1.1631545182494719e-06, "loss": 0.3533, "step": 19060 }, { "epoch": 2.5489435677988768, "grad_norm": 1.6004211902618408, "learning_rate": 1.162478817215209e-06, "loss": 0.3581, "step": 19061 }, { "epoch": 2.549077293393956, "grad_norm": 1.4585224390029907, "learning_rate": 1.161803300390618e-06, "loss": 0.3561, "step": 19062 }, { "epoch": 2.5492110189890345, "grad_norm": 1.3578685522079468, "learning_rate": 1.1611279677897813e-06, "loss": 0.3409, "step": 19063 }, { "epoch": 2.5493447445841135, "grad_norm": 1.5924443006515503, "learning_rate": 1.160452819426774e-06, "loss": 0.3909, "step": 19064 }, { "epoch": 2.549478470179192, "grad_norm": 1.5349609851837158, "learning_rate": 1.159777855315668e-06, "loss": 0.3715, "step": 19065 }, { "epoch": 2.5496121957742712, "grad_norm": 1.6952784061431885, "learning_rate": 1.1591030754705345e-06, "loss": 0.3803, "step": 19066 }, { "epoch": 2.5497459213693503, "grad_norm": 1.4711525440216064, "learning_rate": 1.1584284799054391e-06, "loss": 0.378, "step": 19067 }, { "epoch": 2.549879646964429, "grad_norm": 1.5080596208572388, "learning_rate": 1.157754068634438e-06, "loss": 0.3304, "step": 19068 }, { "epoch": 2.550013372559508, "grad_norm": 1.449229121208191, "learning_rate": 1.1570798416715933e-06, "loss": 0.3541, "step": 19069 }, { "epoch": 2.5501470981545866, "grad_norm": 1.7168112993240356, "learning_rate": 1.1564057990309584e-06, "loss": 0.4422, "step": 19070 }, { "epoch": 2.5502808237496657, "grad_norm": 1.5289077758789062, "learning_rate": 1.1557319407265821e-06, "loss": 0.3809, "step": 19071 }, { "epoch": 2.5504145493447448, "grad_norm": 1.6102027893066406, "learning_rate": 1.155058266772513e-06, "loss": 0.3755, "step": 19072 }, { "epoch": 2.5505482749398234, "grad_norm": 1.6097593307495117, "learning_rate": 1.1543847771827853e-06, "loss": 0.3363, "step": 19073 }, { "epoch": 2.5506820005349025, "grad_norm": 1.6595336198806763, "learning_rate": 1.1537114719714482e-06, "loss": 0.381, "step": 19074 }, { "epoch": 2.550815726129981, "grad_norm": 1.6471478939056396, "learning_rate": 1.1530383511525268e-06, "loss": 0.4048, "step": 19075 }, { "epoch": 2.55094945172506, "grad_norm": 1.451575517654419, "learning_rate": 1.1523654147400566e-06, "loss": 0.3492, "step": 19076 }, { "epoch": 2.5510831773201392, "grad_norm": 1.4595975875854492, "learning_rate": 1.1516926627480628e-06, "loss": 0.3287, "step": 19077 }, { "epoch": 2.551216902915218, "grad_norm": 1.4831385612487793, "learning_rate": 1.151020095190566e-06, "loss": 0.3228, "step": 19078 }, { "epoch": 2.551350628510297, "grad_norm": 1.6122426986694336, "learning_rate": 1.150347712081592e-06, "loss": 0.4014, "step": 19079 }, { "epoch": 2.5514843541053756, "grad_norm": 1.5214954614639282, "learning_rate": 1.14967551343515e-06, "loss": 0.3469, "step": 19080 }, { "epoch": 2.5516180797004546, "grad_norm": 1.6415059566497803, "learning_rate": 1.1490034992652533e-06, "loss": 0.3314, "step": 19081 }, { "epoch": 2.5517518052955337, "grad_norm": 1.386521339416504, "learning_rate": 1.1483316695859082e-06, "loss": 0.3549, "step": 19082 }, { "epoch": 2.5518855308906123, "grad_norm": 1.6972733736038208, "learning_rate": 1.1476600244111202e-06, "loss": 0.4035, "step": 19083 }, { "epoch": 2.5520192564856914, "grad_norm": 1.5155812501907349, "learning_rate": 1.1469885637548873e-06, "loss": 0.3152, "step": 19084 }, { "epoch": 2.55215298208077, "grad_norm": 1.4989207983016968, "learning_rate": 1.146317287631208e-06, "loss": 0.3339, "step": 19085 }, { "epoch": 2.552286707675849, "grad_norm": 1.5697849988937378, "learning_rate": 1.145646196054071e-06, "loss": 0.3119, "step": 19086 }, { "epoch": 2.552420433270928, "grad_norm": 1.791693091392517, "learning_rate": 1.1449752890374677e-06, "loss": 0.441, "step": 19087 }, { "epoch": 2.5525541588660072, "grad_norm": 1.517799735069275, "learning_rate": 1.14430456659538e-06, "loss": 0.3787, "step": 19088 }, { "epoch": 2.552687884461086, "grad_norm": 1.5205950736999512, "learning_rate": 1.14363402874179e-06, "loss": 0.3498, "step": 19089 }, { "epoch": 2.5528216100561645, "grad_norm": 1.4477252960205078, "learning_rate": 1.1429636754906747e-06, "loss": 0.339, "step": 19090 }, { "epoch": 2.5529553356512436, "grad_norm": 1.6501531600952148, "learning_rate": 1.1422935068560081e-06, "loss": 0.4066, "step": 19091 }, { "epoch": 2.5530890612463226, "grad_norm": 1.5092990398406982, "learning_rate": 1.1416235228517537e-06, "loss": 0.3803, "step": 19092 }, { "epoch": 2.5532227868414017, "grad_norm": 1.5319325923919678, "learning_rate": 1.1409537234918832e-06, "loss": 0.3699, "step": 19093 }, { "epoch": 2.5533565124364803, "grad_norm": 1.6036040782928467, "learning_rate": 1.1402841087903515e-06, "loss": 0.3782, "step": 19094 }, { "epoch": 2.5534902380315594, "grad_norm": 1.5045545101165771, "learning_rate": 1.1396146787611251e-06, "loss": 0.3831, "step": 19095 }, { "epoch": 2.553623963626638, "grad_norm": 1.7087947130203247, "learning_rate": 1.1389454334181494e-06, "loss": 0.3586, "step": 19096 }, { "epoch": 2.553757689221717, "grad_norm": 1.351144790649414, "learning_rate": 1.1382763727753742e-06, "loss": 0.3244, "step": 19097 }, { "epoch": 2.553891414816796, "grad_norm": 1.3674721717834473, "learning_rate": 1.1376074968467532e-06, "loss": 0.3304, "step": 19098 }, { "epoch": 2.554025140411875, "grad_norm": 1.7420861721038818, "learning_rate": 1.1369388056462217e-06, "loss": 0.4071, "step": 19099 }, { "epoch": 2.554158866006954, "grad_norm": 1.582253098487854, "learning_rate": 1.1362702991877184e-06, "loss": 0.3734, "step": 19100 }, { "epoch": 2.5542925916020325, "grad_norm": 1.429957389831543, "learning_rate": 1.13560197748518e-06, "loss": 0.3407, "step": 19101 }, { "epoch": 2.5544263171971116, "grad_norm": 1.4914565086364746, "learning_rate": 1.1349338405525368e-06, "loss": 0.3846, "step": 19102 }, { "epoch": 2.5545600427921906, "grad_norm": 1.6115014553070068, "learning_rate": 1.134265888403714e-06, "loss": 0.3369, "step": 19103 }, { "epoch": 2.5546937683872692, "grad_norm": 1.5044143199920654, "learning_rate": 1.1335981210526347e-06, "loss": 0.3493, "step": 19104 }, { "epoch": 2.5548274939823483, "grad_norm": 1.549876093864441, "learning_rate": 1.1329305385132194e-06, "loss": 0.3575, "step": 19105 }, { "epoch": 2.554961219577427, "grad_norm": 1.6749743223190308, "learning_rate": 1.132263140799381e-06, "loss": 0.3801, "step": 19106 }, { "epoch": 2.555094945172506, "grad_norm": 1.3045932054519653, "learning_rate": 1.1315959279250333e-06, "loss": 0.2883, "step": 19107 }, { "epoch": 2.555228670767585, "grad_norm": 1.3580830097198486, "learning_rate": 1.1309288999040812e-06, "loss": 0.296, "step": 19108 }, { "epoch": 2.5553623963626637, "grad_norm": 1.421675205230713, "learning_rate": 1.1302620567504297e-06, "loss": 0.3445, "step": 19109 }, { "epoch": 2.555496121957743, "grad_norm": 1.6112197637557983, "learning_rate": 1.1295953984779783e-06, "loss": 0.3436, "step": 19110 }, { "epoch": 2.5556298475528214, "grad_norm": 1.59079110622406, "learning_rate": 1.128928925100623e-06, "loss": 0.4154, "step": 19111 }, { "epoch": 2.5557635731479005, "grad_norm": 1.6369214057922363, "learning_rate": 1.1282626366322568e-06, "loss": 0.4133, "step": 19112 }, { "epoch": 2.5558972987429796, "grad_norm": 1.4973527193069458, "learning_rate": 1.1275965330867633e-06, "loss": 0.3546, "step": 19113 }, { "epoch": 2.556031024338058, "grad_norm": 1.7654200792312622, "learning_rate": 1.1269306144780335e-06, "loss": 0.4161, "step": 19114 }, { "epoch": 2.5561647499331372, "grad_norm": 1.4165576696395874, "learning_rate": 1.1262648808199427e-06, "loss": 0.3469, "step": 19115 }, { "epoch": 2.556298475528216, "grad_norm": 1.8297301530838013, "learning_rate": 1.125599332126368e-06, "loss": 0.4043, "step": 19116 }, { "epoch": 2.556432201123295, "grad_norm": 1.7113380432128906, "learning_rate": 1.124933968411187e-06, "loss": 0.3811, "step": 19117 }, { "epoch": 2.556565926718374, "grad_norm": 1.754014492034912, "learning_rate": 1.1242687896882597e-06, "loss": 0.3749, "step": 19118 }, { "epoch": 2.5566996523134526, "grad_norm": 1.5587259531021118, "learning_rate": 1.123603795971462e-06, "loss": 0.3503, "step": 19119 }, { "epoch": 2.5568333779085317, "grad_norm": 1.4812922477722168, "learning_rate": 1.1229389872746466e-06, "loss": 0.3503, "step": 19120 }, { "epoch": 2.5569671035036103, "grad_norm": 1.5797063112258911, "learning_rate": 1.122274363611674e-06, "loss": 0.3954, "step": 19121 }, { "epoch": 2.5571008290986894, "grad_norm": 1.5816878080368042, "learning_rate": 1.1216099249963964e-06, "loss": 0.347, "step": 19122 }, { "epoch": 2.5572345546937685, "grad_norm": 1.581769347190857, "learning_rate": 1.1209456714426625e-06, "loss": 0.3855, "step": 19123 }, { "epoch": 2.5573682802888476, "grad_norm": 1.5351593494415283, "learning_rate": 1.1202816029643238e-06, "loss": 0.4039, "step": 19124 }, { "epoch": 2.557502005883926, "grad_norm": 1.53022038936615, "learning_rate": 1.1196177195752167e-06, "loss": 0.3267, "step": 19125 }, { "epoch": 2.5576357314790052, "grad_norm": 1.4082410335540771, "learning_rate": 1.1189540212891791e-06, "loss": 0.3789, "step": 19126 }, { "epoch": 2.557769457074084, "grad_norm": 1.5588889122009277, "learning_rate": 1.118290508120048e-06, "loss": 0.3596, "step": 19127 }, { "epoch": 2.557903182669163, "grad_norm": 1.6179125308990479, "learning_rate": 1.117627180081653e-06, "loss": 0.3641, "step": 19128 }, { "epoch": 2.558036908264242, "grad_norm": 1.6267465353012085, "learning_rate": 1.1169640371878187e-06, "loss": 0.4023, "step": 19129 }, { "epoch": 2.5581706338593206, "grad_norm": 1.5655436515808105, "learning_rate": 1.1163010794523688e-06, "loss": 0.3959, "step": 19130 }, { "epoch": 2.5583043594543997, "grad_norm": 1.5150017738342285, "learning_rate": 1.115638306889123e-06, "loss": 0.3219, "step": 19131 }, { "epoch": 2.5584380850494783, "grad_norm": 1.7818816900253296, "learning_rate": 1.1149757195118949e-06, "loss": 0.3816, "step": 19132 }, { "epoch": 2.5585718106445574, "grad_norm": 1.7988239526748657, "learning_rate": 1.1143133173344978e-06, "loss": 0.3988, "step": 19133 }, { "epoch": 2.5587055362396365, "grad_norm": 1.5477315187454224, "learning_rate": 1.1136511003707329e-06, "loss": 0.3803, "step": 19134 }, { "epoch": 2.558839261834715, "grad_norm": 1.5614964962005615, "learning_rate": 1.1129890686344092e-06, "loss": 0.3609, "step": 19135 }, { "epoch": 2.558972987429794, "grad_norm": 1.4887311458587646, "learning_rate": 1.1123272221393267e-06, "loss": 0.334, "step": 19136 }, { "epoch": 2.559106713024873, "grad_norm": 1.714962363243103, "learning_rate": 1.1116655608992744e-06, "loss": 0.3704, "step": 19137 }, { "epoch": 2.559240438619952, "grad_norm": 1.7623990774154663, "learning_rate": 1.1110040849280534e-06, "loss": 0.4076, "step": 19138 }, { "epoch": 2.559374164215031, "grad_norm": 1.4858635663986206, "learning_rate": 1.1103427942394418e-06, "loss": 0.3273, "step": 19139 }, { "epoch": 2.5595078898101096, "grad_norm": 1.6777796745300293, "learning_rate": 1.1096816888472318e-06, "loss": 0.4072, "step": 19140 }, { "epoch": 2.5596416154051886, "grad_norm": 1.445453405380249, "learning_rate": 1.1090207687651978e-06, "loss": 0.3583, "step": 19141 }, { "epoch": 2.5597753410002673, "grad_norm": 1.5060611963272095, "learning_rate": 1.1083600340071165e-06, "loss": 0.3485, "step": 19142 }, { "epoch": 2.5599090665953463, "grad_norm": 1.6145423650741577, "learning_rate": 1.1076994845867662e-06, "loss": 0.3523, "step": 19143 }, { "epoch": 2.5600427921904254, "grad_norm": 1.5225163698196411, "learning_rate": 1.1070391205179087e-06, "loss": 0.3715, "step": 19144 }, { "epoch": 2.560176517785504, "grad_norm": 1.3707412481307983, "learning_rate": 1.106378941814311e-06, "loss": 0.3391, "step": 19145 }, { "epoch": 2.560310243380583, "grad_norm": 1.4909464120864868, "learning_rate": 1.1057189484897335e-06, "loss": 0.3075, "step": 19146 }, { "epoch": 2.5604439689756617, "grad_norm": 1.4648799896240234, "learning_rate": 1.1050591405579347e-06, "loss": 0.339, "step": 19147 }, { "epoch": 2.560577694570741, "grad_norm": 1.3774727582931519, "learning_rate": 1.1043995180326662e-06, "loss": 0.3302, "step": 19148 }, { "epoch": 2.56071142016582, "grad_norm": 1.316407322883606, "learning_rate": 1.1037400809276777e-06, "loss": 0.3194, "step": 19149 }, { "epoch": 2.5608451457608985, "grad_norm": 1.5402101278305054, "learning_rate": 1.1030808292567142e-06, "loss": 0.3985, "step": 19150 }, { "epoch": 2.5609788713559776, "grad_norm": 1.5872464179992676, "learning_rate": 1.1024217630335165e-06, "loss": 0.3823, "step": 19151 }, { "epoch": 2.561112596951056, "grad_norm": 1.5644649267196655, "learning_rate": 1.1017628822718262e-06, "loss": 0.3546, "step": 19152 }, { "epoch": 2.5612463225461353, "grad_norm": 1.5004148483276367, "learning_rate": 1.10110418698537e-06, "loss": 0.3656, "step": 19153 }, { "epoch": 2.5613800481412143, "grad_norm": 1.6251713037490845, "learning_rate": 1.1004456771878836e-06, "loss": 0.3429, "step": 19154 }, { "epoch": 2.561513773736293, "grad_norm": 1.6321078538894653, "learning_rate": 1.0997873528930903e-06, "loss": 0.3941, "step": 19155 }, { "epoch": 2.561647499331372, "grad_norm": 1.5436561107635498, "learning_rate": 1.0991292141147135e-06, "loss": 0.3484, "step": 19156 }, { "epoch": 2.5617812249264507, "grad_norm": 1.4110045433044434, "learning_rate": 1.098471260866474e-06, "loss": 0.3326, "step": 19157 }, { "epoch": 2.5619149505215297, "grad_norm": 1.5870317220687866, "learning_rate": 1.0978134931620787e-06, "loss": 0.4444, "step": 19158 }, { "epoch": 2.562048676116609, "grad_norm": 1.471293330192566, "learning_rate": 1.0971559110152463e-06, "loss": 0.3646, "step": 19159 }, { "epoch": 2.562182401711688, "grad_norm": 1.4929289817810059, "learning_rate": 1.0964985144396778e-06, "loss": 0.3497, "step": 19160 }, { "epoch": 2.5623161273067665, "grad_norm": 1.4330967664718628, "learning_rate": 1.0958413034490757e-06, "loss": 0.3418, "step": 19161 }, { "epoch": 2.5624498529018456, "grad_norm": 1.6930015087127686, "learning_rate": 1.0951842780571464e-06, "loss": 0.4011, "step": 19162 }, { "epoch": 2.562583578496924, "grad_norm": 1.5958704948425293, "learning_rate": 1.094527438277575e-06, "loss": 0.3429, "step": 19163 }, { "epoch": 2.5627173040920033, "grad_norm": 1.4633170366287231, "learning_rate": 1.0938707841240614e-06, "loss": 0.3438, "step": 19164 }, { "epoch": 2.5628510296870823, "grad_norm": 1.6038436889648438, "learning_rate": 1.093214315610287e-06, "loss": 0.3628, "step": 19165 }, { "epoch": 2.562984755282161, "grad_norm": 1.4914264678955078, "learning_rate": 1.0925580327499386e-06, "loss": 0.3375, "step": 19166 }, { "epoch": 2.56311848087724, "grad_norm": 1.5384317636489868, "learning_rate": 1.091901935556693e-06, "loss": 0.3576, "step": 19167 }, { "epoch": 2.5632522064723187, "grad_norm": 1.7759722471237183, "learning_rate": 1.091246024044228e-06, "loss": 0.364, "step": 19168 }, { "epoch": 2.5633859320673977, "grad_norm": 1.452349066734314, "learning_rate": 1.0905902982262151e-06, "loss": 0.3705, "step": 19169 }, { "epoch": 2.563519657662477, "grad_norm": 1.485260009765625, "learning_rate": 1.0899347581163222e-06, "loss": 0.3455, "step": 19170 }, { "epoch": 2.5636533832575554, "grad_norm": 1.620936632156372, "learning_rate": 1.0892794037282129e-06, "loss": 0.3588, "step": 19171 }, { "epoch": 2.5637871088526345, "grad_norm": 1.6014331579208374, "learning_rate": 1.088624235075547e-06, "loss": 0.3697, "step": 19172 }, { "epoch": 2.563920834447713, "grad_norm": 1.76543128490448, "learning_rate": 1.0879692521719831e-06, "loss": 0.4002, "step": 19173 }, { "epoch": 2.564054560042792, "grad_norm": 1.4574775695800781, "learning_rate": 1.087314455031172e-06, "loss": 0.3394, "step": 19174 }, { "epoch": 2.5641882856378713, "grad_norm": 1.4823658466339111, "learning_rate": 1.086659843666762e-06, "loss": 0.3485, "step": 19175 }, { "epoch": 2.56432201123295, "grad_norm": 1.6581299304962158, "learning_rate": 1.0860054180924007e-06, "loss": 0.3485, "step": 19176 }, { "epoch": 2.564455736828029, "grad_norm": 1.4035308361053467, "learning_rate": 1.085351178321722e-06, "loss": 0.3498, "step": 19177 }, { "epoch": 2.5645894624231076, "grad_norm": 1.621748447418213, "learning_rate": 1.0846971243683724e-06, "loss": 0.3624, "step": 19178 }, { "epoch": 2.5647231880181867, "grad_norm": 1.6111441850662231, "learning_rate": 1.0840432562459757e-06, "loss": 0.3597, "step": 19179 }, { "epoch": 2.5648569136132657, "grad_norm": 1.45008385181427, "learning_rate": 1.0833895739681689e-06, "loss": 0.3856, "step": 19180 }, { "epoch": 2.5649906392083444, "grad_norm": 1.4364136457443237, "learning_rate": 1.082736077548575e-06, "loss": 0.3314, "step": 19181 }, { "epoch": 2.5651243648034234, "grad_norm": 1.6160213947296143, "learning_rate": 1.0820827670008104e-06, "loss": 0.3955, "step": 19182 }, { "epoch": 2.565258090398502, "grad_norm": 1.5793753862380981, "learning_rate": 1.0814296423385018e-06, "loss": 0.3903, "step": 19183 }, { "epoch": 2.565391815993581, "grad_norm": 1.463008165359497, "learning_rate": 1.0807767035752558e-06, "loss": 0.353, "step": 19184 }, { "epoch": 2.56552554158866, "grad_norm": 1.5994659662246704, "learning_rate": 1.0801239507246853e-06, "loss": 0.3422, "step": 19185 }, { "epoch": 2.565659267183739, "grad_norm": 1.5649604797363281, "learning_rate": 1.0794713838003945e-06, "loss": 0.3844, "step": 19186 }, { "epoch": 2.565792992778818, "grad_norm": 1.5192291736602783, "learning_rate": 1.078819002815986e-06, "loss": 0.3539, "step": 19187 }, { "epoch": 2.5659267183738965, "grad_norm": 1.4657697677612305, "learning_rate": 1.0781668077850616e-06, "loss": 0.3474, "step": 19188 }, { "epoch": 2.5660604439689756, "grad_norm": 1.554478406906128, "learning_rate": 1.0775147987212108e-06, "loss": 0.3581, "step": 19189 }, { "epoch": 2.5661941695640547, "grad_norm": 1.447070598602295, "learning_rate": 1.0768629756380266e-06, "loss": 0.3359, "step": 19190 }, { "epoch": 2.5663278951591337, "grad_norm": 1.6692407131195068, "learning_rate": 1.0762113385490957e-06, "loss": 0.3828, "step": 19191 }, { "epoch": 2.5664616207542124, "grad_norm": 1.3471965789794922, "learning_rate": 1.0755598874679995e-06, "loss": 0.3033, "step": 19192 }, { "epoch": 2.566595346349291, "grad_norm": 1.6716856956481934, "learning_rate": 1.0749086224083184e-06, "loss": 0.4058, "step": 19193 }, { "epoch": 2.56672907194437, "grad_norm": 1.5668190717697144, "learning_rate": 1.0742575433836255e-06, "loss": 0.3744, "step": 19194 }, { "epoch": 2.566862797539449, "grad_norm": 1.7902649641036987, "learning_rate": 1.0736066504074937e-06, "loss": 0.4168, "step": 19195 }, { "epoch": 2.566996523134528, "grad_norm": 1.5918445587158203, "learning_rate": 1.07295594349349e-06, "loss": 0.382, "step": 19196 }, { "epoch": 2.567130248729607, "grad_norm": 1.5891847610473633, "learning_rate": 1.0723054226551798e-06, "loss": 0.3466, "step": 19197 }, { "epoch": 2.567263974324686, "grad_norm": 1.5330432653427124, "learning_rate": 1.0716550879061148e-06, "loss": 0.3984, "step": 19198 }, { "epoch": 2.5673976999197645, "grad_norm": 1.5324773788452148, "learning_rate": 1.0710049392598587e-06, "loss": 0.4194, "step": 19199 }, { "epoch": 2.5675314255148436, "grad_norm": 1.5851337909698486, "learning_rate": 1.0703549767299625e-06, "loss": 0.3839, "step": 19200 }, { "epoch": 2.5676651511099227, "grad_norm": 1.4476577043533325, "learning_rate": 1.069705200329969e-06, "loss": 0.3812, "step": 19201 }, { "epoch": 2.5677988767050013, "grad_norm": 1.4401919841766357, "learning_rate": 1.0690556100734284e-06, "loss": 0.3221, "step": 19202 }, { "epoch": 2.5679326023000804, "grad_norm": 1.5651764869689941, "learning_rate": 1.0684062059738731e-06, "loss": 0.3895, "step": 19203 }, { "epoch": 2.568066327895159, "grad_norm": 1.3623939752578735, "learning_rate": 1.0677569880448479e-06, "loss": 0.3441, "step": 19204 }, { "epoch": 2.568200053490238, "grad_norm": 1.6447831392288208, "learning_rate": 1.06710795629988e-06, "loss": 0.3829, "step": 19205 }, { "epoch": 2.568333779085317, "grad_norm": 1.437551498413086, "learning_rate": 1.0664591107524958e-06, "loss": 0.3353, "step": 19206 }, { "epoch": 2.5684675046803958, "grad_norm": 1.3475244045257568, "learning_rate": 1.0658104514162281e-06, "loss": 0.3741, "step": 19207 }, { "epoch": 2.568601230275475, "grad_norm": 1.508324146270752, "learning_rate": 1.0651619783045875e-06, "loss": 0.3497, "step": 19208 }, { "epoch": 2.5687349558705534, "grad_norm": 1.6285452842712402, "learning_rate": 1.0645136914311005e-06, "loss": 0.3915, "step": 19209 }, { "epoch": 2.5688686814656325, "grad_norm": 1.6734073162078857, "learning_rate": 1.063865590809272e-06, "loss": 0.3827, "step": 19210 }, { "epoch": 2.5690024070607116, "grad_norm": 1.6095579862594604, "learning_rate": 1.0632176764526159e-06, "loss": 0.3765, "step": 19211 }, { "epoch": 2.56913613265579, "grad_norm": 1.5068771839141846, "learning_rate": 1.0625699483746355e-06, "loss": 0.3464, "step": 19212 }, { "epoch": 2.5692698582508693, "grad_norm": 1.475645899772644, "learning_rate": 1.0619224065888312e-06, "loss": 0.3968, "step": 19213 }, { "epoch": 2.569403583845948, "grad_norm": 1.381011724472046, "learning_rate": 1.0612750511087022e-06, "loss": 0.3422, "step": 19214 }, { "epoch": 2.569537309441027, "grad_norm": 1.6200796365737915, "learning_rate": 1.0606278819477412e-06, "loss": 0.4273, "step": 19215 }, { "epoch": 2.569671035036106, "grad_norm": 1.5439568758010864, "learning_rate": 1.0599808991194383e-06, "loss": 0.3554, "step": 19216 }, { "epoch": 2.5698047606311847, "grad_norm": 1.691437005996704, "learning_rate": 1.0593341026372784e-06, "loss": 0.3757, "step": 19217 }, { "epoch": 2.5699384862262638, "grad_norm": 1.4096848964691162, "learning_rate": 1.058687492514745e-06, "loss": 0.3423, "step": 19218 }, { "epoch": 2.5700722118213424, "grad_norm": 1.5017589330673218, "learning_rate": 1.058041068765313e-06, "loss": 0.3792, "step": 19219 }, { "epoch": 2.5702059374164214, "grad_norm": 1.5276645421981812, "learning_rate": 1.0573948314024597e-06, "loss": 0.3624, "step": 19220 }, { "epoch": 2.5703396630115005, "grad_norm": 1.658337116241455, "learning_rate": 1.056748780439656e-06, "loss": 0.3774, "step": 19221 }, { "epoch": 2.570473388606579, "grad_norm": 1.4802452325820923, "learning_rate": 1.0561029158903623e-06, "loss": 0.3464, "step": 19222 }, { "epoch": 2.570607114201658, "grad_norm": 1.633117437362671, "learning_rate": 1.0554572377680483e-06, "loss": 0.3459, "step": 19223 }, { "epoch": 2.570740839796737, "grad_norm": 1.6145466566085815, "learning_rate": 1.0548117460861652e-06, "loss": 0.3823, "step": 19224 }, { "epoch": 2.570874565391816, "grad_norm": 1.6716312170028687, "learning_rate": 1.0541664408581742e-06, "loss": 0.3632, "step": 19225 }, { "epoch": 2.571008290986895, "grad_norm": 1.6233869791030884, "learning_rate": 1.0535213220975248e-06, "loss": 0.3854, "step": 19226 }, { "epoch": 2.571142016581974, "grad_norm": 1.6112399101257324, "learning_rate": 1.0528763898176586e-06, "loss": 0.3465, "step": 19227 }, { "epoch": 2.5712757421770527, "grad_norm": 1.4784741401672363, "learning_rate": 1.0522316440320279e-06, "loss": 0.3578, "step": 19228 }, { "epoch": 2.5714094677721318, "grad_norm": 1.6744282245635986, "learning_rate": 1.0515870847540632e-06, "loss": 0.4236, "step": 19229 }, { "epoch": 2.5715431933672104, "grad_norm": 1.430050015449524, "learning_rate": 1.0509427119972038e-06, "loss": 0.3313, "step": 19230 }, { "epoch": 2.5716769189622894, "grad_norm": 1.5020197629928589, "learning_rate": 1.0502985257748788e-06, "loss": 0.3659, "step": 19231 }, { "epoch": 2.5718106445573685, "grad_norm": 1.4548124074935913, "learning_rate": 1.0496545261005164e-06, "loss": 0.3232, "step": 19232 }, { "epoch": 2.571944370152447, "grad_norm": 1.517027735710144, "learning_rate": 1.0490107129875448e-06, "loss": 0.3628, "step": 19233 }, { "epoch": 2.572078095747526, "grad_norm": 1.4259926080703735, "learning_rate": 1.0483670864493777e-06, "loss": 0.3186, "step": 19234 }, { "epoch": 2.572211821342605, "grad_norm": 1.615278720855713, "learning_rate": 1.0477236464994322e-06, "loss": 0.3545, "step": 19235 }, { "epoch": 2.572345546937684, "grad_norm": 1.5949742794036865, "learning_rate": 1.047080393151122e-06, "loss": 0.4081, "step": 19236 }, { "epoch": 2.572479272532763, "grad_norm": 1.5634150505065918, "learning_rate": 1.046437326417853e-06, "loss": 0.4017, "step": 19237 }, { "epoch": 2.5726129981278416, "grad_norm": 1.5389418601989746, "learning_rate": 1.045794446313031e-06, "loss": 0.4078, "step": 19238 }, { "epoch": 2.5727467237229207, "grad_norm": 1.58133864402771, "learning_rate": 1.0451517528500544e-06, "loss": 0.3658, "step": 19239 }, { "epoch": 2.5728804493179993, "grad_norm": 1.2974414825439453, "learning_rate": 1.0445092460423222e-06, "loss": 0.3098, "step": 19240 }, { "epoch": 2.5730141749130784, "grad_norm": 1.6399016380310059, "learning_rate": 1.0438669259032241e-06, "loss": 0.3825, "step": 19241 }, { "epoch": 2.5731479005081574, "grad_norm": 1.683309555053711, "learning_rate": 1.0432247924461525e-06, "loss": 0.3629, "step": 19242 }, { "epoch": 2.573281626103236, "grad_norm": 1.513400912284851, "learning_rate": 1.0425828456844855e-06, "loss": 0.3831, "step": 19243 }, { "epoch": 2.573415351698315, "grad_norm": 1.5276539325714111, "learning_rate": 1.0419410856316092e-06, "loss": 0.3478, "step": 19244 }, { "epoch": 2.5735490772933938, "grad_norm": 1.757575273513794, "learning_rate": 1.0412995123009006e-06, "loss": 0.4055, "step": 19245 }, { "epoch": 2.573682802888473, "grad_norm": 1.3747422695159912, "learning_rate": 1.040658125705728e-06, "loss": 0.3613, "step": 19246 }, { "epoch": 2.573816528483552, "grad_norm": 1.7589099407196045, "learning_rate": 1.0400169258594673e-06, "loss": 0.3607, "step": 19247 }, { "epoch": 2.5739502540786305, "grad_norm": 1.539939045906067, "learning_rate": 1.0393759127754765e-06, "loss": 0.3684, "step": 19248 }, { "epoch": 2.5740839796737096, "grad_norm": 1.4342817068099976, "learning_rate": 1.0387350864671242e-06, "loss": 0.3371, "step": 19249 }, { "epoch": 2.5742177052687882, "grad_norm": 1.5475482940673828, "learning_rate": 1.0380944469477617e-06, "loss": 0.3451, "step": 19250 }, { "epoch": 2.5743514308638673, "grad_norm": 1.6336615085601807, "learning_rate": 1.0374539942307426e-06, "loss": 0.3528, "step": 19251 }, { "epoch": 2.5744851564589464, "grad_norm": 1.60920250415802, "learning_rate": 1.0368137283294232e-06, "loss": 0.3676, "step": 19252 }, { "epoch": 2.574618882054025, "grad_norm": 1.4386471509933472, "learning_rate": 1.0361736492571428e-06, "loss": 0.3698, "step": 19253 }, { "epoch": 2.574752607649104, "grad_norm": 1.6944243907928467, "learning_rate": 1.035533757027245e-06, "loss": 0.3811, "step": 19254 }, { "epoch": 2.5748863332441827, "grad_norm": 1.53303861618042, "learning_rate": 1.034894051653068e-06, "loss": 0.338, "step": 19255 }, { "epoch": 2.5750200588392618, "grad_norm": 1.5082919597625732, "learning_rate": 1.0342545331479459e-06, "loss": 0.3552, "step": 19256 }, { "epoch": 2.575153784434341, "grad_norm": 1.5976619720458984, "learning_rate": 1.0336152015252088e-06, "loss": 0.3612, "step": 19257 }, { "epoch": 2.5752875100294195, "grad_norm": 1.5609242916107178, "learning_rate": 1.032976056798184e-06, "loss": 0.3532, "step": 19258 }, { "epoch": 2.5754212356244985, "grad_norm": 1.4943677186965942, "learning_rate": 1.0323370989801907e-06, "loss": 0.3548, "step": 19259 }, { "epoch": 2.575554961219577, "grad_norm": 1.572357177734375, "learning_rate": 1.0316983280845505e-06, "loss": 0.3397, "step": 19260 }, { "epoch": 2.5756886868146562, "grad_norm": 1.6423535346984863, "learning_rate": 1.0310597441245795e-06, "loss": 0.3962, "step": 19261 }, { "epoch": 2.5758224124097353, "grad_norm": 1.4560085535049438, "learning_rate": 1.0304213471135816e-06, "loss": 0.3462, "step": 19262 }, { "epoch": 2.5759561380048144, "grad_norm": 1.62520432472229, "learning_rate": 1.0297831370648692e-06, "loss": 0.3882, "step": 19263 }, { "epoch": 2.576089863599893, "grad_norm": 1.6136783361434937, "learning_rate": 1.029145113991743e-06, "loss": 0.3822, "step": 19264 }, { "epoch": 2.576223589194972, "grad_norm": 1.5375196933746338, "learning_rate": 1.0285072779075045e-06, "loss": 0.3444, "step": 19265 }, { "epoch": 2.5763573147900507, "grad_norm": 1.4805903434753418, "learning_rate": 1.0278696288254475e-06, "loss": 0.3838, "step": 19266 }, { "epoch": 2.5764910403851298, "grad_norm": 1.7585663795471191, "learning_rate": 1.0272321667588592e-06, "loss": 0.4062, "step": 19267 }, { "epoch": 2.576624765980209, "grad_norm": 1.600266933441162, "learning_rate": 1.0265948917210345e-06, "loss": 0.3732, "step": 19268 }, { "epoch": 2.5767584915752875, "grad_norm": 1.573654294013977, "learning_rate": 1.0259578037252505e-06, "loss": 0.3488, "step": 19269 }, { "epoch": 2.5768922171703665, "grad_norm": 1.568121314048767, "learning_rate": 1.0253209027847876e-06, "loss": 0.3709, "step": 19270 }, { "epoch": 2.577025942765445, "grad_norm": 1.55734121799469, "learning_rate": 1.0246841889129255e-06, "loss": 0.3853, "step": 19271 }, { "epoch": 2.5771596683605242, "grad_norm": 1.3577525615692139, "learning_rate": 1.02404766212293e-06, "loss": 0.3249, "step": 19272 }, { "epoch": 2.5772933939556033, "grad_norm": 1.5033268928527832, "learning_rate": 1.023411322428075e-06, "loss": 0.3145, "step": 19273 }, { "epoch": 2.577427119550682, "grad_norm": 1.6319630146026611, "learning_rate": 1.02277516984162e-06, "loss": 0.3741, "step": 19274 }, { "epoch": 2.577560845145761, "grad_norm": 1.6792452335357666, "learning_rate": 1.0221392043768264e-06, "loss": 0.3855, "step": 19275 }, { "epoch": 2.5776945707408396, "grad_norm": 1.6028178930282593, "learning_rate": 1.0215034260469502e-06, "loss": 0.3878, "step": 19276 }, { "epoch": 2.5778282963359187, "grad_norm": 1.6867008209228516, "learning_rate": 1.0208678348652433e-06, "loss": 0.4167, "step": 19277 }, { "epoch": 2.5779620219309978, "grad_norm": 1.524542212486267, "learning_rate": 1.020232430844954e-06, "loss": 0.3234, "step": 19278 }, { "epoch": 2.5780957475260764, "grad_norm": 1.7698523998260498, "learning_rate": 1.019597213999327e-06, "loss": 0.3971, "step": 19279 }, { "epoch": 2.5782294731211555, "grad_norm": 1.5787378549575806, "learning_rate": 1.018962184341603e-06, "loss": 0.3998, "step": 19280 }, { "epoch": 2.578363198716234, "grad_norm": 1.6036442518234253, "learning_rate": 1.0183273418850192e-06, "loss": 0.3597, "step": 19281 }, { "epoch": 2.578496924311313, "grad_norm": 1.6378891468048096, "learning_rate": 1.017692686642806e-06, "loss": 0.3802, "step": 19282 }, { "epoch": 2.5786306499063922, "grad_norm": 1.4724100828170776, "learning_rate": 1.0170582186281952e-06, "loss": 0.3278, "step": 19283 }, { "epoch": 2.578764375501471, "grad_norm": 1.435001015663147, "learning_rate": 1.0164239378544083e-06, "loss": 0.3352, "step": 19284 }, { "epoch": 2.57889810109655, "grad_norm": 1.4401791095733643, "learning_rate": 1.0157898443346715e-06, "loss": 0.3631, "step": 19285 }, { "epoch": 2.5790318266916286, "grad_norm": 1.5669329166412354, "learning_rate": 1.015155938082194e-06, "loss": 0.3913, "step": 19286 }, { "epoch": 2.5791655522867076, "grad_norm": 1.5061671733856201, "learning_rate": 1.0145222191101967e-06, "loss": 0.3273, "step": 19287 }, { "epoch": 2.5792992778817867, "grad_norm": 1.5431190729141235, "learning_rate": 1.013888687431882e-06, "loss": 0.3441, "step": 19288 }, { "epoch": 2.5794330034768653, "grad_norm": 1.6048848628997803, "learning_rate": 1.0132553430604608e-06, "loss": 0.4045, "step": 19289 }, { "epoch": 2.5795667290719444, "grad_norm": 1.5820257663726807, "learning_rate": 1.0126221860091357e-06, "loss": 0.3472, "step": 19290 }, { "epoch": 2.579700454667023, "grad_norm": 1.565089464187622, "learning_rate": 1.011989216291096e-06, "loss": 0.3585, "step": 19291 }, { "epoch": 2.579834180262102, "grad_norm": 1.7364336252212524, "learning_rate": 1.0113564339195447e-06, "loss": 0.4077, "step": 19292 }, { "epoch": 2.579967905857181, "grad_norm": 1.6415561437606812, "learning_rate": 1.0107238389076636e-06, "loss": 0.3705, "step": 19293 }, { "epoch": 2.5801016314522602, "grad_norm": 1.6672186851501465, "learning_rate": 1.010091431268645e-06, "loss": 0.363, "step": 19294 }, { "epoch": 2.580235357047339, "grad_norm": 1.4849148988723755, "learning_rate": 1.0094592110156676e-06, "loss": 0.3176, "step": 19295 }, { "epoch": 2.5803690826424175, "grad_norm": 1.4996753931045532, "learning_rate": 1.0088271781619096e-06, "loss": 0.3658, "step": 19296 }, { "epoch": 2.5805028082374966, "grad_norm": 1.4081658124923706, "learning_rate": 1.0081953327205452e-06, "loss": 0.3568, "step": 19297 }, { "epoch": 2.5806365338325756, "grad_norm": 1.594196081161499, "learning_rate": 1.0075636747047446e-06, "loss": 0.3452, "step": 19298 }, { "epoch": 2.5807702594276547, "grad_norm": 1.4774590730667114, "learning_rate": 1.0069322041276752e-06, "loss": 0.3582, "step": 19299 }, { "epoch": 2.5809039850227333, "grad_norm": 1.4119399785995483, "learning_rate": 1.0063009210024978e-06, "loss": 0.3649, "step": 19300 }, { "epoch": 2.5810377106178124, "grad_norm": 1.4174410104751587, "learning_rate": 1.0056698253423725e-06, "loss": 0.3614, "step": 19301 }, { "epoch": 2.581171436212891, "grad_norm": 1.6803432703018188, "learning_rate": 1.0050389171604523e-06, "loss": 0.3897, "step": 19302 }, { "epoch": 2.58130516180797, "grad_norm": 1.6410739421844482, "learning_rate": 1.004408196469888e-06, "loss": 0.3454, "step": 19303 }, { "epoch": 2.581438887403049, "grad_norm": 1.6088812351226807, "learning_rate": 1.003777663283828e-06, "loss": 0.3476, "step": 19304 }, { "epoch": 2.581572612998128, "grad_norm": 1.6194511651992798, "learning_rate": 1.0031473176154139e-06, "loss": 0.3537, "step": 19305 }, { "epoch": 2.581706338593207, "grad_norm": 1.6188567876815796, "learning_rate": 1.0025171594777872e-06, "loss": 0.3873, "step": 19306 }, { "epoch": 2.5818400641882855, "grad_norm": 1.6760709285736084, "learning_rate": 1.0018871888840764e-06, "loss": 0.3423, "step": 19307 }, { "epoch": 2.5819737897833646, "grad_norm": 1.5439616441726685, "learning_rate": 1.001257405847419e-06, "loss": 0.3835, "step": 19308 }, { "epoch": 2.5821075153784436, "grad_norm": 1.667294979095459, "learning_rate": 1.0006278103809409e-06, "loss": 0.3871, "step": 19309 }, { "epoch": 2.5822412409735223, "grad_norm": 1.3349734544754028, "learning_rate": 9.999984024977626e-07, "loss": 0.335, "step": 19310 }, { "epoch": 2.5823749665686013, "grad_norm": 1.5528465509414673, "learning_rate": 9.993691822110096e-07, "loss": 0.3626, "step": 19311 }, { "epoch": 2.58250869216368, "grad_norm": 1.52981698513031, "learning_rate": 9.987401495337878e-07, "loss": 0.3486, "step": 19312 }, { "epoch": 2.582642417758759, "grad_norm": 1.6052106618881226, "learning_rate": 9.98111304479219e-07, "loss": 0.3619, "step": 19313 }, { "epoch": 2.582776143353838, "grad_norm": 1.4527983665466309, "learning_rate": 9.97482647060405e-07, "loss": 0.321, "step": 19314 }, { "epoch": 2.5829098689489167, "grad_norm": 1.5236835479736328, "learning_rate": 9.968541772904472e-07, "loss": 0.3696, "step": 19315 }, { "epoch": 2.583043594543996, "grad_norm": 1.6799372434616089, "learning_rate": 9.962258951824544e-07, "loss": 0.4027, "step": 19316 }, { "epoch": 2.5831773201390744, "grad_norm": 1.6325856447219849, "learning_rate": 9.955978007495116e-07, "loss": 0.3681, "step": 19317 }, { "epoch": 2.5833110457341535, "grad_norm": 1.4206188917160034, "learning_rate": 9.949698940047214e-07, "loss": 0.3474, "step": 19318 }, { "epoch": 2.5834447713292326, "grad_norm": 1.6846051216125488, "learning_rate": 9.943421749611648e-07, "loss": 0.3929, "step": 19319 }, { "epoch": 2.583578496924311, "grad_norm": 1.3665893077850342, "learning_rate": 9.937146436319278e-07, "loss": 0.3389, "step": 19320 }, { "epoch": 2.5837122225193903, "grad_norm": 1.806023120880127, "learning_rate": 9.930873000300912e-07, "loss": 0.386, "step": 19321 }, { "epoch": 2.583845948114469, "grad_norm": 1.488228440284729, "learning_rate": 9.92460144168731e-07, "loss": 0.3457, "step": 19322 }, { "epoch": 2.583979673709548, "grad_norm": 1.3787744045257568, "learning_rate": 9.918331760609201e-07, "loss": 0.3231, "step": 19323 }, { "epoch": 2.584113399304627, "grad_norm": 1.5200321674346924, "learning_rate": 9.91206395719726e-07, "loss": 0.3666, "step": 19324 }, { "epoch": 2.5842471248997056, "grad_norm": 1.4724156856536865, "learning_rate": 9.905798031582147e-07, "loss": 0.3662, "step": 19325 }, { "epoch": 2.5843808504947847, "grad_norm": 1.596299171447754, "learning_rate": 9.89953398389447e-07, "loss": 0.3828, "step": 19326 }, { "epoch": 2.5845145760898633, "grad_norm": 1.5065652132034302, "learning_rate": 9.893271814264781e-07, "loss": 0.3788, "step": 19327 }, { "epoch": 2.5846483016849424, "grad_norm": 1.5778130292892456, "learning_rate": 9.88701152282362e-07, "loss": 0.3616, "step": 19328 }, { "epoch": 2.5847820272800215, "grad_norm": 1.4566751718521118, "learning_rate": 9.88075310970148e-07, "loss": 0.3227, "step": 19329 }, { "epoch": 2.5849157528751006, "grad_norm": 1.460789680480957, "learning_rate": 9.874496575028814e-07, "loss": 0.3297, "step": 19330 }, { "epoch": 2.585049478470179, "grad_norm": 1.5926569700241089, "learning_rate": 9.868241918935994e-07, "loss": 0.3383, "step": 19331 }, { "epoch": 2.5851832040652583, "grad_norm": 1.6023719310760498, "learning_rate": 9.861989141553463e-07, "loss": 0.3473, "step": 19332 }, { "epoch": 2.585316929660337, "grad_norm": 1.5605714321136475, "learning_rate": 9.855738243011482e-07, "loss": 0.3936, "step": 19333 }, { "epoch": 2.585450655255416, "grad_norm": 1.5475319623947144, "learning_rate": 9.849489223440401e-07, "loss": 0.3455, "step": 19334 }, { "epoch": 2.585584380850495, "grad_norm": 1.5588434934616089, "learning_rate": 9.843242082970462e-07, "loss": 0.355, "step": 19335 }, { "epoch": 2.5857181064455736, "grad_norm": 1.3440344333648682, "learning_rate": 9.836996821731836e-07, "loss": 0.3517, "step": 19336 }, { "epoch": 2.5858518320406527, "grad_norm": 1.6707100868225098, "learning_rate": 9.830753439854769e-07, "loss": 0.3815, "step": 19337 }, { "epoch": 2.5859855576357313, "grad_norm": 1.7597123384475708, "learning_rate": 9.82451193746935e-07, "loss": 0.4004, "step": 19338 }, { "epoch": 2.5861192832308104, "grad_norm": 1.7298433780670166, "learning_rate": 9.81827231470569e-07, "loss": 0.4127, "step": 19339 }, { "epoch": 2.5862530088258895, "grad_norm": 1.783390760421753, "learning_rate": 9.812034571693841e-07, "loss": 0.3644, "step": 19340 }, { "epoch": 2.586386734420968, "grad_norm": 1.4775928258895874, "learning_rate": 9.80579870856384e-07, "loss": 0.3638, "step": 19341 }, { "epoch": 2.586520460016047, "grad_norm": 1.5533064603805542, "learning_rate": 9.799564725445653e-07, "loss": 0.345, "step": 19342 }, { "epoch": 2.586654185611126, "grad_norm": 1.5192970037460327, "learning_rate": 9.79333262246923e-07, "loss": 0.3805, "step": 19343 }, { "epoch": 2.586787911206205, "grad_norm": 1.4900189638137817, "learning_rate": 9.787102399764482e-07, "loss": 0.3112, "step": 19344 }, { "epoch": 2.586921636801284, "grad_norm": 1.4583711624145508, "learning_rate": 9.780874057461242e-07, "loss": 0.3452, "step": 19345 }, { "epoch": 2.5870553623963626, "grad_norm": 1.6585460901260376, "learning_rate": 9.774647595689356e-07, "loss": 0.4001, "step": 19346 }, { "epoch": 2.5871890879914416, "grad_norm": 1.377306580543518, "learning_rate": 9.76842301457861e-07, "loss": 0.3505, "step": 19347 }, { "epoch": 2.5873228135865203, "grad_norm": 1.5943844318389893, "learning_rate": 9.76220031425874e-07, "loss": 0.3332, "step": 19348 }, { "epoch": 2.5874565391815993, "grad_norm": 1.5348008871078491, "learning_rate": 9.755979494859459e-07, "loss": 0.3475, "step": 19349 }, { "epoch": 2.5875902647766784, "grad_norm": 1.4306881427764893, "learning_rate": 9.749760556510435e-07, "loss": 0.3166, "step": 19350 }, { "epoch": 2.587723990371757, "grad_norm": 1.9345148801803589, "learning_rate": 9.743543499341302e-07, "loss": 0.414, "step": 19351 }, { "epoch": 2.587857715966836, "grad_norm": 1.6305358409881592, "learning_rate": 9.7373283234816e-07, "loss": 0.3665, "step": 19352 }, { "epoch": 2.5879914415619147, "grad_norm": 1.6043014526367188, "learning_rate": 9.731115029060945e-07, "loss": 0.3513, "step": 19353 }, { "epoch": 2.588125167156994, "grad_norm": 1.5132147073745728, "learning_rate": 9.724903616208837e-07, "loss": 0.36, "step": 19354 }, { "epoch": 2.588258892752073, "grad_norm": 1.6710190773010254, "learning_rate": 9.718694085054681e-07, "loss": 0.4046, "step": 19355 }, { "epoch": 2.5883926183471515, "grad_norm": 1.4611085653305054, "learning_rate": 9.712486435728008e-07, "loss": 0.3274, "step": 19356 }, { "epoch": 2.5885263439422306, "grad_norm": 1.7424030303955078, "learning_rate": 9.706280668358115e-07, "loss": 0.3953, "step": 19357 }, { "epoch": 2.588660069537309, "grad_norm": 1.6497619152069092, "learning_rate": 9.70007678307443e-07, "loss": 0.3742, "step": 19358 }, { "epoch": 2.5887937951323883, "grad_norm": 1.6942094564437866, "learning_rate": 9.693874780006229e-07, "loss": 0.3919, "step": 19359 }, { "epoch": 2.5889275207274673, "grad_norm": 1.5680617094039917, "learning_rate": 9.687674659282797e-07, "loss": 0.3457, "step": 19360 }, { "epoch": 2.589061246322546, "grad_norm": 1.6897770166397095, "learning_rate": 9.681476421033354e-07, "loss": 0.4049, "step": 19361 }, { "epoch": 2.589194971917625, "grad_norm": 1.5389869213104248, "learning_rate": 9.675280065387117e-07, "loss": 0.3809, "step": 19362 }, { "epoch": 2.5893286975127037, "grad_norm": 1.4350450038909912, "learning_rate": 9.669085592473237e-07, "loss": 0.349, "step": 19363 }, { "epoch": 2.5894624231077827, "grad_norm": 1.6546533107757568, "learning_rate": 9.662893002420836e-07, "loss": 0.4225, "step": 19364 }, { "epoch": 2.589596148702862, "grad_norm": 1.5399622917175293, "learning_rate": 9.656702295358977e-07, "loss": 0.3673, "step": 19365 }, { "epoch": 2.589729874297941, "grad_norm": 1.607690691947937, "learning_rate": 9.650513471416712e-07, "loss": 0.4059, "step": 19366 }, { "epoch": 2.5898635998930195, "grad_norm": 1.5199034214019775, "learning_rate": 9.644326530723036e-07, "loss": 0.3057, "step": 19367 }, { "epoch": 2.5899973254880986, "grad_norm": 1.681808352470398, "learning_rate": 9.638141473406925e-07, "loss": 0.3784, "step": 19368 }, { "epoch": 2.590131051083177, "grad_norm": 1.4247227907180786, "learning_rate": 9.631958299597277e-07, "loss": 0.3143, "step": 19369 }, { "epoch": 2.5902647766782563, "grad_norm": 1.4172497987747192, "learning_rate": 9.62577700942301e-07, "loss": 0.3365, "step": 19370 }, { "epoch": 2.5903985022733353, "grad_norm": 1.4623740911483765, "learning_rate": 9.619597603012898e-07, "loss": 0.3313, "step": 19371 }, { "epoch": 2.590532227868414, "grad_norm": 1.7578006982803345, "learning_rate": 9.613420080495806e-07, "loss": 0.3834, "step": 19372 }, { "epoch": 2.590665953463493, "grad_norm": 1.4879993200302124, "learning_rate": 9.607244442000486e-07, "loss": 0.3723, "step": 19373 }, { "epoch": 2.5907996790585717, "grad_norm": 1.523939609527588, "learning_rate": 9.601070687655667e-07, "loss": 0.3558, "step": 19374 }, { "epoch": 2.5909334046536507, "grad_norm": 1.5160313844680786, "learning_rate": 9.594898817590037e-07, "loss": 0.3589, "step": 19375 }, { "epoch": 2.59106713024873, "grad_norm": 1.5185983180999756, "learning_rate": 9.588728831932193e-07, "loss": 0.3646, "step": 19376 }, { "epoch": 2.5912008558438084, "grad_norm": 1.373460292816162, "learning_rate": 9.58256073081083e-07, "loss": 0.3575, "step": 19377 }, { "epoch": 2.5913345814388875, "grad_norm": 1.4084264039993286, "learning_rate": 9.576394514354425e-07, "loss": 0.3085, "step": 19378 }, { "epoch": 2.591468307033966, "grad_norm": 1.6068617105484009, "learning_rate": 9.570230182691587e-07, "loss": 0.3653, "step": 19379 }, { "epoch": 2.591602032629045, "grad_norm": 1.4380632638931274, "learning_rate": 9.564067735950756e-07, "loss": 0.3495, "step": 19380 }, { "epoch": 2.5917357582241243, "grad_norm": 1.592768669128418, "learning_rate": 9.557907174260372e-07, "loss": 0.3737, "step": 19381 }, { "epoch": 2.591869483819203, "grad_norm": 1.4582490921020508, "learning_rate": 9.551748497748902e-07, "loss": 0.3967, "step": 19382 }, { "epoch": 2.592003209414282, "grad_norm": 1.4425758123397827, "learning_rate": 9.545591706544677e-07, "loss": 0.3726, "step": 19383 }, { "epoch": 2.5921369350093606, "grad_norm": 1.3401920795440674, "learning_rate": 9.539436800776026e-07, "loss": 0.3734, "step": 19384 }, { "epoch": 2.5922706606044397, "grad_norm": 1.3675986528396606, "learning_rate": 9.533283780571257e-07, "loss": 0.3295, "step": 19385 }, { "epoch": 2.5924043861995187, "grad_norm": 1.5308568477630615, "learning_rate": 9.527132646058623e-07, "loss": 0.3607, "step": 19386 }, { "epoch": 2.5925381117945974, "grad_norm": 1.5374099016189575, "learning_rate": 9.520983397366335e-07, "loss": 0.3846, "step": 19387 }, { "epoch": 2.5926718373896764, "grad_norm": 1.5637702941894531, "learning_rate": 9.514836034622565e-07, "loss": 0.3735, "step": 19388 }, { "epoch": 2.592805562984755, "grad_norm": 1.4943219423294067, "learning_rate": 9.508690557955458e-07, "loss": 0.3356, "step": 19389 }, { "epoch": 2.592939288579834, "grad_norm": 1.475665807723999, "learning_rate": 9.502546967493109e-07, "loss": 0.3724, "step": 19390 }, { "epoch": 2.593073014174913, "grad_norm": 1.6110912561416626, "learning_rate": 9.496405263363562e-07, "loss": 0.3829, "step": 19391 }, { "epoch": 2.593206739769992, "grad_norm": 1.6001758575439453, "learning_rate": 9.490265445694857e-07, "loss": 0.3206, "step": 19392 }, { "epoch": 2.593340465365071, "grad_norm": 1.5079129934310913, "learning_rate": 9.484127514614949e-07, "loss": 0.3261, "step": 19393 }, { "epoch": 2.5934741909601495, "grad_norm": 1.507460355758667, "learning_rate": 9.47799147025179e-07, "loss": 0.3584, "step": 19394 }, { "epoch": 2.5936079165552286, "grad_norm": 1.6498550176620483, "learning_rate": 9.47185731273329e-07, "loss": 0.3486, "step": 19395 }, { "epoch": 2.5937416421503077, "grad_norm": 1.479394555091858, "learning_rate": 9.465725042187301e-07, "loss": 0.3314, "step": 19396 }, { "epoch": 2.5938753677453867, "grad_norm": 1.4429948329925537, "learning_rate": 9.459594658741622e-07, "loss": 0.3146, "step": 19397 }, { "epoch": 2.5940090933404654, "grad_norm": 1.52006196975708, "learning_rate": 9.453466162524072e-07, "loss": 0.3644, "step": 19398 }, { "epoch": 2.594142818935544, "grad_norm": 1.3725566864013672, "learning_rate": 9.447339553662371e-07, "loss": 0.3299, "step": 19399 }, { "epoch": 2.594276544530623, "grad_norm": 1.4884238243103027, "learning_rate": 9.441214832284206e-07, "loss": 0.3252, "step": 19400 }, { "epoch": 2.594410270125702, "grad_norm": 1.5354907512664795, "learning_rate": 9.435091998517298e-07, "loss": 0.3348, "step": 19401 }, { "epoch": 2.594543995720781, "grad_norm": 1.6081600189208984, "learning_rate": 9.4289710524892e-07, "loss": 0.3457, "step": 19402 }, { "epoch": 2.59467772131586, "grad_norm": 1.4156938791275024, "learning_rate": 9.422851994327576e-07, "loss": 0.3329, "step": 19403 }, { "epoch": 2.594811446910939, "grad_norm": 1.6202735900878906, "learning_rate": 9.416734824159901e-07, "loss": 0.3776, "step": 19404 }, { "epoch": 2.5949451725060175, "grad_norm": 1.5548313856124878, "learning_rate": 9.410619542113719e-07, "loss": 0.3782, "step": 19405 }, { "epoch": 2.5950788981010966, "grad_norm": 1.5368061065673828, "learning_rate": 9.404506148316473e-07, "loss": 0.3568, "step": 19406 }, { "epoch": 2.5952126236961757, "grad_norm": 1.8214455842971802, "learning_rate": 9.398394642895625e-07, "loss": 0.4278, "step": 19407 }, { "epoch": 2.5953463492912543, "grad_norm": 1.6040012836456299, "learning_rate": 9.392285025978531e-07, "loss": 0.4053, "step": 19408 }, { "epoch": 2.5954800748863334, "grad_norm": 1.527644395828247, "learning_rate": 9.386177297692556e-07, "loss": 0.298, "step": 19409 }, { "epoch": 2.595613800481412, "grad_norm": 1.679027795791626, "learning_rate": 9.380071458165007e-07, "loss": 0.3561, "step": 19410 }, { "epoch": 2.595747526076491, "grad_norm": 1.566199541091919, "learning_rate": 9.373967507523163e-07, "loss": 0.3371, "step": 19411 }, { "epoch": 2.59588125167157, "grad_norm": 1.6476001739501953, "learning_rate": 9.367865445894231e-07, "loss": 0.3419, "step": 19412 }, { "epoch": 2.5960149772666488, "grad_norm": 1.4245692491531372, "learning_rate": 9.361765273405433e-07, "loss": 0.3663, "step": 19413 }, { "epoch": 2.596148702861728, "grad_norm": 1.659962773323059, "learning_rate": 9.355666990183898e-07, "loss": 0.3511, "step": 19414 }, { "epoch": 2.5962824284568065, "grad_norm": 1.4171830415725708, "learning_rate": 9.349570596356772e-07, "loss": 0.3154, "step": 19415 }, { "epoch": 2.5964161540518855, "grad_norm": 1.5904529094696045, "learning_rate": 9.343476092051063e-07, "loss": 0.3709, "step": 19416 }, { "epoch": 2.5965498796469646, "grad_norm": 1.579392910003662, "learning_rate": 9.337383477393858e-07, "loss": 0.4054, "step": 19417 }, { "epoch": 2.5966836052420432, "grad_norm": 1.62498939037323, "learning_rate": 9.331292752512156e-07, "loss": 0.3578, "step": 19418 }, { "epoch": 2.5968173308371223, "grad_norm": 1.4954196214675903, "learning_rate": 9.325203917532877e-07, "loss": 0.4089, "step": 19419 }, { "epoch": 2.596951056432201, "grad_norm": 1.502629041671753, "learning_rate": 9.319116972582987e-07, "loss": 0.3435, "step": 19420 }, { "epoch": 2.59708478202728, "grad_norm": 1.533696174621582, "learning_rate": 9.313031917789295e-07, "loss": 0.3467, "step": 19421 }, { "epoch": 2.597218507622359, "grad_norm": 1.4959895610809326, "learning_rate": 9.306948753278711e-07, "loss": 0.3881, "step": 19422 }, { "epoch": 2.5973522332174377, "grad_norm": 1.5563950538635254, "learning_rate": 9.300867479177966e-07, "loss": 0.3318, "step": 19423 }, { "epoch": 2.5974859588125168, "grad_norm": 1.737650990486145, "learning_rate": 9.294788095613861e-07, "loss": 0.4278, "step": 19424 }, { "epoch": 2.5976196844075954, "grad_norm": 1.568581461906433, "learning_rate": 9.288710602713102e-07, "loss": 0.3743, "step": 19425 }, { "epoch": 2.5977534100026745, "grad_norm": 1.453376054763794, "learning_rate": 9.282635000602346e-07, "loss": 0.3304, "step": 19426 }, { "epoch": 2.5978871355977535, "grad_norm": 1.5165928602218628, "learning_rate": 9.276561289408293e-07, "loss": 0.3523, "step": 19427 }, { "epoch": 2.598020861192832, "grad_norm": 1.5319135189056396, "learning_rate": 9.270489469257493e-07, "loss": 0.4147, "step": 19428 }, { "epoch": 2.5981545867879112, "grad_norm": 1.5313671827316284, "learning_rate": 9.264419540276526e-07, "loss": 0.3369, "step": 19429 }, { "epoch": 2.59828831238299, "grad_norm": 1.6111124753952026, "learning_rate": 9.2583515025919e-07, "loss": 0.3827, "step": 19430 }, { "epoch": 2.598422037978069, "grad_norm": 1.5030075311660767, "learning_rate": 9.252285356330104e-07, "loss": 0.3729, "step": 19431 }, { "epoch": 2.598555763573148, "grad_norm": 1.640863060951233, "learning_rate": 9.246221101617592e-07, "loss": 0.344, "step": 19432 }, { "epoch": 2.598689489168227, "grad_norm": 1.592004656791687, "learning_rate": 9.240158738580751e-07, "loss": 0.3646, "step": 19433 }, { "epoch": 2.5988232147633057, "grad_norm": 1.6026378870010376, "learning_rate": 9.234098267345959e-07, "loss": 0.3812, "step": 19434 }, { "epoch": 2.5989569403583848, "grad_norm": 1.424882173538208, "learning_rate": 9.228039688039537e-07, "loss": 0.3381, "step": 19435 }, { "epoch": 2.5990906659534634, "grad_norm": 1.8395777940750122, "learning_rate": 9.22198300078777e-07, "loss": 0.4023, "step": 19436 }, { "epoch": 2.5992243915485425, "grad_norm": 1.5865273475646973, "learning_rate": 9.215928205716895e-07, "loss": 0.4013, "step": 19437 }, { "epoch": 2.5993581171436215, "grad_norm": 1.4469475746154785, "learning_rate": 9.209875302953131e-07, "loss": 0.3542, "step": 19438 }, { "epoch": 2.5994918427387, "grad_norm": 1.672650933265686, "learning_rate": 9.203824292622654e-07, "loss": 0.4043, "step": 19439 }, { "epoch": 2.5996255683337792, "grad_norm": 1.3972047567367554, "learning_rate": 9.197775174851543e-07, "loss": 0.3359, "step": 19440 }, { "epoch": 2.599759293928858, "grad_norm": 1.4710986614227295, "learning_rate": 9.191727949765949e-07, "loss": 0.3505, "step": 19441 }, { "epoch": 2.599893019523937, "grad_norm": 1.467466115951538, "learning_rate": 9.185682617491865e-07, "loss": 0.3479, "step": 19442 }, { "epoch": 2.600026745119016, "grad_norm": 1.6823577880859375, "learning_rate": 9.179639178155364e-07, "loss": 0.4154, "step": 19443 }, { "epoch": 2.6001604707140946, "grad_norm": 1.6559479236602783, "learning_rate": 9.173597631882359e-07, "loss": 0.3721, "step": 19444 }, { "epoch": 2.6002941963091737, "grad_norm": 1.5296725034713745, "learning_rate": 9.16755797879878e-07, "loss": 0.367, "step": 19445 }, { "epoch": 2.6004279219042523, "grad_norm": 1.641188621520996, "learning_rate": 9.161520219030573e-07, "loss": 0.3901, "step": 19446 }, { "epoch": 2.6005616474993314, "grad_norm": 1.5461465120315552, "learning_rate": 9.155484352703537e-07, "loss": 0.4069, "step": 19447 }, { "epoch": 2.6006953730944105, "grad_norm": 1.581572413444519, "learning_rate": 9.149450379943491e-07, "loss": 0.3467, "step": 19448 }, { "epoch": 2.600829098689489, "grad_norm": 1.6334781646728516, "learning_rate": 9.143418300876228e-07, "loss": 0.3715, "step": 19449 }, { "epoch": 2.600962824284568, "grad_norm": 1.561816692352295, "learning_rate": 9.137388115627477e-07, "loss": 0.3504, "step": 19450 }, { "epoch": 2.601096549879647, "grad_norm": 1.49606192111969, "learning_rate": 9.131359824322916e-07, "loss": 0.3687, "step": 19451 }, { "epoch": 2.601230275474726, "grad_norm": 1.7139372825622559, "learning_rate": 9.125333427088201e-07, "loss": 0.3455, "step": 19452 }, { "epoch": 2.601364001069805, "grad_norm": 1.5788805484771729, "learning_rate": 9.119308924048964e-07, "loss": 0.3404, "step": 19453 }, { "epoch": 2.6014977266648835, "grad_norm": 1.435338020324707, "learning_rate": 9.11328631533076e-07, "loss": 0.3563, "step": 19454 }, { "epoch": 2.6016314522599626, "grad_norm": 1.5037792921066284, "learning_rate": 9.107265601059145e-07, "loss": 0.3522, "step": 19455 }, { "epoch": 2.6017651778550412, "grad_norm": 1.6911542415618896, "learning_rate": 9.101246781359596e-07, "loss": 0.3561, "step": 19456 }, { "epoch": 2.6018989034501203, "grad_norm": 1.6394643783569336, "learning_rate": 9.095229856357579e-07, "loss": 0.3635, "step": 19457 }, { "epoch": 2.6020326290451994, "grad_norm": 1.636730670928955, "learning_rate": 9.089214826178505e-07, "loss": 0.4194, "step": 19458 }, { "epoch": 2.602166354640278, "grad_norm": 1.2542637586593628, "learning_rate": 9.083201690947763e-07, "loss": 0.3049, "step": 19459 }, { "epoch": 2.602300080235357, "grad_norm": 1.3598214387893677, "learning_rate": 9.077190450790696e-07, "loss": 0.3391, "step": 19460 }, { "epoch": 2.6024338058304357, "grad_norm": 1.4779701232910156, "learning_rate": 9.071181105832561e-07, "loss": 0.2975, "step": 19461 }, { "epoch": 2.602567531425515, "grad_norm": 1.6051291227340698, "learning_rate": 9.065173656198678e-07, "loss": 0.3774, "step": 19462 }, { "epoch": 2.602701257020594, "grad_norm": 1.4587279558181763, "learning_rate": 9.059168102014193e-07, "loss": 0.3353, "step": 19463 }, { "epoch": 2.6028349826156725, "grad_norm": 1.505478024482727, "learning_rate": 9.053164443404361e-07, "loss": 0.3617, "step": 19464 }, { "epoch": 2.6029687082107515, "grad_norm": 1.5664360523223877, "learning_rate": 9.047162680494293e-07, "loss": 0.3199, "step": 19465 }, { "epoch": 2.60310243380583, "grad_norm": 1.5141276121139526, "learning_rate": 9.041162813409055e-07, "loss": 0.3757, "step": 19466 }, { "epoch": 2.6032361594009092, "grad_norm": 1.388545036315918, "learning_rate": 9.03516484227378e-07, "loss": 0.3326, "step": 19467 }, { "epoch": 2.6033698849959883, "grad_norm": 1.642006754875183, "learning_rate": 9.029168767213426e-07, "loss": 0.3804, "step": 19468 }, { "epoch": 2.6035036105910674, "grad_norm": 1.4901856184005737, "learning_rate": 9.023174588353001e-07, "loss": 0.3545, "step": 19469 }, { "epoch": 2.603637336186146, "grad_norm": 1.3451625108718872, "learning_rate": 9.017182305817451e-07, "loss": 0.3597, "step": 19470 }, { "epoch": 2.603771061781225, "grad_norm": 1.7079448699951172, "learning_rate": 9.011191919731655e-07, "loss": 0.4011, "step": 19471 }, { "epoch": 2.6039047873763037, "grad_norm": 1.529018759727478, "learning_rate": 9.005203430220532e-07, "loss": 0.3696, "step": 19472 }, { "epoch": 2.604038512971383, "grad_norm": 1.5724196434020996, "learning_rate": 8.999216837408853e-07, "loss": 0.423, "step": 19473 }, { "epoch": 2.604172238566462, "grad_norm": 1.5658984184265137, "learning_rate": 8.993232141421415e-07, "loss": 0.36, "step": 19474 }, { "epoch": 2.6043059641615405, "grad_norm": 1.5030330419540405, "learning_rate": 8.987249342382976e-07, "loss": 0.3342, "step": 19475 }, { "epoch": 2.6044396897566195, "grad_norm": 1.6373921632766724, "learning_rate": 8.981268440418234e-07, "loss": 0.3782, "step": 19476 }, { "epoch": 2.604573415351698, "grad_norm": 1.4834411144256592, "learning_rate": 8.975289435651857e-07, "loss": 0.3307, "step": 19477 }, { "epoch": 2.6047071409467772, "grad_norm": 1.6104497909545898, "learning_rate": 8.969312328208469e-07, "loss": 0.3712, "step": 19478 }, { "epoch": 2.6048408665418563, "grad_norm": 1.4394108057022095, "learning_rate": 8.963337118212656e-07, "loss": 0.3687, "step": 19479 }, { "epoch": 2.604974592136935, "grad_norm": 1.3359596729278564, "learning_rate": 8.957363805788965e-07, "loss": 0.3487, "step": 19480 }, { "epoch": 2.605108317732014, "grad_norm": 1.4096752405166626, "learning_rate": 8.95139239106193e-07, "loss": 0.3711, "step": 19481 }, { "epoch": 2.6052420433270926, "grad_norm": 1.685247778892517, "learning_rate": 8.945422874155962e-07, "loss": 0.4148, "step": 19482 }, { "epoch": 2.6053757689221717, "grad_norm": 1.422440767288208, "learning_rate": 8.939455255195539e-07, "loss": 0.348, "step": 19483 }, { "epoch": 2.605509494517251, "grad_norm": 1.450132131576538, "learning_rate": 8.933489534305051e-07, "loss": 0.3409, "step": 19484 }, { "epoch": 2.6056432201123294, "grad_norm": 1.4854105710983276, "learning_rate": 8.927525711608808e-07, "loss": 0.374, "step": 19485 }, { "epoch": 2.6057769457074085, "grad_norm": 1.7058250904083252, "learning_rate": 8.921563787231169e-07, "loss": 0.3629, "step": 19486 }, { "epoch": 2.605910671302487, "grad_norm": 1.6321206092834473, "learning_rate": 8.915603761296354e-07, "loss": 0.3227, "step": 19487 }, { "epoch": 2.606044396897566, "grad_norm": 1.592790961265564, "learning_rate": 8.909645633928643e-07, "loss": 0.3696, "step": 19488 }, { "epoch": 2.6061781224926452, "grad_norm": 1.5909035205841064, "learning_rate": 8.903689405252203e-07, "loss": 0.3484, "step": 19489 }, { "epoch": 2.606311848087724, "grad_norm": 1.5592460632324219, "learning_rate": 8.897735075391156e-07, "loss": 0.3731, "step": 19490 }, { "epoch": 2.606445573682803, "grad_norm": 1.6861019134521484, "learning_rate": 8.891782644469693e-07, "loss": 0.4333, "step": 19491 }, { "epoch": 2.6065792992778816, "grad_norm": 1.4520800113677979, "learning_rate": 8.885832112611814e-07, "loss": 0.3467, "step": 19492 }, { "epoch": 2.6067130248729606, "grad_norm": 1.4956095218658447, "learning_rate": 8.879883479941576e-07, "loss": 0.3856, "step": 19493 }, { "epoch": 2.6068467504680397, "grad_norm": 1.382568120956421, "learning_rate": 8.873936746582978e-07, "loss": 0.3199, "step": 19494 }, { "epoch": 2.6069804760631183, "grad_norm": 1.7488336563110352, "learning_rate": 8.867991912659979e-07, "loss": 0.3925, "step": 19495 }, { "epoch": 2.6071142016581974, "grad_norm": 1.5171436071395874, "learning_rate": 8.862048978296467e-07, "loss": 0.3257, "step": 19496 }, { "epoch": 2.607247927253276, "grad_norm": 1.5666707754135132, "learning_rate": 8.856107943616343e-07, "loss": 0.3637, "step": 19497 }, { "epoch": 2.607381652848355, "grad_norm": 1.5899471044540405, "learning_rate": 8.850168808743442e-07, "loss": 0.378, "step": 19498 }, { "epoch": 2.607515378443434, "grad_norm": 1.711914300918579, "learning_rate": 8.844231573801543e-07, "loss": 0.3676, "step": 19499 }, { "epoch": 2.6076491040385132, "grad_norm": 1.3864308595657349, "learning_rate": 8.838296238914424e-07, "loss": 0.337, "step": 19500 }, { "epoch": 2.607782829633592, "grad_norm": 1.355744481086731, "learning_rate": 8.832362804205763e-07, "loss": 0.3537, "step": 19501 }, { "epoch": 2.6079165552286705, "grad_norm": 1.5719846487045288, "learning_rate": 8.826431269799274e-07, "loss": 0.3434, "step": 19502 }, { "epoch": 2.6080502808237496, "grad_norm": 1.5453073978424072, "learning_rate": 8.820501635818579e-07, "loss": 0.3558, "step": 19503 }, { "epoch": 2.6081840064188286, "grad_norm": 1.5635631084442139, "learning_rate": 8.81457390238728e-07, "loss": 0.3451, "step": 19504 }, { "epoch": 2.6083177320139077, "grad_norm": 1.740233063697815, "learning_rate": 8.808648069628945e-07, "loss": 0.3695, "step": 19505 }, { "epoch": 2.6084514576089863, "grad_norm": 1.6847102642059326, "learning_rate": 8.802724137667052e-07, "loss": 0.352, "step": 19506 }, { "epoch": 2.6085851832040654, "grad_norm": 1.497064471244812, "learning_rate": 8.796802106625147e-07, "loss": 0.3511, "step": 19507 }, { "epoch": 2.608718908799144, "grad_norm": 1.622875452041626, "learning_rate": 8.790881976626598e-07, "loss": 0.3862, "step": 19508 }, { "epoch": 2.608852634394223, "grad_norm": 1.5367298126220703, "learning_rate": 8.784963747794828e-07, "loss": 0.3196, "step": 19509 }, { "epoch": 2.608986359989302, "grad_norm": 1.4905195236206055, "learning_rate": 8.779047420253239e-07, "loss": 0.3517, "step": 19510 }, { "epoch": 2.609120085584381, "grad_norm": 1.3953361511230469, "learning_rate": 8.773132994125089e-07, "loss": 0.3297, "step": 19511 }, { "epoch": 2.60925381117946, "grad_norm": 1.5885361433029175, "learning_rate": 8.767220469533722e-07, "loss": 0.3256, "step": 19512 }, { "epoch": 2.6093875367745385, "grad_norm": 1.5141377449035645, "learning_rate": 8.761309846602317e-07, "loss": 0.3379, "step": 19513 }, { "epoch": 2.6095212623696176, "grad_norm": 1.4275989532470703, "learning_rate": 8.75540112545411e-07, "loss": 0.3299, "step": 19514 }, { "epoch": 2.6096549879646966, "grad_norm": 1.681443452835083, "learning_rate": 8.749494306212247e-07, "loss": 0.3724, "step": 19515 }, { "epoch": 2.6097887135597753, "grad_norm": 1.6418869495391846, "learning_rate": 8.743589388999862e-07, "loss": 0.3886, "step": 19516 }, { "epoch": 2.6099224391548543, "grad_norm": 1.7757841348648071, "learning_rate": 8.737686373940036e-07, "loss": 0.3876, "step": 19517 }, { "epoch": 2.610056164749933, "grad_norm": 1.6405843496322632, "learning_rate": 8.731785261155801e-07, "loss": 0.3772, "step": 19518 }, { "epoch": 2.610189890345012, "grad_norm": 1.518615484237671, "learning_rate": 8.725886050770182e-07, "loss": 0.362, "step": 19519 }, { "epoch": 2.610323615940091, "grad_norm": 1.3937513828277588, "learning_rate": 8.719988742906116e-07, "loss": 0.3187, "step": 19520 }, { "epoch": 2.6104573415351697, "grad_norm": 1.4609380960464478, "learning_rate": 8.714093337686547e-07, "loss": 0.3671, "step": 19521 }, { "epoch": 2.610591067130249, "grad_norm": 1.57331120967865, "learning_rate": 8.708199835234343e-07, "loss": 0.3703, "step": 19522 }, { "epoch": 2.6107247927253274, "grad_norm": 1.6052734851837158, "learning_rate": 8.702308235672363e-07, "loss": 0.3474, "step": 19523 }, { "epoch": 2.6108585183204065, "grad_norm": 1.6063714027404785, "learning_rate": 8.696418539123419e-07, "loss": 0.3408, "step": 19524 }, { "epoch": 2.6109922439154856, "grad_norm": 1.5783113241195679, "learning_rate": 8.690530745710236e-07, "loss": 0.3496, "step": 19525 }, { "epoch": 2.611125969510564, "grad_norm": 1.717894196510315, "learning_rate": 8.684644855555591e-07, "loss": 0.3904, "step": 19526 }, { "epoch": 2.6112596951056433, "grad_norm": 1.2968883514404297, "learning_rate": 8.67876086878211e-07, "loss": 0.3133, "step": 19527 }, { "epoch": 2.611393420700722, "grad_norm": 1.5781841278076172, "learning_rate": 8.672878785512495e-07, "loss": 0.3832, "step": 19528 }, { "epoch": 2.611527146295801, "grad_norm": 1.652010202407837, "learning_rate": 8.666998605869348e-07, "loss": 0.4019, "step": 19529 }, { "epoch": 2.61166087189088, "grad_norm": 1.4182761907577515, "learning_rate": 8.661120329975192e-07, "loss": 0.3497, "step": 19530 }, { "epoch": 2.6117945974859587, "grad_norm": 1.486035704612732, "learning_rate": 8.655243957952608e-07, "loss": 0.3709, "step": 19531 }, { "epoch": 2.6119283230810377, "grad_norm": 1.6327126026153564, "learning_rate": 8.649369489924031e-07, "loss": 0.3568, "step": 19532 }, { "epoch": 2.6120620486761164, "grad_norm": 1.621757984161377, "learning_rate": 8.643496926011952e-07, "loss": 0.3336, "step": 19533 }, { "epoch": 2.6121957742711954, "grad_norm": 1.5309854745864868, "learning_rate": 8.63762626633875e-07, "loss": 0.3832, "step": 19534 }, { "epoch": 2.6123294998662745, "grad_norm": 1.6868252754211426, "learning_rate": 8.631757511026784e-07, "loss": 0.3724, "step": 19535 }, { "epoch": 2.6124632254613536, "grad_norm": 1.661942720413208, "learning_rate": 8.625890660198443e-07, "loss": 0.3704, "step": 19536 }, { "epoch": 2.612596951056432, "grad_norm": 1.4981926679611206, "learning_rate": 8.620025713975954e-07, "loss": 0.3703, "step": 19537 }, { "epoch": 2.6127306766515113, "grad_norm": 1.9869073629379272, "learning_rate": 8.614162672481585e-07, "loss": 0.3819, "step": 19538 }, { "epoch": 2.61286440224659, "grad_norm": 1.5965659618377686, "learning_rate": 8.60830153583756e-07, "loss": 0.3604, "step": 19539 }, { "epoch": 2.612998127841669, "grad_norm": 1.652419090270996, "learning_rate": 8.602442304166025e-07, "loss": 0.3408, "step": 19540 }, { "epoch": 2.613131853436748, "grad_norm": 1.572708010673523, "learning_rate": 8.596584977589128e-07, "loss": 0.3507, "step": 19541 }, { "epoch": 2.6132655790318267, "grad_norm": 1.4481728076934814, "learning_rate": 8.590729556228961e-07, "loss": 0.3642, "step": 19542 }, { "epoch": 2.6133993046269057, "grad_norm": 1.5846220254898071, "learning_rate": 8.584876040207557e-07, "loss": 0.3597, "step": 19543 }, { "epoch": 2.6135330302219844, "grad_norm": 1.5735191106796265, "learning_rate": 8.579024429646932e-07, "loss": 0.3948, "step": 19544 }, { "epoch": 2.6136667558170634, "grad_norm": 1.673148274421692, "learning_rate": 8.573174724669087e-07, "loss": 0.366, "step": 19545 }, { "epoch": 2.6138004814121425, "grad_norm": 1.5137078762054443, "learning_rate": 8.567326925395903e-07, "loss": 0.3632, "step": 19546 }, { "epoch": 2.613934207007221, "grad_norm": 1.6499638557434082, "learning_rate": 8.561481031949304e-07, "loss": 0.4049, "step": 19547 }, { "epoch": 2.6140679326023, "grad_norm": 1.5932596921920776, "learning_rate": 8.555637044451138e-07, "loss": 0.3651, "step": 19548 }, { "epoch": 2.614201658197379, "grad_norm": 1.3847600221633911, "learning_rate": 8.549794963023216e-07, "loss": 0.3166, "step": 19549 }, { "epoch": 2.614335383792458, "grad_norm": 1.7683069705963135, "learning_rate": 8.543954787787323e-07, "loss": 0.357, "step": 19550 }, { "epoch": 2.614469109387537, "grad_norm": 1.8103740215301514, "learning_rate": 8.538116518865147e-07, "loss": 0.4156, "step": 19551 }, { "epoch": 2.6146028349826156, "grad_norm": 1.7996622323989868, "learning_rate": 8.532280156378447e-07, "loss": 0.3778, "step": 19552 }, { "epoch": 2.6147365605776947, "grad_norm": 1.5456198453903198, "learning_rate": 8.526445700448827e-07, "loss": 0.3197, "step": 19553 }, { "epoch": 2.6148702861727733, "grad_norm": 1.5507893562316895, "learning_rate": 8.520613151197899e-07, "loss": 0.3472, "step": 19554 }, { "epoch": 2.6150040117678524, "grad_norm": 1.474548101425171, "learning_rate": 8.514782508747288e-07, "loss": 0.3421, "step": 19555 }, { "epoch": 2.6151377373629314, "grad_norm": 1.3237581253051758, "learning_rate": 8.508953773218454e-07, "loss": 0.3064, "step": 19556 }, { "epoch": 2.61527146295801, "grad_norm": 1.5358107089996338, "learning_rate": 8.503126944732964e-07, "loss": 0.3606, "step": 19557 }, { "epoch": 2.615405188553089, "grad_norm": 1.5017247200012207, "learning_rate": 8.497302023412235e-07, "loss": 0.3778, "step": 19558 }, { "epoch": 2.6155389141481677, "grad_norm": 1.580004334449768, "learning_rate": 8.491479009377679e-07, "loss": 0.3362, "step": 19559 }, { "epoch": 2.615672639743247, "grad_norm": 1.6635812520980835, "learning_rate": 8.485657902750677e-07, "loss": 0.3984, "step": 19560 }, { "epoch": 2.615806365338326, "grad_norm": 1.4983831644058228, "learning_rate": 8.479838703652565e-07, "loss": 0.3263, "step": 19561 }, { "epoch": 2.6159400909334045, "grad_norm": 1.586610198020935, "learning_rate": 8.474021412204647e-07, "loss": 0.3654, "step": 19562 }, { "epoch": 2.6160738165284836, "grad_norm": 1.5147449970245361, "learning_rate": 8.468206028528158e-07, "loss": 0.3453, "step": 19563 }, { "epoch": 2.616207542123562, "grad_norm": 1.6614540815353394, "learning_rate": 8.462392552744347e-07, "loss": 0.3669, "step": 19564 }, { "epoch": 2.6163412677186413, "grad_norm": 1.4653871059417725, "learning_rate": 8.45658098497436e-07, "loss": 0.3738, "step": 19565 }, { "epoch": 2.6164749933137204, "grad_norm": 1.6000083684921265, "learning_rate": 8.450771325339346e-07, "loss": 0.3735, "step": 19566 }, { "epoch": 2.616608718908799, "grad_norm": 1.5855908393859863, "learning_rate": 8.444963573960396e-07, "loss": 0.3564, "step": 19567 }, { "epoch": 2.616742444503878, "grad_norm": 1.770967721939087, "learning_rate": 8.43915773095858e-07, "loss": 0.4145, "step": 19568 }, { "epoch": 2.6168761700989567, "grad_norm": 1.5113626718521118, "learning_rate": 8.433353796454924e-07, "loss": 0.3939, "step": 19569 }, { "epoch": 2.6170098956940357, "grad_norm": 1.5077701807022095, "learning_rate": 8.427551770570352e-07, "loss": 0.3619, "step": 19570 }, { "epoch": 2.617143621289115, "grad_norm": 1.491633415222168, "learning_rate": 8.421751653425869e-07, "loss": 0.421, "step": 19571 }, { "epoch": 2.617277346884194, "grad_norm": 1.4598846435546875, "learning_rate": 8.415953445142311e-07, "loss": 0.3352, "step": 19572 }, { "epoch": 2.6174110724792725, "grad_norm": 1.5487326383590698, "learning_rate": 8.41015714584058e-07, "loss": 0.3692, "step": 19573 }, { "epoch": 2.6175447980743516, "grad_norm": 1.6238900423049927, "learning_rate": 8.404362755641504e-07, "loss": 0.4021, "step": 19574 }, { "epoch": 2.61767852366943, "grad_norm": 1.4055436849594116, "learning_rate": 8.398570274665796e-07, "loss": 0.3434, "step": 19575 }, { "epoch": 2.6178122492645093, "grad_norm": 1.5884904861450195, "learning_rate": 8.392779703034281e-07, "loss": 0.3649, "step": 19576 }, { "epoch": 2.6179459748595884, "grad_norm": 1.6798869371414185, "learning_rate": 8.386991040867598e-07, "loss": 0.3644, "step": 19577 }, { "epoch": 2.618079700454667, "grad_norm": 1.634273648262024, "learning_rate": 8.381204288286415e-07, "loss": 0.385, "step": 19578 }, { "epoch": 2.618213426049746, "grad_norm": 1.5996190309524536, "learning_rate": 8.37541944541137e-07, "loss": 0.3734, "step": 19579 }, { "epoch": 2.6183471516448247, "grad_norm": 1.6183395385742188, "learning_rate": 8.369636512363e-07, "loss": 0.365, "step": 19580 }, { "epoch": 2.6184808772399037, "grad_norm": 1.6020156145095825, "learning_rate": 8.363855489261918e-07, "loss": 0.4239, "step": 19581 }, { "epoch": 2.618614602834983, "grad_norm": 1.568663477897644, "learning_rate": 8.358076376228563e-07, "loss": 0.3665, "step": 19582 }, { "epoch": 2.6187483284300614, "grad_norm": 1.5924172401428223, "learning_rate": 8.352299173383416e-07, "loss": 0.3857, "step": 19583 }, { "epoch": 2.6188820540251405, "grad_norm": 1.6926302909851074, "learning_rate": 8.346523880846902e-07, "loss": 0.4003, "step": 19584 }, { "epoch": 2.619015779620219, "grad_norm": 1.5686759948730469, "learning_rate": 8.340750498739381e-07, "loss": 0.3437, "step": 19585 }, { "epoch": 2.619149505215298, "grad_norm": 1.5210316181182861, "learning_rate": 8.334979027181222e-07, "loss": 0.3568, "step": 19586 }, { "epoch": 2.6192832308103773, "grad_norm": 1.5274758338928223, "learning_rate": 8.329209466292698e-07, "loss": 0.3745, "step": 19587 }, { "epoch": 2.619416956405456, "grad_norm": 1.5009666681289673, "learning_rate": 8.323441816194089e-07, "loss": 0.3453, "step": 19588 }, { "epoch": 2.619550682000535, "grad_norm": 1.5530569553375244, "learning_rate": 8.31767607700561e-07, "loss": 0.362, "step": 19589 }, { "epoch": 2.6196844075956136, "grad_norm": 1.4850444793701172, "learning_rate": 8.311912248847465e-07, "loss": 0.3681, "step": 19590 }, { "epoch": 2.6198181331906927, "grad_norm": 1.4769301414489746, "learning_rate": 8.306150331839735e-07, "loss": 0.3535, "step": 19591 }, { "epoch": 2.6199518587857717, "grad_norm": 1.614455223083496, "learning_rate": 8.30039032610257e-07, "loss": 0.3686, "step": 19592 }, { "epoch": 2.6200855843808504, "grad_norm": 1.6423662900924683, "learning_rate": 8.29463223175605e-07, "loss": 0.3828, "step": 19593 }, { "epoch": 2.6202193099759294, "grad_norm": 1.5852645635604858, "learning_rate": 8.288876048920125e-07, "loss": 0.3443, "step": 19594 }, { "epoch": 2.620353035571008, "grad_norm": 1.4702638387680054, "learning_rate": 8.283121777714864e-07, "loss": 0.3491, "step": 19595 }, { "epoch": 2.620486761166087, "grad_norm": 1.6178233623504639, "learning_rate": 8.277369418260129e-07, "loss": 0.3931, "step": 19596 }, { "epoch": 2.620620486761166, "grad_norm": 1.4203206300735474, "learning_rate": 8.271618970675887e-07, "loss": 0.3689, "step": 19597 }, { "epoch": 2.620754212356245, "grad_norm": 1.5350168943405151, "learning_rate": 8.265870435081957e-07, "loss": 0.3499, "step": 19598 }, { "epoch": 2.620887937951324, "grad_norm": 1.679657220840454, "learning_rate": 8.260123811598164e-07, "loss": 0.378, "step": 19599 }, { "epoch": 2.6210216635464025, "grad_norm": 1.5770777463912964, "learning_rate": 8.254379100344345e-07, "loss": 0.3375, "step": 19600 }, { "epoch": 2.6211553891414816, "grad_norm": 1.8325660228729248, "learning_rate": 8.248636301440171e-07, "loss": 0.3932, "step": 19601 }, { "epoch": 2.6212891147365607, "grad_norm": 1.6508115530014038, "learning_rate": 8.242895415005391e-07, "loss": 0.3876, "step": 19602 }, { "epoch": 2.6214228403316397, "grad_norm": 1.4678726196289062, "learning_rate": 8.237156441159644e-07, "loss": 0.3341, "step": 19603 }, { "epoch": 2.6215565659267184, "grad_norm": 1.3634788990020752, "learning_rate": 8.231419380022576e-07, "loss": 0.3198, "step": 19604 }, { "epoch": 2.621690291521797, "grad_norm": 1.4159530401229858, "learning_rate": 8.225684231713749e-07, "loss": 0.3584, "step": 19605 }, { "epoch": 2.621824017116876, "grad_norm": 1.751919150352478, "learning_rate": 8.21995099635271e-07, "loss": 0.4523, "step": 19606 }, { "epoch": 2.621957742711955, "grad_norm": 1.6467260122299194, "learning_rate": 8.214219674058976e-07, "loss": 0.3655, "step": 19607 }, { "epoch": 2.622091468307034, "grad_norm": 1.383855938911438, "learning_rate": 8.208490264952007e-07, "loss": 0.3208, "step": 19608 }, { "epoch": 2.622225193902113, "grad_norm": 1.5537952184677124, "learning_rate": 8.202762769151229e-07, "loss": 0.3865, "step": 19609 }, { "epoch": 2.622358919497192, "grad_norm": 1.5524613857269287, "learning_rate": 8.197037186776002e-07, "loss": 0.3455, "step": 19610 }, { "epoch": 2.6224926450922705, "grad_norm": 1.5280252695083618, "learning_rate": 8.191313517945698e-07, "loss": 0.3662, "step": 19611 }, { "epoch": 2.6226263706873496, "grad_norm": 1.4856411218643188, "learning_rate": 8.18559176277961e-07, "loss": 0.3882, "step": 19612 }, { "epoch": 2.6227600962824287, "grad_norm": 1.5660953521728516, "learning_rate": 8.179871921396998e-07, "loss": 0.3456, "step": 19613 }, { "epoch": 2.6228938218775073, "grad_norm": 1.3239666223526, "learning_rate": 8.174153993917122e-07, "loss": 0.3196, "step": 19614 }, { "epoch": 2.6230275474725864, "grad_norm": 1.5252865552902222, "learning_rate": 8.168437980459098e-07, "loss": 0.348, "step": 19615 }, { "epoch": 2.623161273067665, "grad_norm": 1.4968665838241577, "learning_rate": 8.162723881142154e-07, "loss": 0.3881, "step": 19616 }, { "epoch": 2.623294998662744, "grad_norm": 1.709981918334961, "learning_rate": 8.157011696085326e-07, "loss": 0.3784, "step": 19617 }, { "epoch": 2.623428724257823, "grad_norm": 1.4801714420318604, "learning_rate": 8.151301425407699e-07, "loss": 0.345, "step": 19618 }, { "epoch": 2.6235624498529018, "grad_norm": 1.7283940315246582, "learning_rate": 8.145593069228331e-07, "loss": 0.3696, "step": 19619 }, { "epoch": 2.623696175447981, "grad_norm": 1.4477654695510864, "learning_rate": 8.139886627666139e-07, "loss": 0.3496, "step": 19620 }, { "epoch": 2.6238299010430595, "grad_norm": 1.8064619302749634, "learning_rate": 8.134182100840149e-07, "loss": 0.3907, "step": 19621 }, { "epoch": 2.6239636266381385, "grad_norm": 1.5552284717559814, "learning_rate": 8.128479488869212e-07, "loss": 0.3422, "step": 19622 }, { "epoch": 2.6240973522332176, "grad_norm": 1.5775485038757324, "learning_rate": 8.12277879187221e-07, "loss": 0.385, "step": 19623 }, { "epoch": 2.6242310778282962, "grad_norm": 1.684749960899353, "learning_rate": 8.117080009967971e-07, "loss": 0.377, "step": 19624 }, { "epoch": 2.6243648034233753, "grad_norm": 1.4373093843460083, "learning_rate": 8.111383143275264e-07, "loss": 0.3258, "step": 19625 }, { "epoch": 2.624498529018454, "grad_norm": 1.5786436796188354, "learning_rate": 8.105688191912852e-07, "loss": 0.3692, "step": 19626 }, { "epoch": 2.624632254613533, "grad_norm": 1.6357507705688477, "learning_rate": 8.09999515599944e-07, "loss": 0.361, "step": 19627 }, { "epoch": 2.624765980208612, "grad_norm": 1.6164336204528809, "learning_rate": 8.094304035653689e-07, "loss": 0.3817, "step": 19628 }, { "epoch": 2.6248997058036907, "grad_norm": 1.55000901222229, "learning_rate": 8.088614830994223e-07, "loss": 0.3476, "step": 19629 }, { "epoch": 2.6250334313987698, "grad_norm": 1.518129825592041, "learning_rate": 8.08292754213964e-07, "loss": 0.3452, "step": 19630 }, { "epoch": 2.6251671569938484, "grad_norm": 1.5852760076522827, "learning_rate": 8.077242169208477e-07, "loss": 0.3613, "step": 19631 }, { "epoch": 2.6253008825889275, "grad_norm": 1.5196270942687988, "learning_rate": 8.071558712319227e-07, "loss": 0.3632, "step": 19632 }, { "epoch": 2.6254346081840065, "grad_norm": 1.5668519735336304, "learning_rate": 8.065877171590375e-07, "loss": 0.3713, "step": 19633 }, { "epoch": 2.625568333779085, "grad_norm": 1.7547022104263306, "learning_rate": 8.060197547140347e-07, "loss": 0.3425, "step": 19634 }, { "epoch": 2.6257020593741642, "grad_norm": 1.327179193496704, "learning_rate": 8.054519839087537e-07, "loss": 0.3395, "step": 19635 }, { "epoch": 2.625835784969243, "grad_norm": 1.2485555410385132, "learning_rate": 8.048844047550252e-07, "loss": 0.3283, "step": 19636 }, { "epoch": 2.625969510564322, "grad_norm": 1.3951236009597778, "learning_rate": 8.043170172646841e-07, "loss": 0.3324, "step": 19637 }, { "epoch": 2.626103236159401, "grad_norm": 1.4200693368911743, "learning_rate": 8.037498214495565e-07, "loss": 0.3259, "step": 19638 }, { "epoch": 2.62623696175448, "grad_norm": 1.4747624397277832, "learning_rate": 8.031828173214607e-07, "loss": 0.3342, "step": 19639 }, { "epoch": 2.6263706873495587, "grad_norm": 1.6580431461334229, "learning_rate": 8.026160048922216e-07, "loss": 0.3528, "step": 19640 }, { "epoch": 2.6265044129446378, "grad_norm": 1.4473644495010376, "learning_rate": 8.020493841736487e-07, "loss": 0.4034, "step": 19641 }, { "epoch": 2.6266381385397164, "grad_norm": 1.6652432680130005, "learning_rate": 8.014829551775583e-07, "loss": 0.3952, "step": 19642 }, { "epoch": 2.6267718641347955, "grad_norm": 1.5977108478546143, "learning_rate": 8.009167179157506e-07, "loss": 0.3845, "step": 19643 }, { "epoch": 2.6269055897298745, "grad_norm": 1.5969293117523193, "learning_rate": 8.003506724000321e-07, "loss": 0.3477, "step": 19644 }, { "epoch": 2.627039315324953, "grad_norm": 1.6361817121505737, "learning_rate": 7.997848186422008e-07, "loss": 0.3437, "step": 19645 }, { "epoch": 2.6271730409200322, "grad_norm": 1.6540716886520386, "learning_rate": 7.992191566540519e-07, "loss": 0.3847, "step": 19646 }, { "epoch": 2.627306766515111, "grad_norm": 1.5350583791732788, "learning_rate": 7.986536864473748e-07, "loss": 0.3501, "step": 19647 }, { "epoch": 2.62744049211019, "grad_norm": 1.46451735496521, "learning_rate": 7.980884080339568e-07, "loss": 0.3222, "step": 19648 }, { "epoch": 2.627574217705269, "grad_norm": 1.5971907377243042, "learning_rate": 7.975233214255807e-07, "loss": 0.3387, "step": 19649 }, { "epoch": 2.6277079433003476, "grad_norm": 1.453679084777832, "learning_rate": 7.969584266340258e-07, "loss": 0.338, "step": 19650 }, { "epoch": 2.6278416688954267, "grad_norm": 1.5526421070098877, "learning_rate": 7.96393723671065e-07, "loss": 0.3513, "step": 19651 }, { "epoch": 2.6279753944905053, "grad_norm": 1.5498524904251099, "learning_rate": 7.958292125484713e-07, "loss": 0.3243, "step": 19652 }, { "epoch": 2.6281091200855844, "grad_norm": 1.7014621496200562, "learning_rate": 7.952648932780094e-07, "loss": 0.3741, "step": 19653 }, { "epoch": 2.6282428456806635, "grad_norm": 1.4539575576782227, "learning_rate": 7.947007658714446e-07, "loss": 0.3629, "step": 19654 }, { "epoch": 2.628376571275742, "grad_norm": 1.6647298336029053, "learning_rate": 7.941368303405306e-07, "loss": 0.3962, "step": 19655 }, { "epoch": 2.628510296870821, "grad_norm": 1.5608336925506592, "learning_rate": 7.93573086697027e-07, "loss": 0.3207, "step": 19656 }, { "epoch": 2.6286440224659, "grad_norm": 1.448331594467163, "learning_rate": 7.930095349526834e-07, "loss": 0.3242, "step": 19657 }, { "epoch": 2.628777748060979, "grad_norm": 1.375649094581604, "learning_rate": 7.924461751192447e-07, "loss": 0.3333, "step": 19658 }, { "epoch": 2.628911473656058, "grad_norm": 1.62669837474823, "learning_rate": 7.918830072084571e-07, "loss": 0.349, "step": 19659 }, { "epoch": 2.6290451992511366, "grad_norm": 1.4446439743041992, "learning_rate": 7.913200312320546e-07, "loss": 0.3605, "step": 19660 }, { "epoch": 2.6291789248462156, "grad_norm": 1.616838812828064, "learning_rate": 7.907572472017766e-07, "loss": 0.3732, "step": 19661 }, { "epoch": 2.6293126504412943, "grad_norm": 1.688781976699829, "learning_rate": 7.901946551293493e-07, "loss": 0.4061, "step": 19662 }, { "epoch": 2.6294463760363733, "grad_norm": 1.566849946975708, "learning_rate": 7.896322550265012e-07, "loss": 0.3674, "step": 19663 }, { "epoch": 2.6295801016314524, "grad_norm": 1.6776561737060547, "learning_rate": 7.890700469049573e-07, "loss": 0.3957, "step": 19664 }, { "epoch": 2.629713827226531, "grad_norm": 1.6221222877502441, "learning_rate": 7.885080307764326e-07, "loss": 0.3538, "step": 19665 }, { "epoch": 2.62984755282161, "grad_norm": 1.554355263710022, "learning_rate": 7.879462066526456e-07, "loss": 0.3553, "step": 19666 }, { "epoch": 2.6299812784166887, "grad_norm": 1.381039023399353, "learning_rate": 7.873845745453046e-07, "loss": 0.3433, "step": 19667 }, { "epoch": 2.630115004011768, "grad_norm": 1.6432201862335205, "learning_rate": 7.868231344661148e-07, "loss": 0.3925, "step": 19668 }, { "epoch": 2.630248729606847, "grad_norm": 1.6069613695144653, "learning_rate": 7.862618864267823e-07, "loss": 0.4013, "step": 19669 }, { "epoch": 2.6303824552019255, "grad_norm": 1.6324853897094727, "learning_rate": 7.857008304390035e-07, "loss": 0.3545, "step": 19670 }, { "epoch": 2.6305161807970046, "grad_norm": 1.5837786197662354, "learning_rate": 7.851399665144743e-07, "loss": 0.3609, "step": 19671 }, { "epoch": 2.630649906392083, "grad_norm": 1.6376917362213135, "learning_rate": 7.845792946648845e-07, "loss": 0.3819, "step": 19672 }, { "epoch": 2.6307836319871623, "grad_norm": 1.3903653621673584, "learning_rate": 7.840188149019201e-07, "loss": 0.3347, "step": 19673 }, { "epoch": 2.6309173575822413, "grad_norm": 1.6362255811691284, "learning_rate": 7.834585272372663e-07, "loss": 0.3872, "step": 19674 }, { "epoch": 2.6310510831773204, "grad_norm": 1.4612270593643188, "learning_rate": 7.828984316825994e-07, "loss": 0.3407, "step": 19675 }, { "epoch": 2.631184808772399, "grad_norm": 1.2788245677947998, "learning_rate": 7.823385282495954e-07, "loss": 0.3323, "step": 19676 }, { "epoch": 2.631318534367478, "grad_norm": 1.7574976682662964, "learning_rate": 7.81778816949924e-07, "loss": 0.3803, "step": 19677 }, { "epoch": 2.6314522599625567, "grad_norm": 1.418548345565796, "learning_rate": 7.812192977952538e-07, "loss": 0.3523, "step": 19678 }, { "epoch": 2.631585985557636, "grad_norm": 1.5804587602615356, "learning_rate": 7.806599707972429e-07, "loss": 0.3622, "step": 19679 }, { "epoch": 2.631719711152715, "grad_norm": 1.5596683025360107, "learning_rate": 7.801008359675565e-07, "loss": 0.334, "step": 19680 }, { "epoch": 2.6318534367477935, "grad_norm": 1.6001561880111694, "learning_rate": 7.795418933178423e-07, "loss": 0.3583, "step": 19681 }, { "epoch": 2.6319871623428726, "grad_norm": 1.6871455907821655, "learning_rate": 7.78983142859755e-07, "loss": 0.3403, "step": 19682 }, { "epoch": 2.632120887937951, "grad_norm": 1.5695146322250366, "learning_rate": 7.784245846049432e-07, "loss": 0.3577, "step": 19683 }, { "epoch": 2.6322546135330303, "grad_norm": 1.4506876468658447, "learning_rate": 7.778662185650431e-07, "loss": 0.3721, "step": 19684 }, { "epoch": 2.6323883391281093, "grad_norm": 1.5756123065948486, "learning_rate": 7.773080447517012e-07, "loss": 0.3562, "step": 19685 }, { "epoch": 2.632522064723188, "grad_norm": 1.4218013286590576, "learning_rate": 7.767500631765456e-07, "loss": 0.3734, "step": 19686 }, { "epoch": 2.632655790318267, "grad_norm": 1.4662104845046997, "learning_rate": 7.761922738512096e-07, "loss": 0.3397, "step": 19687 }, { "epoch": 2.6327895159133456, "grad_norm": 1.516066074371338, "learning_rate": 7.756346767873191e-07, "loss": 0.3509, "step": 19688 }, { "epoch": 2.6329232415084247, "grad_norm": 1.6813167333602905, "learning_rate": 7.750772719964961e-07, "loss": 0.4086, "step": 19689 }, { "epoch": 2.633056967103504, "grad_norm": 1.6123945713043213, "learning_rate": 7.745200594903612e-07, "loss": 0.3973, "step": 19690 }, { "epoch": 2.6331906926985824, "grad_norm": 1.4779176712036133, "learning_rate": 7.739630392805276e-07, "loss": 0.3345, "step": 19691 }, { "epoch": 2.6333244182936615, "grad_norm": 1.4237959384918213, "learning_rate": 7.734062113786067e-07, "loss": 0.3532, "step": 19692 }, { "epoch": 2.63345814388874, "grad_norm": 1.5116260051727295, "learning_rate": 7.72849575796204e-07, "loss": 0.3684, "step": 19693 }, { "epoch": 2.633591869483819, "grad_norm": 1.4630063772201538, "learning_rate": 7.722931325449223e-07, "loss": 0.3188, "step": 19694 }, { "epoch": 2.6337255950788983, "grad_norm": 1.4825414419174194, "learning_rate": 7.717368816363602e-07, "loss": 0.3586, "step": 19695 }, { "epoch": 2.633859320673977, "grad_norm": 1.4199774265289307, "learning_rate": 7.711808230821116e-07, "loss": 0.3368, "step": 19696 }, { "epoch": 2.633993046269056, "grad_norm": 1.567478895187378, "learning_rate": 7.706249568937685e-07, "loss": 0.3069, "step": 19697 }, { "epoch": 2.6341267718641346, "grad_norm": 1.4778313636779785, "learning_rate": 7.70069283082917e-07, "loss": 0.336, "step": 19698 }, { "epoch": 2.6342604974592136, "grad_norm": 1.4743849039077759, "learning_rate": 7.695138016611403e-07, "loss": 0.3426, "step": 19699 }, { "epoch": 2.6343942230542927, "grad_norm": 1.4713363647460938, "learning_rate": 7.689585126400135e-07, "loss": 0.3493, "step": 19700 }, { "epoch": 2.6345279486493713, "grad_norm": 1.541777491569519, "learning_rate": 7.684034160311138e-07, "loss": 0.3686, "step": 19701 }, { "epoch": 2.6346616742444504, "grad_norm": 1.9172792434692383, "learning_rate": 7.678485118460133e-07, "loss": 0.4178, "step": 19702 }, { "epoch": 2.634795399839529, "grad_norm": 1.7327104806900024, "learning_rate": 7.672938000962726e-07, "loss": 0.3957, "step": 19703 }, { "epoch": 2.634929125434608, "grad_norm": 1.4440220594406128, "learning_rate": 7.667392807934615e-07, "loss": 0.3698, "step": 19704 }, { "epoch": 2.635062851029687, "grad_norm": 1.3925327062606812, "learning_rate": 7.661849539491318e-07, "loss": 0.3403, "step": 19705 }, { "epoch": 2.6351965766247663, "grad_norm": 1.4897451400756836, "learning_rate": 7.656308195748441e-07, "loss": 0.3613, "step": 19706 }, { "epoch": 2.635330302219845, "grad_norm": 1.4798705577850342, "learning_rate": 7.650768776821438e-07, "loss": 0.3488, "step": 19707 }, { "epoch": 2.6354640278149235, "grad_norm": 1.408481240272522, "learning_rate": 7.645231282825794e-07, "loss": 0.3402, "step": 19708 }, { "epoch": 2.6355977534100026, "grad_norm": 1.6023201942443848, "learning_rate": 7.639695713876938e-07, "loss": 0.3522, "step": 19709 }, { "epoch": 2.6357314790050816, "grad_norm": 1.6901289224624634, "learning_rate": 7.634162070090234e-07, "loss": 0.4081, "step": 19710 }, { "epoch": 2.6358652046001607, "grad_norm": 1.626448631286621, "learning_rate": 7.628630351581035e-07, "loss": 0.3746, "step": 19711 }, { "epoch": 2.6359989301952393, "grad_norm": 1.475656270980835, "learning_rate": 7.623100558464658e-07, "loss": 0.2992, "step": 19712 }, { "epoch": 2.6361326557903184, "grad_norm": 1.5321446657180786, "learning_rate": 7.617572690856346e-07, "loss": 0.3395, "step": 19713 }, { "epoch": 2.636266381385397, "grad_norm": 1.5492538213729858, "learning_rate": 7.612046748871327e-07, "loss": 0.3655, "step": 19714 }, { "epoch": 2.636400106980476, "grad_norm": 1.636447548866272, "learning_rate": 7.606522732624799e-07, "loss": 0.3592, "step": 19715 }, { "epoch": 2.636533832575555, "grad_norm": 1.651064395904541, "learning_rate": 7.601000642231882e-07, "loss": 0.3851, "step": 19716 }, { "epoch": 2.636667558170634, "grad_norm": 1.7746621370315552, "learning_rate": 7.595480477807704e-07, "loss": 0.4742, "step": 19717 }, { "epoch": 2.636801283765713, "grad_norm": 1.5417121648788452, "learning_rate": 7.589962239467297e-07, "loss": 0.3661, "step": 19718 }, { "epoch": 2.6369350093607915, "grad_norm": 1.538307785987854, "learning_rate": 7.584445927325713e-07, "loss": 0.3748, "step": 19719 }, { "epoch": 2.6370687349558706, "grad_norm": 1.5849133729934692, "learning_rate": 7.578931541497925e-07, "loss": 0.3269, "step": 19720 }, { "epoch": 2.6372024605509496, "grad_norm": 1.694332480430603, "learning_rate": 7.573419082098865e-07, "loss": 0.3722, "step": 19721 }, { "epoch": 2.6373361861460283, "grad_norm": 1.5112320184707642, "learning_rate": 7.567908549243441e-07, "loss": 0.3276, "step": 19722 }, { "epoch": 2.6374699117411073, "grad_norm": 1.5857157707214355, "learning_rate": 7.562399943046527e-07, "loss": 0.3487, "step": 19723 }, { "epoch": 2.637603637336186, "grad_norm": 1.5652291774749756, "learning_rate": 7.556893263622911e-07, "loss": 0.3694, "step": 19724 }, { "epoch": 2.637737362931265, "grad_norm": 1.5392587184906006, "learning_rate": 7.551388511087421e-07, "loss": 0.3627, "step": 19725 }, { "epoch": 2.637871088526344, "grad_norm": 1.5864126682281494, "learning_rate": 7.545885685554743e-07, "loss": 0.3277, "step": 19726 }, { "epoch": 2.6380048141214227, "grad_norm": 1.504490613937378, "learning_rate": 7.540384787139643e-07, "loss": 0.3709, "step": 19727 }, { "epoch": 2.638138539716502, "grad_norm": 1.6559419631958008, "learning_rate": 7.534885815956727e-07, "loss": 0.3459, "step": 19728 }, { "epoch": 2.6382722653115804, "grad_norm": 1.4424320459365845, "learning_rate": 7.529388772120628e-07, "loss": 0.3748, "step": 19729 }, { "epoch": 2.6384059909066595, "grad_norm": 1.615708589553833, "learning_rate": 7.523893655745962e-07, "loss": 0.3857, "step": 19730 }, { "epoch": 2.6385397165017386, "grad_norm": 1.4666892290115356, "learning_rate": 7.518400466947229e-07, "loss": 0.3522, "step": 19731 }, { "epoch": 2.638673442096817, "grad_norm": 1.4887847900390625, "learning_rate": 7.512909205838948e-07, "loss": 0.3486, "step": 19732 }, { "epoch": 2.6388071676918963, "grad_norm": 1.520777940750122, "learning_rate": 7.507419872535559e-07, "loss": 0.3593, "step": 19733 }, { "epoch": 2.638940893286975, "grad_norm": 1.5798959732055664, "learning_rate": 7.501932467151507e-07, "loss": 0.3435, "step": 19734 }, { "epoch": 2.639074618882054, "grad_norm": 1.6038391590118408, "learning_rate": 7.496446989801165e-07, "loss": 0.3582, "step": 19735 }, { "epoch": 2.639208344477133, "grad_norm": 1.4596564769744873, "learning_rate": 7.490963440598864e-07, "loss": 0.3604, "step": 19736 }, { "epoch": 2.6393420700722117, "grad_norm": 1.3082728385925293, "learning_rate": 7.485481819658913e-07, "loss": 0.295, "step": 19737 }, { "epoch": 2.6394757956672907, "grad_norm": 1.5038464069366455, "learning_rate": 7.480002127095564e-07, "loss": 0.3164, "step": 19738 }, { "epoch": 2.6396095212623694, "grad_norm": 1.668889045715332, "learning_rate": 7.474524363023039e-07, "loss": 0.4225, "step": 19739 }, { "epoch": 2.6397432468574484, "grad_norm": 1.4537122249603271, "learning_rate": 7.469048527555512e-07, "loss": 0.3619, "step": 19740 }, { "epoch": 2.6398769724525275, "grad_norm": 1.5721479654312134, "learning_rate": 7.463574620807135e-07, "loss": 0.3741, "step": 19741 }, { "epoch": 2.6400106980476066, "grad_norm": 1.5630850791931152, "learning_rate": 7.458102642891984e-07, "loss": 0.4063, "step": 19742 }, { "epoch": 2.640144423642685, "grad_norm": 1.835421085357666, "learning_rate": 7.452632593924147e-07, "loss": 0.3837, "step": 19743 }, { "epoch": 2.6402781492377643, "grad_norm": 1.7075573205947876, "learning_rate": 7.447164474017632e-07, "loss": 0.3717, "step": 19744 }, { "epoch": 2.640411874832843, "grad_norm": 1.4152356386184692, "learning_rate": 7.44169828328637e-07, "loss": 0.3422, "step": 19745 }, { "epoch": 2.640545600427922, "grad_norm": 1.3786213397979736, "learning_rate": 7.43623402184438e-07, "loss": 0.3236, "step": 19746 }, { "epoch": 2.640679326023001, "grad_norm": 1.5594052076339722, "learning_rate": 7.430771689805504e-07, "loss": 0.3747, "step": 19747 }, { "epoch": 2.6408130516180797, "grad_norm": 1.647066593170166, "learning_rate": 7.425311287283599e-07, "loss": 0.3999, "step": 19748 }, { "epoch": 2.6409467772131587, "grad_norm": 1.6193326711654663, "learning_rate": 7.419852814392526e-07, "loss": 0.3508, "step": 19749 }, { "epoch": 2.6410805028082374, "grad_norm": 1.669316291809082, "learning_rate": 7.414396271245994e-07, "loss": 0.3482, "step": 19750 }, { "epoch": 2.6412142284033164, "grad_norm": 1.415085792541504, "learning_rate": 7.408941657957813e-07, "loss": 0.3269, "step": 19751 }, { "epoch": 2.6413479539983955, "grad_norm": 1.685892105102539, "learning_rate": 7.403488974641626e-07, "loss": 0.3767, "step": 19752 }, { "epoch": 2.641481679593474, "grad_norm": 1.4404512643814087, "learning_rate": 7.398038221411096e-07, "loss": 0.3099, "step": 19753 }, { "epoch": 2.641615405188553, "grad_norm": 1.4804177284240723, "learning_rate": 7.392589398379868e-07, "loss": 0.3288, "step": 19754 }, { "epoch": 2.641749130783632, "grad_norm": 1.6431386470794678, "learning_rate": 7.387142505661482e-07, "loss": 0.3684, "step": 19755 }, { "epoch": 2.641882856378711, "grad_norm": 1.444705605506897, "learning_rate": 7.381697543369492e-07, "loss": 0.3671, "step": 19756 }, { "epoch": 2.64201658197379, "grad_norm": 1.6506578922271729, "learning_rate": 7.376254511617398e-07, "loss": 0.3837, "step": 19757 }, { "epoch": 2.6421503075688686, "grad_norm": 1.5627301931381226, "learning_rate": 7.370813410518652e-07, "loss": 0.3436, "step": 19758 }, { "epoch": 2.6422840331639477, "grad_norm": 1.5087485313415527, "learning_rate": 7.365374240186651e-07, "loss": 0.3398, "step": 19759 }, { "epoch": 2.6424177587590263, "grad_norm": 1.4212716817855835, "learning_rate": 7.359937000734785e-07, "loss": 0.3675, "step": 19760 }, { "epoch": 2.6425514843541054, "grad_norm": 1.5440388917922974, "learning_rate": 7.354501692276394e-07, "loss": 0.3482, "step": 19761 }, { "epoch": 2.6426852099491844, "grad_norm": 1.684924840927124, "learning_rate": 7.349068314924757e-07, "loss": 0.3979, "step": 19762 }, { "epoch": 2.642818935544263, "grad_norm": 1.4977796077728271, "learning_rate": 7.343636868793147e-07, "loss": 0.3613, "step": 19763 }, { "epoch": 2.642952661139342, "grad_norm": 1.6045700311660767, "learning_rate": 7.33820735399473e-07, "loss": 0.4185, "step": 19764 }, { "epoch": 2.6430863867344208, "grad_norm": 1.6093525886535645, "learning_rate": 7.332779770642751e-07, "loss": 0.3672, "step": 19765 }, { "epoch": 2.6432201123295, "grad_norm": 1.4677379131317139, "learning_rate": 7.327354118850272e-07, "loss": 0.3382, "step": 19766 }, { "epoch": 2.643353837924579, "grad_norm": 1.4054431915283203, "learning_rate": 7.321930398730436e-07, "loss": 0.3482, "step": 19767 }, { "epoch": 2.6434875635196575, "grad_norm": 1.6680492162704468, "learning_rate": 7.316508610396289e-07, "loss": 0.3382, "step": 19768 }, { "epoch": 2.6436212891147366, "grad_norm": 1.4538205862045288, "learning_rate": 7.311088753960804e-07, "loss": 0.2845, "step": 19769 }, { "epoch": 2.643755014709815, "grad_norm": 1.687366247177124, "learning_rate": 7.305670829537004e-07, "loss": 0.4137, "step": 19770 }, { "epoch": 2.6438887403048943, "grad_norm": 1.7847224473953247, "learning_rate": 7.300254837237797e-07, "loss": 0.418, "step": 19771 }, { "epoch": 2.6440224658999734, "grad_norm": 1.6247503757476807, "learning_rate": 7.29484077717606e-07, "loss": 0.351, "step": 19772 }, { "epoch": 2.644156191495052, "grad_norm": 1.4451861381530762, "learning_rate": 7.289428649464658e-07, "loss": 0.3298, "step": 19773 }, { "epoch": 2.644289917090131, "grad_norm": 1.5634313821792603, "learning_rate": 7.28401845421639e-07, "loss": 0.362, "step": 19774 }, { "epoch": 2.6444236426852097, "grad_norm": 1.6519863605499268, "learning_rate": 7.278610191544067e-07, "loss": 0.4012, "step": 19775 }, { "epoch": 2.6445573682802888, "grad_norm": 1.7648208141326904, "learning_rate": 7.273203861560374e-07, "loss": 0.4192, "step": 19776 }, { "epoch": 2.644691093875368, "grad_norm": 1.6875418424606323, "learning_rate": 7.267799464378023e-07, "loss": 0.3976, "step": 19777 }, { "epoch": 2.644824819470447, "grad_norm": 1.388227939605713, "learning_rate": 7.262397000109645e-07, "loss": 0.3423, "step": 19778 }, { "epoch": 2.6449585450655255, "grad_norm": 1.7273554801940918, "learning_rate": 7.256996468867871e-07, "loss": 0.3515, "step": 19779 }, { "epoch": 2.6450922706606046, "grad_norm": 1.3983500003814697, "learning_rate": 7.251597870765259e-07, "loss": 0.3665, "step": 19780 }, { "epoch": 2.645225996255683, "grad_norm": 1.5141669511795044, "learning_rate": 7.246201205914338e-07, "loss": 0.3771, "step": 19781 }, { "epoch": 2.6453597218507623, "grad_norm": 1.422760009765625, "learning_rate": 7.240806474427598e-07, "loss": 0.3508, "step": 19782 }, { "epoch": 2.6454934474458414, "grad_norm": 1.5840612649917603, "learning_rate": 7.23541367641748e-07, "loss": 0.382, "step": 19783 }, { "epoch": 2.64562717304092, "grad_norm": 1.472528338432312, "learning_rate": 7.230022811996407e-07, "loss": 0.3316, "step": 19784 }, { "epoch": 2.645760898635999, "grad_norm": 1.5449398756027222, "learning_rate": 7.224633881276732e-07, "loss": 0.3503, "step": 19785 }, { "epoch": 2.6458946242310777, "grad_norm": 1.8165409564971924, "learning_rate": 7.21924688437079e-07, "loss": 0.3918, "step": 19786 }, { "epoch": 2.6460283498261568, "grad_norm": 1.5480653047561646, "learning_rate": 7.213861821390877e-07, "loss": 0.3221, "step": 19787 }, { "epoch": 2.646162075421236, "grad_norm": 1.6885993480682373, "learning_rate": 7.208478692449194e-07, "loss": 0.4337, "step": 19788 }, { "epoch": 2.6462958010163145, "grad_norm": 1.7999461889266968, "learning_rate": 7.203097497658019e-07, "loss": 0.4306, "step": 19789 }, { "epoch": 2.6464295266113935, "grad_norm": 1.8441200256347656, "learning_rate": 7.197718237129447e-07, "loss": 0.4277, "step": 19790 }, { "epoch": 2.646563252206472, "grad_norm": 1.3388022184371948, "learning_rate": 7.192340910975659e-07, "loss": 0.3213, "step": 19791 }, { "epoch": 2.646696977801551, "grad_norm": 1.6072968244552612, "learning_rate": 7.186965519308709e-07, "loss": 0.3761, "step": 19792 }, { "epoch": 2.6468307033966303, "grad_norm": 1.4616632461547852, "learning_rate": 7.181592062240638e-07, "loss": 0.364, "step": 19793 }, { "epoch": 2.646964428991709, "grad_norm": 1.6701246500015259, "learning_rate": 7.176220539883494e-07, "loss": 0.3966, "step": 19794 }, { "epoch": 2.647098154586788, "grad_norm": 1.6362653970718384, "learning_rate": 7.170850952349185e-07, "loss": 0.3919, "step": 19795 }, { "epoch": 2.6472318801818666, "grad_norm": 1.5370182991027832, "learning_rate": 7.165483299749665e-07, "loss": 0.3365, "step": 19796 }, { "epoch": 2.6473656057769457, "grad_norm": 1.7026193141937256, "learning_rate": 7.160117582196813e-07, "loss": 0.4003, "step": 19797 }, { "epoch": 2.6474993313720248, "grad_norm": 1.6964266300201416, "learning_rate": 7.154753799802472e-07, "loss": 0.3372, "step": 19798 }, { "epoch": 2.6476330569671034, "grad_norm": 1.4802186489105225, "learning_rate": 7.149391952678453e-07, "loss": 0.3726, "step": 19799 }, { "epoch": 2.6477667825621825, "grad_norm": 1.526956558227539, "learning_rate": 7.144032040936499e-07, "loss": 0.4067, "step": 19800 }, { "epoch": 2.647900508157261, "grad_norm": 1.4172106981277466, "learning_rate": 7.138674064688344e-07, "loss": 0.3489, "step": 19801 }, { "epoch": 2.64803423375234, "grad_norm": 1.426018238067627, "learning_rate": 7.133318024045677e-07, "loss": 0.3319, "step": 19802 }, { "epoch": 2.648167959347419, "grad_norm": 1.5189889669418335, "learning_rate": 7.127963919120129e-07, "loss": 0.3477, "step": 19803 }, { "epoch": 2.648301684942498, "grad_norm": 1.5168719291687012, "learning_rate": 7.1226117500233e-07, "loss": 0.3813, "step": 19804 }, { "epoch": 2.648435410537577, "grad_norm": 1.7775944471359253, "learning_rate": 7.117261516866758e-07, "loss": 0.3622, "step": 19805 }, { "epoch": 2.6485691361326555, "grad_norm": 1.4655529260635376, "learning_rate": 7.111913219762023e-07, "loss": 0.303, "step": 19806 }, { "epoch": 2.6487028617277346, "grad_norm": 1.6205755472183228, "learning_rate": 7.106566858820563e-07, "loss": 0.4123, "step": 19807 }, { "epoch": 2.6488365873228137, "grad_norm": 1.4475929737091064, "learning_rate": 7.101222434153854e-07, "loss": 0.3511, "step": 19808 }, { "epoch": 2.6489703129178928, "grad_norm": 1.561442494392395, "learning_rate": 7.095879945873241e-07, "loss": 0.3791, "step": 19809 }, { "epoch": 2.6491040385129714, "grad_norm": 1.4973715543746948, "learning_rate": 7.090539394090135e-07, "loss": 0.3417, "step": 19810 }, { "epoch": 2.64923776410805, "grad_norm": 1.5112063884735107, "learning_rate": 7.085200778915791e-07, "loss": 0.3539, "step": 19811 }, { "epoch": 2.649371489703129, "grad_norm": 1.6839542388916016, "learning_rate": 7.079864100461553e-07, "loss": 0.3828, "step": 19812 }, { "epoch": 2.649505215298208, "grad_norm": 1.700052261352539, "learning_rate": 7.074529358838644e-07, "loss": 0.3595, "step": 19813 }, { "epoch": 2.649638940893287, "grad_norm": 1.4718619585037231, "learning_rate": 7.069196554158219e-07, "loss": 0.3823, "step": 19814 }, { "epoch": 2.649772666488366, "grad_norm": 1.569956660270691, "learning_rate": 7.063865686531512e-07, "loss": 0.385, "step": 19815 }, { "epoch": 2.649906392083445, "grad_norm": 1.436689019203186, "learning_rate": 7.058536756069567e-07, "loss": 0.3279, "step": 19816 }, { "epoch": 2.6500401176785235, "grad_norm": 1.4086973667144775, "learning_rate": 7.053209762883483e-07, "loss": 0.3277, "step": 19817 }, { "epoch": 2.6501738432736026, "grad_norm": 1.5871057510375977, "learning_rate": 7.047884707084307e-07, "loss": 0.3912, "step": 19818 }, { "epoch": 2.6503075688686817, "grad_norm": 1.7710801362991333, "learning_rate": 7.042561588783015e-07, "loss": 0.3633, "step": 19819 }, { "epoch": 2.6504412944637603, "grad_norm": 1.4365084171295166, "learning_rate": 7.037240408090607e-07, "loss": 0.3009, "step": 19820 }, { "epoch": 2.6505750200588394, "grad_norm": 1.3276411294937134, "learning_rate": 7.03192116511795e-07, "loss": 0.3447, "step": 19821 }, { "epoch": 2.650708745653918, "grad_norm": 1.4707804918289185, "learning_rate": 7.026603859975933e-07, "loss": 0.3512, "step": 19822 }, { "epoch": 2.650842471248997, "grad_norm": 1.4275401830673218, "learning_rate": 7.021288492775391e-07, "loss": 0.3625, "step": 19823 }, { "epoch": 2.650976196844076, "grad_norm": 1.740086317062378, "learning_rate": 7.015975063627123e-07, "loss": 0.4437, "step": 19824 }, { "epoch": 2.6511099224391548, "grad_norm": 1.4595612287521362, "learning_rate": 7.010663572641885e-07, "loss": 0.4039, "step": 19825 }, { "epoch": 2.651243648034234, "grad_norm": 1.4893689155578613, "learning_rate": 7.005354019930377e-07, "loss": 0.3109, "step": 19826 }, { "epoch": 2.6513773736293125, "grad_norm": 1.4931366443634033, "learning_rate": 7.000046405603278e-07, "loss": 0.3526, "step": 19827 }, { "epoch": 2.6515110992243915, "grad_norm": 1.5984081029891968, "learning_rate": 6.994740729771221e-07, "loss": 0.3428, "step": 19828 }, { "epoch": 2.6516448248194706, "grad_norm": 1.5655324459075928, "learning_rate": 6.989436992544807e-07, "loss": 0.3761, "step": 19829 }, { "epoch": 2.6517785504145492, "grad_norm": 1.565459132194519, "learning_rate": 6.984135194034558e-07, "loss": 0.3501, "step": 19830 }, { "epoch": 2.6519122760096283, "grad_norm": 1.5754010677337646, "learning_rate": 6.978835334351008e-07, "loss": 0.3531, "step": 19831 }, { "epoch": 2.652046001604707, "grad_norm": 1.5657188892364502, "learning_rate": 6.973537413604647e-07, "loss": 0.3506, "step": 19832 }, { "epoch": 2.652179727199786, "grad_norm": 1.4824175834655762, "learning_rate": 6.968241431905853e-07, "loss": 0.3733, "step": 19833 }, { "epoch": 2.652313452794865, "grad_norm": 1.8440114259719849, "learning_rate": 6.962947389365071e-07, "loss": 0.41, "step": 19834 }, { "epoch": 2.6524471783899437, "grad_norm": 1.4448524713516235, "learning_rate": 6.95765528609259e-07, "loss": 0.3046, "step": 19835 }, { "epoch": 2.6525809039850228, "grad_norm": 1.571654200553894, "learning_rate": 6.95236512219879e-07, "loss": 0.3704, "step": 19836 }, { "epoch": 2.6527146295801014, "grad_norm": 1.7086185216903687, "learning_rate": 6.947076897793881e-07, "loss": 0.3516, "step": 19837 }, { "epoch": 2.6528483551751805, "grad_norm": 1.5983456373214722, "learning_rate": 6.941790612988097e-07, "loss": 0.383, "step": 19838 }, { "epoch": 2.6529820807702595, "grad_norm": 1.6153956651687622, "learning_rate": 6.936506267891685e-07, "loss": 0.3736, "step": 19839 }, { "epoch": 2.653115806365338, "grad_norm": 1.6302534341812134, "learning_rate": 6.931223862614711e-07, "loss": 0.3968, "step": 19840 }, { "epoch": 2.6532495319604172, "grad_norm": 1.3857886791229248, "learning_rate": 6.925943397267331e-07, "loss": 0.3426, "step": 19841 }, { "epoch": 2.653383257555496, "grad_norm": 1.577000617980957, "learning_rate": 6.920664871959603e-07, "loss": 0.3447, "step": 19842 }, { "epoch": 2.653516983150575, "grad_norm": 1.5896180868148804, "learning_rate": 6.915388286801539e-07, "loss": 0.4224, "step": 19843 }, { "epoch": 2.653650708745654, "grad_norm": 1.7300999164581299, "learning_rate": 6.910113641903138e-07, "loss": 0.3862, "step": 19844 }, { "epoch": 2.653784434340733, "grad_norm": 1.4600780010223389, "learning_rate": 6.904840937374336e-07, "loss": 0.3456, "step": 19845 }, { "epoch": 2.6539181599358117, "grad_norm": 1.5271409749984741, "learning_rate": 6.899570173325043e-07, "loss": 0.3515, "step": 19846 }, { "epoch": 2.6540518855308908, "grad_norm": 1.6505041122436523, "learning_rate": 6.894301349865129e-07, "loss": 0.3638, "step": 19847 }, { "epoch": 2.6541856111259694, "grad_norm": 1.4916082620620728, "learning_rate": 6.889034467104427e-07, "loss": 0.3215, "step": 19848 }, { "epoch": 2.6543193367210485, "grad_norm": 1.4528629779815674, "learning_rate": 6.883769525152661e-07, "loss": 0.3463, "step": 19849 }, { "epoch": 2.6544530623161275, "grad_norm": 1.7115123271942139, "learning_rate": 6.878506524119644e-07, "loss": 0.3691, "step": 19850 }, { "epoch": 2.654586787911206, "grad_norm": 1.5015507936477661, "learning_rate": 6.873245464115053e-07, "loss": 0.3304, "step": 19851 }, { "epoch": 2.6547205135062852, "grad_norm": 1.5385017395019531, "learning_rate": 6.867986345248534e-07, "loss": 0.3747, "step": 19852 }, { "epoch": 2.654854239101364, "grad_norm": 1.4977712631225586, "learning_rate": 6.862729167629745e-07, "loss": 0.336, "step": 19853 }, { "epoch": 2.654987964696443, "grad_norm": 1.7158117294311523, "learning_rate": 6.857473931368219e-07, "loss": 0.4123, "step": 19854 }, { "epoch": 2.655121690291522, "grad_norm": 1.4028549194335938, "learning_rate": 6.852220636573537e-07, "loss": 0.3497, "step": 19855 }, { "epoch": 2.6552554158866006, "grad_norm": 1.6149935722351074, "learning_rate": 6.846969283355176e-07, "loss": 0.3474, "step": 19856 }, { "epoch": 2.6553891414816797, "grad_norm": 1.6282262802124023, "learning_rate": 6.841719871822594e-07, "loss": 0.3403, "step": 19857 }, { "epoch": 2.6555228670767583, "grad_norm": 1.5465480089187622, "learning_rate": 6.836472402085237e-07, "loss": 0.3611, "step": 19858 }, { "epoch": 2.6556565926718374, "grad_norm": 1.5175998210906982, "learning_rate": 6.831226874252439e-07, "loss": 0.3378, "step": 19859 }, { "epoch": 2.6557903182669165, "grad_norm": 1.5405676364898682, "learning_rate": 6.825983288433602e-07, "loss": 0.3777, "step": 19860 }, { "epoch": 2.655924043861995, "grad_norm": 1.5204635858535767, "learning_rate": 6.82074164473796e-07, "loss": 0.3615, "step": 19861 }, { "epoch": 2.656057769457074, "grad_norm": 1.5734401941299438, "learning_rate": 6.815501943274804e-07, "loss": 0.3664, "step": 19862 }, { "epoch": 2.656191495052153, "grad_norm": 1.5918062925338745, "learning_rate": 6.810264184153336e-07, "loss": 0.3654, "step": 19863 }, { "epoch": 2.656325220647232, "grad_norm": 1.5607930421829224, "learning_rate": 6.805028367482736e-07, "loss": 0.3688, "step": 19864 }, { "epoch": 2.656458946242311, "grad_norm": 1.6355377435684204, "learning_rate": 6.799794493372148e-07, "loss": 0.3274, "step": 19865 }, { "epoch": 2.6565926718373896, "grad_norm": 1.7407686710357666, "learning_rate": 6.794562561930662e-07, "loss": 0.4188, "step": 19866 }, { "epoch": 2.6567263974324686, "grad_norm": 1.5318787097930908, "learning_rate": 6.789332573267327e-07, "loss": 0.3689, "step": 19867 }, { "epoch": 2.6568601230275473, "grad_norm": 1.7059862613677979, "learning_rate": 6.784104527491154e-07, "loss": 0.3792, "step": 19868 }, { "epoch": 2.6569938486226263, "grad_norm": 1.5117486715316772, "learning_rate": 6.778878424711133e-07, "loss": 0.3567, "step": 19869 }, { "epoch": 2.6571275742177054, "grad_norm": 1.408552885055542, "learning_rate": 6.773654265036189e-07, "loss": 0.3616, "step": 19870 }, { "epoch": 2.657261299812784, "grad_norm": 1.5598475933074951, "learning_rate": 6.768432048575213e-07, "loss": 0.3937, "step": 19871 }, { "epoch": 2.657395025407863, "grad_norm": 1.5333913564682007, "learning_rate": 6.763211775437073e-07, "loss": 0.3419, "step": 19872 }, { "epoch": 2.6575287510029417, "grad_norm": 1.4545553922653198, "learning_rate": 6.757993445730537e-07, "loss": 0.3508, "step": 19873 }, { "epoch": 2.657662476598021, "grad_norm": 1.5719926357269287, "learning_rate": 6.752777059564431e-07, "loss": 0.3852, "step": 19874 }, { "epoch": 2.6577962021931, "grad_norm": 1.5418215990066528, "learning_rate": 6.747562617047432e-07, "loss": 0.3534, "step": 19875 }, { "epoch": 2.6579299277881785, "grad_norm": 1.527799367904663, "learning_rate": 6.742350118288277e-07, "loss": 0.3364, "step": 19876 }, { "epoch": 2.6580636533832576, "grad_norm": 1.4655767679214478, "learning_rate": 6.737139563395601e-07, "loss": 0.3454, "step": 19877 }, { "epoch": 2.658197378978336, "grad_norm": 1.5679669380187988, "learning_rate": 6.731930952477983e-07, "loss": 0.3486, "step": 19878 }, { "epoch": 2.6583311045734153, "grad_norm": 1.468704342842102, "learning_rate": 6.726724285644048e-07, "loss": 0.3306, "step": 19879 }, { "epoch": 2.6584648301684943, "grad_norm": 1.5013827085494995, "learning_rate": 6.721519563002276e-07, "loss": 0.3988, "step": 19880 }, { "epoch": 2.6585985557635734, "grad_norm": 1.7555853128433228, "learning_rate": 6.71631678466117e-07, "loss": 0.3604, "step": 19881 }, { "epoch": 2.658732281358652, "grad_norm": 1.6543340682983398, "learning_rate": 6.711115950729174e-07, "loss": 0.3602, "step": 19882 }, { "epoch": 2.658866006953731, "grad_norm": 1.540244221687317, "learning_rate": 6.705917061314693e-07, "loss": 0.3753, "step": 19883 }, { "epoch": 2.6589997325488097, "grad_norm": 1.5035181045532227, "learning_rate": 6.700720116526116e-07, "loss": 0.3217, "step": 19884 }, { "epoch": 2.659133458143889, "grad_norm": 1.644747257232666, "learning_rate": 6.695525116471746e-07, "loss": 0.3697, "step": 19885 }, { "epoch": 2.659267183738968, "grad_norm": 1.4243842363357544, "learning_rate": 6.690332061259863e-07, "loss": 0.382, "step": 19886 }, { "epoch": 2.6594009093340465, "grad_norm": 1.4170126914978027, "learning_rate": 6.685140950998725e-07, "loss": 0.3226, "step": 19887 }, { "epoch": 2.6595346349291256, "grad_norm": 1.4763046503067017, "learning_rate": 6.679951785796534e-07, "loss": 0.343, "step": 19888 }, { "epoch": 2.659668360524204, "grad_norm": 1.7905995845794678, "learning_rate": 6.674764565761449e-07, "loss": 0.4187, "step": 19889 }, { "epoch": 2.6598020861192833, "grad_norm": 1.4849501848220825, "learning_rate": 6.669579291001593e-07, "loss": 0.362, "step": 19890 }, { "epoch": 2.6599358117143623, "grad_norm": 1.6455808877944946, "learning_rate": 6.664395961625048e-07, "loss": 0.3826, "step": 19891 }, { "epoch": 2.660069537309441, "grad_norm": 1.6777042150497437, "learning_rate": 6.659214577739858e-07, "loss": 0.3513, "step": 19892 }, { "epoch": 2.66020326290452, "grad_norm": 1.6227328777313232, "learning_rate": 6.65403513945404e-07, "loss": 0.3483, "step": 19893 }, { "epoch": 2.6603369884995987, "grad_norm": 1.5736827850341797, "learning_rate": 6.648857646875506e-07, "loss": 0.3717, "step": 19894 }, { "epoch": 2.6604707140946777, "grad_norm": 1.4937360286712646, "learning_rate": 6.643682100112226e-07, "loss": 0.3865, "step": 19895 }, { "epoch": 2.660604439689757, "grad_norm": 1.622722864151001, "learning_rate": 6.638508499272045e-07, "loss": 0.3931, "step": 19896 }, { "epoch": 2.6607381652848354, "grad_norm": 1.6487196683883667, "learning_rate": 6.633336844462834e-07, "loss": 0.3986, "step": 19897 }, { "epoch": 2.6608718908799145, "grad_norm": 1.453078031539917, "learning_rate": 6.628167135792385e-07, "loss": 0.3551, "step": 19898 }, { "epoch": 2.661005616474993, "grad_norm": 1.508697509765625, "learning_rate": 6.62299937336841e-07, "loss": 0.3569, "step": 19899 }, { "epoch": 2.661139342070072, "grad_norm": 1.473039984703064, "learning_rate": 6.617833557298692e-07, "loss": 0.3209, "step": 19900 }, { "epoch": 2.6612730676651513, "grad_norm": 1.6444505453109741, "learning_rate": 6.612669687690865e-07, "loss": 0.4043, "step": 19901 }, { "epoch": 2.66140679326023, "grad_norm": 1.6796361207962036, "learning_rate": 6.607507764652554e-07, "loss": 0.3975, "step": 19902 }, { "epoch": 2.661540518855309, "grad_norm": 1.6511452198028564, "learning_rate": 6.602347788291419e-07, "loss": 0.3833, "step": 19903 }, { "epoch": 2.6616742444503876, "grad_norm": 1.7967760562896729, "learning_rate": 6.597189758714928e-07, "loss": 0.386, "step": 19904 }, { "epoch": 2.6618079700454667, "grad_norm": 1.7029051780700684, "learning_rate": 6.592033676030685e-07, "loss": 0.3753, "step": 19905 }, { "epoch": 2.6619416956405457, "grad_norm": 1.5597264766693115, "learning_rate": 6.586879540346092e-07, "loss": 0.3704, "step": 19906 }, { "epoch": 2.6620754212356244, "grad_norm": 1.6921013593673706, "learning_rate": 6.581727351768608e-07, "loss": 0.3864, "step": 19907 }, { "epoch": 2.6622091468307034, "grad_norm": 1.52169930934906, "learning_rate": 6.576577110405635e-07, "loss": 0.309, "step": 19908 }, { "epoch": 2.662342872425782, "grad_norm": 1.7437068223953247, "learning_rate": 6.571428816364512e-07, "loss": 0.3946, "step": 19909 }, { "epoch": 2.662476598020861, "grad_norm": 1.4568579196929932, "learning_rate": 6.56628246975255e-07, "loss": 0.365, "step": 19910 }, { "epoch": 2.66261032361594, "grad_norm": 1.597946047782898, "learning_rate": 6.56113807067702e-07, "loss": 0.3449, "step": 19911 }, { "epoch": 2.6627440492110193, "grad_norm": 1.6338046789169312, "learning_rate": 6.555995619245159e-07, "loss": 0.3218, "step": 19912 }, { "epoch": 2.662877774806098, "grad_norm": 1.5306764841079712, "learning_rate": 6.550855115564159e-07, "loss": 0.3316, "step": 19913 }, { "epoch": 2.6630115004011765, "grad_norm": 1.5425440073013306, "learning_rate": 6.545716559741166e-07, "loss": 0.3819, "step": 19914 }, { "epoch": 2.6631452259962556, "grad_norm": 1.6091891527175903, "learning_rate": 6.540579951883275e-07, "loss": 0.3806, "step": 19915 }, { "epoch": 2.6632789515913347, "grad_norm": 1.6633224487304688, "learning_rate": 6.535445292097564e-07, "loss": 0.3584, "step": 19916 }, { "epoch": 2.6634126771864137, "grad_norm": 1.6942492723464966, "learning_rate": 6.530312580491082e-07, "loss": 0.3992, "step": 19917 }, { "epoch": 2.6635464027814924, "grad_norm": 1.4368776082992554, "learning_rate": 6.525181817170756e-07, "loss": 0.3363, "step": 19918 }, { "epoch": 2.6636801283765714, "grad_norm": 1.6549551486968994, "learning_rate": 6.520053002243609e-07, "loss": 0.3403, "step": 19919 }, { "epoch": 2.66381385397165, "grad_norm": 1.5179585218429565, "learning_rate": 6.514926135816469e-07, "loss": 0.3731, "step": 19920 }, { "epoch": 2.663947579566729, "grad_norm": 1.564812183380127, "learning_rate": 6.509801217996259e-07, "loss": 0.3673, "step": 19921 }, { "epoch": 2.664081305161808, "grad_norm": 1.5664849281311035, "learning_rate": 6.504678248889785e-07, "loss": 0.3594, "step": 19922 }, { "epoch": 2.664215030756887, "grad_norm": 1.6470412015914917, "learning_rate": 6.499557228603803e-07, "loss": 0.3456, "step": 19923 }, { "epoch": 2.664348756351966, "grad_norm": 1.7087093591690063, "learning_rate": 6.49443815724512e-07, "loss": 0.3412, "step": 19924 }, { "epoch": 2.6644824819470445, "grad_norm": 1.7388477325439453, "learning_rate": 6.489321034920382e-07, "loss": 0.4009, "step": 19925 }, { "epoch": 2.6646162075421236, "grad_norm": 1.3731889724731445, "learning_rate": 6.484205861736259e-07, "loss": 0.3168, "step": 19926 }, { "epoch": 2.6647499331372027, "grad_norm": 1.438269019126892, "learning_rate": 6.479092637799378e-07, "loss": 0.3591, "step": 19927 }, { "epoch": 2.6648836587322813, "grad_norm": 1.6568113565444946, "learning_rate": 6.473981363216309e-07, "loss": 0.3649, "step": 19928 }, { "epoch": 2.6650173843273604, "grad_norm": 1.5853067636489868, "learning_rate": 6.468872038093643e-07, "loss": 0.3377, "step": 19929 }, { "epoch": 2.665151109922439, "grad_norm": 1.4314448833465576, "learning_rate": 6.463764662537809e-07, "loss": 0.3285, "step": 19930 }, { "epoch": 2.665284835517518, "grad_norm": 1.5260133743286133, "learning_rate": 6.458659236655307e-07, "loss": 0.3849, "step": 19931 }, { "epoch": 2.665418561112597, "grad_norm": 1.530300498008728, "learning_rate": 6.453555760552544e-07, "loss": 0.359, "step": 19932 }, { "epoch": 2.6655522867076757, "grad_norm": 1.7612539529800415, "learning_rate": 6.448454234335888e-07, "loss": 0.3792, "step": 19933 }, { "epoch": 2.665686012302755, "grad_norm": 1.7463163137435913, "learning_rate": 6.4433546581117e-07, "loss": 0.4172, "step": 19934 }, { "epoch": 2.6658197378978334, "grad_norm": 1.5070174932479858, "learning_rate": 6.43825703198625e-07, "loss": 0.362, "step": 19935 }, { "epoch": 2.6659534634929125, "grad_norm": 1.5224629640579224, "learning_rate": 6.433161356065798e-07, "loss": 0.3652, "step": 19936 }, { "epoch": 2.6660871890879916, "grad_norm": 1.6397041082382202, "learning_rate": 6.42806763045657e-07, "loss": 0.3879, "step": 19937 }, { "epoch": 2.66622091468307, "grad_norm": 1.8572028875350952, "learning_rate": 6.422975855264757e-07, "loss": 0.4149, "step": 19938 }, { "epoch": 2.6663546402781493, "grad_norm": 1.6111172437667847, "learning_rate": 6.417886030596421e-07, "loss": 0.4006, "step": 19939 }, { "epoch": 2.666488365873228, "grad_norm": 1.547215223312378, "learning_rate": 6.412798156557732e-07, "loss": 0.3646, "step": 19940 }, { "epoch": 2.666622091468307, "grad_norm": 1.6440682411193848, "learning_rate": 6.407712233254726e-07, "loss": 0.3553, "step": 19941 }, { "epoch": 2.666755817063386, "grad_norm": 1.6081055402755737, "learning_rate": 6.402628260793365e-07, "loss": 0.3839, "step": 19942 }, { "epoch": 2.6668895426584647, "grad_norm": 1.5576151609420776, "learning_rate": 6.397546239279684e-07, "loss": 0.3689, "step": 19943 }, { "epoch": 2.6670232682535437, "grad_norm": 1.4261161088943481, "learning_rate": 6.392466168819555e-07, "loss": 0.3482, "step": 19944 }, { "epoch": 2.6671569938486224, "grad_norm": 1.633931279182434, "learning_rate": 6.387388049518927e-07, "loss": 0.3958, "step": 19945 }, { "epoch": 2.6672907194437014, "grad_norm": 1.595692753791809, "learning_rate": 6.382311881483605e-07, "loss": 0.3319, "step": 19946 }, { "epoch": 2.6674244450387805, "grad_norm": 1.6369017362594604, "learning_rate": 6.377237664819392e-07, "loss": 0.3228, "step": 19947 }, { "epoch": 2.6675581706338596, "grad_norm": 1.4656267166137695, "learning_rate": 6.372165399632102e-07, "loss": 0.3108, "step": 19948 }, { "epoch": 2.667691896228938, "grad_norm": 1.4643809795379639, "learning_rate": 6.367095086027419e-07, "loss": 0.3471, "step": 19949 }, { "epoch": 2.6678256218240173, "grad_norm": 1.6563465595245361, "learning_rate": 6.362026724111036e-07, "loss": 0.3486, "step": 19950 }, { "epoch": 2.667959347419096, "grad_norm": 1.520506501197815, "learning_rate": 6.356960313988614e-07, "loss": 0.3426, "step": 19951 }, { "epoch": 2.668093073014175, "grad_norm": 1.5092777013778687, "learning_rate": 6.351895855765733e-07, "loss": 0.3381, "step": 19952 }, { "epoch": 2.668226798609254, "grad_norm": 1.5897568464279175, "learning_rate": 6.346833349547988e-07, "loss": 0.3508, "step": 19953 }, { "epoch": 2.6683605242043327, "grad_norm": 1.4701083898544312, "learning_rate": 6.34177279544087e-07, "loss": 0.3465, "step": 19954 }, { "epoch": 2.6684942497994117, "grad_norm": 1.7193269729614258, "learning_rate": 6.336714193549887e-07, "loss": 0.3326, "step": 19955 }, { "epoch": 2.6686279753944904, "grad_norm": 1.7678195238113403, "learning_rate": 6.331657543980474e-07, "loss": 0.3491, "step": 19956 }, { "epoch": 2.6687617009895694, "grad_norm": 1.482303261756897, "learning_rate": 6.326602846838037e-07, "loss": 0.3452, "step": 19957 }, { "epoch": 2.6688954265846485, "grad_norm": 1.6284412145614624, "learning_rate": 6.321550102227902e-07, "loss": 0.3859, "step": 19958 }, { "epoch": 2.669029152179727, "grad_norm": 1.587695598602295, "learning_rate": 6.316499310255419e-07, "loss": 0.3358, "step": 19959 }, { "epoch": 2.669162877774806, "grad_norm": 1.489880919456482, "learning_rate": 6.31145047102587e-07, "loss": 0.3407, "step": 19960 }, { "epoch": 2.669296603369885, "grad_norm": 1.5229634046554565, "learning_rate": 6.306403584644494e-07, "loss": 0.3365, "step": 19961 }, { "epoch": 2.669430328964964, "grad_norm": 1.5497459173202515, "learning_rate": 6.301358651216482e-07, "loss": 0.3858, "step": 19962 }, { "epoch": 2.669564054560043, "grad_norm": 1.530900478363037, "learning_rate": 6.296315670846964e-07, "loss": 0.348, "step": 19963 }, { "epoch": 2.6696977801551216, "grad_norm": 1.5213403701782227, "learning_rate": 6.29127464364111e-07, "loss": 0.3667, "step": 19964 }, { "epoch": 2.6698315057502007, "grad_norm": 1.5871517658233643, "learning_rate": 6.286235569703958e-07, "loss": 0.36, "step": 19965 }, { "epoch": 2.6699652313452793, "grad_norm": 1.6712712049484253, "learning_rate": 6.281198449140525e-07, "loss": 0.3969, "step": 19966 }, { "epoch": 2.6700989569403584, "grad_norm": 1.6878187656402588, "learning_rate": 6.276163282055869e-07, "loss": 0.3822, "step": 19967 }, { "epoch": 2.6702326825354374, "grad_norm": 1.459533929824829, "learning_rate": 6.271130068554876e-07, "loss": 0.3602, "step": 19968 }, { "epoch": 2.670366408130516, "grad_norm": 1.4354403018951416, "learning_rate": 6.266098808742515e-07, "loss": 0.3415, "step": 19969 }, { "epoch": 2.670500133725595, "grad_norm": 1.6031494140625, "learning_rate": 6.261069502723616e-07, "loss": 0.3647, "step": 19970 }, { "epoch": 2.6706338593206738, "grad_norm": 1.4494433403015137, "learning_rate": 6.256042150603025e-07, "loss": 0.3584, "step": 19971 }, { "epoch": 2.670767584915753, "grad_norm": 1.4605907201766968, "learning_rate": 6.251016752485539e-07, "loss": 0.4064, "step": 19972 }, { "epoch": 2.670901310510832, "grad_norm": 1.5953834056854248, "learning_rate": 6.245993308475884e-07, "loss": 0.3559, "step": 19973 }, { "epoch": 2.6710350361059105, "grad_norm": 1.5787290334701538, "learning_rate": 6.240971818678798e-07, "loss": 0.4145, "step": 19974 }, { "epoch": 2.6711687617009896, "grad_norm": 1.4142094850540161, "learning_rate": 6.235952283198932e-07, "loss": 0.3224, "step": 19975 }, { "epoch": 2.6713024872960682, "grad_norm": 1.7694220542907715, "learning_rate": 6.230934702140923e-07, "loss": 0.3953, "step": 19976 }, { "epoch": 2.6714362128911473, "grad_norm": 1.5131410360336304, "learning_rate": 6.225919075609354e-07, "loss": 0.3618, "step": 19977 }, { "epoch": 2.6715699384862264, "grad_norm": 1.572045922279358, "learning_rate": 6.220905403708766e-07, "loss": 0.3617, "step": 19978 }, { "epoch": 2.671703664081305, "grad_norm": 1.4961086511611938, "learning_rate": 6.215893686543672e-07, "loss": 0.3209, "step": 19979 }, { "epoch": 2.671837389676384, "grad_norm": 1.5664687156677246, "learning_rate": 6.210883924218525e-07, "loss": 0.3729, "step": 19980 }, { "epoch": 2.6719711152714627, "grad_norm": 1.5498435497283936, "learning_rate": 6.205876116837761e-07, "loss": 0.3514, "step": 19981 }, { "epoch": 2.6721048408665418, "grad_norm": 1.3279353380203247, "learning_rate": 6.200870264505754e-07, "loss": 0.2999, "step": 19982 }, { "epoch": 2.672238566461621, "grad_norm": 1.4898501634597778, "learning_rate": 6.195866367326875e-07, "loss": 0.3652, "step": 19983 }, { "epoch": 2.6723722920567, "grad_norm": 1.3464760780334473, "learning_rate": 6.190864425405363e-07, "loss": 0.3421, "step": 19984 }, { "epoch": 2.6725060176517785, "grad_norm": 1.5027506351470947, "learning_rate": 6.185864438845523e-07, "loss": 0.3068, "step": 19985 }, { "epoch": 2.6726397432468576, "grad_norm": 1.7076531648635864, "learning_rate": 6.180866407751595e-07, "loss": 0.3528, "step": 19986 }, { "epoch": 2.6727734688419362, "grad_norm": 1.5985829830169678, "learning_rate": 6.175870332227707e-07, "loss": 0.3692, "step": 19987 }, { "epoch": 2.6729071944370153, "grad_norm": 1.5114191770553589, "learning_rate": 6.17087621237804e-07, "loss": 0.3529, "step": 19988 }, { "epoch": 2.6730409200320944, "grad_norm": 1.759737253189087, "learning_rate": 6.165884048306647e-07, "loss": 0.4068, "step": 19989 }, { "epoch": 2.673174645627173, "grad_norm": 1.550301432609558, "learning_rate": 6.160893840117643e-07, "loss": 0.3425, "step": 19990 }, { "epoch": 2.673308371222252, "grad_norm": 1.623882532119751, "learning_rate": 6.155905587915001e-07, "loss": 0.3515, "step": 19991 }, { "epoch": 2.6734420968173307, "grad_norm": 1.80980384349823, "learning_rate": 6.150919291802704e-07, "loss": 0.4122, "step": 19992 }, { "epoch": 2.6735758224124098, "grad_norm": 1.5744447708129883, "learning_rate": 6.145934951884691e-07, "loss": 0.3548, "step": 19993 }, { "epoch": 2.673709548007489, "grad_norm": 1.4680581092834473, "learning_rate": 6.140952568264858e-07, "loss": 0.3546, "step": 19994 }, { "epoch": 2.6738432736025675, "grad_norm": 1.5514980554580688, "learning_rate": 6.135972141047042e-07, "loss": 0.3622, "step": 19995 }, { "epoch": 2.6739769991976465, "grad_norm": 1.59768807888031, "learning_rate": 6.130993670335083e-07, "loss": 0.3548, "step": 19996 }, { "epoch": 2.674110724792725, "grad_norm": 1.3371001482009888, "learning_rate": 6.126017156232734e-07, "loss": 0.3248, "step": 19997 }, { "epoch": 2.6742444503878042, "grad_norm": 1.5018333196640015, "learning_rate": 6.121042598843729e-07, "loss": 0.3353, "step": 19998 }, { "epoch": 2.6743781759828833, "grad_norm": 1.5479469299316406, "learning_rate": 6.116069998271756e-07, "loss": 0.4087, "step": 19999 }, { "epoch": 2.674511901577962, "grad_norm": 1.579924464225769, "learning_rate": 6.111099354620476e-07, "loss": 0.4007, "step": 20000 }, { "epoch": 2.674645627173041, "grad_norm": 1.5959900617599487, "learning_rate": 6.106130667993482e-07, "loss": 0.3682, "step": 20001 }, { "epoch": 2.6747793527681196, "grad_norm": 1.6480743885040283, "learning_rate": 6.101163938494359e-07, "loss": 0.393, "step": 20002 }, { "epoch": 2.6749130783631987, "grad_norm": 1.4970154762268066, "learning_rate": 6.096199166226602e-07, "loss": 0.3639, "step": 20003 }, { "epoch": 2.6750468039582778, "grad_norm": 1.5489505529403687, "learning_rate": 6.091236351293717e-07, "loss": 0.346, "step": 20004 }, { "epoch": 2.6751805295533564, "grad_norm": 1.4105826616287231, "learning_rate": 6.086275493799165e-07, "loss": 0.3386, "step": 20005 }, { "epoch": 2.6753142551484355, "grad_norm": 1.5864354372024536, "learning_rate": 6.081316593846331e-07, "loss": 0.369, "step": 20006 }, { "epoch": 2.675447980743514, "grad_norm": 1.3754163980484009, "learning_rate": 6.076359651538588e-07, "loss": 0.3387, "step": 20007 }, { "epoch": 2.675581706338593, "grad_norm": 1.5301357507705688, "learning_rate": 6.071404666979231e-07, "loss": 0.3255, "step": 20008 }, { "epoch": 2.6757154319336722, "grad_norm": 1.3993881940841675, "learning_rate": 6.066451640271587e-07, "loss": 0.3225, "step": 20009 }, { "epoch": 2.675849157528751, "grad_norm": 1.2642285823822021, "learning_rate": 6.061500571518864e-07, "loss": 0.3561, "step": 20010 }, { "epoch": 2.67598288312383, "grad_norm": 1.6877914667129517, "learning_rate": 6.056551460824279e-07, "loss": 0.3961, "step": 20011 }, { "epoch": 2.6761166087189086, "grad_norm": 1.6665089130401611, "learning_rate": 6.05160430829097e-07, "loss": 0.3742, "step": 20012 }, { "epoch": 2.6762503343139876, "grad_norm": 1.596577525138855, "learning_rate": 6.046659114022068e-07, "loss": 0.3561, "step": 20013 }, { "epoch": 2.6763840599090667, "grad_norm": 1.763180136680603, "learning_rate": 6.04171587812068e-07, "loss": 0.423, "step": 20014 }, { "epoch": 2.6765177855041458, "grad_norm": 1.7104454040527344, "learning_rate": 6.036774600689798e-07, "loss": 0.4437, "step": 20015 }, { "epoch": 2.6766515110992244, "grad_norm": 1.4546639919281006, "learning_rate": 6.031835281832433e-07, "loss": 0.3574, "step": 20016 }, { "epoch": 2.676785236694303, "grad_norm": 1.5124884843826294, "learning_rate": 6.026897921651553e-07, "loss": 0.3687, "step": 20017 }, { "epoch": 2.676918962289382, "grad_norm": 1.5104905366897583, "learning_rate": 6.021962520250058e-07, "loss": 0.3623, "step": 20018 }, { "epoch": 2.677052687884461, "grad_norm": 1.5597808361053467, "learning_rate": 6.017029077730829e-07, "loss": 0.3446, "step": 20019 }, { "epoch": 2.6771864134795402, "grad_norm": 1.641136884689331, "learning_rate": 6.012097594196698e-07, "loss": 0.3928, "step": 20020 }, { "epoch": 2.677320139074619, "grad_norm": 1.6957744359970093, "learning_rate": 6.007168069750446e-07, "loss": 0.3854, "step": 20021 }, { "epoch": 2.677453864669698, "grad_norm": 1.575194001197815, "learning_rate": 6.002240504494849e-07, "loss": 0.3478, "step": 20022 }, { "epoch": 2.6775875902647766, "grad_norm": 1.5909523963928223, "learning_rate": 5.997314898532591e-07, "loss": 0.3325, "step": 20023 }, { "epoch": 2.6777213158598556, "grad_norm": 1.6662592887878418, "learning_rate": 5.992391251966356e-07, "loss": 0.3891, "step": 20024 }, { "epoch": 2.6778550414549347, "grad_norm": 1.7494367361068726, "learning_rate": 5.987469564898773e-07, "loss": 0.4163, "step": 20025 }, { "epoch": 2.6779887670500133, "grad_norm": 1.6909092664718628, "learning_rate": 5.982549837432439e-07, "loss": 0.3757, "step": 20026 }, { "epoch": 2.6781224926450924, "grad_norm": 1.6549019813537598, "learning_rate": 5.977632069669859e-07, "loss": 0.3736, "step": 20027 }, { "epoch": 2.678256218240171, "grad_norm": 1.8106907606124878, "learning_rate": 5.972716261713607e-07, "loss": 0.3784, "step": 20028 }, { "epoch": 2.67838994383525, "grad_norm": 1.5486547946929932, "learning_rate": 5.967802413666068e-07, "loss": 0.3515, "step": 20029 }, { "epoch": 2.678523669430329, "grad_norm": 1.4902559518814087, "learning_rate": 5.962890525629727e-07, "loss": 0.3446, "step": 20030 }, { "epoch": 2.678657395025408, "grad_norm": 1.4929165840148926, "learning_rate": 5.957980597706969e-07, "loss": 0.3435, "step": 20031 }, { "epoch": 2.678791120620487, "grad_norm": 1.5461690425872803, "learning_rate": 5.953072630000079e-07, "loss": 0.3419, "step": 20032 }, { "epoch": 2.6789248462155655, "grad_norm": 1.472486972808838, "learning_rate": 5.94816662261144e-07, "loss": 0.366, "step": 20033 }, { "epoch": 2.6790585718106446, "grad_norm": 1.6837760210037231, "learning_rate": 5.943262575643239e-07, "loss": 0.4023, "step": 20034 }, { "epoch": 2.6791922974057236, "grad_norm": 1.4544481039047241, "learning_rate": 5.938360489197736e-07, "loss": 0.334, "step": 20035 }, { "epoch": 2.6793260230008022, "grad_norm": 1.7290195226669312, "learning_rate": 5.933460363377108e-07, "loss": 0.3861, "step": 20036 }, { "epoch": 2.6794597485958813, "grad_norm": 1.5007505416870117, "learning_rate": 5.928562198283472e-07, "loss": 0.3207, "step": 20037 }, { "epoch": 2.67959347419096, "grad_norm": 1.5000938177108765, "learning_rate": 5.923665994018946e-07, "loss": 0.3309, "step": 20038 }, { "epoch": 2.679727199786039, "grad_norm": 1.60750150680542, "learning_rate": 5.918771750685581e-07, "loss": 0.3595, "step": 20039 }, { "epoch": 2.679860925381118, "grad_norm": 1.5071264505386353, "learning_rate": 5.913879468385397e-07, "loss": 0.3289, "step": 20040 }, { "epoch": 2.6799946509761967, "grad_norm": 1.4741175174713135, "learning_rate": 5.908989147220367e-07, "loss": 0.3733, "step": 20041 }, { "epoch": 2.680128376571276, "grad_norm": 1.6908389329910278, "learning_rate": 5.904100787292411e-07, "loss": 0.3714, "step": 20042 }, { "epoch": 2.6802621021663544, "grad_norm": 1.5359634160995483, "learning_rate": 5.899214388703445e-07, "loss": 0.3512, "step": 20043 }, { "epoch": 2.6803958277614335, "grad_norm": 1.5638254880905151, "learning_rate": 5.894329951555311e-07, "loss": 0.3461, "step": 20044 }, { "epoch": 2.6805295533565126, "grad_norm": 1.3662681579589844, "learning_rate": 5.889447475949805e-07, "loss": 0.3281, "step": 20045 }, { "epoch": 2.680663278951591, "grad_norm": 1.529213309288025, "learning_rate": 5.884566961988724e-07, "loss": 0.4165, "step": 20046 }, { "epoch": 2.6807970045466702, "grad_norm": 1.4362126588821411, "learning_rate": 5.879688409773798e-07, "loss": 0.3347, "step": 20047 }, { "epoch": 2.680930730141749, "grad_norm": 1.7076367139816284, "learning_rate": 5.874811819406678e-07, "loss": 0.3602, "step": 20048 }, { "epoch": 2.681064455736828, "grad_norm": 1.5246272087097168, "learning_rate": 5.86993719098905e-07, "loss": 0.3556, "step": 20049 }, { "epoch": 2.681198181331907, "grad_norm": 1.3723030090332031, "learning_rate": 5.865064524622522e-07, "loss": 0.3669, "step": 20050 }, { "epoch": 2.681331906926986, "grad_norm": 1.3810194730758667, "learning_rate": 5.860193820408621e-07, "loss": 0.35, "step": 20051 }, { "epoch": 2.6814656325220647, "grad_norm": 1.3940906524658203, "learning_rate": 5.855325078448926e-07, "loss": 0.3366, "step": 20052 }, { "epoch": 2.681599358117144, "grad_norm": 1.6015270948410034, "learning_rate": 5.850458298844863e-07, "loss": 0.3628, "step": 20053 }, { "epoch": 2.6817330837122224, "grad_norm": 1.8359153270721436, "learning_rate": 5.845593481697931e-07, "loss": 0.4393, "step": 20054 }, { "epoch": 2.6818668093073015, "grad_norm": 1.4320597648620605, "learning_rate": 5.840730627109492e-07, "loss": 0.3209, "step": 20055 }, { "epoch": 2.6820005349023806, "grad_norm": 1.4067903757095337, "learning_rate": 5.835869735180932e-07, "loss": 0.3549, "step": 20056 }, { "epoch": 2.682134260497459, "grad_norm": 1.6775288581848145, "learning_rate": 5.831010806013548e-07, "loss": 0.3688, "step": 20057 }, { "epoch": 2.6822679860925382, "grad_norm": 1.645927906036377, "learning_rate": 5.826153839708637e-07, "loss": 0.3456, "step": 20058 }, { "epoch": 2.682401711687617, "grad_norm": 1.5379769802093506, "learning_rate": 5.82129883636745e-07, "loss": 0.3699, "step": 20059 }, { "epoch": 2.682535437282696, "grad_norm": 1.8257750272750854, "learning_rate": 5.816445796091153e-07, "loss": 0.3964, "step": 20060 }, { "epoch": 2.682669162877775, "grad_norm": 1.4231352806091309, "learning_rate": 5.811594718980928e-07, "loss": 0.3211, "step": 20061 }, { "epoch": 2.6828028884728536, "grad_norm": 1.518759846687317, "learning_rate": 5.806745605137876e-07, "loss": 0.402, "step": 20062 }, { "epoch": 2.6829366140679327, "grad_norm": 1.752624273300171, "learning_rate": 5.801898454663091e-07, "loss": 0.4326, "step": 20063 }, { "epoch": 2.6830703396630113, "grad_norm": 1.5707350969314575, "learning_rate": 5.797053267657582e-07, "loss": 0.378, "step": 20064 }, { "epoch": 2.6832040652580904, "grad_norm": 1.5453437566757202, "learning_rate": 5.792210044222357e-07, "loss": 0.3275, "step": 20065 }, { "epoch": 2.6833377908531695, "grad_norm": 1.6071445941925049, "learning_rate": 5.78736878445837e-07, "loss": 0.3195, "step": 20066 }, { "epoch": 2.683471516448248, "grad_norm": 1.494536280632019, "learning_rate": 5.782529488466527e-07, "loss": 0.366, "step": 20067 }, { "epoch": 2.683605242043327, "grad_norm": 1.6537988185882568, "learning_rate": 5.777692156347703e-07, "loss": 0.3643, "step": 20068 }, { "epoch": 2.683738967638406, "grad_norm": 1.7844514846801758, "learning_rate": 5.77285678820273e-07, "loss": 0.3826, "step": 20069 }, { "epoch": 2.683872693233485, "grad_norm": 1.4989330768585205, "learning_rate": 5.768023384132382e-07, "loss": 0.333, "step": 20070 }, { "epoch": 2.684006418828564, "grad_norm": 1.5010210275650024, "learning_rate": 5.763191944237434e-07, "loss": 0.3541, "step": 20071 }, { "epoch": 2.6841401444236426, "grad_norm": 1.4865617752075195, "learning_rate": 5.75836246861854e-07, "loss": 0.359, "step": 20072 }, { "epoch": 2.6842738700187216, "grad_norm": 1.8358020782470703, "learning_rate": 5.753534957376438e-07, "loss": 0.3801, "step": 20073 }, { "epoch": 2.6844075956138003, "grad_norm": 1.5007939338684082, "learning_rate": 5.748709410611686e-07, "loss": 0.3534, "step": 20074 }, { "epoch": 2.6845413212088793, "grad_norm": 1.4989217519760132, "learning_rate": 5.743885828424923e-07, "loss": 0.3402, "step": 20075 }, { "epoch": 2.6846750468039584, "grad_norm": 1.4408620595932007, "learning_rate": 5.739064210916656e-07, "loss": 0.3566, "step": 20076 }, { "epoch": 2.684808772399037, "grad_norm": 1.5105623006820679, "learning_rate": 5.734244558187385e-07, "loss": 0.3452, "step": 20077 }, { "epoch": 2.684942497994116, "grad_norm": 1.5920864343643188, "learning_rate": 5.729426870337606e-07, "loss": 0.3545, "step": 20078 }, { "epoch": 2.6850762235891947, "grad_norm": 1.597782850265503, "learning_rate": 5.724611147467707e-07, "loss": 0.3656, "step": 20079 }, { "epoch": 2.685209949184274, "grad_norm": 1.436955451965332, "learning_rate": 5.719797389678072e-07, "loss": 0.3883, "step": 20080 }, { "epoch": 2.685343674779353, "grad_norm": 1.5158867835998535, "learning_rate": 5.714985597069045e-07, "loss": 0.3753, "step": 20081 }, { "epoch": 2.6854774003744315, "grad_norm": 1.6186591386795044, "learning_rate": 5.710175769740933e-07, "loss": 0.3383, "step": 20082 }, { "epoch": 2.6856111259695106, "grad_norm": 1.250008225440979, "learning_rate": 5.705367907793969e-07, "loss": 0.2942, "step": 20083 }, { "epoch": 2.685744851564589, "grad_norm": 1.6313121318817139, "learning_rate": 5.700562011328381e-07, "loss": 0.4211, "step": 20084 }, { "epoch": 2.6858785771596683, "grad_norm": 1.3851209878921509, "learning_rate": 5.695758080444346e-07, "loss": 0.3696, "step": 20085 }, { "epoch": 2.6860123027547473, "grad_norm": 1.521771788597107, "learning_rate": 5.690956115241997e-07, "loss": 0.3477, "step": 20086 }, { "epoch": 2.6861460283498264, "grad_norm": 1.4570585489273071, "learning_rate": 5.686156115821428e-07, "loss": 0.3041, "step": 20087 }, { "epoch": 2.686279753944905, "grad_norm": 1.7978097200393677, "learning_rate": 5.681358082282673e-07, "loss": 0.3894, "step": 20088 }, { "epoch": 2.686413479539984, "grad_norm": 1.6892977952957153, "learning_rate": 5.676562014725773e-07, "loss": 0.3578, "step": 20089 }, { "epoch": 2.6865472051350627, "grad_norm": 1.4773523807525635, "learning_rate": 5.671767913250669e-07, "loss": 0.3545, "step": 20090 }, { "epoch": 2.686680930730142, "grad_norm": 1.650503158569336, "learning_rate": 5.666975777957295e-07, "loss": 0.3753, "step": 20091 }, { "epoch": 2.686814656325221, "grad_norm": 1.352003574371338, "learning_rate": 5.66218560894557e-07, "loss": 0.3516, "step": 20092 }, { "epoch": 2.6869483819202995, "grad_norm": 1.5948925018310547, "learning_rate": 5.65739740631528e-07, "loss": 0.3486, "step": 20093 }, { "epoch": 2.6870821075153786, "grad_norm": 1.5667890310287476, "learning_rate": 5.652611170166288e-07, "loss": 0.3946, "step": 20094 }, { "epoch": 2.687215833110457, "grad_norm": 1.5759068727493286, "learning_rate": 5.64782690059833e-07, "loss": 0.3775, "step": 20095 }, { "epoch": 2.6873495587055363, "grad_norm": 1.7227462530136108, "learning_rate": 5.643044597711122e-07, "loss": 0.4033, "step": 20096 }, { "epoch": 2.6874832843006153, "grad_norm": 1.6775298118591309, "learning_rate": 5.638264261604387e-07, "loss": 0.3935, "step": 20097 }, { "epoch": 2.687617009895694, "grad_norm": 1.7272974252700806, "learning_rate": 5.633485892377699e-07, "loss": 0.4348, "step": 20098 }, { "epoch": 2.687750735490773, "grad_norm": 1.4525794982910156, "learning_rate": 5.628709490130734e-07, "loss": 0.335, "step": 20099 }, { "epoch": 2.6878844610858517, "grad_norm": 1.553026556968689, "learning_rate": 5.623935054963014e-07, "loss": 0.3758, "step": 20100 }, { "epoch": 2.6880181866809307, "grad_norm": 1.5336637496948242, "learning_rate": 5.619162586974048e-07, "loss": 0.3342, "step": 20101 }, { "epoch": 2.68815191227601, "grad_norm": 1.448643445968628, "learning_rate": 5.61439208626332e-07, "loss": 0.341, "step": 20102 }, { "epoch": 2.6882856378710884, "grad_norm": 1.5856961011886597, "learning_rate": 5.609623552930288e-07, "loss": 0.3705, "step": 20103 }, { "epoch": 2.6884193634661675, "grad_norm": 1.7282506227493286, "learning_rate": 5.604856987074314e-07, "loss": 0.3989, "step": 20104 }, { "epoch": 2.688553089061246, "grad_norm": 1.606393814086914, "learning_rate": 5.600092388794776e-07, "loss": 0.3659, "step": 20105 }, { "epoch": 2.688686814656325, "grad_norm": 1.4404832124710083, "learning_rate": 5.595329758190993e-07, "loss": 0.3192, "step": 20106 }, { "epoch": 2.6888205402514043, "grad_norm": 1.6190552711486816, "learning_rate": 5.590569095362208e-07, "loss": 0.4062, "step": 20107 }, { "epoch": 2.688954265846483, "grad_norm": 1.3745753765106201, "learning_rate": 5.585810400407677e-07, "loss": 0.3285, "step": 20108 }, { "epoch": 2.689087991441562, "grad_norm": 1.6005133390426636, "learning_rate": 5.581053673426584e-07, "loss": 0.3524, "step": 20109 }, { "epoch": 2.6892217170366406, "grad_norm": 1.4634137153625488, "learning_rate": 5.576298914518086e-07, "loss": 0.3919, "step": 20110 }, { "epoch": 2.6893554426317197, "grad_norm": 1.3834683895111084, "learning_rate": 5.571546123781291e-07, "loss": 0.3818, "step": 20111 }, { "epoch": 2.6894891682267987, "grad_norm": 1.6085689067840576, "learning_rate": 5.56679530131522e-07, "loss": 0.3649, "step": 20112 }, { "epoch": 2.6896228938218774, "grad_norm": 1.6271476745605469, "learning_rate": 5.562046447218983e-07, "loss": 0.3545, "step": 20113 }, { "epoch": 2.6897566194169564, "grad_norm": 1.629969596862793, "learning_rate": 5.557299561591478e-07, "loss": 0.3949, "step": 20114 }, { "epoch": 2.689890345012035, "grad_norm": 1.6920576095581055, "learning_rate": 5.552554644531715e-07, "loss": 0.3697, "step": 20115 }, { "epoch": 2.690024070607114, "grad_norm": 1.5295406579971313, "learning_rate": 5.547811696138594e-07, "loss": 0.4035, "step": 20116 }, { "epoch": 2.690157796202193, "grad_norm": 1.5440192222595215, "learning_rate": 5.543070716510912e-07, "loss": 0.3521, "step": 20117 }, { "epoch": 2.6902915217972723, "grad_norm": 1.4434239864349365, "learning_rate": 5.53833170574758e-07, "loss": 0.3518, "step": 20118 }, { "epoch": 2.690425247392351, "grad_norm": 1.7046767473220825, "learning_rate": 5.533594663947306e-07, "loss": 0.3998, "step": 20119 }, { "epoch": 2.6905589729874295, "grad_norm": 1.4687715768814087, "learning_rate": 5.528859591208869e-07, "loss": 0.3246, "step": 20120 }, { "epoch": 2.6906926985825086, "grad_norm": 1.5858440399169922, "learning_rate": 5.524126487630943e-07, "loss": 0.3591, "step": 20121 }, { "epoch": 2.6908264241775877, "grad_norm": 1.5686880350112915, "learning_rate": 5.519395353312195e-07, "loss": 0.3413, "step": 20122 }, { "epoch": 2.6909601497726667, "grad_norm": 1.7948952913284302, "learning_rate": 5.514666188351258e-07, "loss": 0.4078, "step": 20123 }, { "epoch": 2.6910938753677454, "grad_norm": 1.594271183013916, "learning_rate": 5.509938992846686e-07, "loss": 0.3485, "step": 20124 }, { "epoch": 2.6912276009628244, "grad_norm": 1.3327934741973877, "learning_rate": 5.505213766897022e-07, "loss": 0.3246, "step": 20125 }, { "epoch": 2.691361326557903, "grad_norm": 1.5597537755966187, "learning_rate": 5.500490510600742e-07, "loss": 0.349, "step": 20126 }, { "epoch": 2.691495052152982, "grad_norm": 1.3275127410888672, "learning_rate": 5.495769224056325e-07, "loss": 0.3381, "step": 20127 }, { "epoch": 2.691628777748061, "grad_norm": 1.4869270324707031, "learning_rate": 5.491049907362156e-07, "loss": 0.3367, "step": 20128 }, { "epoch": 2.69176250334314, "grad_norm": 1.5921506881713867, "learning_rate": 5.486332560616625e-07, "loss": 0.3414, "step": 20129 }, { "epoch": 2.691896228938219, "grad_norm": 1.6436141729354858, "learning_rate": 5.481617183918053e-07, "loss": 0.3412, "step": 20130 }, { "epoch": 2.6920299545332975, "grad_norm": 1.5021904706954956, "learning_rate": 5.476903777364717e-07, "loss": 0.3503, "step": 20131 }, { "epoch": 2.6921636801283766, "grad_norm": 1.5741074085235596, "learning_rate": 5.472192341054882e-07, "loss": 0.3797, "step": 20132 }, { "epoch": 2.6922974057234557, "grad_norm": 1.653293251991272, "learning_rate": 5.467482875086738e-07, "loss": 0.3714, "step": 20133 }, { "epoch": 2.6924311313185343, "grad_norm": 1.5534422397613525, "learning_rate": 5.462775379558461e-07, "loss": 0.3296, "step": 20134 }, { "epoch": 2.6925648569136134, "grad_norm": 1.657217264175415, "learning_rate": 5.458069854568182e-07, "loss": 0.366, "step": 20135 }, { "epoch": 2.692698582508692, "grad_norm": 1.3705108165740967, "learning_rate": 5.453366300213936e-07, "loss": 0.3517, "step": 20136 }, { "epoch": 2.692832308103771, "grad_norm": 1.6446661949157715, "learning_rate": 5.448664716593833e-07, "loss": 0.402, "step": 20137 }, { "epoch": 2.69296603369885, "grad_norm": 1.7517341375350952, "learning_rate": 5.443965103805803e-07, "loss": 0.3768, "step": 20138 }, { "epoch": 2.6930997592939288, "grad_norm": 1.4992750883102417, "learning_rate": 5.439267461947884e-07, "loss": 0.3608, "step": 20139 }, { "epoch": 2.693233484889008, "grad_norm": 1.6932213306427002, "learning_rate": 5.434571791117915e-07, "loss": 0.3541, "step": 20140 }, { "epoch": 2.6933672104840864, "grad_norm": 1.8865656852722168, "learning_rate": 5.42987809141381e-07, "loss": 0.4217, "step": 20141 }, { "epoch": 2.6935009360791655, "grad_norm": 1.4891695976257324, "learning_rate": 5.425186362933422e-07, "loss": 0.311, "step": 20142 }, { "epoch": 2.6936346616742446, "grad_norm": 1.5253475904464722, "learning_rate": 5.420496605774495e-07, "loss": 0.3613, "step": 20143 }, { "epoch": 2.693768387269323, "grad_norm": 1.668724536895752, "learning_rate": 5.415808820034851e-07, "loss": 0.4124, "step": 20144 }, { "epoch": 2.6939021128644023, "grad_norm": 1.3502877950668335, "learning_rate": 5.411123005812147e-07, "loss": 0.3434, "step": 20145 }, { "epoch": 2.694035838459481, "grad_norm": 1.4562195539474487, "learning_rate": 5.40643916320407e-07, "loss": 0.3605, "step": 20146 }, { "epoch": 2.69416956405456, "grad_norm": 1.5881630182266235, "learning_rate": 5.401757292308251e-07, "loss": 0.3868, "step": 20147 }, { "epoch": 2.694303289649639, "grad_norm": 1.3890424966812134, "learning_rate": 5.397077393222283e-07, "loss": 0.2983, "step": 20148 }, { "epoch": 2.6944370152447177, "grad_norm": 1.5175317525863647, "learning_rate": 5.392399466043719e-07, "loss": 0.3382, "step": 20149 }, { "epoch": 2.6945707408397968, "grad_norm": 1.5718451738357544, "learning_rate": 5.387723510870047e-07, "loss": 0.3839, "step": 20150 }, { "epoch": 2.6947044664348754, "grad_norm": 1.5663902759552002, "learning_rate": 5.383049527798756e-07, "loss": 0.3923, "step": 20151 }, { "epoch": 2.6948381920299544, "grad_norm": 1.532414436340332, "learning_rate": 5.378377516927247e-07, "loss": 0.3716, "step": 20152 }, { "epoch": 2.6949719176250335, "grad_norm": 1.5722057819366455, "learning_rate": 5.373707478352918e-07, "loss": 0.3255, "step": 20153 }, { "epoch": 2.6951056432201126, "grad_norm": 1.7246384620666504, "learning_rate": 5.369039412173116e-07, "loss": 0.4155, "step": 20154 }, { "epoch": 2.695239368815191, "grad_norm": 1.7544385194778442, "learning_rate": 5.364373318485128e-07, "loss": 0.3718, "step": 20155 }, { "epoch": 2.6953730944102703, "grad_norm": 1.41415274143219, "learning_rate": 5.359709197386243e-07, "loss": 0.315, "step": 20156 }, { "epoch": 2.695506820005349, "grad_norm": 1.5216760635375977, "learning_rate": 5.355047048973627e-07, "loss": 0.3566, "step": 20157 }, { "epoch": 2.695640545600428, "grad_norm": 1.5549861192703247, "learning_rate": 5.350386873344515e-07, "loss": 0.3866, "step": 20158 }, { "epoch": 2.695774271195507, "grad_norm": 1.6170156002044678, "learning_rate": 5.345728670595995e-07, "loss": 0.3278, "step": 20159 }, { "epoch": 2.6959079967905857, "grad_norm": 1.7136040925979614, "learning_rate": 5.341072440825201e-07, "loss": 0.3676, "step": 20160 }, { "epoch": 2.6960417223856648, "grad_norm": 1.7905199527740479, "learning_rate": 5.336418184129177e-07, "loss": 0.4451, "step": 20161 }, { "epoch": 2.6961754479807434, "grad_norm": 1.6279823780059814, "learning_rate": 5.331765900604913e-07, "loss": 0.3806, "step": 20162 }, { "epoch": 2.6963091735758224, "grad_norm": 1.4701722860336304, "learning_rate": 5.32711559034943e-07, "loss": 0.3438, "step": 20163 }, { "epoch": 2.6964428991709015, "grad_norm": 1.5260616540908813, "learning_rate": 5.322467253459618e-07, "loss": 0.3385, "step": 20164 }, { "epoch": 2.69657662476598, "grad_norm": 1.6723707914352417, "learning_rate": 5.317820890032376e-07, "loss": 0.3601, "step": 20165 }, { "epoch": 2.696710350361059, "grad_norm": 1.4774198532104492, "learning_rate": 5.313176500164563e-07, "loss": 0.3528, "step": 20166 }, { "epoch": 2.696844075956138, "grad_norm": 1.4690394401550293, "learning_rate": 5.308534083952954e-07, "loss": 0.3606, "step": 20167 }, { "epoch": 2.696977801551217, "grad_norm": 1.5543104410171509, "learning_rate": 5.303893641494374e-07, "loss": 0.3758, "step": 20168 }, { "epoch": 2.697111527146296, "grad_norm": 1.6397544145584106, "learning_rate": 5.299255172885509e-07, "loss": 0.3801, "step": 20169 }, { "epoch": 2.6972452527413746, "grad_norm": 1.5951378345489502, "learning_rate": 5.294618678223051e-07, "loss": 0.3706, "step": 20170 }, { "epoch": 2.6973789783364537, "grad_norm": 1.6154307126998901, "learning_rate": 5.289984157603634e-07, "loss": 0.379, "step": 20171 }, { "epoch": 2.6975127039315323, "grad_norm": 1.4765098094940186, "learning_rate": 5.285351611123879e-07, "loss": 0.3315, "step": 20172 }, { "epoch": 2.6976464295266114, "grad_norm": 1.5295416116714478, "learning_rate": 5.280721038880333e-07, "loss": 0.3889, "step": 20173 }, { "epoch": 2.6977801551216904, "grad_norm": 1.4261928796768188, "learning_rate": 5.276092440969527e-07, "loss": 0.2884, "step": 20174 }, { "epoch": 2.697913880716769, "grad_norm": 1.6325639486312866, "learning_rate": 5.271465817487919e-07, "loss": 0.3601, "step": 20175 }, { "epoch": 2.698047606311848, "grad_norm": 1.368662714958191, "learning_rate": 5.266841168531977e-07, "loss": 0.3037, "step": 20176 }, { "epoch": 2.6981813319069268, "grad_norm": 1.6453522443771362, "learning_rate": 5.26221849419809e-07, "loss": 0.3863, "step": 20177 }, { "epoch": 2.698315057502006, "grad_norm": 1.6269104480743408, "learning_rate": 5.25759779458257e-07, "loss": 0.3369, "step": 20178 }, { "epoch": 2.698448783097085, "grad_norm": 1.5034455060958862, "learning_rate": 5.252979069781783e-07, "loss": 0.3603, "step": 20179 }, { "epoch": 2.6985825086921635, "grad_norm": 1.607408881187439, "learning_rate": 5.248362319891998e-07, "loss": 0.3204, "step": 20180 }, { "epoch": 2.6987162342872426, "grad_norm": 1.4805762767791748, "learning_rate": 5.243747545009404e-07, "loss": 0.3424, "step": 20181 }, { "epoch": 2.6988499598823212, "grad_norm": 1.596431851387024, "learning_rate": 5.239134745230246e-07, "loss": 0.3678, "step": 20182 }, { "epoch": 2.6989836854774003, "grad_norm": 1.6354860067367554, "learning_rate": 5.234523920650624e-07, "loss": 0.348, "step": 20183 }, { "epoch": 2.6991174110724794, "grad_norm": 1.4448127746582031, "learning_rate": 5.229915071366698e-07, "loss": 0.3688, "step": 20184 }, { "epoch": 2.699251136667558, "grad_norm": 1.6629222631454468, "learning_rate": 5.225308197474499e-07, "loss": 0.3767, "step": 20185 }, { "epoch": 2.699384862262637, "grad_norm": 1.75308358669281, "learning_rate": 5.22070329907004e-07, "loss": 0.4084, "step": 20186 }, { "epoch": 2.6995185878577157, "grad_norm": 1.4608988761901855, "learning_rate": 5.216100376249356e-07, "loss": 0.3362, "step": 20187 }, { "epoch": 2.6996523134527948, "grad_norm": 1.5837665796279907, "learning_rate": 5.211499429108346e-07, "loss": 0.3845, "step": 20188 }, { "epoch": 2.699786039047874, "grad_norm": 1.697843313217163, "learning_rate": 5.206900457742924e-07, "loss": 0.3663, "step": 20189 }, { "epoch": 2.699919764642953, "grad_norm": 1.404334545135498, "learning_rate": 5.20230346224897e-07, "loss": 0.3378, "step": 20190 }, { "epoch": 2.7000534902380315, "grad_norm": 1.8581522703170776, "learning_rate": 5.197708442722272e-07, "loss": 0.3622, "step": 20191 }, { "epoch": 2.7001872158331106, "grad_norm": 1.7924938201904297, "learning_rate": 5.19311539925863e-07, "loss": 0.4232, "step": 20192 }, { "epoch": 2.7003209414281892, "grad_norm": 1.7070937156677246, "learning_rate": 5.188524331953782e-07, "loss": 0.3211, "step": 20193 }, { "epoch": 2.7004546670232683, "grad_norm": 1.791308045387268, "learning_rate": 5.183935240903415e-07, "loss": 0.3952, "step": 20194 }, { "epoch": 2.7005883926183474, "grad_norm": 1.4736934900283813, "learning_rate": 5.179348126203188e-07, "loss": 0.3458, "step": 20195 }, { "epoch": 2.700722118213426, "grad_norm": 1.509047269821167, "learning_rate": 5.174762987948734e-07, "loss": 0.3568, "step": 20196 }, { "epoch": 2.700855843808505, "grad_norm": 1.481350064277649, "learning_rate": 5.170179826235577e-07, "loss": 0.3588, "step": 20197 }, { "epoch": 2.7009895694035837, "grad_norm": 1.3817325830459595, "learning_rate": 5.165598641159297e-07, "loss": 0.3265, "step": 20198 }, { "epoch": 2.7011232949986628, "grad_norm": 1.5333493947982788, "learning_rate": 5.161019432815362e-07, "loss": 0.3921, "step": 20199 }, { "epoch": 2.701257020593742, "grad_norm": 1.7811447381973267, "learning_rate": 5.156442201299228e-07, "loss": 0.4296, "step": 20200 }, { "epoch": 2.7013907461888205, "grad_norm": 1.6808396577835083, "learning_rate": 5.151866946706318e-07, "loss": 0.3774, "step": 20201 }, { "epoch": 2.7015244717838995, "grad_norm": 1.4510438442230225, "learning_rate": 5.147293669131947e-07, "loss": 0.382, "step": 20202 }, { "epoch": 2.701658197378978, "grad_norm": 1.6661646366119385, "learning_rate": 5.142722368671505e-07, "loss": 0.372, "step": 20203 }, { "epoch": 2.7017919229740572, "grad_norm": 1.6509891748428345, "learning_rate": 5.138153045420236e-07, "loss": 0.3902, "step": 20204 }, { "epoch": 2.7019256485691363, "grad_norm": 1.7195852994918823, "learning_rate": 5.133585699473376e-07, "loss": 0.3957, "step": 20205 }, { "epoch": 2.702059374164215, "grad_norm": 1.6587779521942139, "learning_rate": 5.129020330926182e-07, "loss": 0.3474, "step": 20206 }, { "epoch": 2.702193099759294, "grad_norm": 1.5667407512664795, "learning_rate": 5.124456939873734e-07, "loss": 0.3795, "step": 20207 }, { "epoch": 2.7023268253543726, "grad_norm": 1.4892768859863281, "learning_rate": 5.119895526411234e-07, "loss": 0.3456, "step": 20208 }, { "epoch": 2.7024605509494517, "grad_norm": 1.724997639656067, "learning_rate": 5.115336090633705e-07, "loss": 0.3857, "step": 20209 }, { "epoch": 2.7025942765445308, "grad_norm": 1.5854359865188599, "learning_rate": 5.110778632636204e-07, "loss": 0.3641, "step": 20210 }, { "epoch": 2.7027280021396094, "grad_norm": 1.601108431816101, "learning_rate": 5.106223152513712e-07, "loss": 0.3972, "step": 20211 }, { "epoch": 2.7028617277346885, "grad_norm": 1.5840487480163574, "learning_rate": 5.101669650361207e-07, "loss": 0.3373, "step": 20212 }, { "epoch": 2.702995453329767, "grad_norm": 1.5212128162384033, "learning_rate": 5.097118126273582e-07, "loss": 0.364, "step": 20213 }, { "epoch": 2.703129178924846, "grad_norm": 1.5600091218948364, "learning_rate": 5.092568580345724e-07, "loss": 0.348, "step": 20214 }, { "epoch": 2.7032629045199252, "grad_norm": 1.7619163990020752, "learning_rate": 5.08802101267245e-07, "loss": 0.4032, "step": 20215 }, { "epoch": 2.703396630115004, "grad_norm": 1.467216968536377, "learning_rate": 5.083475423348572e-07, "loss": 0.3862, "step": 20216 }, { "epoch": 2.703530355710083, "grad_norm": 1.4702365398406982, "learning_rate": 5.078931812468813e-07, "loss": 0.3463, "step": 20217 }, { "epoch": 2.7036640813051616, "grad_norm": 1.4741390943527222, "learning_rate": 5.074390180127886e-07, "loss": 0.3947, "step": 20218 }, { "epoch": 2.7037978069002406, "grad_norm": 1.5596321821212769, "learning_rate": 5.069850526420461e-07, "loss": 0.3465, "step": 20219 }, { "epoch": 2.7039315324953197, "grad_norm": 1.7318263053894043, "learning_rate": 5.065312851441184e-07, "loss": 0.3823, "step": 20220 }, { "epoch": 2.7040652580903988, "grad_norm": 1.4680488109588623, "learning_rate": 5.06077715528459e-07, "loss": 0.2943, "step": 20221 }, { "epoch": 2.7041989836854774, "grad_norm": 1.4233413934707642, "learning_rate": 5.056243438045283e-07, "loss": 0.3122, "step": 20222 }, { "epoch": 2.704332709280556, "grad_norm": 1.5125577449798584, "learning_rate": 5.051711699817696e-07, "loss": 0.3461, "step": 20223 }, { "epoch": 2.704466434875635, "grad_norm": 1.7236768007278442, "learning_rate": 5.047181940696333e-07, "loss": 0.3479, "step": 20224 }, { "epoch": 2.704600160470714, "grad_norm": 1.5374935865402222, "learning_rate": 5.042654160775617e-07, "loss": 0.3189, "step": 20225 }, { "epoch": 2.7047338860657932, "grad_norm": 1.5510648488998413, "learning_rate": 5.038128360149885e-07, "loss": 0.3824, "step": 20226 }, { "epoch": 2.704867611660872, "grad_norm": 1.5830087661743164, "learning_rate": 5.033604538913528e-07, "loss": 0.3594, "step": 20227 }, { "epoch": 2.705001337255951, "grad_norm": 1.5863440036773682, "learning_rate": 5.029082697160781e-07, "loss": 0.3709, "step": 20228 }, { "epoch": 2.7051350628510296, "grad_norm": 1.5671225786209106, "learning_rate": 5.024562834985958e-07, "loss": 0.371, "step": 20229 }, { "epoch": 2.7052687884461086, "grad_norm": 1.5910592079162598, "learning_rate": 5.020044952483228e-07, "loss": 0.4248, "step": 20230 }, { "epoch": 2.7054025140411877, "grad_norm": 1.351464033126831, "learning_rate": 5.015529049746759e-07, "loss": 0.3128, "step": 20231 }, { "epoch": 2.7055362396362663, "grad_norm": 1.5354423522949219, "learning_rate": 5.011015126870722e-07, "loss": 0.3567, "step": 20232 }, { "epoch": 2.7056699652313454, "grad_norm": 1.8688173294067383, "learning_rate": 5.006503183949174e-07, "loss": 0.4018, "step": 20233 }, { "epoch": 2.705803690826424, "grad_norm": 1.5489201545715332, "learning_rate": 5.001993221076162e-07, "loss": 0.3367, "step": 20234 }, { "epoch": 2.705937416421503, "grad_norm": 1.6392488479614258, "learning_rate": 4.9974852383457e-07, "loss": 0.3739, "step": 20235 }, { "epoch": 2.706071142016582, "grad_norm": 1.710466742515564, "learning_rate": 4.992979235851747e-07, "loss": 0.3591, "step": 20236 }, { "epoch": 2.706204867611661, "grad_norm": 1.5702614784240723, "learning_rate": 4.988475213688238e-07, "loss": 0.3821, "step": 20237 }, { "epoch": 2.70633859320674, "grad_norm": 1.4164937734603882, "learning_rate": 4.983973171949042e-07, "loss": 0.3488, "step": 20238 }, { "epoch": 2.7064723188018185, "grad_norm": 1.607800006866455, "learning_rate": 4.979473110728006e-07, "loss": 0.3817, "step": 20239 }, { "epoch": 2.7066060443968976, "grad_norm": 1.3887269496917725, "learning_rate": 4.974975030118923e-07, "loss": 0.3113, "step": 20240 }, { "epoch": 2.7067397699919766, "grad_norm": 1.4379385709762573, "learning_rate": 4.970478930215573e-07, "loss": 0.329, "step": 20241 }, { "epoch": 2.7068734955870553, "grad_norm": 1.461632490158081, "learning_rate": 4.965984811111635e-07, "loss": 0.3351, "step": 20242 }, { "epoch": 2.7070072211821343, "grad_norm": 1.4693002700805664, "learning_rate": 4.961492672900814e-07, "loss": 0.3332, "step": 20243 }, { "epoch": 2.707140946777213, "grad_norm": 1.6032915115356445, "learning_rate": 4.957002515676735e-07, "loss": 0.3516, "step": 20244 }, { "epoch": 2.707274672372292, "grad_norm": 1.5636367797851562, "learning_rate": 4.952514339532998e-07, "loss": 0.3989, "step": 20245 }, { "epoch": 2.707408397967371, "grad_norm": 1.5605103969573975, "learning_rate": 4.948028144563155e-07, "loss": 0.3982, "step": 20246 }, { "epoch": 2.7075421235624497, "grad_norm": 1.4551712274551392, "learning_rate": 4.943543930860683e-07, "loss": 0.3272, "step": 20247 }, { "epoch": 2.707675849157529, "grad_norm": 1.5693658590316772, "learning_rate": 4.93906169851911e-07, "loss": 0.3593, "step": 20248 }, { "epoch": 2.7078095747526074, "grad_norm": 1.2982853651046753, "learning_rate": 4.934581447631825e-07, "loss": 0.3023, "step": 20249 }, { "epoch": 2.7079433003476865, "grad_norm": 1.611607551574707, "learning_rate": 4.930103178292201e-07, "loss": 0.3694, "step": 20250 }, { "epoch": 2.7080770259427656, "grad_norm": 1.4041997194290161, "learning_rate": 4.925626890593638e-07, "loss": 0.3463, "step": 20251 }, { "epoch": 2.708210751537844, "grad_norm": 1.5647393465042114, "learning_rate": 4.921152584629363e-07, "loss": 0.3738, "step": 20252 }, { "epoch": 2.7083444771329233, "grad_norm": 1.7457700967788696, "learning_rate": 4.916680260492724e-07, "loss": 0.3729, "step": 20253 }, { "epoch": 2.708478202728002, "grad_norm": 1.5076274871826172, "learning_rate": 4.912209918276877e-07, "loss": 0.3598, "step": 20254 }, { "epoch": 2.708611928323081, "grad_norm": 1.644917607307434, "learning_rate": 4.907741558075041e-07, "loss": 0.3276, "step": 20255 }, { "epoch": 2.70874565391816, "grad_norm": 1.810935378074646, "learning_rate": 4.903275179980327e-07, "loss": 0.4238, "step": 20256 }, { "epoch": 2.708879379513239, "grad_norm": 1.7289130687713623, "learning_rate": 4.898810784085838e-07, "loss": 0.39, "step": 20257 }, { "epoch": 2.7090131051083177, "grad_norm": 1.716043472290039, "learning_rate": 4.894348370484648e-07, "loss": 0.3827, "step": 20258 }, { "epoch": 2.709146830703397, "grad_norm": 1.5843870639801025, "learning_rate": 4.889887939269755e-07, "loss": 0.3529, "step": 20259 }, { "epoch": 2.7092805562984754, "grad_norm": 1.5630922317504883, "learning_rate": 4.885429490534133e-07, "loss": 0.3635, "step": 20260 }, { "epoch": 2.7094142818935545, "grad_norm": 1.5369900465011597, "learning_rate": 4.880973024370728e-07, "loss": 0.3695, "step": 20261 }, { "epoch": 2.7095480074886336, "grad_norm": 1.838873267173767, "learning_rate": 4.876518540872411e-07, "loss": 0.398, "step": 20262 }, { "epoch": 2.709681733083712, "grad_norm": 1.4741324186325073, "learning_rate": 4.87206604013205e-07, "loss": 0.3607, "step": 20263 }, { "epoch": 2.7098154586787913, "grad_norm": 1.5917896032333374, "learning_rate": 4.867615522242442e-07, "loss": 0.3579, "step": 20264 }, { "epoch": 2.70994918427387, "grad_norm": 1.7424354553222656, "learning_rate": 4.863166987296375e-07, "loss": 0.4083, "step": 20265 }, { "epoch": 2.710082909868949, "grad_norm": 1.4296364784240723, "learning_rate": 4.858720435386522e-07, "loss": 0.3662, "step": 20266 }, { "epoch": 2.710216635464028, "grad_norm": 1.6664575338363647, "learning_rate": 4.854275866605629e-07, "loss": 0.3751, "step": 20267 }, { "epoch": 2.7103503610591066, "grad_norm": 1.6264694929122925, "learning_rate": 4.84983328104629e-07, "loss": 0.347, "step": 20268 }, { "epoch": 2.7104840866541857, "grad_norm": 1.523430347442627, "learning_rate": 4.845392678801131e-07, "loss": 0.3748, "step": 20269 }, { "epoch": 2.7106178122492643, "grad_norm": 1.560166597366333, "learning_rate": 4.840954059962733e-07, "loss": 0.3557, "step": 20270 }, { "epoch": 2.7107515378443434, "grad_norm": 1.5584559440612793, "learning_rate": 4.836517424623555e-07, "loss": 0.372, "step": 20271 }, { "epoch": 2.7108852634394225, "grad_norm": 1.6313939094543457, "learning_rate": 4.832082772876135e-07, "loss": 0.3602, "step": 20272 }, { "epoch": 2.711018989034501, "grad_norm": 1.4671710729599, "learning_rate": 4.827650104812876e-07, "loss": 0.3612, "step": 20273 }, { "epoch": 2.71115271462958, "grad_norm": 1.4950790405273438, "learning_rate": 4.823219420526182e-07, "loss": 0.3312, "step": 20274 }, { "epoch": 2.711286440224659, "grad_norm": 1.6663103103637695, "learning_rate": 4.818790720108402e-07, "loss": 0.3691, "step": 20275 }, { "epoch": 2.711420165819738, "grad_norm": 1.705607533454895, "learning_rate": 4.814364003651839e-07, "loss": 0.3435, "step": 20276 }, { "epoch": 2.711553891414817, "grad_norm": 1.5607072114944458, "learning_rate": 4.809939271248798e-07, "loss": 0.3632, "step": 20277 }, { "epoch": 2.7116876170098956, "grad_norm": 1.5186140537261963, "learning_rate": 4.805516522991483e-07, "loss": 0.3813, "step": 20278 }, { "epoch": 2.7118213426049746, "grad_norm": 1.6969985961914062, "learning_rate": 4.801095758972074e-07, "loss": 0.4351, "step": 20279 }, { "epoch": 2.7119550682000533, "grad_norm": 1.581917405128479, "learning_rate": 4.796676979282733e-07, "loss": 0.3703, "step": 20280 }, { "epoch": 2.7120887937951323, "grad_norm": 1.7262730598449707, "learning_rate": 4.792260184015552e-07, "loss": 0.3262, "step": 20281 }, { "epoch": 2.7122225193902114, "grad_norm": 1.673905849456787, "learning_rate": 4.787845373262612e-07, "loss": 0.376, "step": 20282 }, { "epoch": 2.71235624498529, "grad_norm": 1.4823137521743774, "learning_rate": 4.783432547115929e-07, "loss": 0.377, "step": 20283 }, { "epoch": 2.712489970580369, "grad_norm": 1.3652760982513428, "learning_rate": 4.779021705667475e-07, "loss": 0.3355, "step": 20284 }, { "epoch": 2.7126236961754477, "grad_norm": 1.6847953796386719, "learning_rate": 4.774612849009208e-07, "loss": 0.3843, "step": 20285 }, { "epoch": 2.712757421770527, "grad_norm": 1.5922396183013916, "learning_rate": 4.770205977233022e-07, "loss": 0.3467, "step": 20286 }, { "epoch": 2.712891147365606, "grad_norm": 1.3647819757461548, "learning_rate": 4.765801090430733e-07, "loss": 0.3396, "step": 20287 }, { "epoch": 2.713024872960685, "grad_norm": 1.5936484336853027, "learning_rate": 4.761398188694211e-07, "loss": 0.3677, "step": 20288 }, { "epoch": 2.7131585985557636, "grad_norm": 1.5954011678695679, "learning_rate": 4.756997272115227e-07, "loss": 0.4102, "step": 20289 }, { "epoch": 2.713292324150842, "grad_norm": 1.4341659545898438, "learning_rate": 4.752598340785475e-07, "loss": 0.3388, "step": 20290 }, { "epoch": 2.7134260497459213, "grad_norm": 1.6562172174453735, "learning_rate": 4.748201394796681e-07, "loss": 0.3332, "step": 20291 }, { "epoch": 2.7135597753410003, "grad_norm": 1.4977498054504395, "learning_rate": 4.7438064342404724e-07, "loss": 0.3602, "step": 20292 }, { "epoch": 2.7136935009360794, "grad_norm": 1.39657461643219, "learning_rate": 4.739413459208486e-07, "loss": 0.3041, "step": 20293 }, { "epoch": 2.713827226531158, "grad_norm": 1.677172303199768, "learning_rate": 4.73502246979225e-07, "loss": 0.3665, "step": 20294 }, { "epoch": 2.713960952126237, "grad_norm": 1.6039676666259766, "learning_rate": 4.730633466083312e-07, "loss": 0.3818, "step": 20295 }, { "epoch": 2.7140946777213157, "grad_norm": 1.692585825920105, "learning_rate": 4.726246448173177e-07, "loss": 0.4, "step": 20296 }, { "epoch": 2.714228403316395, "grad_norm": 1.66712486743927, "learning_rate": 4.7218614161532505e-07, "loss": 0.3349, "step": 20297 }, { "epoch": 2.714362128911474, "grad_norm": 1.6623013019561768, "learning_rate": 4.7174783701149584e-07, "loss": 0.4001, "step": 20298 }, { "epoch": 2.7144958545065525, "grad_norm": 1.654388189315796, "learning_rate": 4.7130973101496504e-07, "loss": 0.3848, "step": 20299 }, { "epoch": 2.7146295801016316, "grad_norm": 1.5681519508361816, "learning_rate": 4.7087182363486525e-07, "loss": 0.4027, "step": 20300 }, { "epoch": 2.71476330569671, "grad_norm": 1.4763222932815552, "learning_rate": 4.7043411488032373e-07, "loss": 0.3603, "step": 20301 }, { "epoch": 2.7148970312917893, "grad_norm": 1.4896670579910278, "learning_rate": 4.699966047604643e-07, "loss": 0.3391, "step": 20302 }, { "epoch": 2.7150307568868683, "grad_norm": 1.60325288772583, "learning_rate": 4.695592932844073e-07, "loss": 0.3529, "step": 20303 }, { "epoch": 2.715164482481947, "grad_norm": 1.5308139324188232, "learning_rate": 4.691221804612656e-07, "loss": 0.3272, "step": 20304 }, { "epoch": 2.715298208077026, "grad_norm": 1.3872236013412476, "learning_rate": 4.68685266300154e-07, "loss": 0.3429, "step": 20305 }, { "epoch": 2.7154319336721047, "grad_norm": 1.429917812347412, "learning_rate": 4.6824855081017527e-07, "loss": 0.2858, "step": 20306 }, { "epoch": 2.7155656592671837, "grad_norm": 1.5927925109863281, "learning_rate": 4.678120340004355e-07, "loss": 0.3748, "step": 20307 }, { "epoch": 2.715699384862263, "grad_norm": 1.5316531658172607, "learning_rate": 4.6737571588003294e-07, "loss": 0.3595, "step": 20308 }, { "epoch": 2.7158331104573414, "grad_norm": 1.6830354928970337, "learning_rate": 4.6693959645806143e-07, "loss": 0.3981, "step": 20309 }, { "epoch": 2.7159668360524205, "grad_norm": 1.564633846282959, "learning_rate": 4.6650367574361366e-07, "loss": 0.3533, "step": 20310 }, { "epoch": 2.716100561647499, "grad_norm": 1.5637178421020508, "learning_rate": 4.660679537457713e-07, "loss": 0.3917, "step": 20311 }, { "epoch": 2.716234287242578, "grad_norm": 1.5787701606750488, "learning_rate": 4.656324304736215e-07, "loss": 0.3436, "step": 20312 }, { "epoch": 2.7163680128376573, "grad_norm": 1.4261279106140137, "learning_rate": 4.651971059362381e-07, "loss": 0.3182, "step": 20313 }, { "epoch": 2.716501738432736, "grad_norm": 1.439518928527832, "learning_rate": 4.6476198014269945e-07, "loss": 0.341, "step": 20314 }, { "epoch": 2.716635464027815, "grad_norm": 1.4222633838653564, "learning_rate": 4.643270531020738e-07, "loss": 0.3311, "step": 20315 }, { "epoch": 2.7167691896228936, "grad_norm": 1.4127367734909058, "learning_rate": 4.638923248234228e-07, "loss": 0.3315, "step": 20316 }, { "epoch": 2.7169029152179727, "grad_norm": 1.5622849464416504, "learning_rate": 4.634577953158137e-07, "loss": 0.3444, "step": 20317 }, { "epoch": 2.7170366408130517, "grad_norm": 1.5244227647781372, "learning_rate": 4.630234645883014e-07, "loss": 0.3575, "step": 20318 }, { "epoch": 2.7171703664081304, "grad_norm": 1.6528339385986328, "learning_rate": 4.625893326499387e-07, "loss": 0.4158, "step": 20319 }, { "epoch": 2.7173040920032094, "grad_norm": 1.7539079189300537, "learning_rate": 4.6215539950977385e-07, "loss": 0.4261, "step": 20320 }, { "epoch": 2.717437817598288, "grad_norm": 1.4734458923339844, "learning_rate": 4.617216651768541e-07, "loss": 0.3429, "step": 20321 }, { "epoch": 2.717571543193367, "grad_norm": 1.6417827606201172, "learning_rate": 4.6128812966021894e-07, "loss": 0.4042, "step": 20322 }, { "epoch": 2.717705268788446, "grad_norm": 1.5569766759872437, "learning_rate": 4.6085479296890444e-07, "loss": 0.3434, "step": 20323 }, { "epoch": 2.7178389943835253, "grad_norm": 1.6486555337905884, "learning_rate": 4.6042165511194447e-07, "loss": 0.3868, "step": 20324 }, { "epoch": 2.717972719978604, "grad_norm": 1.5388462543487549, "learning_rate": 4.599887160983674e-07, "loss": 0.35, "step": 20325 }, { "epoch": 2.7181064455736825, "grad_norm": 1.4521822929382324, "learning_rate": 4.5955597593719593e-07, "loss": 0.3692, "step": 20326 }, { "epoch": 2.7182401711687616, "grad_norm": 1.8240342140197754, "learning_rate": 4.591234346374507e-07, "loss": 0.4006, "step": 20327 }, { "epoch": 2.7183738967638407, "grad_norm": 1.761733889579773, "learning_rate": 4.586910922081478e-07, "loss": 0.4182, "step": 20328 }, { "epoch": 2.7185076223589197, "grad_norm": 1.7469160556793213, "learning_rate": 4.582589486583e-07, "loss": 0.3338, "step": 20329 }, { "epoch": 2.7186413479539984, "grad_norm": 1.534040927886963, "learning_rate": 4.5782700399691347e-07, "loss": 0.3485, "step": 20330 }, { "epoch": 2.7187750735490774, "grad_norm": 1.5893938541412354, "learning_rate": 4.5739525823299326e-07, "loss": 0.3563, "step": 20331 }, { "epoch": 2.718908799144156, "grad_norm": 1.7124924659729004, "learning_rate": 4.569637113755343e-07, "loss": 0.3868, "step": 20332 }, { "epoch": 2.719042524739235, "grad_norm": 1.6995853185653687, "learning_rate": 4.5653236343353727e-07, "loss": 0.3638, "step": 20333 }, { "epoch": 2.719176250334314, "grad_norm": 1.4726452827453613, "learning_rate": 4.561012144159926e-07, "loss": 0.3761, "step": 20334 }, { "epoch": 2.719309975929393, "grad_norm": 1.694965124130249, "learning_rate": 4.5567026433188223e-07, "loss": 0.4208, "step": 20335 }, { "epoch": 2.719443701524472, "grad_norm": 1.4601316452026367, "learning_rate": 4.5523951319019545e-07, "loss": 0.3369, "step": 20336 }, { "epoch": 2.7195774271195505, "grad_norm": 1.58147132396698, "learning_rate": 4.548089609999051e-07, "loss": 0.3355, "step": 20337 }, { "epoch": 2.7197111527146296, "grad_norm": 1.5607597827911377, "learning_rate": 4.5437860776999075e-07, "loss": 0.4036, "step": 20338 }, { "epoch": 2.7198448783097087, "grad_norm": 1.6427724361419678, "learning_rate": 4.5394845350941854e-07, "loss": 0.3235, "step": 20339 }, { "epoch": 2.7199786039047873, "grad_norm": 1.4942275285720825, "learning_rate": 4.5351849822715566e-07, "loss": 0.357, "step": 20340 }, { "epoch": 2.7201123294998664, "grad_norm": 1.5920283794403076, "learning_rate": 4.5308874193216614e-07, "loss": 0.4041, "step": 20341 }, { "epoch": 2.720246055094945, "grad_norm": 1.2896345853805542, "learning_rate": 4.52659184633405e-07, "loss": 0.3311, "step": 20342 }, { "epoch": 2.720379780690024, "grad_norm": 1.4886295795440674, "learning_rate": 4.5222982633982837e-07, "loss": 0.339, "step": 20343 }, { "epoch": 2.720513506285103, "grad_norm": 1.9110654592514038, "learning_rate": 4.518006670603847e-07, "loss": 0.3452, "step": 20344 }, { "epoch": 2.7206472318801818, "grad_norm": 1.495684027671814, "learning_rate": 4.5137170680401907e-07, "loss": 0.3314, "step": 20345 }, { "epoch": 2.720780957475261, "grad_norm": 1.4094411134719849, "learning_rate": 4.509429455796732e-07, "loss": 0.3017, "step": 20346 }, { "epoch": 2.7209146830703395, "grad_norm": 1.53178071975708, "learning_rate": 4.505143833962844e-07, "loss": 0.339, "step": 20347 }, { "epoch": 2.7210484086654185, "grad_norm": 1.7276862859725952, "learning_rate": 4.5008602026278545e-07, "loss": 0.3417, "step": 20348 }, { "epoch": 2.7211821342604976, "grad_norm": 1.6290594339370728, "learning_rate": 4.4965785618810486e-07, "loss": 0.3662, "step": 20349 }, { "epoch": 2.7213158598555762, "grad_norm": 1.445056676864624, "learning_rate": 4.492298911811688e-07, "loss": 0.3028, "step": 20350 }, { "epoch": 2.7214495854506553, "grad_norm": 1.6794085502624512, "learning_rate": 4.488021252508945e-07, "loss": 0.3361, "step": 20351 }, { "epoch": 2.721583311045734, "grad_norm": 1.563234567642212, "learning_rate": 4.483745584062005e-07, "loss": 0.3549, "step": 20352 }, { "epoch": 2.721717036640813, "grad_norm": 1.4091428518295288, "learning_rate": 4.4794719065599955e-07, "loss": 0.3517, "step": 20353 }, { "epoch": 2.721850762235892, "grad_norm": 1.4657106399536133, "learning_rate": 4.475200220092002e-07, "loss": 0.3375, "step": 20354 }, { "epoch": 2.7219844878309707, "grad_norm": 1.6407135725021362, "learning_rate": 4.4709305247470524e-07, "loss": 0.3512, "step": 20355 }, { "epoch": 2.7221182134260498, "grad_norm": 1.5972973108291626, "learning_rate": 4.4666628206141203e-07, "loss": 0.3283, "step": 20356 }, { "epoch": 2.7222519390211284, "grad_norm": 1.5466053485870361, "learning_rate": 4.4623971077822127e-07, "loss": 0.3481, "step": 20357 }, { "epoch": 2.7223856646162075, "grad_norm": 1.6096493005752563, "learning_rate": 4.4581333863402134e-07, "loss": 0.3423, "step": 20358 }, { "epoch": 2.7225193902112865, "grad_norm": 1.6768133640289307, "learning_rate": 4.453871656376996e-07, "loss": 0.3799, "step": 20359 }, { "epoch": 2.7226531158063656, "grad_norm": 1.8329030275344849, "learning_rate": 4.449611917981389e-07, "loss": 0.4451, "step": 20360 }, { "epoch": 2.7227868414014442, "grad_norm": 1.519812822341919, "learning_rate": 4.445354171242178e-07, "loss": 0.3201, "step": 20361 }, { "epoch": 2.7229205669965233, "grad_norm": 1.4075103998184204, "learning_rate": 4.4410984162481574e-07, "loss": 0.3125, "step": 20362 }, { "epoch": 2.723054292591602, "grad_norm": 1.3142962455749512, "learning_rate": 4.4368446530879794e-07, "loss": 0.327, "step": 20363 }, { "epoch": 2.723188018186681, "grad_norm": 1.4906506538391113, "learning_rate": 4.4325928818503395e-07, "loss": 0.3191, "step": 20364 }, { "epoch": 2.72332174378176, "grad_norm": 1.4396533966064453, "learning_rate": 4.4283431026238446e-07, "loss": 0.3833, "step": 20365 }, { "epoch": 2.7234554693768387, "grad_norm": 1.3519947528839111, "learning_rate": 4.42409531549709e-07, "loss": 0.3549, "step": 20366 }, { "epoch": 2.7235891949719178, "grad_norm": 1.5930904150009155, "learning_rate": 4.4198495205586056e-07, "loss": 0.4143, "step": 20367 }, { "epoch": 2.7237229205669964, "grad_norm": 1.5092682838439941, "learning_rate": 4.415605717896898e-07, "loss": 0.3737, "step": 20368 }, { "epoch": 2.7238566461620755, "grad_norm": 1.5401010513305664, "learning_rate": 4.41136390760043e-07, "loss": 0.3291, "step": 20369 }, { "epoch": 2.7239903717571545, "grad_norm": 1.5423215627670288, "learning_rate": 4.40712408975762e-07, "loss": 0.3381, "step": 20370 }, { "epoch": 2.724124097352233, "grad_norm": 1.5316669940948486, "learning_rate": 4.4028862644568293e-07, "loss": 0.3574, "step": 20371 }, { "epoch": 2.7242578229473122, "grad_norm": 1.5459325313568115, "learning_rate": 4.398650431786389e-07, "loss": 0.3463, "step": 20372 }, { "epoch": 2.724391548542391, "grad_norm": 1.515528678894043, "learning_rate": 4.394416591834616e-07, "loss": 0.3546, "step": 20373 }, { "epoch": 2.72452527413747, "grad_norm": 1.569743275642395, "learning_rate": 4.390184744689741e-07, "loss": 0.3645, "step": 20374 }, { "epoch": 2.724658999732549, "grad_norm": 1.6246967315673828, "learning_rate": 4.3859548904399586e-07, "loss": 0.4179, "step": 20375 }, { "epoch": 2.7247927253276276, "grad_norm": 1.727081298828125, "learning_rate": 4.381727029173488e-07, "loss": 0.3559, "step": 20376 }, { "epoch": 2.7249264509227067, "grad_norm": 1.5679454803466797, "learning_rate": 4.3775011609783814e-07, "loss": 0.3651, "step": 20377 }, { "epoch": 2.7250601765177853, "grad_norm": 1.4618114233016968, "learning_rate": 4.3732772859427787e-07, "loss": 0.3573, "step": 20378 }, { "epoch": 2.7251939021128644, "grad_norm": 1.6613402366638184, "learning_rate": 4.369055404154721e-07, "loss": 0.3328, "step": 20379 }, { "epoch": 2.7253276277079435, "grad_norm": 1.4926159381866455, "learning_rate": 4.3648355157021704e-07, "loss": 0.3491, "step": 20380 }, { "epoch": 2.725461353303022, "grad_norm": 1.5645604133605957, "learning_rate": 4.3606176206731354e-07, "loss": 0.385, "step": 20381 }, { "epoch": 2.725595078898101, "grad_norm": 1.5707736015319824, "learning_rate": 4.3564017191554895e-07, "loss": 0.3714, "step": 20382 }, { "epoch": 2.72572880449318, "grad_norm": 1.608981728553772, "learning_rate": 4.3521878112371406e-07, "loss": 0.3315, "step": 20383 }, { "epoch": 2.725862530088259, "grad_norm": 1.630436658859253, "learning_rate": 4.3479758970059074e-07, "loss": 0.3805, "step": 20384 }, { "epoch": 2.725996255683338, "grad_norm": 1.5983762741088867, "learning_rate": 4.3437659765495853e-07, "loss": 0.3624, "step": 20385 }, { "epoch": 2.7261299812784165, "grad_norm": 1.6413226127624512, "learning_rate": 4.3395580499559276e-07, "loss": 0.4015, "step": 20386 }, { "epoch": 2.7262637068734956, "grad_norm": 1.6908191442489624, "learning_rate": 4.3353521173126413e-07, "loss": 0.4125, "step": 20387 }, { "epoch": 2.7263974324685742, "grad_norm": 1.5939172506332397, "learning_rate": 4.331148178707412e-07, "loss": 0.3266, "step": 20388 }, { "epoch": 2.7265311580636533, "grad_norm": 1.7253609895706177, "learning_rate": 4.3269462342278356e-07, "loss": 0.4326, "step": 20389 }, { "epoch": 2.7266648836587324, "grad_norm": 1.5579947233200073, "learning_rate": 4.322746283961532e-07, "loss": 0.3328, "step": 20390 }, { "epoch": 2.7267986092538115, "grad_norm": 1.5691473484039307, "learning_rate": 4.3185483279960196e-07, "loss": 0.3481, "step": 20391 }, { "epoch": 2.72693233484889, "grad_norm": 1.6086087226867676, "learning_rate": 4.314352366418817e-07, "loss": 0.3953, "step": 20392 }, { "epoch": 2.7270660604439687, "grad_norm": 1.6190946102142334, "learning_rate": 4.3101583993173767e-07, "loss": 0.4124, "step": 20393 }, { "epoch": 2.727199786039048, "grad_norm": 1.5999892950057983, "learning_rate": 4.305966426779118e-07, "loss": 0.3665, "step": 20394 }, { "epoch": 2.727333511634127, "grad_norm": 1.8151549100875854, "learning_rate": 4.301776448891426e-07, "loss": 0.405, "step": 20395 }, { "epoch": 2.727467237229206, "grad_norm": 1.5247858762741089, "learning_rate": 4.297588465741609e-07, "loss": 0.3546, "step": 20396 }, { "epoch": 2.7276009628242845, "grad_norm": 1.5828157663345337, "learning_rate": 4.293402477416997e-07, "loss": 0.358, "step": 20397 }, { "epoch": 2.7277346884193636, "grad_norm": 1.5063343048095703, "learning_rate": 4.2892184840048315e-07, "loss": 0.4291, "step": 20398 }, { "epoch": 2.7278684140144422, "grad_norm": 1.4469585418701172, "learning_rate": 4.28503648559232e-07, "loss": 0.3593, "step": 20399 }, { "epoch": 2.7280021396095213, "grad_norm": 1.856197714805603, "learning_rate": 4.2808564822666486e-07, "loss": 0.4002, "step": 20400 }, { "epoch": 2.7281358652046004, "grad_norm": 1.4738566875457764, "learning_rate": 4.2766784741149034e-07, "loss": 0.3315, "step": 20401 }, { "epoch": 2.728269590799679, "grad_norm": 1.580964207649231, "learning_rate": 4.272502461224226e-07, "loss": 0.3461, "step": 20402 }, { "epoch": 2.728403316394758, "grad_norm": 1.398289680480957, "learning_rate": 4.268328443681613e-07, "loss": 0.332, "step": 20403 }, { "epoch": 2.7285370419898367, "grad_norm": 1.7332830429077148, "learning_rate": 4.264156421574095e-07, "loss": 0.3531, "step": 20404 }, { "epoch": 2.728670767584916, "grad_norm": 1.5387110710144043, "learning_rate": 4.2599863949886245e-07, "loss": 0.3753, "step": 20405 }, { "epoch": 2.728804493179995, "grad_norm": 1.5739604234695435, "learning_rate": 4.25581836401211e-07, "loss": 0.322, "step": 20406 }, { "epoch": 2.7289382187750735, "grad_norm": 1.4158567190170288, "learning_rate": 4.2516523287314703e-07, "loss": 0.3505, "step": 20407 }, { "epoch": 2.7290719443701525, "grad_norm": 1.4814201593399048, "learning_rate": 4.2474882892335144e-07, "loss": 0.3401, "step": 20408 }, { "epoch": 2.729205669965231, "grad_norm": 1.4562358856201172, "learning_rate": 4.2433262456050286e-07, "loss": 0.3374, "step": 20409 }, { "epoch": 2.7293393955603102, "grad_norm": 1.6334632635116577, "learning_rate": 4.239166197932776e-07, "loss": 0.3492, "step": 20410 }, { "epoch": 2.7294731211553893, "grad_norm": 1.3604103326797485, "learning_rate": 4.2350081463034767e-07, "loss": 0.3258, "step": 20411 }, { "epoch": 2.729606846750468, "grad_norm": 1.5220311880111694, "learning_rate": 4.230852090803794e-07, "loss": 0.3745, "step": 20412 }, { "epoch": 2.729740572345547, "grad_norm": 1.5934181213378906, "learning_rate": 4.22669803152036e-07, "loss": 0.3886, "step": 20413 }, { "epoch": 2.7298742979406256, "grad_norm": 1.4442826509475708, "learning_rate": 4.22254596853976e-07, "loss": 0.3461, "step": 20414 }, { "epoch": 2.7300080235357047, "grad_norm": 1.4409736394882202, "learning_rate": 4.2183959019485354e-07, "loss": 0.3159, "step": 20415 }, { "epoch": 2.730141749130784, "grad_norm": 1.4900918006896973, "learning_rate": 4.214247831833207e-07, "loss": 0.3463, "step": 20416 }, { "epoch": 2.7302754747258624, "grad_norm": 1.5594525337219238, "learning_rate": 4.210101758280216e-07, "loss": 0.3586, "step": 20417 }, { "epoch": 2.7304092003209415, "grad_norm": 1.5929666757583618, "learning_rate": 4.205957681375994e-07, "loss": 0.3871, "step": 20418 }, { "epoch": 2.73054292591602, "grad_norm": 1.52675461769104, "learning_rate": 4.2018156012069265e-07, "loss": 0.3421, "step": 20419 }, { "epoch": 2.730676651511099, "grad_norm": 1.6141642332077026, "learning_rate": 4.197675517859323e-07, "loss": 0.3313, "step": 20420 }, { "epoch": 2.7308103771061782, "grad_norm": 1.5692574977874756, "learning_rate": 4.1935374314195254e-07, "loss": 0.3959, "step": 20421 }, { "epoch": 2.730944102701257, "grad_norm": 1.5117493867874146, "learning_rate": 4.189401341973742e-07, "loss": 0.3568, "step": 20422 }, { "epoch": 2.731077828296336, "grad_norm": 1.543954610824585, "learning_rate": 4.1852672496082267e-07, "loss": 0.349, "step": 20423 }, { "epoch": 2.7312115538914146, "grad_norm": 1.4033490419387817, "learning_rate": 4.1811351544091217e-07, "loss": 0.3364, "step": 20424 }, { "epoch": 2.7313452794864936, "grad_norm": 1.6456210613250732, "learning_rate": 4.1770050564625577e-07, "loss": 0.3818, "step": 20425 }, { "epoch": 2.7314790050815727, "grad_norm": 1.4848979711532593, "learning_rate": 4.1728769558546547e-07, "loss": 0.346, "step": 20426 }, { "epoch": 2.731612730676652, "grad_norm": 1.4872301816940308, "learning_rate": 4.1687508526714103e-07, "loss": 0.3758, "step": 20427 }, { "epoch": 2.7317464562717304, "grad_norm": 1.499072551727295, "learning_rate": 4.164626746998868e-07, "loss": 0.3312, "step": 20428 }, { "epoch": 2.731880181866809, "grad_norm": 1.6013410091400146, "learning_rate": 4.1605046389229686e-07, "loss": 0.371, "step": 20429 }, { "epoch": 2.732013907461888, "grad_norm": 1.582965612411499, "learning_rate": 4.1563845285296443e-07, "loss": 0.3393, "step": 20430 }, { "epoch": 2.732147633056967, "grad_norm": 1.5268311500549316, "learning_rate": 4.152266415904771e-07, "loss": 0.3446, "step": 20431 }, { "epoch": 2.7322813586520462, "grad_norm": 1.6414775848388672, "learning_rate": 4.1481503011341906e-07, "loss": 0.3875, "step": 20432 }, { "epoch": 2.732415084247125, "grad_norm": 1.6225417852401733, "learning_rate": 4.14403618430369e-07, "loss": 0.402, "step": 20433 }, { "epoch": 2.732548809842204, "grad_norm": 1.4447029829025269, "learning_rate": 4.139924065499035e-07, "loss": 0.3651, "step": 20434 }, { "epoch": 2.7326825354372826, "grad_norm": 1.5432274341583252, "learning_rate": 4.135813944805933e-07, "loss": 0.357, "step": 20435 }, { "epoch": 2.7328162610323616, "grad_norm": 1.7019670009613037, "learning_rate": 4.1317058223100614e-07, "loss": 0.3518, "step": 20436 }, { "epoch": 2.7329499866274407, "grad_norm": 1.7091443538665771, "learning_rate": 4.12759969809704e-07, "loss": 0.4112, "step": 20437 }, { "epoch": 2.7330837122225193, "grad_norm": 1.4933542013168335, "learning_rate": 4.123495572252467e-07, "loss": 0.3532, "step": 20438 }, { "epoch": 2.7332174378175984, "grad_norm": 1.6354148387908936, "learning_rate": 4.1193934448618857e-07, "loss": 0.3787, "step": 20439 }, { "epoch": 2.733351163412677, "grad_norm": 1.5271919965744019, "learning_rate": 4.1152933160108157e-07, "loss": 0.3107, "step": 20440 }, { "epoch": 2.733484889007756, "grad_norm": 1.6925134658813477, "learning_rate": 4.1111951857846775e-07, "loss": 0.3549, "step": 20441 }, { "epoch": 2.733618614602835, "grad_norm": 1.4857733249664307, "learning_rate": 4.1070990542689373e-07, "loss": 0.3328, "step": 20442 }, { "epoch": 2.733752340197914, "grad_norm": 1.5856181383132935, "learning_rate": 4.1030049215489586e-07, "loss": 0.3352, "step": 20443 }, { "epoch": 2.733886065792993, "grad_norm": 1.601984977722168, "learning_rate": 4.0989127877100523e-07, "loss": 0.3284, "step": 20444 }, { "epoch": 2.7340197913880715, "grad_norm": 1.6515451669692993, "learning_rate": 4.0948226528375714e-07, "loss": 0.3314, "step": 20445 }, { "epoch": 2.7341535169831506, "grad_norm": 1.5508577823638916, "learning_rate": 4.090734517016726e-07, "loss": 0.3373, "step": 20446 }, { "epoch": 2.7342872425782296, "grad_norm": 1.5244628190994263, "learning_rate": 4.0866483803327583e-07, "loss": 0.3579, "step": 20447 }, { "epoch": 2.7344209681733083, "grad_norm": 1.3885129690170288, "learning_rate": 4.0825642428708125e-07, "loss": 0.3299, "step": 20448 }, { "epoch": 2.7345546937683873, "grad_norm": 1.4743024110794067, "learning_rate": 4.078482104716042e-07, "loss": 0.3688, "step": 20449 }, { "epoch": 2.734688419363466, "grad_norm": 1.723941683769226, "learning_rate": 4.0744019659535116e-07, "loss": 0.391, "step": 20450 }, { "epoch": 2.734822144958545, "grad_norm": 1.462186336517334, "learning_rate": 4.070323826668299e-07, "loss": 0.3767, "step": 20451 }, { "epoch": 2.734955870553624, "grad_norm": 1.6000711917877197, "learning_rate": 4.066247686945379e-07, "loss": 0.3701, "step": 20452 }, { "epoch": 2.7350895961487027, "grad_norm": 1.5289440155029297, "learning_rate": 4.0621735468697297e-07, "loss": 0.3218, "step": 20453 }, { "epoch": 2.735223321743782, "grad_norm": 1.6520367860794067, "learning_rate": 4.058101406526271e-07, "loss": 0.339, "step": 20454 }, { "epoch": 2.7353570473388604, "grad_norm": 1.7498400211334229, "learning_rate": 4.0540312659998803e-07, "loss": 0.3895, "step": 20455 }, { "epoch": 2.7354907729339395, "grad_norm": 1.6241114139556885, "learning_rate": 4.0499631253754003e-07, "loss": 0.369, "step": 20456 }, { "epoch": 2.7356244985290186, "grad_norm": 1.5540971755981445, "learning_rate": 4.0458969847376185e-07, "loss": 0.3456, "step": 20457 }, { "epoch": 2.735758224124097, "grad_norm": 1.469602108001709, "learning_rate": 4.0418328441713007e-07, "loss": 0.3323, "step": 20458 }, { "epoch": 2.7358919497191763, "grad_norm": 1.5364309549331665, "learning_rate": 4.037770703761168e-07, "loss": 0.3117, "step": 20459 }, { "epoch": 2.736025675314255, "grad_norm": 1.546209454536438, "learning_rate": 4.033710563591853e-07, "loss": 0.3219, "step": 20460 }, { "epoch": 2.736159400909334, "grad_norm": 1.7581359148025513, "learning_rate": 4.0296524237480426e-07, "loss": 0.4176, "step": 20461 }, { "epoch": 2.736293126504413, "grad_norm": 1.5537997484207153, "learning_rate": 4.025596284314259e-07, "loss": 0.3584, "step": 20462 }, { "epoch": 2.736426852099492, "grad_norm": 1.5532357692718506, "learning_rate": 4.0215421453751014e-07, "loss": 0.3338, "step": 20463 }, { "epoch": 2.7365605776945707, "grad_norm": 1.5923744440078735, "learning_rate": 4.017490007015068e-07, "loss": 0.3821, "step": 20464 }, { "epoch": 2.73669430328965, "grad_norm": 1.4443254470825195, "learning_rate": 4.0134398693185803e-07, "loss": 0.3168, "step": 20465 }, { "epoch": 2.7368280288847284, "grad_norm": 1.6133848428726196, "learning_rate": 4.009391732370116e-07, "loss": 0.3907, "step": 20466 }, { "epoch": 2.7369617544798075, "grad_norm": 1.5660719871520996, "learning_rate": 4.005345596254029e-07, "loss": 0.3704, "step": 20467 }, { "epoch": 2.7370954800748866, "grad_norm": 1.6654837131500244, "learning_rate": 4.001301461054641e-07, "loss": 0.3573, "step": 20468 }, { "epoch": 2.737229205669965, "grad_norm": 1.330752968788147, "learning_rate": 3.997259326856262e-07, "loss": 0.337, "step": 20469 }, { "epoch": 2.7373629312650443, "grad_norm": 1.469082236289978, "learning_rate": 3.9932191937431474e-07, "loss": 0.3326, "step": 20470 }, { "epoch": 2.737496656860123, "grad_norm": 1.6516684293746948, "learning_rate": 3.98918106179953e-07, "loss": 0.3616, "step": 20471 }, { "epoch": 2.737630382455202, "grad_norm": 1.3364589214324951, "learning_rate": 3.9851449311095415e-07, "loss": 0.3316, "step": 20472 }, { "epoch": 2.737764108050281, "grad_norm": 1.3165228366851807, "learning_rate": 3.981110801757337e-07, "loss": 0.2953, "step": 20473 }, { "epoch": 2.7378978336453597, "grad_norm": 1.637839674949646, "learning_rate": 3.977078673826995e-07, "loss": 0.4007, "step": 20474 }, { "epoch": 2.7380315592404387, "grad_norm": 1.376086950302124, "learning_rate": 3.9730485474025695e-07, "loss": 0.3521, "step": 20475 }, { "epoch": 2.7381652848355174, "grad_norm": 1.56931734085083, "learning_rate": 3.9690204225680595e-07, "loss": 0.3866, "step": 20476 }, { "epoch": 2.7382990104305964, "grad_norm": 1.6726292371749878, "learning_rate": 3.964994299407421e-07, "loss": 0.3739, "step": 20477 }, { "epoch": 2.7384327360256755, "grad_norm": 1.3977808952331543, "learning_rate": 3.960970178004586e-07, "loss": 0.3362, "step": 20478 }, { "epoch": 2.738566461620754, "grad_norm": 1.5280615091323853, "learning_rate": 3.9569480584434217e-07, "loss": 0.3498, "step": 20479 }, { "epoch": 2.738700187215833, "grad_norm": 1.4856868982315063, "learning_rate": 3.9529279408077715e-07, "loss": 0.3546, "step": 20480 }, { "epoch": 2.738833912810912, "grad_norm": 1.6603909730911255, "learning_rate": 3.9489098251814353e-07, "loss": 0.3925, "step": 20481 }, { "epoch": 2.738967638405991, "grad_norm": 1.5924251079559326, "learning_rate": 3.9448937116481676e-07, "loss": 0.392, "step": 20482 }, { "epoch": 2.73910136400107, "grad_norm": 1.4669179916381836, "learning_rate": 3.9408796002916696e-07, "loss": 0.3325, "step": 20483 }, { "epoch": 2.7392350895961486, "grad_norm": 1.5693894624710083, "learning_rate": 3.936867491195617e-07, "loss": 0.3868, "step": 20484 }, { "epoch": 2.7393688151912277, "grad_norm": 1.583762526512146, "learning_rate": 3.9328573844436555e-07, "loss": 0.366, "step": 20485 }, { "epoch": 2.7395025407863063, "grad_norm": 1.595390796661377, "learning_rate": 3.928849280119329e-07, "loss": 0.3571, "step": 20486 }, { "epoch": 2.7396362663813854, "grad_norm": 1.699400544166565, "learning_rate": 3.9248431783062366e-07, "loss": 0.3748, "step": 20487 }, { "epoch": 2.7397699919764644, "grad_norm": 1.7009303569793701, "learning_rate": 3.920839079087835e-07, "loss": 0.4033, "step": 20488 }, { "epoch": 2.739903717571543, "grad_norm": 1.571163535118103, "learning_rate": 3.9168369825476003e-07, "loss": 0.3922, "step": 20489 }, { "epoch": 2.740037443166622, "grad_norm": 1.8056821823120117, "learning_rate": 3.912836888768978e-07, "loss": 0.3847, "step": 20490 }, { "epoch": 2.7401711687617007, "grad_norm": 1.7083226442337036, "learning_rate": 3.9088387978353015e-07, "loss": 0.3752, "step": 20491 }, { "epoch": 2.74030489435678, "grad_norm": 1.6366592645645142, "learning_rate": 3.904842709829948e-07, "loss": 0.3699, "step": 20492 }, { "epoch": 2.740438619951859, "grad_norm": 1.5583598613739014, "learning_rate": 3.9008486248361957e-07, "loss": 0.3145, "step": 20493 }, { "epoch": 2.740572345546938, "grad_norm": 1.5246495008468628, "learning_rate": 3.8968565429372885e-07, "loss": 0.3625, "step": 20494 }, { "epoch": 2.7407060711420166, "grad_norm": 1.4283463954925537, "learning_rate": 3.892866464216449e-07, "loss": 0.3325, "step": 20495 }, { "epoch": 2.740839796737095, "grad_norm": 1.7979589700698853, "learning_rate": 3.888878388756845e-07, "loss": 0.4112, "step": 20496 }, { "epoch": 2.7409735223321743, "grad_norm": 1.6331069469451904, "learning_rate": 3.884892316641598e-07, "loss": 0.3668, "step": 20497 }, { "epoch": 2.7411072479272534, "grad_norm": 1.4808342456817627, "learning_rate": 3.880908247953796e-07, "loss": 0.3618, "step": 20498 }, { "epoch": 2.7412409735223324, "grad_norm": 1.497487187385559, "learning_rate": 3.876926182776497e-07, "loss": 0.3542, "step": 20499 }, { "epoch": 2.741374699117411, "grad_norm": 1.62755286693573, "learning_rate": 3.872946121192689e-07, "loss": 0.4, "step": 20500 }, { "epoch": 2.74150842471249, "grad_norm": 1.5997579097747803, "learning_rate": 3.8689680632853275e-07, "loss": 0.3933, "step": 20501 }, { "epoch": 2.7416421503075687, "grad_norm": 1.6901222467422485, "learning_rate": 3.864992009137347e-07, "loss": 0.3651, "step": 20502 }, { "epoch": 2.741775875902648, "grad_norm": 1.746044635772705, "learning_rate": 3.8610179588316144e-07, "loss": 0.402, "step": 20503 }, { "epoch": 2.741909601497727, "grad_norm": 1.6435599327087402, "learning_rate": 3.857045912450974e-07, "loss": 0.3529, "step": 20504 }, { "epoch": 2.7420433270928055, "grad_norm": 1.652241587638855, "learning_rate": 3.853075870078193e-07, "loss": 0.3772, "step": 20505 }, { "epoch": 2.7421770526878846, "grad_norm": 1.608970046043396, "learning_rate": 3.849107831796073e-07, "loss": 0.3751, "step": 20506 }, { "epoch": 2.742310778282963, "grad_norm": 1.4193334579467773, "learning_rate": 3.845141797687257e-07, "loss": 0.3687, "step": 20507 }, { "epoch": 2.7424445038780423, "grad_norm": 1.6934199333190918, "learning_rate": 3.84117776783447e-07, "loss": 0.3866, "step": 20508 }, { "epoch": 2.7425782294731214, "grad_norm": 1.4429768323898315, "learning_rate": 3.837215742320333e-07, "loss": 0.3553, "step": 20509 }, { "epoch": 2.7427119550682, "grad_norm": 1.5743972063064575, "learning_rate": 3.833255721227391e-07, "loss": 0.3518, "step": 20510 }, { "epoch": 2.742845680663279, "grad_norm": 1.6451466083526611, "learning_rate": 3.829297704638224e-07, "loss": 0.3473, "step": 20511 }, { "epoch": 2.7429794062583577, "grad_norm": 1.8716309070587158, "learning_rate": 3.82534169263532e-07, "loss": 0.3928, "step": 20512 }, { "epoch": 2.7431131318534367, "grad_norm": 1.5953787565231323, "learning_rate": 3.8213876853011365e-07, "loss": 0.391, "step": 20513 }, { "epoch": 2.743246857448516, "grad_norm": 1.6890983581542969, "learning_rate": 3.817435682718096e-07, "loss": 0.3608, "step": 20514 }, { "epoch": 2.7433805830435944, "grad_norm": 1.4891178607940674, "learning_rate": 3.813485684968565e-07, "loss": 0.3743, "step": 20515 }, { "epoch": 2.7435143086386735, "grad_norm": 1.824203610420227, "learning_rate": 3.8095376921349015e-07, "loss": 0.37, "step": 20516 }, { "epoch": 2.743648034233752, "grad_norm": 1.5892139673233032, "learning_rate": 3.8055917042993716e-07, "loss": 0.34, "step": 20517 }, { "epoch": 2.743781759828831, "grad_norm": 1.637537956237793, "learning_rate": 3.8016477215442325e-07, "loss": 0.3698, "step": 20518 }, { "epoch": 2.7439154854239103, "grad_norm": 1.5128076076507568, "learning_rate": 3.797705743951685e-07, "loss": 0.3543, "step": 20519 }, { "epoch": 2.744049211018989, "grad_norm": 1.5798979997634888, "learning_rate": 3.793765771603919e-07, "loss": 0.398, "step": 20520 }, { "epoch": 2.744182936614068, "grad_norm": 1.6757993698120117, "learning_rate": 3.789827804583046e-07, "loss": 0.3588, "step": 20521 }, { "epoch": 2.7443166622091466, "grad_norm": 1.6149609088897705, "learning_rate": 3.7858918429711455e-07, "loss": 0.365, "step": 20522 }, { "epoch": 2.7444503878042257, "grad_norm": 1.4499512910842896, "learning_rate": 3.7819578868502626e-07, "loss": 0.3773, "step": 20523 }, { "epoch": 2.7445841133993047, "grad_norm": 1.7151280641555786, "learning_rate": 3.7780259363023983e-07, "loss": 0.3532, "step": 20524 }, { "epoch": 2.7447178389943834, "grad_norm": 1.5275129079818726, "learning_rate": 3.774095991409521e-07, "loss": 0.3353, "step": 20525 }, { "epoch": 2.7448515645894624, "grad_norm": 1.5633188486099243, "learning_rate": 3.7701680522535087e-07, "loss": 0.336, "step": 20526 }, { "epoch": 2.744985290184541, "grad_norm": 1.5304471254348755, "learning_rate": 3.7662421189162745e-07, "loss": 0.3626, "step": 20527 }, { "epoch": 2.74511901577962, "grad_norm": 1.4296998977661133, "learning_rate": 3.762318191479641e-07, "loss": 0.3448, "step": 20528 }, { "epoch": 2.745252741374699, "grad_norm": 1.5945011377334595, "learning_rate": 3.7583962700253774e-07, "loss": 0.3867, "step": 20529 }, { "epoch": 2.7453864669697783, "grad_norm": 1.4329532384872437, "learning_rate": 3.7544763546352834e-07, "loss": 0.3533, "step": 20530 }, { "epoch": 2.745520192564857, "grad_norm": 1.4994834661483765, "learning_rate": 3.750558445390995e-07, "loss": 0.3183, "step": 20531 }, { "epoch": 2.7456539181599355, "grad_norm": 1.5915946960449219, "learning_rate": 3.7466425423742457e-07, "loss": 0.3537, "step": 20532 }, { "epoch": 2.7457876437550146, "grad_norm": 1.6245179176330566, "learning_rate": 3.742728645666616e-07, "loss": 0.3618, "step": 20533 }, { "epoch": 2.7459213693500937, "grad_norm": 1.4274048805236816, "learning_rate": 3.7388167553496944e-07, "loss": 0.371, "step": 20534 }, { "epoch": 2.7460550949451727, "grad_norm": 1.5425249338150024, "learning_rate": 3.73490687150504e-07, "loss": 0.3704, "step": 20535 }, { "epoch": 2.7461888205402514, "grad_norm": 1.4881196022033691, "learning_rate": 3.73099899421413e-07, "loss": 0.3676, "step": 20536 }, { "epoch": 2.7463225461353304, "grad_norm": 1.426841139793396, "learning_rate": 3.727093123558423e-07, "loss": 0.3035, "step": 20537 }, { "epoch": 2.746456271730409, "grad_norm": 1.4708136320114136, "learning_rate": 3.723189259619331e-07, "loss": 0.3184, "step": 20538 }, { "epoch": 2.746589997325488, "grad_norm": 1.4729924201965332, "learning_rate": 3.7192874024782443e-07, "loss": 0.3304, "step": 20539 }, { "epoch": 2.746723722920567, "grad_norm": 1.636109709739685, "learning_rate": 3.715387552216476e-07, "loss": 0.364, "step": 20540 }, { "epoch": 2.746857448515646, "grad_norm": 1.568668246269226, "learning_rate": 3.7114897089153167e-07, "loss": 0.3623, "step": 20541 }, { "epoch": 2.746991174110725, "grad_norm": 1.4993621110916138, "learning_rate": 3.7075938726560123e-07, "loss": 0.3395, "step": 20542 }, { "epoch": 2.7471248997058035, "grad_norm": 1.6911479234695435, "learning_rate": 3.703700043519787e-07, "loss": 0.3875, "step": 20543 }, { "epoch": 2.7472586253008826, "grad_norm": 1.6015174388885498, "learning_rate": 3.699808221587786e-07, "loss": 0.3652, "step": 20544 }, { "epoch": 2.7473923508959617, "grad_norm": 1.4216365814208984, "learning_rate": 3.6959184069411123e-07, "loss": 0.3435, "step": 20545 }, { "epoch": 2.7475260764910403, "grad_norm": 1.7470240592956543, "learning_rate": 3.6920305996608785e-07, "loss": 0.3854, "step": 20546 }, { "epoch": 2.7476598020861194, "grad_norm": 1.5597035884857178, "learning_rate": 3.6881447998281193e-07, "loss": 0.37, "step": 20547 }, { "epoch": 2.747793527681198, "grad_norm": 1.5774924755096436, "learning_rate": 3.684261007523815e-07, "loss": 0.3532, "step": 20548 }, { "epoch": 2.747927253276277, "grad_norm": 1.6980552673339844, "learning_rate": 3.6803792228289337e-07, "loss": 0.3656, "step": 20549 }, { "epoch": 2.748060978871356, "grad_norm": 1.6803628206253052, "learning_rate": 3.676499445824355e-07, "loss": 0.3494, "step": 20550 }, { "epoch": 2.7481947044664348, "grad_norm": 1.789453148841858, "learning_rate": 3.6726216765910036e-07, "loss": 0.4127, "step": 20551 }, { "epoch": 2.748328430061514, "grad_norm": 1.4321181774139404, "learning_rate": 3.6687459152096706e-07, "loss": 0.3744, "step": 20552 }, { "epoch": 2.7484621556565925, "grad_norm": 1.6468758583068848, "learning_rate": 3.664872161761135e-07, "loss": 0.3551, "step": 20553 }, { "epoch": 2.7485958812516715, "grad_norm": 1.5822397470474243, "learning_rate": 3.661000416326177e-07, "loss": 0.3758, "step": 20554 }, { "epoch": 2.7487296068467506, "grad_norm": 1.7735439538955688, "learning_rate": 3.6571306789854543e-07, "loss": 0.4132, "step": 20555 }, { "epoch": 2.7488633324418292, "grad_norm": 1.4930862188339233, "learning_rate": 3.6532629498196694e-07, "loss": 0.3333, "step": 20556 }, { "epoch": 2.7489970580369083, "grad_norm": 1.5944476127624512, "learning_rate": 3.649397228909424e-07, "loss": 0.3648, "step": 20557 }, { "epoch": 2.749130783631987, "grad_norm": 1.3522071838378906, "learning_rate": 3.6455335163352977e-07, "loss": 0.3795, "step": 20558 }, { "epoch": 2.749264509227066, "grad_norm": 1.5602048635482788, "learning_rate": 3.641671812177816e-07, "loss": 0.3365, "step": 20559 }, { "epoch": 2.749398234822145, "grad_norm": 1.6146314144134521, "learning_rate": 3.6378121165174806e-07, "loss": 0.3471, "step": 20560 }, { "epoch": 2.7495319604172237, "grad_norm": 1.7265864610671997, "learning_rate": 3.63395442943475e-07, "loss": 0.3662, "step": 20561 }, { "epoch": 2.7496656860123028, "grad_norm": 1.6512349843978882, "learning_rate": 3.6300987510100136e-07, "loss": 0.3825, "step": 20562 }, { "epoch": 2.7497994116073814, "grad_norm": 1.515991449356079, "learning_rate": 3.6262450813236647e-07, "loss": 0.3924, "step": 20563 }, { "epoch": 2.7499331372024605, "grad_norm": 1.4392749071121216, "learning_rate": 3.6223934204560165e-07, "loss": 0.3485, "step": 20564 }, { "epoch": 2.7500668627975395, "grad_norm": 1.4407451152801514, "learning_rate": 3.618543768487348e-07, "loss": 0.3483, "step": 20565 }, { "epoch": 2.7502005883926186, "grad_norm": 1.668703556060791, "learning_rate": 3.6146961254979187e-07, "loss": 0.3978, "step": 20566 }, { "epoch": 2.7503343139876972, "grad_norm": 1.430204153060913, "learning_rate": 3.610850491567908e-07, "loss": 0.3606, "step": 20567 }, { "epoch": 2.7504680395827763, "grad_norm": 1.5843908786773682, "learning_rate": 3.607006866777485e-07, "loss": 0.3908, "step": 20568 }, { "epoch": 2.750601765177855, "grad_norm": 1.6481750011444092, "learning_rate": 3.603165251206764e-07, "loss": 0.3521, "step": 20569 }, { "epoch": 2.750735490772934, "grad_norm": 1.571509599685669, "learning_rate": 3.5993256449358474e-07, "loss": 0.3543, "step": 20570 }, { "epoch": 2.750869216368013, "grad_norm": 1.4145193099975586, "learning_rate": 3.595488048044704e-07, "loss": 0.315, "step": 20571 }, { "epoch": 2.7510029419630917, "grad_norm": 1.607410192489624, "learning_rate": 3.591652460613382e-07, "loss": 0.3427, "step": 20572 }, { "epoch": 2.7511366675581708, "grad_norm": 1.6128318309783936, "learning_rate": 3.5878188827218166e-07, "loss": 0.3403, "step": 20573 }, { "epoch": 2.7512703931532494, "grad_norm": 1.7510581016540527, "learning_rate": 3.5839873144498885e-07, "loss": 0.4169, "step": 20574 }, { "epoch": 2.7514041187483285, "grad_norm": 1.5919370651245117, "learning_rate": 3.5801577558775113e-07, "loss": 0.387, "step": 20575 }, { "epoch": 2.7515378443434075, "grad_norm": 1.7270561456680298, "learning_rate": 3.576330207084466e-07, "loss": 0.3688, "step": 20576 }, { "epoch": 2.751671569938486, "grad_norm": 1.581162452697754, "learning_rate": 3.572504668150556e-07, "loss": 0.3742, "step": 20577 }, { "epoch": 2.7518052955335652, "grad_norm": 1.7202913761138916, "learning_rate": 3.5686811391555164e-07, "loss": 0.3453, "step": 20578 }, { "epoch": 2.751939021128644, "grad_norm": 1.520103096961975, "learning_rate": 3.564859620179029e-07, "loss": 0.3603, "step": 20579 }, { "epoch": 2.752072746723723, "grad_norm": 1.6849335432052612, "learning_rate": 3.5610401113007844e-07, "loss": 0.3821, "step": 20580 }, { "epoch": 2.752206472318802, "grad_norm": 1.4101080894470215, "learning_rate": 3.557222612600375e-07, "loss": 0.3103, "step": 20581 }, { "epoch": 2.7523401979138806, "grad_norm": 1.4197107553482056, "learning_rate": 3.55340712415736e-07, "loss": 0.342, "step": 20582 }, { "epoch": 2.7524739235089597, "grad_norm": 1.6285066604614258, "learning_rate": 3.549593646051297e-07, "loss": 0.3755, "step": 20583 }, { "epoch": 2.7526076491040383, "grad_norm": 1.7666267156600952, "learning_rate": 3.5457821783616565e-07, "loss": 0.4207, "step": 20584 }, { "epoch": 2.7527413746991174, "grad_norm": 1.6444405317306519, "learning_rate": 3.5419727211678857e-07, "loss": 0.3903, "step": 20585 }, { "epoch": 2.7528751002941965, "grad_norm": 1.6292647123336792, "learning_rate": 3.538165274549399e-07, "loss": 0.3631, "step": 20586 }, { "epoch": 2.753008825889275, "grad_norm": 1.5892674922943115, "learning_rate": 3.534359838585544e-07, "loss": 0.3613, "step": 20587 }, { "epoch": 2.753142551484354, "grad_norm": 1.5457539558410645, "learning_rate": 3.530556413355657e-07, "loss": 0.3554, "step": 20588 }, { "epoch": 2.753276277079433, "grad_norm": 1.4582083225250244, "learning_rate": 3.52675499893903e-07, "loss": 0.3368, "step": 20589 }, { "epoch": 2.753410002674512, "grad_norm": 1.6187825202941895, "learning_rate": 3.5229555954148453e-07, "loss": 0.377, "step": 20590 }, { "epoch": 2.753543728269591, "grad_norm": 1.8877390623092651, "learning_rate": 3.5191582028623495e-07, "loss": 0.4188, "step": 20591 }, { "epoch": 2.7536774538646696, "grad_norm": 1.6285940408706665, "learning_rate": 3.5153628213606795e-07, "loss": 0.3883, "step": 20592 }, { "epoch": 2.7538111794597486, "grad_norm": 1.5363494157791138, "learning_rate": 3.5115694509889386e-07, "loss": 0.3346, "step": 20593 }, { "epoch": 2.7539449050548273, "grad_norm": 1.4802676439285278, "learning_rate": 3.5077780918262196e-07, "loss": 0.3555, "step": 20594 }, { "epoch": 2.7540786306499063, "grad_norm": 1.7005364894866943, "learning_rate": 3.503988743951514e-07, "loss": 0.3787, "step": 20595 }, { "epoch": 2.7542123562449854, "grad_norm": 1.5036591291427612, "learning_rate": 3.500201407443848e-07, "loss": 0.3386, "step": 20596 }, { "epoch": 2.7543460818400645, "grad_norm": 1.5266205072402954, "learning_rate": 3.4964160823821257e-07, "loss": 0.3506, "step": 20597 }, { "epoch": 2.754479807435143, "grad_norm": 1.746596097946167, "learning_rate": 3.492632768845261e-07, "loss": 0.4086, "step": 20598 }, { "epoch": 2.7546135330302217, "grad_norm": 1.6739288568496704, "learning_rate": 3.488851466912135e-07, "loss": 0.3201, "step": 20599 }, { "epoch": 2.754747258625301, "grad_norm": 1.515841007232666, "learning_rate": 3.4850721766615304e-07, "loss": 0.3064, "step": 20600 }, { "epoch": 2.75488098422038, "grad_norm": 1.5862118005752563, "learning_rate": 3.4812948981722716e-07, "loss": 0.3715, "step": 20601 }, { "epoch": 2.755014709815459, "grad_norm": 1.4882827997207642, "learning_rate": 3.477519631523041e-07, "loss": 0.3336, "step": 20602 }, { "epoch": 2.7551484354105376, "grad_norm": 1.540814757347107, "learning_rate": 3.4737463767925526e-07, "loss": 0.3035, "step": 20603 }, { "epoch": 2.7552821610056166, "grad_norm": 1.607160210609436, "learning_rate": 3.4699751340594557e-07, "loss": 0.3902, "step": 20604 }, { "epoch": 2.7554158866006953, "grad_norm": 1.3468674421310425, "learning_rate": 3.4662059034023644e-07, "loss": 0.3233, "step": 20605 }, { "epoch": 2.7555496121957743, "grad_norm": 1.5717487335205078, "learning_rate": 3.462438684899827e-07, "loss": 0.3737, "step": 20606 }, { "epoch": 2.7556833377908534, "grad_norm": 1.514543056488037, "learning_rate": 3.458673478630392e-07, "loss": 0.3485, "step": 20607 }, { "epoch": 2.755817063385932, "grad_norm": 1.5631985664367676, "learning_rate": 3.454910284672519e-07, "loss": 0.4024, "step": 20608 }, { "epoch": 2.755950788981011, "grad_norm": 1.4613646268844604, "learning_rate": 3.451149103104656e-07, "loss": 0.3926, "step": 20609 }, { "epoch": 2.7560845145760897, "grad_norm": 1.536556601524353, "learning_rate": 3.4473899340052075e-07, "loss": 0.384, "step": 20610 }, { "epoch": 2.756218240171169, "grad_norm": 1.4610799551010132, "learning_rate": 3.443632777452521e-07, "loss": 0.3778, "step": 20611 }, { "epoch": 2.756351965766248, "grad_norm": 1.584354043006897, "learning_rate": 3.439877633524924e-07, "loss": 0.3518, "step": 20612 }, { "epoch": 2.7564856913613265, "grad_norm": 1.5367207527160645, "learning_rate": 3.4361245023006864e-07, "loss": 0.3575, "step": 20613 }, { "epoch": 2.7566194169564056, "grad_norm": 1.5644794702529907, "learning_rate": 3.432373383858001e-07, "loss": 0.3002, "step": 20614 }, { "epoch": 2.756753142551484, "grad_norm": 1.6107590198516846, "learning_rate": 3.4286242782751165e-07, "loss": 0.3936, "step": 20615 }, { "epoch": 2.7568868681465633, "grad_norm": 1.5187064409255981, "learning_rate": 3.4248771856301266e-07, "loss": 0.349, "step": 20616 }, { "epoch": 2.7570205937416423, "grad_norm": 1.641087532043457, "learning_rate": 3.4211321060011795e-07, "loss": 0.363, "step": 20617 }, { "epoch": 2.757154319336721, "grad_norm": 1.4647361040115356, "learning_rate": 3.4173890394663124e-07, "loss": 0.308, "step": 20618 }, { "epoch": 2.7572880449318, "grad_norm": 1.7544102668762207, "learning_rate": 3.413647986103541e-07, "loss": 0.3776, "step": 20619 }, { "epoch": 2.7574217705268786, "grad_norm": 1.5245217084884644, "learning_rate": 3.4099089459908697e-07, "loss": 0.3188, "step": 20620 }, { "epoch": 2.7575554961219577, "grad_norm": 1.8576616048812866, "learning_rate": 3.406171919206214e-07, "loss": 0.4136, "step": 20621 }, { "epoch": 2.757689221717037, "grad_norm": 1.659319519996643, "learning_rate": 3.4024369058274774e-07, "loss": 0.3726, "step": 20622 }, { "epoch": 2.7578229473121154, "grad_norm": 1.5806396007537842, "learning_rate": 3.398703905932499e-07, "loss": 0.3531, "step": 20623 }, { "epoch": 2.7579566729071945, "grad_norm": 1.554179072380066, "learning_rate": 3.394972919599093e-07, "loss": 0.3546, "step": 20624 }, { "epoch": 2.758090398502273, "grad_norm": 1.4996528625488281, "learning_rate": 3.391243946905065e-07, "loss": 0.323, "step": 20625 }, { "epoch": 2.758224124097352, "grad_norm": 1.5668412446975708, "learning_rate": 3.3875169879280966e-07, "loss": 0.3404, "step": 20626 }, { "epoch": 2.7583578496924313, "grad_norm": 1.6871651411056519, "learning_rate": 3.3837920427458814e-07, "loss": 0.3443, "step": 20627 }, { "epoch": 2.75849157528751, "grad_norm": 1.4550955295562744, "learning_rate": 3.3800691114360794e-07, "loss": 0.3681, "step": 20628 }, { "epoch": 2.758625300882589, "grad_norm": 1.501133680343628, "learning_rate": 3.376348194076273e-07, "loss": 0.341, "step": 20629 }, { "epoch": 2.7587590264776676, "grad_norm": 1.6942713260650635, "learning_rate": 3.372629290744034e-07, "loss": 0.3649, "step": 20630 }, { "epoch": 2.7588927520727466, "grad_norm": 1.62017822265625, "learning_rate": 3.368912401516877e-07, "loss": 0.3832, "step": 20631 }, { "epoch": 2.7590264776678257, "grad_norm": 1.5366078615188599, "learning_rate": 3.3651975264722746e-07, "loss": 0.3657, "step": 20632 }, { "epoch": 2.759160203262905, "grad_norm": 1.533186674118042, "learning_rate": 3.361484665687664e-07, "loss": 0.3318, "step": 20633 }, { "epoch": 2.7592939288579834, "grad_norm": 1.4641669988632202, "learning_rate": 3.3577738192404395e-07, "loss": 0.3498, "step": 20634 }, { "epoch": 2.759427654453062, "grad_norm": 1.8235427141189575, "learning_rate": 3.354064987207917e-07, "loss": 0.4082, "step": 20635 }, { "epoch": 2.759561380048141, "grad_norm": 1.5606300830841064, "learning_rate": 3.3503581696674446e-07, "loss": 0.3898, "step": 20636 }, { "epoch": 2.75969510564322, "grad_norm": 1.6499061584472656, "learning_rate": 3.346653366696284e-07, "loss": 0.3716, "step": 20637 }, { "epoch": 2.7598288312382993, "grad_norm": 1.5538313388824463, "learning_rate": 3.3429505783716177e-07, "loss": 0.3418, "step": 20638 }, { "epoch": 2.759962556833378, "grad_norm": 1.432137131690979, "learning_rate": 3.3392498047706836e-07, "loss": 0.3407, "step": 20639 }, { "epoch": 2.760096282428457, "grad_norm": 1.4365860223770142, "learning_rate": 3.3355510459705754e-07, "loss": 0.3674, "step": 20640 }, { "epoch": 2.7602300080235356, "grad_norm": 1.6688669919967651, "learning_rate": 3.331854302048432e-07, "loss": 0.3723, "step": 20641 }, { "epoch": 2.7603637336186146, "grad_norm": 1.6407005786895752, "learning_rate": 3.328159573081258e-07, "loss": 0.3533, "step": 20642 }, { "epoch": 2.7604974592136937, "grad_norm": 1.3723372220993042, "learning_rate": 3.3244668591460916e-07, "loss": 0.3088, "step": 20643 }, { "epoch": 2.7606311848087723, "grad_norm": 1.6139580011367798, "learning_rate": 3.320776160319927e-07, "loss": 0.3999, "step": 20644 }, { "epoch": 2.7607649104038514, "grad_norm": 1.5799717903137207, "learning_rate": 3.317087476679659e-07, "loss": 0.4059, "step": 20645 }, { "epoch": 2.76089863599893, "grad_norm": 1.4462039470672607, "learning_rate": 3.3134008083021916e-07, "loss": 0.3349, "step": 20646 }, { "epoch": 2.761032361594009, "grad_norm": 1.5910276174545288, "learning_rate": 3.309716155264364e-07, "loss": 0.3509, "step": 20647 }, { "epoch": 2.761166087189088, "grad_norm": 1.7002403736114502, "learning_rate": 3.3060335176429703e-07, "loss": 0.4012, "step": 20648 }, { "epoch": 2.761299812784167, "grad_norm": 1.4837027788162231, "learning_rate": 3.302352895514793e-07, "loss": 0.3327, "step": 20649 }, { "epoch": 2.761433538379246, "grad_norm": 1.6668310165405273, "learning_rate": 3.298674288956538e-07, "loss": 0.3792, "step": 20650 }, { "epoch": 2.7615672639743245, "grad_norm": 1.583036184310913, "learning_rate": 3.2949976980448774e-07, "loss": 0.3199, "step": 20651 }, { "epoch": 2.7617009895694036, "grad_norm": 1.4512341022491455, "learning_rate": 3.2913231228564604e-07, "loss": 0.346, "step": 20652 }, { "epoch": 2.7618347151644826, "grad_norm": 1.6365638971328735, "learning_rate": 3.28765056346787e-07, "loss": 0.3893, "step": 20653 }, { "epoch": 2.7619684407595613, "grad_norm": 1.6326844692230225, "learning_rate": 3.283980019955668e-07, "loss": 0.3955, "step": 20654 }, { "epoch": 2.7621021663546403, "grad_norm": 1.7585816383361816, "learning_rate": 3.2803114923963377e-07, "loss": 0.3775, "step": 20655 }, { "epoch": 2.762235891949719, "grad_norm": 1.8509804010391235, "learning_rate": 3.2766449808663836e-07, "loss": 0.4253, "step": 20656 }, { "epoch": 2.762369617544798, "grad_norm": 1.7923189401626587, "learning_rate": 3.272980485442201e-07, "loss": 0.3897, "step": 20657 }, { "epoch": 2.762503343139877, "grad_norm": 1.5577131509780884, "learning_rate": 3.269318006200195e-07, "loss": 0.3553, "step": 20658 }, { "epoch": 2.7626370687349557, "grad_norm": 1.5973894596099854, "learning_rate": 3.2656575432166605e-07, "loss": 0.3761, "step": 20659 }, { "epoch": 2.762770794330035, "grad_norm": 1.6508179903030396, "learning_rate": 3.2619990965679695e-07, "loss": 0.3443, "step": 20660 }, { "epoch": 2.7629045199251134, "grad_norm": 1.676206350326538, "learning_rate": 3.258342666330305e-07, "loss": 0.3283, "step": 20661 }, { "epoch": 2.7630382455201925, "grad_norm": 1.5846302509307861, "learning_rate": 3.2546882525799294e-07, "loss": 0.3687, "step": 20662 }, { "epoch": 2.7631719711152716, "grad_norm": 1.5683162212371826, "learning_rate": 3.2510358553930143e-07, "loss": 0.3297, "step": 20663 }, { "epoch": 2.76330569671035, "grad_norm": 1.611045002937317, "learning_rate": 3.247385474845655e-07, "loss": 0.3364, "step": 20664 }, { "epoch": 2.7634394223054293, "grad_norm": 1.3984476327896118, "learning_rate": 3.2437371110139895e-07, "loss": 0.3588, "step": 20665 }, { "epoch": 2.763573147900508, "grad_norm": 1.6490401029586792, "learning_rate": 3.2400907639740243e-07, "loss": 0.3539, "step": 20666 }, { "epoch": 2.763706873495587, "grad_norm": 1.5555320978164673, "learning_rate": 3.236446433801776e-07, "loss": 0.3385, "step": 20667 }, { "epoch": 2.763840599090666, "grad_norm": 1.4809041023254395, "learning_rate": 3.232804120573219e-07, "loss": 0.3217, "step": 20668 }, { "epoch": 2.763974324685745, "grad_norm": 1.4999443292617798, "learning_rate": 3.2291638243642567e-07, "loss": 0.3113, "step": 20669 }, { "epoch": 2.7641080502808237, "grad_norm": 1.4797770977020264, "learning_rate": 3.225525545250774e-07, "loss": 0.3685, "step": 20670 }, { "epoch": 2.764241775875903, "grad_norm": 1.5110925436019897, "learning_rate": 3.22188928330861e-07, "loss": 0.3922, "step": 20671 }, { "epoch": 2.7643755014709814, "grad_norm": 1.8774360418319702, "learning_rate": 3.218255038613549e-07, "loss": 0.3785, "step": 20672 }, { "epoch": 2.7645092270660605, "grad_norm": 1.5553300380706787, "learning_rate": 3.2146228112413637e-07, "loss": 0.376, "step": 20673 }, { "epoch": 2.7646429526611396, "grad_norm": 1.5218271017074585, "learning_rate": 3.2109926012677484e-07, "loss": 0.3727, "step": 20674 }, { "epoch": 2.764776678256218, "grad_norm": 1.7428843975067139, "learning_rate": 3.2073644087683654e-07, "loss": 0.3535, "step": 20675 }, { "epoch": 2.7649104038512973, "grad_norm": 1.7955750226974487, "learning_rate": 3.203738233818865e-07, "loss": 0.3743, "step": 20676 }, { "epoch": 2.765044129446376, "grad_norm": 1.3737322092056274, "learning_rate": 3.200114076494809e-07, "loss": 0.3656, "step": 20677 }, { "epoch": 2.765177855041455, "grad_norm": 1.570135235786438, "learning_rate": 3.196491936871748e-07, "loss": 0.3308, "step": 20678 }, { "epoch": 2.765311580636534, "grad_norm": 1.4440898895263672, "learning_rate": 3.1928718150252e-07, "loss": 0.3097, "step": 20679 }, { "epoch": 2.7654453062316127, "grad_norm": 1.4188389778137207, "learning_rate": 3.189253711030571e-07, "loss": 0.3147, "step": 20680 }, { "epoch": 2.7655790318266917, "grad_norm": 1.6822290420532227, "learning_rate": 3.1856376249633336e-07, "loss": 0.3906, "step": 20681 }, { "epoch": 2.7657127574217704, "grad_norm": 1.6088173389434814, "learning_rate": 3.182023556898839e-07, "loss": 0.3409, "step": 20682 }, { "epoch": 2.7658464830168494, "grad_norm": 1.6445739269256592, "learning_rate": 3.1784115069124044e-07, "loss": 0.416, "step": 20683 }, { "epoch": 2.7659802086119285, "grad_norm": 1.6406012773513794, "learning_rate": 3.1748014750793587e-07, "loss": 0.3572, "step": 20684 }, { "epoch": 2.766113934207007, "grad_norm": 1.49924898147583, "learning_rate": 3.1711934614748975e-07, "loss": 0.3583, "step": 20685 }, { "epoch": 2.766247659802086, "grad_norm": 1.590999722480774, "learning_rate": 3.1675874661742713e-07, "loss": 0.386, "step": 20686 }, { "epoch": 2.766381385397165, "grad_norm": 1.647998571395874, "learning_rate": 3.16398348925262e-07, "loss": 0.3482, "step": 20687 }, { "epoch": 2.766515110992244, "grad_norm": 1.49367094039917, "learning_rate": 3.160381530785062e-07, "loss": 0.3524, "step": 20688 }, { "epoch": 2.766648836587323, "grad_norm": 1.607854962348938, "learning_rate": 3.1567815908467023e-07, "loss": 0.3558, "step": 20689 }, { "epoch": 2.7667825621824016, "grad_norm": 1.6258554458618164, "learning_rate": 3.1531836695125495e-07, "loss": 0.3603, "step": 20690 }, { "epoch": 2.7669162877774807, "grad_norm": 1.5969023704528809, "learning_rate": 3.149587766857609e-07, "loss": 0.3592, "step": 20691 }, { "epoch": 2.7670500133725593, "grad_norm": 1.6094210147857666, "learning_rate": 3.1459938829568435e-07, "loss": 0.3477, "step": 20692 }, { "epoch": 2.7671837389676384, "grad_norm": 1.5778844356536865, "learning_rate": 3.142402017885149e-07, "loss": 0.35, "step": 20693 }, { "epoch": 2.7673174645627174, "grad_norm": 1.5329524278640747, "learning_rate": 3.1388121717174093e-07, "loss": 0.3664, "step": 20694 }, { "epoch": 2.767451190157796, "grad_norm": 1.407728910446167, "learning_rate": 3.1352243445284425e-07, "loss": 0.3205, "step": 20695 }, { "epoch": 2.767584915752875, "grad_norm": 1.551501750946045, "learning_rate": 3.1316385363930223e-07, "loss": 0.3733, "step": 20696 }, { "epoch": 2.7677186413479538, "grad_norm": 1.5153677463531494, "learning_rate": 3.1280547473859224e-07, "loss": 0.3745, "step": 20697 }, { "epoch": 2.767852366943033, "grad_norm": 1.5307896137237549, "learning_rate": 3.124472977581827e-07, "loss": 0.385, "step": 20698 }, { "epoch": 2.767986092538112, "grad_norm": 1.4883335828781128, "learning_rate": 3.120893227055366e-07, "loss": 0.3857, "step": 20699 }, { "epoch": 2.768119818133191, "grad_norm": 1.6086235046386719, "learning_rate": 3.1173154958812013e-07, "loss": 0.357, "step": 20700 }, { "epoch": 2.7682535437282696, "grad_norm": 1.6698130369186401, "learning_rate": 3.1137397841338844e-07, "loss": 0.3755, "step": 20701 }, { "epoch": 2.768387269323348, "grad_norm": 1.6747626066207886, "learning_rate": 3.110166091887956e-07, "loss": 0.361, "step": 20702 }, { "epoch": 2.7685209949184273, "grad_norm": 1.506170630455017, "learning_rate": 3.106594419217901e-07, "loss": 0.3565, "step": 20703 }, { "epoch": 2.7686547205135064, "grad_norm": 1.6966493129730225, "learning_rate": 3.1030247661981594e-07, "loss": 0.3572, "step": 20704 }, { "epoch": 2.7687884461085854, "grad_norm": 1.6657495498657227, "learning_rate": 3.099457132903161e-07, "loss": 0.3994, "step": 20705 }, { "epoch": 2.768922171703664, "grad_norm": 1.5423030853271484, "learning_rate": 3.095891519407246e-07, "loss": 0.4118, "step": 20706 }, { "epoch": 2.769055897298743, "grad_norm": 1.8879189491271973, "learning_rate": 3.0923279257847436e-07, "loss": 0.4392, "step": 20707 }, { "epoch": 2.7691896228938218, "grad_norm": 1.6100877523422241, "learning_rate": 3.0887663521099397e-07, "loss": 0.3766, "step": 20708 }, { "epoch": 2.769323348488901, "grad_norm": 1.5506712198257446, "learning_rate": 3.085206798457052e-07, "loss": 0.3436, "step": 20709 }, { "epoch": 2.76945707408398, "grad_norm": 1.517378568649292, "learning_rate": 3.081649264900322e-07, "loss": 0.3508, "step": 20710 }, { "epoch": 2.7695907996790585, "grad_norm": 1.7510780096054077, "learning_rate": 3.0780937515138444e-07, "loss": 0.4299, "step": 20711 }, { "epoch": 2.7697245252741376, "grad_norm": 1.823799729347229, "learning_rate": 3.074540258371772e-07, "loss": 0.4091, "step": 20712 }, { "epoch": 2.769858250869216, "grad_norm": 1.5755752325057983, "learning_rate": 3.070988785548157e-07, "loss": 0.3637, "step": 20713 }, { "epoch": 2.7699919764642953, "grad_norm": 1.624715805053711, "learning_rate": 3.067439333117028e-07, "loss": 0.349, "step": 20714 }, { "epoch": 2.7701257020593744, "grad_norm": 1.5108314752578735, "learning_rate": 3.0638919011523714e-07, "loss": 0.3282, "step": 20715 }, { "epoch": 2.770259427654453, "grad_norm": 1.6149975061416626, "learning_rate": 3.0603464897281275e-07, "loss": 0.3663, "step": 20716 }, { "epoch": 2.770393153249532, "grad_norm": 1.6913609504699707, "learning_rate": 3.0568030989182043e-07, "loss": 0.3752, "step": 20717 }, { "epoch": 2.7705268788446107, "grad_norm": 1.7315477132797241, "learning_rate": 3.053261728796464e-07, "loss": 0.4138, "step": 20718 }, { "epoch": 2.7706606044396898, "grad_norm": 1.6756341457366943, "learning_rate": 3.049722379436704e-07, "loss": 0.385, "step": 20719 }, { "epoch": 2.770794330034769, "grad_norm": 1.4319431781768799, "learning_rate": 3.046185050912709e-07, "loss": 0.2987, "step": 20720 }, { "epoch": 2.7709280556298475, "grad_norm": 1.7399311065673828, "learning_rate": 3.0426497432982207e-07, "loss": 0.3756, "step": 20721 }, { "epoch": 2.7710617812249265, "grad_norm": 1.4651795625686646, "learning_rate": 3.039116456666924e-07, "loss": 0.3464, "step": 20722 }, { "epoch": 2.771195506820005, "grad_norm": 1.5235135555267334, "learning_rate": 3.035585191092438e-07, "loss": 0.3584, "step": 20723 }, { "epoch": 2.771329232415084, "grad_norm": 1.7636213302612305, "learning_rate": 3.0320559466484265e-07, "loss": 0.3515, "step": 20724 }, { "epoch": 2.7714629580101633, "grad_norm": 1.6372895240783691, "learning_rate": 3.028528723408386e-07, "loss": 0.4314, "step": 20725 }, { "epoch": 2.771596683605242, "grad_norm": 1.660750150680542, "learning_rate": 3.025003521445891e-07, "loss": 0.3796, "step": 20726 }, { "epoch": 2.771730409200321, "grad_norm": 1.2646684646606445, "learning_rate": 3.021480340834415e-07, "loss": 0.3103, "step": 20727 }, { "epoch": 2.7718641347953996, "grad_norm": 1.7003023624420166, "learning_rate": 3.0179591816473566e-07, "loss": 0.3981, "step": 20728 }, { "epoch": 2.7719978603904787, "grad_norm": 1.6878200769424438, "learning_rate": 3.014440043958167e-07, "loss": 0.3958, "step": 20729 }, { "epoch": 2.7721315859855578, "grad_norm": 1.5842031240463257, "learning_rate": 3.010922927840154e-07, "loss": 0.3427, "step": 20730 }, { "epoch": 2.7722653115806364, "grad_norm": 1.4597207307815552, "learning_rate": 3.007407833366638e-07, "loss": 0.3293, "step": 20731 }, { "epoch": 2.7723990371757155, "grad_norm": 1.5233381986618042, "learning_rate": 3.0038947606109036e-07, "loss": 0.3533, "step": 20732 }, { "epoch": 2.772532762770794, "grad_norm": 1.5119308233261108, "learning_rate": 3.00038370964616e-07, "loss": 0.3398, "step": 20733 }, { "epoch": 2.772666488365873, "grad_norm": 1.579413890838623, "learning_rate": 2.996874680545603e-07, "loss": 0.3955, "step": 20734 }, { "epoch": 2.772800213960952, "grad_norm": 1.5960884094238281, "learning_rate": 2.9933676733823747e-07, "loss": 0.3731, "step": 20735 }, { "epoch": 2.7729339395560313, "grad_norm": 1.64297616481781, "learning_rate": 2.989862688229572e-07, "loss": 0.3805, "step": 20736 }, { "epoch": 2.77306766515111, "grad_norm": 1.5175938606262207, "learning_rate": 2.9863597251602484e-07, "loss": 0.3439, "step": 20737 }, { "epoch": 2.7732013907461885, "grad_norm": 1.6994036436080933, "learning_rate": 2.982858784247422e-07, "loss": 0.3897, "step": 20738 }, { "epoch": 2.7733351163412676, "grad_norm": 1.3891130685806274, "learning_rate": 2.9793598655640687e-07, "loss": 0.3949, "step": 20739 }, { "epoch": 2.7734688419363467, "grad_norm": 1.550123691558838, "learning_rate": 2.9758629691831296e-07, "loss": 0.3843, "step": 20740 }, { "epoch": 2.7736025675314258, "grad_norm": 1.4373564720153809, "learning_rate": 2.9723680951774804e-07, "loss": 0.3394, "step": 20741 }, { "epoch": 2.7737362931265044, "grad_norm": 1.5844159126281738, "learning_rate": 2.968875243619962e-07, "loss": 0.3154, "step": 20742 }, { "epoch": 2.7738700187215835, "grad_norm": 1.5807666778564453, "learning_rate": 2.9653844145834164e-07, "loss": 0.3537, "step": 20743 }, { "epoch": 2.774003744316662, "grad_norm": 1.3800562620162964, "learning_rate": 2.9618956081405525e-07, "loss": 0.3465, "step": 20744 }, { "epoch": 2.774137469911741, "grad_norm": 1.4731806516647339, "learning_rate": 2.958408824364134e-07, "loss": 0.3532, "step": 20745 }, { "epoch": 2.77427119550682, "grad_norm": 1.5849946737289429, "learning_rate": 2.954924063326814e-07, "loss": 0.3661, "step": 20746 }, { "epoch": 2.774404921101899, "grad_norm": 1.7389240264892578, "learning_rate": 2.9514413251012563e-07, "loss": 0.417, "step": 20747 }, { "epoch": 2.774538646696978, "grad_norm": 1.4546507596969604, "learning_rate": 2.947960609760037e-07, "loss": 0.3771, "step": 20748 }, { "epoch": 2.7746723722920565, "grad_norm": 1.4727009534835815, "learning_rate": 2.9444819173756966e-07, "loss": 0.3591, "step": 20749 }, { "epoch": 2.7748060978871356, "grad_norm": 1.6057275533676147, "learning_rate": 2.9410052480207674e-07, "loss": 0.331, "step": 20750 }, { "epoch": 2.7749398234822147, "grad_norm": 1.4012356996536255, "learning_rate": 2.937530601767713e-07, "loss": 0.3379, "step": 20751 }, { "epoch": 2.7750735490772933, "grad_norm": 1.4955625534057617, "learning_rate": 2.934057978688942e-07, "loss": 0.3537, "step": 20752 }, { "epoch": 2.7752072746723724, "grad_norm": 1.6552820205688477, "learning_rate": 2.9305873788568637e-07, "loss": 0.3898, "step": 20753 }, { "epoch": 2.775341000267451, "grad_norm": 1.8777143955230713, "learning_rate": 2.927118802343787e-07, "loss": 0.4045, "step": 20754 }, { "epoch": 2.77547472586253, "grad_norm": 1.5445661544799805, "learning_rate": 2.923652249222053e-07, "loss": 0.394, "step": 20755 }, { "epoch": 2.775608451457609, "grad_norm": 1.5453871488571167, "learning_rate": 2.9201877195638827e-07, "loss": 0.332, "step": 20756 }, { "epoch": 2.7757421770526878, "grad_norm": 1.5067980289459229, "learning_rate": 2.916725213441507e-07, "loss": 0.373, "step": 20757 }, { "epoch": 2.775875902647767, "grad_norm": 1.851904034614563, "learning_rate": 2.91326473092709e-07, "loss": 0.402, "step": 20758 }, { "epoch": 2.7760096282428455, "grad_norm": 1.7865034341812134, "learning_rate": 2.9098062720927746e-07, "loss": 0.426, "step": 20759 }, { "epoch": 2.7761433538379245, "grad_norm": 1.6535567045211792, "learning_rate": 2.906349837010636e-07, "loss": 0.3869, "step": 20760 }, { "epoch": 2.7762770794330036, "grad_norm": 1.5183446407318115, "learning_rate": 2.9028954257527277e-07, "loss": 0.3728, "step": 20761 }, { "epoch": 2.7764108050280822, "grad_norm": 1.5191289186477661, "learning_rate": 2.899443038391059e-07, "loss": 0.3619, "step": 20762 }, { "epoch": 2.7765445306231613, "grad_norm": 1.5413789749145508, "learning_rate": 2.895992674997583e-07, "loss": 0.3692, "step": 20763 }, { "epoch": 2.77667825621824, "grad_norm": 1.601488471031189, "learning_rate": 2.8925443356442206e-07, "loss": 0.3535, "step": 20764 }, { "epoch": 2.776811981813319, "grad_norm": 1.7223652601242065, "learning_rate": 2.8890980204028476e-07, "loss": 0.3882, "step": 20765 }, { "epoch": 2.776945707408398, "grad_norm": 1.5945628881454468, "learning_rate": 2.885653729345306e-07, "loss": 0.3773, "step": 20766 }, { "epoch": 2.7770794330034767, "grad_norm": 1.5190303325653076, "learning_rate": 2.8822114625433826e-07, "loss": 0.3719, "step": 20767 }, { "epoch": 2.7772131585985558, "grad_norm": 1.44663405418396, "learning_rate": 2.8787712200688214e-07, "loss": 0.3347, "step": 20768 }, { "epoch": 2.7773468841936344, "grad_norm": 1.4878138303756714, "learning_rate": 2.875333001993352e-07, "loss": 0.3515, "step": 20769 }, { "epoch": 2.7774806097887135, "grad_norm": 1.539793848991394, "learning_rate": 2.871896808388608e-07, "loss": 0.355, "step": 20770 }, { "epoch": 2.7776143353837925, "grad_norm": 1.6660206317901611, "learning_rate": 2.8684626393262637e-07, "loss": 0.3502, "step": 20771 }, { "epoch": 2.7777480609788716, "grad_norm": 1.448326587677002, "learning_rate": 2.865030494877852e-07, "loss": 0.3359, "step": 20772 }, { "epoch": 2.7778817865739502, "grad_norm": 1.4468492269515991, "learning_rate": 2.861600375114926e-07, "loss": 0.3326, "step": 20773 }, { "epoch": 2.7780155121690293, "grad_norm": 1.4524065256118774, "learning_rate": 2.8581722801090063e-07, "loss": 0.3487, "step": 20774 }, { "epoch": 2.778149237764108, "grad_norm": 1.63760244846344, "learning_rate": 2.854746209931514e-07, "loss": 0.3933, "step": 20775 }, { "epoch": 2.778282963359187, "grad_norm": 1.5655663013458252, "learning_rate": 2.8513221646538913e-07, "loss": 0.3372, "step": 20776 }, { "epoch": 2.778416688954266, "grad_norm": 1.61640465259552, "learning_rate": 2.847900144347493e-07, "loss": 0.3435, "step": 20777 }, { "epoch": 2.7785504145493447, "grad_norm": 1.4885908365249634, "learning_rate": 2.8444801490836505e-07, "loss": 0.3737, "step": 20778 }, { "epoch": 2.7786841401444238, "grad_norm": 1.6433523893356323, "learning_rate": 2.8410621789336513e-07, "loss": 0.3343, "step": 20779 }, { "epoch": 2.7788178657395024, "grad_norm": 1.5935453176498413, "learning_rate": 2.8376462339687383e-07, "loss": 0.3463, "step": 20780 }, { "epoch": 2.7789515913345815, "grad_norm": 1.5280020236968994, "learning_rate": 2.8342323142601104e-07, "loss": 0.3517, "step": 20781 }, { "epoch": 2.7790853169296605, "grad_norm": 1.8817561864852905, "learning_rate": 2.830820419878944e-07, "loss": 0.381, "step": 20782 }, { "epoch": 2.779219042524739, "grad_norm": 1.7712763547897339, "learning_rate": 2.827410550896337e-07, "loss": 0.3879, "step": 20783 }, { "epoch": 2.7793527681198182, "grad_norm": 1.6753543615341187, "learning_rate": 2.824002707383378e-07, "loss": 0.4368, "step": 20784 }, { "epoch": 2.779486493714897, "grad_norm": 1.6137648820877075, "learning_rate": 2.8205968894110867e-07, "loss": 0.3752, "step": 20785 }, { "epoch": 2.779620219309976, "grad_norm": 1.4507417678833008, "learning_rate": 2.8171930970504745e-07, "loss": 0.3238, "step": 20786 }, { "epoch": 2.779753944905055, "grad_norm": 1.4163216352462769, "learning_rate": 2.813791330372473e-07, "loss": 0.3348, "step": 20787 }, { "epoch": 2.7798876705001336, "grad_norm": 1.5768721103668213, "learning_rate": 2.810391589448003e-07, "loss": 0.3649, "step": 20788 }, { "epoch": 2.7800213960952127, "grad_norm": 1.5263575315475464, "learning_rate": 2.8069938743478965e-07, "loss": 0.3816, "step": 20789 }, { "epoch": 2.7801551216902913, "grad_norm": 1.8141093254089355, "learning_rate": 2.8035981851430303e-07, "loss": 0.4374, "step": 20790 }, { "epoch": 2.7802888472853704, "grad_norm": 1.5084362030029297, "learning_rate": 2.8002045219041374e-07, "loss": 0.3234, "step": 20791 }, { "epoch": 2.7804225728804495, "grad_norm": 1.472138524055481, "learning_rate": 2.79681288470196e-07, "loss": 0.3283, "step": 20792 }, { "epoch": 2.780556298475528, "grad_norm": 1.8117003440856934, "learning_rate": 2.793423273607221e-07, "loss": 0.3778, "step": 20793 }, { "epoch": 2.780690024070607, "grad_norm": 1.5973988771438599, "learning_rate": 2.79003568869054e-07, "loss": 0.319, "step": 20794 }, { "epoch": 2.780823749665686, "grad_norm": 1.5897465944290161, "learning_rate": 2.7866501300225613e-07, "loss": 0.36, "step": 20795 }, { "epoch": 2.780957475260765, "grad_norm": 1.414781928062439, "learning_rate": 2.7832665976738393e-07, "loss": 0.3459, "step": 20796 }, { "epoch": 2.781091200855844, "grad_norm": 1.5728187561035156, "learning_rate": 2.7798850917148845e-07, "loss": 0.3881, "step": 20797 }, { "epoch": 2.7812249264509226, "grad_norm": 1.6355929374694824, "learning_rate": 2.776505612216207e-07, "loss": 0.3559, "step": 20798 }, { "epoch": 2.7813586520460016, "grad_norm": 1.7737250328063965, "learning_rate": 2.7731281592482285e-07, "loss": 0.362, "step": 20799 }, { "epoch": 2.7814923776410803, "grad_norm": 1.4858434200286865, "learning_rate": 2.76975273288137e-07, "loss": 0.2964, "step": 20800 }, { "epoch": 2.7816261032361593, "grad_norm": 1.5188225507736206, "learning_rate": 2.7663793331859645e-07, "loss": 0.3882, "step": 20801 }, { "epoch": 2.7817598288312384, "grad_norm": 1.6512240171432495, "learning_rate": 2.7630079602323447e-07, "loss": 0.3517, "step": 20802 }, { "epoch": 2.7818935544263175, "grad_norm": 1.623434066772461, "learning_rate": 2.759638614090776e-07, "loss": 0.3738, "step": 20803 }, { "epoch": 2.782027280021396, "grad_norm": 1.73087739944458, "learning_rate": 2.756271294831492e-07, "loss": 0.3777, "step": 20804 }, { "epoch": 2.7821610056164747, "grad_norm": 1.4283182621002197, "learning_rate": 2.75290600252468e-07, "loss": 0.3174, "step": 20805 }, { "epoch": 2.782294731211554, "grad_norm": 1.4423762559890747, "learning_rate": 2.749542737240485e-07, "loss": 0.339, "step": 20806 }, { "epoch": 2.782428456806633, "grad_norm": 1.3682961463928223, "learning_rate": 2.746181499049028e-07, "loss": 0.3311, "step": 20807 }, { "epoch": 2.782562182401712, "grad_norm": 1.5871332883834839, "learning_rate": 2.74282228802033e-07, "loss": 0.3573, "step": 20808 }, { "epoch": 2.7826959079967906, "grad_norm": 1.54444420337677, "learning_rate": 2.739465104224459e-07, "loss": 0.3783, "step": 20809 }, { "epoch": 2.7828296335918696, "grad_norm": 1.5802757740020752, "learning_rate": 2.736109947731358e-07, "loss": 0.3433, "step": 20810 }, { "epoch": 2.7829633591869483, "grad_norm": 1.5497969388961792, "learning_rate": 2.732756818610971e-07, "loss": 0.3354, "step": 20811 }, { "epoch": 2.7830970847820273, "grad_norm": 1.3995293378829956, "learning_rate": 2.729405716933209e-07, "loss": 0.3197, "step": 20812 }, { "epoch": 2.7832308103771064, "grad_norm": 1.4622576236724854, "learning_rate": 2.7260566427678935e-07, "loss": 0.3702, "step": 20813 }, { "epoch": 2.783364535972185, "grad_norm": 1.5768816471099854, "learning_rate": 2.722709596184858e-07, "loss": 0.3865, "step": 20814 }, { "epoch": 2.783498261567264, "grad_norm": 1.5556169748306274, "learning_rate": 2.7193645772538467e-07, "loss": 0.3598, "step": 20815 }, { "epoch": 2.7836319871623427, "grad_norm": 1.6539340019226074, "learning_rate": 2.7160215860445924e-07, "loss": 0.3764, "step": 20816 }, { "epoch": 2.783765712757422, "grad_norm": 1.6140497922897339, "learning_rate": 2.7126806226267845e-07, "loss": 0.3714, "step": 20817 }, { "epoch": 2.783899438352501, "grad_norm": 1.4616377353668213, "learning_rate": 2.709341687070044e-07, "loss": 0.3589, "step": 20818 }, { "epoch": 2.7840331639475795, "grad_norm": 1.536062479019165, "learning_rate": 2.7060047794439937e-07, "loss": 0.3455, "step": 20819 }, { "epoch": 2.7841668895426586, "grad_norm": 1.5663034915924072, "learning_rate": 2.702669899818167e-07, "loss": 0.374, "step": 20820 }, { "epoch": 2.784300615137737, "grad_norm": 1.6951788663864136, "learning_rate": 2.699337048262074e-07, "loss": 0.388, "step": 20821 }, { "epoch": 2.7844343407328163, "grad_norm": 1.4148496389389038, "learning_rate": 2.6960062248452043e-07, "loss": 0.3494, "step": 20822 }, { "epoch": 2.7845680663278953, "grad_norm": 1.5569648742675781, "learning_rate": 2.6926774296369696e-07, "loss": 0.3619, "step": 20823 }, { "epoch": 2.784701791922974, "grad_norm": 1.6534479856491089, "learning_rate": 2.689350662706769e-07, "loss": 0.4166, "step": 20824 }, { "epoch": 2.784835517518053, "grad_norm": 1.5791547298431396, "learning_rate": 2.686025924123925e-07, "loss": 0.3176, "step": 20825 }, { "epoch": 2.7849692431131317, "grad_norm": 1.5644370317459106, "learning_rate": 2.6827032139577604e-07, "loss": 0.368, "step": 20826 }, { "epoch": 2.7851029687082107, "grad_norm": 1.612885594367981, "learning_rate": 2.6793825322775193e-07, "loss": 0.4188, "step": 20827 }, { "epoch": 2.78523669430329, "grad_norm": 1.6456758975982666, "learning_rate": 2.676063879152424e-07, "loss": 0.3916, "step": 20828 }, { "epoch": 2.7853704198983684, "grad_norm": 1.6820650100708008, "learning_rate": 2.672747254651653e-07, "loss": 0.3863, "step": 20829 }, { "epoch": 2.7855041454934475, "grad_norm": 1.5543450117111206, "learning_rate": 2.6694326588443286e-07, "loss": 0.3619, "step": 20830 }, { "epoch": 2.785637871088526, "grad_norm": 1.6793380975723267, "learning_rate": 2.666120091799551e-07, "loss": 0.3533, "step": 20831 }, { "epoch": 2.785771596683605, "grad_norm": 1.4780927896499634, "learning_rate": 2.662809553586354e-07, "loss": 0.3407, "step": 20832 }, { "epoch": 2.7859053222786843, "grad_norm": 1.702867865562439, "learning_rate": 2.659501044273771e-07, "loss": 0.3646, "step": 20833 }, { "epoch": 2.786039047873763, "grad_norm": 1.5688337087631226, "learning_rate": 2.656194563930714e-07, "loss": 0.3521, "step": 20834 }, { "epoch": 2.786172773468842, "grad_norm": 1.4807660579681396, "learning_rate": 2.652890112626161e-07, "loss": 0.3548, "step": 20835 }, { "epoch": 2.7863064990639206, "grad_norm": 1.6465785503387451, "learning_rate": 2.6495876904289454e-07, "loss": 0.3572, "step": 20836 }, { "epoch": 2.7864402246589997, "grad_norm": 1.8622580766677856, "learning_rate": 2.6462872974079125e-07, "loss": 0.3674, "step": 20837 }, { "epoch": 2.7865739502540787, "grad_norm": 1.3903818130493164, "learning_rate": 2.6429889336318847e-07, "loss": 0.3314, "step": 20838 }, { "epoch": 2.786707675849158, "grad_norm": 1.5297942161560059, "learning_rate": 2.6396925991695744e-07, "loss": 0.3295, "step": 20839 }, { "epoch": 2.7868414014442364, "grad_norm": 1.4815959930419922, "learning_rate": 2.636398294089726e-07, "loss": 0.3391, "step": 20840 }, { "epoch": 2.786975127039315, "grad_norm": 1.7010157108306885, "learning_rate": 2.6331060184609735e-07, "loss": 0.3852, "step": 20841 }, { "epoch": 2.787108852634394, "grad_norm": 1.6452326774597168, "learning_rate": 2.629815772351962e-07, "loss": 0.4278, "step": 20842 }, { "epoch": 2.787242578229473, "grad_norm": 1.4043594598770142, "learning_rate": 2.62652755583126e-07, "loss": 0.3702, "step": 20843 }, { "epoch": 2.7873763038245523, "grad_norm": 1.4779294729232788, "learning_rate": 2.623241368967422e-07, "loss": 0.3133, "step": 20844 }, { "epoch": 2.787510029419631, "grad_norm": 1.57975172996521, "learning_rate": 2.619957211828938e-07, "loss": 0.3677, "step": 20845 }, { "epoch": 2.78764375501471, "grad_norm": 1.4036624431610107, "learning_rate": 2.616675084484266e-07, "loss": 0.3344, "step": 20846 }, { "epoch": 2.7877774806097886, "grad_norm": 1.830676555633545, "learning_rate": 2.613394987001805e-07, "loss": 0.3764, "step": 20847 }, { "epoch": 2.7879112062048677, "grad_norm": 1.5744816064834595, "learning_rate": 2.6101169194499456e-07, "loss": 0.391, "step": 20848 }, { "epoch": 2.7880449317999467, "grad_norm": 1.7157931327819824, "learning_rate": 2.6068408818970106e-07, "loss": 0.3812, "step": 20849 }, { "epoch": 2.7881786573950254, "grad_norm": 1.6557964086532593, "learning_rate": 2.6035668744112786e-07, "loss": 0.3643, "step": 20850 }, { "epoch": 2.7883123829901044, "grad_norm": 1.4989999532699585, "learning_rate": 2.6002948970609956e-07, "loss": 0.3515, "step": 20851 }, { "epoch": 2.788446108585183, "grad_norm": 1.3471267223358154, "learning_rate": 2.597024949914373e-07, "loss": 0.2783, "step": 20852 }, { "epoch": 2.788579834180262, "grad_norm": 1.5260107517242432, "learning_rate": 2.5937570330395345e-07, "loss": 0.3608, "step": 20853 }, { "epoch": 2.788713559775341, "grad_norm": 1.5828197002410889, "learning_rate": 2.5904911465046476e-07, "loss": 0.419, "step": 20854 }, { "epoch": 2.78884728537042, "grad_norm": 1.5923101902008057, "learning_rate": 2.5872272903777473e-07, "loss": 0.3685, "step": 20855 }, { "epoch": 2.788981010965499, "grad_norm": 1.6633175611495972, "learning_rate": 2.5839654647268896e-07, "loss": 0.3703, "step": 20856 }, { "epoch": 2.7891147365605775, "grad_norm": 1.5400134325027466, "learning_rate": 2.580705669620065e-07, "loss": 0.3391, "step": 20857 }, { "epoch": 2.7892484621556566, "grad_norm": 1.5184351205825806, "learning_rate": 2.5774479051251856e-07, "loss": 0.3667, "step": 20858 }, { "epoch": 2.7893821877507357, "grad_norm": 1.537785291671753, "learning_rate": 2.574192171310197e-07, "loss": 0.3323, "step": 20859 }, { "epoch": 2.7895159133458143, "grad_norm": 1.4815930128097534, "learning_rate": 2.570938468242945e-07, "loss": 0.3632, "step": 20860 }, { "epoch": 2.7896496389408934, "grad_norm": 1.586591362953186, "learning_rate": 2.567686795991253e-07, "loss": 0.357, "step": 20861 }, { "epoch": 2.789783364535972, "grad_norm": 1.515321969985962, "learning_rate": 2.5644371546228895e-07, "loss": 0.3267, "step": 20862 }, { "epoch": 2.789917090131051, "grad_norm": 1.6835474967956543, "learning_rate": 2.561189544205589e-07, "loss": 0.3949, "step": 20863 }, { "epoch": 2.79005081572613, "grad_norm": 1.528456449508667, "learning_rate": 2.5579439648070745e-07, "loss": 0.3482, "step": 20864 }, { "epoch": 2.7901845413212087, "grad_norm": 1.5126547813415527, "learning_rate": 2.5547004164949707e-07, "loss": 0.3426, "step": 20865 }, { "epoch": 2.790318266916288, "grad_norm": 1.5286407470703125, "learning_rate": 2.5514588993368894e-07, "loss": 0.3528, "step": 20866 }, { "epoch": 2.7904519925113664, "grad_norm": 1.6199709177017212, "learning_rate": 2.548219413400399e-07, "loss": 0.3475, "step": 20867 }, { "epoch": 2.7905857181064455, "grad_norm": 1.6597638130187988, "learning_rate": 2.5449819587530233e-07, "loss": 0.3809, "step": 20868 }, { "epoch": 2.7907194437015246, "grad_norm": 1.6368234157562256, "learning_rate": 2.541746535462242e-07, "loss": 0.3385, "step": 20869 }, { "epoch": 2.790853169296603, "grad_norm": 1.6762608289718628, "learning_rate": 2.5385131435955e-07, "loss": 0.4118, "step": 20870 }, { "epoch": 2.7909868948916823, "grad_norm": 1.4200471639633179, "learning_rate": 2.5352817832201893e-07, "loss": 0.3413, "step": 20871 }, { "epoch": 2.791120620486761, "grad_norm": 1.5693608522415161, "learning_rate": 2.5320524544036664e-07, "loss": 0.3524, "step": 20872 }, { "epoch": 2.79125434608184, "grad_norm": 1.529669165611267, "learning_rate": 2.528825157213255e-07, "loss": 0.3119, "step": 20873 }, { "epoch": 2.791388071676919, "grad_norm": 1.523508906364441, "learning_rate": 2.5255998917161903e-07, "loss": 0.3518, "step": 20874 }, { "epoch": 2.791521797271998, "grad_norm": 1.5929558277130127, "learning_rate": 2.5223766579797416e-07, "loss": 0.3667, "step": 20875 }, { "epoch": 2.7916555228670767, "grad_norm": 1.5914088487625122, "learning_rate": 2.519155456071076e-07, "loss": 0.4047, "step": 20876 }, { "epoch": 2.791789248462156, "grad_norm": 1.8419655561447144, "learning_rate": 2.5159362860573187e-07, "loss": 0.4279, "step": 20877 }, { "epoch": 2.7919229740572344, "grad_norm": 1.6265692710876465, "learning_rate": 2.5127191480056044e-07, "loss": 0.3306, "step": 20878 }, { "epoch": 2.7920566996523135, "grad_norm": 1.4231890439987183, "learning_rate": 2.5095040419829575e-07, "loss": 0.3263, "step": 20879 }, { "epoch": 2.7921904252473926, "grad_norm": 1.741549015045166, "learning_rate": 2.506290968056424e-07, "loss": 0.424, "step": 20880 }, { "epoch": 2.792324150842471, "grad_norm": 1.4248981475830078, "learning_rate": 2.503079926292962e-07, "loss": 0.2997, "step": 20881 }, { "epoch": 2.7924578764375503, "grad_norm": 1.2932381629943848, "learning_rate": 2.4998709167594946e-07, "loss": 0.3078, "step": 20882 }, { "epoch": 2.792591602032629, "grad_norm": 1.496315836906433, "learning_rate": 2.4966639395229366e-07, "loss": 0.3695, "step": 20883 }, { "epoch": 2.792725327627708, "grad_norm": 1.669024109840393, "learning_rate": 2.493458994650111e-07, "loss": 0.3556, "step": 20884 }, { "epoch": 2.792859053222787, "grad_norm": 1.6058648824691772, "learning_rate": 2.4902560822078316e-07, "loss": 0.3769, "step": 20885 }, { "epoch": 2.7929927788178657, "grad_norm": 1.559538722038269, "learning_rate": 2.487055202262856e-07, "loss": 0.3597, "step": 20886 }, { "epoch": 2.7931265044129447, "grad_norm": 1.6637864112854004, "learning_rate": 2.483856354881897e-07, "loss": 0.3972, "step": 20887 }, { "epoch": 2.7932602300080234, "grad_norm": 1.4640616178512573, "learning_rate": 2.480659540131647e-07, "loss": 0.3288, "step": 20888 }, { "epoch": 2.7933939556031024, "grad_norm": 1.3532356023788452, "learning_rate": 2.477464758078729e-07, "loss": 0.316, "step": 20889 }, { "epoch": 2.7935276811981815, "grad_norm": 1.4552329778671265, "learning_rate": 2.4742720087897466e-07, "loss": 0.3394, "step": 20890 }, { "epoch": 2.79366140679326, "grad_norm": 1.4973418712615967, "learning_rate": 2.4710812923312346e-07, "loss": 0.352, "step": 20891 }, { "epoch": 2.793795132388339, "grad_norm": 1.6040809154510498, "learning_rate": 2.4678926087697177e-07, "loss": 0.3677, "step": 20892 }, { "epoch": 2.793928857983418, "grad_norm": 1.434979796409607, "learning_rate": 2.464705958171632e-07, "loss": 0.3644, "step": 20893 }, { "epoch": 2.794062583578497, "grad_norm": 1.6095519065856934, "learning_rate": 2.4615213406034345e-07, "loss": 0.3422, "step": 20894 }, { "epoch": 2.794196309173576, "grad_norm": 1.6396526098251343, "learning_rate": 2.458338756131484e-07, "loss": 0.3715, "step": 20895 }, { "epoch": 2.7943300347686546, "grad_norm": 1.4860919713974, "learning_rate": 2.455158204822128e-07, "loss": 0.3621, "step": 20896 }, { "epoch": 2.7944637603637337, "grad_norm": 1.5590410232543945, "learning_rate": 2.451979686741668e-07, "loss": 0.3539, "step": 20897 }, { "epoch": 2.7945974859588123, "grad_norm": 1.4203370809555054, "learning_rate": 2.44880320195634e-07, "loss": 0.3197, "step": 20898 }, { "epoch": 2.7947312115538914, "grad_norm": 1.452337622642517, "learning_rate": 2.4456287505323693e-07, "loss": 0.3422, "step": 20899 }, { "epoch": 2.7948649371489704, "grad_norm": 1.6493194103240967, "learning_rate": 2.442456332535903e-07, "loss": 0.3839, "step": 20900 }, { "epoch": 2.794998662744049, "grad_norm": 1.625227928161621, "learning_rate": 2.4392859480330876e-07, "loss": 0.3728, "step": 20901 }, { "epoch": 2.795132388339128, "grad_norm": 1.6872650384902954, "learning_rate": 2.4361175970900154e-07, "loss": 0.3677, "step": 20902 }, { "epoch": 2.7952661139342068, "grad_norm": 1.5454391241073608, "learning_rate": 2.4329512797726884e-07, "loss": 0.3823, "step": 20903 }, { "epoch": 2.795399839529286, "grad_norm": 1.8218274116516113, "learning_rate": 2.4297869961471544e-07, "loss": 0.4122, "step": 20904 }, { "epoch": 2.795533565124365, "grad_norm": 1.775802493095398, "learning_rate": 2.426624746279327e-07, "loss": 0.3433, "step": 20905 }, { "epoch": 2.795667290719444, "grad_norm": 1.3720479011535645, "learning_rate": 2.423464530235153e-07, "loss": 0.3137, "step": 20906 }, { "epoch": 2.7958010163145226, "grad_norm": 1.4110440015792847, "learning_rate": 2.420306348080481e-07, "loss": 0.3349, "step": 20907 }, { "epoch": 2.7959347419096012, "grad_norm": 1.6522252559661865, "learning_rate": 2.4171501998811466e-07, "loss": 0.3626, "step": 20908 }, { "epoch": 2.7960684675046803, "grad_norm": 1.6671603918075562, "learning_rate": 2.413996085702952e-07, "loss": 0.3707, "step": 20909 }, { "epoch": 2.7962021930997594, "grad_norm": 1.410933256149292, "learning_rate": 2.4108440056116236e-07, "loss": 0.3181, "step": 20910 }, { "epoch": 2.7963359186948384, "grad_norm": 1.4107152223587036, "learning_rate": 2.407693959672874e-07, "loss": 0.3082, "step": 20911 }, { "epoch": 2.796469644289917, "grad_norm": 1.4872812032699585, "learning_rate": 2.4045459479523524e-07, "loss": 0.3607, "step": 20912 }, { "epoch": 2.796603369884996, "grad_norm": 1.584105134010315, "learning_rate": 2.4013999705156834e-07, "loss": 0.3544, "step": 20913 }, { "epoch": 2.7967370954800748, "grad_norm": 1.567333459854126, "learning_rate": 2.398256027428436e-07, "loss": 0.3679, "step": 20914 }, { "epoch": 2.796870821075154, "grad_norm": 1.5118257999420166, "learning_rate": 2.395114118756148e-07, "loss": 0.3336, "step": 20915 }, { "epoch": 2.797004546670233, "grad_norm": 1.5153311491012573, "learning_rate": 2.39197424456431e-07, "loss": 0.3848, "step": 20916 }, { "epoch": 2.7971382722653115, "grad_norm": 1.5067511796951294, "learning_rate": 2.388836404918371e-07, "loss": 0.3529, "step": 20917 }, { "epoch": 2.7972719978603906, "grad_norm": 1.625604510307312, "learning_rate": 2.385700599883745e-07, "loss": 0.3446, "step": 20918 }, { "epoch": 2.7974057234554692, "grad_norm": 1.7500056028366089, "learning_rate": 2.3825668295257563e-07, "loss": 0.366, "step": 20919 }, { "epoch": 2.7975394490505483, "grad_norm": 1.5812227725982666, "learning_rate": 2.3794350939097653e-07, "loss": 0.3783, "step": 20920 }, { "epoch": 2.7976731746456274, "grad_norm": 1.5940697193145752, "learning_rate": 2.3763053931010415e-07, "loss": 0.3921, "step": 20921 }, { "epoch": 2.797806900240706, "grad_norm": 1.4274922609329224, "learning_rate": 2.3731777271647995e-07, "loss": 0.4133, "step": 20922 }, { "epoch": 2.797940625835785, "grad_norm": 1.691535234451294, "learning_rate": 2.3700520961662753e-07, "loss": 0.3702, "step": 20923 }, { "epoch": 2.7980743514308637, "grad_norm": 1.6848517656326294, "learning_rate": 2.3669285001705734e-07, "loss": 0.3559, "step": 20924 }, { "epoch": 2.7982080770259428, "grad_norm": 1.495120644569397, "learning_rate": 2.36380693924283e-07, "loss": 0.3594, "step": 20925 }, { "epoch": 2.798341802621022, "grad_norm": 1.551878571510315, "learning_rate": 2.360687413448104e-07, "loss": 0.364, "step": 20926 }, { "epoch": 2.7984755282161005, "grad_norm": 1.6506924629211426, "learning_rate": 2.3575699228514105e-07, "loss": 0.3864, "step": 20927 }, { "epoch": 2.7986092538111795, "grad_norm": 1.594507098197937, "learning_rate": 2.3544544675177528e-07, "loss": 0.3682, "step": 20928 }, { "epoch": 2.798742979406258, "grad_norm": 1.5190945863723755, "learning_rate": 2.3513410475120456e-07, "loss": 0.3536, "step": 20929 }, { "epoch": 2.7988767050013372, "grad_norm": 1.5221331119537354, "learning_rate": 2.348229662899193e-07, "loss": 0.3727, "step": 20930 }, { "epoch": 2.7990104305964163, "grad_norm": 1.519376277923584, "learning_rate": 2.3451203137440538e-07, "loss": 0.3326, "step": 20931 }, { "epoch": 2.799144156191495, "grad_norm": 1.7048168182373047, "learning_rate": 2.3420130001114317e-07, "loss": 0.3773, "step": 20932 }, { "epoch": 2.799277881786574, "grad_norm": 1.7285701036453247, "learning_rate": 2.338907722066097e-07, "loss": 0.3761, "step": 20933 }, { "epoch": 2.7994116073816526, "grad_norm": 1.6800168752670288, "learning_rate": 2.3358044796727874e-07, "loss": 0.3626, "step": 20934 }, { "epoch": 2.7995453329767317, "grad_norm": 1.5824846029281616, "learning_rate": 2.332703272996173e-07, "loss": 0.3678, "step": 20935 }, { "epoch": 2.7996790585718108, "grad_norm": 1.6931216716766357, "learning_rate": 2.329604102100913e-07, "loss": 0.3447, "step": 20936 }, { "epoch": 2.7998127841668894, "grad_norm": 1.62696373462677, "learning_rate": 2.3265069670515894e-07, "loss": 0.4196, "step": 20937 }, { "epoch": 2.7999465097619685, "grad_norm": 1.4578145742416382, "learning_rate": 2.3234118679127615e-07, "loss": 0.3547, "step": 20938 }, { "epoch": 2.800080235357047, "grad_norm": 1.4603453874588013, "learning_rate": 2.3203188047489443e-07, "loss": 0.4086, "step": 20939 }, { "epoch": 2.800213960952126, "grad_norm": 1.5470774173736572, "learning_rate": 2.317227777624609e-07, "loss": 0.3395, "step": 20940 }, { "epoch": 2.8003476865472052, "grad_norm": 1.6321628093719482, "learning_rate": 2.314138786604203e-07, "loss": 0.3884, "step": 20941 }, { "epoch": 2.8004814121422843, "grad_norm": 1.668300747871399, "learning_rate": 2.311051831752098e-07, "loss": 0.3824, "step": 20942 }, { "epoch": 2.800615137737363, "grad_norm": 1.5133758783340454, "learning_rate": 2.30796691313262e-07, "loss": 0.3393, "step": 20943 }, { "epoch": 2.800748863332442, "grad_norm": 1.3428596258163452, "learning_rate": 2.304884030810117e-07, "loss": 0.3386, "step": 20944 }, { "epoch": 2.8008825889275206, "grad_norm": 1.3497636318206787, "learning_rate": 2.3018031848488055e-07, "loss": 0.2985, "step": 20945 }, { "epoch": 2.8010163145225997, "grad_norm": 1.4529153108596802, "learning_rate": 2.2987243753129107e-07, "loss": 0.3497, "step": 20946 }, { "epoch": 2.8011500401176788, "grad_norm": 1.6158350706100464, "learning_rate": 2.2956476022666375e-07, "loss": 0.3745, "step": 20947 }, { "epoch": 2.8012837657127574, "grad_norm": 1.6476553678512573, "learning_rate": 2.2925728657740786e-07, "loss": 0.364, "step": 20948 }, { "epoch": 2.8014174913078365, "grad_norm": 1.6119074821472168, "learning_rate": 2.289500165899361e-07, "loss": 0.4103, "step": 20949 }, { "epoch": 2.801551216902915, "grad_norm": 1.6339457035064697, "learning_rate": 2.2864295027064997e-07, "loss": 0.3817, "step": 20950 }, { "epoch": 2.801684942497994, "grad_norm": 1.5531072616577148, "learning_rate": 2.2833608762595217e-07, "loss": 0.3713, "step": 20951 }, { "epoch": 2.8018186680930732, "grad_norm": 1.6813520193099976, "learning_rate": 2.2802942866223754e-07, "loss": 0.3954, "step": 20952 }, { "epoch": 2.801952393688152, "grad_norm": 1.667297124862671, "learning_rate": 2.2772297338589878e-07, "loss": 0.3632, "step": 20953 }, { "epoch": 2.802086119283231, "grad_norm": 1.5414049625396729, "learning_rate": 2.2741672180332409e-07, "loss": 0.3834, "step": 20954 }, { "epoch": 2.8022198448783096, "grad_norm": 1.3934364318847656, "learning_rate": 2.2711067392089613e-07, "loss": 0.3341, "step": 20955 }, { "epoch": 2.8023535704733886, "grad_norm": 1.796728253364563, "learning_rate": 2.268048297449943e-07, "loss": 0.3874, "step": 20956 }, { "epoch": 2.8024872960684677, "grad_norm": 1.4994337558746338, "learning_rate": 2.2649918928199455e-07, "loss": 0.363, "step": 20957 }, { "epoch": 2.8026210216635463, "grad_norm": 1.6909615993499756, "learning_rate": 2.2619375253826624e-07, "loss": 0.3642, "step": 20958 }, { "epoch": 2.8027547472586254, "grad_norm": 1.7342839241027832, "learning_rate": 2.2588851952017653e-07, "loss": 0.3913, "step": 20959 }, { "epoch": 2.802888472853704, "grad_norm": 1.452297329902649, "learning_rate": 2.255834902340881e-07, "loss": 0.3467, "step": 20960 }, { "epoch": 2.803022198448783, "grad_norm": 1.413185477256775, "learning_rate": 2.252786646863603e-07, "loss": 0.3319, "step": 20961 }, { "epoch": 2.803155924043862, "grad_norm": 1.541680097579956, "learning_rate": 2.2497404288334245e-07, "loss": 0.3726, "step": 20962 }, { "epoch": 2.803289649638941, "grad_norm": 1.5604345798492432, "learning_rate": 2.2466962483138954e-07, "loss": 0.3416, "step": 20963 }, { "epoch": 2.80342337523402, "grad_norm": 1.5624465942382812, "learning_rate": 2.2436541053684203e-07, "loss": 0.3816, "step": 20964 }, { "epoch": 2.8035571008290985, "grad_norm": 1.6201050281524658, "learning_rate": 2.240614000060448e-07, "loss": 0.3848, "step": 20965 }, { "epoch": 2.8036908264241776, "grad_norm": 1.5026683807373047, "learning_rate": 2.2375759324533398e-07, "loss": 0.3468, "step": 20966 }, { "epoch": 2.8038245520192566, "grad_norm": 1.677214503288269, "learning_rate": 2.2345399026103888e-07, "loss": 0.3813, "step": 20967 }, { "epoch": 2.8039582776143352, "grad_norm": 1.6230571269989014, "learning_rate": 2.2315059105949222e-07, "loss": 0.3822, "step": 20968 }, { "epoch": 2.8040920032094143, "grad_norm": 1.5914098024368286, "learning_rate": 2.2284739564701563e-07, "loss": 0.3636, "step": 20969 }, { "epoch": 2.804225728804493, "grad_norm": 1.461203694343567, "learning_rate": 2.225444040299285e-07, "loss": 0.3227, "step": 20970 }, { "epoch": 2.804359454399572, "grad_norm": 1.4094074964523315, "learning_rate": 2.22241616214548e-07, "loss": 0.3502, "step": 20971 }, { "epoch": 2.804493179994651, "grad_norm": 1.583274483680725, "learning_rate": 2.219390322071835e-07, "loss": 0.3779, "step": 20972 }, { "epoch": 2.8046269055897297, "grad_norm": 1.521883249282837, "learning_rate": 2.2163665201414553e-07, "loss": 0.3446, "step": 20973 }, { "epoch": 2.804760631184809, "grad_norm": 1.4544239044189453, "learning_rate": 2.2133447564173237e-07, "loss": 0.3475, "step": 20974 }, { "epoch": 2.8048943567798874, "grad_norm": 1.590610146522522, "learning_rate": 2.210325030962468e-07, "loss": 0.3556, "step": 20975 }, { "epoch": 2.8050280823749665, "grad_norm": 1.535759687423706, "learning_rate": 2.2073073438397929e-07, "loss": 0.3107, "step": 20976 }, { "epoch": 2.8051618079700456, "grad_norm": 1.7200812101364136, "learning_rate": 2.2042916951122372e-07, "loss": 0.3787, "step": 20977 }, { "epoch": 2.8052955335651246, "grad_norm": 1.624894618988037, "learning_rate": 2.2012780848426286e-07, "loss": 0.3825, "step": 20978 }, { "epoch": 2.8054292591602032, "grad_norm": 1.5929768085479736, "learning_rate": 2.1982665130938054e-07, "loss": 0.3683, "step": 20979 }, { "epoch": 2.8055629847552823, "grad_norm": 1.6137999296188354, "learning_rate": 2.1952569799285172e-07, "loss": 0.3477, "step": 20980 }, { "epoch": 2.805696710350361, "grad_norm": 1.440737247467041, "learning_rate": 2.1922494854095145e-07, "loss": 0.335, "step": 20981 }, { "epoch": 2.80583043594544, "grad_norm": 1.5126547813415527, "learning_rate": 2.189244029599491e-07, "loss": 0.3443, "step": 20982 }, { "epoch": 2.805964161540519, "grad_norm": 1.6157044172286987, "learning_rate": 2.1862406125610636e-07, "loss": 0.3701, "step": 20983 }, { "epoch": 2.8060978871355977, "grad_norm": 1.2181618213653564, "learning_rate": 2.1832392343568598e-07, "loss": 0.3417, "step": 20984 }, { "epoch": 2.806231612730677, "grad_norm": 1.5439636707305908, "learning_rate": 2.180239895049441e-07, "loss": 0.3432, "step": 20985 }, { "epoch": 2.8063653383257554, "grad_norm": 1.5068248510360718, "learning_rate": 2.1772425947013008e-07, "loss": 0.3427, "step": 20986 }, { "epoch": 2.8064990639208345, "grad_norm": 1.3487454652786255, "learning_rate": 2.1742473333749569e-07, "loss": 0.3279, "step": 20987 }, { "epoch": 2.8066327895159136, "grad_norm": 1.57566237449646, "learning_rate": 2.1712541111327924e-07, "loss": 0.3728, "step": 20988 }, { "epoch": 2.806766515110992, "grad_norm": 1.48201322555542, "learning_rate": 2.168262928037246e-07, "loss": 0.3174, "step": 20989 }, { "epoch": 2.8069002407060712, "grad_norm": 1.5476562976837158, "learning_rate": 2.1652737841506344e-07, "loss": 0.3344, "step": 20990 }, { "epoch": 2.80703396630115, "grad_norm": 1.7266650199890137, "learning_rate": 2.1622866795352638e-07, "loss": 0.3913, "step": 20991 }, { "epoch": 2.807167691896229, "grad_norm": 1.472413420677185, "learning_rate": 2.1593016142534173e-07, "loss": 0.3552, "step": 20992 }, { "epoch": 2.807301417491308, "grad_norm": 1.558553695678711, "learning_rate": 2.156318588367301e-07, "loss": 0.3367, "step": 20993 }, { "epoch": 2.8074351430863866, "grad_norm": 1.6842482089996338, "learning_rate": 2.1533376019391095e-07, "loss": 0.3727, "step": 20994 }, { "epoch": 2.8075688686814657, "grad_norm": 1.5345304012298584, "learning_rate": 2.1503586550309486e-07, "loss": 0.3545, "step": 20995 }, { "epoch": 2.8077025942765443, "grad_norm": 1.4740495681762695, "learning_rate": 2.147381747704935e-07, "loss": 0.2888, "step": 20996 }, { "epoch": 2.8078363198716234, "grad_norm": 1.6585506200790405, "learning_rate": 2.14440688002312e-07, "loss": 0.3887, "step": 20997 }, { "epoch": 2.8079700454667025, "grad_norm": 1.4644114971160889, "learning_rate": 2.1414340520475087e-07, "loss": 0.3166, "step": 20998 }, { "epoch": 2.808103771061781, "grad_norm": 1.4557394981384277, "learning_rate": 2.1384632638400515e-07, "loss": 0.3281, "step": 20999 }, { "epoch": 2.80823749665686, "grad_norm": 1.6681737899780273, "learning_rate": 2.1354945154626883e-07, "loss": 0.3371, "step": 21000 }, { "epoch": 2.808371222251939, "grad_norm": 1.4628387689590454, "learning_rate": 2.1325278069773027e-07, "loss": 0.3406, "step": 21001 }, { "epoch": 2.808504947847018, "grad_norm": 1.5058053731918335, "learning_rate": 2.1295631384457228e-07, "loss": 0.3567, "step": 21002 }, { "epoch": 2.808638673442097, "grad_norm": 1.4408000707626343, "learning_rate": 2.1266005099297436e-07, "loss": 0.3474, "step": 21003 }, { "epoch": 2.8087723990371756, "grad_norm": 1.6201496124267578, "learning_rate": 2.1236399214911274e-07, "loss": 0.4396, "step": 21004 }, { "epoch": 2.8089061246322546, "grad_norm": 1.2999467849731445, "learning_rate": 2.1206813731915798e-07, "loss": 0.3174, "step": 21005 }, { "epoch": 2.8090398502273333, "grad_norm": 1.7491109371185303, "learning_rate": 2.117724865092774e-07, "loss": 0.3968, "step": 21006 }, { "epoch": 2.8091735758224123, "grad_norm": 1.5332244634628296, "learning_rate": 2.1147703972563049e-07, "loss": 0.4134, "step": 21007 }, { "epoch": 2.8093073014174914, "grad_norm": 1.6016746759414673, "learning_rate": 2.1118179697438125e-07, "loss": 0.3565, "step": 21008 }, { "epoch": 2.8094410270125705, "grad_norm": 1.379032850265503, "learning_rate": 2.1088675826167804e-07, "loss": 0.3106, "step": 21009 }, { "epoch": 2.809574752607649, "grad_norm": 1.5215754508972168, "learning_rate": 2.1059192359367485e-07, "loss": 0.3525, "step": 21010 }, { "epoch": 2.8097084782027277, "grad_norm": 1.5460797548294067, "learning_rate": 2.102972929765157e-07, "loss": 0.3547, "step": 21011 }, { "epoch": 2.809842203797807, "grad_norm": 1.6412646770477295, "learning_rate": 2.1000286641634003e-07, "loss": 0.353, "step": 21012 }, { "epoch": 2.809975929392886, "grad_norm": 1.6331738233566284, "learning_rate": 2.0970864391928858e-07, "loss": 0.3588, "step": 21013 }, { "epoch": 2.810109654987965, "grad_norm": 1.3080657720565796, "learning_rate": 2.0941462549149083e-07, "loss": 0.305, "step": 21014 }, { "epoch": 2.8102433805830436, "grad_norm": 1.4372737407684326, "learning_rate": 2.0912081113907745e-07, "loss": 0.2812, "step": 21015 }, { "epoch": 2.8103771061781226, "grad_norm": 1.7766419649124146, "learning_rate": 2.0882720086817132e-07, "loss": 0.4017, "step": 21016 }, { "epoch": 2.8105108317732013, "grad_norm": 1.4037578105926514, "learning_rate": 2.085337946848931e-07, "loss": 0.319, "step": 21017 }, { "epoch": 2.8106445573682803, "grad_norm": 1.5752131938934326, "learning_rate": 2.082405925953579e-07, "loss": 0.3922, "step": 21018 }, { "epoch": 2.8107782829633594, "grad_norm": 1.5167311429977417, "learning_rate": 2.079475946056786e-07, "loss": 0.3743, "step": 21019 }, { "epoch": 2.810912008558438, "grad_norm": 1.6328784227371216, "learning_rate": 2.0765480072196142e-07, "loss": 0.3547, "step": 21020 }, { "epoch": 2.811045734153517, "grad_norm": 1.6029284000396729, "learning_rate": 2.073622109503104e-07, "loss": 0.3687, "step": 21021 }, { "epoch": 2.8111794597485957, "grad_norm": 1.451704978942871, "learning_rate": 2.0706982529682286e-07, "loss": 0.3423, "step": 21022 }, { "epoch": 2.811313185343675, "grad_norm": 1.5524015426635742, "learning_rate": 2.067776437675939e-07, "loss": 0.3613, "step": 21023 }, { "epoch": 2.811446910938754, "grad_norm": 1.6109240055084229, "learning_rate": 2.0648566636871426e-07, "loss": 0.3845, "step": 21024 }, { "epoch": 2.8115806365338325, "grad_norm": 1.4419208765029907, "learning_rate": 2.0619389310626903e-07, "loss": 0.3554, "step": 21025 }, { "epoch": 2.8117143621289116, "grad_norm": 1.5239206552505493, "learning_rate": 2.0590232398634114e-07, "loss": 0.3212, "step": 21026 }, { "epoch": 2.81184808772399, "grad_norm": 1.6813175678253174, "learning_rate": 2.0561095901500793e-07, "loss": 0.3685, "step": 21027 }, { "epoch": 2.8119818133190693, "grad_norm": 1.3473381996154785, "learning_rate": 2.0531979819834015e-07, "loss": 0.3305, "step": 21028 }, { "epoch": 2.8121155389141483, "grad_norm": 1.5564929246902466, "learning_rate": 2.0502884154240955e-07, "loss": 0.3524, "step": 21029 }, { "epoch": 2.812249264509227, "grad_norm": 1.5361626148223877, "learning_rate": 2.047380890532813e-07, "loss": 0.3736, "step": 21030 }, { "epoch": 2.812382990104306, "grad_norm": 1.6218271255493164, "learning_rate": 2.044475407370128e-07, "loss": 0.3689, "step": 21031 }, { "epoch": 2.8125167156993847, "grad_norm": 1.5912046432495117, "learning_rate": 2.041571965996636e-07, "loss": 0.3751, "step": 21032 }, { "epoch": 2.8126504412944637, "grad_norm": 1.7652696371078491, "learning_rate": 2.0386705664728222e-07, "loss": 0.4031, "step": 21033 }, { "epoch": 2.812784166889543, "grad_norm": 1.5469073057174683, "learning_rate": 2.0357712088591942e-07, "loss": 0.3158, "step": 21034 }, { "epoch": 2.8129178924846214, "grad_norm": 1.5513590574264526, "learning_rate": 2.0328738932161695e-07, "loss": 0.3494, "step": 21035 }, { "epoch": 2.8130516180797005, "grad_norm": 1.6858493089675903, "learning_rate": 2.0299786196041448e-07, "loss": 0.3909, "step": 21036 }, { "epoch": 2.813185343674779, "grad_norm": 1.4892266988754272, "learning_rate": 2.0270853880834608e-07, "loss": 0.3722, "step": 21037 }, { "epoch": 2.813319069269858, "grad_norm": 1.588689923286438, "learning_rate": 2.0241941987144464e-07, "loss": 0.3595, "step": 21038 }, { "epoch": 2.8134527948649373, "grad_norm": 1.515647530555725, "learning_rate": 2.021305051557343e-07, "loss": 0.3454, "step": 21039 }, { "epoch": 2.813586520460016, "grad_norm": 1.5506705045700073, "learning_rate": 2.0184179466723796e-07, "loss": 0.3363, "step": 21040 }, { "epoch": 2.813720246055095, "grad_norm": 1.565531849861145, "learning_rate": 2.0155328841197307e-07, "loss": 0.3592, "step": 21041 }, { "epoch": 2.8138539716501736, "grad_norm": 1.4972879886627197, "learning_rate": 2.0126498639595481e-07, "loss": 0.3461, "step": 21042 }, { "epoch": 2.8139876972452527, "grad_norm": 1.4020835161209106, "learning_rate": 2.009768886251906e-07, "loss": 0.3301, "step": 21043 }, { "epoch": 2.8141214228403317, "grad_norm": 1.5145186185836792, "learning_rate": 2.0068899510568783e-07, "loss": 0.3294, "step": 21044 }, { "epoch": 2.814255148435411, "grad_norm": 1.5370123386383057, "learning_rate": 2.004013058434451e-07, "loss": 0.3261, "step": 21045 }, { "epoch": 2.8143888740304894, "grad_norm": 1.3789373636245728, "learning_rate": 2.0011382084446085e-07, "loss": 0.3377, "step": 21046 }, { "epoch": 2.8145225996255685, "grad_norm": 1.5318207740783691, "learning_rate": 1.998265401147248e-07, "loss": 0.3609, "step": 21047 }, { "epoch": 2.814656325220647, "grad_norm": 1.4459102153778076, "learning_rate": 1.995394636602277e-07, "loss": 0.3096, "step": 21048 }, { "epoch": 2.814790050815726, "grad_norm": 1.4076447486877441, "learning_rate": 1.9925259148695253e-07, "loss": 0.3306, "step": 21049 }, { "epoch": 2.8149237764108053, "grad_norm": 1.4815031290054321, "learning_rate": 1.9896592360087897e-07, "loss": 0.3243, "step": 21050 }, { "epoch": 2.815057502005884, "grad_norm": 1.670536994934082, "learning_rate": 1.9867946000798223e-07, "loss": 0.3725, "step": 21051 }, { "epoch": 2.815191227600963, "grad_norm": 1.4800221920013428, "learning_rate": 1.9839320071423195e-07, "loss": 0.3386, "step": 21052 }, { "epoch": 2.8153249531960416, "grad_norm": 1.6522111892700195, "learning_rate": 1.9810714572559898e-07, "loss": 0.3574, "step": 21053 }, { "epoch": 2.8154586787911207, "grad_norm": 1.529272198677063, "learning_rate": 1.9782129504804182e-07, "loss": 0.365, "step": 21054 }, { "epoch": 2.8155924043861997, "grad_norm": 1.8183497190475464, "learning_rate": 1.9753564868751906e-07, "loss": 0.3923, "step": 21055 }, { "epoch": 2.8157261299812784, "grad_norm": 1.3781611919403076, "learning_rate": 1.9725020664998707e-07, "loss": 0.2975, "step": 21056 }, { "epoch": 2.8158598555763574, "grad_norm": 1.7327841520309448, "learning_rate": 1.9696496894139216e-07, "loss": 0.3784, "step": 21057 }, { "epoch": 2.815993581171436, "grad_norm": 1.483992338180542, "learning_rate": 1.9667993556768517e-07, "loss": 0.3886, "step": 21058 }, { "epoch": 2.816127306766515, "grad_norm": 1.6965386867523193, "learning_rate": 1.9639510653480244e-07, "loss": 0.3859, "step": 21059 }, { "epoch": 2.816261032361594, "grad_norm": 1.4392951726913452, "learning_rate": 1.9611048184868254e-07, "loss": 0.3691, "step": 21060 }, { "epoch": 2.816394757956673, "grad_norm": 1.3528062105178833, "learning_rate": 1.958260615152585e-07, "loss": 0.3322, "step": 21061 }, { "epoch": 2.816528483551752, "grad_norm": 1.8010636568069458, "learning_rate": 1.9554184554045897e-07, "loss": 0.3719, "step": 21062 }, { "epoch": 2.8166622091468305, "grad_norm": 1.544804334640503, "learning_rate": 1.9525783393020803e-07, "loss": 0.3218, "step": 21063 }, { "epoch": 2.8167959347419096, "grad_norm": 1.5690834522247314, "learning_rate": 1.949740266904243e-07, "loss": 0.3849, "step": 21064 }, { "epoch": 2.8169296603369887, "grad_norm": 1.5462055206298828, "learning_rate": 1.946904238270253e-07, "loss": 0.3435, "step": 21065 }, { "epoch": 2.8170633859320673, "grad_norm": 1.4532150030136108, "learning_rate": 1.944070253459218e-07, "loss": 0.351, "step": 21066 }, { "epoch": 2.8171971115271464, "grad_norm": 1.698218584060669, "learning_rate": 1.9412383125302136e-07, "loss": 0.3426, "step": 21067 }, { "epoch": 2.817330837122225, "grad_norm": 1.3921817541122437, "learning_rate": 1.938408415542259e-07, "loss": 0.3432, "step": 21068 }, { "epoch": 2.817464562717304, "grad_norm": 1.5417016744613647, "learning_rate": 1.93558056255434e-07, "loss": 0.3504, "step": 21069 }, { "epoch": 2.817598288312383, "grad_norm": 1.745919108390808, "learning_rate": 1.932754753625421e-07, "loss": 0.3775, "step": 21070 }, { "epoch": 2.8177320139074618, "grad_norm": 1.710904598236084, "learning_rate": 1.929930988814377e-07, "loss": 0.3794, "step": 21071 }, { "epoch": 2.817865739502541, "grad_norm": 1.4693067073822021, "learning_rate": 1.927109268180094e-07, "loss": 0.3825, "step": 21072 }, { "epoch": 2.8179994650976194, "grad_norm": 1.6029951572418213, "learning_rate": 1.9242895917813475e-07, "loss": 0.3749, "step": 21073 }, { "epoch": 2.8181331906926985, "grad_norm": 1.5685030221939087, "learning_rate": 1.921471959676957e-07, "loss": 0.3535, "step": 21074 }, { "epoch": 2.8182669162877776, "grad_norm": 1.5314923524856567, "learning_rate": 1.9186563719256313e-07, "loss": 0.4062, "step": 21075 }, { "epoch": 2.818400641882856, "grad_norm": 1.592108130455017, "learning_rate": 1.9158428285860452e-07, "loss": 0.3717, "step": 21076 }, { "epoch": 2.8185343674779353, "grad_norm": 1.82607901096344, "learning_rate": 1.9130313297168746e-07, "loss": 0.3739, "step": 21077 }, { "epoch": 2.818668093073014, "grad_norm": 1.4730095863342285, "learning_rate": 1.9102218753766943e-07, "loss": 0.3251, "step": 21078 }, { "epoch": 2.818801818668093, "grad_norm": 1.8074052333831787, "learning_rate": 1.9074144656240913e-07, "loss": 0.3685, "step": 21079 }, { "epoch": 2.818935544263172, "grad_norm": 1.5792359113693237, "learning_rate": 1.9046091005175627e-07, "loss": 0.3948, "step": 21080 }, { "epoch": 2.819069269858251, "grad_norm": 1.3176779747009277, "learning_rate": 1.9018057801155843e-07, "loss": 0.2905, "step": 21081 }, { "epoch": 2.8192029954533298, "grad_norm": 1.520768165588379, "learning_rate": 1.8990045044766093e-07, "loss": 0.3373, "step": 21082 }, { "epoch": 2.819336721048409, "grad_norm": 1.5521377325057983, "learning_rate": 1.8962052736590019e-07, "loss": 0.3294, "step": 21083 }, { "epoch": 2.8194704466434874, "grad_norm": 1.5931363105773926, "learning_rate": 1.8934080877211158e-07, "loss": 0.3629, "step": 21084 }, { "epoch": 2.8196041722385665, "grad_norm": 1.6153312921524048, "learning_rate": 1.8906129467212708e-07, "loss": 0.3749, "step": 21085 }, { "epoch": 2.8197378978336456, "grad_norm": 1.6326535940170288, "learning_rate": 1.8878198507177093e-07, "loss": 0.3519, "step": 21086 }, { "epoch": 2.819871623428724, "grad_norm": 1.5739383697509766, "learning_rate": 1.8850287997686623e-07, "loss": 0.39, "step": 21087 }, { "epoch": 2.8200053490238033, "grad_norm": 1.3473291397094727, "learning_rate": 1.8822397939323055e-07, "loss": 0.3269, "step": 21088 }, { "epoch": 2.820139074618882, "grad_norm": 1.3279112577438354, "learning_rate": 1.8794528332667816e-07, "loss": 0.3121, "step": 21089 }, { "epoch": 2.820272800213961, "grad_norm": 1.4681549072265625, "learning_rate": 1.876667917830155e-07, "loss": 0.3811, "step": 21090 }, { "epoch": 2.82040652580904, "grad_norm": 1.5945870876312256, "learning_rate": 1.8738850476805127e-07, "loss": 0.3675, "step": 21091 }, { "epoch": 2.8205402514041187, "grad_norm": 1.6379867792129517, "learning_rate": 1.871104222875819e-07, "loss": 0.3425, "step": 21092 }, { "epoch": 2.8206739769991978, "grad_norm": 1.2924295663833618, "learning_rate": 1.8683254434740617e-07, "loss": 0.3076, "step": 21093 }, { "epoch": 2.8208077025942764, "grad_norm": 1.4257365465164185, "learning_rate": 1.8655487095331716e-07, "loss": 0.3593, "step": 21094 }, { "epoch": 2.8209414281893554, "grad_norm": 1.57588529586792, "learning_rate": 1.8627740211110023e-07, "loss": 0.3817, "step": 21095 }, { "epoch": 2.8210751537844345, "grad_norm": 1.5338976383209229, "learning_rate": 1.860001378265408e-07, "loss": 0.3423, "step": 21096 }, { "epoch": 2.821208879379513, "grad_norm": 1.4424211978912354, "learning_rate": 1.8572307810541645e-07, "loss": 0.3683, "step": 21097 }, { "epoch": 2.821342604974592, "grad_norm": 1.6507391929626465, "learning_rate": 1.854462229535059e-07, "loss": 0.3596, "step": 21098 }, { "epoch": 2.821476330569671, "grad_norm": 1.5054662227630615, "learning_rate": 1.851695723765745e-07, "loss": 0.3621, "step": 21099 }, { "epoch": 2.82161005616475, "grad_norm": 1.5473171472549438, "learning_rate": 1.8489312638039325e-07, "loss": 0.3239, "step": 21100 }, { "epoch": 2.821743781759829, "grad_norm": 1.5663384199142456, "learning_rate": 1.8461688497072193e-07, "loss": 0.3574, "step": 21101 }, { "epoch": 2.8218775073549076, "grad_norm": 1.3666025400161743, "learning_rate": 1.843408481533182e-07, "loss": 0.3441, "step": 21102 }, { "epoch": 2.8220112329499867, "grad_norm": 1.4269355535507202, "learning_rate": 1.8406501593393967e-07, "loss": 0.3321, "step": 21103 }, { "epoch": 2.8221449585450653, "grad_norm": 1.6491373777389526, "learning_rate": 1.8378938831833172e-07, "loss": 0.3474, "step": 21104 }, { "epoch": 2.8222786841401444, "grad_norm": 1.6401958465576172, "learning_rate": 1.8351396531224087e-07, "loss": 0.379, "step": 21105 }, { "epoch": 2.8224124097352234, "grad_norm": 1.6312828063964844, "learning_rate": 1.8323874692140807e-07, "loss": 0.3934, "step": 21106 }, { "epoch": 2.822546135330302, "grad_norm": 1.5851949453353882, "learning_rate": 1.829637331515699e-07, "loss": 0.3225, "step": 21107 }, { "epoch": 2.822679860925381, "grad_norm": 1.52326500415802, "learning_rate": 1.8268892400845838e-07, "loss": 0.354, "step": 21108 }, { "epoch": 2.8228135865204598, "grad_norm": 1.6832385063171387, "learning_rate": 1.824143194978023e-07, "loss": 0.3662, "step": 21109 }, { "epoch": 2.822947312115539, "grad_norm": 1.3753222227096558, "learning_rate": 1.8213991962532595e-07, "loss": 0.355, "step": 21110 }, { "epoch": 2.823081037710618, "grad_norm": 1.6039220094680786, "learning_rate": 1.818657243967481e-07, "loss": 0.3501, "step": 21111 }, { "epoch": 2.823214763305697, "grad_norm": 1.6353678703308105, "learning_rate": 1.8159173381778417e-07, "loss": 0.3899, "step": 21112 }, { "epoch": 2.8233484889007756, "grad_norm": 1.6089023351669312, "learning_rate": 1.8131794789414513e-07, "loss": 0.3905, "step": 21113 }, { "epoch": 2.8234822144958542, "grad_norm": 1.4124891757965088, "learning_rate": 1.8104436663153757e-07, "loss": 0.3368, "step": 21114 }, { "epoch": 2.8236159400909333, "grad_norm": 1.6612999439239502, "learning_rate": 1.807709900356658e-07, "loss": 0.3803, "step": 21115 }, { "epoch": 2.8237496656860124, "grad_norm": 1.6417137384414673, "learning_rate": 1.8049781811222523e-07, "loss": 0.3455, "step": 21116 }, { "epoch": 2.8238833912810914, "grad_norm": 1.4563477039337158, "learning_rate": 1.8022485086691355e-07, "loss": 0.3338, "step": 21117 }, { "epoch": 2.82401711687617, "grad_norm": 1.511047601699829, "learning_rate": 1.7995208830541512e-07, "loss": 0.3516, "step": 21118 }, { "epoch": 2.824150842471249, "grad_norm": 1.490206003189087, "learning_rate": 1.7967953043342202e-07, "loss": 0.3766, "step": 21119 }, { "epoch": 2.8242845680663278, "grad_norm": 1.4757161140441895, "learning_rate": 1.7940717725661082e-07, "loss": 0.3686, "step": 21120 }, { "epoch": 2.824418293661407, "grad_norm": 1.8071578741073608, "learning_rate": 1.7913502878065814e-07, "loss": 0.4394, "step": 21121 }, { "epoch": 2.824552019256486, "grad_norm": 1.5498602390289307, "learning_rate": 1.788630850112405e-07, "loss": 0.339, "step": 21122 }, { "epoch": 2.8246857448515645, "grad_norm": 1.786620855331421, "learning_rate": 1.785913459540234e-07, "loss": 0.3134, "step": 21123 }, { "epoch": 2.8248194704466436, "grad_norm": 1.5780508518218994, "learning_rate": 1.7831981161467116e-07, "loss": 0.3577, "step": 21124 }, { "epoch": 2.8249531960417222, "grad_norm": 1.4263684749603271, "learning_rate": 1.7804848199884373e-07, "loss": 0.318, "step": 21125 }, { "epoch": 2.8250869216368013, "grad_norm": 1.4885960817337036, "learning_rate": 1.7777735711219768e-07, "loss": 0.3226, "step": 21126 }, { "epoch": 2.8252206472318804, "grad_norm": 1.5297586917877197, "learning_rate": 1.7750643696038406e-07, "loss": 0.3621, "step": 21127 }, { "epoch": 2.825354372826959, "grad_norm": 1.533209204673767, "learning_rate": 1.7723572154904944e-07, "loss": 0.326, "step": 21128 }, { "epoch": 2.825488098422038, "grad_norm": 1.5913262367248535, "learning_rate": 1.76965210883836e-07, "loss": 0.3862, "step": 21129 }, { "epoch": 2.8256218240171167, "grad_norm": 1.6883372068405151, "learning_rate": 1.7669490497038366e-07, "loss": 0.3341, "step": 21130 }, { "epoch": 2.8257555496121958, "grad_norm": 1.7060953378677368, "learning_rate": 1.764248038143268e-07, "loss": 0.353, "step": 21131 }, { "epoch": 2.825889275207275, "grad_norm": 1.5996332168579102, "learning_rate": 1.7615490742129427e-07, "loss": 0.3572, "step": 21132 }, { "epoch": 2.8260230008023535, "grad_norm": 1.8237059116363525, "learning_rate": 1.7588521579691263e-07, "loss": 0.39, "step": 21133 }, { "epoch": 2.8261567263974325, "grad_norm": 1.5537697076797485, "learning_rate": 1.756157289468019e-07, "loss": 0.3687, "step": 21134 }, { "epoch": 2.826290451992511, "grad_norm": 1.3952116966247559, "learning_rate": 1.7534644687658197e-07, "loss": 0.3325, "step": 21135 }, { "epoch": 2.8264241775875902, "grad_norm": 1.5767836570739746, "learning_rate": 1.7507736959186394e-07, "loss": 0.3825, "step": 21136 }, { "epoch": 2.8265579031826693, "grad_norm": 1.3958687782287598, "learning_rate": 1.7480849709825555e-07, "loss": 0.3437, "step": 21137 }, { "epoch": 2.826691628777748, "grad_norm": 1.5096261501312256, "learning_rate": 1.7453982940136337e-07, "loss": 0.3778, "step": 21138 }, { "epoch": 2.826825354372827, "grad_norm": 1.6935224533081055, "learning_rate": 1.7427136650678634e-07, "loss": 0.4306, "step": 21139 }, { "epoch": 2.8269590799679056, "grad_norm": 1.3541380167007446, "learning_rate": 1.740031084201188e-07, "loss": 0.361, "step": 21140 }, { "epoch": 2.8270928055629847, "grad_norm": 1.4912734031677246, "learning_rate": 1.7373505514695633e-07, "loss": 0.3643, "step": 21141 }, { "epoch": 2.8272265311580638, "grad_norm": 1.6090102195739746, "learning_rate": 1.734672066928822e-07, "loss": 0.3592, "step": 21142 }, { "epoch": 2.8273602567531424, "grad_norm": 1.589031457901001, "learning_rate": 1.7319956306348307e-07, "loss": 0.3366, "step": 21143 }, { "epoch": 2.8274939823482215, "grad_norm": 1.587558388710022, "learning_rate": 1.7293212426433447e-07, "loss": 0.3407, "step": 21144 }, { "epoch": 2.8276277079433, "grad_norm": 1.486051082611084, "learning_rate": 1.7266489030101308e-07, "loss": 0.3589, "step": 21145 }, { "epoch": 2.827761433538379, "grad_norm": 1.3847711086273193, "learning_rate": 1.7239786117908776e-07, "loss": 0.2863, "step": 21146 }, { "epoch": 2.8278951591334582, "grad_norm": 1.4588078260421753, "learning_rate": 1.7213103690412402e-07, "loss": 0.3225, "step": 21147 }, { "epoch": 2.8280288847285373, "grad_norm": 1.5200961828231812, "learning_rate": 1.7186441748168637e-07, "loss": 0.3262, "step": 21148 }, { "epoch": 2.828162610323616, "grad_norm": 1.5430275201797485, "learning_rate": 1.715980029173292e-07, "loss": 0.376, "step": 21149 }, { "epoch": 2.828296335918695, "grad_norm": 1.5299605131149292, "learning_rate": 1.7133179321660698e-07, "loss": 0.3721, "step": 21150 }, { "epoch": 2.8284300615137736, "grad_norm": 1.5517380237579346, "learning_rate": 1.710657883850697e-07, "loss": 0.3143, "step": 21151 }, { "epoch": 2.8285637871088527, "grad_norm": 1.376177191734314, "learning_rate": 1.7079998842825962e-07, "loss": 0.3408, "step": 21152 }, { "epoch": 2.8286975127039318, "grad_norm": 1.5396382808685303, "learning_rate": 1.7053439335171895e-07, "loss": 0.3107, "step": 21153 }, { "epoch": 2.8288312382990104, "grad_norm": 1.5965592861175537, "learning_rate": 1.7026900316098217e-07, "loss": 0.4055, "step": 21154 }, { "epoch": 2.8289649638940895, "grad_norm": 1.5593491792678833, "learning_rate": 1.7000381786158372e-07, "loss": 0.4026, "step": 21155 }, { "epoch": 2.829098689489168, "grad_norm": 1.6342612504959106, "learning_rate": 1.6973883745904696e-07, "loss": 0.3292, "step": 21156 }, { "epoch": 2.829232415084247, "grad_norm": 1.4602092504501343, "learning_rate": 1.694740619588997e-07, "loss": 0.3605, "step": 21157 }, { "epoch": 2.8293661406793262, "grad_norm": 1.6312198638916016, "learning_rate": 1.6920949136665753e-07, "loss": 0.3914, "step": 21158 }, { "epoch": 2.829499866274405, "grad_norm": 1.3996559381484985, "learning_rate": 1.6894512568783717e-07, "loss": 0.3093, "step": 21159 }, { "epoch": 2.829633591869484, "grad_norm": 1.5484685897827148, "learning_rate": 1.686809649279486e-07, "loss": 0.3593, "step": 21160 }, { "epoch": 2.8297673174645626, "grad_norm": 1.6423790454864502, "learning_rate": 1.6841700909249637e-07, "loss": 0.3737, "step": 21161 }, { "epoch": 2.8299010430596416, "grad_norm": 1.4821759462356567, "learning_rate": 1.6815325818698493e-07, "loss": 0.3686, "step": 21162 }, { "epoch": 2.8300347686547207, "grad_norm": 1.7089571952819824, "learning_rate": 1.6788971221690986e-07, "loss": 0.3932, "step": 21163 }, { "epoch": 2.8301684942497993, "grad_norm": 1.705830454826355, "learning_rate": 1.6762637118776681e-07, "loss": 0.3965, "step": 21164 }, { "epoch": 2.8303022198448784, "grad_norm": 1.579155445098877, "learning_rate": 1.6736323510504248e-07, "loss": 0.3413, "step": 21165 }, { "epoch": 2.830435945439957, "grad_norm": 1.4554558992385864, "learning_rate": 1.671003039742225e-07, "loss": 0.3514, "step": 21166 }, { "epoch": 2.830569671035036, "grad_norm": 1.4563628435134888, "learning_rate": 1.6683757780078913e-07, "loss": 0.3506, "step": 21167 }, { "epoch": 2.830703396630115, "grad_norm": 1.699845314025879, "learning_rate": 1.6657505659021577e-07, "loss": 0.3662, "step": 21168 }, { "epoch": 2.830837122225194, "grad_norm": 1.5582879781723022, "learning_rate": 1.6631274034797696e-07, "loss": 0.3285, "step": 21169 }, { "epoch": 2.830970847820273, "grad_norm": 1.5410879850387573, "learning_rate": 1.6605062907953829e-07, "loss": 0.3835, "step": 21170 }, { "epoch": 2.8311045734153515, "grad_norm": 1.4152581691741943, "learning_rate": 1.657887227903643e-07, "loss": 0.3622, "step": 21171 }, { "epoch": 2.8312382990104306, "grad_norm": 1.479344129562378, "learning_rate": 1.6552702148591392e-07, "loss": 0.3337, "step": 21172 }, { "epoch": 2.8313720246055096, "grad_norm": 1.8182692527770996, "learning_rate": 1.6526552517164174e-07, "loss": 0.3498, "step": 21173 }, { "epoch": 2.8315057502005883, "grad_norm": 1.6860917806625366, "learning_rate": 1.6500423385300001e-07, "loss": 0.3687, "step": 21174 }, { "epoch": 2.8316394757956673, "grad_norm": 1.6268608570098877, "learning_rate": 1.647431475354333e-07, "loss": 0.3777, "step": 21175 }, { "epoch": 2.831773201390746, "grad_norm": 1.4623850584030151, "learning_rate": 1.6448226622438503e-07, "loss": 0.3053, "step": 21176 }, { "epoch": 2.831906926985825, "grad_norm": 1.672318935394287, "learning_rate": 1.6422158992529082e-07, "loss": 0.4221, "step": 21177 }, { "epoch": 2.832040652580904, "grad_norm": 1.3909658193588257, "learning_rate": 1.6396111864358744e-07, "loss": 0.3447, "step": 21178 }, { "epoch": 2.8321743781759827, "grad_norm": 1.2756800651550293, "learning_rate": 1.6370085238470168e-07, "loss": 0.321, "step": 21179 }, { "epoch": 2.832308103771062, "grad_norm": 1.5968912839889526, "learning_rate": 1.634407911540592e-07, "loss": 0.375, "step": 21180 }, { "epoch": 2.8324418293661404, "grad_norm": 1.7612203359603882, "learning_rate": 1.631809349570823e-07, "loss": 0.373, "step": 21181 }, { "epoch": 2.8325755549612195, "grad_norm": 1.662456750869751, "learning_rate": 1.6292128379918337e-07, "loss": 0.3915, "step": 21182 }, { "epoch": 2.8327092805562986, "grad_norm": 1.5555535554885864, "learning_rate": 1.6266183768578026e-07, "loss": 0.3453, "step": 21183 }, { "epoch": 2.8328430061513776, "grad_norm": 1.5987141132354736, "learning_rate": 1.6240259662227531e-07, "loss": 0.3646, "step": 21184 }, { "epoch": 2.8329767317464563, "grad_norm": 1.6038262844085693, "learning_rate": 1.6214356061407532e-07, "loss": 0.3571, "step": 21185 }, { "epoch": 2.8331104573415353, "grad_norm": 1.4723541736602783, "learning_rate": 1.6188472966658043e-07, "loss": 0.3473, "step": 21186 }, { "epoch": 2.833244182936614, "grad_norm": 1.5981757640838623, "learning_rate": 1.6162610378518183e-07, "loss": 0.3632, "step": 21187 }, { "epoch": 2.833377908531693, "grad_norm": 1.7323797941207886, "learning_rate": 1.6136768297527527e-07, "loss": 0.353, "step": 21188 }, { "epoch": 2.833511634126772, "grad_norm": 1.5628026723861694, "learning_rate": 1.6110946724224308e-07, "loss": 0.3952, "step": 21189 }, { "epoch": 2.8336453597218507, "grad_norm": 1.591599464416504, "learning_rate": 1.6085145659146985e-07, "loss": 0.3685, "step": 21190 }, { "epoch": 2.83377908531693, "grad_norm": 1.6770014762878418, "learning_rate": 1.6059365102833346e-07, "loss": 0.353, "step": 21191 }, { "epoch": 2.8339128109120084, "grad_norm": 1.7900179624557495, "learning_rate": 1.6033605055820634e-07, "loss": 0.4218, "step": 21192 }, { "epoch": 2.8340465365070875, "grad_norm": 1.4443432092666626, "learning_rate": 1.6007865518645859e-07, "loss": 0.3215, "step": 21193 }, { "epoch": 2.8341802621021666, "grad_norm": 1.6381317377090454, "learning_rate": 1.5982146491845596e-07, "loss": 0.3523, "step": 21194 }, { "epoch": 2.834313987697245, "grad_norm": 1.5008882284164429, "learning_rate": 1.5956447975955859e-07, "loss": 0.394, "step": 21195 }, { "epoch": 2.8344477132923243, "grad_norm": 1.5776972770690918, "learning_rate": 1.5930769971512327e-07, "loss": 0.3739, "step": 21196 }, { "epoch": 2.834581438887403, "grad_norm": 1.5829923152923584, "learning_rate": 1.5905112479050354e-07, "loss": 0.3799, "step": 21197 }, { "epoch": 2.834715164482482, "grad_norm": 1.5479191541671753, "learning_rate": 1.5879475499104514e-07, "loss": 0.3508, "step": 21198 }, { "epoch": 2.834848890077561, "grad_norm": 1.4448344707489014, "learning_rate": 1.5853859032209374e-07, "loss": 0.3151, "step": 21199 }, { "epoch": 2.8349826156726396, "grad_norm": 1.5671799182891846, "learning_rate": 1.5828263078898842e-07, "loss": 0.3605, "step": 21200 }, { "epoch": 2.8351163412677187, "grad_norm": 1.4489481449127197, "learning_rate": 1.5802687639706272e-07, "loss": 0.3508, "step": 21201 }, { "epoch": 2.8352500668627973, "grad_norm": 1.6658684015274048, "learning_rate": 1.5777132715165012e-07, "loss": 0.4093, "step": 21202 }, { "epoch": 2.8353837924578764, "grad_norm": 1.4433869123458862, "learning_rate": 1.5751598305807526e-07, "loss": 0.3227, "step": 21203 }, { "epoch": 2.8355175180529555, "grad_norm": 1.4792289733886719, "learning_rate": 1.5726084412166277e-07, "loss": 0.354, "step": 21204 }, { "epoch": 2.835651243648034, "grad_norm": 1.581131100654602, "learning_rate": 1.5700591034772949e-07, "loss": 0.3639, "step": 21205 }, { "epoch": 2.835784969243113, "grad_norm": 1.6240030527114868, "learning_rate": 1.5675118174158787e-07, "loss": 0.3903, "step": 21206 }, { "epoch": 2.835918694838192, "grad_norm": 1.514824390411377, "learning_rate": 1.564966583085503e-07, "loss": 0.3658, "step": 21207 }, { "epoch": 2.836052420433271, "grad_norm": 1.382228136062622, "learning_rate": 1.5624234005392036e-07, "loss": 0.294, "step": 21208 }, { "epoch": 2.83618614602835, "grad_norm": 1.636189341545105, "learning_rate": 1.5598822698299932e-07, "loss": 0.36, "step": 21209 }, { "epoch": 2.8363198716234286, "grad_norm": 1.4737849235534668, "learning_rate": 1.5573431910108404e-07, "loss": 0.3194, "step": 21210 }, { "epoch": 2.8364535972185076, "grad_norm": 1.7016079425811768, "learning_rate": 1.554806164134659e-07, "loss": 0.352, "step": 21211 }, { "epoch": 2.8365873228135863, "grad_norm": 1.5472626686096191, "learning_rate": 1.552271189254362e-07, "loss": 0.3673, "step": 21212 }, { "epoch": 2.8367210484086653, "grad_norm": 1.2918970584869385, "learning_rate": 1.5497382664227512e-07, "loss": 0.3232, "step": 21213 }, { "epoch": 2.8368547740037444, "grad_norm": 1.6269826889038086, "learning_rate": 1.5472073956926404e-07, "loss": 0.3806, "step": 21214 }, { "epoch": 2.8369884995988235, "grad_norm": 1.4544490575790405, "learning_rate": 1.544678577116787e-07, "loss": 0.3295, "step": 21215 }, { "epoch": 2.837122225193902, "grad_norm": 1.5082616806030273, "learning_rate": 1.5421518107478939e-07, "loss": 0.3626, "step": 21216 }, { "epoch": 2.8372559507889807, "grad_norm": 1.574182152748108, "learning_rate": 1.5396270966386407e-07, "loss": 0.3532, "step": 21217 }, { "epoch": 2.83738967638406, "grad_norm": 1.6156799793243408, "learning_rate": 1.537104434841641e-07, "loss": 0.3764, "step": 21218 }, { "epoch": 2.837523401979139, "grad_norm": 1.6959151029586792, "learning_rate": 1.5345838254094746e-07, "loss": 0.3752, "step": 21219 }, { "epoch": 2.837657127574218, "grad_norm": 1.5061683654785156, "learning_rate": 1.532065268394689e-07, "loss": 0.3881, "step": 21220 }, { "epoch": 2.8377908531692966, "grad_norm": 1.6991662979125977, "learning_rate": 1.5295487638497863e-07, "loss": 0.3796, "step": 21221 }, { "epoch": 2.8379245787643756, "grad_norm": 1.6324154138565063, "learning_rate": 1.5270343118272024e-07, "loss": 0.3538, "step": 21222 }, { "epoch": 2.8380583043594543, "grad_norm": 1.6182020902633667, "learning_rate": 1.5245219123793619e-07, "loss": 0.3426, "step": 21223 }, { "epoch": 2.8381920299545333, "grad_norm": 1.6600232124328613, "learning_rate": 1.5220115655586454e-07, "loss": 0.3624, "step": 21224 }, { "epoch": 2.8383257555496124, "grad_norm": 1.6321144104003906, "learning_rate": 1.5195032714173442e-07, "loss": 0.3974, "step": 21225 }, { "epoch": 2.838459481144691, "grad_norm": 1.5164142847061157, "learning_rate": 1.516997030007783e-07, "loss": 0.3504, "step": 21226 }, { "epoch": 2.83859320673977, "grad_norm": 1.4518516063690186, "learning_rate": 1.5144928413821647e-07, "loss": 0.3449, "step": 21227 }, { "epoch": 2.8387269323348487, "grad_norm": 1.642876148223877, "learning_rate": 1.5119907055927142e-07, "loss": 0.4094, "step": 21228 }, { "epoch": 2.838860657929928, "grad_norm": 1.6323058605194092, "learning_rate": 1.5094906226915673e-07, "loss": 0.3863, "step": 21229 }, { "epoch": 2.838994383525007, "grad_norm": 1.1945912837982178, "learning_rate": 1.506992592730827e-07, "loss": 0.2975, "step": 21230 }, { "epoch": 2.8391281091200855, "grad_norm": 1.7042748928070068, "learning_rate": 1.5044966157626072e-07, "loss": 0.3751, "step": 21231 }, { "epoch": 2.8392618347151646, "grad_norm": 1.5199317932128906, "learning_rate": 1.5020026918388885e-07, "loss": 0.3506, "step": 21232 }, { "epoch": 2.839395560310243, "grad_norm": 1.6378141641616821, "learning_rate": 1.499510821011685e-07, "loss": 0.3802, "step": 21233 }, { "epoch": 2.8395292859053223, "grad_norm": 1.7438452243804932, "learning_rate": 1.4970210033329102e-07, "loss": 0.3854, "step": 21234 }, { "epoch": 2.8396630115004013, "grad_norm": 1.6094701290130615, "learning_rate": 1.4945332388544787e-07, "loss": 0.3304, "step": 21235 }, { "epoch": 2.83979673709548, "grad_norm": 1.5313116312026978, "learning_rate": 1.4920475276282487e-07, "loss": 0.3199, "step": 21236 }, { "epoch": 2.839930462690559, "grad_norm": 1.6055421829223633, "learning_rate": 1.4895638697060232e-07, "loss": 0.3757, "step": 21237 }, { "epoch": 2.8400641882856377, "grad_norm": 1.565784215927124, "learning_rate": 1.487082265139572e-07, "loss": 0.363, "step": 21238 }, { "epoch": 2.8401979138807167, "grad_norm": 1.4942741394042969, "learning_rate": 1.4846027139806207e-07, "loss": 0.3653, "step": 21239 }, { "epoch": 2.840331639475796, "grad_norm": 1.422958493232727, "learning_rate": 1.482125216280872e-07, "loss": 0.313, "step": 21240 }, { "epoch": 2.8404653650708744, "grad_norm": 1.4228938817977905, "learning_rate": 1.479649772091929e-07, "loss": 0.3312, "step": 21241 }, { "epoch": 2.8405990906659535, "grad_norm": 1.6390165090560913, "learning_rate": 1.4771763814654282e-07, "loss": 0.3501, "step": 21242 }, { "epoch": 2.840732816261032, "grad_norm": 1.4138860702514648, "learning_rate": 1.4747050444529066e-07, "loss": 0.343, "step": 21243 }, { "epoch": 2.840866541856111, "grad_norm": 1.6071960926055908, "learning_rate": 1.472235761105878e-07, "loss": 0.3966, "step": 21244 }, { "epoch": 2.8410002674511903, "grad_norm": 1.354293942451477, "learning_rate": 1.4697685314758236e-07, "loss": 0.3108, "step": 21245 }, { "epoch": 2.841133993046269, "grad_norm": 1.6347801685333252, "learning_rate": 1.467303355614147e-07, "loss": 0.3317, "step": 21246 }, { "epoch": 2.841267718641348, "grad_norm": 1.4767248630523682, "learning_rate": 1.4648402335722511e-07, "loss": 0.3308, "step": 21247 }, { "epoch": 2.8414014442364266, "grad_norm": 1.5630346536636353, "learning_rate": 1.462379165401473e-07, "loss": 0.3838, "step": 21248 }, { "epoch": 2.8415351698315057, "grad_norm": 1.4109286069869995, "learning_rate": 1.4599201511531046e-07, "loss": 0.3559, "step": 21249 }, { "epoch": 2.8416688954265847, "grad_norm": 1.5704572200775146, "learning_rate": 1.4574631908784275e-07, "loss": 0.3245, "step": 21250 }, { "epoch": 2.841802621021664, "grad_norm": 1.6264375448226929, "learning_rate": 1.4550082846286117e-07, "loss": 0.4059, "step": 21251 }, { "epoch": 2.8419363466167424, "grad_norm": 1.4583697319030762, "learning_rate": 1.452555432454872e-07, "loss": 0.3506, "step": 21252 }, { "epoch": 2.8420700722118215, "grad_norm": 1.504470705986023, "learning_rate": 1.4501046344083002e-07, "loss": 0.3508, "step": 21253 }, { "epoch": 2.8422037978069, "grad_norm": 1.5982348918914795, "learning_rate": 1.4476558905400008e-07, "loss": 0.3646, "step": 21254 }, { "epoch": 2.842337523401979, "grad_norm": 1.4951746463775635, "learning_rate": 1.44520920090101e-07, "loss": 0.3591, "step": 21255 }, { "epoch": 2.8424712489970583, "grad_norm": 1.6530685424804688, "learning_rate": 1.4427645655423205e-07, "loss": 0.3999, "step": 21256 }, { "epoch": 2.842604974592137, "grad_norm": 1.6215635538101196, "learning_rate": 1.440321984514903e-07, "loss": 0.3696, "step": 21257 }, { "epoch": 2.842738700187216, "grad_norm": 1.5934858322143555, "learning_rate": 1.437881457869661e-07, "loss": 0.3334, "step": 21258 }, { "epoch": 2.8428724257822946, "grad_norm": 1.7375500202178955, "learning_rate": 1.435442985657465e-07, "loss": 0.3875, "step": 21259 }, { "epoch": 2.8430061513773737, "grad_norm": 1.6236870288848877, "learning_rate": 1.4330065679291404e-07, "loss": 0.3204, "step": 21260 }, { "epoch": 2.8431398769724527, "grad_norm": 1.3964658975601196, "learning_rate": 1.4305722047354808e-07, "loss": 0.3276, "step": 21261 }, { "epoch": 2.8432736025675314, "grad_norm": 1.5651562213897705, "learning_rate": 1.428139896127223e-07, "loss": 0.3354, "step": 21262 }, { "epoch": 2.8434073281626104, "grad_norm": 1.8112328052520752, "learning_rate": 1.4257096421550598e-07, "loss": 0.4184, "step": 21263 }, { "epoch": 2.843541053757689, "grad_norm": 1.5994350910186768, "learning_rate": 1.4232814428696507e-07, "loss": 0.3637, "step": 21264 }, { "epoch": 2.843674779352768, "grad_norm": 1.4212368726730347, "learning_rate": 1.4208552983216218e-07, "loss": 0.3486, "step": 21265 }, { "epoch": 2.843808504947847, "grad_norm": 1.6336641311645508, "learning_rate": 1.4184312085615437e-07, "loss": 0.361, "step": 21266 }, { "epoch": 2.843942230542926, "grad_norm": 1.3432832956314087, "learning_rate": 1.4160091736399096e-07, "loss": 0.3503, "step": 21267 }, { "epoch": 2.844075956138005, "grad_norm": 1.6145756244659424, "learning_rate": 1.4135891936072456e-07, "loss": 0.4143, "step": 21268 }, { "epoch": 2.8442096817330835, "grad_norm": 1.443620204925537, "learning_rate": 1.4111712685139777e-07, "loss": 0.3614, "step": 21269 }, { "epoch": 2.8443434073281626, "grad_norm": 1.57658052444458, "learning_rate": 1.4087553984104995e-07, "loss": 0.3494, "step": 21270 }, { "epoch": 2.8444771329232417, "grad_norm": 1.3784065246582031, "learning_rate": 1.4063415833471815e-07, "loss": 0.3669, "step": 21271 }, { "epoch": 2.8446108585183203, "grad_norm": 1.827199101448059, "learning_rate": 1.4039298233743171e-07, "loss": 0.3672, "step": 21272 }, { "epoch": 2.8447445841133994, "grad_norm": 1.3486016988754272, "learning_rate": 1.401520118542199e-07, "loss": 0.3347, "step": 21273 }, { "epoch": 2.844878309708478, "grad_norm": 1.5634711980819702, "learning_rate": 1.3991124689010426e-07, "loss": 0.3606, "step": 21274 }, { "epoch": 2.845012035303557, "grad_norm": 1.6382722854614258, "learning_rate": 1.3967068745010305e-07, "loss": 0.3962, "step": 21275 }, { "epoch": 2.845145760898636, "grad_norm": 1.4224644899368286, "learning_rate": 1.394303335392322e-07, "loss": 0.3406, "step": 21276 }, { "epoch": 2.8452794864937148, "grad_norm": 1.690901756286621, "learning_rate": 1.3919018516249994e-07, "loss": 0.3643, "step": 21277 }, { "epoch": 2.845413212088794, "grad_norm": 1.618504524230957, "learning_rate": 1.3895024232491338e-07, "loss": 0.3682, "step": 21278 }, { "epoch": 2.8455469376838725, "grad_norm": 1.5880351066589355, "learning_rate": 1.387105050314719e-07, "loss": 0.365, "step": 21279 }, { "epoch": 2.8456806632789515, "grad_norm": 1.5475866794586182, "learning_rate": 1.3847097328717363e-07, "loss": 0.3402, "step": 21280 }, { "epoch": 2.8458143888740306, "grad_norm": 1.650546908378601, "learning_rate": 1.3823164709701133e-07, "loss": 0.3695, "step": 21281 }, { "epoch": 2.8459481144691092, "grad_norm": 1.5455268621444702, "learning_rate": 1.3799252646597428e-07, "loss": 0.3196, "step": 21282 }, { "epoch": 2.8460818400641883, "grad_norm": 1.831058382987976, "learning_rate": 1.377536113990463e-07, "loss": 0.4036, "step": 21283 }, { "epoch": 2.846215565659267, "grad_norm": 1.7668429613113403, "learning_rate": 1.3751490190120675e-07, "loss": 0.396, "step": 21284 }, { "epoch": 2.846349291254346, "grad_norm": 1.4617892503738403, "learning_rate": 1.3727639797743163e-07, "loss": 0.3861, "step": 21285 }, { "epoch": 2.846483016849425, "grad_norm": 1.5171540975570679, "learning_rate": 1.3703809963269256e-07, "loss": 0.3642, "step": 21286 }, { "epoch": 2.846616742444504, "grad_norm": 1.647857666015625, "learning_rate": 1.368000068719566e-07, "loss": 0.3521, "step": 21287 }, { "epoch": 2.8467504680395828, "grad_norm": 1.5477628707885742, "learning_rate": 1.365621197001854e-07, "loss": 0.3781, "step": 21288 }, { "epoch": 2.846884193634662, "grad_norm": 1.6630971431732178, "learning_rate": 1.3632443812233943e-07, "loss": 0.3646, "step": 21289 }, { "epoch": 2.8470179192297405, "grad_norm": 1.5150630474090576, "learning_rate": 1.3608696214337246e-07, "loss": 0.3615, "step": 21290 }, { "epoch": 2.8471516448248195, "grad_norm": 1.4692448377609253, "learning_rate": 1.3584969176823282e-07, "loss": 0.322, "step": 21291 }, { "epoch": 2.8472853704198986, "grad_norm": 1.6206945180892944, "learning_rate": 1.3561262700186872e-07, "loss": 0.3536, "step": 21292 }, { "epoch": 2.8474190960149772, "grad_norm": 1.3087486028671265, "learning_rate": 1.3537576784921957e-07, "loss": 0.3262, "step": 21293 }, { "epoch": 2.8475528216100563, "grad_norm": 1.5789207220077515, "learning_rate": 1.3513911431522254e-07, "loss": 0.3513, "step": 21294 }, { "epoch": 2.847686547205135, "grad_norm": 1.472839117050171, "learning_rate": 1.3490266640481254e-07, "loss": 0.3339, "step": 21295 }, { "epoch": 2.847820272800214, "grad_norm": 1.4450534582138062, "learning_rate": 1.3466642412291454e-07, "loss": 0.3354, "step": 21296 }, { "epoch": 2.847953998395293, "grad_norm": 1.7096911668777466, "learning_rate": 1.344303874744568e-07, "loss": 0.4141, "step": 21297 }, { "epoch": 2.8480877239903717, "grad_norm": 1.9521342515945435, "learning_rate": 1.3419455646435653e-07, "loss": 0.3945, "step": 21298 }, { "epoch": 2.8482214495854508, "grad_norm": 1.5692262649536133, "learning_rate": 1.3395893109752979e-07, "loss": 0.3582, "step": 21299 }, { "epoch": 2.8483551751805294, "grad_norm": 1.8141324520111084, "learning_rate": 1.3372351137888929e-07, "loss": 0.4003, "step": 21300 }, { "epoch": 2.8484889007756085, "grad_norm": 1.3479763269424438, "learning_rate": 1.3348829731334002e-07, "loss": 0.3348, "step": 21301 }, { "epoch": 2.8486226263706875, "grad_norm": 1.5192432403564453, "learning_rate": 1.3325328890578693e-07, "loss": 0.3577, "step": 21302 }, { "epoch": 2.848756351965766, "grad_norm": 1.7903512716293335, "learning_rate": 1.3301848616112724e-07, "loss": 0.3837, "step": 21303 }, { "epoch": 2.8488900775608452, "grad_norm": 1.4066026210784912, "learning_rate": 1.3278388908425477e-07, "loss": 0.3547, "step": 21304 }, { "epoch": 2.849023803155924, "grad_norm": 1.4800617694854736, "learning_rate": 1.325494976800612e-07, "loss": 0.3366, "step": 21305 }, { "epoch": 2.849157528751003, "grad_norm": 1.5031013488769531, "learning_rate": 1.323153119534315e-07, "loss": 0.3598, "step": 21306 }, { "epoch": 2.849291254346082, "grad_norm": 1.3461406230926514, "learning_rate": 1.320813319092462e-07, "loss": 0.319, "step": 21307 }, { "epoch": 2.8494249799411606, "grad_norm": 1.5111366510391235, "learning_rate": 1.3184755755238254e-07, "loss": 0.2989, "step": 21308 }, { "epoch": 2.8495587055362397, "grad_norm": 1.3939272165298462, "learning_rate": 1.3161398888771436e-07, "loss": 0.3851, "step": 21309 }, { "epoch": 2.8496924311313183, "grad_norm": 1.383384108543396, "learning_rate": 1.313806259201089e-07, "loss": 0.3349, "step": 21310 }, { "epoch": 2.8498261567263974, "grad_norm": 1.4775140285491943, "learning_rate": 1.3114746865443227e-07, "loss": 0.3488, "step": 21311 }, { "epoch": 2.8499598823214765, "grad_norm": 1.7025502920150757, "learning_rate": 1.3091451709554172e-07, "loss": 0.3582, "step": 21312 }, { "epoch": 2.850093607916555, "grad_norm": 1.4109947681427002, "learning_rate": 1.306817712482955e-07, "loss": 0.3208, "step": 21313 }, { "epoch": 2.850227333511634, "grad_norm": 1.5134276151657104, "learning_rate": 1.3044923111754427e-07, "loss": 0.3905, "step": 21314 }, { "epoch": 2.850361059106713, "grad_norm": 1.352474331855774, "learning_rate": 1.30216896708133e-07, "loss": 0.3512, "step": 21315 }, { "epoch": 2.850494784701792, "grad_norm": 1.483819603919983, "learning_rate": 1.2998476802490779e-07, "loss": 0.3508, "step": 21316 }, { "epoch": 2.850628510296871, "grad_norm": 1.434454083442688, "learning_rate": 1.297528450727048e-07, "loss": 0.3246, "step": 21317 }, { "epoch": 2.85076223589195, "grad_norm": 1.6253951787948608, "learning_rate": 1.2952112785635796e-07, "loss": 0.3678, "step": 21318 }, { "epoch": 2.8508959614870286, "grad_norm": 1.4970555305480957, "learning_rate": 1.2928961638069893e-07, "loss": 0.3511, "step": 21319 }, { "epoch": 2.8510296870821072, "grad_norm": 1.6115256547927856, "learning_rate": 1.2905831065055275e-07, "loss": 0.3951, "step": 21320 }, { "epoch": 2.8511634126771863, "grad_norm": 1.4637290239334106, "learning_rate": 1.288272106707411e-07, "loss": 0.3101, "step": 21321 }, { "epoch": 2.8512971382722654, "grad_norm": 1.5984907150268555, "learning_rate": 1.2859631644608016e-07, "loss": 0.3884, "step": 21322 }, { "epoch": 2.8514308638673445, "grad_norm": 1.8046034574508667, "learning_rate": 1.2836562798138275e-07, "loss": 0.3725, "step": 21323 }, { "epoch": 2.851564589462423, "grad_norm": 1.8774304389953613, "learning_rate": 1.2813514528145833e-07, "loss": 0.4227, "step": 21324 }, { "epoch": 2.851698315057502, "grad_norm": 1.5253602266311646, "learning_rate": 1.2790486835110972e-07, "loss": 0.3586, "step": 21325 }, { "epoch": 2.851832040652581, "grad_norm": 1.6466706991195679, "learning_rate": 1.2767479719513864e-07, "loss": 0.3626, "step": 21326 }, { "epoch": 2.85196576624766, "grad_norm": 1.4994215965270996, "learning_rate": 1.2744493181833793e-07, "loss": 0.3406, "step": 21327 }, { "epoch": 2.852099491842739, "grad_norm": 1.596832036972046, "learning_rate": 1.2721527222550267e-07, "loss": 0.3713, "step": 21328 }, { "epoch": 2.8522332174378175, "grad_norm": 1.5940698385238647, "learning_rate": 1.2698581842141567e-07, "loss": 0.3822, "step": 21329 }, { "epoch": 2.8523669430328966, "grad_norm": 1.6018999814987183, "learning_rate": 1.267565704108642e-07, "loss": 0.342, "step": 21330 }, { "epoch": 2.8525006686279752, "grad_norm": 1.4740620851516724, "learning_rate": 1.2652752819862225e-07, "loss": 0.3715, "step": 21331 }, { "epoch": 2.8526343942230543, "grad_norm": 1.371578335762024, "learning_rate": 1.2629869178946708e-07, "loss": 0.3468, "step": 21332 }, { "epoch": 2.8527681198181334, "grad_norm": 1.3511689901351929, "learning_rate": 1.2607006118816712e-07, "loss": 0.3388, "step": 21333 }, { "epoch": 2.852901845413212, "grad_norm": 1.7066760063171387, "learning_rate": 1.2584163639948853e-07, "loss": 0.4007, "step": 21334 }, { "epoch": 2.853035571008291, "grad_norm": 1.5390831232070923, "learning_rate": 1.2561341742819422e-07, "loss": 0.3679, "step": 21335 }, { "epoch": 2.8531692966033697, "grad_norm": 1.6075454950332642, "learning_rate": 1.25385404279037e-07, "loss": 0.3767, "step": 21336 }, { "epoch": 2.853303022198449, "grad_norm": 1.6106818914413452, "learning_rate": 1.2515759695677309e-07, "loss": 0.3919, "step": 21337 }, { "epoch": 2.853436747793528, "grad_norm": 1.5912941694259644, "learning_rate": 1.2492999546614982e-07, "loss": 0.3902, "step": 21338 }, { "epoch": 2.8535704733886065, "grad_norm": 1.486480951309204, "learning_rate": 1.2470259981191113e-07, "loss": 0.3683, "step": 21339 }, { "epoch": 2.8537041989836855, "grad_norm": 1.486699104309082, "learning_rate": 1.244754099987977e-07, "loss": 0.3516, "step": 21340 }, { "epoch": 2.853837924578764, "grad_norm": 1.6901589632034302, "learning_rate": 1.2424842603154353e-07, "loss": 0.3886, "step": 21341 }, { "epoch": 2.8539716501738432, "grad_norm": 1.611914873123169, "learning_rate": 1.2402164791488146e-07, "loss": 0.3409, "step": 21342 }, { "epoch": 2.8541053757689223, "grad_norm": 1.7068796157836914, "learning_rate": 1.2379507565353776e-07, "loss": 0.4039, "step": 21343 }, { "epoch": 2.854239101364001, "grad_norm": 1.8993489742279053, "learning_rate": 1.2356870925223528e-07, "loss": 0.4436, "step": 21344 }, { "epoch": 2.85437282695908, "grad_norm": 1.545749545097351, "learning_rate": 1.2334254871569252e-07, "loss": 0.3186, "step": 21345 }, { "epoch": 2.8545065525541586, "grad_norm": 1.6281611919403076, "learning_rate": 1.231165940486234e-07, "loss": 0.3637, "step": 21346 }, { "epoch": 2.8546402781492377, "grad_norm": 1.7126564979553223, "learning_rate": 1.2289084525573646e-07, "loss": 0.3402, "step": 21347 }, { "epoch": 2.854774003744317, "grad_norm": 1.7417253255844116, "learning_rate": 1.2266530234174013e-07, "loss": 0.3675, "step": 21348 }, { "epoch": 2.8549077293393954, "grad_norm": 1.5606211423873901, "learning_rate": 1.2243996531133284e-07, "loss": 0.389, "step": 21349 }, { "epoch": 2.8550414549344745, "grad_norm": 1.767960786819458, "learning_rate": 1.222148341692131e-07, "loss": 0.3868, "step": 21350 }, { "epoch": 2.855175180529553, "grad_norm": 1.6719179153442383, "learning_rate": 1.219899089200738e-07, "loss": 0.3548, "step": 21351 }, { "epoch": 2.855308906124632, "grad_norm": 1.600157380104065, "learning_rate": 1.217651895686023e-07, "loss": 0.355, "step": 21352 }, { "epoch": 2.8554426317197112, "grad_norm": 1.5621857643127441, "learning_rate": 1.215406761194826e-07, "loss": 0.3239, "step": 21353 }, { "epoch": 2.8555763573147903, "grad_norm": 1.4373791217803955, "learning_rate": 1.2131636857739548e-07, "loss": 0.323, "step": 21354 }, { "epoch": 2.855710082909869, "grad_norm": 1.8352621793746948, "learning_rate": 1.210922669470149e-07, "loss": 0.3439, "step": 21355 }, { "epoch": 2.855843808504948, "grad_norm": 1.5156341791152954, "learning_rate": 1.2086837123301388e-07, "loss": 0.36, "step": 21356 }, { "epoch": 2.8559775341000266, "grad_norm": 1.8039201498031616, "learning_rate": 1.2064468144005637e-07, "loss": 0.3544, "step": 21357 }, { "epoch": 2.8561112596951057, "grad_norm": 1.5010358095169067, "learning_rate": 1.2042119757280867e-07, "loss": 0.3725, "step": 21358 }, { "epoch": 2.856244985290185, "grad_norm": 1.594415307044983, "learning_rate": 1.201979196359282e-07, "loss": 0.3567, "step": 21359 }, { "epoch": 2.8563787108852634, "grad_norm": 1.3552346229553223, "learning_rate": 1.1997484763406564e-07, "loss": 0.3078, "step": 21360 }, { "epoch": 2.8565124364803425, "grad_norm": 1.504364013671875, "learning_rate": 1.1975198157187507e-07, "loss": 0.4023, "step": 21361 }, { "epoch": 2.856646162075421, "grad_norm": 1.5708248615264893, "learning_rate": 1.1952932145399943e-07, "loss": 0.3743, "step": 21362 }, { "epoch": 2.8567798876705, "grad_norm": 1.5693808794021606, "learning_rate": 1.1930686728508055e-07, "loss": 0.346, "step": 21363 }, { "epoch": 2.8569136132655792, "grad_norm": 1.5300365686416626, "learning_rate": 1.1908461906975588e-07, "loss": 0.3427, "step": 21364 }, { "epoch": 2.857047338860658, "grad_norm": 1.5368432998657227, "learning_rate": 1.1886257681265722e-07, "loss": 0.3315, "step": 21365 }, { "epoch": 2.857181064455737, "grad_norm": 1.4282829761505127, "learning_rate": 1.1864074051841202e-07, "loss": 0.3715, "step": 21366 }, { "epoch": 2.8573147900508156, "grad_norm": 1.4215545654296875, "learning_rate": 1.1841911019164542e-07, "loss": 0.3318, "step": 21367 }, { "epoch": 2.8574485156458946, "grad_norm": 1.6703910827636719, "learning_rate": 1.1819768583697711e-07, "loss": 0.3821, "step": 21368 }, { "epoch": 2.8575822412409737, "grad_norm": 1.3917287588119507, "learning_rate": 1.1797646745902225e-07, "loss": 0.3071, "step": 21369 }, { "epoch": 2.8577159668360523, "grad_norm": 1.5248653888702393, "learning_rate": 1.1775545506239161e-07, "loss": 0.3424, "step": 21370 }, { "epoch": 2.8578496924311314, "grad_norm": 1.5397793054580688, "learning_rate": 1.1753464865169261e-07, "loss": 0.3458, "step": 21371 }, { "epoch": 2.85798341802621, "grad_norm": 1.5369126796722412, "learning_rate": 1.1731404823152603e-07, "loss": 0.3887, "step": 21372 }, { "epoch": 2.858117143621289, "grad_norm": 1.5674604177474976, "learning_rate": 1.1709365380649263e-07, "loss": 0.3518, "step": 21373 }, { "epoch": 2.858250869216368, "grad_norm": 1.619637131690979, "learning_rate": 1.1687346538118538e-07, "loss": 0.3229, "step": 21374 }, { "epoch": 2.858384594811447, "grad_norm": 1.7939313650131226, "learning_rate": 1.1665348296019396e-07, "loss": 0.3912, "step": 21375 }, { "epoch": 2.858518320406526, "grad_norm": 1.4504554271697998, "learning_rate": 1.1643370654810138e-07, "loss": 0.3477, "step": 21376 }, { "epoch": 2.8586520460016045, "grad_norm": 1.4417351484298706, "learning_rate": 1.1621413614949173e-07, "loss": 0.3242, "step": 21377 }, { "epoch": 2.8587857715966836, "grad_norm": 1.7811124324798584, "learning_rate": 1.1599477176894136e-07, "loss": 0.3988, "step": 21378 }, { "epoch": 2.8589194971917626, "grad_norm": 1.7639611959457397, "learning_rate": 1.1577561341102106e-07, "loss": 0.3849, "step": 21379 }, { "epoch": 2.8590532227868413, "grad_norm": 1.5757520198822021, "learning_rate": 1.155566610803005e-07, "loss": 0.3746, "step": 21380 }, { "epoch": 2.8591869483819203, "grad_norm": 1.453657865524292, "learning_rate": 1.1533791478134271e-07, "loss": 0.3198, "step": 21381 }, { "epoch": 2.859320673976999, "grad_norm": 1.4833120107650757, "learning_rate": 1.1511937451870737e-07, "loss": 0.3508, "step": 21382 }, { "epoch": 2.859454399572078, "grad_norm": 1.756886601448059, "learning_rate": 1.149010402969497e-07, "loss": 0.4032, "step": 21383 }, { "epoch": 2.859588125167157, "grad_norm": 1.6146433353424072, "learning_rate": 1.1468291212062165e-07, "loss": 0.4046, "step": 21384 }, { "epoch": 2.8597218507622357, "grad_norm": 1.6746702194213867, "learning_rate": 1.1446498999426848e-07, "loss": 0.3771, "step": 21385 }, { "epoch": 2.859855576357315, "grad_norm": 1.5913323163986206, "learning_rate": 1.1424727392243317e-07, "loss": 0.3654, "step": 21386 }, { "epoch": 2.8599893019523934, "grad_norm": 1.7871694564819336, "learning_rate": 1.1402976390965326e-07, "loss": 0.3716, "step": 21387 }, { "epoch": 2.8601230275474725, "grad_norm": 1.4441653490066528, "learning_rate": 1.1381245996046397e-07, "loss": 0.35, "step": 21388 }, { "epoch": 2.8602567531425516, "grad_norm": 1.5628869533538818, "learning_rate": 1.1359536207939393e-07, "loss": 0.344, "step": 21389 }, { "epoch": 2.8603904787376306, "grad_norm": 1.4453990459442139, "learning_rate": 1.1337847027096726e-07, "loss": 0.3076, "step": 21390 }, { "epoch": 2.8605242043327093, "grad_norm": 1.7705130577087402, "learning_rate": 1.1316178453970706e-07, "loss": 0.4375, "step": 21391 }, { "epoch": 2.8606579299277883, "grad_norm": 1.588617205619812, "learning_rate": 1.1294530489012856e-07, "loss": 0.3883, "step": 21392 }, { "epoch": 2.860791655522867, "grad_norm": 1.4051568508148193, "learning_rate": 1.1272903132674374e-07, "loss": 0.3753, "step": 21393 }, { "epoch": 2.860925381117946, "grad_norm": 1.7912297248840332, "learning_rate": 1.125129638540623e-07, "loss": 0.4085, "step": 21394 }, { "epoch": 2.861059106713025, "grad_norm": 1.5637527704238892, "learning_rate": 1.122971024765851e-07, "loss": 0.3414, "step": 21395 }, { "epoch": 2.8611928323081037, "grad_norm": 1.2206496000289917, "learning_rate": 1.1208144719881408e-07, "loss": 0.3159, "step": 21396 }, { "epoch": 2.861326557903183, "grad_norm": 1.5811996459960938, "learning_rate": 1.1186599802524344e-07, "loss": 0.3359, "step": 21397 }, { "epoch": 2.8614602834982614, "grad_norm": 1.6839238405227661, "learning_rate": 1.1165075496036515e-07, "loss": 0.3957, "step": 21398 }, { "epoch": 2.8615940090933405, "grad_norm": 1.4935643672943115, "learning_rate": 1.1143571800866449e-07, "loss": 0.3526, "step": 21399 }, { "epoch": 2.8617277346884196, "grad_norm": 1.5058201551437378, "learning_rate": 1.1122088717462231e-07, "loss": 0.344, "step": 21400 }, { "epoch": 2.861861460283498, "grad_norm": 1.3692536354064941, "learning_rate": 1.1100626246272062e-07, "loss": 0.3034, "step": 21401 }, { "epoch": 2.8619951858785773, "grad_norm": 1.365714192390442, "learning_rate": 1.1079184387742914e-07, "loss": 0.288, "step": 21402 }, { "epoch": 2.862128911473656, "grad_norm": 1.5063707828521729, "learning_rate": 1.1057763142321875e-07, "loss": 0.3242, "step": 21403 }, { "epoch": 2.862262637068735, "grad_norm": 1.7072794437408447, "learning_rate": 1.1036362510455478e-07, "loss": 0.3591, "step": 21404 }, { "epoch": 2.862396362663814, "grad_norm": 1.4276528358459473, "learning_rate": 1.1014982492589698e-07, "loss": 0.3041, "step": 21405 }, { "epoch": 2.8625300882588927, "grad_norm": 1.2962820529937744, "learning_rate": 1.0993623089170402e-07, "loss": 0.3059, "step": 21406 }, { "epoch": 2.8626638138539717, "grad_norm": 1.660308837890625, "learning_rate": 1.0972284300642567e-07, "loss": 0.3674, "step": 21407 }, { "epoch": 2.8627975394490504, "grad_norm": 1.4883298873901367, "learning_rate": 1.0950966127451057e-07, "loss": 0.367, "step": 21408 }, { "epoch": 2.8629312650441294, "grad_norm": 1.4082783460617065, "learning_rate": 1.0929668570040187e-07, "loss": 0.3446, "step": 21409 }, { "epoch": 2.8630649906392085, "grad_norm": 1.4843063354492188, "learning_rate": 1.0908391628854042e-07, "loss": 0.3974, "step": 21410 }, { "epoch": 2.863198716234287, "grad_norm": 1.6850398778915405, "learning_rate": 1.0887135304335938e-07, "loss": 0.379, "step": 21411 }, { "epoch": 2.863332441829366, "grad_norm": 1.496293306350708, "learning_rate": 1.0865899596929075e-07, "loss": 0.335, "step": 21412 }, { "epoch": 2.863466167424445, "grad_norm": 1.688007116317749, "learning_rate": 1.0844684507076097e-07, "loss": 0.3635, "step": 21413 }, { "epoch": 2.863599893019524, "grad_norm": 1.4264684915542603, "learning_rate": 1.0823490035218986e-07, "loss": 0.3314, "step": 21414 }, { "epoch": 2.863733618614603, "grad_norm": 1.7247264385223389, "learning_rate": 1.0802316181799833e-07, "loss": 0.3593, "step": 21415 }, { "epoch": 2.8638673442096816, "grad_norm": 1.5128061771392822, "learning_rate": 1.0781162947259727e-07, "loss": 0.351, "step": 21416 }, { "epoch": 2.8640010698047607, "grad_norm": 1.9091788530349731, "learning_rate": 1.0760030332039761e-07, "loss": 0.4195, "step": 21417 }, { "epoch": 2.8641347953998393, "grad_norm": 1.7148890495300293, "learning_rate": 1.0738918336580362e-07, "loss": 0.408, "step": 21418 }, { "epoch": 2.8642685209949184, "grad_norm": 1.3402626514434814, "learning_rate": 1.071782696132162e-07, "loss": 0.3467, "step": 21419 }, { "epoch": 2.8644022465899974, "grad_norm": 1.4568476676940918, "learning_rate": 1.0696756206703185e-07, "loss": 0.3801, "step": 21420 }, { "epoch": 2.8645359721850765, "grad_norm": 1.5037922859191895, "learning_rate": 1.0675706073164038e-07, "loss": 0.3986, "step": 21421 }, { "epoch": 2.864669697780155, "grad_norm": 1.3684141635894775, "learning_rate": 1.0654676561143273e-07, "loss": 0.3518, "step": 21422 }, { "epoch": 2.8648034233752337, "grad_norm": 1.4641708135604858, "learning_rate": 1.0633667671078984e-07, "loss": 0.3818, "step": 21423 }, { "epoch": 2.864937148970313, "grad_norm": 1.6568219661712646, "learning_rate": 1.0612679403409154e-07, "loss": 0.3364, "step": 21424 }, { "epoch": 2.865070874565392, "grad_norm": 1.486709713935852, "learning_rate": 1.0591711758571322e-07, "loss": 0.3692, "step": 21425 }, { "epoch": 2.865204600160471, "grad_norm": 1.5510412454605103, "learning_rate": 1.057076473700247e-07, "loss": 0.3508, "step": 21426 }, { "epoch": 2.8653383257555496, "grad_norm": 1.59087336063385, "learning_rate": 1.0549838339139362e-07, "loss": 0.3635, "step": 21427 }, { "epoch": 2.8654720513506287, "grad_norm": 1.5892013311386108, "learning_rate": 1.0528932565417982e-07, "loss": 0.3502, "step": 21428 }, { "epoch": 2.8656057769457073, "grad_norm": 1.523118019104004, "learning_rate": 1.0508047416274203e-07, "loss": 0.3465, "step": 21429 }, { "epoch": 2.8657395025407864, "grad_norm": 1.6732336282730103, "learning_rate": 1.0487182892143232e-07, "loss": 0.3957, "step": 21430 }, { "epoch": 2.8658732281358654, "grad_norm": 1.7156574726104736, "learning_rate": 1.0466338993460167e-07, "loss": 0.3397, "step": 21431 }, { "epoch": 2.866006953730944, "grad_norm": 1.6031233072280884, "learning_rate": 1.0445515720659438e-07, "loss": 0.3599, "step": 21432 }, { "epoch": 2.866140679326023, "grad_norm": 1.4509334564208984, "learning_rate": 1.0424713074174919e-07, "loss": 0.3315, "step": 21433 }, { "epoch": 2.8662744049211017, "grad_norm": 1.6894257068634033, "learning_rate": 1.0403931054440375e-07, "loss": 0.3926, "step": 21434 }, { "epoch": 2.866408130516181, "grad_norm": 1.3921858072280884, "learning_rate": 1.0383169661888904e-07, "loss": 0.3476, "step": 21435 }, { "epoch": 2.86654185611126, "grad_norm": 1.5089781284332275, "learning_rate": 1.036242889695338e-07, "loss": 0.3612, "step": 21436 }, { "epoch": 2.8666755817063385, "grad_norm": 1.5736150741577148, "learning_rate": 1.0341708760066016e-07, "loss": 0.3765, "step": 21437 }, { "epoch": 2.8668093073014176, "grad_norm": 1.4795104265213013, "learning_rate": 1.0321009251658686e-07, "loss": 0.3567, "step": 21438 }, { "epoch": 2.866943032896496, "grad_norm": 1.5555113554000854, "learning_rate": 1.0300330372163047e-07, "loss": 0.4047, "step": 21439 }, { "epoch": 2.8670767584915753, "grad_norm": 1.5946283340454102, "learning_rate": 1.0279672122009865e-07, "loss": 0.3463, "step": 21440 }, { "epoch": 2.8672104840866544, "grad_norm": 1.6732900142669678, "learning_rate": 1.0259034501629795e-07, "loss": 0.3491, "step": 21441 }, { "epoch": 2.867344209681733, "grad_norm": 1.4459824562072754, "learning_rate": 1.0238417511453158e-07, "loss": 0.3245, "step": 21442 }, { "epoch": 2.867477935276812, "grad_norm": 1.4974004030227661, "learning_rate": 1.0217821151909612e-07, "loss": 0.3636, "step": 21443 }, { "epoch": 2.8676116608718907, "grad_norm": 1.5511211156845093, "learning_rate": 1.0197245423428481e-07, "loss": 0.3597, "step": 21444 }, { "epoch": 2.8677453864669697, "grad_norm": 1.5882346630096436, "learning_rate": 1.0176690326438531e-07, "loss": 0.3651, "step": 21445 }, { "epoch": 2.867879112062049, "grad_norm": 1.6177600622177124, "learning_rate": 1.0156155861368533e-07, "loss": 0.3511, "step": 21446 }, { "epoch": 2.8680128376571274, "grad_norm": 1.7877057790756226, "learning_rate": 1.0135642028646142e-07, "loss": 0.4726, "step": 21447 }, { "epoch": 2.8681465632522065, "grad_norm": 1.6083234548568726, "learning_rate": 1.0115148828699017e-07, "loss": 0.3181, "step": 21448 }, { "epoch": 2.868280288847285, "grad_norm": 1.4748469591140747, "learning_rate": 1.0094676261954484e-07, "loss": 0.3441, "step": 21449 }, { "epoch": 2.868414014442364, "grad_norm": 1.688672423362732, "learning_rate": 1.0074224328839088e-07, "loss": 0.3146, "step": 21450 }, { "epoch": 2.8685477400374433, "grad_norm": 1.5790619850158691, "learning_rate": 1.0053793029779379e-07, "loss": 0.3857, "step": 21451 }, { "epoch": 2.868681465632522, "grad_norm": 1.4694894552230835, "learning_rate": 1.0033382365201016e-07, "loss": 0.3467, "step": 21452 }, { "epoch": 2.868815191227601, "grad_norm": 1.664807915687561, "learning_rate": 1.0012992335529548e-07, "loss": 0.3458, "step": 21453 }, { "epoch": 2.8689489168226796, "grad_norm": 1.5460667610168457, "learning_rate": 9.992622941189856e-08, "loss": 0.3454, "step": 21454 }, { "epoch": 2.8690826424177587, "grad_norm": 1.5894346237182617, "learning_rate": 9.972274182606712e-08, "loss": 0.369, "step": 21455 }, { "epoch": 2.8692163680128377, "grad_norm": 1.630205750465393, "learning_rate": 9.95194606020411e-08, "loss": 0.3951, "step": 21456 }, { "epoch": 2.869350093607917, "grad_norm": 1.7381374835968018, "learning_rate": 9.931638574405711e-08, "loss": 0.3579, "step": 21457 }, { "epoch": 2.8694838192029954, "grad_norm": 1.5989984273910522, "learning_rate": 9.911351725635066e-08, "loss": 0.3877, "step": 21458 }, { "epoch": 2.8696175447980745, "grad_norm": 1.4860011339187622, "learning_rate": 9.891085514314835e-08, "loss": 0.351, "step": 21459 }, { "epoch": 2.869751270393153, "grad_norm": 1.5850794315338135, "learning_rate": 9.870839940867461e-08, "loss": 0.3734, "step": 21460 }, { "epoch": 2.869884995988232, "grad_norm": 1.543359637260437, "learning_rate": 9.850615005714936e-08, "loss": 0.3646, "step": 21461 }, { "epoch": 2.8700187215833113, "grad_norm": 1.6070114374160767, "learning_rate": 9.830410709278925e-08, "loss": 0.3749, "step": 21462 }, { "epoch": 2.87015244717839, "grad_norm": 1.5142873525619507, "learning_rate": 9.810227051980648e-08, "loss": 0.3349, "step": 21463 }, { "epoch": 2.870286172773469, "grad_norm": 1.8093655109405518, "learning_rate": 9.790064034240432e-08, "loss": 0.4021, "step": 21464 }, { "epoch": 2.8704198983685476, "grad_norm": 1.633550763130188, "learning_rate": 9.769921656479053e-08, "loss": 0.3651, "step": 21465 }, { "epoch": 2.8705536239636267, "grad_norm": 1.5115312337875366, "learning_rate": 9.749799919115844e-08, "loss": 0.3429, "step": 21466 }, { "epoch": 2.8706873495587057, "grad_norm": 1.6019270420074463, "learning_rate": 9.729698822570688e-08, "loss": 0.3665, "step": 21467 }, { "epoch": 2.8708210751537844, "grad_norm": 1.5307166576385498, "learning_rate": 9.709618367262364e-08, "loss": 0.3403, "step": 21468 }, { "epoch": 2.8709548007488634, "grad_norm": 1.6280714273452759, "learning_rate": 9.689558553609313e-08, "loss": 0.3618, "step": 21469 }, { "epoch": 2.871088526343942, "grad_norm": 1.5097849369049072, "learning_rate": 9.669519382029869e-08, "loss": 0.3226, "step": 21470 }, { "epoch": 2.871222251939021, "grad_norm": 1.4506953954696655, "learning_rate": 9.649500852941696e-08, "loss": 0.3552, "step": 21471 }, { "epoch": 2.8713559775341, "grad_norm": 1.637101650238037, "learning_rate": 9.629502966761905e-08, "loss": 0.3784, "step": 21472 }, { "epoch": 2.871489703129179, "grad_norm": 1.5781289339065552, "learning_rate": 9.609525723907498e-08, "loss": 0.3691, "step": 21473 }, { "epoch": 2.871623428724258, "grad_norm": 1.4545581340789795, "learning_rate": 9.589569124794918e-08, "loss": 0.3521, "step": 21474 }, { "epoch": 2.8717571543193365, "grad_norm": 1.584717869758606, "learning_rate": 9.569633169839943e-08, "loss": 0.3374, "step": 21475 }, { "epoch": 2.8718908799144156, "grad_norm": 1.5601285696029663, "learning_rate": 9.549717859458241e-08, "loss": 0.3745, "step": 21476 }, { "epoch": 2.8720246055094947, "grad_norm": 1.5052555799484253, "learning_rate": 9.529823194064924e-08, "loss": 0.357, "step": 21477 }, { "epoch": 2.8721583311045733, "grad_norm": 1.695675015449524, "learning_rate": 9.509949174074662e-08, "loss": 0.3319, "step": 21478 }, { "epoch": 2.8722920566996524, "grad_norm": 1.4590795040130615, "learning_rate": 9.490095799901677e-08, "loss": 0.3532, "step": 21479 }, { "epoch": 2.872425782294731, "grad_norm": 1.5261414051055908, "learning_rate": 9.470263071959862e-08, "loss": 0.3471, "step": 21480 }, { "epoch": 2.87255950788981, "grad_norm": 1.6008661985397339, "learning_rate": 9.450450990662552e-08, "loss": 0.3797, "step": 21481 }, { "epoch": 2.872693233484889, "grad_norm": 1.5007354021072388, "learning_rate": 9.43065955642275e-08, "loss": 0.406, "step": 21482 }, { "epoch": 2.8728269590799678, "grad_norm": 1.563239574432373, "learning_rate": 9.410888769653015e-08, "loss": 0.3425, "step": 21483 }, { "epoch": 2.872960684675047, "grad_norm": 1.430267333984375, "learning_rate": 9.391138630765462e-08, "loss": 0.3321, "step": 21484 }, { "epoch": 2.8730944102701255, "grad_norm": 1.542784571647644, "learning_rate": 9.37140914017154e-08, "loss": 0.3718, "step": 21485 }, { "epoch": 2.8732281358652045, "grad_norm": 1.5014311075210571, "learning_rate": 9.351700298282806e-08, "loss": 0.3558, "step": 21486 }, { "epoch": 2.8733618614602836, "grad_norm": 1.7070379257202148, "learning_rate": 9.332012105509935e-08, "loss": 0.424, "step": 21487 }, { "epoch": 2.8734955870553622, "grad_norm": 1.6422314643859863, "learning_rate": 9.312344562263153e-08, "loss": 0.3821, "step": 21488 }, { "epoch": 2.8736293126504413, "grad_norm": 1.6174280643463135, "learning_rate": 9.292697668952799e-08, "loss": 0.3734, "step": 21489 }, { "epoch": 2.87376303824552, "grad_norm": 1.6071025133132935, "learning_rate": 9.273071425987878e-08, "loss": 0.3552, "step": 21490 }, { "epoch": 2.873896763840599, "grad_norm": 1.5185357332229614, "learning_rate": 9.253465833778064e-08, "loss": 0.3377, "step": 21491 }, { "epoch": 2.874030489435678, "grad_norm": 1.7695194482803345, "learning_rate": 9.233880892731473e-08, "loss": 0.3268, "step": 21492 }, { "epoch": 2.874164215030757, "grad_norm": 1.4268543720245361, "learning_rate": 9.214316603256668e-08, "loss": 0.342, "step": 21493 }, { "epoch": 2.8742979406258358, "grad_norm": 1.6798388957977295, "learning_rate": 9.194772965761434e-08, "loss": 0.3757, "step": 21494 }, { "epoch": 2.874431666220915, "grad_norm": 1.8127113580703735, "learning_rate": 9.17524998065289e-08, "loss": 0.4133, "step": 21495 }, { "epoch": 2.8745653918159935, "grad_norm": 1.6814757585525513, "learning_rate": 9.155747648338264e-08, "loss": 0.4121, "step": 21496 }, { "epoch": 2.8746991174110725, "grad_norm": 1.6773827075958252, "learning_rate": 9.1362659692239e-08, "loss": 0.3619, "step": 21497 }, { "epoch": 2.8748328430061516, "grad_norm": 1.5675195455551147, "learning_rate": 9.116804943715918e-08, "loss": 0.3738, "step": 21498 }, { "epoch": 2.8749665686012302, "grad_norm": 1.3587759733200073, "learning_rate": 9.09736457221999e-08, "loss": 0.3193, "step": 21499 }, { "epoch": 2.8751002941963093, "grad_norm": 1.540226697921753, "learning_rate": 9.07794485514124e-08, "loss": 0.3799, "step": 21500 }, { "epoch": 2.875234019791388, "grad_norm": 1.5474010705947876, "learning_rate": 9.058545792884565e-08, "loss": 0.3745, "step": 21501 }, { "epoch": 2.875367745386467, "grad_norm": 1.6101235151290894, "learning_rate": 9.039167385854308e-08, "loss": 0.3515, "step": 21502 }, { "epoch": 2.875501470981546, "grad_norm": 1.7283473014831543, "learning_rate": 9.019809634454369e-08, "loss": 0.3748, "step": 21503 }, { "epoch": 2.8756351965766247, "grad_norm": 1.7435520887374878, "learning_rate": 9.000472539088201e-08, "loss": 0.3847, "step": 21504 }, { "epoch": 2.8757689221717038, "grad_norm": 1.4107561111450195, "learning_rate": 8.981156100158928e-08, "loss": 0.3543, "step": 21505 }, { "epoch": 2.8759026477667824, "grad_norm": 1.5231406688690186, "learning_rate": 8.961860318069115e-08, "loss": 0.3138, "step": 21506 }, { "epoch": 2.8760363733618615, "grad_norm": 1.5713320970535278, "learning_rate": 8.942585193220998e-08, "loss": 0.3726, "step": 21507 }, { "epoch": 2.8761700989569405, "grad_norm": 1.6168357133865356, "learning_rate": 8.923330726016366e-08, "loss": 0.3714, "step": 21508 }, { "epoch": 2.876303824552019, "grad_norm": 1.6079397201538086, "learning_rate": 8.904096916856452e-08, "loss": 0.3761, "step": 21509 }, { "epoch": 2.8764375501470982, "grad_norm": 1.6095151901245117, "learning_rate": 8.884883766142494e-08, "loss": 0.3508, "step": 21510 }, { "epoch": 2.876571275742177, "grad_norm": 1.5530354976654053, "learning_rate": 8.865691274274502e-08, "loss": 0.3628, "step": 21511 }, { "epoch": 2.876705001337256, "grad_norm": 1.8362239599227905, "learning_rate": 8.846519441652935e-08, "loss": 0.4241, "step": 21512 }, { "epoch": 2.876838726932335, "grad_norm": 1.554849624633789, "learning_rate": 8.827368268677139e-08, "loss": 0.359, "step": 21513 }, { "epoch": 2.8769724525274136, "grad_norm": 1.431810975074768, "learning_rate": 8.808237755746352e-08, "loss": 0.3229, "step": 21514 }, { "epoch": 2.8771061781224927, "grad_norm": 1.369589924812317, "learning_rate": 8.789127903259586e-08, "loss": 0.3654, "step": 21515 }, { "epoch": 2.8772399037175713, "grad_norm": 1.506874442100525, "learning_rate": 8.770038711614747e-08, "loss": 0.3351, "step": 21516 }, { "epoch": 2.8773736293126504, "grad_norm": 1.4686498641967773, "learning_rate": 8.750970181210072e-08, "loss": 0.3278, "step": 21517 }, { "epoch": 2.8775073549077295, "grad_norm": 1.7377760410308838, "learning_rate": 8.731922312442909e-08, "loss": 0.3807, "step": 21518 }, { "epoch": 2.877641080502808, "grad_norm": 1.4389381408691406, "learning_rate": 8.712895105710162e-08, "loss": 0.3332, "step": 21519 }, { "epoch": 2.877774806097887, "grad_norm": 1.604592204093933, "learning_rate": 8.693888561408625e-08, "loss": 0.3578, "step": 21520 }, { "epoch": 2.877908531692966, "grad_norm": 1.5750700235366821, "learning_rate": 8.674902679934427e-08, "loss": 0.3401, "step": 21521 }, { "epoch": 2.878042257288045, "grad_norm": 1.4906057119369507, "learning_rate": 8.655937461683362e-08, "loss": 0.3222, "step": 21522 }, { "epoch": 2.878175982883124, "grad_norm": 1.517045021057129, "learning_rate": 8.636992907050556e-08, "loss": 0.3651, "step": 21523 }, { "epoch": 2.878309708478203, "grad_norm": 1.5600850582122803, "learning_rate": 8.618069016431029e-08, "loss": 0.3113, "step": 21524 }, { "epoch": 2.8784434340732816, "grad_norm": 1.3813369274139404, "learning_rate": 8.599165790219133e-08, "loss": 0.353, "step": 21525 }, { "epoch": 2.8785771596683603, "grad_norm": 1.6630665063858032, "learning_rate": 8.580283228809105e-08, "loss": 0.3633, "step": 21526 }, { "epoch": 2.8787108852634393, "grad_norm": 1.6878085136413574, "learning_rate": 8.5614213325943e-08, "loss": 0.3902, "step": 21527 }, { "epoch": 2.8788446108585184, "grad_norm": 1.4341968297958374, "learning_rate": 8.542580101967957e-08, "loss": 0.3736, "step": 21528 }, { "epoch": 2.8789783364535975, "grad_norm": 1.3677970170974731, "learning_rate": 8.523759537322873e-08, "loss": 0.3413, "step": 21529 }, { "epoch": 2.879112062048676, "grad_norm": 1.5509475469589233, "learning_rate": 8.50495963905118e-08, "loss": 0.3793, "step": 21530 }, { "epoch": 2.879245787643755, "grad_norm": 1.5249499082565308, "learning_rate": 8.486180407544897e-08, "loss": 0.3523, "step": 21531 }, { "epoch": 2.879379513238834, "grad_norm": 1.6444282531738281, "learning_rate": 8.467421843195488e-08, "loss": 0.4166, "step": 21532 }, { "epoch": 2.879513238833913, "grad_norm": 1.4829626083374023, "learning_rate": 8.448683946393643e-08, "loss": 0.3282, "step": 21533 }, { "epoch": 2.879646964428992, "grad_norm": 1.6491841077804565, "learning_rate": 8.42996671753038e-08, "loss": 0.3736, "step": 21534 }, { "epoch": 2.8797806900240706, "grad_norm": 1.7757115364074707, "learning_rate": 8.41127015699561e-08, "loss": 0.3683, "step": 21535 }, { "epoch": 2.8799144156191496, "grad_norm": 1.374893307685852, "learning_rate": 8.392594265179022e-08, "loss": 0.3633, "step": 21536 }, { "epoch": 2.8800481412142283, "grad_norm": 1.5248851776123047, "learning_rate": 8.373939042469969e-08, "loss": 0.3385, "step": 21537 }, { "epoch": 2.8801818668093073, "grad_norm": 1.4480652809143066, "learning_rate": 8.355304489257254e-08, "loss": 0.3111, "step": 21538 }, { "epoch": 2.8803155924043864, "grad_norm": 1.4269475936889648, "learning_rate": 8.336690605929343e-08, "loss": 0.3367, "step": 21539 }, { "epoch": 2.880449317999465, "grad_norm": 1.7795159816741943, "learning_rate": 8.318097392874147e-08, "loss": 0.4103, "step": 21540 }, { "epoch": 2.880583043594544, "grad_norm": 1.5217500925064087, "learning_rate": 8.299524850479357e-08, "loss": 0.3972, "step": 21541 }, { "epoch": 2.8807167691896227, "grad_norm": 1.6410175561904907, "learning_rate": 8.280972979131885e-08, "loss": 0.3483, "step": 21542 }, { "epoch": 2.880850494784702, "grad_norm": 1.495924711227417, "learning_rate": 8.262441779218644e-08, "loss": 0.3916, "step": 21543 }, { "epoch": 2.880984220379781, "grad_norm": 1.6971803903579712, "learning_rate": 8.24393125112577e-08, "loss": 0.393, "step": 21544 }, { "epoch": 2.8811179459748595, "grad_norm": 1.6271405220031738, "learning_rate": 8.225441395239176e-08, "loss": 0.4159, "step": 21545 }, { "epoch": 2.8812516715699386, "grad_norm": 1.6666885614395142, "learning_rate": 8.20697221194422e-08, "loss": 0.3572, "step": 21546 }, { "epoch": 2.881385397165017, "grad_norm": 1.7279062271118164, "learning_rate": 8.188523701625928e-08, "loss": 0.4284, "step": 21547 }, { "epoch": 2.8815191227600963, "grad_norm": 1.7403970956802368, "learning_rate": 8.170095864668881e-08, "loss": 0.3495, "step": 21548 }, { "epoch": 2.8816528483551753, "grad_norm": 1.5310173034667969, "learning_rate": 8.151688701456884e-08, "loss": 0.4002, "step": 21549 }, { "epoch": 2.881786573950254, "grad_norm": 1.5115386247634888, "learning_rate": 8.133302212373961e-08, "loss": 0.3239, "step": 21550 }, { "epoch": 2.881920299545333, "grad_norm": 1.5396368503570557, "learning_rate": 8.114936397803252e-08, "loss": 0.3453, "step": 21551 }, { "epoch": 2.8820540251404116, "grad_norm": 1.5014476776123047, "learning_rate": 8.09659125812745e-08, "loss": 0.3661, "step": 21552 }, { "epoch": 2.8821877507354907, "grad_norm": 1.3916600942611694, "learning_rate": 8.07826679372925e-08, "loss": 0.3123, "step": 21553 }, { "epoch": 2.88232147633057, "grad_norm": 1.5446007251739502, "learning_rate": 8.059963004990234e-08, "loss": 0.3154, "step": 21554 }, { "epoch": 2.8824552019256484, "grad_norm": 1.567206621170044, "learning_rate": 8.041679892292209e-08, "loss": 0.3409, "step": 21555 }, { "epoch": 2.8825889275207275, "grad_norm": 1.7728365659713745, "learning_rate": 8.023417456016202e-08, "loss": 0.3876, "step": 21556 }, { "epoch": 2.882722653115806, "grad_norm": 1.587586522102356, "learning_rate": 8.005175696542688e-08, "loss": 0.3533, "step": 21557 }, { "epoch": 2.882856378710885, "grad_norm": 1.4738870859146118, "learning_rate": 7.98695461425214e-08, "loss": 0.3392, "step": 21558 }, { "epoch": 2.8829901043059643, "grad_norm": 1.4089224338531494, "learning_rate": 7.968754209524254e-08, "loss": 0.2953, "step": 21559 }, { "epoch": 2.8831238299010433, "grad_norm": 1.5764145851135254, "learning_rate": 7.950574482738505e-08, "loss": 0.3526, "step": 21560 }, { "epoch": 2.883257555496122, "grad_norm": 1.380562424659729, "learning_rate": 7.932415434273589e-08, "loss": 0.2913, "step": 21561 }, { "epoch": 2.883391281091201, "grad_norm": 1.7031933069229126, "learning_rate": 7.914277064508314e-08, "loss": 0.4326, "step": 21562 }, { "epoch": 2.8835250066862796, "grad_norm": 1.8635889291763306, "learning_rate": 7.896159373820489e-08, "loss": 0.3935, "step": 21563 }, { "epoch": 2.8836587322813587, "grad_norm": 1.550293207168579, "learning_rate": 7.878062362587924e-08, "loss": 0.3727, "step": 21564 }, { "epoch": 2.883792457876438, "grad_norm": 1.4565457105636597, "learning_rate": 7.859986031187761e-08, "loss": 0.3757, "step": 21565 }, { "epoch": 2.8839261834715164, "grad_norm": 1.6653430461883545, "learning_rate": 7.84193037999692e-08, "loss": 0.3808, "step": 21566 }, { "epoch": 2.8840599090665955, "grad_norm": 1.6085915565490723, "learning_rate": 7.823895409391546e-08, "loss": 0.3998, "step": 21567 }, { "epoch": 2.884193634661674, "grad_norm": 1.5220946073532104, "learning_rate": 7.805881119747672e-08, "loss": 0.3324, "step": 21568 }, { "epoch": 2.884327360256753, "grad_norm": 1.4650648832321167, "learning_rate": 7.787887511440883e-08, "loss": 0.3261, "step": 21569 }, { "epoch": 2.8844610858518323, "grad_norm": 1.344678521156311, "learning_rate": 7.769914584845994e-08, "loss": 0.3758, "step": 21570 }, { "epoch": 2.884594811446911, "grad_norm": 1.5516005754470825, "learning_rate": 7.751962340337815e-08, "loss": 0.3776, "step": 21571 }, { "epoch": 2.88472853704199, "grad_norm": 1.5888960361480713, "learning_rate": 7.734030778290602e-08, "loss": 0.38, "step": 21572 }, { "epoch": 2.8848622626370686, "grad_norm": 1.5200307369232178, "learning_rate": 7.716119899077834e-08, "loss": 0.3259, "step": 21573 }, { "epoch": 2.8849959882321476, "grad_norm": 1.4606342315673828, "learning_rate": 7.698229703073213e-08, "loss": 0.354, "step": 21574 }, { "epoch": 2.8851297138272267, "grad_norm": 1.3554993867874146, "learning_rate": 7.680360190649327e-08, "loss": 0.347, "step": 21575 }, { "epoch": 2.8852634394223053, "grad_norm": 1.7382012605667114, "learning_rate": 7.662511362178993e-08, "loss": 0.3517, "step": 21576 }, { "epoch": 2.8853971650173844, "grad_norm": 1.4775586128234863, "learning_rate": 7.644683218033911e-08, "loss": 0.318, "step": 21577 }, { "epoch": 2.885530890612463, "grad_norm": 1.8527549505233765, "learning_rate": 7.626875758585673e-08, "loss": 0.4231, "step": 21578 }, { "epoch": 2.885664616207542, "grad_norm": 1.694792628288269, "learning_rate": 7.60908898420587e-08, "loss": 0.3608, "step": 21579 }, { "epoch": 2.885798341802621, "grad_norm": 1.5652803182601929, "learning_rate": 7.591322895264874e-08, "loss": 0.341, "step": 21580 }, { "epoch": 2.8859320673977, "grad_norm": 1.4466875791549683, "learning_rate": 7.573577492133055e-08, "loss": 0.3392, "step": 21581 }, { "epoch": 2.886065792992779, "grad_norm": 1.4100452661514282, "learning_rate": 7.55585277518045e-08, "loss": 0.3368, "step": 21582 }, { "epoch": 2.8861995185878575, "grad_norm": 1.4746037721633911, "learning_rate": 7.53814874477643e-08, "loss": 0.3529, "step": 21583 }, { "epoch": 2.8863332441829366, "grad_norm": 1.743467926979065, "learning_rate": 7.520465401290033e-08, "loss": 0.396, "step": 21584 }, { "epoch": 2.8864669697780156, "grad_norm": 1.4487121105194092, "learning_rate": 7.502802745089743e-08, "loss": 0.3497, "step": 21585 }, { "epoch": 2.8866006953730943, "grad_norm": 1.563948631286621, "learning_rate": 7.485160776543931e-08, "loss": 0.3491, "step": 21586 }, { "epoch": 2.8867344209681733, "grad_norm": 1.705986499786377, "learning_rate": 7.467539496020082e-08, "loss": 0.3926, "step": 21587 }, { "epoch": 2.886868146563252, "grad_norm": 1.6362204551696777, "learning_rate": 7.44993890388579e-08, "loss": 0.3408, "step": 21588 }, { "epoch": 2.887001872158331, "grad_norm": 1.4489270448684692, "learning_rate": 7.43235900050765e-08, "loss": 0.3608, "step": 21589 }, { "epoch": 2.88713559775341, "grad_norm": 1.46921968460083, "learning_rate": 7.414799786252147e-08, "loss": 0.3157, "step": 21590 }, { "epoch": 2.8872693233484887, "grad_norm": 1.479576587677002, "learning_rate": 7.397261261485434e-08, "loss": 0.3196, "step": 21591 }, { "epoch": 2.887403048943568, "grad_norm": 1.6558668613433838, "learning_rate": 7.379743426572883e-08, "loss": 0.3914, "step": 21592 }, { "epoch": 2.8875367745386464, "grad_norm": 1.4549545049667358, "learning_rate": 7.36224628187987e-08, "loss": 0.3526, "step": 21593 }, { "epoch": 2.8876705001337255, "grad_norm": 1.4747297763824463, "learning_rate": 7.344769827770882e-08, "loss": 0.3554, "step": 21594 }, { "epoch": 2.8878042257288046, "grad_norm": 1.7839456796646118, "learning_rate": 7.327314064610403e-08, "loss": 0.3969, "step": 21595 }, { "epoch": 2.8879379513238836, "grad_norm": 1.547399640083313, "learning_rate": 7.309878992762142e-08, "loss": 0.3358, "step": 21596 }, { "epoch": 2.8880716769189623, "grad_norm": 1.7318370342254639, "learning_rate": 7.292464612589478e-08, "loss": 0.412, "step": 21597 }, { "epoch": 2.8882054025140413, "grad_norm": 1.4341596364974976, "learning_rate": 7.275070924455563e-08, "loss": 0.3635, "step": 21598 }, { "epoch": 2.88833912810912, "grad_norm": 1.4663244485855103, "learning_rate": 7.257697928722774e-08, "loss": 0.3732, "step": 21599 }, { "epoch": 2.888472853704199, "grad_norm": 1.7320536375045776, "learning_rate": 7.240345625753486e-08, "loss": 0.3328, "step": 21600 }, { "epoch": 2.888606579299278, "grad_norm": 1.380563497543335, "learning_rate": 7.22301401590908e-08, "loss": 0.3112, "step": 21601 }, { "epoch": 2.8887403048943567, "grad_norm": 1.6667157411575317, "learning_rate": 7.205703099551042e-08, "loss": 0.3676, "step": 21602 }, { "epoch": 2.888874030489436, "grad_norm": 1.649115800857544, "learning_rate": 7.188412877040086e-08, "loss": 0.3919, "step": 21603 }, { "epoch": 2.8890077560845144, "grad_norm": 1.5852653980255127, "learning_rate": 7.171143348736475e-08, "loss": 0.3465, "step": 21604 }, { "epoch": 2.8891414816795935, "grad_norm": 1.5737032890319824, "learning_rate": 7.153894515000592e-08, "loss": 0.3816, "step": 21605 }, { "epoch": 2.8892752072746726, "grad_norm": 1.5107169151306152, "learning_rate": 7.136666376191703e-08, "loss": 0.3756, "step": 21606 }, { "epoch": 2.889408932869751, "grad_norm": 1.8240649700164795, "learning_rate": 7.119458932668855e-08, "loss": 0.4003, "step": 21607 }, { "epoch": 2.8895426584648303, "grad_norm": 1.5535826683044434, "learning_rate": 7.10227218479076e-08, "loss": 0.3798, "step": 21608 }, { "epoch": 2.889676384059909, "grad_norm": 1.5502957105636597, "learning_rate": 7.085106132915798e-08, "loss": 0.3403, "step": 21609 }, { "epoch": 2.889810109654988, "grad_norm": 1.4516522884368896, "learning_rate": 7.067960777401684e-08, "loss": 0.363, "step": 21610 }, { "epoch": 2.889943835250067, "grad_norm": 1.4155759811401367, "learning_rate": 7.050836118605686e-08, "loss": 0.2935, "step": 21611 }, { "epoch": 2.8900775608451457, "grad_norm": 1.6853954792022705, "learning_rate": 7.033732156884965e-08, "loss": 0.3722, "step": 21612 }, { "epoch": 2.8902112864402247, "grad_norm": 1.5842230319976807, "learning_rate": 7.0166488925959e-08, "loss": 0.3713, "step": 21613 }, { "epoch": 2.8903450120353034, "grad_norm": 1.579323649406433, "learning_rate": 6.999586326094654e-08, "loss": 0.3492, "step": 21614 }, { "epoch": 2.8904787376303824, "grad_norm": 1.4228390455245972, "learning_rate": 6.982544457736717e-08, "loss": 0.3619, "step": 21615 }, { "epoch": 2.8906124632254615, "grad_norm": 1.6732838153839111, "learning_rate": 6.965523287877473e-08, "loss": 0.3465, "step": 21616 }, { "epoch": 2.89074618882054, "grad_norm": 1.5317559242248535, "learning_rate": 6.94852281687175e-08, "loss": 0.3797, "step": 21617 }, { "epoch": 2.890879914415619, "grad_norm": 1.5683553218841553, "learning_rate": 6.931543045073708e-08, "loss": 0.3631, "step": 21618 }, { "epoch": 2.891013640010698, "grad_norm": 1.3311394453048706, "learning_rate": 6.914583972837508e-08, "loss": 0.3351, "step": 21619 }, { "epoch": 2.891147365605777, "grad_norm": 1.5298680067062378, "learning_rate": 6.897645600516311e-08, "loss": 0.3557, "step": 21620 }, { "epoch": 2.891281091200856, "grad_norm": 1.7033016681671143, "learning_rate": 6.880727928463615e-08, "loss": 0.3739, "step": 21621 }, { "epoch": 2.8914148167959346, "grad_norm": 1.5434212684631348, "learning_rate": 6.863830957031803e-08, "loss": 0.3935, "step": 21622 }, { "epoch": 2.8915485423910137, "grad_norm": 1.5689938068389893, "learning_rate": 6.846954686572927e-08, "loss": 0.3284, "step": 21623 }, { "epoch": 2.8916822679860923, "grad_norm": 1.5319631099700928, "learning_rate": 6.830099117439149e-08, "loss": 0.3352, "step": 21624 }, { "epoch": 2.8918159935811714, "grad_norm": 1.6633013486862183, "learning_rate": 6.813264249981522e-08, "loss": 0.387, "step": 21625 }, { "epoch": 2.8919497191762504, "grad_norm": 1.5380678176879883, "learning_rate": 6.796450084550988e-08, "loss": 0.3664, "step": 21626 }, { "epoch": 2.8920834447713295, "grad_norm": 1.6302703619003296, "learning_rate": 6.779656621498154e-08, "loss": 0.3389, "step": 21627 }, { "epoch": 2.892217170366408, "grad_norm": 1.5005167722702026, "learning_rate": 6.762883861172853e-08, "loss": 0.3668, "step": 21628 }, { "epoch": 2.8923508959614868, "grad_norm": 1.6770083904266357, "learning_rate": 6.746131803924915e-08, "loss": 0.359, "step": 21629 }, { "epoch": 2.892484621556566, "grad_norm": 1.4912923574447632, "learning_rate": 6.729400450103285e-08, "loss": 0.3617, "step": 21630 }, { "epoch": 2.892618347151645, "grad_norm": 1.7437427043914795, "learning_rate": 6.712689800057015e-08, "loss": 0.3822, "step": 21631 }, { "epoch": 2.892752072746724, "grad_norm": 1.3525652885437012, "learning_rate": 6.695999854134161e-08, "loss": 0.3111, "step": 21632 }, { "epoch": 2.8928857983418026, "grad_norm": 1.6839276552200317, "learning_rate": 6.679330612682666e-08, "loss": 0.3803, "step": 21633 }, { "epoch": 2.8930195239368817, "grad_norm": 1.6583653688430786, "learning_rate": 6.662682076050031e-08, "loss": 0.3494, "step": 21634 }, { "epoch": 2.8931532495319603, "grad_norm": 1.8219316005706787, "learning_rate": 6.646054244583311e-08, "loss": 0.3549, "step": 21635 }, { "epoch": 2.8932869751270394, "grad_norm": 1.554471492767334, "learning_rate": 6.629447118629006e-08, "loss": 0.3544, "step": 21636 }, { "epoch": 2.8934207007221184, "grad_norm": 1.5477598905563354, "learning_rate": 6.612860698533397e-08, "loss": 0.3173, "step": 21637 }, { "epoch": 2.893554426317197, "grad_norm": 1.5822207927703857, "learning_rate": 6.596294984642093e-08, "loss": 0.3532, "step": 21638 }, { "epoch": 2.893688151912276, "grad_norm": 1.6077431440353394, "learning_rate": 6.579749977300488e-08, "loss": 0.3674, "step": 21639 }, { "epoch": 2.8938218775073548, "grad_norm": 1.3613229990005493, "learning_rate": 6.563225676853302e-08, "loss": 0.3132, "step": 21640 }, { "epoch": 2.893955603102434, "grad_norm": 1.56528639793396, "learning_rate": 6.546722083645151e-08, "loss": 0.3784, "step": 21641 }, { "epoch": 2.894089328697513, "grad_norm": 1.5418583154678345, "learning_rate": 6.530239198019872e-08, "loss": 0.3497, "step": 21642 }, { "epoch": 2.8942230542925915, "grad_norm": 1.5107418298721313, "learning_rate": 6.513777020321188e-08, "loss": 0.3464, "step": 21643 }, { "epoch": 2.8943567798876706, "grad_norm": 1.664481282234192, "learning_rate": 6.497335550892048e-08, "loss": 0.3538, "step": 21644 }, { "epoch": 2.894490505482749, "grad_norm": 1.6998982429504395, "learning_rate": 6.480914790075399e-08, "loss": 0.3757, "step": 21645 }, { "epoch": 2.8946242310778283, "grad_norm": 1.543299674987793, "learning_rate": 6.464514738213301e-08, "loss": 0.3694, "step": 21646 }, { "epoch": 2.8947579566729074, "grad_norm": 1.497301697731018, "learning_rate": 6.448135395647703e-08, "loss": 0.3017, "step": 21647 }, { "epoch": 2.894891682267986, "grad_norm": 1.543073296546936, "learning_rate": 6.43177676272e-08, "loss": 0.3939, "step": 21648 }, { "epoch": 2.895025407863065, "grad_norm": 1.47458815574646, "learning_rate": 6.415438839771137e-08, "loss": 0.3197, "step": 21649 }, { "epoch": 2.8951591334581437, "grad_norm": 1.4781126976013184, "learning_rate": 6.399121627141736e-08, "loss": 0.3114, "step": 21650 }, { "epoch": 2.8952928590532228, "grad_norm": 1.7995100021362305, "learning_rate": 6.382825125171854e-08, "loss": 0.367, "step": 21651 }, { "epoch": 2.895426584648302, "grad_norm": 1.4060759544372559, "learning_rate": 6.366549334201222e-08, "loss": 0.3206, "step": 21652 }, { "epoch": 2.8955603102433805, "grad_norm": 1.5838254690170288, "learning_rate": 6.350294254569012e-08, "loss": 0.3656, "step": 21653 }, { "epoch": 2.8956940358384595, "grad_norm": 1.448468804359436, "learning_rate": 6.334059886614063e-08, "loss": 0.3735, "step": 21654 }, { "epoch": 2.895827761433538, "grad_norm": 1.6509809494018555, "learning_rate": 6.317846230674885e-08, "loss": 0.3843, "step": 21655 }, { "epoch": 2.895961487028617, "grad_norm": 1.6306124925613403, "learning_rate": 6.301653287089315e-08, "loss": 0.3635, "step": 21656 }, { "epoch": 2.8960952126236963, "grad_norm": 1.6412004232406616, "learning_rate": 6.285481056194976e-08, "loss": 0.373, "step": 21657 }, { "epoch": 2.896228938218775, "grad_norm": 1.57150137424469, "learning_rate": 6.269329538328817e-08, "loss": 0.3722, "step": 21658 }, { "epoch": 2.896362663813854, "grad_norm": 1.6219292879104614, "learning_rate": 6.253198733827681e-08, "loss": 0.3654, "step": 21659 }, { "epoch": 2.8964963894089326, "grad_norm": 1.3908092975616455, "learning_rate": 6.237088643027633e-08, "loss": 0.2873, "step": 21660 }, { "epoch": 2.8966301150040117, "grad_norm": 1.5875223875045776, "learning_rate": 6.220999266264516e-08, "loss": 0.3334, "step": 21661 }, { "epoch": 2.8967638405990908, "grad_norm": 1.7054367065429688, "learning_rate": 6.204930603873838e-08, "loss": 0.4316, "step": 21662 }, { "epoch": 2.89689756619417, "grad_norm": 1.5833126306533813, "learning_rate": 6.188882656190331e-08, "loss": 0.3808, "step": 21663 }, { "epoch": 2.8970312917892485, "grad_norm": 1.3678572177886963, "learning_rate": 6.172855423548618e-08, "loss": 0.3646, "step": 21664 }, { "epoch": 2.8971650173843275, "grad_norm": 1.7285796403884888, "learning_rate": 6.156848906282764e-08, "loss": 0.4377, "step": 21665 }, { "epoch": 2.897298742979406, "grad_norm": 1.6165828704833984, "learning_rate": 6.140863104726391e-08, "loss": 0.3548, "step": 21666 }, { "epoch": 2.897432468574485, "grad_norm": 1.6657164096832275, "learning_rate": 6.124898019212677e-08, "loss": 0.3501, "step": 21667 }, { "epoch": 2.8975661941695643, "grad_norm": 1.6752368211746216, "learning_rate": 6.108953650074467e-08, "loss": 0.3568, "step": 21668 }, { "epoch": 2.897699919764643, "grad_norm": 1.5614625215530396, "learning_rate": 6.09302999764394e-08, "loss": 0.3671, "step": 21669 }, { "epoch": 2.897833645359722, "grad_norm": 1.6195648908615112, "learning_rate": 6.077127062253274e-08, "loss": 0.3537, "step": 21670 }, { "epoch": 2.8979673709548006, "grad_norm": 1.2677838802337646, "learning_rate": 6.06124484423376e-08, "loss": 0.3348, "step": 21671 }, { "epoch": 2.8981010965498797, "grad_norm": 1.5156317949295044, "learning_rate": 6.045383343916466e-08, "loss": 0.3715, "step": 21672 }, { "epoch": 2.8982348221449588, "grad_norm": 1.4525957107543945, "learning_rate": 6.02954256163213e-08, "loss": 0.3281, "step": 21673 }, { "epoch": 2.8983685477400374, "grad_norm": 1.6193652153015137, "learning_rate": 6.013722497710817e-08, "loss": 0.3517, "step": 21674 }, { "epoch": 2.8985022733351165, "grad_norm": 1.4548860788345337, "learning_rate": 5.997923152482377e-08, "loss": 0.3736, "step": 21675 }, { "epoch": 2.898635998930195, "grad_norm": 1.5066725015640259, "learning_rate": 5.982144526275991e-08, "loss": 0.3401, "step": 21676 }, { "epoch": 2.898769724525274, "grad_norm": 1.5357369184494019, "learning_rate": 5.966386619420617e-08, "loss": 0.3482, "step": 21677 }, { "epoch": 2.898903450120353, "grad_norm": 1.4644618034362793, "learning_rate": 5.9506494322447704e-08, "loss": 0.352, "step": 21678 }, { "epoch": 2.899037175715432, "grad_norm": 1.4940434694290161, "learning_rate": 5.934932965076412e-08, "loss": 0.3548, "step": 21679 }, { "epoch": 2.899170901310511, "grad_norm": 1.6580296754837036, "learning_rate": 5.919237218243168e-08, "loss": 0.3066, "step": 21680 }, { "epoch": 2.8993046269055895, "grad_norm": 1.5230480432510376, "learning_rate": 5.903562192072221e-08, "loss": 0.3557, "step": 21681 }, { "epoch": 2.8994383525006686, "grad_norm": 1.3444669246673584, "learning_rate": 5.887907886890199e-08, "loss": 0.3255, "step": 21682 }, { "epoch": 2.8995720780957477, "grad_norm": 1.5545517206192017, "learning_rate": 5.8722743030236174e-08, "loss": 0.3189, "step": 21683 }, { "epoch": 2.8997058036908263, "grad_norm": 1.583840012550354, "learning_rate": 5.856661440797995e-08, "loss": 0.3773, "step": 21684 }, { "epoch": 2.8998395292859054, "grad_norm": 1.5190311670303345, "learning_rate": 5.841069300539182e-08, "loss": 0.3729, "step": 21685 }, { "epoch": 2.899973254880984, "grad_norm": 1.6228069067001343, "learning_rate": 5.8254978825718065e-08, "loss": 0.3117, "step": 21686 }, { "epoch": 2.900106980476063, "grad_norm": 1.6367323398590088, "learning_rate": 5.80994718722061e-08, "loss": 0.3752, "step": 21687 }, { "epoch": 2.900240706071142, "grad_norm": 1.7038311958312988, "learning_rate": 5.794417214809889e-08, "loss": 0.3723, "step": 21688 }, { "epoch": 2.9003744316662208, "grad_norm": 1.493650197982788, "learning_rate": 5.77890796566305e-08, "loss": 0.3082, "step": 21689 }, { "epoch": 2.9005081572613, "grad_norm": 1.9125847816467285, "learning_rate": 5.763419440103613e-08, "loss": 0.3558, "step": 21690 }, { "epoch": 2.9006418828563785, "grad_norm": 1.5828602313995361, "learning_rate": 5.747951638454208e-08, "loss": 0.348, "step": 21691 }, { "epoch": 2.9007756084514575, "grad_norm": 1.4001460075378418, "learning_rate": 5.7325045610374665e-08, "loss": 0.3671, "step": 21692 }, { "epoch": 2.9009093340465366, "grad_norm": 1.6218438148498535, "learning_rate": 5.7170782081751305e-08, "loss": 0.3728, "step": 21693 }, { "epoch": 2.9010430596416152, "grad_norm": 1.4938585758209229, "learning_rate": 5.701672580188944e-08, "loss": 0.3628, "step": 21694 }, { "epoch": 2.9011767852366943, "grad_norm": 1.7457612752914429, "learning_rate": 5.686287677399982e-08, "loss": 0.3984, "step": 21695 }, { "epoch": 2.901310510831773, "grad_norm": 1.6180546283721924, "learning_rate": 5.670923500128766e-08, "loss": 0.3446, "step": 21696 }, { "epoch": 2.901444236426852, "grad_norm": 1.530826210975647, "learning_rate": 5.655580048695819e-08, "loss": 0.3945, "step": 21697 }, { "epoch": 2.901577962021931, "grad_norm": 1.53468656539917, "learning_rate": 5.6402573234207725e-08, "loss": 0.365, "step": 21698 }, { "epoch": 2.90171168761701, "grad_norm": 1.5353302955627441, "learning_rate": 5.6249553246230384e-08, "loss": 0.3277, "step": 21699 }, { "epoch": 2.9018454132120888, "grad_norm": 1.6673094034194946, "learning_rate": 5.609674052621694e-08, "loss": 0.3571, "step": 21700 }, { "epoch": 2.901979138807168, "grad_norm": 1.7075071334838867, "learning_rate": 5.5944135077350415e-08, "loss": 0.3997, "step": 21701 }, { "epoch": 2.9021128644022465, "grad_norm": 1.6711736917495728, "learning_rate": 5.579173690281381e-08, "loss": 0.3844, "step": 21702 }, { "epoch": 2.9022465899973255, "grad_norm": 1.5767652988433838, "learning_rate": 5.5639546005782365e-08, "loss": 0.3543, "step": 21703 }, { "epoch": 2.9023803155924046, "grad_norm": 1.5851879119873047, "learning_rate": 5.5487562389429095e-08, "loss": 0.3866, "step": 21704 }, { "epoch": 2.9025140411874832, "grad_norm": 1.7123242616653442, "learning_rate": 5.533578605692147e-08, "loss": 0.4059, "step": 21705 }, { "epoch": 2.9026477667825623, "grad_norm": 1.6452350616455078, "learning_rate": 5.518421701142362e-08, "loss": 0.3711, "step": 21706 }, { "epoch": 2.902781492377641, "grad_norm": 1.5667481422424316, "learning_rate": 5.5032855256095254e-08, "loss": 0.3309, "step": 21707 }, { "epoch": 2.90291521797272, "grad_norm": 1.721821904182434, "learning_rate": 5.488170079408939e-08, "loss": 0.3992, "step": 21708 }, { "epoch": 2.903048943567799, "grad_norm": 1.512250304222107, "learning_rate": 5.473075362855906e-08, "loss": 0.3349, "step": 21709 }, { "epoch": 2.9031826691628777, "grad_norm": 1.4494457244873047, "learning_rate": 5.4580013762649544e-08, "loss": 0.3468, "step": 21710 }, { "epoch": 2.9033163947579568, "grad_norm": 1.7065660953521729, "learning_rate": 5.442948119950276e-08, "loss": 0.3778, "step": 21711 }, { "epoch": 2.9034501203530354, "grad_norm": 1.4483524560928345, "learning_rate": 5.427915594225619e-08, "loss": 0.3201, "step": 21712 }, { "epoch": 2.9035838459481145, "grad_norm": 1.622246503829956, "learning_rate": 5.412903799404401e-08, "loss": 0.3366, "step": 21713 }, { "epoch": 2.9037175715431935, "grad_norm": 1.6377480030059814, "learning_rate": 5.397912735799371e-08, "loss": 0.3744, "step": 21714 }, { "epoch": 2.903851297138272, "grad_norm": 1.588748812675476, "learning_rate": 5.382942403723279e-08, "loss": 0.3923, "step": 21715 }, { "epoch": 2.9039850227333512, "grad_norm": 1.4903801679611206, "learning_rate": 5.367992803487876e-08, "loss": 0.3117, "step": 21716 }, { "epoch": 2.90411874832843, "grad_norm": 1.6881256103515625, "learning_rate": 5.353063935405023e-08, "loss": 0.34, "step": 21717 }, { "epoch": 2.904252473923509, "grad_norm": 1.5486427545547485, "learning_rate": 5.338155799785694e-08, "loss": 0.3879, "step": 21718 }, { "epoch": 2.904386199518588, "grad_norm": 1.7677642107009888, "learning_rate": 5.323268396940751e-08, "loss": 0.3849, "step": 21719 }, { "epoch": 2.9045199251136666, "grad_norm": 1.7201359272003174, "learning_rate": 5.308401727180501e-08, "loss": 0.43, "step": 21720 }, { "epoch": 2.9046536507087457, "grad_norm": 1.5433242321014404, "learning_rate": 5.2935557908146976e-08, "loss": 0.3499, "step": 21721 }, { "epoch": 2.9047873763038243, "grad_norm": 1.516464352607727, "learning_rate": 5.27873058815298e-08, "loss": 0.3809, "step": 21722 }, { "epoch": 2.9049211018989034, "grad_norm": 1.6005887985229492, "learning_rate": 5.263926119504326e-08, "loss": 0.3759, "step": 21723 }, { "epoch": 2.9050548274939825, "grad_norm": 1.5587105751037598, "learning_rate": 5.249142385177153e-08, "loss": 0.3705, "step": 21724 }, { "epoch": 2.905188553089061, "grad_norm": 1.633468747138977, "learning_rate": 5.234379385479771e-08, "loss": 0.3469, "step": 21725 }, { "epoch": 2.90532227868414, "grad_norm": 1.5826988220214844, "learning_rate": 5.2196371207199336e-08, "loss": 0.3581, "step": 21726 }, { "epoch": 2.905456004279219, "grad_norm": 1.5138053894042969, "learning_rate": 5.20491559120484e-08, "loss": 0.3446, "step": 21727 }, { "epoch": 2.905589729874298, "grad_norm": 1.489418387413025, "learning_rate": 5.190214797241355e-08, "loss": 0.3225, "step": 21728 }, { "epoch": 2.905723455469377, "grad_norm": 1.4915026426315308, "learning_rate": 5.17553473913579e-08, "loss": 0.3445, "step": 21729 }, { "epoch": 2.905857181064456, "grad_norm": 1.6610913276672363, "learning_rate": 5.1608754171944555e-08, "loss": 0.3531, "step": 21730 }, { "epoch": 2.9059909066595346, "grad_norm": 1.5077239274978638, "learning_rate": 5.1462368317226616e-08, "loss": 0.3406, "step": 21731 }, { "epoch": 2.9061246322546133, "grad_norm": 1.4788247346878052, "learning_rate": 5.131618983025499e-08, "loss": 0.3553, "step": 21732 }, { "epoch": 2.9062583578496923, "grad_norm": 1.492851734161377, "learning_rate": 5.1170218714078346e-08, "loss": 0.3356, "step": 21733 }, { "epoch": 2.9063920834447714, "grad_norm": 1.5526649951934814, "learning_rate": 5.102445497173758e-08, "loss": 0.3466, "step": 21734 }, { "epoch": 2.9065258090398505, "grad_norm": 1.5814646482467651, "learning_rate": 5.0878898606272483e-08, "loss": 0.3513, "step": 21735 }, { "epoch": 2.906659534634929, "grad_norm": 1.4282602071762085, "learning_rate": 5.0733549620717306e-08, "loss": 0.3512, "step": 21736 }, { "epoch": 2.906793260230008, "grad_norm": 1.544682264328003, "learning_rate": 5.058840801809961e-08, "loss": 0.3655, "step": 21737 }, { "epoch": 2.906926985825087, "grad_norm": 1.6191850900650024, "learning_rate": 5.044347380144698e-08, "loss": 0.3771, "step": 21738 }, { "epoch": 2.907060711420166, "grad_norm": 1.308089017868042, "learning_rate": 5.0298746973778124e-08, "loss": 0.3209, "step": 21739 }, { "epoch": 2.907194437015245, "grad_norm": 1.5420504808425903, "learning_rate": 5.015422753811172e-08, "loss": 0.364, "step": 21740 }, { "epoch": 2.9073281626103236, "grad_norm": 1.5922136306762695, "learning_rate": 5.0009915497459815e-08, "loss": 0.3788, "step": 21741 }, { "epoch": 2.9074618882054026, "grad_norm": 1.5506490468978882, "learning_rate": 4.986581085483111e-08, "loss": 0.3389, "step": 21742 }, { "epoch": 2.9075956138004813, "grad_norm": 1.3692717552185059, "learning_rate": 4.972191361322654e-08, "loss": 0.334, "step": 21743 }, { "epoch": 2.9077293393955603, "grad_norm": 1.6222630739212036, "learning_rate": 4.9578223775647026e-08, "loss": 0.3751, "step": 21744 }, { "epoch": 2.9078630649906394, "grad_norm": 1.859039545059204, "learning_rate": 4.943474134508908e-08, "loss": 0.3986, "step": 21745 }, { "epoch": 2.907996790585718, "grad_norm": 1.34371018409729, "learning_rate": 4.929146632454251e-08, "loss": 0.3276, "step": 21746 }, { "epoch": 2.908130516180797, "grad_norm": 1.6226098537445068, "learning_rate": 4.914839871699273e-08, "loss": 0.3308, "step": 21747 }, { "epoch": 2.9082642417758757, "grad_norm": 1.4297494888305664, "learning_rate": 4.900553852542289e-08, "loss": 0.3684, "step": 21748 }, { "epoch": 2.908397967370955, "grad_norm": 1.660927653312683, "learning_rate": 4.8862885752810615e-08, "loss": 0.3428, "step": 21749 }, { "epoch": 2.908531692966034, "grad_norm": 1.4111627340316772, "learning_rate": 4.872044040212909e-08, "loss": 0.3693, "step": 21750 }, { "epoch": 2.9086654185611125, "grad_norm": 1.4912859201431274, "learning_rate": 4.857820247634815e-08, "loss": 0.3686, "step": 21751 }, { "epoch": 2.9087991441561916, "grad_norm": 1.50459623336792, "learning_rate": 4.843617197843209e-08, "loss": 0.3446, "step": 21752 }, { "epoch": 2.90893286975127, "grad_norm": 1.5881121158599854, "learning_rate": 4.8294348911340774e-08, "loss": 0.3806, "step": 21753 }, { "epoch": 2.9090665953463493, "grad_norm": 1.5604761838912964, "learning_rate": 4.815273327803183e-08, "loss": 0.379, "step": 21754 }, { "epoch": 2.9092003209414283, "grad_norm": 1.5102523565292358, "learning_rate": 4.8011325081455115e-08, "loss": 0.3187, "step": 21755 }, { "epoch": 2.909334046536507, "grad_norm": 1.5953079462051392, "learning_rate": 4.787012432456051e-08, "loss": 0.3798, "step": 21756 }, { "epoch": 2.909467772131586, "grad_norm": 1.6200634241104126, "learning_rate": 4.772913101028898e-08, "loss": 0.3789, "step": 21757 }, { "epoch": 2.9096014977266647, "grad_norm": 1.362520456314087, "learning_rate": 4.7588345141580396e-08, "loss": 0.3196, "step": 21758 }, { "epoch": 2.9097352233217437, "grad_norm": 1.5362629890441895, "learning_rate": 4.744776672137019e-08, "loss": 0.3748, "step": 21759 }, { "epoch": 2.909868948916823, "grad_norm": 1.36909019947052, "learning_rate": 4.730739575258714e-08, "loss": 0.3069, "step": 21760 }, { "epoch": 2.9100026745119014, "grad_norm": 1.681099772453308, "learning_rate": 4.716723223815778e-08, "loss": 0.3955, "step": 21761 }, { "epoch": 2.9101364001069805, "grad_norm": 1.5328775644302368, "learning_rate": 4.702727618100422e-08, "loss": 0.3372, "step": 21762 }, { "epoch": 2.910270125702059, "grad_norm": 1.5747220516204834, "learning_rate": 4.688752758404302e-08, "loss": 0.3618, "step": 21763 }, { "epoch": 2.910403851297138, "grad_norm": 1.6061378717422485, "learning_rate": 4.67479864501863e-08, "loss": 0.3589, "step": 21764 }, { "epoch": 2.9105375768922173, "grad_norm": 1.631950855255127, "learning_rate": 4.660865278234394e-08, "loss": 0.3749, "step": 21765 }, { "epoch": 2.9106713024872963, "grad_norm": 1.5810906887054443, "learning_rate": 4.64695265834203e-08, "loss": 0.3389, "step": 21766 }, { "epoch": 2.910805028082375, "grad_norm": 1.4219386577606201, "learning_rate": 4.633060785631527e-08, "loss": 0.3644, "step": 21767 }, { "epoch": 2.910938753677454, "grad_norm": 1.6579660177230835, "learning_rate": 4.61918966039232e-08, "loss": 0.409, "step": 21768 }, { "epoch": 2.9110724792725327, "grad_norm": 1.4656703472137451, "learning_rate": 4.6053392829136234e-08, "loss": 0.3708, "step": 21769 }, { "epoch": 2.9112062048676117, "grad_norm": 1.7243539094924927, "learning_rate": 4.591509653484205e-08, "loss": 0.3607, "step": 21770 }, { "epoch": 2.911339930462691, "grad_norm": 1.5317944288253784, "learning_rate": 4.5777007723922796e-08, "loss": 0.348, "step": 21771 }, { "epoch": 2.9114736560577694, "grad_norm": 1.5792025327682495, "learning_rate": 4.563912639925616e-08, "loss": 0.3248, "step": 21772 }, { "epoch": 2.9116073816528485, "grad_norm": 1.4057174921035767, "learning_rate": 4.550145256371652e-08, "loss": 0.332, "step": 21773 }, { "epoch": 2.911741107247927, "grad_norm": 1.4943373203277588, "learning_rate": 4.53639862201738e-08, "loss": 0.302, "step": 21774 }, { "epoch": 2.911874832843006, "grad_norm": 1.7238916158676147, "learning_rate": 4.522672737149347e-08, "loss": 0.3906, "step": 21775 }, { "epoch": 2.9120085584380853, "grad_norm": 1.4010136127471924, "learning_rate": 4.508967602053549e-08, "loss": 0.3333, "step": 21776 }, { "epoch": 2.912142284033164, "grad_norm": 1.6481897830963135, "learning_rate": 4.495283217015867e-08, "loss": 0.3444, "step": 21777 }, { "epoch": 2.912276009628243, "grad_norm": 1.5905396938323975, "learning_rate": 4.4816195823212946e-08, "loss": 0.366, "step": 21778 }, { "epoch": 2.9124097352233216, "grad_norm": 1.5707181692123413, "learning_rate": 4.467976698254828e-08, "loss": 0.329, "step": 21779 }, { "epoch": 2.9125434608184007, "grad_norm": 1.4278359413146973, "learning_rate": 4.454354565100793e-08, "loss": 0.3267, "step": 21780 }, { "epoch": 2.9126771864134797, "grad_norm": 1.6504592895507812, "learning_rate": 4.440753183143076e-08, "loss": 0.3952, "step": 21781 }, { "epoch": 2.9128109120085584, "grad_norm": 1.5234427452087402, "learning_rate": 4.4271725526651155e-08, "loss": 0.3775, "step": 21782 }, { "epoch": 2.9129446376036374, "grad_norm": 1.7004739046096802, "learning_rate": 4.4136126739502405e-08, "loss": 0.3924, "step": 21783 }, { "epoch": 2.913078363198716, "grad_norm": 1.44155752658844, "learning_rate": 4.400073547280781e-08, "loss": 0.3072, "step": 21784 }, { "epoch": 2.913212088793795, "grad_norm": 1.5545722246170044, "learning_rate": 4.3865551729391773e-08, "loss": 0.3526, "step": 21785 }, { "epoch": 2.913345814388874, "grad_norm": 1.5478792190551758, "learning_rate": 4.373057551207205e-08, "loss": 0.3294, "step": 21786 }, { "epoch": 2.913479539983953, "grad_norm": 1.4151712656021118, "learning_rate": 4.3595806823660826e-08, "loss": 0.3243, "step": 21787 }, { "epoch": 2.913613265579032, "grad_norm": 1.553268313407898, "learning_rate": 4.346124566696697e-08, "loss": 0.3511, "step": 21788 }, { "epoch": 2.9137469911741105, "grad_norm": 1.396307110786438, "learning_rate": 4.332689204479712e-08, "loss": 0.3432, "step": 21789 }, { "epoch": 2.9138807167691896, "grad_norm": 1.6681559085845947, "learning_rate": 4.319274595995016e-08, "loss": 0.3811, "step": 21790 }, { "epoch": 2.9140144423642687, "grad_norm": 1.734554409980774, "learning_rate": 4.305880741522273e-08, "loss": 0.403, "step": 21791 }, { "epoch": 2.9141481679593473, "grad_norm": 1.7042791843414307, "learning_rate": 4.292507641340704e-08, "loss": 0.384, "step": 21792 }, { "epoch": 2.9142818935544264, "grad_norm": 1.4473533630371094, "learning_rate": 4.279155295728976e-08, "loss": 0.2971, "step": 21793 }, { "epoch": 2.914415619149505, "grad_norm": 1.6682684421539307, "learning_rate": 4.2658237049655325e-08, "loss": 0.3676, "step": 21794 }, { "epoch": 2.914549344744584, "grad_norm": 1.5631887912750244, "learning_rate": 4.252512869328151e-08, "loss": 0.3555, "step": 21795 }, { "epoch": 2.914683070339663, "grad_norm": 1.663098931312561, "learning_rate": 4.2392227890942774e-08, "loss": 0.3597, "step": 21796 }, { "epoch": 2.9148167959347417, "grad_norm": 1.610314965248108, "learning_rate": 4.225953464540911e-08, "loss": 0.3852, "step": 21797 }, { "epoch": 2.914950521529821, "grad_norm": 1.693556547164917, "learning_rate": 4.212704895944719e-08, "loss": 0.3612, "step": 21798 }, { "epoch": 2.9150842471248994, "grad_norm": 1.7170764207839966, "learning_rate": 4.199477083581926e-08, "loss": 0.341, "step": 21799 }, { "epoch": 2.9152179727199785, "grad_norm": 1.6769074201583862, "learning_rate": 4.18627002772809e-08, "loss": 0.3857, "step": 21800 }, { "epoch": 2.9153516983150576, "grad_norm": 1.5538465976715088, "learning_rate": 4.173083728658656e-08, "loss": 0.362, "step": 21801 }, { "epoch": 2.9154854239101367, "grad_norm": 1.665004849433899, "learning_rate": 4.159918186648293e-08, "loss": 0.385, "step": 21802 }, { "epoch": 2.9156191495052153, "grad_norm": 1.7422281503677368, "learning_rate": 4.146773401971449e-08, "loss": 0.3862, "step": 21803 }, { "epoch": 2.9157528751002944, "grad_norm": 1.59601891040802, "learning_rate": 4.133649374902349e-08, "loss": 0.359, "step": 21804 }, { "epoch": 2.915886600695373, "grad_norm": 1.6603186130523682, "learning_rate": 4.120546105714329e-08, "loss": 0.3793, "step": 21805 }, { "epoch": 2.916020326290452, "grad_norm": 1.5019904375076294, "learning_rate": 4.107463594680505e-08, "loss": 0.3538, "step": 21806 }, { "epoch": 2.916154051885531, "grad_norm": 1.3670494556427002, "learning_rate": 4.094401842073659e-08, "loss": 0.3515, "step": 21807 }, { "epoch": 2.9162877774806097, "grad_norm": 1.6355624198913574, "learning_rate": 4.081360848166016e-08, "loss": 0.3786, "step": 21808 }, { "epoch": 2.916421503075689, "grad_norm": 1.8192009925842285, "learning_rate": 4.068340613229471e-08, "loss": 0.424, "step": 21809 }, { "epoch": 2.9165552286707674, "grad_norm": 1.741868495941162, "learning_rate": 4.0553411375353626e-08, "loss": 0.3756, "step": 21810 }, { "epoch": 2.9166889542658465, "grad_norm": 1.7142159938812256, "learning_rate": 4.042362421354695e-08, "loss": 0.4193, "step": 21811 }, { "epoch": 2.9168226798609256, "grad_norm": 1.5600560903549194, "learning_rate": 4.029404464957809e-08, "loss": 0.3339, "step": 21812 }, { "epoch": 2.916956405456004, "grad_norm": 1.6838316917419434, "learning_rate": 4.016467268615154e-08, "loss": 0.3322, "step": 21813 }, { "epoch": 2.9170901310510833, "grad_norm": 1.5570639371871948, "learning_rate": 4.003550832595959e-08, "loss": 0.3483, "step": 21814 }, { "epoch": 2.917223856646162, "grad_norm": 1.4684759378433228, "learning_rate": 3.9906551571697874e-08, "loss": 0.3228, "step": 21815 }, { "epoch": 2.917357582241241, "grad_norm": 1.5414067506790161, "learning_rate": 3.977780242605422e-08, "loss": 0.3606, "step": 21816 }, { "epoch": 2.91749130783632, "grad_norm": 1.5478793382644653, "learning_rate": 3.964926089170984e-08, "loss": 0.3692, "step": 21817 }, { "epoch": 2.9176250334313987, "grad_norm": 1.5231963396072388, "learning_rate": 3.952092697134591e-08, "loss": 0.3436, "step": 21818 }, { "epoch": 2.9177587590264777, "grad_norm": 1.3407139778137207, "learning_rate": 3.939280066763806e-08, "loss": 0.3482, "step": 21819 }, { "epoch": 2.9178924846215564, "grad_norm": 1.5974416732788086, "learning_rate": 3.926488198325529e-08, "loss": 0.374, "step": 21820 }, { "epoch": 2.9180262102166354, "grad_norm": 1.594809651374817, "learning_rate": 3.913717092086433e-08, "loss": 0.382, "step": 21821 }, { "epoch": 2.9181599358117145, "grad_norm": 1.5401958227157593, "learning_rate": 3.900966748312862e-08, "loss": 0.3726, "step": 21822 }, { "epoch": 2.918293661406793, "grad_norm": 1.7426056861877441, "learning_rate": 3.888237167270381e-08, "loss": 0.3748, "step": 21823 }, { "epoch": 2.918427387001872, "grad_norm": 1.3408571481704712, "learning_rate": 3.875528349224444e-08, "loss": 0.3095, "step": 21824 }, { "epoch": 2.918561112596951, "grad_norm": 1.6286340951919556, "learning_rate": 3.862840294439951e-08, "loss": 0.345, "step": 21825 }, { "epoch": 2.91869483819203, "grad_norm": 1.448320984840393, "learning_rate": 3.850173003181357e-08, "loss": 0.3346, "step": 21826 }, { "epoch": 2.918828563787109, "grad_norm": 1.5646271705627441, "learning_rate": 3.8375264757126716e-08, "loss": 0.3378, "step": 21827 }, { "epoch": 2.9189622893821876, "grad_norm": 1.4516915082931519, "learning_rate": 3.824900712297464e-08, "loss": 0.3745, "step": 21828 }, { "epoch": 2.9190960149772667, "grad_norm": 1.3167798519134521, "learning_rate": 3.812295713199077e-08, "loss": 0.2913, "step": 21829 }, { "epoch": 2.9192297405723453, "grad_norm": 1.6384868621826172, "learning_rate": 3.7997114786800794e-08, "loss": 0.3845, "step": 21830 }, { "epoch": 2.9193634661674244, "grad_norm": 2.3137524127960205, "learning_rate": 3.787148009002817e-08, "loss": 0.3926, "step": 21831 }, { "epoch": 2.9194971917625034, "grad_norm": 1.5455163717269897, "learning_rate": 3.774605304429191e-08, "loss": 0.3783, "step": 21832 }, { "epoch": 2.9196309173575825, "grad_norm": 1.4665167331695557, "learning_rate": 3.762083365220659e-08, "loss": 0.3474, "step": 21833 }, { "epoch": 2.919764642952661, "grad_norm": 1.5014750957489014, "learning_rate": 3.7495821916382347e-08, "loss": 0.3758, "step": 21834 }, { "epoch": 2.9198983685477398, "grad_norm": 1.5485979318618774, "learning_rate": 3.7371017839423765e-08, "loss": 0.3422, "step": 21835 }, { "epoch": 2.920032094142819, "grad_norm": 1.6716946363449097, "learning_rate": 3.72464214239332e-08, "loss": 0.364, "step": 21836 }, { "epoch": 2.920165819737898, "grad_norm": 1.6421786546707153, "learning_rate": 3.712203267250858e-08, "loss": 0.3461, "step": 21837 }, { "epoch": 2.920299545332977, "grad_norm": 1.363187551498413, "learning_rate": 3.699785158774116e-08, "loss": 0.2774, "step": 21838 }, { "epoch": 2.9204332709280556, "grad_norm": 1.6017245054244995, "learning_rate": 3.687387817221999e-08, "loss": 0.3649, "step": 21839 }, { "epoch": 2.9205669965231347, "grad_norm": 1.374782681465149, "learning_rate": 3.675011242852966e-08, "loss": 0.3723, "step": 21840 }, { "epoch": 2.9207007221182133, "grad_norm": 1.4454938173294067, "learning_rate": 3.662655435924811e-08, "loss": 0.306, "step": 21841 }, { "epoch": 2.9208344477132924, "grad_norm": 1.5344688892364502, "learning_rate": 3.650320396695328e-08, "loss": 0.3664, "step": 21842 }, { "epoch": 2.9209681733083714, "grad_norm": 1.6449830532073975, "learning_rate": 3.638006125421423e-08, "loss": 0.3641, "step": 21843 }, { "epoch": 2.92110189890345, "grad_norm": 1.5854240655899048, "learning_rate": 3.62571262236e-08, "loss": 0.383, "step": 21844 }, { "epoch": 2.921235624498529, "grad_norm": 1.4984458684921265, "learning_rate": 3.613439887767078e-08, "loss": 0.3301, "step": 21845 }, { "epoch": 2.9213693500936078, "grad_norm": 1.4035987854003906, "learning_rate": 3.6011879218985634e-08, "loss": 0.3138, "step": 21846 }, { "epoch": 2.921503075688687, "grad_norm": 1.6949747800827026, "learning_rate": 3.588956725009807e-08, "loss": 0.3783, "step": 21847 }, { "epoch": 2.921636801283766, "grad_norm": 1.645996332168579, "learning_rate": 3.576746297355826e-08, "loss": 0.3741, "step": 21848 }, { "epoch": 2.9217705268788445, "grad_norm": 1.725846767425537, "learning_rate": 3.564556639191197e-08, "loss": 0.4032, "step": 21849 }, { "epoch": 2.9219042524739236, "grad_norm": 1.4980498552322388, "learning_rate": 3.552387750769715e-08, "loss": 0.366, "step": 21850 }, { "epoch": 2.9220379780690022, "grad_norm": 1.5417406558990479, "learning_rate": 3.540239632345288e-08, "loss": 0.3695, "step": 21851 }, { "epoch": 2.9221717036640813, "grad_norm": 1.58588445186615, "learning_rate": 3.528112284171159e-08, "loss": 0.3562, "step": 21852 }, { "epoch": 2.9223054292591604, "grad_norm": 1.6231944561004639, "learning_rate": 3.516005706499903e-08, "loss": 0.3762, "step": 21853 }, { "epoch": 2.922439154854239, "grad_norm": 1.5480135679244995, "learning_rate": 3.503919899583985e-08, "loss": 0.3206, "step": 21854 }, { "epoch": 2.922572880449318, "grad_norm": 1.5012600421905518, "learning_rate": 3.4918548636753145e-08, "loss": 0.3976, "step": 21855 }, { "epoch": 2.9227066060443967, "grad_norm": 1.6514532566070557, "learning_rate": 3.4798105990253575e-08, "loss": 0.3807, "step": 21856 }, { "epoch": 2.9228403316394758, "grad_norm": 1.7092106342315674, "learning_rate": 3.4677871058852454e-08, "loss": 0.3726, "step": 21857 }, { "epoch": 2.922974057234555, "grad_norm": 1.4381464719772339, "learning_rate": 3.455784384505445e-08, "loss": 0.3301, "step": 21858 }, { "epoch": 2.9231077828296335, "grad_norm": 1.5082228183746338, "learning_rate": 3.443802435136312e-08, "loss": 0.3784, "step": 21859 }, { "epoch": 2.9232415084247125, "grad_norm": 1.5529791116714478, "learning_rate": 3.431841258027535e-08, "loss": 0.3647, "step": 21860 }, { "epoch": 2.923375234019791, "grad_norm": 1.2835859060287476, "learning_rate": 3.41990085342836e-08, "loss": 0.2964, "step": 21861 }, { "epoch": 2.9235089596148702, "grad_norm": 1.6354966163635254, "learning_rate": 3.407981221587586e-08, "loss": 0.3585, "step": 21862 }, { "epoch": 2.9236426852099493, "grad_norm": 1.5972139835357666, "learning_rate": 3.3960823627540163e-08, "loss": 0.3763, "step": 21863 }, { "epoch": 2.923776410805028, "grad_norm": 1.3483550548553467, "learning_rate": 3.3842042771754515e-08, "loss": 0.3704, "step": 21864 }, { "epoch": 2.923910136400107, "grad_norm": 1.5618432760238647, "learning_rate": 3.37234696509936e-08, "loss": 0.341, "step": 21865 }, { "epoch": 2.9240438619951856, "grad_norm": 1.3945764303207397, "learning_rate": 3.3605104267731003e-08, "loss": 0.3178, "step": 21866 }, { "epoch": 2.9241775875902647, "grad_norm": 1.5600690841674805, "learning_rate": 3.348694662443364e-08, "loss": 0.3249, "step": 21867 }, { "epoch": 2.9243113131853438, "grad_norm": 1.5998305082321167, "learning_rate": 3.336899672356397e-08, "loss": 0.3644, "step": 21868 }, { "epoch": 2.924445038780423, "grad_norm": 1.58379065990448, "learning_rate": 3.325125456758005e-08, "loss": 0.3598, "step": 21869 }, { "epoch": 2.9245787643755015, "grad_norm": 1.5419119596481323, "learning_rate": 3.313372015893657e-08, "loss": 0.3767, "step": 21870 }, { "epoch": 2.9247124899705805, "grad_norm": 1.5822155475616455, "learning_rate": 3.301639350008379e-08, "loss": 0.3582, "step": 21871 }, { "epoch": 2.924846215565659, "grad_norm": 1.6291447877883911, "learning_rate": 3.2899274593466425e-08, "loss": 0.349, "step": 21872 }, { "epoch": 2.9249799411607382, "grad_norm": 1.6246795654296875, "learning_rate": 3.278236344152586e-08, "loss": 0.3839, "step": 21873 }, { "epoch": 2.9251136667558173, "grad_norm": 1.7607389688491821, "learning_rate": 3.266566004670013e-08, "loss": 0.3871, "step": 21874 }, { "epoch": 2.925247392350896, "grad_norm": 1.5084717273712158, "learning_rate": 3.254916441142064e-08, "loss": 0.3796, "step": 21875 }, { "epoch": 2.925381117945975, "grad_norm": 1.4809011220932007, "learning_rate": 3.2432876538116554e-08, "loss": 0.3587, "step": 21876 }, { "epoch": 2.9255148435410536, "grad_norm": 1.6005514860153198, "learning_rate": 3.2316796429210373e-08, "loss": 0.3637, "step": 21877 }, { "epoch": 2.9256485691361327, "grad_norm": 1.321458339691162, "learning_rate": 3.22009240871235e-08, "loss": 0.3343, "step": 21878 }, { "epoch": 2.9257822947312118, "grad_norm": 1.619275689125061, "learning_rate": 3.208525951426955e-08, "loss": 0.3534, "step": 21879 }, { "epoch": 2.9259160203262904, "grad_norm": 1.5018470287322998, "learning_rate": 3.196980271305994e-08, "loss": 0.347, "step": 21880 }, { "epoch": 2.9260497459213695, "grad_norm": 1.6215152740478516, "learning_rate": 3.185455368590162e-08, "loss": 0.3951, "step": 21881 }, { "epoch": 2.926183471516448, "grad_norm": 1.5148367881774902, "learning_rate": 3.1739512435197126e-08, "loss": 0.3158, "step": 21882 }, { "epoch": 2.926317197111527, "grad_norm": 1.4534218311309814, "learning_rate": 3.1624678963343426e-08, "loss": 0.3681, "step": 21883 }, { "epoch": 2.9264509227066062, "grad_norm": 1.533595085144043, "learning_rate": 3.151005327273526e-08, "loss": 0.3394, "step": 21884 }, { "epoch": 2.926584648301685, "grad_norm": 1.3647336959838867, "learning_rate": 3.1395635365760736e-08, "loss": 0.3152, "step": 21885 }, { "epoch": 2.926718373896764, "grad_norm": 1.5203237533569336, "learning_rate": 3.12814252448046e-08, "loss": 0.3621, "step": 21886 }, { "epoch": 2.9268520994918426, "grad_norm": 1.5276892185211182, "learning_rate": 3.116742291224939e-08, "loss": 0.3404, "step": 21887 }, { "epoch": 2.9269858250869216, "grad_norm": 1.599893569946289, "learning_rate": 3.105362837046877e-08, "loss": 0.3653, "step": 21888 }, { "epoch": 2.9271195506820007, "grad_norm": 1.5389573574066162, "learning_rate": 3.0940041621836395e-08, "loss": 0.3724, "step": 21889 }, { "epoch": 2.9272532762770793, "grad_norm": 1.4590905904769897, "learning_rate": 3.082666266872036e-08, "loss": 0.3414, "step": 21890 }, { "epoch": 2.9273870018721584, "grad_norm": 1.4509474039077759, "learning_rate": 3.071349151348213e-08, "loss": 0.3562, "step": 21891 }, { "epoch": 2.927520727467237, "grad_norm": 1.7496082782745361, "learning_rate": 3.060052815848202e-08, "loss": 0.4217, "step": 21892 }, { "epoch": 2.927654453062316, "grad_norm": 1.4676233530044556, "learning_rate": 3.0487772606074826e-08, "loss": 0.3363, "step": 21893 }, { "epoch": 2.927788178657395, "grad_norm": 1.5512809753417969, "learning_rate": 3.0375224858609774e-08, "loss": 0.3644, "step": 21894 }, { "epoch": 2.927921904252474, "grad_norm": 1.4243831634521484, "learning_rate": 3.026288491843277e-08, "loss": 0.3627, "step": 21895 }, { "epoch": 2.928055629847553, "grad_norm": 1.7067409753799438, "learning_rate": 3.0150752787886374e-08, "loss": 0.3652, "step": 21896 }, { "epoch": 2.9281893554426315, "grad_norm": 1.3961254358291626, "learning_rate": 3.0038828469306506e-08, "loss": 0.3588, "step": 21897 }, { "epoch": 2.9283230810377106, "grad_norm": 1.5491318702697754, "learning_rate": 2.9927111965029063e-08, "loss": 0.3349, "step": 21898 }, { "epoch": 2.9284568066327896, "grad_norm": 1.5140010118484497, "learning_rate": 2.981560327737887e-08, "loss": 0.3646, "step": 21899 }, { "epoch": 2.9285905322278682, "grad_norm": 1.536584734916687, "learning_rate": 2.970430240868183e-08, "loss": 0.3477, "step": 21900 }, { "epoch": 2.9287242578229473, "grad_norm": 1.4457896947860718, "learning_rate": 2.9593209361259422e-08, "loss": 0.3613, "step": 21901 }, { "epoch": 2.928857983418026, "grad_norm": 1.4457533359527588, "learning_rate": 2.9482324137425355e-08, "loss": 0.3738, "step": 21902 }, { "epoch": 2.928991709013105, "grad_norm": 1.4049588441848755, "learning_rate": 2.937164673949111e-08, "loss": 0.3227, "step": 21903 }, { "epoch": 2.929125434608184, "grad_norm": 1.5382344722747803, "learning_rate": 2.926117716976484e-08, "loss": 0.3783, "step": 21904 }, { "epoch": 2.929259160203263, "grad_norm": 1.505469560623169, "learning_rate": 2.9150915430548045e-08, "loss": 0.3792, "step": 21905 }, { "epoch": 2.929392885798342, "grad_norm": 1.5301388502120972, "learning_rate": 2.9040861524138876e-08, "loss": 0.3831, "step": 21906 }, { "epoch": 2.929526611393421, "grad_norm": 1.4482907056808472, "learning_rate": 2.8931015452831057e-08, "loss": 0.3438, "step": 21907 }, { "epoch": 2.9296603369884995, "grad_norm": 1.555217981338501, "learning_rate": 2.8821377218917202e-08, "loss": 0.3464, "step": 21908 }, { "epoch": 2.9297940625835786, "grad_norm": 1.5804312229156494, "learning_rate": 2.8711946824678817e-08, "loss": 0.3631, "step": 21909 }, { "epoch": 2.9299277881786576, "grad_norm": 1.5982356071472168, "learning_rate": 2.860272427239852e-08, "loss": 0.3482, "step": 21910 }, { "epoch": 2.9300615137737362, "grad_norm": 1.5752573013305664, "learning_rate": 2.8493709564353376e-08, "loss": 0.3749, "step": 21911 }, { "epoch": 2.9301952393688153, "grad_norm": 1.5073572397232056, "learning_rate": 2.838490270281491e-08, "loss": 0.3598, "step": 21912 }, { "epoch": 2.930328964963894, "grad_norm": 1.6357098817825317, "learning_rate": 2.827630369005019e-08, "loss": 0.4056, "step": 21913 }, { "epoch": 2.930462690558973, "grad_norm": 1.4607244729995728, "learning_rate": 2.816791252832518e-08, "loss": 0.3473, "step": 21914 }, { "epoch": 2.930596416154052, "grad_norm": 1.5359269380569458, "learning_rate": 2.805972921989808e-08, "loss": 0.3384, "step": 21915 }, { "epoch": 2.9307301417491307, "grad_norm": 1.4903286695480347, "learning_rate": 2.795175376702375e-08, "loss": 0.341, "step": 21916 }, { "epoch": 2.93086386734421, "grad_norm": 1.6003645658493042, "learning_rate": 2.784398617195372e-08, "loss": 0.3854, "step": 21917 }, { "epoch": 2.9309975929392884, "grad_norm": 1.4195661544799805, "learning_rate": 2.7736426436931753e-08, "loss": 0.356, "step": 21918 }, { "epoch": 2.9311313185343675, "grad_norm": 1.584608554840088, "learning_rate": 2.762907456420272e-08, "loss": 0.3767, "step": 21919 }, { "epoch": 2.9312650441294466, "grad_norm": 1.5825413465499878, "learning_rate": 2.7521930556002608e-08, "loss": 0.329, "step": 21920 }, { "epoch": 2.931398769724525, "grad_norm": 1.6157947778701782, "learning_rate": 2.7414994414565187e-08, "loss": 0.3754, "step": 21921 }, { "epoch": 2.9315324953196042, "grad_norm": 1.5587482452392578, "learning_rate": 2.7308266142119788e-08, "loss": 0.3509, "step": 21922 }, { "epoch": 2.931666220914683, "grad_norm": 1.5920610427856445, "learning_rate": 2.7201745740890186e-08, "loss": 0.3376, "step": 21923 }, { "epoch": 2.931799946509762, "grad_norm": 1.6547080278396606, "learning_rate": 2.7095433213097933e-08, "loss": 0.3683, "step": 21924 }, { "epoch": 2.931933672104841, "grad_norm": 1.6580116748809814, "learning_rate": 2.698932856095793e-08, "loss": 0.3675, "step": 21925 }, { "epoch": 2.9320673976999196, "grad_norm": 1.5862411260604858, "learning_rate": 2.6883431786682844e-08, "loss": 0.3213, "step": 21926 }, { "epoch": 2.9322011232949987, "grad_norm": 1.412866473197937, "learning_rate": 2.6777742892478697e-08, "loss": 0.3614, "step": 21927 }, { "epoch": 2.9323348488900773, "grad_norm": 1.4328703880310059, "learning_rate": 2.6672261880549276e-08, "loss": 0.3482, "step": 21928 }, { "epoch": 2.9324685744851564, "grad_norm": 1.4566869735717773, "learning_rate": 2.6566988753093938e-08, "loss": 0.3482, "step": 21929 }, { "epoch": 2.9326023000802355, "grad_norm": 1.6085487604141235, "learning_rate": 2.6461923512305367e-08, "loss": 0.3353, "step": 21930 }, { "epoch": 2.932736025675314, "grad_norm": 1.6303867101669312, "learning_rate": 2.6357066160374035e-08, "loss": 0.3876, "step": 21931 }, { "epoch": 2.932869751270393, "grad_norm": 1.377264380455017, "learning_rate": 2.625241669948597e-08, "loss": 0.3424, "step": 21932 }, { "epoch": 2.933003476865472, "grad_norm": 1.4616636037826538, "learning_rate": 2.6147975131822767e-08, "loss": 0.3574, "step": 21933 }, { "epoch": 2.933137202460551, "grad_norm": 1.4236183166503906, "learning_rate": 2.6043741459561565e-08, "loss": 0.3516, "step": 21934 }, { "epoch": 2.93327092805563, "grad_norm": 1.5016098022460938, "learning_rate": 2.5939715684873967e-08, "loss": 0.3709, "step": 21935 }, { "epoch": 2.933404653650709, "grad_norm": 1.4501601457595825, "learning_rate": 2.5835897809929345e-08, "loss": 0.3465, "step": 21936 }, { "epoch": 2.9335383792457876, "grad_norm": 1.6074968576431274, "learning_rate": 2.5732287836890413e-08, "loss": 0.4076, "step": 21937 }, { "epoch": 2.9336721048408663, "grad_norm": 1.5663450956344604, "learning_rate": 2.5628885767918777e-08, "loss": 0.3663, "step": 21938 }, { "epoch": 2.9338058304359453, "grad_norm": 1.5713156461715698, "learning_rate": 2.5525691605167156e-08, "loss": 0.3912, "step": 21939 }, { "epoch": 2.9339395560310244, "grad_norm": 1.2413362264633179, "learning_rate": 2.542270535078828e-08, "loss": 0.2813, "step": 21940 }, { "epoch": 2.9340732816261035, "grad_norm": 1.623618721961975, "learning_rate": 2.5319927006929313e-08, "loss": 0.3956, "step": 21941 }, { "epoch": 2.934207007221182, "grad_norm": 1.4769296646118164, "learning_rate": 2.5217356575730767e-08, "loss": 0.3408, "step": 21942 }, { "epoch": 2.934340732816261, "grad_norm": 1.7603259086608887, "learning_rate": 2.5114994059333154e-08, "loss": 0.367, "step": 21943 }, { "epoch": 2.93447445841134, "grad_norm": 1.5921375751495361, "learning_rate": 2.5012839459866987e-08, "loss": 0.3833, "step": 21944 }, { "epoch": 2.934608184006419, "grad_norm": 1.488053321838379, "learning_rate": 2.49108927794639e-08, "loss": 0.3486, "step": 21945 }, { "epoch": 2.934741909601498, "grad_norm": 1.5141801834106445, "learning_rate": 2.480915402024775e-08, "loss": 0.3492, "step": 21946 }, { "epoch": 2.9348756351965766, "grad_norm": 1.4018319845199585, "learning_rate": 2.4707623184339057e-08, "loss": 0.3576, "step": 21947 }, { "epoch": 2.9350093607916556, "grad_norm": 1.4830055236816406, "learning_rate": 2.4606300273856133e-08, "loss": 0.3476, "step": 21948 }, { "epoch": 2.9351430863867343, "grad_norm": 1.474337100982666, "learning_rate": 2.4505185290908396e-08, "loss": 0.3116, "step": 21949 }, { "epoch": 2.9352768119818133, "grad_norm": 1.5377109050750732, "learning_rate": 2.4404278237605272e-08, "loss": 0.3885, "step": 21950 }, { "epoch": 2.9354105375768924, "grad_norm": 1.4599922895431519, "learning_rate": 2.4303579116048416e-08, "loss": 0.3726, "step": 21951 }, { "epoch": 2.935544263171971, "grad_norm": 1.669805884361267, "learning_rate": 2.4203087928338366e-08, "loss": 0.3835, "step": 21952 }, { "epoch": 2.93567798876705, "grad_norm": 1.5633931159973145, "learning_rate": 2.4102804676569004e-08, "loss": 0.3867, "step": 21953 }, { "epoch": 2.9358117143621287, "grad_norm": 1.6189275979995728, "learning_rate": 2.400272936283088e-08, "loss": 0.3499, "step": 21954 }, { "epoch": 2.935945439957208, "grad_norm": 1.5520025491714478, "learning_rate": 2.3902861989208994e-08, "loss": 0.3465, "step": 21955 }, { "epoch": 2.936079165552287, "grad_norm": 1.5975831747055054, "learning_rate": 2.380320255778723e-08, "loss": 0.3758, "step": 21956 }, { "epoch": 2.9362128911473655, "grad_norm": 1.6494909524917603, "learning_rate": 2.37037510706406e-08, "loss": 0.3705, "step": 21957 }, { "epoch": 2.9363466167424446, "grad_norm": 1.594377040863037, "learning_rate": 2.3604507529843e-08, "loss": 0.3799, "step": 21958 }, { "epoch": 2.936480342337523, "grad_norm": 1.3874338865280151, "learning_rate": 2.3505471937463888e-08, "loss": 0.2936, "step": 21959 }, { "epoch": 2.9366140679326023, "grad_norm": 1.533348560333252, "learning_rate": 2.340664429556605e-08, "loss": 0.3491, "step": 21960 }, { "epoch": 2.9367477935276813, "grad_norm": 1.7203236818313599, "learning_rate": 2.3308024606210066e-08, "loss": 0.3753, "step": 21961 }, { "epoch": 2.93688151912276, "grad_norm": 1.729691982269287, "learning_rate": 2.320961287145207e-08, "loss": 0.3965, "step": 21962 }, { "epoch": 2.937015244717839, "grad_norm": 1.7557917833328247, "learning_rate": 2.311140909334264e-08, "loss": 0.3899, "step": 21963 }, { "epoch": 2.9371489703129177, "grad_norm": 1.9127912521362305, "learning_rate": 2.301341327392903e-08, "loss": 0.4084, "step": 21964 }, { "epoch": 2.9372826959079967, "grad_norm": 1.8002527952194214, "learning_rate": 2.291562541525405e-08, "loss": 0.4268, "step": 21965 }, { "epoch": 2.937416421503076, "grad_norm": 1.4639889001846313, "learning_rate": 2.281804551935607e-08, "loss": 0.3225, "step": 21966 }, { "epoch": 2.9375501470981544, "grad_norm": 1.4750975370407104, "learning_rate": 2.2720673588269014e-08, "loss": 0.3559, "step": 21967 }, { "epoch": 2.9376838726932335, "grad_norm": 1.6867755651474, "learning_rate": 2.2623509624021266e-08, "loss": 0.3705, "step": 21968 }, { "epoch": 2.937817598288312, "grad_norm": 1.4584228992462158, "learning_rate": 2.252655362864009e-08, "loss": 0.3853, "step": 21969 }, { "epoch": 2.937951323883391, "grad_norm": 1.5253322124481201, "learning_rate": 2.2429805604144983e-08, "loss": 0.3198, "step": 21970 }, { "epoch": 2.9380850494784703, "grad_norm": 1.6178983449935913, "learning_rate": 2.233326555255322e-08, "loss": 0.365, "step": 21971 }, { "epoch": 2.9382187750735493, "grad_norm": 1.4760771989822388, "learning_rate": 2.223693347587652e-08, "loss": 0.3333, "step": 21972 }, { "epoch": 2.938352500668628, "grad_norm": 1.761681318283081, "learning_rate": 2.2140809376124396e-08, "loss": 0.4297, "step": 21973 }, { "epoch": 2.938486226263707, "grad_norm": 1.506162405014038, "learning_rate": 2.204489325529857e-08, "loss": 0.3344, "step": 21974 }, { "epoch": 2.9386199518587857, "grad_norm": 1.5161032676696777, "learning_rate": 2.1949185115398564e-08, "loss": 0.366, "step": 21975 }, { "epoch": 2.9387536774538647, "grad_norm": 1.6477665901184082, "learning_rate": 2.1853684958420553e-08, "loss": 0.3653, "step": 21976 }, { "epoch": 2.938887403048944, "grad_norm": 1.7221617698669434, "learning_rate": 2.1758392786354056e-08, "loss": 0.4199, "step": 21977 }, { "epoch": 2.9390211286440224, "grad_norm": 1.5409016609191895, "learning_rate": 2.166330860118637e-08, "loss": 0.3788, "step": 21978 }, { "epoch": 2.9391548542391015, "grad_norm": 1.6811773777008057, "learning_rate": 2.1568432404898144e-08, "loss": 0.3806, "step": 21979 }, { "epoch": 2.93928857983418, "grad_norm": 1.5992987155914307, "learning_rate": 2.1473764199467784e-08, "loss": 0.372, "step": 21980 }, { "epoch": 2.939422305429259, "grad_norm": 1.4680454730987549, "learning_rate": 2.137930398686816e-08, "loss": 0.3218, "step": 21981 }, { "epoch": 2.9395560310243383, "grad_norm": 1.5107353925704956, "learning_rate": 2.128505176906881e-08, "loss": 0.3565, "step": 21982 }, { "epoch": 2.939689756619417, "grad_norm": 1.51665198802948, "learning_rate": 2.1191007548033715e-08, "loss": 0.3336, "step": 21983 }, { "epoch": 2.939823482214496, "grad_norm": 1.5794954299926758, "learning_rate": 2.109717132572353e-08, "loss": 0.3916, "step": 21984 }, { "epoch": 2.9399572078095746, "grad_norm": 1.8279074430465698, "learning_rate": 2.1003543104093362e-08, "loss": 0.3547, "step": 21985 }, { "epoch": 2.9400909334046537, "grad_norm": 1.580827236175537, "learning_rate": 2.0910122885097194e-08, "loss": 0.343, "step": 21986 }, { "epoch": 2.9402246589997327, "grad_norm": 1.5968875885009766, "learning_rate": 2.0816910670679035e-08, "loss": 0.3938, "step": 21987 }, { "epoch": 2.9403583845948114, "grad_norm": 1.5079947710037231, "learning_rate": 2.0723906462783995e-08, "loss": 0.3564, "step": 21988 }, { "epoch": 2.9404921101898904, "grad_norm": 1.4081978797912598, "learning_rate": 2.063111026334941e-08, "loss": 0.3564, "step": 21989 }, { "epoch": 2.940625835784969, "grad_norm": 1.8406225442886353, "learning_rate": 2.0538522074310395e-08, "loss": 0.4195, "step": 21990 }, { "epoch": 2.940759561380048, "grad_norm": 1.7217954397201538, "learning_rate": 2.0446141897596528e-08, "loss": 0.3978, "step": 21991 }, { "epoch": 2.940893286975127, "grad_norm": 1.5998014211654663, "learning_rate": 2.0353969735134037e-08, "loss": 0.2995, "step": 21992 }, { "epoch": 2.941027012570206, "grad_norm": 1.3732471466064453, "learning_rate": 2.0262005588842503e-08, "loss": 0.3098, "step": 21993 }, { "epoch": 2.941160738165285, "grad_norm": 1.4810923337936401, "learning_rate": 2.01702494606415e-08, "loss": 0.3422, "step": 21994 }, { "epoch": 2.9412944637603635, "grad_norm": 1.4648445844650269, "learning_rate": 2.007870135244061e-08, "loss": 0.3302, "step": 21995 }, { "epoch": 2.9414281893554426, "grad_norm": 1.5751278400421143, "learning_rate": 1.998736126614942e-08, "loss": 0.4216, "step": 21996 }, { "epoch": 2.9415619149505217, "grad_norm": 1.7293936014175415, "learning_rate": 1.9896229203671956e-08, "loss": 0.3534, "step": 21997 }, { "epoch": 2.9416956405456003, "grad_norm": 1.7084097862243652, "learning_rate": 1.9805305166908926e-08, "loss": 0.3931, "step": 21998 }, { "epoch": 2.9418293661406794, "grad_norm": 1.4307655096054077, "learning_rate": 1.9714589157753262e-08, "loss": 0.391, "step": 21999 }, { "epoch": 2.941963091735758, "grad_norm": 1.5646039247512817, "learning_rate": 1.9624081178096777e-08, "loss": 0.3383, "step": 22000 }, { "epoch": 2.942096817330837, "grad_norm": 1.6654305458068848, "learning_rate": 1.9533781229825742e-08, "loss": 0.3931, "step": 22001 }, { "epoch": 2.942230542925916, "grad_norm": 1.5475987195968628, "learning_rate": 1.94436893148231e-08, "loss": 0.3772, "step": 22002 }, { "epoch": 2.9423642685209948, "grad_norm": 1.4131194353103638, "learning_rate": 1.9353805434967343e-08, "loss": 0.3468, "step": 22003 }, { "epoch": 2.942497994116074, "grad_norm": 1.6756306886672974, "learning_rate": 1.926412959213031e-08, "loss": 0.395, "step": 22004 }, { "epoch": 2.9426317197111524, "grad_norm": 1.6910536289215088, "learning_rate": 1.9174661788181613e-08, "loss": 0.3761, "step": 22005 }, { "epoch": 2.9427654453062315, "grad_norm": 1.5505757331848145, "learning_rate": 1.9085402024987542e-08, "loss": 0.3658, "step": 22006 }, { "epoch": 2.9428991709013106, "grad_norm": 1.7231221199035645, "learning_rate": 1.8996350304406607e-08, "loss": 0.3579, "step": 22007 }, { "epoch": 2.9430328964963897, "grad_norm": 1.4031049013137817, "learning_rate": 1.8907506628296212e-08, "loss": 0.3396, "step": 22008 }, { "epoch": 2.9431666220914683, "grad_norm": 1.6705553531646729, "learning_rate": 1.881887099850821e-08, "loss": 0.3563, "step": 22009 }, { "epoch": 2.9433003476865474, "grad_norm": 1.3488579988479614, "learning_rate": 1.873044341689001e-08, "loss": 0.3396, "step": 22010 }, { "epoch": 2.943434073281626, "grad_norm": 1.3270381689071655, "learning_rate": 1.8642223885283474e-08, "loss": 0.3063, "step": 22011 }, { "epoch": 2.943567798876705, "grad_norm": 1.6327368021011353, "learning_rate": 1.8554212405530457e-08, "loss": 0.3448, "step": 22012 }, { "epoch": 2.943701524471784, "grad_norm": 1.7653745412826538, "learning_rate": 1.8466408979461724e-08, "loss": 0.3815, "step": 22013 }, { "epoch": 2.9438352500668628, "grad_norm": 1.6860569715499878, "learning_rate": 1.837881360891136e-08, "loss": 0.3747, "step": 22014 }, { "epoch": 2.943968975661942, "grad_norm": 1.3813573122024536, "learning_rate": 1.8291426295702353e-08, "loss": 0.3469, "step": 22015 }, { "epoch": 2.9441027012570204, "grad_norm": 1.641084909439087, "learning_rate": 1.8204247041656576e-08, "loss": 0.3488, "step": 22016 }, { "epoch": 2.9442364268520995, "grad_norm": 1.4626497030258179, "learning_rate": 1.8117275848592574e-08, "loss": 0.3248, "step": 22017 }, { "epoch": 2.9443701524471786, "grad_norm": 1.724860668182373, "learning_rate": 1.8030512718322235e-08, "loss": 0.3949, "step": 22018 }, { "epoch": 2.944503878042257, "grad_norm": 1.7224942445755005, "learning_rate": 1.7943957652653e-08, "loss": 0.3814, "step": 22019 }, { "epoch": 2.9446376036373363, "grad_norm": 1.5102858543395996, "learning_rate": 1.7857610653391198e-08, "loss": 0.3621, "step": 22020 }, { "epoch": 2.944771329232415, "grad_norm": 1.4710279703140259, "learning_rate": 1.77714717223354e-08, "loss": 0.351, "step": 22021 }, { "epoch": 2.944905054827494, "grad_norm": 1.602555513381958, "learning_rate": 1.7685540861281937e-08, "loss": 0.3118, "step": 22022 }, { "epoch": 2.945038780422573, "grad_norm": 1.6551854610443115, "learning_rate": 1.7599818072020492e-08, "loss": 0.3841, "step": 22023 }, { "epoch": 2.9451725060176517, "grad_norm": 1.7148544788360596, "learning_rate": 1.7514303356339635e-08, "loss": 0.385, "step": 22024 }, { "epoch": 2.9453062316127308, "grad_norm": 1.586101770401001, "learning_rate": 1.7428996716020163e-08, "loss": 0.3339, "step": 22025 }, { "epoch": 2.9454399572078094, "grad_norm": 1.5500149726867676, "learning_rate": 1.7343898152841765e-08, "loss": 0.3063, "step": 22026 }, { "epoch": 2.9455736828028884, "grad_norm": 1.5457959175109863, "learning_rate": 1.7259007668576355e-08, "loss": 0.3946, "step": 22027 }, { "epoch": 2.9457074083979675, "grad_norm": 1.6086974143981934, "learning_rate": 1.717432526499474e-08, "loss": 0.365, "step": 22028 }, { "epoch": 2.945841133993046, "grad_norm": 1.6505303382873535, "learning_rate": 1.7089850943862175e-08, "loss": 0.3924, "step": 22029 }, { "epoch": 2.945974859588125, "grad_norm": 1.7740213871002197, "learning_rate": 1.700558470693836e-08, "loss": 0.411, "step": 22030 }, { "epoch": 2.946108585183204, "grad_norm": 1.459455966949463, "learning_rate": 1.6921526555981894e-08, "loss": 0.3486, "step": 22031 }, { "epoch": 2.946242310778283, "grad_norm": 1.6366335153579712, "learning_rate": 1.6837676492742482e-08, "loss": 0.3855, "step": 22032 }, { "epoch": 2.946376036373362, "grad_norm": 1.9239898920059204, "learning_rate": 1.6754034518968732e-08, "loss": 0.3774, "step": 22033 }, { "epoch": 2.9465097619684406, "grad_norm": 1.683491587638855, "learning_rate": 1.667060063640369e-08, "loss": 0.3766, "step": 22034 }, { "epoch": 2.9466434875635197, "grad_norm": 1.6517918109893799, "learning_rate": 1.6587374846788186e-08, "loss": 0.3642, "step": 22035 }, { "epoch": 2.9467772131585983, "grad_norm": 1.5421899557113647, "learning_rate": 1.6504357151855277e-08, "loss": 0.3715, "step": 22036 }, { "epoch": 2.9469109387536774, "grad_norm": 1.7061314582824707, "learning_rate": 1.6421547553335805e-08, "loss": 0.3739, "step": 22037 }, { "epoch": 2.9470446643487564, "grad_norm": 1.6021326780319214, "learning_rate": 1.6338946052956163e-08, "loss": 0.3507, "step": 22038 }, { "epoch": 2.9471783899438355, "grad_norm": 1.533370018005371, "learning_rate": 1.6256552652437197e-08, "loss": 0.3103, "step": 22039 }, { "epoch": 2.947312115538914, "grad_norm": 1.5597457885742188, "learning_rate": 1.617436735349753e-08, "loss": 0.3807, "step": 22040 }, { "epoch": 2.9474458411339928, "grad_norm": 1.7895134687423706, "learning_rate": 1.6092390157849137e-08, "loss": 0.4047, "step": 22041 }, { "epoch": 2.947579566729072, "grad_norm": 1.5382256507873535, "learning_rate": 1.601062106720175e-08, "loss": 0.4017, "step": 22042 }, { "epoch": 2.947713292324151, "grad_norm": 1.496140718460083, "learning_rate": 1.5929060083259563e-08, "loss": 0.3505, "step": 22043 }, { "epoch": 2.94784701791923, "grad_norm": 1.5602507591247559, "learning_rate": 1.584770720772233e-08, "loss": 0.37, "step": 22044 }, { "epoch": 2.9479807435143086, "grad_norm": 1.5773041248321533, "learning_rate": 1.576656244228536e-08, "loss": 0.3544, "step": 22045 }, { "epoch": 2.9481144691093877, "grad_norm": 1.4929159879684448, "learning_rate": 1.5685625788640635e-08, "loss": 0.3362, "step": 22046 }, { "epoch": 2.9482481947044663, "grad_norm": 1.413486361503601, "learning_rate": 1.5604897248475692e-08, "loss": 0.3329, "step": 22047 }, { "epoch": 2.9483819202995454, "grad_norm": 1.293760061264038, "learning_rate": 1.552437682347252e-08, "loss": 0.3422, "step": 22048 }, { "epoch": 2.9485156458946244, "grad_norm": 1.4096174240112305, "learning_rate": 1.5444064515308666e-08, "loss": 0.3225, "step": 22049 }, { "epoch": 2.948649371489703, "grad_norm": 1.614760160446167, "learning_rate": 1.5363960325660565e-08, "loss": 0.4195, "step": 22050 }, { "epoch": 2.948783097084782, "grad_norm": 1.2995647192001343, "learning_rate": 1.5284064256195773e-08, "loss": 0.3477, "step": 22051 }, { "epoch": 2.9489168226798608, "grad_norm": 1.5287638902664185, "learning_rate": 1.5204376308579627e-08, "loss": 0.2896, "step": 22052 }, { "epoch": 2.94905054827494, "grad_norm": 1.5452977418899536, "learning_rate": 1.5124896484474127e-08, "loss": 0.3356, "step": 22053 }, { "epoch": 2.949184273870019, "grad_norm": 1.6063107252120972, "learning_rate": 1.504562478553684e-08, "loss": 0.3587, "step": 22054 }, { "epoch": 2.9493179994650975, "grad_norm": 1.4274415969848633, "learning_rate": 1.496656121341755e-08, "loss": 0.3376, "step": 22055 }, { "epoch": 2.9494517250601766, "grad_norm": 1.3482648134231567, "learning_rate": 1.4887705769766058e-08, "loss": 0.328, "step": 22056 }, { "epoch": 2.9495854506552552, "grad_norm": 1.5038604736328125, "learning_rate": 1.4809058456226599e-08, "loss": 0.3438, "step": 22057 }, { "epoch": 2.9497191762503343, "grad_norm": 1.632952332496643, "learning_rate": 1.4730619274435643e-08, "loss": 0.3521, "step": 22058 }, { "epoch": 2.9498529018454134, "grad_norm": 1.4397382736206055, "learning_rate": 1.4652388226031878e-08, "loss": 0.3357, "step": 22059 }, { "epoch": 2.949986627440492, "grad_norm": 1.337106466293335, "learning_rate": 1.4574365312642891e-08, "loss": 0.3172, "step": 22060 }, { "epoch": 2.950120353035571, "grad_norm": 1.4436990022659302, "learning_rate": 1.449655053589627e-08, "loss": 0.3118, "step": 22061 }, { "epoch": 2.9502540786306497, "grad_norm": 1.5434415340423584, "learning_rate": 1.441894389741516e-08, "loss": 0.3331, "step": 22062 }, { "epoch": 2.9503878042257288, "grad_norm": 1.6947423219680786, "learning_rate": 1.4341545398814937e-08, "loss": 0.35, "step": 22063 }, { "epoch": 2.950521529820808, "grad_norm": 1.38620924949646, "learning_rate": 1.4264355041709865e-08, "loss": 0.349, "step": 22064 }, { "epoch": 2.9506552554158865, "grad_norm": 1.5586435794830322, "learning_rate": 1.4187372827709766e-08, "loss": 0.3428, "step": 22065 }, { "epoch": 2.9507889810109655, "grad_norm": 1.6843657493591309, "learning_rate": 1.4110598758417804e-08, "loss": 0.3541, "step": 22066 }, { "epoch": 2.950922706606044, "grad_norm": 1.6519551277160645, "learning_rate": 1.403403283543603e-08, "loss": 0.3846, "step": 22067 }, { "epoch": 2.9510564322011232, "grad_norm": 1.3385058641433716, "learning_rate": 1.3957675060357611e-08, "loss": 0.2927, "step": 22068 }, { "epoch": 2.9511901577962023, "grad_norm": 1.5511231422424316, "learning_rate": 1.3881525434776833e-08, "loss": 0.3789, "step": 22069 }, { "epoch": 2.951323883391281, "grad_norm": 1.46129310131073, "learning_rate": 1.38055839602802e-08, "loss": 0.3717, "step": 22070 }, { "epoch": 2.95145760898636, "grad_norm": 1.7316912412643433, "learning_rate": 1.3729850638450892e-08, "loss": 0.3217, "step": 22071 }, { "epoch": 2.9515913345814386, "grad_norm": 1.7960542440414429, "learning_rate": 1.3654325470865426e-08, "loss": 0.4326, "step": 22072 }, { "epoch": 2.9517250601765177, "grad_norm": 1.5307230949401855, "learning_rate": 1.3579008459100317e-08, "loss": 0.3237, "step": 22073 }, { "epoch": 2.9518587857715968, "grad_norm": 1.6344870328903198, "learning_rate": 1.3503899604725424e-08, "loss": 0.3519, "step": 22074 }, { "epoch": 2.951992511366676, "grad_norm": 1.4651750326156616, "learning_rate": 1.3428998909305046e-08, "loss": 0.3532, "step": 22075 }, { "epoch": 2.9521262369617545, "grad_norm": 1.6745234727859497, "learning_rate": 1.3354306374401271e-08, "loss": 0.3495, "step": 22076 }, { "epoch": 2.9522599625568335, "grad_norm": 1.6372802257537842, "learning_rate": 1.327982200157063e-08, "loss": 0.3824, "step": 22077 }, { "epoch": 2.952393688151912, "grad_norm": 1.6900583505630493, "learning_rate": 1.3205545792366326e-08, "loss": 0.3762, "step": 22078 }, { "epoch": 2.9525274137469912, "grad_norm": 1.8080791234970093, "learning_rate": 1.3131477748336008e-08, "loss": 0.3944, "step": 22079 }, { "epoch": 2.9526611393420703, "grad_norm": 1.7625123262405396, "learning_rate": 1.3057617871022888e-08, "loss": 0.3712, "step": 22080 }, { "epoch": 2.952794864937149, "grad_norm": 1.6536566019058228, "learning_rate": 1.2983966161967954e-08, "loss": 0.3982, "step": 22081 }, { "epoch": 2.952928590532228, "grad_norm": 1.677746057510376, "learning_rate": 1.2910522622705534e-08, "loss": 0.384, "step": 22082 }, { "epoch": 2.9530623161273066, "grad_norm": 1.5278812646865845, "learning_rate": 1.2837287254766629e-08, "loss": 0.33, "step": 22083 }, { "epoch": 2.9531960417223857, "grad_norm": 1.6542762517929077, "learning_rate": 1.2764260059677792e-08, "loss": 0.3517, "step": 22084 }, { "epoch": 2.9533297673174648, "grad_norm": 1.583396077156067, "learning_rate": 1.2691441038961139e-08, "loss": 0.3352, "step": 22085 }, { "epoch": 2.9534634929125434, "grad_norm": 1.304334282875061, "learning_rate": 1.2618830194135456e-08, "loss": 0.3269, "step": 22086 }, { "epoch": 2.9535972185076225, "grad_norm": 1.615761160850525, "learning_rate": 1.2546427526711757e-08, "loss": 0.3398, "step": 22087 }, { "epoch": 2.953730944102701, "grad_norm": 1.5632275342941284, "learning_rate": 1.2474233038202167e-08, "loss": 0.3351, "step": 22088 }, { "epoch": 2.95386466969778, "grad_norm": 1.5585885047912598, "learning_rate": 1.2402246730109924e-08, "loss": 0.3072, "step": 22089 }, { "epoch": 2.9539983952928592, "grad_norm": 1.5410196781158447, "learning_rate": 1.2330468603934942e-08, "loss": 0.3833, "step": 22090 }, { "epoch": 2.954132120887938, "grad_norm": 1.6962624788284302, "learning_rate": 1.2258898661174911e-08, "loss": 0.367, "step": 22091 }, { "epoch": 2.954265846483017, "grad_norm": 1.795861840248108, "learning_rate": 1.2187536903320863e-08, "loss": 0.4261, "step": 22092 }, { "epoch": 2.9543995720780956, "grad_norm": 1.4791733026504517, "learning_rate": 1.2116383331860493e-08, "loss": 0.3839, "step": 22093 }, { "epoch": 2.9545332976731746, "grad_norm": 1.6617205142974854, "learning_rate": 1.2045437948275952e-08, "loss": 0.3715, "step": 22094 }, { "epoch": 2.9546670232682537, "grad_norm": 1.457046389579773, "learning_rate": 1.1974700754047164e-08, "loss": 0.3199, "step": 22095 }, { "epoch": 2.9548007488633323, "grad_norm": 1.3638380765914917, "learning_rate": 1.1904171750648508e-08, "loss": 0.3236, "step": 22096 }, { "epoch": 2.9549344744584114, "grad_norm": 1.3656083345413208, "learning_rate": 1.1833850939549918e-08, "loss": 0.3425, "step": 22097 }, { "epoch": 2.95506820005349, "grad_norm": 1.5414594411849976, "learning_rate": 1.1763738322216888e-08, "loss": 0.3903, "step": 22098 }, { "epoch": 2.955201925648569, "grad_norm": 1.62140953540802, "learning_rate": 1.1693833900110474e-08, "loss": 0.4093, "step": 22099 }, { "epoch": 2.955335651243648, "grad_norm": 1.5548747777938843, "learning_rate": 1.1624137674689507e-08, "loss": 0.3586, "step": 22100 }, { "epoch": 2.955469376838727, "grad_norm": 1.5562431812286377, "learning_rate": 1.1554649647403937e-08, "loss": 0.3388, "step": 22101 }, { "epoch": 2.955603102433806, "grad_norm": 1.5606025457382202, "learning_rate": 1.1485369819705939e-08, "loss": 0.3556, "step": 22102 }, { "epoch": 2.9557368280288845, "grad_norm": 1.5031514167785645, "learning_rate": 1.1416298193035469e-08, "loss": 0.3484, "step": 22103 }, { "epoch": 2.9558705536239636, "grad_norm": 1.3654319047927856, "learning_rate": 1.1347434768834708e-08, "loss": 0.2968, "step": 22104 }, { "epoch": 2.9560042792190426, "grad_norm": 1.4673477411270142, "learning_rate": 1.1278779548539176e-08, "loss": 0.3486, "step": 22105 }, { "epoch": 2.9561380048141217, "grad_norm": 1.5142109394073486, "learning_rate": 1.1210332533578839e-08, "loss": 0.3211, "step": 22106 }, { "epoch": 2.9562717304092003, "grad_norm": 1.4431949853897095, "learning_rate": 1.1142093725381441e-08, "loss": 0.3484, "step": 22107 }, { "epoch": 2.956405456004279, "grad_norm": 1.5324714183807373, "learning_rate": 1.1074063125368073e-08, "loss": 0.3435, "step": 22108 }, { "epoch": 2.956539181599358, "grad_norm": 1.5592246055603027, "learning_rate": 1.1006240734957596e-08, "loss": 0.3849, "step": 22109 }, { "epoch": 2.956672907194437, "grad_norm": 1.628833532333374, "learning_rate": 1.0938626555564436e-08, "loss": 0.3498, "step": 22110 }, { "epoch": 2.956806632789516, "grad_norm": 1.435444712638855, "learning_rate": 1.0871220588596353e-08, "loss": 0.3324, "step": 22111 }, { "epoch": 2.956940358384595, "grad_norm": 1.6318798065185547, "learning_rate": 1.0804022835458895e-08, "loss": 0.3725, "step": 22112 }, { "epoch": 2.957074083979674, "grad_norm": 1.7556695938110352, "learning_rate": 1.0737033297553156e-08, "loss": 0.3721, "step": 22113 }, { "epoch": 2.9572078095747525, "grad_norm": 1.6174085140228271, "learning_rate": 1.0670251976275803e-08, "loss": 0.3652, "step": 22114 }, { "epoch": 2.9573415351698316, "grad_norm": 1.4284385442733765, "learning_rate": 1.0603678873017941e-08, "loss": 0.3125, "step": 22115 }, { "epoch": 2.9574752607649106, "grad_norm": 1.5501993894577026, "learning_rate": 1.0537313989167353e-08, "loss": 0.3115, "step": 22116 }, { "epoch": 2.9576089863599893, "grad_norm": 1.473180890083313, "learning_rate": 1.0471157326107372e-08, "loss": 0.3332, "step": 22117 }, { "epoch": 2.9577427119550683, "grad_norm": 1.7096822261810303, "learning_rate": 1.040520888521801e-08, "loss": 0.3866, "step": 22118 }, { "epoch": 2.957876437550147, "grad_norm": 1.6601336002349854, "learning_rate": 1.0339468667872609e-08, "loss": 0.3677, "step": 22119 }, { "epoch": 2.958010163145226, "grad_norm": 1.614563226699829, "learning_rate": 1.0273936675441187e-08, "loss": 0.2962, "step": 22120 }, { "epoch": 2.958143888740305, "grad_norm": 1.8810795545578003, "learning_rate": 1.0208612909291537e-08, "loss": 0.4137, "step": 22121 }, { "epoch": 2.9582776143353837, "grad_norm": 1.8962736129760742, "learning_rate": 1.0143497370783683e-08, "loss": 0.4056, "step": 22122 }, { "epoch": 2.958411339930463, "grad_norm": 1.3922019004821777, "learning_rate": 1.0078590061275428e-08, "loss": 0.3012, "step": 22123 }, { "epoch": 2.9585450655255414, "grad_norm": 1.5836387872695923, "learning_rate": 1.0013890982120133e-08, "loss": 0.3382, "step": 22124 }, { "epoch": 2.9586787911206205, "grad_norm": 1.6255797147750854, "learning_rate": 9.94940013466561e-09, "loss": 0.3437, "step": 22125 }, { "epoch": 2.9588125167156996, "grad_norm": 1.48419988155365, "learning_rate": 9.885117520256338e-09, "loss": 0.3488, "step": 22126 }, { "epoch": 2.958946242310778, "grad_norm": 1.801751732826233, "learning_rate": 9.821043140232356e-09, "loss": 0.3686, "step": 22127 }, { "epoch": 2.9590799679058573, "grad_norm": 1.5909297466278076, "learning_rate": 9.757176995928153e-09, "loss": 0.365, "step": 22128 }, { "epoch": 2.959213693500936, "grad_norm": 1.6510050296783447, "learning_rate": 9.693519088677106e-09, "loss": 0.3866, "step": 22129 }, { "epoch": 2.959347419096015, "grad_norm": 1.7215285301208496, "learning_rate": 9.630069419804821e-09, "loss": 0.4232, "step": 22130 }, { "epoch": 2.959481144691094, "grad_norm": 1.5931317806243896, "learning_rate": 9.566827990633576e-09, "loss": 0.35, "step": 22131 }, { "epoch": 2.9596148702861726, "grad_norm": 1.6030317544937134, "learning_rate": 9.503794802482314e-09, "loss": 0.3215, "step": 22132 }, { "epoch": 2.9597485958812517, "grad_norm": 1.6255704164505005, "learning_rate": 9.440969856664428e-09, "loss": 0.4006, "step": 22133 }, { "epoch": 2.9598823214763303, "grad_norm": 1.7018259763717651, "learning_rate": 9.378353154489983e-09, "loss": 0.4039, "step": 22134 }, { "epoch": 2.9600160470714094, "grad_norm": 1.665073037147522, "learning_rate": 9.31594469726349e-09, "loss": 0.3974, "step": 22135 }, { "epoch": 2.9601497726664885, "grad_norm": 1.6080793142318726, "learning_rate": 9.253744486286132e-09, "loss": 0.365, "step": 22136 }, { "epoch": 2.960283498261567, "grad_norm": 1.475387454032898, "learning_rate": 9.191752522854647e-09, "loss": 0.382, "step": 22137 }, { "epoch": 2.960417223856646, "grad_norm": 1.666695475578308, "learning_rate": 9.129968808260225e-09, "loss": 0.3795, "step": 22138 }, { "epoch": 2.960550949451725, "grad_norm": 1.4904379844665527, "learning_rate": 9.068393343791837e-09, "loss": 0.3547, "step": 22139 }, { "epoch": 2.960684675046804, "grad_norm": 1.4746145009994507, "learning_rate": 9.007026130732899e-09, "loss": 0.3726, "step": 22140 }, { "epoch": 2.960818400641883, "grad_norm": 1.5225833654403687, "learning_rate": 8.945867170361278e-09, "loss": 0.3349, "step": 22141 }, { "epoch": 2.960952126236962, "grad_norm": 1.6785800457000732, "learning_rate": 8.88491646395262e-09, "loss": 0.4105, "step": 22142 }, { "epoch": 2.9610858518320406, "grad_norm": 1.7005842924118042, "learning_rate": 8.82417401277813e-09, "loss": 0.3548, "step": 22143 }, { "epoch": 2.9612195774271193, "grad_norm": 1.490071177482605, "learning_rate": 8.763639818103464e-09, "loss": 0.3651, "step": 22144 }, { "epoch": 2.9613533030221983, "grad_norm": 1.5956265926361084, "learning_rate": 8.703313881188724e-09, "loss": 0.3388, "step": 22145 }, { "epoch": 2.9614870286172774, "grad_norm": 1.63701331615448, "learning_rate": 8.643196203294013e-09, "loss": 0.3992, "step": 22146 }, { "epoch": 2.9616207542123565, "grad_norm": 1.64596426486969, "learning_rate": 8.583286785670552e-09, "loss": 0.3758, "step": 22147 }, { "epoch": 2.961754479807435, "grad_norm": 1.496146559715271, "learning_rate": 8.523585629568454e-09, "loss": 0.3438, "step": 22148 }, { "epoch": 2.961888205402514, "grad_norm": 1.3700833320617676, "learning_rate": 8.464092736231166e-09, "loss": 0.2825, "step": 22149 }, { "epoch": 2.962021930997593, "grad_norm": 1.6656126976013184, "learning_rate": 8.40480810689881e-09, "loss": 0.388, "step": 22150 }, { "epoch": 2.962155656592672, "grad_norm": 1.540081262588501, "learning_rate": 8.345731742807061e-09, "loss": 0.3316, "step": 22151 }, { "epoch": 2.962289382187751, "grad_norm": 1.5660656690597534, "learning_rate": 8.28686364518827e-09, "loss": 0.3421, "step": 22152 }, { "epoch": 2.9624231077828296, "grad_norm": 1.6048998832702637, "learning_rate": 8.228203815268121e-09, "loss": 0.359, "step": 22153 }, { "epoch": 2.9625568333779086, "grad_norm": 1.4242249727249146, "learning_rate": 8.169752254270081e-09, "loss": 0.3481, "step": 22154 }, { "epoch": 2.9626905589729873, "grad_norm": 1.5164504051208496, "learning_rate": 8.111508963412062e-09, "loss": 0.3372, "step": 22155 }, { "epoch": 2.9628242845680663, "grad_norm": 1.6279053688049316, "learning_rate": 8.053473943908651e-09, "loss": 0.381, "step": 22156 }, { "epoch": 2.9629580101631454, "grad_norm": 1.7106561660766602, "learning_rate": 7.99564719696999e-09, "loss": 0.3717, "step": 22157 }, { "epoch": 2.963091735758224, "grad_norm": 1.5342135429382324, "learning_rate": 7.938028723800672e-09, "loss": 0.3622, "step": 22158 }, { "epoch": 2.963225461353303, "grad_norm": 1.5336884260177612, "learning_rate": 7.880618525600847e-09, "loss": 0.3913, "step": 22159 }, { "epoch": 2.9633591869483817, "grad_norm": 1.5649466514587402, "learning_rate": 7.823416603568446e-09, "loss": 0.3775, "step": 22160 }, { "epoch": 2.963492912543461, "grad_norm": 1.5983009338378906, "learning_rate": 7.766422958895848e-09, "loss": 0.401, "step": 22161 }, { "epoch": 2.96362663813854, "grad_norm": 1.425809383392334, "learning_rate": 7.70963759277099e-09, "loss": 0.3492, "step": 22162 }, { "epoch": 2.9637603637336185, "grad_norm": 1.3870645761489868, "learning_rate": 7.653060506376264e-09, "loss": 0.3323, "step": 22163 }, { "epoch": 2.9638940893286976, "grad_norm": 1.6561626195907593, "learning_rate": 7.596691700891834e-09, "loss": 0.3955, "step": 22164 }, { "epoch": 2.964027814923776, "grad_norm": 1.6288796663284302, "learning_rate": 7.540531177493427e-09, "loss": 0.3479, "step": 22165 }, { "epoch": 2.9641615405188553, "grad_norm": 1.6949232816696167, "learning_rate": 7.484578937350107e-09, "loss": 0.4097, "step": 22166 }, { "epoch": 2.9642952661139343, "grad_norm": 1.4974019527435303, "learning_rate": 7.428834981629829e-09, "loss": 0.385, "step": 22167 }, { "epoch": 2.964428991709013, "grad_norm": 1.5792781114578247, "learning_rate": 7.373299311492777e-09, "loss": 0.3498, "step": 22168 }, { "epoch": 2.964562717304092, "grad_norm": 1.703270673751831, "learning_rate": 7.3179719280980225e-09, "loss": 0.3692, "step": 22169 }, { "epoch": 2.9646964428991707, "grad_norm": 1.5327261686325073, "learning_rate": 7.2628528325979774e-09, "loss": 0.3399, "step": 22170 }, { "epoch": 2.9648301684942497, "grad_norm": 1.2844221591949463, "learning_rate": 7.2079420261417235e-09, "loss": 0.3041, "step": 22171 }, { "epoch": 2.964963894089329, "grad_norm": 1.5606865882873535, "learning_rate": 7.153239509873899e-09, "loss": 0.3498, "step": 22172 }, { "epoch": 2.9650976196844074, "grad_norm": 1.583666443824768, "learning_rate": 7.0987452849347045e-09, "loss": 0.3724, "step": 22173 }, { "epoch": 2.9652313452794865, "grad_norm": 1.5024676322937012, "learning_rate": 7.044459352459898e-09, "loss": 0.3393, "step": 22174 }, { "epoch": 2.965365070874565, "grad_norm": 1.8029674291610718, "learning_rate": 6.990381713580796e-09, "loss": 0.3849, "step": 22175 }, { "epoch": 2.965498796469644, "grad_norm": 1.4698599576950073, "learning_rate": 6.936512369425386e-09, "loss": 0.3142, "step": 22176 }, { "epoch": 2.9656325220647233, "grad_norm": 1.5150262117385864, "learning_rate": 6.882851321116102e-09, "loss": 0.33, "step": 22177 }, { "epoch": 2.9657662476598023, "grad_norm": 1.4610412120819092, "learning_rate": 6.82939856977094e-09, "loss": 0.34, "step": 22178 }, { "epoch": 2.965899973254881, "grad_norm": 1.7131565809249878, "learning_rate": 6.776154116504563e-09, "loss": 0.4011, "step": 22179 }, { "epoch": 2.96603369884996, "grad_norm": 1.3283063173294067, "learning_rate": 6.723117962427195e-09, "loss": 0.3033, "step": 22180 }, { "epoch": 2.9661674244450387, "grad_norm": 1.4546793699264526, "learning_rate": 6.6702901086435065e-09, "loss": 0.3493, "step": 22181 }, { "epoch": 2.9663011500401177, "grad_norm": 1.6371980905532837, "learning_rate": 6.6176705562559506e-09, "loss": 0.3666, "step": 22182 }, { "epoch": 2.966434875635197, "grad_norm": 1.728973627090454, "learning_rate": 6.565259306359206e-09, "loss": 0.3743, "step": 22183 }, { "epoch": 2.9665686012302754, "grad_norm": 1.6249005794525146, "learning_rate": 6.513056360047954e-09, "loss": 0.344, "step": 22184 }, { "epoch": 2.9667023268253545, "grad_norm": 1.4646488428115845, "learning_rate": 6.4610617184091e-09, "loss": 0.3912, "step": 22185 }, { "epoch": 2.966836052420433, "grad_norm": 1.5914289951324463, "learning_rate": 6.4092753825262254e-09, "loss": 0.3745, "step": 22186 }, { "epoch": 2.966969778015512, "grad_norm": 1.461050033569336, "learning_rate": 6.357697353479575e-09, "loss": 0.3368, "step": 22187 }, { "epoch": 2.9671035036105913, "grad_norm": 1.6507654190063477, "learning_rate": 6.306327632342734e-09, "loss": 0.3804, "step": 22188 }, { "epoch": 2.96723722920567, "grad_norm": 1.4742457866668701, "learning_rate": 6.2551662201892905e-09, "loss": 0.3406, "step": 22189 }, { "epoch": 2.967370954800749, "grad_norm": 1.602725863456726, "learning_rate": 6.2042131180828355e-09, "loss": 0.3605, "step": 22190 }, { "epoch": 2.9675046803958276, "grad_norm": 1.4466229677200317, "learning_rate": 6.153468327086964e-09, "loss": 0.3278, "step": 22191 }, { "epoch": 2.9676384059909067, "grad_norm": 1.4026503562927246, "learning_rate": 6.1029318482586085e-09, "loss": 0.3594, "step": 22192 }, { "epoch": 2.9677721315859857, "grad_norm": 1.3978121280670166, "learning_rate": 6.0526036826513705e-09, "loss": 0.3139, "step": 22193 }, { "epoch": 2.9679058571810644, "grad_norm": 1.556868553161621, "learning_rate": 6.0024838313144095e-09, "loss": 0.3635, "step": 22194 }, { "epoch": 2.9680395827761434, "grad_norm": 1.5053924322128296, "learning_rate": 5.952572295293557e-09, "loss": 0.3746, "step": 22195 }, { "epoch": 2.968173308371222, "grad_norm": 1.4812268018722534, "learning_rate": 5.902869075626871e-09, "loss": 0.3242, "step": 22196 }, { "epoch": 2.968307033966301, "grad_norm": 1.815366268157959, "learning_rate": 5.853374173352411e-09, "loss": 0.42, "step": 22197 }, { "epoch": 2.96844075956138, "grad_norm": 1.688860535621643, "learning_rate": 5.8040875895004625e-09, "loss": 0.343, "step": 22198 }, { "epoch": 2.968574485156459, "grad_norm": 1.5160353183746338, "learning_rate": 5.755009325099092e-09, "loss": 0.3789, "step": 22199 }, { "epoch": 2.968708210751538, "grad_norm": 1.4813988208770752, "learning_rate": 5.706139381170816e-09, "loss": 0.3545, "step": 22200 }, { "epoch": 2.9688419363466165, "grad_norm": 1.5177451372146606, "learning_rate": 5.6574777587348195e-09, "loss": 0.3455, "step": 22201 }, { "epoch": 2.9689756619416956, "grad_norm": 1.6913261413574219, "learning_rate": 5.609024458804735e-09, "loss": 0.3811, "step": 22202 }, { "epoch": 2.9691093875367747, "grad_norm": 1.6593009233474731, "learning_rate": 5.560779482391976e-09, "loss": 0.4125, "step": 22203 }, { "epoch": 2.9692431131318533, "grad_norm": 1.6107127666473389, "learning_rate": 5.512742830500184e-09, "loss": 0.3843, "step": 22204 }, { "epoch": 2.9693768387269324, "grad_norm": 1.9694842100143433, "learning_rate": 5.464914504131891e-09, "loss": 0.4428, "step": 22205 }, { "epoch": 2.969510564322011, "grad_norm": 1.5029587745666504, "learning_rate": 5.417294504284076e-09, "loss": 0.3646, "step": 22206 }, { "epoch": 2.96964428991709, "grad_norm": 1.6546305418014526, "learning_rate": 5.36988283194817e-09, "loss": 0.3802, "step": 22207 }, { "epoch": 2.969778015512169, "grad_norm": 1.5915272235870361, "learning_rate": 5.32267948811338e-09, "loss": 0.3406, "step": 22208 }, { "epoch": 2.969911741107248, "grad_norm": 1.7490209341049194, "learning_rate": 5.275684473764475e-09, "loss": 0.3852, "step": 22209 }, { "epoch": 2.970045466702327, "grad_norm": 1.5525455474853516, "learning_rate": 5.228897789878451e-09, "loss": 0.3024, "step": 22210 }, { "epoch": 2.9701791922974055, "grad_norm": 1.4536575078964233, "learning_rate": 5.182319437433414e-09, "loss": 0.3821, "step": 22211 }, { "epoch": 2.9703129178924845, "grad_norm": 1.5042445659637451, "learning_rate": 5.1359494173985895e-09, "loss": 0.3207, "step": 22212 }, { "epoch": 2.9704466434875636, "grad_norm": 1.6227918863296509, "learning_rate": 5.08978773074098e-09, "loss": 0.3861, "step": 22213 }, { "epoch": 2.9705803690826427, "grad_norm": 1.5258833169937134, "learning_rate": 5.043834378422041e-09, "loss": 0.3536, "step": 22214 }, { "epoch": 2.9707140946777213, "grad_norm": 1.7199859619140625, "learning_rate": 4.998089361401004e-09, "loss": 0.3758, "step": 22215 }, { "epoch": 2.9708478202728004, "grad_norm": 1.549997091293335, "learning_rate": 4.95255268062933e-09, "loss": 0.3921, "step": 22216 }, { "epoch": 2.970981545867879, "grad_norm": 1.7030919790267944, "learning_rate": 4.907224337058481e-09, "loss": 0.3958, "step": 22217 }, { "epoch": 2.971115271462958, "grad_norm": 1.543946385383606, "learning_rate": 4.8621043316321444e-09, "loss": 0.3689, "step": 22218 }, { "epoch": 2.971248997058037, "grad_norm": 1.5781748294830322, "learning_rate": 4.817192665291792e-09, "loss": 0.3522, "step": 22219 }, { "epoch": 2.9713827226531158, "grad_norm": 1.6947940587997437, "learning_rate": 4.77248933897112e-09, "loss": 0.3919, "step": 22220 }, { "epoch": 2.971516448248195, "grad_norm": 1.6470860242843628, "learning_rate": 4.727994353604937e-09, "loss": 0.3793, "step": 22221 }, { "epoch": 2.9716501738432735, "grad_norm": 1.4695788621902466, "learning_rate": 4.683707710118057e-09, "loss": 0.3427, "step": 22222 }, { "epoch": 2.9717838994383525, "grad_norm": 1.5075627565383911, "learning_rate": 4.6396294094352975e-09, "loss": 0.3708, "step": 22223 }, { "epoch": 2.9719176250334316, "grad_norm": 1.574312686920166, "learning_rate": 4.595759452474812e-09, "loss": 0.2974, "step": 22224 }, { "epoch": 2.9720513506285102, "grad_norm": 1.4989244937896729, "learning_rate": 4.552097840151426e-09, "loss": 0.3501, "step": 22225 }, { "epoch": 2.9721850762235893, "grad_norm": 1.5745875835418701, "learning_rate": 4.50864457337441e-09, "loss": 0.3826, "step": 22226 }, { "epoch": 2.972318801818668, "grad_norm": 1.5760798454284668, "learning_rate": 4.465399653050817e-09, "loss": 0.4013, "step": 22227 }, { "epoch": 2.972452527413747, "grad_norm": 1.4698699712753296, "learning_rate": 4.422363080081038e-09, "loss": 0.3196, "step": 22228 }, { "epoch": 2.972586253008826, "grad_norm": 1.6363751888275146, "learning_rate": 4.379534855362133e-09, "loss": 0.38, "step": 22229 }, { "epoch": 2.9727199786039047, "grad_norm": 1.5081733465194702, "learning_rate": 4.336914979787832e-09, "loss": 0.3318, "step": 22230 }, { "epoch": 2.9728537041989838, "grad_norm": 1.4902421236038208, "learning_rate": 4.294503454244092e-09, "loss": 0.3416, "step": 22231 }, { "epoch": 2.9729874297940624, "grad_norm": 1.7309287786483765, "learning_rate": 4.252300279617982e-09, "loss": 0.3623, "step": 22232 }, { "epoch": 2.9731211553891415, "grad_norm": 1.535492181777954, "learning_rate": 4.2103054567876885e-09, "loss": 0.3572, "step": 22233 }, { "epoch": 2.9732548809842205, "grad_norm": 1.7999907732009888, "learning_rate": 4.1685189866280676e-09, "loss": 0.4303, "step": 22234 }, { "epoch": 2.973388606579299, "grad_norm": 1.6312158107757568, "learning_rate": 4.126940870010643e-09, "loss": 0.3393, "step": 22235 }, { "epoch": 2.9735223321743782, "grad_norm": 1.322710394859314, "learning_rate": 4.085571107802499e-09, "loss": 0.3861, "step": 22236 }, { "epoch": 2.973656057769457, "grad_norm": 1.6536288261413574, "learning_rate": 4.044409700866281e-09, "loss": 0.3776, "step": 22237 }, { "epoch": 2.973789783364536, "grad_norm": 1.6778944730758667, "learning_rate": 4.003456650057968e-09, "loss": 0.3516, "step": 22238 }, { "epoch": 2.973923508959615, "grad_norm": 1.454187273979187, "learning_rate": 3.962711956233545e-09, "loss": 0.3306, "step": 22239 }, { "epoch": 2.9740572345546936, "grad_norm": 1.6880204677581787, "learning_rate": 3.9221756202401096e-09, "loss": 0.4412, "step": 22240 }, { "epoch": 2.9741909601497727, "grad_norm": 1.5218244791030884, "learning_rate": 3.8818476429247634e-09, "loss": 0.355, "step": 22241 }, { "epoch": 2.9743246857448513, "grad_norm": 1.5361510515213013, "learning_rate": 3.8417280251257235e-09, "loss": 0.3878, "step": 22242 }, { "epoch": 2.9744584113399304, "grad_norm": 1.7068085670471191, "learning_rate": 3.80181676768121e-09, "loss": 0.3819, "step": 22243 }, { "epoch": 2.9745921369350095, "grad_norm": 1.4000853300094604, "learning_rate": 3.762113871422779e-09, "loss": 0.3415, "step": 22244 }, { "epoch": 2.9747258625300885, "grad_norm": 1.7498782873153687, "learning_rate": 3.7226193371775465e-09, "loss": 0.3567, "step": 22245 }, { "epoch": 2.974859588125167, "grad_norm": 1.6771210432052612, "learning_rate": 3.6833331657692985e-09, "loss": 0.4289, "step": 22246 }, { "epoch": 2.974993313720246, "grad_norm": 1.6229379177093506, "learning_rate": 3.6442553580162687e-09, "loss": 0.3768, "step": 22247 }, { "epoch": 2.975127039315325, "grad_norm": 1.4154161214828491, "learning_rate": 3.6053859147333614e-09, "loss": 0.324, "step": 22248 }, { "epoch": 2.975260764910404, "grad_norm": 1.418697714805603, "learning_rate": 3.5667248367310392e-09, "loss": 0.3239, "step": 22249 }, { "epoch": 2.975394490505483, "grad_norm": 1.7120435237884521, "learning_rate": 3.5282721248142137e-09, "loss": 0.381, "step": 22250 }, { "epoch": 2.9755282161005616, "grad_norm": 1.5570100545883179, "learning_rate": 3.4900277797844663e-09, "loss": 0.3555, "step": 22251 }, { "epoch": 2.9756619416956407, "grad_norm": 1.5194956064224243, "learning_rate": 3.4519918024400467e-09, "loss": 0.3557, "step": 22252 }, { "epoch": 2.9757956672907193, "grad_norm": 1.3730318546295166, "learning_rate": 3.4141641935736547e-09, "loss": 0.2945, "step": 22253 }, { "epoch": 2.9759293928857984, "grad_norm": 1.6216363906860352, "learning_rate": 3.376544953972438e-09, "loss": 0.33, "step": 22254 }, { "epoch": 2.9760631184808775, "grad_norm": 1.5357571840286255, "learning_rate": 3.3391340844224353e-09, "loss": 0.379, "step": 22255 }, { "epoch": 2.976196844075956, "grad_norm": 1.5569233894348145, "learning_rate": 3.301931585701912e-09, "loss": 0.3832, "step": 22256 }, { "epoch": 2.976330569671035, "grad_norm": 1.7576504945755005, "learning_rate": 3.264937458585804e-09, "loss": 0.3586, "step": 22257 }, { "epoch": 2.976464295266114, "grad_norm": 1.5837814807891846, "learning_rate": 3.228151703847937e-09, "loss": 0.3837, "step": 22258 }, { "epoch": 2.976598020861193, "grad_norm": 1.5091686248779297, "learning_rate": 3.1915743222521446e-09, "loss": 0.3508, "step": 22259 }, { "epoch": 2.976731746456272, "grad_norm": 1.7607334852218628, "learning_rate": 3.1552053145622596e-09, "loss": 0.4265, "step": 22260 }, { "epoch": 2.9768654720513505, "grad_norm": 1.5745912790298462, "learning_rate": 3.119044681536565e-09, "loss": 0.3447, "step": 22261 }, { "epoch": 2.9769991976464296, "grad_norm": 1.4709348678588867, "learning_rate": 3.083092423928902e-09, "loss": 0.3281, "step": 22262 }, { "epoch": 2.9771329232415082, "grad_norm": 1.81768798828125, "learning_rate": 3.0473485424875603e-09, "loss": 0.4085, "step": 22263 }, { "epoch": 2.9772666488365873, "grad_norm": 1.5832089185714722, "learning_rate": 3.0118130379575005e-09, "loss": 0.327, "step": 22264 }, { "epoch": 2.9774003744316664, "grad_norm": 1.3883129358291626, "learning_rate": 2.9764859110814614e-09, "loss": 0.3331, "step": 22265 }, { "epoch": 2.977534100026745, "grad_norm": 1.6383506059646606, "learning_rate": 2.9413671625933005e-09, "loss": 0.347, "step": 22266 }, { "epoch": 2.977667825621824, "grad_norm": 1.5711519718170166, "learning_rate": 2.906456793226875e-09, "loss": 0.3497, "step": 22267 }, { "epoch": 2.9778015512169027, "grad_norm": 1.8194258213043213, "learning_rate": 2.871754803709381e-09, "loss": 0.4144, "step": 22268 }, { "epoch": 2.977935276811982, "grad_norm": 1.4886397123336792, "learning_rate": 2.8372611947635742e-09, "loss": 0.3422, "step": 22269 }, { "epoch": 2.978069002407061, "grad_norm": 1.5778586864471436, "learning_rate": 2.8029759671088787e-09, "loss": 0.3606, "step": 22270 }, { "epoch": 2.9782027280021395, "grad_norm": 1.503562092781067, "learning_rate": 2.7688991214591677e-09, "loss": 0.3395, "step": 22271 }, { "epoch": 2.9783364535972185, "grad_norm": 1.6470410823822021, "learning_rate": 2.7350306585260943e-09, "loss": 0.3688, "step": 22272 }, { "epoch": 2.978470179192297, "grad_norm": 1.3695263862609863, "learning_rate": 2.7013705790146503e-09, "loss": 0.3381, "step": 22273 }, { "epoch": 2.9786039047873762, "grad_norm": 1.435930609703064, "learning_rate": 2.667918883627607e-09, "loss": 0.3636, "step": 22274 }, { "epoch": 2.9787376303824553, "grad_norm": 1.69691801071167, "learning_rate": 2.634675573061074e-09, "loss": 0.3973, "step": 22275 }, { "epoch": 2.978871355977534, "grad_norm": 1.6868587732315063, "learning_rate": 2.6016406480078305e-09, "loss": 0.387, "step": 22276 }, { "epoch": 2.979005081572613, "grad_norm": 1.686750888824463, "learning_rate": 2.568814109157325e-09, "loss": 0.3815, "step": 22277 }, { "epoch": 2.9791388071676916, "grad_norm": 1.6707063913345337, "learning_rate": 2.5361959571923445e-09, "loss": 0.391, "step": 22278 }, { "epoch": 2.9792725327627707, "grad_norm": 1.5386732816696167, "learning_rate": 2.5037861927945663e-09, "loss": 0.3619, "step": 22279 }, { "epoch": 2.97940625835785, "grad_norm": 1.622697114944458, "learning_rate": 2.4715848166390053e-09, "loss": 0.3524, "step": 22280 }, { "epoch": 2.979539983952929, "grad_norm": 1.5207462310791016, "learning_rate": 2.4395918293973476e-09, "loss": 0.3447, "step": 22281 }, { "epoch": 2.9796737095480075, "grad_norm": 1.4792819023132324, "learning_rate": 2.4078072317346156e-09, "loss": 0.3749, "step": 22282 }, { "epoch": 2.9798074351430865, "grad_norm": 1.7396942377090454, "learning_rate": 2.3762310243147236e-09, "loss": 0.3666, "step": 22283 }, { "epoch": 2.979941160738165, "grad_norm": 1.8536089658737183, "learning_rate": 2.3448632077960332e-09, "loss": 0.4122, "step": 22284 }, { "epoch": 2.9800748863332442, "grad_norm": 1.7033309936523438, "learning_rate": 2.313703782831356e-09, "loss": 0.3986, "step": 22285 }, { "epoch": 2.9802086119283233, "grad_norm": 1.5611618757247925, "learning_rate": 2.282752750071282e-09, "loss": 0.3711, "step": 22286 }, { "epoch": 2.980342337523402, "grad_norm": 1.4842947721481323, "learning_rate": 2.2520101101597412e-09, "loss": 0.3409, "step": 22287 }, { "epoch": 2.980476063118481, "grad_norm": 1.5789676904678345, "learning_rate": 2.2214758637384426e-09, "loss": 0.3733, "step": 22288 }, { "epoch": 2.9806097887135596, "grad_norm": 1.4404479265213013, "learning_rate": 2.1911500114446536e-09, "loss": 0.3389, "step": 22289 }, { "epoch": 2.9807435143086387, "grad_norm": 1.5626124143600464, "learning_rate": 2.1610325539089817e-09, "loss": 0.3718, "step": 22290 }, { "epoch": 2.980877239903718, "grad_norm": 1.531546950340271, "learning_rate": 2.1311234917587022e-09, "loss": 0.3191, "step": 22291 }, { "epoch": 2.9810109654987964, "grad_norm": 1.5170469284057617, "learning_rate": 2.1014228256188705e-09, "loss": 0.3378, "step": 22292 }, { "epoch": 2.9811446910938755, "grad_norm": 1.5909638404846191, "learning_rate": 2.071930556107882e-09, "loss": 0.4078, "step": 22293 }, { "epoch": 2.981278416688954, "grad_norm": 1.564225673675537, "learning_rate": 2.042646683840799e-09, "loss": 0.3739, "step": 22294 }, { "epoch": 2.981412142284033, "grad_norm": 1.7929240465164185, "learning_rate": 2.0135712094282444e-09, "loss": 0.4334, "step": 22295 }, { "epoch": 2.9815458678791122, "grad_norm": 1.5259467363357544, "learning_rate": 1.9847041334752905e-09, "loss": 0.3631, "step": 22296 }, { "epoch": 2.981679593474191, "grad_norm": 1.691005825996399, "learning_rate": 1.956045456583677e-09, "loss": 0.3593, "step": 22297 }, { "epoch": 2.98181331906927, "grad_norm": 1.2505619525909424, "learning_rate": 1.9275951793518154e-09, "loss": 0.3287, "step": 22298 }, { "epoch": 2.9819470446643486, "grad_norm": 1.4262430667877197, "learning_rate": 1.899353302371454e-09, "loss": 0.3444, "step": 22299 }, { "epoch": 2.9820807702594276, "grad_norm": 1.5075546503067017, "learning_rate": 1.8713198262321207e-09, "loss": 0.355, "step": 22300 }, { "epoch": 2.9822144958545067, "grad_norm": 1.512439489364624, "learning_rate": 1.8434947515177936e-09, "loss": 0.341, "step": 22301 }, { "epoch": 2.9823482214495853, "grad_norm": 1.821179747581482, "learning_rate": 1.815878078809119e-09, "loss": 0.3634, "step": 22302 }, { "epoch": 2.9824819470446644, "grad_norm": 1.597002625465393, "learning_rate": 1.7884698086811926e-09, "loss": 0.3633, "step": 22303 }, { "epoch": 2.982615672639743, "grad_norm": 1.823317289352417, "learning_rate": 1.7612699417057788e-09, "loss": 0.3628, "step": 22304 }, { "epoch": 2.982749398234822, "grad_norm": 1.5619186162948608, "learning_rate": 1.7342784784479817e-09, "loss": 0.3855, "step": 22305 }, { "epoch": 2.982883123829901, "grad_norm": 1.6022979021072388, "learning_rate": 1.7074954194729044e-09, "loss": 0.373, "step": 22306 }, { "epoch": 2.98301684942498, "grad_norm": 1.5845285654067993, "learning_rate": 1.680920765337879e-09, "loss": 0.356, "step": 22307 }, { "epoch": 2.983150575020059, "grad_norm": 1.5219188928604126, "learning_rate": 1.6545545165969067e-09, "loss": 0.3554, "step": 22308 }, { "epoch": 2.9832843006151375, "grad_norm": 1.5052961111068726, "learning_rate": 1.6283966737984381e-09, "loss": 0.3284, "step": 22309 }, { "epoch": 2.9834180262102166, "grad_norm": 1.4924882650375366, "learning_rate": 1.6024472374887023e-09, "loss": 0.3361, "step": 22310 }, { "epoch": 2.9835517518052956, "grad_norm": 1.5894914865493774, "learning_rate": 1.5767062082094887e-09, "loss": 0.3621, "step": 22311 }, { "epoch": 2.9836854774003747, "grad_norm": 1.7610788345336914, "learning_rate": 1.5511735864959244e-09, "loss": 0.3819, "step": 22312 }, { "epoch": 2.9838192029954533, "grad_norm": 1.6446856260299683, "learning_rate": 1.5258493728798063e-09, "loss": 0.3564, "step": 22313 }, { "epoch": 2.983952928590532, "grad_norm": 1.5324846506118774, "learning_rate": 1.500733567890711e-09, "loss": 0.3635, "step": 22314 }, { "epoch": 2.984086654185611, "grad_norm": 1.3964418172836304, "learning_rate": 1.4758261720515533e-09, "loss": 0.3274, "step": 22315 }, { "epoch": 2.98422037978069, "grad_norm": 1.382839322090149, "learning_rate": 1.4511271858808075e-09, "loss": 0.3239, "step": 22316 }, { "epoch": 2.984354105375769, "grad_norm": 1.5403969287872314, "learning_rate": 1.4266366098936169e-09, "loss": 0.3297, "step": 22317 }, { "epoch": 2.984487830970848, "grad_norm": 1.5398378372192383, "learning_rate": 1.4023544446006842e-09, "loss": 0.3629, "step": 22318 }, { "epoch": 2.984621556565927, "grad_norm": 1.351636290550232, "learning_rate": 1.3782806905082714e-09, "loss": 0.3481, "step": 22319 }, { "epoch": 2.9847552821610055, "grad_norm": 1.5044087171554565, "learning_rate": 1.3544153481181988e-09, "loss": 0.3611, "step": 22320 }, { "epoch": 2.9848890077560846, "grad_norm": 1.585081696510315, "learning_rate": 1.3307584179267364e-09, "loss": 0.4079, "step": 22321 }, { "epoch": 2.9850227333511636, "grad_norm": 1.4718503952026367, "learning_rate": 1.3073099004290436e-09, "loss": 0.3638, "step": 22322 }, { "epoch": 2.9851564589462423, "grad_norm": 1.5799646377563477, "learning_rate": 1.284069796111398e-09, "loss": 0.3947, "step": 22323 }, { "epoch": 2.9852901845413213, "grad_norm": 1.5137360095977783, "learning_rate": 1.2610381054611875e-09, "loss": 0.3383, "step": 22324 }, { "epoch": 2.9854239101364, "grad_norm": 1.7324237823486328, "learning_rate": 1.2382148289558082e-09, "loss": 0.4001, "step": 22325 }, { "epoch": 2.985557635731479, "grad_norm": 1.7351129055023193, "learning_rate": 1.2155999670726559e-09, "loss": 0.3735, "step": 22326 }, { "epoch": 2.985691361326558, "grad_norm": 1.6593226194381714, "learning_rate": 1.193193520281355e-09, "loss": 0.401, "step": 22327 }, { "epoch": 2.9858250869216367, "grad_norm": 1.5094729661941528, "learning_rate": 1.1709954890515296e-09, "loss": 0.3456, "step": 22328 }, { "epoch": 2.985958812516716, "grad_norm": 1.3878175020217896, "learning_rate": 1.1490058738439225e-09, "loss": 0.3448, "step": 22329 }, { "epoch": 2.9860925381117944, "grad_norm": 1.4065676927566528, "learning_rate": 1.1272246751170558e-09, "loss": 0.3574, "step": 22330 }, { "epoch": 2.9862262637068735, "grad_norm": 1.6719173192977905, "learning_rate": 1.1056518933261207e-09, "loss": 0.3439, "step": 22331 }, { "epoch": 2.9863599893019526, "grad_norm": 1.5907431840896606, "learning_rate": 1.0842875289196475e-09, "loss": 0.3816, "step": 22332 }, { "epoch": 2.986493714897031, "grad_norm": 1.5053400993347168, "learning_rate": 1.0631315823428357e-09, "loss": 0.3383, "step": 22333 }, { "epoch": 2.9866274404921103, "grad_norm": 1.6027690172195435, "learning_rate": 1.0421840540375538e-09, "loss": 0.3701, "step": 22334 }, { "epoch": 2.986761166087189, "grad_norm": 1.4910728931427002, "learning_rate": 1.0214449444390096e-09, "loss": 0.3592, "step": 22335 }, { "epoch": 2.986894891682268, "grad_norm": 1.4757072925567627, "learning_rate": 1.0009142539813e-09, "loss": 0.3452, "step": 22336 }, { "epoch": 2.987028617277347, "grad_norm": 1.527743935585022, "learning_rate": 9.805919830918609e-10, "loss": 0.379, "step": 22337 }, { "epoch": 2.9871623428724257, "grad_norm": 1.845774531364441, "learning_rate": 9.604781321936875e-10, "loss": 0.3512, "step": 22338 }, { "epoch": 2.9872960684675047, "grad_norm": 1.6075879335403442, "learning_rate": 9.405727017064436e-10, "loss": 0.3667, "step": 22339 }, { "epoch": 2.9874297940625834, "grad_norm": 1.3417794704437256, "learning_rate": 9.208756920442429e-10, "loss": 0.3653, "step": 22340 }, { "epoch": 2.9875635196576624, "grad_norm": 1.4942771196365356, "learning_rate": 9.013871036189781e-10, "loss": 0.3413, "step": 22341 }, { "epoch": 2.9876972452527415, "grad_norm": 1.5697009563446045, "learning_rate": 8.821069368358803e-10, "loss": 0.3598, "step": 22342 }, { "epoch": 2.98783097084782, "grad_norm": 1.5428520441055298, "learning_rate": 8.630351920968505e-10, "loss": 0.3471, "step": 22343 }, { "epoch": 2.987964696442899, "grad_norm": 1.6713427305221558, "learning_rate": 8.441718698004587e-10, "loss": 0.355, "step": 22344 }, { "epoch": 2.988098422037978, "grad_norm": 1.6043636798858643, "learning_rate": 8.255169703386134e-10, "loss": 0.3538, "step": 22345 }, { "epoch": 2.988232147633057, "grad_norm": 1.4679776430130005, "learning_rate": 8.070704941010033e-10, "loss": 0.3436, "step": 22346 }, { "epoch": 2.988365873228136, "grad_norm": 1.4655911922454834, "learning_rate": 7.888324414717652e-10, "loss": 0.3147, "step": 22347 }, { "epoch": 2.988499598823215, "grad_norm": 1.6129311323165894, "learning_rate": 7.708028128305956e-10, "loss": 0.3647, "step": 22348 }, { "epoch": 2.9886333244182937, "grad_norm": 1.6744179725646973, "learning_rate": 7.529816085549701e-10, "loss": 0.4013, "step": 22349 }, { "epoch": 2.9887670500133723, "grad_norm": 1.7384462356567383, "learning_rate": 7.353688290145933e-10, "loss": 0.3758, "step": 22350 }, { "epoch": 2.9889007756084514, "grad_norm": 1.5321229696273804, "learning_rate": 7.179644745769488e-10, "loss": 0.3457, "step": 22351 }, { "epoch": 2.9890345012035304, "grad_norm": 1.435744285583496, "learning_rate": 7.007685456050795e-10, "loss": 0.3218, "step": 22352 }, { "epoch": 2.9891682267986095, "grad_norm": 1.4888209104537964, "learning_rate": 6.837810424575875e-10, "loss": 0.3472, "step": 22353 }, { "epoch": 2.989301952393688, "grad_norm": 1.5564380884170532, "learning_rate": 6.670019654875237e-10, "loss": 0.4017, "step": 22354 }, { "epoch": 2.989435677988767, "grad_norm": 1.5097473859786987, "learning_rate": 6.504313150468289e-10, "loss": 0.3641, "step": 22355 }, { "epoch": 2.989569403583846, "grad_norm": 1.585821509361267, "learning_rate": 6.340690914785619e-10, "loss": 0.371, "step": 22356 }, { "epoch": 2.989703129178925, "grad_norm": 1.840911626815796, "learning_rate": 6.179152951257816e-10, "loss": 0.3829, "step": 22357 }, { "epoch": 2.989836854774004, "grad_norm": 1.6738406419754028, "learning_rate": 6.019699263237755e-10, "loss": 0.4168, "step": 22358 }, { "epoch": 2.9899705803690826, "grad_norm": 1.3490147590637207, "learning_rate": 5.862329854045001e-10, "loss": 0.3365, "step": 22359 }, { "epoch": 2.9901043059641617, "grad_norm": 1.7374845743179321, "learning_rate": 5.707044726976918e-10, "loss": 0.3869, "step": 22360 }, { "epoch": 2.9902380315592403, "grad_norm": 1.5178130865097046, "learning_rate": 5.553843885253151e-10, "loss": 0.3658, "step": 22361 }, { "epoch": 2.9903717571543194, "grad_norm": 1.445766806602478, "learning_rate": 5.402727332082248e-10, "loss": 0.3353, "step": 22362 }, { "epoch": 2.9905054827493984, "grad_norm": 1.5314022302627563, "learning_rate": 5.253695070606135e-10, "loss": 0.3559, "step": 22363 }, { "epoch": 2.990639208344477, "grad_norm": 1.6302567720413208, "learning_rate": 5.106747103933441e-10, "loss": 0.3809, "step": 22364 }, { "epoch": 2.990772933939556, "grad_norm": 1.994011402130127, "learning_rate": 4.961883435128378e-10, "loss": 0.41, "step": 22365 }, { "epoch": 2.9909066595346347, "grad_norm": 1.5871838331222534, "learning_rate": 4.819104067199653e-10, "loss": 0.3781, "step": 22366 }, { "epoch": 2.991040385129714, "grad_norm": 1.4956631660461426, "learning_rate": 4.678409003133766e-10, "loss": 0.3095, "step": 22367 }, { "epoch": 2.991174110724793, "grad_norm": 1.3614957332611084, "learning_rate": 4.539798245861704e-10, "loss": 0.3219, "step": 22368 }, { "epoch": 2.9913078363198715, "grad_norm": 1.5551254749298096, "learning_rate": 4.40327179828115e-10, "loss": 0.3516, "step": 22369 }, { "epoch": 2.9914415619149506, "grad_norm": 1.6438894271850586, "learning_rate": 4.2688296632120705e-10, "loss": 0.3597, "step": 22370 }, { "epoch": 2.991575287510029, "grad_norm": 1.7057816982269287, "learning_rate": 4.1364718434855343e-10, "loss": 0.375, "step": 22371 }, { "epoch": 2.9917090131051083, "grad_norm": 1.6758105754852295, "learning_rate": 4.0061983418437923e-10, "loss": 0.4108, "step": 22372 }, { "epoch": 2.9918427387001874, "grad_norm": 1.595291256904602, "learning_rate": 3.8780091610179924e-10, "loss": 0.3387, "step": 22373 }, { "epoch": 2.991976464295266, "grad_norm": 1.4250311851501465, "learning_rate": 3.751904303661569e-10, "loss": 0.3147, "step": 22374 }, { "epoch": 2.992110189890345, "grad_norm": 1.673694133758545, "learning_rate": 3.627883772405749e-10, "loss": 0.3818, "step": 22375 }, { "epoch": 2.9922439154854237, "grad_norm": 1.5873024463653564, "learning_rate": 3.505947569848456e-10, "loss": 0.3501, "step": 22376 }, { "epoch": 2.9923776410805027, "grad_norm": 1.4187895059585571, "learning_rate": 3.386095698509895e-10, "loss": 0.3258, "step": 22377 }, { "epoch": 2.992511366675582, "grad_norm": 1.6273142099380493, "learning_rate": 3.2683281609213745e-10, "loss": 0.3844, "step": 22378 }, { "epoch": 2.9926450922706604, "grad_norm": 1.5777959823608398, "learning_rate": 3.1526449595031815e-10, "loss": 0.3653, "step": 22379 }, { "epoch": 2.9927788178657395, "grad_norm": 1.7691506147384644, "learning_rate": 3.039046096686704e-10, "loss": 0.4081, "step": 22380 }, { "epoch": 2.992912543460818, "grad_norm": 1.6440637111663818, "learning_rate": 2.927531574836717e-10, "loss": 0.361, "step": 22381 }, { "epoch": 2.993046269055897, "grad_norm": 1.5825228691101074, "learning_rate": 2.818101396273587e-10, "loss": 0.3369, "step": 22382 }, { "epoch": 2.9931799946509763, "grad_norm": 1.5457130670547485, "learning_rate": 2.7107555632732705e-10, "loss": 0.3365, "step": 22383 }, { "epoch": 2.9933137202460554, "grad_norm": 1.5401756763458252, "learning_rate": 2.605494078089521e-10, "loss": 0.3651, "step": 22384 }, { "epoch": 2.993447445841134, "grad_norm": 1.5718353986740112, "learning_rate": 2.5023169429094773e-10, "loss": 0.3583, "step": 22385 }, { "epoch": 2.993581171436213, "grad_norm": 1.4894545078277588, "learning_rate": 2.40122415987587e-10, "loss": 0.3271, "step": 22386 }, { "epoch": 2.9937148970312917, "grad_norm": 1.5813277959823608, "learning_rate": 2.3022157310981231e-10, "loss": 0.3286, "step": 22387 }, { "epoch": 2.9938486226263707, "grad_norm": 1.5094188451766968, "learning_rate": 2.205291658641251e-10, "loss": 0.3503, "step": 22388 }, { "epoch": 2.99398234822145, "grad_norm": 1.5635809898376465, "learning_rate": 2.110451944536962e-10, "loss": 0.3733, "step": 22389 }, { "epoch": 2.9941160738165284, "grad_norm": 1.5083894729614258, "learning_rate": 2.0176965907503509e-10, "loss": 0.3504, "step": 22390 }, { "epoch": 2.9942497994116075, "grad_norm": 1.5902777910232544, "learning_rate": 1.927025599213206e-10, "loss": 0.3694, "step": 22391 }, { "epoch": 2.994383525006686, "grad_norm": 1.7590152025222778, "learning_rate": 1.838438971824008e-10, "loss": 0.3835, "step": 22392 }, { "epoch": 2.994517250601765, "grad_norm": 1.711905598640442, "learning_rate": 1.7519367104257279e-10, "loss": 0.3762, "step": 22393 }, { "epoch": 2.9946509761968443, "grad_norm": 1.6400275230407715, "learning_rate": 1.6675188168169266e-10, "loss": 0.3717, "step": 22394 }, { "epoch": 2.994784701791923, "grad_norm": 1.4617305994033813, "learning_rate": 1.5851852927628586e-10, "loss": 0.3662, "step": 22395 }, { "epoch": 2.994918427387002, "grad_norm": 1.565327525138855, "learning_rate": 1.5049361399732675e-10, "loss": 0.4089, "step": 22396 }, { "epoch": 2.9950521529820806, "grad_norm": 1.4674016237258911, "learning_rate": 1.4267713601245904e-10, "loss": 0.3669, "step": 22397 }, { "epoch": 2.9951858785771597, "grad_norm": 1.7457830905914307, "learning_rate": 1.3506909548488545e-10, "loss": 0.4123, "step": 22398 }, { "epoch": 2.9953196041722387, "grad_norm": 1.6267791986465454, "learning_rate": 1.2766949257336792e-10, "loss": 0.3808, "step": 22399 }, { "epoch": 2.9954533297673174, "grad_norm": 1.5260343551635742, "learning_rate": 1.204783274311172e-10, "loss": 0.3646, "step": 22400 }, { "epoch": 2.9955870553623964, "grad_norm": 1.4252662658691406, "learning_rate": 1.1349560020912364e-10, "loss": 0.3145, "step": 22401 }, { "epoch": 2.995720780957475, "grad_norm": 1.4194985628128052, "learning_rate": 1.0672131105282646e-10, "loss": 0.3209, "step": 22402 }, { "epoch": 2.995854506552554, "grad_norm": 1.7668046951293945, "learning_rate": 1.0015546010211375e-10, "loss": 0.3848, "step": 22403 }, { "epoch": 2.995988232147633, "grad_norm": 1.7604402303695679, "learning_rate": 9.379804749465316e-11, "loss": 0.3991, "step": 22404 }, { "epoch": 2.996121957742712, "grad_norm": 1.6049607992172241, "learning_rate": 8.764907336367146e-11, "loss": 0.3435, "step": 22405 }, { "epoch": 2.996255683337791, "grad_norm": 1.3902431726455688, "learning_rate": 8.170853783684429e-11, "loss": 0.3709, "step": 22406 }, { "epoch": 2.9963894089328695, "grad_norm": 1.5619003772735596, "learning_rate": 7.597644103851664e-11, "loss": 0.3549, "step": 22407 }, { "epoch": 2.9965231345279486, "grad_norm": 1.446862816810608, "learning_rate": 7.045278308637215e-11, "loss": 0.3255, "step": 22408 }, { "epoch": 2.9966568601230277, "grad_norm": 1.5430889129638672, "learning_rate": 6.513756409698424e-11, "loss": 0.3454, "step": 22409 }, { "epoch": 2.9967905857181063, "grad_norm": 1.6606395244598389, "learning_rate": 6.003078418137521e-11, "loss": 0.3638, "step": 22410 }, { "epoch": 2.9969243113131854, "grad_norm": 1.5591570138931274, "learning_rate": 5.5132443445016225e-11, "loss": 0.3338, "step": 22411 }, { "epoch": 2.997058036908264, "grad_norm": 1.5504367351531982, "learning_rate": 5.0442541991158056e-11, "loss": 0.3295, "step": 22412 }, { "epoch": 2.997191762503343, "grad_norm": 1.680036187171936, "learning_rate": 4.5961079916390095e-11, "loss": 0.3901, "step": 22413 }, { "epoch": 2.997325488098422, "grad_norm": 1.421204924583435, "learning_rate": 4.16880573150813e-11, "loss": 0.3245, "step": 22414 }, { "epoch": 2.997459213693501, "grad_norm": 1.4074370861053467, "learning_rate": 3.762347427604951e-11, "loss": 0.3419, "step": 22415 }, { "epoch": 2.99759293928858, "grad_norm": 1.824880599975586, "learning_rate": 3.376733088256145e-11, "loss": 0.4021, "step": 22416 }, { "epoch": 2.9977266648836585, "grad_norm": 1.5240685939788818, "learning_rate": 3.0119627217883864e-11, "loss": 0.3459, "step": 22417 }, { "epoch": 2.9978603904787375, "grad_norm": 1.624670386314392, "learning_rate": 2.668036335529145e-11, "loss": 0.3824, "step": 22418 }, { "epoch": 2.9979941160738166, "grad_norm": 1.462699055671692, "learning_rate": 2.3449539368058937e-11, "loss": 0.3168, "step": 22419 }, { "epoch": 2.9981278416688957, "grad_norm": 1.6156619787216187, "learning_rate": 2.042715532279971e-11, "loss": 0.3944, "step": 22420 }, { "epoch": 2.9982615672639743, "grad_norm": 1.4902485609054565, "learning_rate": 1.7613211282796472e-11, "loss": 0.3902, "step": 22421 }, { "epoch": 2.9983952928590534, "grad_norm": 1.4786412715911865, "learning_rate": 1.500770730689105e-11, "loss": 0.3614, "step": 22422 }, { "epoch": 2.998529018454132, "grad_norm": 1.7052795886993408, "learning_rate": 1.2610643449484373e-11, "loss": 0.4001, "step": 22423 }, { "epoch": 2.998662744049211, "grad_norm": 1.5927343368530273, "learning_rate": 1.0422019759426249e-11, "loss": 0.3562, "step": 22424 }, { "epoch": 2.99879646964429, "grad_norm": 1.552304983139038, "learning_rate": 8.441836284456274e-12, "loss": 0.3229, "step": 22425 }, { "epoch": 2.9989301952393688, "grad_norm": 1.882645845413208, "learning_rate": 6.670093063432248e-12, "loss": 0.3161, "step": 22426 }, { "epoch": 2.999063920834448, "grad_norm": 1.9288395643234253, "learning_rate": 5.1067901341017574e-12, "loss": 0.3462, "step": 22427 }, { "epoch": 2.9991976464295265, "grad_norm": 1.5325981378555298, "learning_rate": 3.751927530881716e-12, "loss": 0.3934, "step": 22428 }, { "epoch": 2.9993313720246055, "grad_norm": 1.5895451307296753, "learning_rate": 2.6055052793072522e-12, "loss": 0.3516, "step": 22429 }, { "epoch": 2.9994650976196846, "grad_norm": 1.758899211883545, "learning_rate": 1.667523404913496e-12, "loss": 0.4512, "step": 22430 }, { "epoch": 2.9995988232147632, "grad_norm": 1.6138715744018555, "learning_rate": 9.379819265742385e-13, "loss": 0.3469, "step": 22431 }, { "epoch": 2.9997325488098423, "grad_norm": 1.7010003328323364, "learning_rate": 4.168808598326024e-13, "loss": 0.3893, "step": 22432 }, { "epoch": 2.999866274404921, "grad_norm": 1.7058738470077515, "learning_rate": 1.0422021579081786e-13, "loss": 0.4047, "step": 22433 }, { "epoch": 3.0, "grad_norm": 1.2399357557296753, "learning_rate": 0.0, "loss": 0.2504, "step": 22434 }, { "epoch": 3.0, "step": 22434, "total_flos": 8.585653604906435e+17, "train_loss": 0.687396430635533, "train_runtime": 78944.7497, "train_samples_per_second": 18.185, "train_steps_per_second": 0.284 } ], "logging_steps": 1.0, "max_steps": 22434, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 32860, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.585653604906435e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }