{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 10686, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00028074115665356543, "grad_norm": 6.073578357696533, "learning_rate": 9.354536950420954e-09, "loss": 0.8028, "step": 1 }, { "epoch": 0.0005614823133071309, "grad_norm": 5.649418354034424, "learning_rate": 1.870907390084191e-08, "loss": 0.7859, "step": 2 }, { "epoch": 0.0008422234699606962, "grad_norm": 5.829460620880127, "learning_rate": 2.806361085126286e-08, "loss": 0.8459, "step": 3 }, { "epoch": 0.0011229646266142617, "grad_norm": 5.927548408508301, "learning_rate": 3.741814780168382e-08, "loss": 0.8519, "step": 4 }, { "epoch": 0.001403705783267827, "grad_norm": 6.04041862487793, "learning_rate": 4.677268475210477e-08, "loss": 0.8199, "step": 5 }, { "epoch": 0.0016844469399213925, "grad_norm": 5.618837833404541, "learning_rate": 5.612722170252572e-08, "loss": 0.834, "step": 6 }, { "epoch": 0.0019651880965749578, "grad_norm": 5.914753437042236, "learning_rate": 6.548175865294669e-08, "loss": 0.8431, "step": 7 }, { "epoch": 0.0022459292532285235, "grad_norm": 6.155481815338135, "learning_rate": 7.483629560336764e-08, "loss": 0.8905, "step": 8 }, { "epoch": 0.0025266704098820887, "grad_norm": 6.104146957397461, "learning_rate": 8.419083255378861e-08, "loss": 0.8435, "step": 9 }, { "epoch": 0.002807411566535654, "grad_norm": 6.155229091644287, "learning_rate": 9.354536950420954e-08, "loss": 0.8149, "step": 10 }, { "epoch": 0.0030881527231892197, "grad_norm": 5.91041374206543, "learning_rate": 1.0289990645463051e-07, "loss": 0.8558, "step": 11 }, { "epoch": 0.003368893879842785, "grad_norm": 5.734512805938721, "learning_rate": 1.1225444340505145e-07, "loss": 0.853, "step": 12 }, { "epoch": 0.0036496350364963502, "grad_norm": 5.7818145751953125, "learning_rate": 1.216089803554724e-07, "loss": 0.8389, "step": 13 }, { "epoch": 0.0039303761931499155, "grad_norm": 6.0573272705078125, "learning_rate": 1.3096351730589338e-07, "loss": 0.8861, "step": 14 }, { "epoch": 0.004211117349803481, "grad_norm": 6.075229644775391, "learning_rate": 1.4031805425631432e-07, "loss": 0.8324, "step": 15 }, { "epoch": 0.004491858506457047, "grad_norm": 6.1600661277771, "learning_rate": 1.4967259120673527e-07, "loss": 0.8764, "step": 16 }, { "epoch": 0.004772599663110612, "grad_norm": 6.097347736358643, "learning_rate": 1.5902712815715624e-07, "loss": 0.9159, "step": 17 }, { "epoch": 0.0050533408197641775, "grad_norm": 6.373533725738525, "learning_rate": 1.6838166510757722e-07, "loss": 0.9214, "step": 18 }, { "epoch": 0.005334081976417743, "grad_norm": 6.170802116394043, "learning_rate": 1.7773620205799813e-07, "loss": 0.9023, "step": 19 }, { "epoch": 0.005614823133071308, "grad_norm": 5.920821189880371, "learning_rate": 1.8709073900841908e-07, "loss": 0.9209, "step": 20 }, { "epoch": 0.005895564289724873, "grad_norm": 6.032707214355469, "learning_rate": 1.9644527595884005e-07, "loss": 0.871, "step": 21 }, { "epoch": 0.006176305446378439, "grad_norm": 5.691333293914795, "learning_rate": 2.0579981290926103e-07, "loss": 0.8536, "step": 22 }, { "epoch": 0.006457046603032005, "grad_norm": 5.853001594543457, "learning_rate": 2.1515434985968197e-07, "loss": 0.8784, "step": 23 }, { "epoch": 0.00673778775968557, "grad_norm": 5.614751815795898, "learning_rate": 2.245088868101029e-07, "loss": 0.8405, "step": 24 }, { "epoch": 0.007018528916339135, "grad_norm": 5.751898288726807, "learning_rate": 2.3386342376052386e-07, "loss": 0.8712, "step": 25 }, { "epoch": 0.0072992700729927005, "grad_norm": 5.45972204208374, "learning_rate": 2.432179607109448e-07, "loss": 0.8191, "step": 26 }, { "epoch": 0.007580011229646266, "grad_norm": 5.904362201690674, "learning_rate": 2.525724976613658e-07, "loss": 0.8455, "step": 27 }, { "epoch": 0.007860752386299831, "grad_norm": 5.9625749588012695, "learning_rate": 2.6192703461178676e-07, "loss": 0.9043, "step": 28 }, { "epoch": 0.008141493542953397, "grad_norm": 5.246673107147217, "learning_rate": 2.712815715622077e-07, "loss": 0.8219, "step": 29 }, { "epoch": 0.008422234699606962, "grad_norm": 5.433787822723389, "learning_rate": 2.8063610851262865e-07, "loss": 0.8194, "step": 30 }, { "epoch": 0.008702975856260528, "grad_norm": 4.702797889709473, "learning_rate": 2.899906454630496e-07, "loss": 0.7633, "step": 31 }, { "epoch": 0.008983717012914094, "grad_norm": 4.799088001251221, "learning_rate": 2.9934518241347054e-07, "loss": 0.8496, "step": 32 }, { "epoch": 0.009264458169567658, "grad_norm": 4.480788230895996, "learning_rate": 3.0869971936389154e-07, "loss": 0.8413, "step": 33 }, { "epoch": 0.009545199326221224, "grad_norm": 4.927966117858887, "learning_rate": 3.180542563143125e-07, "loss": 0.787, "step": 34 }, { "epoch": 0.009825940482874789, "grad_norm": 4.581888198852539, "learning_rate": 3.2740879326473343e-07, "loss": 0.7689, "step": 35 }, { "epoch": 0.010106681639528355, "grad_norm": 4.776089668273926, "learning_rate": 3.3676333021515443e-07, "loss": 0.8478, "step": 36 }, { "epoch": 0.010387422796181921, "grad_norm": 4.4883832931518555, "learning_rate": 3.461178671655753e-07, "loss": 0.8311, "step": 37 }, { "epoch": 0.010668163952835485, "grad_norm": 4.310268402099609, "learning_rate": 3.5547240411599627e-07, "loss": 0.8243, "step": 38 }, { "epoch": 0.010948905109489052, "grad_norm": 4.470303535461426, "learning_rate": 3.648269410664172e-07, "loss": 0.8333, "step": 39 }, { "epoch": 0.011229646266142616, "grad_norm": 4.391443252563477, "learning_rate": 3.7418147801683816e-07, "loss": 0.8377, "step": 40 }, { "epoch": 0.011510387422796182, "grad_norm": 4.44397497177124, "learning_rate": 3.8353601496725916e-07, "loss": 0.8357, "step": 41 }, { "epoch": 0.011791128579449747, "grad_norm": 3.539785861968994, "learning_rate": 3.928905519176801e-07, "loss": 0.7979, "step": 42 }, { "epoch": 0.012071869736103313, "grad_norm": 2.8846137523651123, "learning_rate": 4.0224508886810105e-07, "loss": 0.6935, "step": 43 }, { "epoch": 0.012352610892756879, "grad_norm": 3.18035626411438, "learning_rate": 4.1159962581852205e-07, "loss": 0.8754, "step": 44 }, { "epoch": 0.012633352049410443, "grad_norm": 2.7209296226501465, "learning_rate": 4.20954162768943e-07, "loss": 0.8195, "step": 45 }, { "epoch": 0.01291409320606401, "grad_norm": 2.650001287460327, "learning_rate": 4.3030869971936394e-07, "loss": 0.749, "step": 46 }, { "epoch": 0.013194834362717574, "grad_norm": 2.4477689266204834, "learning_rate": 4.396632366697849e-07, "loss": 0.681, "step": 47 }, { "epoch": 0.01347557551937114, "grad_norm": 2.5114424228668213, "learning_rate": 4.490177736202058e-07, "loss": 0.7193, "step": 48 }, { "epoch": 0.013756316676024706, "grad_norm": 2.5794506072998047, "learning_rate": 4.583723105706268e-07, "loss": 0.7745, "step": 49 }, { "epoch": 0.01403705783267827, "grad_norm": 2.4139857292175293, "learning_rate": 4.6772684752104773e-07, "loss": 0.8092, "step": 50 }, { "epoch": 0.014317798989331837, "grad_norm": 2.4023053646087646, "learning_rate": 4.770813844714687e-07, "loss": 0.7918, "step": 51 }, { "epoch": 0.014598540145985401, "grad_norm": 2.1910533905029297, "learning_rate": 4.864359214218896e-07, "loss": 0.7414, "step": 52 }, { "epoch": 0.014879281302638967, "grad_norm": 2.130261182785034, "learning_rate": 4.957904583723106e-07, "loss": 0.7386, "step": 53 }, { "epoch": 0.015160022459292532, "grad_norm": 2.0518133640289307, "learning_rate": 5.051449953227316e-07, "loss": 0.745, "step": 54 }, { "epoch": 0.015440763615946098, "grad_norm": 1.911180019378662, "learning_rate": 5.144995322731526e-07, "loss": 0.7744, "step": 55 }, { "epoch": 0.015721504772599662, "grad_norm": 2.1166069507598877, "learning_rate": 5.238540692235735e-07, "loss": 0.8067, "step": 56 }, { "epoch": 0.016002245929253228, "grad_norm": 1.7378008365631104, "learning_rate": 5.332086061739945e-07, "loss": 0.7059, "step": 57 }, { "epoch": 0.016282987085906794, "grad_norm": 1.721903920173645, "learning_rate": 5.425631431244154e-07, "loss": 0.6848, "step": 58 }, { "epoch": 0.01656372824256036, "grad_norm": 1.6297296285629272, "learning_rate": 5.519176800748364e-07, "loss": 0.7134, "step": 59 }, { "epoch": 0.016844469399213923, "grad_norm": 1.826710820198059, "learning_rate": 5.612722170252573e-07, "loss": 0.7292, "step": 60 }, { "epoch": 0.01712521055586749, "grad_norm": 1.675954818725586, "learning_rate": 5.706267539756782e-07, "loss": 0.6998, "step": 61 }, { "epoch": 0.017405951712521055, "grad_norm": 1.860581874847412, "learning_rate": 5.799812909260992e-07, "loss": 0.7113, "step": 62 }, { "epoch": 0.01768669286917462, "grad_norm": 1.838109016418457, "learning_rate": 5.893358278765201e-07, "loss": 0.7165, "step": 63 }, { "epoch": 0.017967434025828188, "grad_norm": 1.7727512121200562, "learning_rate": 5.986903648269411e-07, "loss": 0.6837, "step": 64 }, { "epoch": 0.01824817518248175, "grad_norm": 1.5861293077468872, "learning_rate": 6.080449017773621e-07, "loss": 0.6792, "step": 65 }, { "epoch": 0.018528916339135316, "grad_norm": 1.6274561882019043, "learning_rate": 6.173994387277831e-07, "loss": 0.6929, "step": 66 }, { "epoch": 0.018809657495788883, "grad_norm": 1.4121606349945068, "learning_rate": 6.26753975678204e-07, "loss": 0.6376, "step": 67 }, { "epoch": 0.01909039865244245, "grad_norm": 1.5709538459777832, "learning_rate": 6.36108512628625e-07, "loss": 0.7105, "step": 68 }, { "epoch": 0.019371139809096015, "grad_norm": 1.6315401792526245, "learning_rate": 6.454630495790459e-07, "loss": 0.7196, "step": 69 }, { "epoch": 0.019651880965749578, "grad_norm": 1.3795241117477417, "learning_rate": 6.548175865294669e-07, "loss": 0.7252, "step": 70 }, { "epoch": 0.019932622122403144, "grad_norm": 1.3909331560134888, "learning_rate": 6.641721234798878e-07, "loss": 0.7319, "step": 71 }, { "epoch": 0.02021336327905671, "grad_norm": 1.493241786956787, "learning_rate": 6.735266604303089e-07, "loss": 0.7072, "step": 72 }, { "epoch": 0.020494104435710276, "grad_norm": 1.3212029933929443, "learning_rate": 6.828811973807298e-07, "loss": 0.7089, "step": 73 }, { "epoch": 0.020774845592363842, "grad_norm": 1.329487681388855, "learning_rate": 6.922357343311506e-07, "loss": 0.7599, "step": 74 }, { "epoch": 0.021055586749017405, "grad_norm": 1.239382028579712, "learning_rate": 7.015902712815716e-07, "loss": 0.6636, "step": 75 }, { "epoch": 0.02133632790567097, "grad_norm": 1.2681066989898682, "learning_rate": 7.109448082319925e-07, "loss": 0.7067, "step": 76 }, { "epoch": 0.021617069062324537, "grad_norm": 1.1106700897216797, "learning_rate": 7.202993451824135e-07, "loss": 0.6762, "step": 77 }, { "epoch": 0.021897810218978103, "grad_norm": 1.2456177473068237, "learning_rate": 7.296538821328344e-07, "loss": 0.75, "step": 78 }, { "epoch": 0.022178551375631666, "grad_norm": 1.062915563583374, "learning_rate": 7.390084190832554e-07, "loss": 0.6481, "step": 79 }, { "epoch": 0.022459292532285232, "grad_norm": 1.066072940826416, "learning_rate": 7.483629560336763e-07, "loss": 0.7333, "step": 80 }, { "epoch": 0.022740033688938798, "grad_norm": 0.975945770740509, "learning_rate": 7.577174929840974e-07, "loss": 0.6478, "step": 81 }, { "epoch": 0.023020774845592364, "grad_norm": 1.0468389987945557, "learning_rate": 7.670720299345183e-07, "loss": 0.6577, "step": 82 }, { "epoch": 0.02330151600224593, "grad_norm": 0.9923246502876282, "learning_rate": 7.764265668849393e-07, "loss": 0.6219, "step": 83 }, { "epoch": 0.023582257158899493, "grad_norm": 1.1474268436431885, "learning_rate": 7.857811038353602e-07, "loss": 0.6846, "step": 84 }, { "epoch": 0.02386299831555306, "grad_norm": 0.9405164122581482, "learning_rate": 7.951356407857812e-07, "loss": 0.6289, "step": 85 }, { "epoch": 0.024143739472206625, "grad_norm": 0.9991530179977417, "learning_rate": 8.044901777362021e-07, "loss": 0.6363, "step": 86 }, { "epoch": 0.02442448062886019, "grad_norm": 1.0833004713058472, "learning_rate": 8.138447146866231e-07, "loss": 0.6228, "step": 87 }, { "epoch": 0.024705221785513758, "grad_norm": 0.9896429181098938, "learning_rate": 8.231992516370441e-07, "loss": 0.6961, "step": 88 }, { "epoch": 0.02498596294216732, "grad_norm": 0.9045128226280212, "learning_rate": 8.32553788587465e-07, "loss": 0.6423, "step": 89 }, { "epoch": 0.025266704098820886, "grad_norm": 1.0337687730789185, "learning_rate": 8.41908325537886e-07, "loss": 0.6517, "step": 90 }, { "epoch": 0.025547445255474453, "grad_norm": 1.0262173414230347, "learning_rate": 8.512628624883069e-07, "loss": 0.6149, "step": 91 }, { "epoch": 0.02582818641212802, "grad_norm": 0.8971832990646362, "learning_rate": 8.606173994387279e-07, "loss": 0.6895, "step": 92 }, { "epoch": 0.026108927568781585, "grad_norm": 1.041611671447754, "learning_rate": 8.699719363891488e-07, "loss": 0.6284, "step": 93 }, { "epoch": 0.026389668725435148, "grad_norm": 0.9133061170578003, "learning_rate": 8.793264733395698e-07, "loss": 0.6183, "step": 94 }, { "epoch": 0.026670409882088714, "grad_norm": 0.8937678337097168, "learning_rate": 8.886810102899906e-07, "loss": 0.6211, "step": 95 }, { "epoch": 0.02695115103874228, "grad_norm": 0.9467660188674927, "learning_rate": 8.980355472404116e-07, "loss": 0.6697, "step": 96 }, { "epoch": 0.027231892195395846, "grad_norm": 0.9453322887420654, "learning_rate": 9.073900841908326e-07, "loss": 0.6369, "step": 97 }, { "epoch": 0.027512633352049412, "grad_norm": 0.7970829606056213, "learning_rate": 9.167446211412536e-07, "loss": 0.6426, "step": 98 }, { "epoch": 0.027793374508702975, "grad_norm": 0.9353342056274414, "learning_rate": 9.260991580916745e-07, "loss": 0.6234, "step": 99 }, { "epoch": 0.02807411566535654, "grad_norm": 0.782704770565033, "learning_rate": 9.354536950420955e-07, "loss": 0.607, "step": 100 }, { "epoch": 0.028354856822010107, "grad_norm": 0.8209283351898193, "learning_rate": 9.448082319925164e-07, "loss": 0.6219, "step": 101 }, { "epoch": 0.028635597978663673, "grad_norm": 0.8043917417526245, "learning_rate": 9.541627689429374e-07, "loss": 0.5847, "step": 102 }, { "epoch": 0.028916339135317236, "grad_norm": 0.882258951663971, "learning_rate": 9.635173058933584e-07, "loss": 0.6425, "step": 103 }, { "epoch": 0.029197080291970802, "grad_norm": 0.8027119040489197, "learning_rate": 9.728718428437792e-07, "loss": 0.5817, "step": 104 }, { "epoch": 0.029477821448624368, "grad_norm": 0.8394242525100708, "learning_rate": 9.822263797942003e-07, "loss": 0.6646, "step": 105 }, { "epoch": 0.029758562605277934, "grad_norm": 0.7680022120475769, "learning_rate": 9.915809167446211e-07, "loss": 0.5845, "step": 106 }, { "epoch": 0.0300393037619315, "grad_norm": 0.8337945938110352, "learning_rate": 1.0009354536950422e-06, "loss": 0.5888, "step": 107 }, { "epoch": 0.030320044918585063, "grad_norm": 0.8383430242538452, "learning_rate": 1.0102899906454632e-06, "loss": 0.6502, "step": 108 }, { "epoch": 0.03060078607523863, "grad_norm": 0.7696283459663391, "learning_rate": 1.019644527595884e-06, "loss": 0.5764, "step": 109 }, { "epoch": 0.030881527231892195, "grad_norm": 0.820706307888031, "learning_rate": 1.0289990645463051e-06, "loss": 0.5963, "step": 110 }, { "epoch": 0.03116226838854576, "grad_norm": 0.8323437571525574, "learning_rate": 1.038353601496726e-06, "loss": 0.6005, "step": 111 }, { "epoch": 0.031443009545199324, "grad_norm": 0.7819299697875977, "learning_rate": 1.047708138447147e-06, "loss": 0.6413, "step": 112 }, { "epoch": 0.03172375070185289, "grad_norm": 0.8218362331390381, "learning_rate": 1.0570626753975679e-06, "loss": 0.5802, "step": 113 }, { "epoch": 0.032004491858506456, "grad_norm": 1.021608829498291, "learning_rate": 1.066417212347989e-06, "loss": 0.656, "step": 114 }, { "epoch": 0.03228523301516002, "grad_norm": 0.7813796997070312, "learning_rate": 1.07577174929841e-06, "loss": 0.6464, "step": 115 }, { "epoch": 0.03256597417181359, "grad_norm": 0.7661817669868469, "learning_rate": 1.0851262862488308e-06, "loss": 0.6281, "step": 116 }, { "epoch": 0.032846715328467155, "grad_norm": 0.7750112414360046, "learning_rate": 1.0944808231992516e-06, "loss": 0.5962, "step": 117 }, { "epoch": 0.03312745648512072, "grad_norm": 0.8634867072105408, "learning_rate": 1.1038353601496727e-06, "loss": 0.5911, "step": 118 }, { "epoch": 0.03340819764177429, "grad_norm": 0.7029651999473572, "learning_rate": 1.1131898971000935e-06, "loss": 0.5254, "step": 119 }, { "epoch": 0.033688938798427846, "grad_norm": 0.7431548833847046, "learning_rate": 1.1225444340505146e-06, "loss": 0.5994, "step": 120 }, { "epoch": 0.03396967995508141, "grad_norm": 0.7729125618934631, "learning_rate": 1.1318989710009354e-06, "loss": 0.591, "step": 121 }, { "epoch": 0.03425042111173498, "grad_norm": 0.7862691879272461, "learning_rate": 1.1412535079513565e-06, "loss": 0.5886, "step": 122 }, { "epoch": 0.034531162268388545, "grad_norm": 0.8104708790779114, "learning_rate": 1.1506080449017775e-06, "loss": 0.585, "step": 123 }, { "epoch": 0.03481190342504211, "grad_norm": 0.7641741037368774, "learning_rate": 1.1599625818521984e-06, "loss": 0.6107, "step": 124 }, { "epoch": 0.03509264458169568, "grad_norm": 0.8046885132789612, "learning_rate": 1.1693171188026194e-06, "loss": 0.5298, "step": 125 }, { "epoch": 0.03537338573834924, "grad_norm": 0.7960103750228882, "learning_rate": 1.1786716557530403e-06, "loss": 0.573, "step": 126 }, { "epoch": 0.03565412689500281, "grad_norm": 0.8036853671073914, "learning_rate": 1.1880261927034613e-06, "loss": 0.6013, "step": 127 }, { "epoch": 0.035934868051656375, "grad_norm": 0.877988874912262, "learning_rate": 1.1973807296538822e-06, "loss": 0.5879, "step": 128 }, { "epoch": 0.03621560920830994, "grad_norm": 0.8558998107910156, "learning_rate": 1.2067352666043032e-06, "loss": 0.6272, "step": 129 }, { "epoch": 0.0364963503649635, "grad_norm": 0.8760024905204773, "learning_rate": 1.2160898035547243e-06, "loss": 0.5597, "step": 130 }, { "epoch": 0.03677709152161707, "grad_norm": 0.6800727844238281, "learning_rate": 1.225444340505145e-06, "loss": 0.5552, "step": 131 }, { "epoch": 0.03705783267827063, "grad_norm": 0.7151376008987427, "learning_rate": 1.2347988774555662e-06, "loss": 0.5566, "step": 132 }, { "epoch": 0.0373385738349242, "grad_norm": 0.7448155283927917, "learning_rate": 1.244153414405987e-06, "loss": 0.5772, "step": 133 }, { "epoch": 0.037619314991577765, "grad_norm": 0.7712084650993347, "learning_rate": 1.253507951356408e-06, "loss": 0.5934, "step": 134 }, { "epoch": 0.03790005614823133, "grad_norm": 0.8580107688903809, "learning_rate": 1.2628624883068289e-06, "loss": 0.6066, "step": 135 }, { "epoch": 0.0381807973048849, "grad_norm": 0.800658106803894, "learning_rate": 1.27221702525725e-06, "loss": 0.5592, "step": 136 }, { "epoch": 0.038461538461538464, "grad_norm": 0.7491589784622192, "learning_rate": 1.2815715622076708e-06, "loss": 0.5398, "step": 137 }, { "epoch": 0.03874227961819203, "grad_norm": 0.7043203711509705, "learning_rate": 1.2909260991580918e-06, "loss": 0.5834, "step": 138 }, { "epoch": 0.03902302077484559, "grad_norm": 0.9559151530265808, "learning_rate": 1.3002806361085127e-06, "loss": 0.6397, "step": 139 }, { "epoch": 0.039303761931499155, "grad_norm": 0.8577326536178589, "learning_rate": 1.3096351730589337e-06, "loss": 0.5862, "step": 140 }, { "epoch": 0.03958450308815272, "grad_norm": 0.9272957444190979, "learning_rate": 1.3189897100093546e-06, "loss": 0.6603, "step": 141 }, { "epoch": 0.03986524424480629, "grad_norm": 0.7770213484764099, "learning_rate": 1.3283442469597756e-06, "loss": 0.5693, "step": 142 }, { "epoch": 0.040145985401459854, "grad_norm": 0.7320612072944641, "learning_rate": 1.3376987839101965e-06, "loss": 0.537, "step": 143 }, { "epoch": 0.04042672655811342, "grad_norm": 0.8152422904968262, "learning_rate": 1.3470533208606177e-06, "loss": 0.5794, "step": 144 }, { "epoch": 0.040707467714766986, "grad_norm": 0.743893563747406, "learning_rate": 1.3564078578110384e-06, "loss": 0.5789, "step": 145 }, { "epoch": 0.04098820887142055, "grad_norm": 0.7537864446640015, "learning_rate": 1.3657623947614596e-06, "loss": 0.602, "step": 146 }, { "epoch": 0.04126895002807412, "grad_norm": 0.7398315072059631, "learning_rate": 1.3751169317118805e-06, "loss": 0.5623, "step": 147 }, { "epoch": 0.041549691184727684, "grad_norm": 0.866960346698761, "learning_rate": 1.3844714686623013e-06, "loss": 0.5189, "step": 148 }, { "epoch": 0.04183043234138124, "grad_norm": 0.7426940202713013, "learning_rate": 1.3938260056127223e-06, "loss": 0.5586, "step": 149 }, { "epoch": 0.04211117349803481, "grad_norm": 0.8084462285041809, "learning_rate": 1.4031805425631432e-06, "loss": 0.5317, "step": 150 }, { "epoch": 0.042391914654688376, "grad_norm": 0.7611818313598633, "learning_rate": 1.4125350795135642e-06, "loss": 0.5646, "step": 151 }, { "epoch": 0.04267265581134194, "grad_norm": 0.74317467212677, "learning_rate": 1.421889616463985e-06, "loss": 0.5743, "step": 152 }, { "epoch": 0.04295339696799551, "grad_norm": 0.6734655499458313, "learning_rate": 1.4312441534144061e-06, "loss": 0.5806, "step": 153 }, { "epoch": 0.043234138124649074, "grad_norm": 0.688694953918457, "learning_rate": 1.440598690364827e-06, "loss": 0.5818, "step": 154 }, { "epoch": 0.04351487928130264, "grad_norm": 0.7829697728157043, "learning_rate": 1.449953227315248e-06, "loss": 0.5194, "step": 155 }, { "epoch": 0.043795620437956206, "grad_norm": 0.9685561060905457, "learning_rate": 1.4593077642656689e-06, "loss": 0.5782, "step": 156 }, { "epoch": 0.04407636159460977, "grad_norm": 0.8144839406013489, "learning_rate": 1.46866230121609e-06, "loss": 0.5486, "step": 157 }, { "epoch": 0.04435710275126333, "grad_norm": 0.835292398929596, "learning_rate": 1.4780168381665108e-06, "loss": 0.6424, "step": 158 }, { "epoch": 0.0446378439079169, "grad_norm": 0.8725515007972717, "learning_rate": 1.487371375116932e-06, "loss": 0.5862, "step": 159 }, { "epoch": 0.044918585064570464, "grad_norm": 0.7808248400688171, "learning_rate": 1.4967259120673526e-06, "loss": 0.5901, "step": 160 }, { "epoch": 0.04519932622122403, "grad_norm": 0.7102718949317932, "learning_rate": 1.506080449017774e-06, "loss": 0.5738, "step": 161 }, { "epoch": 0.045480067377877596, "grad_norm": 0.7492978572845459, "learning_rate": 1.5154349859681948e-06, "loss": 0.5963, "step": 162 }, { "epoch": 0.04576080853453116, "grad_norm": 0.7842692732810974, "learning_rate": 1.5247895229186158e-06, "loss": 0.6135, "step": 163 }, { "epoch": 0.04604154969118473, "grad_norm": 0.731016218662262, "learning_rate": 1.5341440598690366e-06, "loss": 0.5738, "step": 164 }, { "epoch": 0.046322290847838295, "grad_norm": 0.7418588995933533, "learning_rate": 1.5434985968194577e-06, "loss": 0.5433, "step": 165 }, { "epoch": 0.04660303200449186, "grad_norm": 0.7768568396568298, "learning_rate": 1.5528531337698785e-06, "loss": 0.5264, "step": 166 }, { "epoch": 0.04688377316114543, "grad_norm": 0.7436287999153137, "learning_rate": 1.5622076707202996e-06, "loss": 0.5662, "step": 167 }, { "epoch": 0.047164514317798986, "grad_norm": 0.7058064341545105, "learning_rate": 1.5715622076707204e-06, "loss": 0.5687, "step": 168 }, { "epoch": 0.04744525547445255, "grad_norm": 0.7297882437705994, "learning_rate": 1.5809167446211413e-06, "loss": 0.5808, "step": 169 }, { "epoch": 0.04772599663110612, "grad_norm": 0.6743118166923523, "learning_rate": 1.5902712815715623e-06, "loss": 0.5539, "step": 170 }, { "epoch": 0.048006737787759685, "grad_norm": 0.7409380078315735, "learning_rate": 1.5996258185219832e-06, "loss": 0.6305, "step": 171 }, { "epoch": 0.04828747894441325, "grad_norm": 0.9666582942008972, "learning_rate": 1.6089803554724042e-06, "loss": 0.6139, "step": 172 }, { "epoch": 0.04856822010106682, "grad_norm": 0.7424812912940979, "learning_rate": 1.618334892422825e-06, "loss": 0.537, "step": 173 }, { "epoch": 0.04884896125772038, "grad_norm": 0.7016400098800659, "learning_rate": 1.6276894293732461e-06, "loss": 0.5316, "step": 174 }, { "epoch": 0.04912970241437395, "grad_norm": 0.7044817805290222, "learning_rate": 1.637043966323667e-06, "loss": 0.5476, "step": 175 }, { "epoch": 0.049410443571027515, "grad_norm": 0.8542489409446716, "learning_rate": 1.6463985032740882e-06, "loss": 0.5406, "step": 176 }, { "epoch": 0.04969118472768108, "grad_norm": 0.7255383729934692, "learning_rate": 1.655753040224509e-06, "loss": 0.4932, "step": 177 }, { "epoch": 0.04997192588433464, "grad_norm": 0.8024505376815796, "learning_rate": 1.66510757717493e-06, "loss": 0.6012, "step": 178 }, { "epoch": 0.05025266704098821, "grad_norm": 0.7083516716957092, "learning_rate": 1.674462114125351e-06, "loss": 0.5535, "step": 179 }, { "epoch": 0.05053340819764177, "grad_norm": 0.8453170657157898, "learning_rate": 1.683816651075772e-06, "loss": 0.5506, "step": 180 }, { "epoch": 0.05081414935429534, "grad_norm": 0.742327094078064, "learning_rate": 1.6931711880261928e-06, "loss": 0.5545, "step": 181 }, { "epoch": 0.051094890510948905, "grad_norm": 0.7339834570884705, "learning_rate": 1.7025257249766139e-06, "loss": 0.5283, "step": 182 }, { "epoch": 0.05137563166760247, "grad_norm": 0.8246902823448181, "learning_rate": 1.7118802619270347e-06, "loss": 0.593, "step": 183 }, { "epoch": 0.05165637282425604, "grad_norm": 0.7709143161773682, "learning_rate": 1.7212347988774558e-06, "loss": 0.507, "step": 184 }, { "epoch": 0.051937113980909604, "grad_norm": 0.8270139694213867, "learning_rate": 1.7305893358278766e-06, "loss": 0.5726, "step": 185 }, { "epoch": 0.05221785513756317, "grad_norm": 0.982842206954956, "learning_rate": 1.7399438727782977e-06, "loss": 0.5737, "step": 186 }, { "epoch": 0.05249859629421673, "grad_norm": 0.6899937987327576, "learning_rate": 1.7492984097287185e-06, "loss": 0.5721, "step": 187 }, { "epoch": 0.052779337450870295, "grad_norm": 0.8084338307380676, "learning_rate": 1.7586529466791396e-06, "loss": 0.5704, "step": 188 }, { "epoch": 0.05306007860752386, "grad_norm": 0.8003899455070496, "learning_rate": 1.7680074836295604e-06, "loss": 0.5308, "step": 189 }, { "epoch": 0.05334081976417743, "grad_norm": 0.9019588232040405, "learning_rate": 1.7773620205799812e-06, "loss": 0.5492, "step": 190 }, { "epoch": 0.05362156092083099, "grad_norm": 0.865822434425354, "learning_rate": 1.7867165575304025e-06, "loss": 0.5138, "step": 191 }, { "epoch": 0.05390230207748456, "grad_norm": 0.8150571584701538, "learning_rate": 1.7960710944808231e-06, "loss": 0.5333, "step": 192 }, { "epoch": 0.054183043234138126, "grad_norm": 0.8458138704299927, "learning_rate": 1.8054256314312444e-06, "loss": 0.549, "step": 193 }, { "epoch": 0.05446378439079169, "grad_norm": 0.7980986833572388, "learning_rate": 1.8147801683816652e-06, "loss": 0.5312, "step": 194 }, { "epoch": 0.05474452554744526, "grad_norm": 0.8078594207763672, "learning_rate": 1.8241347053320863e-06, "loss": 0.5506, "step": 195 }, { "epoch": 0.055025266704098824, "grad_norm": 0.8306414484977722, "learning_rate": 1.8334892422825071e-06, "loss": 0.5478, "step": 196 }, { "epoch": 0.05530600786075238, "grad_norm": 0.748626708984375, "learning_rate": 1.8428437792329282e-06, "loss": 0.5562, "step": 197 }, { "epoch": 0.05558674901740595, "grad_norm": 0.774010181427002, "learning_rate": 1.852198316183349e-06, "loss": 0.5638, "step": 198 }, { "epoch": 0.055867490174059516, "grad_norm": 0.7905158996582031, "learning_rate": 1.86155285313377e-06, "loss": 0.5299, "step": 199 }, { "epoch": 0.05614823133071308, "grad_norm": 0.7886589169502258, "learning_rate": 1.870907390084191e-06, "loss": 0.571, "step": 200 }, { "epoch": 0.05642897248736665, "grad_norm": 0.805158257484436, "learning_rate": 1.880261927034612e-06, "loss": 0.525, "step": 201 }, { "epoch": 0.056709713644020214, "grad_norm": 0.6530075073242188, "learning_rate": 1.8896164639850328e-06, "loss": 0.5196, "step": 202 }, { "epoch": 0.05699045480067378, "grad_norm": 0.7828499674797058, "learning_rate": 1.8989710009354539e-06, "loss": 0.5465, "step": 203 }, { "epoch": 0.057271195957327346, "grad_norm": 1.0039658546447754, "learning_rate": 1.9083255378858747e-06, "loss": 0.5339, "step": 204 }, { "epoch": 0.05755193711398091, "grad_norm": 1.0169117450714111, "learning_rate": 1.9176800748362958e-06, "loss": 0.572, "step": 205 }, { "epoch": 0.05783267827063447, "grad_norm": 0.8177130222320557, "learning_rate": 1.927034611786717e-06, "loss": 0.5335, "step": 206 }, { "epoch": 0.05811341942728804, "grad_norm": 0.6351256966590881, "learning_rate": 1.936389148737138e-06, "loss": 0.5451, "step": 207 }, { "epoch": 0.058394160583941604, "grad_norm": 0.7631227374076843, "learning_rate": 1.9457436856875585e-06, "loss": 0.5436, "step": 208 }, { "epoch": 0.05867490174059517, "grad_norm": 0.7937856912612915, "learning_rate": 1.9550982226379795e-06, "loss": 0.5426, "step": 209 }, { "epoch": 0.058955642897248736, "grad_norm": 0.768334150314331, "learning_rate": 1.9644527595884006e-06, "loss": 0.5088, "step": 210 }, { "epoch": 0.0592363840539023, "grad_norm": 0.8432625532150269, "learning_rate": 1.9738072965388212e-06, "loss": 0.5384, "step": 211 }, { "epoch": 0.05951712521055587, "grad_norm": 0.798975944519043, "learning_rate": 1.9831618334892423e-06, "loss": 0.5788, "step": 212 }, { "epoch": 0.059797866367209435, "grad_norm": 0.7429670691490173, "learning_rate": 1.9925163704396633e-06, "loss": 0.5478, "step": 213 }, { "epoch": 0.060078607523863, "grad_norm": 0.8425019979476929, "learning_rate": 2.0018709073900844e-06, "loss": 0.5571, "step": 214 }, { "epoch": 0.06035934868051657, "grad_norm": 0.777231752872467, "learning_rate": 2.011225444340505e-06, "loss": 0.5368, "step": 215 }, { "epoch": 0.060640089837170126, "grad_norm": 0.8038666844367981, "learning_rate": 2.0205799812909265e-06, "loss": 0.5585, "step": 216 }, { "epoch": 0.06092083099382369, "grad_norm": 0.6847572922706604, "learning_rate": 2.029934518241347e-06, "loss": 0.5692, "step": 217 }, { "epoch": 0.06120157215047726, "grad_norm": 0.6985204815864563, "learning_rate": 2.039289055191768e-06, "loss": 0.5254, "step": 218 }, { "epoch": 0.061482313307130824, "grad_norm": 0.7749029397964478, "learning_rate": 2.048643592142189e-06, "loss": 0.5529, "step": 219 }, { "epoch": 0.06176305446378439, "grad_norm": 0.7894192337989807, "learning_rate": 2.0579981290926103e-06, "loss": 0.5753, "step": 220 }, { "epoch": 0.06204379562043796, "grad_norm": 0.9177107214927673, "learning_rate": 2.067352666043031e-06, "loss": 0.5163, "step": 221 }, { "epoch": 0.06232453677709152, "grad_norm": 0.8762850761413574, "learning_rate": 2.076707202993452e-06, "loss": 0.5601, "step": 222 }, { "epoch": 0.06260527793374508, "grad_norm": 0.9435070753097534, "learning_rate": 2.086061739943873e-06, "loss": 0.5493, "step": 223 }, { "epoch": 0.06288601909039865, "grad_norm": 0.8169218301773071, "learning_rate": 2.095416276894294e-06, "loss": 0.5298, "step": 224 }, { "epoch": 0.06316676024705221, "grad_norm": 0.7907678484916687, "learning_rate": 2.1047708138447147e-06, "loss": 0.5219, "step": 225 }, { "epoch": 0.06344750140370578, "grad_norm": 0.676476001739502, "learning_rate": 2.1141253507951357e-06, "loss": 0.4889, "step": 226 }, { "epoch": 0.06372824256035935, "grad_norm": 0.7545673251152039, "learning_rate": 2.1234798877455568e-06, "loss": 0.5141, "step": 227 }, { "epoch": 0.06400898371701291, "grad_norm": 0.6979267001152039, "learning_rate": 2.132834424695978e-06, "loss": 0.5098, "step": 228 }, { "epoch": 0.06428972487366648, "grad_norm": 0.783541738986969, "learning_rate": 2.1421889616463985e-06, "loss": 0.5096, "step": 229 }, { "epoch": 0.06457046603032005, "grad_norm": 0.7176050543785095, "learning_rate": 2.15154349859682e-06, "loss": 0.5264, "step": 230 }, { "epoch": 0.06485120718697361, "grad_norm": 0.8634214401245117, "learning_rate": 2.1608980355472406e-06, "loss": 0.548, "step": 231 }, { "epoch": 0.06513194834362718, "grad_norm": 0.8606464266777039, "learning_rate": 2.1702525724976616e-06, "loss": 0.5787, "step": 232 }, { "epoch": 0.06541268950028074, "grad_norm": 0.7644342184066772, "learning_rate": 2.1796071094480827e-06, "loss": 0.5503, "step": 233 }, { "epoch": 0.06569343065693431, "grad_norm": 0.7599000930786133, "learning_rate": 2.1889616463985033e-06, "loss": 0.5946, "step": 234 }, { "epoch": 0.06597417181358788, "grad_norm": 0.9466094374656677, "learning_rate": 2.1983161833489243e-06, "loss": 0.5723, "step": 235 }, { "epoch": 0.06625491297024144, "grad_norm": 0.6076518893241882, "learning_rate": 2.2076707202993454e-06, "loss": 0.502, "step": 236 }, { "epoch": 0.06653565412689501, "grad_norm": 0.6740292906761169, "learning_rate": 2.2170252572497665e-06, "loss": 0.488, "step": 237 }, { "epoch": 0.06681639528354857, "grad_norm": 0.6609193086624146, "learning_rate": 2.226379794200187e-06, "loss": 0.5479, "step": 238 }, { "epoch": 0.06709713644020214, "grad_norm": 0.7286792397499084, "learning_rate": 2.235734331150608e-06, "loss": 0.5128, "step": 239 }, { "epoch": 0.06737787759685569, "grad_norm": 0.7384173274040222, "learning_rate": 2.245088868101029e-06, "loss": 0.5679, "step": 240 }, { "epoch": 0.06765861875350926, "grad_norm": 0.7667889595031738, "learning_rate": 2.2544434050514502e-06, "loss": 0.5323, "step": 241 }, { "epoch": 0.06793935991016282, "grad_norm": 0.7510852217674255, "learning_rate": 2.263797942001871e-06, "loss": 0.5013, "step": 242 }, { "epoch": 0.06822010106681639, "grad_norm": 0.9144022464752197, "learning_rate": 2.2731524789522923e-06, "loss": 0.5599, "step": 243 }, { "epoch": 0.06850084222346996, "grad_norm": 0.8114398717880249, "learning_rate": 2.282507015902713e-06, "loss": 0.5345, "step": 244 }, { "epoch": 0.06878158338012352, "grad_norm": 0.8570141792297363, "learning_rate": 2.291861552853134e-06, "loss": 0.5804, "step": 245 }, { "epoch": 0.06906232453677709, "grad_norm": 0.8347529768943787, "learning_rate": 2.301216089803555e-06, "loss": 0.5321, "step": 246 }, { "epoch": 0.06934306569343066, "grad_norm": 0.6538413166999817, "learning_rate": 2.310570626753976e-06, "loss": 0.4891, "step": 247 }, { "epoch": 0.06962380685008422, "grad_norm": 0.7439711689949036, "learning_rate": 2.3199251637043968e-06, "loss": 0.5169, "step": 248 }, { "epoch": 0.06990454800673779, "grad_norm": 0.7807654142379761, "learning_rate": 2.329279700654818e-06, "loss": 0.5353, "step": 249 }, { "epoch": 0.07018528916339135, "grad_norm": 0.7158603072166443, "learning_rate": 2.338634237605239e-06, "loss": 0.5381, "step": 250 }, { "epoch": 0.07046603032004492, "grad_norm": 0.6879047751426697, "learning_rate": 2.34798877455566e-06, "loss": 0.4972, "step": 251 }, { "epoch": 0.07074677147669849, "grad_norm": 0.7796744108200073, "learning_rate": 2.3573433115060805e-06, "loss": 0.5541, "step": 252 }, { "epoch": 0.07102751263335205, "grad_norm": 0.7775213718414307, "learning_rate": 2.3666978484565016e-06, "loss": 0.5263, "step": 253 }, { "epoch": 0.07130825379000562, "grad_norm": 0.7353174090385437, "learning_rate": 2.3760523854069226e-06, "loss": 0.4889, "step": 254 }, { "epoch": 0.07158899494665918, "grad_norm": 0.8232805728912354, "learning_rate": 2.3854069223573433e-06, "loss": 0.5443, "step": 255 }, { "epoch": 0.07186973610331275, "grad_norm": 0.7795940637588501, "learning_rate": 2.3947614593077643e-06, "loss": 0.5768, "step": 256 }, { "epoch": 0.07215047725996632, "grad_norm": 0.7369815111160278, "learning_rate": 2.4041159962581854e-06, "loss": 0.5662, "step": 257 }, { "epoch": 0.07243121841661988, "grad_norm": 0.723147451877594, "learning_rate": 2.4134705332086064e-06, "loss": 0.5206, "step": 258 }, { "epoch": 0.07271195957327344, "grad_norm": 0.8946998119354248, "learning_rate": 2.422825070159027e-06, "loss": 0.5619, "step": 259 }, { "epoch": 0.072992700729927, "grad_norm": 0.837316632270813, "learning_rate": 2.4321796071094485e-06, "loss": 0.5184, "step": 260 }, { "epoch": 0.07327344188658057, "grad_norm": 0.7697188854217529, "learning_rate": 2.441534144059869e-06, "loss": 0.5332, "step": 261 }, { "epoch": 0.07355418304323413, "grad_norm": 0.9006131887435913, "learning_rate": 2.45088868101029e-06, "loss": 0.5867, "step": 262 }, { "epoch": 0.0738349241998877, "grad_norm": 0.7689294815063477, "learning_rate": 2.4602432179607113e-06, "loss": 0.5182, "step": 263 }, { "epoch": 0.07411566535654127, "grad_norm": 0.7972959280014038, "learning_rate": 2.4695977549111323e-06, "loss": 0.5812, "step": 264 }, { "epoch": 0.07439640651319483, "grad_norm": 0.7685443162918091, "learning_rate": 2.478952291861553e-06, "loss": 0.5638, "step": 265 }, { "epoch": 0.0746771476698484, "grad_norm": 0.6880684494972229, "learning_rate": 2.488306828811974e-06, "loss": 0.5003, "step": 266 }, { "epoch": 0.07495788882650196, "grad_norm": 0.7760937213897705, "learning_rate": 2.497661365762395e-06, "loss": 0.5633, "step": 267 }, { "epoch": 0.07523862998315553, "grad_norm": 0.7446058988571167, "learning_rate": 2.507015902712816e-06, "loss": 0.5474, "step": 268 }, { "epoch": 0.0755193711398091, "grad_norm": 0.8981422781944275, "learning_rate": 2.516370439663237e-06, "loss": 0.5511, "step": 269 }, { "epoch": 0.07580011229646266, "grad_norm": 0.7858171463012695, "learning_rate": 2.5257249766136578e-06, "loss": 0.5815, "step": 270 }, { "epoch": 0.07608085345311623, "grad_norm": 0.7271562218666077, "learning_rate": 2.535079513564079e-06, "loss": 0.5551, "step": 271 }, { "epoch": 0.0763615946097698, "grad_norm": 0.7712613344192505, "learning_rate": 2.5444340505145e-06, "loss": 0.5458, "step": 272 }, { "epoch": 0.07664233576642336, "grad_norm": 0.7486150860786438, "learning_rate": 2.5537885874649205e-06, "loss": 0.4753, "step": 273 }, { "epoch": 0.07692307692307693, "grad_norm": 0.8282490968704224, "learning_rate": 2.5631431244153416e-06, "loss": 0.5463, "step": 274 }, { "epoch": 0.0772038180797305, "grad_norm": 0.8079036474227905, "learning_rate": 2.5724976613657626e-06, "loss": 0.5006, "step": 275 }, { "epoch": 0.07748455923638406, "grad_norm": 0.7917653918266296, "learning_rate": 2.5818521983161837e-06, "loss": 0.5391, "step": 276 }, { "epoch": 0.07776530039303763, "grad_norm": 0.7724319696426392, "learning_rate": 2.5912067352666043e-06, "loss": 0.5012, "step": 277 }, { "epoch": 0.07804604154969118, "grad_norm": 0.7696590423583984, "learning_rate": 2.6005612722170253e-06, "loss": 0.5501, "step": 278 }, { "epoch": 0.07832678270634474, "grad_norm": 0.7920299768447876, "learning_rate": 2.6099158091674464e-06, "loss": 0.5605, "step": 279 }, { "epoch": 0.07860752386299831, "grad_norm": 0.812937319278717, "learning_rate": 2.6192703461178675e-06, "loss": 0.5564, "step": 280 }, { "epoch": 0.07888826501965188, "grad_norm": 0.8756580352783203, "learning_rate": 2.628624883068288e-06, "loss": 0.5502, "step": 281 }, { "epoch": 0.07916900617630544, "grad_norm": 0.7585451602935791, "learning_rate": 2.637979420018709e-06, "loss": 0.5378, "step": 282 }, { "epoch": 0.07944974733295901, "grad_norm": 0.9798131585121155, "learning_rate": 2.64733395696913e-06, "loss": 0.5406, "step": 283 }, { "epoch": 0.07973048848961257, "grad_norm": 0.6927319169044495, "learning_rate": 2.6566884939195512e-06, "loss": 0.5404, "step": 284 }, { "epoch": 0.08001122964626614, "grad_norm": 0.795004665851593, "learning_rate": 2.666043030869972e-06, "loss": 0.5058, "step": 285 }, { "epoch": 0.08029197080291971, "grad_norm": 0.8104981780052185, "learning_rate": 2.675397567820393e-06, "loss": 0.5122, "step": 286 }, { "epoch": 0.08057271195957327, "grad_norm": 0.910351574420929, "learning_rate": 2.684752104770814e-06, "loss": 0.5457, "step": 287 }, { "epoch": 0.08085345311622684, "grad_norm": 0.752348780632019, "learning_rate": 2.6941066417212354e-06, "loss": 0.5467, "step": 288 }, { "epoch": 0.0811341942728804, "grad_norm": 0.7559342980384827, "learning_rate": 2.7034611786716557e-06, "loss": 0.553, "step": 289 }, { "epoch": 0.08141493542953397, "grad_norm": 0.896124541759491, "learning_rate": 2.7128157156220767e-06, "loss": 0.5408, "step": 290 }, { "epoch": 0.08169567658618754, "grad_norm": 0.7341452836990356, "learning_rate": 2.722170252572498e-06, "loss": 0.5549, "step": 291 }, { "epoch": 0.0819764177428411, "grad_norm": 0.7190515398979187, "learning_rate": 2.7315247895229192e-06, "loss": 0.4733, "step": 292 }, { "epoch": 0.08225715889949467, "grad_norm": 0.8105028867721558, "learning_rate": 2.74087932647334e-06, "loss": 0.5223, "step": 293 }, { "epoch": 0.08253790005614824, "grad_norm": 0.7164011001586914, "learning_rate": 2.750233863423761e-06, "loss": 0.5237, "step": 294 }, { "epoch": 0.0828186412128018, "grad_norm": 0.7514110207557678, "learning_rate": 2.759588400374182e-06, "loss": 0.488, "step": 295 }, { "epoch": 0.08309938236945537, "grad_norm": 0.7827313542366028, "learning_rate": 2.7689429373246026e-06, "loss": 0.5395, "step": 296 }, { "epoch": 0.08338012352610892, "grad_norm": 0.7245772480964661, "learning_rate": 2.7782974742750236e-06, "loss": 0.5206, "step": 297 }, { "epoch": 0.08366086468276249, "grad_norm": 0.8863216638565063, "learning_rate": 2.7876520112254447e-06, "loss": 0.5715, "step": 298 }, { "epoch": 0.08394160583941605, "grad_norm": 0.7554477453231812, "learning_rate": 2.7970065481758657e-06, "loss": 0.5577, "step": 299 }, { "epoch": 0.08422234699606962, "grad_norm": 0.7475144267082214, "learning_rate": 2.8063610851262864e-06, "loss": 0.5233, "step": 300 }, { "epoch": 0.08450308815272319, "grad_norm": 0.7585038542747498, "learning_rate": 2.8157156220767074e-06, "loss": 0.5567, "step": 301 }, { "epoch": 0.08478382930937675, "grad_norm": 0.7732409834861755, "learning_rate": 2.8250701590271285e-06, "loss": 0.541, "step": 302 }, { "epoch": 0.08506457046603032, "grad_norm": 0.7325016260147095, "learning_rate": 2.8344246959775495e-06, "loss": 0.5193, "step": 303 }, { "epoch": 0.08534531162268388, "grad_norm": 0.6840338110923767, "learning_rate": 2.84377923292797e-06, "loss": 0.5083, "step": 304 }, { "epoch": 0.08562605277933745, "grad_norm": 0.6712573766708374, "learning_rate": 2.853133769878391e-06, "loss": 0.5338, "step": 305 }, { "epoch": 0.08590679393599102, "grad_norm": 0.8282577991485596, "learning_rate": 2.8624883068288123e-06, "loss": 0.6124, "step": 306 }, { "epoch": 0.08618753509264458, "grad_norm": 0.6931818723678589, "learning_rate": 2.8718428437792333e-06, "loss": 0.5231, "step": 307 }, { "epoch": 0.08646827624929815, "grad_norm": 0.7463021874427795, "learning_rate": 2.881197380729654e-06, "loss": 0.4949, "step": 308 }, { "epoch": 0.08674901740595171, "grad_norm": 0.7152007222175598, "learning_rate": 2.890551917680075e-06, "loss": 0.539, "step": 309 }, { "epoch": 0.08702975856260528, "grad_norm": 0.7241409420967102, "learning_rate": 2.899906454630496e-06, "loss": 0.5563, "step": 310 }, { "epoch": 0.08731049971925885, "grad_norm": 0.7746958136558533, "learning_rate": 2.909260991580917e-06, "loss": 0.5484, "step": 311 }, { "epoch": 0.08759124087591241, "grad_norm": 0.7572389245033264, "learning_rate": 2.9186155285313377e-06, "loss": 0.5298, "step": 312 }, { "epoch": 0.08787198203256598, "grad_norm": 0.7889935374259949, "learning_rate": 2.9279700654817588e-06, "loss": 0.543, "step": 313 }, { "epoch": 0.08815272318921955, "grad_norm": 0.7501704096794128, "learning_rate": 2.93732460243218e-06, "loss": 0.5314, "step": 314 }, { "epoch": 0.08843346434587311, "grad_norm": 0.8016058206558228, "learning_rate": 2.9466791393826005e-06, "loss": 0.5535, "step": 315 }, { "epoch": 0.08871420550252666, "grad_norm": 0.7872318029403687, "learning_rate": 2.9560336763330215e-06, "loss": 0.4671, "step": 316 }, { "epoch": 0.08899494665918023, "grad_norm": 0.7549393177032471, "learning_rate": 2.9653882132834426e-06, "loss": 0.5145, "step": 317 }, { "epoch": 0.0892756878158338, "grad_norm": 0.8216416835784912, "learning_rate": 2.974742750233864e-06, "loss": 0.5564, "step": 318 }, { "epoch": 0.08955642897248736, "grad_norm": 0.8333849906921387, "learning_rate": 2.9840972871842842e-06, "loss": 0.5188, "step": 319 }, { "epoch": 0.08983717012914093, "grad_norm": 0.8397712111473083, "learning_rate": 2.9934518241347053e-06, "loss": 0.5891, "step": 320 }, { "epoch": 0.0901179112857945, "grad_norm": 0.8094610571861267, "learning_rate": 3.0028063610851268e-06, "loss": 0.4991, "step": 321 }, { "epoch": 0.09039865244244806, "grad_norm": 0.6761815547943115, "learning_rate": 3.012160898035548e-06, "loss": 0.5209, "step": 322 }, { "epoch": 0.09067939359910163, "grad_norm": 0.7161853909492493, "learning_rate": 3.021515434985968e-06, "loss": 0.5243, "step": 323 }, { "epoch": 0.09096013475575519, "grad_norm": 0.7544697523117065, "learning_rate": 3.0308699719363895e-06, "loss": 0.5265, "step": 324 }, { "epoch": 0.09124087591240876, "grad_norm": 0.823232889175415, "learning_rate": 3.0402245088868106e-06, "loss": 0.5217, "step": 325 }, { "epoch": 0.09152161706906232, "grad_norm": 0.9152257442474365, "learning_rate": 3.0495790458372316e-06, "loss": 0.5331, "step": 326 }, { "epoch": 0.09180235822571589, "grad_norm": 0.832825779914856, "learning_rate": 3.0589335827876522e-06, "loss": 0.5325, "step": 327 }, { "epoch": 0.09208309938236946, "grad_norm": 0.8086512684822083, "learning_rate": 3.0682881197380733e-06, "loss": 0.5288, "step": 328 }, { "epoch": 0.09236384053902302, "grad_norm": 0.7992445230484009, "learning_rate": 3.0776426566884943e-06, "loss": 0.5526, "step": 329 }, { "epoch": 0.09264458169567659, "grad_norm": 0.726046621799469, "learning_rate": 3.0869971936389154e-06, "loss": 0.4995, "step": 330 }, { "epoch": 0.09292532285233016, "grad_norm": 0.8194549083709717, "learning_rate": 3.096351730589336e-06, "loss": 0.5416, "step": 331 }, { "epoch": 0.09320606400898372, "grad_norm": 0.7737677097320557, "learning_rate": 3.105706267539757e-06, "loss": 0.5339, "step": 332 }, { "epoch": 0.09348680516563729, "grad_norm": 0.8151153922080994, "learning_rate": 3.115060804490178e-06, "loss": 0.5544, "step": 333 }, { "epoch": 0.09376754632229085, "grad_norm": 0.7140392661094666, "learning_rate": 3.124415341440599e-06, "loss": 0.528, "step": 334 }, { "epoch": 0.09404828747894442, "grad_norm": 0.8134286999702454, "learning_rate": 3.13376987839102e-06, "loss": 0.4922, "step": 335 }, { "epoch": 0.09432902863559797, "grad_norm": 0.7863975763320923, "learning_rate": 3.143124415341441e-06, "loss": 0.4945, "step": 336 }, { "epoch": 0.09460976979225154, "grad_norm": 0.8091383576393127, "learning_rate": 3.152478952291862e-06, "loss": 0.5016, "step": 337 }, { "epoch": 0.0948905109489051, "grad_norm": 0.7497513890266418, "learning_rate": 3.1618334892422825e-06, "loss": 0.5357, "step": 338 }, { "epoch": 0.09517125210555867, "grad_norm": 0.7403958439826965, "learning_rate": 3.1711880261927036e-06, "loss": 0.5273, "step": 339 }, { "epoch": 0.09545199326221224, "grad_norm": 0.7487135529518127, "learning_rate": 3.1805425631431246e-06, "loss": 0.4795, "step": 340 }, { "epoch": 0.0957327344188658, "grad_norm": 0.7567718029022217, "learning_rate": 3.1898971000935457e-06, "loss": 0.5271, "step": 341 }, { "epoch": 0.09601347557551937, "grad_norm": 0.7643928527832031, "learning_rate": 3.1992516370439663e-06, "loss": 0.5559, "step": 342 }, { "epoch": 0.09629421673217294, "grad_norm": 0.7757812738418579, "learning_rate": 3.2086061739943874e-06, "loss": 0.5476, "step": 343 }, { "epoch": 0.0965749578888265, "grad_norm": 0.6897493600845337, "learning_rate": 3.2179607109448084e-06, "loss": 0.4861, "step": 344 }, { "epoch": 0.09685569904548007, "grad_norm": 0.8549035787582397, "learning_rate": 3.2273152478952295e-06, "loss": 0.5076, "step": 345 }, { "epoch": 0.09713644020213363, "grad_norm": 0.8369840979576111, "learning_rate": 3.23666978484565e-06, "loss": 0.5164, "step": 346 }, { "epoch": 0.0974171813587872, "grad_norm": 0.7894216775894165, "learning_rate": 3.246024321796071e-06, "loss": 0.5282, "step": 347 }, { "epoch": 0.09769792251544077, "grad_norm": 0.7484057545661926, "learning_rate": 3.2553788587464922e-06, "loss": 0.4986, "step": 348 }, { "epoch": 0.09797866367209433, "grad_norm": 0.8106874823570251, "learning_rate": 3.2647333956969137e-06, "loss": 0.5534, "step": 349 }, { "epoch": 0.0982594048287479, "grad_norm": 0.6894437670707703, "learning_rate": 3.274087932647334e-06, "loss": 0.5158, "step": 350 }, { "epoch": 0.09854014598540146, "grad_norm": 0.7431429028511047, "learning_rate": 3.283442469597755e-06, "loss": 0.51, "step": 351 }, { "epoch": 0.09882088714205503, "grad_norm": 0.7787994742393494, "learning_rate": 3.2927970065481764e-06, "loss": 0.4997, "step": 352 }, { "epoch": 0.0991016282987086, "grad_norm": 0.7309817671775818, "learning_rate": 3.3021515434985975e-06, "loss": 0.544, "step": 353 }, { "epoch": 0.09938236945536216, "grad_norm": 0.7808282971382141, "learning_rate": 3.311506080449018e-06, "loss": 0.5241, "step": 354 }, { "epoch": 0.09966311061201572, "grad_norm": 0.7501707077026367, "learning_rate": 3.320860617399439e-06, "loss": 0.5323, "step": 355 }, { "epoch": 0.09994385176866928, "grad_norm": 0.7073619365692139, "learning_rate": 3.33021515434986e-06, "loss": 0.4831, "step": 356 }, { "epoch": 0.10022459292532285, "grad_norm": 0.8075127601623535, "learning_rate": 3.339569691300281e-06, "loss": 0.5469, "step": 357 }, { "epoch": 0.10050533408197641, "grad_norm": 0.7519919872283936, "learning_rate": 3.348924228250702e-06, "loss": 0.4982, "step": 358 }, { "epoch": 0.10078607523862998, "grad_norm": 0.827935516834259, "learning_rate": 3.358278765201123e-06, "loss": 0.5476, "step": 359 }, { "epoch": 0.10106681639528355, "grad_norm": 0.7817774415016174, "learning_rate": 3.367633302151544e-06, "loss": 0.5202, "step": 360 }, { "epoch": 0.10134755755193711, "grad_norm": 0.8603132963180542, "learning_rate": 3.3769878391019646e-06, "loss": 0.5865, "step": 361 }, { "epoch": 0.10162829870859068, "grad_norm": 0.7862941026687622, "learning_rate": 3.3863423760523857e-06, "loss": 0.5135, "step": 362 }, { "epoch": 0.10190903986524424, "grad_norm": 0.8191604614257812, "learning_rate": 3.3956969130028067e-06, "loss": 0.5339, "step": 363 }, { "epoch": 0.10218978102189781, "grad_norm": 0.7199459671974182, "learning_rate": 3.4050514499532278e-06, "loss": 0.5013, "step": 364 }, { "epoch": 0.10247052217855138, "grad_norm": 0.8049266934394836, "learning_rate": 3.4144059869036484e-06, "loss": 0.494, "step": 365 }, { "epoch": 0.10275126333520494, "grad_norm": 0.8080098628997803, "learning_rate": 3.4237605238540695e-06, "loss": 0.5302, "step": 366 }, { "epoch": 0.10303200449185851, "grad_norm": 0.8382777571678162, "learning_rate": 3.4331150608044905e-06, "loss": 0.5034, "step": 367 }, { "epoch": 0.10331274564851207, "grad_norm": 0.6838870644569397, "learning_rate": 3.4424695977549116e-06, "loss": 0.5109, "step": 368 }, { "epoch": 0.10359348680516564, "grad_norm": 0.7601078152656555, "learning_rate": 3.451824134705332e-06, "loss": 0.4835, "step": 369 }, { "epoch": 0.10387422796181921, "grad_norm": 0.7310308218002319, "learning_rate": 3.4611786716557532e-06, "loss": 0.5082, "step": 370 }, { "epoch": 0.10415496911847277, "grad_norm": 0.813713788986206, "learning_rate": 3.4705332086061743e-06, "loss": 0.5292, "step": 371 }, { "epoch": 0.10443571027512634, "grad_norm": 0.7797373533248901, "learning_rate": 3.4798877455565953e-06, "loss": 0.4605, "step": 372 }, { "epoch": 0.1047164514317799, "grad_norm": 0.7014128565788269, "learning_rate": 3.489242282507016e-06, "loss": 0.4903, "step": 373 }, { "epoch": 0.10499719258843346, "grad_norm": 0.7359755635261536, "learning_rate": 3.498596819457437e-06, "loss": 0.4911, "step": 374 }, { "epoch": 0.10527793374508702, "grad_norm": 0.7577720880508423, "learning_rate": 3.507951356407858e-06, "loss": 0.503, "step": 375 }, { "epoch": 0.10555867490174059, "grad_norm": 0.8713043928146362, "learning_rate": 3.517305893358279e-06, "loss": 0.5975, "step": 376 }, { "epoch": 0.10583941605839416, "grad_norm": 0.7561119794845581, "learning_rate": 3.5266604303086998e-06, "loss": 0.5536, "step": 377 }, { "epoch": 0.10612015721504772, "grad_norm": 0.8345180749893188, "learning_rate": 3.536014967259121e-06, "loss": 0.5266, "step": 378 }, { "epoch": 0.10640089837170129, "grad_norm": 0.7421478629112244, "learning_rate": 3.545369504209542e-06, "loss": 0.4975, "step": 379 }, { "epoch": 0.10668163952835485, "grad_norm": 0.8593834638595581, "learning_rate": 3.5547240411599625e-06, "loss": 0.5739, "step": 380 }, { "epoch": 0.10696238068500842, "grad_norm": 0.6971132755279541, "learning_rate": 3.5640785781103835e-06, "loss": 0.4987, "step": 381 }, { "epoch": 0.10724312184166199, "grad_norm": 0.7154144048690796, "learning_rate": 3.573433115060805e-06, "loss": 0.5, "step": 382 }, { "epoch": 0.10752386299831555, "grad_norm": 0.6905863285064697, "learning_rate": 3.582787652011226e-06, "loss": 0.5299, "step": 383 }, { "epoch": 0.10780460415496912, "grad_norm": 0.7130893468856812, "learning_rate": 3.5921421889616463e-06, "loss": 0.4624, "step": 384 }, { "epoch": 0.10808534531162269, "grad_norm": 0.7693150043487549, "learning_rate": 3.6014967259120677e-06, "loss": 0.5376, "step": 385 }, { "epoch": 0.10836608646827625, "grad_norm": 0.8047574758529663, "learning_rate": 3.610851262862489e-06, "loss": 0.4938, "step": 386 }, { "epoch": 0.10864682762492982, "grad_norm": 0.7278048396110535, "learning_rate": 3.62020579981291e-06, "loss": 0.5318, "step": 387 }, { "epoch": 0.10892756878158338, "grad_norm": 0.783423900604248, "learning_rate": 3.6295603367633305e-06, "loss": 0.5297, "step": 388 }, { "epoch": 0.10920830993823695, "grad_norm": 0.7358670234680176, "learning_rate": 3.6389148737137515e-06, "loss": 0.5068, "step": 389 }, { "epoch": 0.10948905109489052, "grad_norm": 0.7775084972381592, "learning_rate": 3.6482694106641726e-06, "loss": 0.5555, "step": 390 }, { "epoch": 0.10976979225154408, "grad_norm": 0.7384195923805237, "learning_rate": 3.6576239476145936e-06, "loss": 0.5267, "step": 391 }, { "epoch": 0.11005053340819765, "grad_norm": 0.8676061034202576, "learning_rate": 3.6669784845650143e-06, "loss": 0.5341, "step": 392 }, { "epoch": 0.1103312745648512, "grad_norm": 0.7602493762969971, "learning_rate": 3.6763330215154353e-06, "loss": 0.4882, "step": 393 }, { "epoch": 0.11061201572150477, "grad_norm": 0.7962198853492737, "learning_rate": 3.6856875584658564e-06, "loss": 0.5062, "step": 394 }, { "epoch": 0.11089275687815833, "grad_norm": 0.8236138224601746, "learning_rate": 3.6950420954162774e-06, "loss": 0.4992, "step": 395 }, { "epoch": 0.1111734980348119, "grad_norm": 0.8599743843078613, "learning_rate": 3.704396632366698e-06, "loss": 0.5382, "step": 396 }, { "epoch": 0.11145423919146547, "grad_norm": 0.8462352156639099, "learning_rate": 3.713751169317119e-06, "loss": 0.5305, "step": 397 }, { "epoch": 0.11173498034811903, "grad_norm": 0.7529622912406921, "learning_rate": 3.72310570626754e-06, "loss": 0.4833, "step": 398 }, { "epoch": 0.1120157215047726, "grad_norm": 0.9346505999565125, "learning_rate": 3.7324602432179608e-06, "loss": 0.5364, "step": 399 }, { "epoch": 0.11229646266142616, "grad_norm": 0.9087169170379639, "learning_rate": 3.741814780168382e-06, "loss": 0.5304, "step": 400 }, { "epoch": 0.11257720381807973, "grad_norm": 0.7055246829986572, "learning_rate": 3.751169317118803e-06, "loss": 0.4843, "step": 401 }, { "epoch": 0.1128579449747333, "grad_norm": 0.8225287795066833, "learning_rate": 3.760523854069224e-06, "loss": 0.5396, "step": 402 }, { "epoch": 0.11313868613138686, "grad_norm": 0.8399577140808105, "learning_rate": 3.7698783910196446e-06, "loss": 0.4898, "step": 403 }, { "epoch": 0.11341942728804043, "grad_norm": 0.8058261275291443, "learning_rate": 3.7792329279700656e-06, "loss": 0.5178, "step": 404 }, { "epoch": 0.113700168444694, "grad_norm": 0.7236977219581604, "learning_rate": 3.7885874649204867e-06, "loss": 0.4987, "step": 405 }, { "epoch": 0.11398090960134756, "grad_norm": 0.8208003640174866, "learning_rate": 3.7979420018709077e-06, "loss": 0.5161, "step": 406 }, { "epoch": 0.11426165075800113, "grad_norm": 0.8044561147689819, "learning_rate": 3.8072965388213284e-06, "loss": 0.5231, "step": 407 }, { "epoch": 0.11454239191465469, "grad_norm": 0.9247372150421143, "learning_rate": 3.816651075771749e-06, "loss": 0.5591, "step": 408 }, { "epoch": 0.11482313307130826, "grad_norm": 0.7564449906349182, "learning_rate": 3.826005612722171e-06, "loss": 0.5211, "step": 409 }, { "epoch": 0.11510387422796182, "grad_norm": 0.7962033152580261, "learning_rate": 3.8353601496725915e-06, "loss": 0.4561, "step": 410 }, { "epoch": 0.11538461538461539, "grad_norm": 0.8253943920135498, "learning_rate": 3.844714686623012e-06, "loss": 0.4698, "step": 411 }, { "epoch": 0.11566535654126894, "grad_norm": 0.7705097198486328, "learning_rate": 3.854069223573434e-06, "loss": 0.5227, "step": 412 }, { "epoch": 0.11594609769792251, "grad_norm": 0.7166139483451843, "learning_rate": 3.863423760523854e-06, "loss": 0.5114, "step": 413 }, { "epoch": 0.11622683885457608, "grad_norm": 0.8675190210342407, "learning_rate": 3.872778297474276e-06, "loss": 0.5241, "step": 414 }, { "epoch": 0.11650758001122964, "grad_norm": 0.7419388294219971, "learning_rate": 3.882132834424696e-06, "loss": 0.474, "step": 415 }, { "epoch": 0.11678832116788321, "grad_norm": 0.7719217538833618, "learning_rate": 3.891487371375117e-06, "loss": 0.5058, "step": 416 }, { "epoch": 0.11706906232453677, "grad_norm": 0.9033870697021484, "learning_rate": 3.9008419083255384e-06, "loss": 0.4945, "step": 417 }, { "epoch": 0.11734980348119034, "grad_norm": 0.754767119884491, "learning_rate": 3.910196445275959e-06, "loss": 0.4682, "step": 418 }, { "epoch": 0.1176305446378439, "grad_norm": 0.7341263294219971, "learning_rate": 3.91955098222638e-06, "loss": 0.5304, "step": 419 }, { "epoch": 0.11791128579449747, "grad_norm": 0.7991729378700256, "learning_rate": 3.928905519176801e-06, "loss": 0.5031, "step": 420 }, { "epoch": 0.11819202695115104, "grad_norm": 0.9032796025276184, "learning_rate": 3.938260056127222e-06, "loss": 0.5169, "step": 421 }, { "epoch": 0.1184727681078046, "grad_norm": 0.7505165934562683, "learning_rate": 3.9476145930776424e-06, "loss": 0.4967, "step": 422 }, { "epoch": 0.11875350926445817, "grad_norm": 0.7698094844818115, "learning_rate": 3.956969130028064e-06, "loss": 0.5515, "step": 423 }, { "epoch": 0.11903425042111174, "grad_norm": 0.7911322116851807, "learning_rate": 3.9663236669784845e-06, "loss": 0.4701, "step": 424 }, { "epoch": 0.1193149915777653, "grad_norm": 0.7555925846099854, "learning_rate": 3.975678203928906e-06, "loss": 0.5028, "step": 425 }, { "epoch": 0.11959573273441887, "grad_norm": 0.8901088833808899, "learning_rate": 3.985032740879327e-06, "loss": 0.5457, "step": 426 }, { "epoch": 0.11987647389107244, "grad_norm": 0.7833061218261719, "learning_rate": 3.994387277829747e-06, "loss": 0.5151, "step": 427 }, { "epoch": 0.120157215047726, "grad_norm": 0.7623012065887451, "learning_rate": 4.003741814780169e-06, "loss": 0.4722, "step": 428 }, { "epoch": 0.12043795620437957, "grad_norm": 0.8048515915870667, "learning_rate": 4.01309635173059e-06, "loss": 0.4987, "step": 429 }, { "epoch": 0.12071869736103313, "grad_norm": 0.9146219491958618, "learning_rate": 4.02245088868101e-06, "loss": 0.5422, "step": 430 }, { "epoch": 0.12099943851768669, "grad_norm": 0.8742251992225647, "learning_rate": 4.0318054256314315e-06, "loss": 0.5352, "step": 431 }, { "epoch": 0.12128017967434025, "grad_norm": 0.8957265615463257, "learning_rate": 4.041159962581853e-06, "loss": 0.5251, "step": 432 }, { "epoch": 0.12156092083099382, "grad_norm": 0.7855954766273499, "learning_rate": 4.050514499532274e-06, "loss": 0.5136, "step": 433 }, { "epoch": 0.12184166198764738, "grad_norm": 0.8036879897117615, "learning_rate": 4.059869036482694e-06, "loss": 0.5542, "step": 434 }, { "epoch": 0.12212240314430095, "grad_norm": 0.727243185043335, "learning_rate": 4.069223573433116e-06, "loss": 0.5156, "step": 435 }, { "epoch": 0.12240314430095452, "grad_norm": 0.8112772703170776, "learning_rate": 4.078578110383536e-06, "loss": 0.5292, "step": 436 }, { "epoch": 0.12268388545760808, "grad_norm": 0.7895253896713257, "learning_rate": 4.087932647333958e-06, "loss": 0.4958, "step": 437 }, { "epoch": 0.12296462661426165, "grad_norm": 0.7211445569992065, "learning_rate": 4.097287184284378e-06, "loss": 0.5347, "step": 438 }, { "epoch": 0.12324536777091522, "grad_norm": 0.8120772242546082, "learning_rate": 4.106641721234799e-06, "loss": 0.5109, "step": 439 }, { "epoch": 0.12352610892756878, "grad_norm": 0.9701956510543823, "learning_rate": 4.1159962581852205e-06, "loss": 0.5731, "step": 440 }, { "epoch": 0.12380685008422235, "grad_norm": 0.7655613422393799, "learning_rate": 4.125350795135641e-06, "loss": 0.5056, "step": 441 }, { "epoch": 0.12408759124087591, "grad_norm": 0.8565111756324768, "learning_rate": 4.134705332086062e-06, "loss": 0.5134, "step": 442 }, { "epoch": 0.12436833239752948, "grad_norm": 0.8162031769752502, "learning_rate": 4.144059869036483e-06, "loss": 0.5414, "step": 443 }, { "epoch": 0.12464907355418305, "grad_norm": 0.8924745917320251, "learning_rate": 4.153414405986904e-06, "loss": 0.529, "step": 444 }, { "epoch": 0.12492981471083661, "grad_norm": 0.8853678703308105, "learning_rate": 4.1627689429373245e-06, "loss": 0.5486, "step": 445 }, { "epoch": 0.12521055586749016, "grad_norm": 0.7069456577301025, "learning_rate": 4.172123479887746e-06, "loss": 0.4879, "step": 446 }, { "epoch": 0.12549129702414374, "grad_norm": 0.7341776490211487, "learning_rate": 4.181478016838167e-06, "loss": 0.4936, "step": 447 }, { "epoch": 0.1257720381807973, "grad_norm": 0.7338918447494507, "learning_rate": 4.190832553788588e-06, "loss": 0.5155, "step": 448 }, { "epoch": 0.12605277933745088, "grad_norm": 0.868898868560791, "learning_rate": 4.200187090739009e-06, "loss": 0.5179, "step": 449 }, { "epoch": 0.12633352049410443, "grad_norm": 0.7320483326911926, "learning_rate": 4.209541627689429e-06, "loss": 0.5242, "step": 450 }, { "epoch": 0.126614261650758, "grad_norm": 0.7627094388008118, "learning_rate": 4.218896164639851e-06, "loss": 0.5039, "step": 451 }, { "epoch": 0.12689500280741156, "grad_norm": 0.8147349953651428, "learning_rate": 4.2282507015902715e-06, "loss": 0.4611, "step": 452 }, { "epoch": 0.12717574396406514, "grad_norm": 0.8715364933013916, "learning_rate": 4.237605238540692e-06, "loss": 0.4628, "step": 453 }, { "epoch": 0.1274564851207187, "grad_norm": 0.8515135645866394, "learning_rate": 4.2469597754911136e-06, "loss": 0.4578, "step": 454 }, { "epoch": 0.12773722627737227, "grad_norm": 0.8269696831703186, "learning_rate": 4.256314312441534e-06, "loss": 0.5201, "step": 455 }, { "epoch": 0.12801796743402583, "grad_norm": 0.7364990711212158, "learning_rate": 4.265668849391956e-06, "loss": 0.5088, "step": 456 }, { "epoch": 0.1282987085906794, "grad_norm": 0.8844892382621765, "learning_rate": 4.275023386342376e-06, "loss": 0.5124, "step": 457 }, { "epoch": 0.12857944974733296, "grad_norm": 1.0102354288101196, "learning_rate": 4.284377923292797e-06, "loss": 0.5183, "step": 458 }, { "epoch": 0.12886019090398654, "grad_norm": 0.7829555869102478, "learning_rate": 4.293732460243218e-06, "loss": 0.5009, "step": 459 }, { "epoch": 0.1291409320606401, "grad_norm": 0.7153803706169128, "learning_rate": 4.30308699719364e-06, "loss": 0.5156, "step": 460 }, { "epoch": 0.12942167321729364, "grad_norm": 0.8135461807250977, "learning_rate": 4.3124415341440605e-06, "loss": 0.4938, "step": 461 }, { "epoch": 0.12970241437394722, "grad_norm": 0.8829972743988037, "learning_rate": 4.321796071094481e-06, "loss": 0.506, "step": 462 }, { "epoch": 0.12998315553060077, "grad_norm": 0.8309484124183655, "learning_rate": 4.331150608044903e-06, "loss": 0.5434, "step": 463 }, { "epoch": 0.13026389668725435, "grad_norm": 0.7044673562049866, "learning_rate": 4.340505144995323e-06, "loss": 0.4991, "step": 464 }, { "epoch": 0.1305446378439079, "grad_norm": 0.9659757018089294, "learning_rate": 4.349859681945744e-06, "loss": 0.551, "step": 465 }, { "epoch": 0.1308253790005615, "grad_norm": 0.8155401349067688, "learning_rate": 4.359214218896165e-06, "loss": 0.5389, "step": 466 }, { "epoch": 0.13110612015721504, "grad_norm": 0.6680974364280701, "learning_rate": 4.368568755846586e-06, "loss": 0.4989, "step": 467 }, { "epoch": 0.13138686131386862, "grad_norm": 0.7897123098373413, "learning_rate": 4.377923292797007e-06, "loss": 0.4981, "step": 468 }, { "epoch": 0.13166760247052217, "grad_norm": 0.844268262386322, "learning_rate": 4.387277829747428e-06, "loss": 0.5175, "step": 469 }, { "epoch": 0.13194834362717575, "grad_norm": 0.7481633424758911, "learning_rate": 4.396632366697849e-06, "loss": 0.5522, "step": 470 }, { "epoch": 0.1322290847838293, "grad_norm": 0.8489326238632202, "learning_rate": 4.40598690364827e-06, "loss": 0.5321, "step": 471 }, { "epoch": 0.13250982594048288, "grad_norm": 0.8275486826896667, "learning_rate": 4.415341440598691e-06, "loss": 0.4853, "step": 472 }, { "epoch": 0.13279056709713644, "grad_norm": 0.7152136564254761, "learning_rate": 4.4246959775491114e-06, "loss": 0.5229, "step": 473 }, { "epoch": 0.13307130825379002, "grad_norm": 0.7578805088996887, "learning_rate": 4.434050514499533e-06, "loss": 0.4997, "step": 474 }, { "epoch": 0.13335204941044357, "grad_norm": 0.7343823313713074, "learning_rate": 4.4434050514499535e-06, "loss": 0.5085, "step": 475 }, { "epoch": 0.13363279056709715, "grad_norm": 0.8387718796730042, "learning_rate": 4.452759588400374e-06, "loss": 0.5004, "step": 476 }, { "epoch": 0.1339135317237507, "grad_norm": 0.7833738327026367, "learning_rate": 4.462114125350796e-06, "loss": 0.5447, "step": 477 }, { "epoch": 0.13419427288040428, "grad_norm": 0.9023407697677612, "learning_rate": 4.471468662301216e-06, "loss": 0.5389, "step": 478 }, { "epoch": 0.13447501403705783, "grad_norm": 0.7810462117195129, "learning_rate": 4.480823199251638e-06, "loss": 0.4702, "step": 479 }, { "epoch": 0.13475575519371139, "grad_norm": 0.8092748522758484, "learning_rate": 4.490177736202058e-06, "loss": 0.5314, "step": 480 }, { "epoch": 0.13503649635036497, "grad_norm": 0.7407997250556946, "learning_rate": 4.499532273152479e-06, "loss": 0.5026, "step": 481 }, { "epoch": 0.13531723750701852, "grad_norm": 0.8457898497581482, "learning_rate": 4.5088868101029005e-06, "loss": 0.5533, "step": 482 }, { "epoch": 0.1355979786636721, "grad_norm": 0.765034019947052, "learning_rate": 4.518241347053321e-06, "loss": 0.5362, "step": 483 }, { "epoch": 0.13587871982032565, "grad_norm": 0.6809737682342529, "learning_rate": 4.527595884003742e-06, "loss": 0.4814, "step": 484 }, { "epoch": 0.13615946097697923, "grad_norm": 0.8527835011482239, "learning_rate": 4.536950420954163e-06, "loss": 0.5112, "step": 485 }, { "epoch": 0.13644020213363278, "grad_norm": 0.7911974191665649, "learning_rate": 4.546304957904585e-06, "loss": 0.516, "step": 486 }, { "epoch": 0.13672094329028636, "grad_norm": 0.8519447445869446, "learning_rate": 4.5556594948550045e-06, "loss": 0.5101, "step": 487 }, { "epoch": 0.13700168444693991, "grad_norm": 0.6963992714881897, "learning_rate": 4.565014031805426e-06, "loss": 0.4595, "step": 488 }, { "epoch": 0.1372824256035935, "grad_norm": 0.8214945793151855, "learning_rate": 4.574368568755847e-06, "loss": 0.5205, "step": 489 }, { "epoch": 0.13756316676024705, "grad_norm": 0.7888759970664978, "learning_rate": 4.583723105706268e-06, "loss": 0.5406, "step": 490 }, { "epoch": 0.13784390791690063, "grad_norm": 0.7536157369613647, "learning_rate": 4.593077642656689e-06, "loss": 0.5322, "step": 491 }, { "epoch": 0.13812464907355418, "grad_norm": 0.8491219282150269, "learning_rate": 4.60243217960711e-06, "loss": 0.5074, "step": 492 }, { "epoch": 0.13840539023020776, "grad_norm": 0.7836640477180481, "learning_rate": 4.611786716557531e-06, "loss": 0.4925, "step": 493 }, { "epoch": 0.1386861313868613, "grad_norm": 0.8089984059333801, "learning_rate": 4.621141253507952e-06, "loss": 0.4947, "step": 494 }, { "epoch": 0.1389668725435149, "grad_norm": 0.9444265365600586, "learning_rate": 4.630495790458373e-06, "loss": 0.4985, "step": 495 }, { "epoch": 0.13924761370016844, "grad_norm": 0.8405911326408386, "learning_rate": 4.6398503274087935e-06, "loss": 0.5427, "step": 496 }, { "epoch": 0.13952835485682202, "grad_norm": 0.7384021282196045, "learning_rate": 4.649204864359215e-06, "loss": 0.5331, "step": 497 }, { "epoch": 0.13980909601347558, "grad_norm": 0.8780004978179932, "learning_rate": 4.658559401309636e-06, "loss": 0.5129, "step": 498 }, { "epoch": 0.14008983717012913, "grad_norm": 0.9964781403541565, "learning_rate": 4.667913938260056e-06, "loss": 0.5178, "step": 499 }, { "epoch": 0.1403705783267827, "grad_norm": 0.7410679459571838, "learning_rate": 4.677268475210478e-06, "loss": 0.495, "step": 500 }, { "epoch": 0.14065131948343626, "grad_norm": 0.7884122729301453, "learning_rate": 4.686623012160898e-06, "loss": 0.4963, "step": 501 }, { "epoch": 0.14093206064008984, "grad_norm": 0.9369380474090576, "learning_rate": 4.69597754911132e-06, "loss": 0.4986, "step": 502 }, { "epoch": 0.1412128017967434, "grad_norm": 1.0544527769088745, "learning_rate": 4.7053320860617404e-06, "loss": 0.5399, "step": 503 }, { "epoch": 0.14149354295339697, "grad_norm": 0.7701802849769592, "learning_rate": 4.714686623012161e-06, "loss": 0.5134, "step": 504 }, { "epoch": 0.14177428411005052, "grad_norm": 0.8956480622291565, "learning_rate": 4.7240411599625826e-06, "loss": 0.5135, "step": 505 }, { "epoch": 0.1420550252667041, "grad_norm": 1.0529237985610962, "learning_rate": 4.733395696913003e-06, "loss": 0.523, "step": 506 }, { "epoch": 0.14233576642335766, "grad_norm": 0.8469558358192444, "learning_rate": 4.742750233863424e-06, "loss": 0.5178, "step": 507 }, { "epoch": 0.14261650758001124, "grad_norm": 0.8177878856658936, "learning_rate": 4.752104770813845e-06, "loss": 0.5613, "step": 508 }, { "epoch": 0.1428972487366648, "grad_norm": 0.8858785033226013, "learning_rate": 4.761459307764266e-06, "loss": 0.491, "step": 509 }, { "epoch": 0.14317798989331837, "grad_norm": 0.8501063585281372, "learning_rate": 4.7708138447146865e-06, "loss": 0.4929, "step": 510 }, { "epoch": 0.14345873104997192, "grad_norm": 0.9841606616973877, "learning_rate": 4.780168381665108e-06, "loss": 0.51, "step": 511 }, { "epoch": 0.1437394722066255, "grad_norm": 0.7775784730911255, "learning_rate": 4.789522918615529e-06, "loss": 0.481, "step": 512 }, { "epoch": 0.14402021336327905, "grad_norm": 0.7898653149604797, "learning_rate": 4.79887745556595e-06, "loss": 0.4996, "step": 513 }, { "epoch": 0.14430095451993263, "grad_norm": 0.7949855923652649, "learning_rate": 4.808231992516371e-06, "loss": 0.5754, "step": 514 }, { "epoch": 0.14458169567658619, "grad_norm": 0.9908952713012695, "learning_rate": 4.817586529466791e-06, "loss": 0.5185, "step": 515 }, { "epoch": 0.14486243683323977, "grad_norm": 0.9723168015480042, "learning_rate": 4.826941066417213e-06, "loss": 0.4764, "step": 516 }, { "epoch": 0.14514317798989332, "grad_norm": 0.8574391603469849, "learning_rate": 4.836295603367634e-06, "loss": 0.5662, "step": 517 }, { "epoch": 0.14542391914654687, "grad_norm": 0.8870858550071716, "learning_rate": 4.845650140318054e-06, "loss": 0.4795, "step": 518 }, { "epoch": 0.14570466030320045, "grad_norm": 0.8039255738258362, "learning_rate": 4.855004677268476e-06, "loss": 0.4968, "step": 519 }, { "epoch": 0.145985401459854, "grad_norm": 0.6876815557479858, "learning_rate": 4.864359214218897e-06, "loss": 0.4784, "step": 520 }, { "epoch": 0.14626614261650758, "grad_norm": 0.7060603499412537, "learning_rate": 4.873713751169318e-06, "loss": 0.4669, "step": 521 }, { "epoch": 0.14654688377316114, "grad_norm": 0.7625956535339355, "learning_rate": 4.883068288119738e-06, "loss": 0.4737, "step": 522 }, { "epoch": 0.14682762492981472, "grad_norm": 0.9577537775039673, "learning_rate": 4.89242282507016e-06, "loss": 0.5497, "step": 523 }, { "epoch": 0.14710836608646827, "grad_norm": 0.7729389071464539, "learning_rate": 4.90177736202058e-06, "loss": 0.5415, "step": 524 }, { "epoch": 0.14738910724312185, "grad_norm": 0.8255321383476257, "learning_rate": 4.911131898971001e-06, "loss": 0.496, "step": 525 }, { "epoch": 0.1476698483997754, "grad_norm": 0.823871374130249, "learning_rate": 4.9204864359214225e-06, "loss": 0.5014, "step": 526 }, { "epoch": 0.14795058955642898, "grad_norm": 0.7970291376113892, "learning_rate": 4.929840972871843e-06, "loss": 0.5102, "step": 527 }, { "epoch": 0.14823133071308253, "grad_norm": 0.7161914706230164, "learning_rate": 4.939195509822265e-06, "loss": 0.4838, "step": 528 }, { "epoch": 0.1485120718697361, "grad_norm": 0.8029255270957947, "learning_rate": 4.948550046772685e-06, "loss": 0.5437, "step": 529 }, { "epoch": 0.14879281302638966, "grad_norm": 0.7042182087898254, "learning_rate": 4.957904583723106e-06, "loss": 0.4834, "step": 530 }, { "epoch": 0.14907355418304324, "grad_norm": 0.8169024586677551, "learning_rate": 4.967259120673527e-06, "loss": 0.4586, "step": 531 }, { "epoch": 0.1493542953396968, "grad_norm": 0.8217435479164124, "learning_rate": 4.976613657623948e-06, "loss": 0.5296, "step": 532 }, { "epoch": 0.14963503649635038, "grad_norm": 0.7397123575210571, "learning_rate": 4.985968194574369e-06, "loss": 0.5598, "step": 533 }, { "epoch": 0.14991577765300393, "grad_norm": 0.8947880864143372, "learning_rate": 4.99532273152479e-06, "loss": 0.5462, "step": 534 }, { "epoch": 0.1501965188096575, "grad_norm": 0.7352718710899353, "learning_rate": 5.004677268475211e-06, "loss": 0.4843, "step": 535 }, { "epoch": 0.15047725996631106, "grad_norm": 0.7786549925804138, "learning_rate": 5.014031805425632e-06, "loss": 0.4706, "step": 536 }, { "epoch": 0.1507580011229646, "grad_norm": 0.7344022989273071, "learning_rate": 5.023386342376053e-06, "loss": 0.5204, "step": 537 }, { "epoch": 0.1510387422796182, "grad_norm": 0.8131575584411621, "learning_rate": 5.032740879326474e-06, "loss": 0.5751, "step": 538 }, { "epoch": 0.15131948343627175, "grad_norm": 0.7318029999732971, "learning_rate": 5.042095416276894e-06, "loss": 0.4698, "step": 539 }, { "epoch": 0.15160022459292533, "grad_norm": 0.8344904780387878, "learning_rate": 5.0514499532273156e-06, "loss": 0.4934, "step": 540 }, { "epoch": 0.15188096574957888, "grad_norm": 0.6257030963897705, "learning_rate": 5.060804490177736e-06, "loss": 0.4479, "step": 541 }, { "epoch": 0.15216170690623246, "grad_norm": 0.8552340865135193, "learning_rate": 5.070159027128158e-06, "loss": 0.4709, "step": 542 }, { "epoch": 0.152442448062886, "grad_norm": 0.7414129376411438, "learning_rate": 5.079513564078578e-06, "loss": 0.5039, "step": 543 }, { "epoch": 0.1527231892195396, "grad_norm": 0.7615332007408142, "learning_rate": 5.088868101029e-06, "loss": 0.5441, "step": 544 }, { "epoch": 0.15300393037619314, "grad_norm": 0.690945565700531, "learning_rate": 5.098222637979421e-06, "loss": 0.5114, "step": 545 }, { "epoch": 0.15328467153284672, "grad_norm": 0.6874892115592957, "learning_rate": 5.107577174929841e-06, "loss": 0.4722, "step": 546 }, { "epoch": 0.15356541268950027, "grad_norm": 0.7465002536773682, "learning_rate": 5.116931711880262e-06, "loss": 0.4996, "step": 547 }, { "epoch": 0.15384615384615385, "grad_norm": 0.7759626507759094, "learning_rate": 5.126286248830683e-06, "loss": 0.5134, "step": 548 }, { "epoch": 0.1541268950028074, "grad_norm": 0.8571668863296509, "learning_rate": 5.135640785781104e-06, "loss": 0.5298, "step": 549 }, { "epoch": 0.154407636159461, "grad_norm": 0.7905661463737488, "learning_rate": 5.144995322731525e-06, "loss": 0.5319, "step": 550 }, { "epoch": 0.15468837731611454, "grad_norm": 0.7992151975631714, "learning_rate": 5.154349859681947e-06, "loss": 0.534, "step": 551 }, { "epoch": 0.15496911847276812, "grad_norm": 0.8805502653121948, "learning_rate": 5.163704396632367e-06, "loss": 0.5318, "step": 552 }, { "epoch": 0.15524985962942167, "grad_norm": 0.894903302192688, "learning_rate": 5.173058933582789e-06, "loss": 0.565, "step": 553 }, { "epoch": 0.15553060078607525, "grad_norm": 0.8050834536552429, "learning_rate": 5.182413470533209e-06, "loss": 0.4873, "step": 554 }, { "epoch": 0.1558113419427288, "grad_norm": 0.9257166385650635, "learning_rate": 5.191768007483629e-06, "loss": 0.5192, "step": 555 }, { "epoch": 0.15609208309938236, "grad_norm": 0.7694621682167053, "learning_rate": 5.201122544434051e-06, "loss": 0.5001, "step": 556 }, { "epoch": 0.15637282425603594, "grad_norm": 0.8481784462928772, "learning_rate": 5.210477081384472e-06, "loss": 0.4991, "step": 557 }, { "epoch": 0.1566535654126895, "grad_norm": 0.8397626876831055, "learning_rate": 5.219831618334893e-06, "loss": 0.4966, "step": 558 }, { "epoch": 0.15693430656934307, "grad_norm": 0.7801057696342468, "learning_rate": 5.229186155285314e-06, "loss": 0.4751, "step": 559 }, { "epoch": 0.15721504772599662, "grad_norm": 0.7261902689933777, "learning_rate": 5.238540692235735e-06, "loss": 0.4794, "step": 560 }, { "epoch": 0.1574957888826502, "grad_norm": 0.8400854468345642, "learning_rate": 5.247895229186156e-06, "loss": 0.465, "step": 561 }, { "epoch": 0.15777653003930375, "grad_norm": 0.8911288976669312, "learning_rate": 5.257249766136576e-06, "loss": 0.4677, "step": 562 }, { "epoch": 0.15805727119595733, "grad_norm": 0.8608343601226807, "learning_rate": 5.266604303086998e-06, "loss": 0.501, "step": 563 }, { "epoch": 0.15833801235261089, "grad_norm": 0.8770518898963928, "learning_rate": 5.275958840037418e-06, "loss": 0.5028, "step": 564 }, { "epoch": 0.15861875350926447, "grad_norm": 0.8536120057106018, "learning_rate": 5.28531337698784e-06, "loss": 0.5544, "step": 565 }, { "epoch": 0.15889949466591802, "grad_norm": 0.9643396735191345, "learning_rate": 5.29466791393826e-06, "loss": 0.5322, "step": 566 }, { "epoch": 0.1591802358225716, "grad_norm": 0.793518602848053, "learning_rate": 5.304022450888682e-06, "loss": 0.5367, "step": 567 }, { "epoch": 0.15946097697922515, "grad_norm": 0.7644721865653992, "learning_rate": 5.3133769878391025e-06, "loss": 0.4954, "step": 568 }, { "epoch": 0.15974171813587873, "grad_norm": 0.7848473191261292, "learning_rate": 5.322731524789523e-06, "loss": 0.5036, "step": 569 }, { "epoch": 0.16002245929253228, "grad_norm": 0.7806656956672668, "learning_rate": 5.332086061739944e-06, "loss": 0.4985, "step": 570 }, { "epoch": 0.16030320044918586, "grad_norm": 0.8278757333755493, "learning_rate": 5.341440598690365e-06, "loss": 0.4639, "step": 571 }, { "epoch": 0.16058394160583941, "grad_norm": 0.7348732948303223, "learning_rate": 5.350795135640786e-06, "loss": 0.4956, "step": 572 }, { "epoch": 0.160864682762493, "grad_norm": 0.8381409645080566, "learning_rate": 5.360149672591207e-06, "loss": 0.4553, "step": 573 }, { "epoch": 0.16114542391914655, "grad_norm": 0.7404391765594482, "learning_rate": 5.369504209541628e-06, "loss": 0.5078, "step": 574 }, { "epoch": 0.1614261650758001, "grad_norm": 0.8644616007804871, "learning_rate": 5.378858746492049e-06, "loss": 0.4824, "step": 575 }, { "epoch": 0.16170690623245368, "grad_norm": 0.8510028719902039, "learning_rate": 5.388213283442471e-06, "loss": 0.4946, "step": 576 }, { "epoch": 0.16198764738910723, "grad_norm": 0.7837504744529724, "learning_rate": 5.397567820392891e-06, "loss": 0.5025, "step": 577 }, { "epoch": 0.1622683885457608, "grad_norm": 0.8370034098625183, "learning_rate": 5.406922357343311e-06, "loss": 0.4808, "step": 578 }, { "epoch": 0.16254912970241436, "grad_norm": 0.8849330544471741, "learning_rate": 5.416276894293733e-06, "loss": 0.4866, "step": 579 }, { "epoch": 0.16282987085906794, "grad_norm": 0.775743305683136, "learning_rate": 5.425631431244153e-06, "loss": 0.4642, "step": 580 }, { "epoch": 0.1631106120157215, "grad_norm": 0.8211396336555481, "learning_rate": 5.434985968194575e-06, "loss": 0.4993, "step": 581 }, { "epoch": 0.16339135317237508, "grad_norm": 0.8382355570793152, "learning_rate": 5.444340505144996e-06, "loss": 0.4747, "step": 582 }, { "epoch": 0.16367209432902863, "grad_norm": 0.8368803858757019, "learning_rate": 5.453695042095417e-06, "loss": 0.461, "step": 583 }, { "epoch": 0.1639528354856822, "grad_norm": 0.7276535034179688, "learning_rate": 5.4630495790458385e-06, "loss": 0.4788, "step": 584 }, { "epoch": 0.16423357664233576, "grad_norm": 0.8614891171455383, "learning_rate": 5.472404115996258e-06, "loss": 0.5592, "step": 585 }, { "epoch": 0.16451431779898934, "grad_norm": 0.7511591911315918, "learning_rate": 5.48175865294668e-06, "loss": 0.4818, "step": 586 }, { "epoch": 0.1647950589556429, "grad_norm": 0.7985215187072754, "learning_rate": 5.4911131898971e-06, "loss": 0.4707, "step": 587 }, { "epoch": 0.16507580011229647, "grad_norm": 0.9363226890563965, "learning_rate": 5.500467726847522e-06, "loss": 0.4896, "step": 588 }, { "epoch": 0.16535654126895002, "grad_norm": 0.7941949367523193, "learning_rate": 5.5098222637979424e-06, "loss": 0.5211, "step": 589 }, { "epoch": 0.1656372824256036, "grad_norm": 0.8037974834442139, "learning_rate": 5.519176800748364e-06, "loss": 0.5263, "step": 590 }, { "epoch": 0.16591802358225716, "grad_norm": 0.7701349854469299, "learning_rate": 5.5285313376987846e-06, "loss": 0.5092, "step": 591 }, { "epoch": 0.16619876473891074, "grad_norm": 0.7301192879676819, "learning_rate": 5.537885874649205e-06, "loss": 0.4826, "step": 592 }, { "epoch": 0.1664795058955643, "grad_norm": 0.8852382898330688, "learning_rate": 5.547240411599626e-06, "loss": 0.5481, "step": 593 }, { "epoch": 0.16676024705221784, "grad_norm": 0.8560103178024292, "learning_rate": 5.556594948550047e-06, "loss": 0.5154, "step": 594 }, { "epoch": 0.16704098820887142, "grad_norm": 0.7502108812332153, "learning_rate": 5.565949485500468e-06, "loss": 0.5251, "step": 595 }, { "epoch": 0.16732172936552497, "grad_norm": 0.7865407466888428, "learning_rate": 5.575304022450889e-06, "loss": 0.5201, "step": 596 }, { "epoch": 0.16760247052217855, "grad_norm": 0.7632266283035278, "learning_rate": 5.58465855940131e-06, "loss": 0.4567, "step": 597 }, { "epoch": 0.1678832116788321, "grad_norm": 0.83583664894104, "learning_rate": 5.5940130963517315e-06, "loss": 0.5082, "step": 598 }, { "epoch": 0.16816395283548569, "grad_norm": 0.8887547254562378, "learning_rate": 5.603367633302152e-06, "loss": 0.5472, "step": 599 }, { "epoch": 0.16844469399213924, "grad_norm": 0.712386965751648, "learning_rate": 5.612722170252573e-06, "loss": 0.4863, "step": 600 }, { "epoch": 0.16872543514879282, "grad_norm": 0.7156517505645752, "learning_rate": 5.622076707202993e-06, "loss": 0.4614, "step": 601 }, { "epoch": 0.16900617630544637, "grad_norm": 0.7918566465377808, "learning_rate": 5.631431244153415e-06, "loss": 0.5613, "step": 602 }, { "epoch": 0.16928691746209995, "grad_norm": 0.7155835628509521, "learning_rate": 5.6407857811038355e-06, "loss": 0.4798, "step": 603 }, { "epoch": 0.1695676586187535, "grad_norm": 0.7834767699241638, "learning_rate": 5.650140318054257e-06, "loss": 0.4335, "step": 604 }, { "epoch": 0.16984839977540708, "grad_norm": 0.6875774264335632, "learning_rate": 5.659494855004678e-06, "loss": 0.4752, "step": 605 }, { "epoch": 0.17012914093206064, "grad_norm": 0.8880415558815002, "learning_rate": 5.668849391955099e-06, "loss": 0.5201, "step": 606 }, { "epoch": 0.17040988208871422, "grad_norm": 0.8246658444404602, "learning_rate": 5.6782039289055205e-06, "loss": 0.5114, "step": 607 }, { "epoch": 0.17069062324536777, "grad_norm": 0.8733250498771667, "learning_rate": 5.68755846585594e-06, "loss": 0.541, "step": 608 }, { "epoch": 0.17097136440202135, "grad_norm": 0.7933526635169983, "learning_rate": 5.696913002806361e-06, "loss": 0.4487, "step": 609 }, { "epoch": 0.1712521055586749, "grad_norm": 0.842877984046936, "learning_rate": 5.706267539756782e-06, "loss": 0.51, "step": 610 }, { "epoch": 0.17153284671532848, "grad_norm": 0.8397455811500549, "learning_rate": 5.715622076707204e-06, "loss": 0.5534, "step": 611 }, { "epoch": 0.17181358787198203, "grad_norm": 0.7597702741622925, "learning_rate": 5.7249766136576245e-06, "loss": 0.5146, "step": 612 }, { "epoch": 0.17209432902863558, "grad_norm": 0.849120557308197, "learning_rate": 5.734331150608046e-06, "loss": 0.4783, "step": 613 }, { "epoch": 0.17237507018528916, "grad_norm": 0.7869701981544495, "learning_rate": 5.743685687558467e-06, "loss": 0.5101, "step": 614 }, { "epoch": 0.17265581134194272, "grad_norm": 0.7406596541404724, "learning_rate": 5.753040224508886e-06, "loss": 0.4854, "step": 615 }, { "epoch": 0.1729365524985963, "grad_norm": 0.6839237213134766, "learning_rate": 5.762394761459308e-06, "loss": 0.4381, "step": 616 }, { "epoch": 0.17321729365524985, "grad_norm": 0.8371248245239258, "learning_rate": 5.771749298409729e-06, "loss": 0.5202, "step": 617 }, { "epoch": 0.17349803481190343, "grad_norm": 0.6959989666938782, "learning_rate": 5.78110383536015e-06, "loss": 0.4705, "step": 618 }, { "epoch": 0.17377877596855698, "grad_norm": 0.7967045903205872, "learning_rate": 5.7904583723105715e-06, "loss": 0.5166, "step": 619 }, { "epoch": 0.17405951712521056, "grad_norm": 0.8553176522254944, "learning_rate": 5.799812909260992e-06, "loss": 0.5272, "step": 620 }, { "epoch": 0.1743402582818641, "grad_norm": 0.8010025024414062, "learning_rate": 5.8091674462114136e-06, "loss": 0.475, "step": 621 }, { "epoch": 0.1746209994385177, "grad_norm": 0.7816404104232788, "learning_rate": 5.818521983161834e-06, "loss": 0.5193, "step": 622 }, { "epoch": 0.17490174059517125, "grad_norm": 0.7686100602149963, "learning_rate": 5.827876520112255e-06, "loss": 0.5058, "step": 623 }, { "epoch": 0.17518248175182483, "grad_norm": 0.8996552228927612, "learning_rate": 5.8372310570626755e-06, "loss": 0.5281, "step": 624 }, { "epoch": 0.17546322290847838, "grad_norm": 0.9018763899803162, "learning_rate": 5.846585594013097e-06, "loss": 0.4906, "step": 625 }, { "epoch": 0.17574396406513196, "grad_norm": 0.8745623826980591, "learning_rate": 5.8559401309635176e-06, "loss": 0.5317, "step": 626 }, { "epoch": 0.1760247052217855, "grad_norm": 0.8026597499847412, "learning_rate": 5.865294667913939e-06, "loss": 0.4755, "step": 627 }, { "epoch": 0.1763054463784391, "grad_norm": 0.8113706111907959, "learning_rate": 5.87464920486436e-06, "loss": 0.4656, "step": 628 }, { "epoch": 0.17658618753509264, "grad_norm": 0.7952340245246887, "learning_rate": 5.884003741814781e-06, "loss": 0.5326, "step": 629 }, { "epoch": 0.17686692869174622, "grad_norm": 0.7512227892875671, "learning_rate": 5.893358278765201e-06, "loss": 0.4843, "step": 630 }, { "epoch": 0.17714766984839977, "grad_norm": 0.7184258103370667, "learning_rate": 5.902712815715622e-06, "loss": 0.547, "step": 631 }, { "epoch": 0.17742841100505333, "grad_norm": 0.7102201581001282, "learning_rate": 5.912067352666043e-06, "loss": 0.5041, "step": 632 }, { "epoch": 0.1777091521617069, "grad_norm": 0.7374149560928345, "learning_rate": 5.9214218896164645e-06, "loss": 0.4294, "step": 633 }, { "epoch": 0.17798989331836046, "grad_norm": 0.7687684893608093, "learning_rate": 5.930776426566885e-06, "loss": 0.5131, "step": 634 }, { "epoch": 0.17827063447501404, "grad_norm": 0.6948454976081848, "learning_rate": 5.940130963517307e-06, "loss": 0.475, "step": 635 }, { "epoch": 0.1785513756316676, "grad_norm": 0.7867432832717896, "learning_rate": 5.949485500467728e-06, "loss": 0.4894, "step": 636 }, { "epoch": 0.17883211678832117, "grad_norm": 0.8605855107307434, "learning_rate": 5.958840037418149e-06, "loss": 0.4981, "step": 637 }, { "epoch": 0.17911285794497472, "grad_norm": 0.838919460773468, "learning_rate": 5.9681945743685685e-06, "loss": 0.4816, "step": 638 }, { "epoch": 0.1793935991016283, "grad_norm": 0.7709580063819885, "learning_rate": 5.97754911131899e-06, "loss": 0.5236, "step": 639 }, { "epoch": 0.17967434025828186, "grad_norm": 0.8052390217781067, "learning_rate": 5.986903648269411e-06, "loss": 0.5261, "step": 640 }, { "epoch": 0.17995508141493544, "grad_norm": 0.8202112317085266, "learning_rate": 5.996258185219832e-06, "loss": 0.5035, "step": 641 }, { "epoch": 0.180235822571589, "grad_norm": 0.8466426134109497, "learning_rate": 6.0056127221702535e-06, "loss": 0.5071, "step": 642 }, { "epoch": 0.18051656372824257, "grad_norm": 0.8519512414932251, "learning_rate": 6.014967259120674e-06, "loss": 0.4966, "step": 643 }, { "epoch": 0.18079730488489612, "grad_norm": 0.8277428150177002, "learning_rate": 6.024321796071096e-06, "loss": 0.4786, "step": 644 }, { "epoch": 0.1810780460415497, "grad_norm": 0.7670875787734985, "learning_rate": 6.033676333021516e-06, "loss": 0.5597, "step": 645 }, { "epoch": 0.18135878719820325, "grad_norm": 0.7683556079864502, "learning_rate": 6.043030869971936e-06, "loss": 0.4834, "step": 646 }, { "epoch": 0.18163952835485683, "grad_norm": 0.9911069273948669, "learning_rate": 6.0523854069223575e-06, "loss": 0.5175, "step": 647 }, { "epoch": 0.18192026951151039, "grad_norm": 1.0118271112442017, "learning_rate": 6.061739943872779e-06, "loss": 0.5203, "step": 648 }, { "epoch": 0.18220101066816397, "grad_norm": 0.7321862578392029, "learning_rate": 6.0710944808232e-06, "loss": 0.4718, "step": 649 }, { "epoch": 0.18248175182481752, "grad_norm": 0.8685011863708496, "learning_rate": 6.080449017773621e-06, "loss": 0.5076, "step": 650 }, { "epoch": 0.1827624929814711, "grad_norm": 0.8869417309761047, "learning_rate": 6.089803554724042e-06, "loss": 0.4809, "step": 651 }, { "epoch": 0.18304323413812465, "grad_norm": 0.8386750817298889, "learning_rate": 6.099158091674463e-06, "loss": 0.4401, "step": 652 }, { "epoch": 0.1833239752947782, "grad_norm": 0.9820486307144165, "learning_rate": 6.108512628624883e-06, "loss": 0.5499, "step": 653 }, { "epoch": 0.18360471645143178, "grad_norm": 0.8415496349334717, "learning_rate": 6.1178671655753045e-06, "loss": 0.4853, "step": 654 }, { "epoch": 0.18388545760808533, "grad_norm": 0.9291137456893921, "learning_rate": 6.127221702525725e-06, "loss": 0.5085, "step": 655 }, { "epoch": 0.18416619876473891, "grad_norm": 0.8530355095863342, "learning_rate": 6.136576239476147e-06, "loss": 0.5417, "step": 656 }, { "epoch": 0.18444693992139247, "grad_norm": 0.8005567193031311, "learning_rate": 6.145930776426567e-06, "loss": 0.4913, "step": 657 }, { "epoch": 0.18472768107804605, "grad_norm": 0.6735771298408508, "learning_rate": 6.155285313376989e-06, "loss": 0.4819, "step": 658 }, { "epoch": 0.1850084222346996, "grad_norm": 0.8707764148712158, "learning_rate": 6.164639850327409e-06, "loss": 0.5603, "step": 659 }, { "epoch": 0.18528916339135318, "grad_norm": 0.799589216709137, "learning_rate": 6.173994387277831e-06, "loss": 0.4453, "step": 660 }, { "epoch": 0.18556990454800673, "grad_norm": 0.8446917533874512, "learning_rate": 6.1833489242282506e-06, "loss": 0.4905, "step": 661 }, { "epoch": 0.1858506457046603, "grad_norm": 0.781111478805542, "learning_rate": 6.192703461178672e-06, "loss": 0.4821, "step": 662 }, { "epoch": 0.18613138686131386, "grad_norm": 0.8124823570251465, "learning_rate": 6.202057998129093e-06, "loss": 0.5284, "step": 663 }, { "epoch": 0.18641212801796744, "grad_norm": 0.8359686136245728, "learning_rate": 6.211412535079514e-06, "loss": 0.4538, "step": 664 }, { "epoch": 0.186692869174621, "grad_norm": 0.6956129670143127, "learning_rate": 6.220767072029935e-06, "loss": 0.5337, "step": 665 }, { "epoch": 0.18697361033127458, "grad_norm": 0.7677740454673767, "learning_rate": 6.230121608980356e-06, "loss": 0.4858, "step": 666 }, { "epoch": 0.18725435148792813, "grad_norm": 0.796335756778717, "learning_rate": 6.239476145930778e-06, "loss": 0.5479, "step": 667 }, { "epoch": 0.1875350926445817, "grad_norm": 0.6870970129966736, "learning_rate": 6.248830682881198e-06, "loss": 0.4741, "step": 668 }, { "epoch": 0.18781583380123526, "grad_norm": 0.674141526222229, "learning_rate": 6.258185219831618e-06, "loss": 0.4668, "step": 669 }, { "epoch": 0.18809657495788884, "grad_norm": 0.7898195385932922, "learning_rate": 6.26753975678204e-06, "loss": 0.486, "step": 670 }, { "epoch": 0.1883773161145424, "grad_norm": 0.8173038363456726, "learning_rate": 6.27689429373246e-06, "loss": 0.4851, "step": 671 }, { "epoch": 0.18865805727119594, "grad_norm": 0.6723657846450806, "learning_rate": 6.286248830682882e-06, "loss": 0.4558, "step": 672 }, { "epoch": 0.18893879842784952, "grad_norm": 0.7023763060569763, "learning_rate": 6.295603367633303e-06, "loss": 0.4698, "step": 673 }, { "epoch": 0.18921953958450308, "grad_norm": 0.6620508432388306, "learning_rate": 6.304957904583724e-06, "loss": 0.4724, "step": 674 }, { "epoch": 0.18950028074115666, "grad_norm": 0.7251138091087341, "learning_rate": 6.314312441534145e-06, "loss": 0.4584, "step": 675 }, { "epoch": 0.1897810218978102, "grad_norm": 0.8622398972511292, "learning_rate": 6.323666978484565e-06, "loss": 0.5493, "step": 676 }, { "epoch": 0.1900617630544638, "grad_norm": 0.8090211153030396, "learning_rate": 6.333021515434986e-06, "loss": 0.4458, "step": 677 }, { "epoch": 0.19034250421111734, "grad_norm": 0.80698162317276, "learning_rate": 6.342376052385407e-06, "loss": 0.5148, "step": 678 }, { "epoch": 0.19062324536777092, "grad_norm": 0.8406893014907837, "learning_rate": 6.351730589335829e-06, "loss": 0.4809, "step": 679 }, { "epoch": 0.19090398652442447, "grad_norm": 0.8074467182159424, "learning_rate": 6.361085126286249e-06, "loss": 0.513, "step": 680 }, { "epoch": 0.19118472768107805, "grad_norm": 0.7587609887123108, "learning_rate": 6.370439663236671e-06, "loss": 0.5189, "step": 681 }, { "epoch": 0.1914654688377316, "grad_norm": 0.7114958763122559, "learning_rate": 6.379794200187091e-06, "loss": 0.4952, "step": 682 }, { "epoch": 0.19174620999438519, "grad_norm": 0.8224939107894897, "learning_rate": 6.389148737137513e-06, "loss": 0.4878, "step": 683 }, { "epoch": 0.19202695115103874, "grad_norm": 0.749329686164856, "learning_rate": 6.398503274087933e-06, "loss": 0.478, "step": 684 }, { "epoch": 0.19230769230769232, "grad_norm": 0.7548496127128601, "learning_rate": 6.407857811038354e-06, "loss": 0.428, "step": 685 }, { "epoch": 0.19258843346434587, "grad_norm": 0.8775320053100586, "learning_rate": 6.417212347988775e-06, "loss": 0.5282, "step": 686 }, { "epoch": 0.19286917462099945, "grad_norm": 0.8071384429931641, "learning_rate": 6.426566884939196e-06, "loss": 0.4887, "step": 687 }, { "epoch": 0.193149915777653, "grad_norm": 0.7695320844650269, "learning_rate": 6.435921421889617e-06, "loss": 0.5031, "step": 688 }, { "epoch": 0.19343065693430658, "grad_norm": 0.8379629254341125, "learning_rate": 6.445275958840038e-06, "loss": 0.5068, "step": 689 }, { "epoch": 0.19371139809096014, "grad_norm": 0.8163147568702698, "learning_rate": 6.454630495790459e-06, "loss": 0.5065, "step": 690 }, { "epoch": 0.1939921392476137, "grad_norm": 0.8712754845619202, "learning_rate": 6.4639850327408804e-06, "loss": 0.5463, "step": 691 }, { "epoch": 0.19427288040426727, "grad_norm": 0.7699676156044006, "learning_rate": 6.4733395696913e-06, "loss": 0.5188, "step": 692 }, { "epoch": 0.19455362156092082, "grad_norm": 0.8428667187690735, "learning_rate": 6.482694106641722e-06, "loss": 0.4543, "step": 693 }, { "epoch": 0.1948343627175744, "grad_norm": 0.8588039875030518, "learning_rate": 6.492048643592142e-06, "loss": 0.4977, "step": 694 }, { "epoch": 0.19511510387422795, "grad_norm": 0.8210405707359314, "learning_rate": 6.501403180542564e-06, "loss": 0.4532, "step": 695 }, { "epoch": 0.19539584503088153, "grad_norm": 0.6708239912986755, "learning_rate": 6.5107577174929844e-06, "loss": 0.4776, "step": 696 }, { "epoch": 0.19567658618753508, "grad_norm": 0.6946752667427063, "learning_rate": 6.520112254443406e-06, "loss": 0.4976, "step": 697 }, { "epoch": 0.19595732734418866, "grad_norm": 0.7242135405540466, "learning_rate": 6.529466791393827e-06, "loss": 0.5051, "step": 698 }, { "epoch": 0.19623806850084222, "grad_norm": 0.8506885766983032, "learning_rate": 6.538821328344247e-06, "loss": 0.5328, "step": 699 }, { "epoch": 0.1965188096574958, "grad_norm": 0.7978003025054932, "learning_rate": 6.548175865294668e-06, "loss": 0.5217, "step": 700 }, { "epoch": 0.19679955081414935, "grad_norm": 0.771725058555603, "learning_rate": 6.557530402245089e-06, "loss": 0.4604, "step": 701 }, { "epoch": 0.19708029197080293, "grad_norm": 0.8751739859580994, "learning_rate": 6.56688493919551e-06, "loss": 0.5024, "step": 702 }, { "epoch": 0.19736103312745648, "grad_norm": 0.744221031665802, "learning_rate": 6.576239476145931e-06, "loss": 0.4438, "step": 703 }, { "epoch": 0.19764177428411006, "grad_norm": 0.7264887690544128, "learning_rate": 6.585594013096353e-06, "loss": 0.4674, "step": 704 }, { "epoch": 0.1979225154407636, "grad_norm": 0.7788257002830505, "learning_rate": 6.5949485500467735e-06, "loss": 0.532, "step": 705 }, { "epoch": 0.1982032565974172, "grad_norm": 0.8833885192871094, "learning_rate": 6.604303086997195e-06, "loss": 0.4563, "step": 706 }, { "epoch": 0.19848399775407075, "grad_norm": 0.7958397269248962, "learning_rate": 6.613657623947615e-06, "loss": 0.476, "step": 707 }, { "epoch": 0.19876473891072433, "grad_norm": 0.8469579219818115, "learning_rate": 6.623012160898036e-06, "loss": 0.5257, "step": 708 }, { "epoch": 0.19904548006737788, "grad_norm": 0.7643548250198364, "learning_rate": 6.632366697848457e-06, "loss": 0.4322, "step": 709 }, { "epoch": 0.19932622122403143, "grad_norm": 0.7553983926773071, "learning_rate": 6.641721234798878e-06, "loss": 0.5362, "step": 710 }, { "epoch": 0.199606962380685, "grad_norm": 0.8119388818740845, "learning_rate": 6.651075771749299e-06, "loss": 0.5136, "step": 711 }, { "epoch": 0.19988770353733856, "grad_norm": 0.7699570059776306, "learning_rate": 6.66043030869972e-06, "loss": 0.4586, "step": 712 }, { "epoch": 0.20016844469399214, "grad_norm": 0.8307033181190491, "learning_rate": 6.669784845650141e-06, "loss": 0.4792, "step": 713 }, { "epoch": 0.2004491858506457, "grad_norm": 0.7847017645835876, "learning_rate": 6.679139382600562e-06, "loss": 0.4868, "step": 714 }, { "epoch": 0.20072992700729927, "grad_norm": 0.7745780348777771, "learning_rate": 6.688493919550982e-06, "loss": 0.472, "step": 715 }, { "epoch": 0.20101066816395283, "grad_norm": 1.0744963884353638, "learning_rate": 6.697848456501404e-06, "loss": 0.4936, "step": 716 }, { "epoch": 0.2012914093206064, "grad_norm": 0.814922571182251, "learning_rate": 6.707202993451824e-06, "loss": 0.4654, "step": 717 }, { "epoch": 0.20157215047725996, "grad_norm": 0.7702688574790955, "learning_rate": 6.716557530402246e-06, "loss": 0.477, "step": 718 }, { "epoch": 0.20185289163391354, "grad_norm": 0.9415417313575745, "learning_rate": 6.7259120673526665e-06, "loss": 0.5186, "step": 719 }, { "epoch": 0.2021336327905671, "grad_norm": 0.8672822117805481, "learning_rate": 6.735266604303088e-06, "loss": 0.4938, "step": 720 }, { "epoch": 0.20241437394722067, "grad_norm": 0.8722926378250122, "learning_rate": 6.744621141253509e-06, "loss": 0.5423, "step": 721 }, { "epoch": 0.20269511510387422, "grad_norm": 0.7042394876480103, "learning_rate": 6.753975678203929e-06, "loss": 0.4621, "step": 722 }, { "epoch": 0.2029758562605278, "grad_norm": 0.7742260694503784, "learning_rate": 6.76333021515435e-06, "loss": 0.4619, "step": 723 }, { "epoch": 0.20325659741718136, "grad_norm": 0.7998137474060059, "learning_rate": 6.772684752104771e-06, "loss": 0.475, "step": 724 }, { "epoch": 0.20353733857383494, "grad_norm": 0.7976365089416504, "learning_rate": 6.782039289055192e-06, "loss": 0.4698, "step": 725 }, { "epoch": 0.2038180797304885, "grad_norm": 0.8283064365386963, "learning_rate": 6.7913938260056134e-06, "loss": 0.4838, "step": 726 }, { "epoch": 0.20409882088714207, "grad_norm": 0.7168331146240234, "learning_rate": 6.800748362956034e-06, "loss": 0.537, "step": 727 }, { "epoch": 0.20437956204379562, "grad_norm": 0.9389481544494629, "learning_rate": 6.8101028999064555e-06, "loss": 0.578, "step": 728 }, { "epoch": 0.20466030320044917, "grad_norm": 0.8230177760124207, "learning_rate": 6.819457436856877e-06, "loss": 0.5107, "step": 729 }, { "epoch": 0.20494104435710275, "grad_norm": 0.8442697525024414, "learning_rate": 6.828811973807297e-06, "loss": 0.4883, "step": 730 }, { "epoch": 0.2052217855137563, "grad_norm": 0.8169631958007812, "learning_rate": 6.8381665107577174e-06, "loss": 0.4912, "step": 731 }, { "epoch": 0.20550252667040989, "grad_norm": 0.7780888676643372, "learning_rate": 6.847521047708139e-06, "loss": 0.5034, "step": 732 }, { "epoch": 0.20578326782706344, "grad_norm": 0.9030914306640625, "learning_rate": 6.8568755846585595e-06, "loss": 0.5266, "step": 733 }, { "epoch": 0.20606400898371702, "grad_norm": 0.759200394153595, "learning_rate": 6.866230121608981e-06, "loss": 0.5234, "step": 734 }, { "epoch": 0.20634475014037057, "grad_norm": 0.7931351661682129, "learning_rate": 6.8755846585594025e-06, "loss": 0.456, "step": 735 }, { "epoch": 0.20662549129702415, "grad_norm": 0.8052343130111694, "learning_rate": 6.884939195509823e-06, "loss": 0.4853, "step": 736 }, { "epoch": 0.2069062324536777, "grad_norm": 0.7927553653717041, "learning_rate": 6.894293732460243e-06, "loss": 0.4991, "step": 737 }, { "epoch": 0.20718697361033128, "grad_norm": 0.9182446599006653, "learning_rate": 6.903648269410664e-06, "loss": 0.479, "step": 738 }, { "epoch": 0.20746771476698483, "grad_norm": 0.7966193556785583, "learning_rate": 6.913002806361086e-06, "loss": 0.5107, "step": 739 }, { "epoch": 0.20774845592363841, "grad_norm": 0.796349048614502, "learning_rate": 6.9223573433115065e-06, "loss": 0.5064, "step": 740 }, { "epoch": 0.20802919708029197, "grad_norm": 0.8751774430274963, "learning_rate": 6.931711880261928e-06, "loss": 0.4919, "step": 741 }, { "epoch": 0.20830993823694555, "grad_norm": 0.8394516706466675, "learning_rate": 6.941066417212349e-06, "loss": 0.4924, "step": 742 }, { "epoch": 0.2085906793935991, "grad_norm": 0.8675036430358887, "learning_rate": 6.95042095416277e-06, "loss": 0.4964, "step": 743 }, { "epoch": 0.20887142055025268, "grad_norm": 0.7031323909759521, "learning_rate": 6.959775491113191e-06, "loss": 0.4696, "step": 744 }, { "epoch": 0.20915216170690623, "grad_norm": 0.9879941940307617, "learning_rate": 6.969130028063611e-06, "loss": 0.4884, "step": 745 }, { "epoch": 0.2094329028635598, "grad_norm": 0.8013218641281128, "learning_rate": 6.978484565014032e-06, "loss": 0.46, "step": 746 }, { "epoch": 0.20971364402021336, "grad_norm": 0.7790707945823669, "learning_rate": 6.987839101964453e-06, "loss": 0.5173, "step": 747 }, { "epoch": 0.20999438517686692, "grad_norm": 0.8945547342300415, "learning_rate": 6.997193638914874e-06, "loss": 0.4542, "step": 748 }, { "epoch": 0.2102751263335205, "grad_norm": 0.8125542402267456, "learning_rate": 7.0065481758652955e-06, "loss": 0.5067, "step": 749 }, { "epoch": 0.21055586749017405, "grad_norm": 0.7749972939491272, "learning_rate": 7.015902712815716e-06, "loss": 0.4425, "step": 750 }, { "epoch": 0.21083660864682763, "grad_norm": 0.9507055282592773, "learning_rate": 7.025257249766138e-06, "loss": 0.5095, "step": 751 }, { "epoch": 0.21111734980348118, "grad_norm": 0.8765051364898682, "learning_rate": 7.034611786716558e-06, "loss": 0.5172, "step": 752 }, { "epoch": 0.21139809096013476, "grad_norm": 0.8438290953636169, "learning_rate": 7.043966323666979e-06, "loss": 0.4626, "step": 753 }, { "epoch": 0.2116788321167883, "grad_norm": 0.791168212890625, "learning_rate": 7.0533208606173995e-06, "loss": 0.5015, "step": 754 }, { "epoch": 0.2119595732734419, "grad_norm": 0.7929601073265076, "learning_rate": 7.062675397567821e-06, "loss": 0.5085, "step": 755 }, { "epoch": 0.21224031443009544, "grad_norm": 0.7710103392601013, "learning_rate": 7.072029934518242e-06, "loss": 0.4502, "step": 756 }, { "epoch": 0.21252105558674902, "grad_norm": 0.8204118013381958, "learning_rate": 7.081384471468663e-06, "loss": 0.4549, "step": 757 }, { "epoch": 0.21280179674340258, "grad_norm": 0.8469942808151245, "learning_rate": 7.090739008419084e-06, "loss": 0.4856, "step": 758 }, { "epoch": 0.21308253790005616, "grad_norm": 0.8816447854042053, "learning_rate": 7.100093545369505e-06, "loss": 0.5089, "step": 759 }, { "epoch": 0.2133632790567097, "grad_norm": 0.7904311418533325, "learning_rate": 7.109448082319925e-06, "loss": 0.4753, "step": 760 }, { "epoch": 0.2136440202133633, "grad_norm": 0.8346189260482788, "learning_rate": 7.1188026192703465e-06, "loss": 0.5039, "step": 761 }, { "epoch": 0.21392476137001684, "grad_norm": 0.94911789894104, "learning_rate": 7.128157156220767e-06, "loss": 0.5588, "step": 762 }, { "epoch": 0.21420550252667042, "grad_norm": 0.837386429309845, "learning_rate": 7.1375116931711886e-06, "loss": 0.4809, "step": 763 }, { "epoch": 0.21448624368332397, "grad_norm": 0.8271566033363342, "learning_rate": 7.14686623012161e-06, "loss": 0.4947, "step": 764 }, { "epoch": 0.21476698483997755, "grad_norm": 0.8610383868217468, "learning_rate": 7.156220767072031e-06, "loss": 0.4717, "step": 765 }, { "epoch": 0.2150477259966311, "grad_norm": 0.9502079486846924, "learning_rate": 7.165575304022452e-06, "loss": 0.4758, "step": 766 }, { "epoch": 0.21532846715328466, "grad_norm": 0.9139910936355591, "learning_rate": 7.174929840972873e-06, "loss": 0.5117, "step": 767 }, { "epoch": 0.21560920830993824, "grad_norm": 0.809079647064209, "learning_rate": 7.1842843779232925e-06, "loss": 0.4904, "step": 768 }, { "epoch": 0.2158899494665918, "grad_norm": 0.8286628723144531, "learning_rate": 7.193638914873714e-06, "loss": 0.4818, "step": 769 }, { "epoch": 0.21617069062324537, "grad_norm": 0.8030611872673035, "learning_rate": 7.2029934518241355e-06, "loss": 0.4776, "step": 770 }, { "epoch": 0.21645143177989892, "grad_norm": 0.7689762115478516, "learning_rate": 7.212347988774556e-06, "loss": 0.4948, "step": 771 }, { "epoch": 0.2167321729365525, "grad_norm": 0.9789795875549316, "learning_rate": 7.221702525724978e-06, "loss": 0.4869, "step": 772 }, { "epoch": 0.21701291409320606, "grad_norm": 0.8280055522918701, "learning_rate": 7.231057062675398e-06, "loss": 0.5029, "step": 773 }, { "epoch": 0.21729365524985964, "grad_norm": 0.8097561001777649, "learning_rate": 7.24041159962582e-06, "loss": 0.4568, "step": 774 }, { "epoch": 0.2175743964065132, "grad_norm": 0.8653045892715454, "learning_rate": 7.24976613657624e-06, "loss": 0.4876, "step": 775 }, { "epoch": 0.21785513756316677, "grad_norm": 0.8296322822570801, "learning_rate": 7.259120673526661e-06, "loss": 0.558, "step": 776 }, { "epoch": 0.21813587871982032, "grad_norm": 0.7704964280128479, "learning_rate": 7.268475210477082e-06, "loss": 0.4396, "step": 777 }, { "epoch": 0.2184166198764739, "grad_norm": 0.8957526683807373, "learning_rate": 7.277829747427503e-06, "loss": 0.5055, "step": 778 }, { "epoch": 0.21869736103312745, "grad_norm": 0.7658538222312927, "learning_rate": 7.287184284377924e-06, "loss": 0.4509, "step": 779 }, { "epoch": 0.21897810218978103, "grad_norm": 0.7775703072547913, "learning_rate": 7.296538821328345e-06, "loss": 0.4828, "step": 780 }, { "epoch": 0.21925884334643458, "grad_norm": 0.751159131526947, "learning_rate": 7.305893358278766e-06, "loss": 0.486, "step": 781 }, { "epoch": 0.21953958450308816, "grad_norm": 0.8056408762931824, "learning_rate": 7.315247895229187e-06, "loss": 0.4552, "step": 782 }, { "epoch": 0.21982032565974172, "grad_norm": 0.703741729259491, "learning_rate": 7.324602432179607e-06, "loss": 0.4853, "step": 783 }, { "epoch": 0.2201010668163953, "grad_norm": 0.8325309753417969, "learning_rate": 7.3339569691300285e-06, "loss": 0.484, "step": 784 }, { "epoch": 0.22038180797304885, "grad_norm": 0.7223569750785828, "learning_rate": 7.343311506080449e-06, "loss": 0.5043, "step": 785 }, { "epoch": 0.2206625491297024, "grad_norm": 0.7619519233703613, "learning_rate": 7.352666043030871e-06, "loss": 0.4649, "step": 786 }, { "epoch": 0.22094329028635598, "grad_norm": 0.9175875782966614, "learning_rate": 7.362020579981291e-06, "loss": 0.5978, "step": 787 }, { "epoch": 0.22122403144300953, "grad_norm": 0.7340946197509766, "learning_rate": 7.371375116931713e-06, "loss": 0.4801, "step": 788 }, { "epoch": 0.2215047725996631, "grad_norm": 0.7784373164176941, "learning_rate": 7.380729653882134e-06, "loss": 0.5125, "step": 789 }, { "epoch": 0.22178551375631667, "grad_norm": 0.8114541172981262, "learning_rate": 7.390084190832555e-06, "loss": 0.4761, "step": 790 }, { "epoch": 0.22206625491297025, "grad_norm": 0.9015792608261108, "learning_rate": 7.399438727782975e-06, "loss": 0.5065, "step": 791 }, { "epoch": 0.2223469960696238, "grad_norm": 0.7953155040740967, "learning_rate": 7.408793264733396e-06, "loss": 0.5121, "step": 792 }, { "epoch": 0.22262773722627738, "grad_norm": 0.7775832414627075, "learning_rate": 7.418147801683817e-06, "loss": 0.4871, "step": 793 }, { "epoch": 0.22290847838293093, "grad_norm": 0.8492860794067383, "learning_rate": 7.427502338634238e-06, "loss": 0.5279, "step": 794 }, { "epoch": 0.2231892195395845, "grad_norm": 0.8550410866737366, "learning_rate": 7.43685687558466e-06, "loss": 0.4912, "step": 795 }, { "epoch": 0.22346996069623806, "grad_norm": 0.8710159063339233, "learning_rate": 7.44621141253508e-06, "loss": 0.5149, "step": 796 }, { "epoch": 0.22375070185289164, "grad_norm": 0.7900233268737793, "learning_rate": 7.455565949485502e-06, "loss": 0.4885, "step": 797 }, { "epoch": 0.2240314430095452, "grad_norm": 0.7633907198905945, "learning_rate": 7.4649204864359216e-06, "loss": 0.4923, "step": 798 }, { "epoch": 0.22431218416619877, "grad_norm": 0.9308202266693115, "learning_rate": 7.474275023386342e-06, "loss": 0.4673, "step": 799 }, { "epoch": 0.22459292532285233, "grad_norm": 0.8563190698623657, "learning_rate": 7.483629560336764e-06, "loss": 0.501, "step": 800 }, { "epoch": 0.2248736664795059, "grad_norm": 0.8566862344741821, "learning_rate": 7.492984097287185e-06, "loss": 0.527, "step": 801 }, { "epoch": 0.22515440763615946, "grad_norm": 0.7848474979400635, "learning_rate": 7.502338634237606e-06, "loss": 0.4928, "step": 802 }, { "epoch": 0.22543514879281304, "grad_norm": 0.7709822654724121, "learning_rate": 7.511693171188027e-06, "loss": 0.4791, "step": 803 }, { "epoch": 0.2257158899494666, "grad_norm": 0.8138049840927124, "learning_rate": 7.521047708138448e-06, "loss": 0.4524, "step": 804 }, { "epoch": 0.22599663110612014, "grad_norm": 0.7268965840339661, "learning_rate": 7.530402245088869e-06, "loss": 0.478, "step": 805 }, { "epoch": 0.22627737226277372, "grad_norm": 0.7944256663322449, "learning_rate": 7.539756782039289e-06, "loss": 0.4657, "step": 806 }, { "epoch": 0.22655811341942728, "grad_norm": 0.7516258955001831, "learning_rate": 7.549111318989711e-06, "loss": 0.4577, "step": 807 }, { "epoch": 0.22683885457608086, "grad_norm": 0.8455751538276672, "learning_rate": 7.558465855940131e-06, "loss": 0.4604, "step": 808 }, { "epoch": 0.2271195957327344, "grad_norm": 0.9007487893104553, "learning_rate": 7.567820392890553e-06, "loss": 0.5352, "step": 809 }, { "epoch": 0.227400336889388, "grad_norm": 0.8126270771026611, "learning_rate": 7.577174929840973e-06, "loss": 0.471, "step": 810 }, { "epoch": 0.22768107804604154, "grad_norm": 0.8163447380065918, "learning_rate": 7.586529466791395e-06, "loss": 0.4879, "step": 811 }, { "epoch": 0.22796181920269512, "grad_norm": 0.783903956413269, "learning_rate": 7.5958840037418154e-06, "loss": 0.5218, "step": 812 }, { "epoch": 0.22824256035934867, "grad_norm": 0.7607461214065552, "learning_rate": 7.605238540692237e-06, "loss": 0.4878, "step": 813 }, { "epoch": 0.22852330151600225, "grad_norm": 0.8193497657775879, "learning_rate": 7.614593077642657e-06, "loss": 0.4666, "step": 814 }, { "epoch": 0.2288040426726558, "grad_norm": 0.8364272713661194, "learning_rate": 7.623947614593078e-06, "loss": 0.4932, "step": 815 }, { "epoch": 0.22908478382930939, "grad_norm": 0.6981427073478699, "learning_rate": 7.633302151543499e-06, "loss": 0.4862, "step": 816 }, { "epoch": 0.22936552498596294, "grad_norm": 0.8305123448371887, "learning_rate": 7.64265668849392e-06, "loss": 0.5026, "step": 817 }, { "epoch": 0.22964626614261652, "grad_norm": 0.9503836631774902, "learning_rate": 7.652011225444342e-06, "loss": 0.4959, "step": 818 }, { "epoch": 0.22992700729927007, "grad_norm": 0.8699612021446228, "learning_rate": 7.661365762394762e-06, "loss": 0.4976, "step": 819 }, { "epoch": 0.23020774845592365, "grad_norm": 0.6767401695251465, "learning_rate": 7.670720299345183e-06, "loss": 0.4245, "step": 820 }, { "epoch": 0.2304884896125772, "grad_norm": 0.8819581866264343, "learning_rate": 7.680074836295604e-06, "loss": 0.5003, "step": 821 }, { "epoch": 0.23076923076923078, "grad_norm": 0.9521064758300781, "learning_rate": 7.689429373246024e-06, "loss": 0.4196, "step": 822 }, { "epoch": 0.23104997192588433, "grad_norm": 0.8548203706741333, "learning_rate": 7.698783910196445e-06, "loss": 0.5253, "step": 823 }, { "epoch": 0.2313307130825379, "grad_norm": 0.784529447555542, "learning_rate": 7.708138447146867e-06, "loss": 0.4748, "step": 824 }, { "epoch": 0.23161145423919147, "grad_norm": 0.8814212083816528, "learning_rate": 7.717492984097288e-06, "loss": 0.4517, "step": 825 }, { "epoch": 0.23189219539584502, "grad_norm": 0.8412429690361023, "learning_rate": 7.726847521047708e-06, "loss": 0.4789, "step": 826 }, { "epoch": 0.2321729365524986, "grad_norm": 0.7028629779815674, "learning_rate": 7.73620205799813e-06, "loss": 0.5404, "step": 827 }, { "epoch": 0.23245367770915215, "grad_norm": 0.8391116261482239, "learning_rate": 7.745556594948551e-06, "loss": 0.4269, "step": 828 }, { "epoch": 0.23273441886580573, "grad_norm": 0.7596981525421143, "learning_rate": 7.75491113189897e-06, "loss": 0.4879, "step": 829 }, { "epoch": 0.23301516002245928, "grad_norm": 0.8645800948143005, "learning_rate": 7.764265668849393e-06, "loss": 0.4648, "step": 830 }, { "epoch": 0.23329590117911286, "grad_norm": 1.0553699731826782, "learning_rate": 7.773620205799813e-06, "loss": 0.5485, "step": 831 }, { "epoch": 0.23357664233576642, "grad_norm": 0.9821456670761108, "learning_rate": 7.782974742750234e-06, "loss": 0.557, "step": 832 }, { "epoch": 0.23385738349242, "grad_norm": 0.8769895434379578, "learning_rate": 7.792329279700656e-06, "loss": 0.509, "step": 833 }, { "epoch": 0.23413812464907355, "grad_norm": 0.9701917767524719, "learning_rate": 7.801683816651077e-06, "loss": 0.5332, "step": 834 }, { "epoch": 0.23441886580572713, "grad_norm": 0.9042077660560608, "learning_rate": 7.811038353601498e-06, "loss": 0.4989, "step": 835 }, { "epoch": 0.23469960696238068, "grad_norm": 0.863466739654541, "learning_rate": 7.820392890551918e-06, "loss": 0.4811, "step": 836 }, { "epoch": 0.23498034811903426, "grad_norm": 0.7645045518875122, "learning_rate": 7.829747427502339e-06, "loss": 0.4729, "step": 837 }, { "epoch": 0.2352610892756878, "grad_norm": 0.8438298106193542, "learning_rate": 7.83910196445276e-06, "loss": 0.46, "step": 838 }, { "epoch": 0.2355418304323414, "grad_norm": 0.849493145942688, "learning_rate": 7.848456501403182e-06, "loss": 0.4868, "step": 839 }, { "epoch": 0.23582257158899494, "grad_norm": 0.9705619812011719, "learning_rate": 7.857811038353602e-06, "loss": 0.4465, "step": 840 }, { "epoch": 0.23610331274564852, "grad_norm": 0.8479672074317932, "learning_rate": 7.867165575304023e-06, "loss": 0.5, "step": 841 }, { "epoch": 0.23638405390230208, "grad_norm": 0.7416703701019287, "learning_rate": 7.876520112254444e-06, "loss": 0.5244, "step": 842 }, { "epoch": 0.23666479505895563, "grad_norm": 0.7811509370803833, "learning_rate": 7.885874649204866e-06, "loss": 0.5247, "step": 843 }, { "epoch": 0.2369455362156092, "grad_norm": 0.7777257561683655, "learning_rate": 7.895229186155285e-06, "loss": 0.4863, "step": 844 }, { "epoch": 0.23722627737226276, "grad_norm": 0.7654300928115845, "learning_rate": 7.904583723105707e-06, "loss": 0.4927, "step": 845 }, { "epoch": 0.23750701852891634, "grad_norm": 0.7922725081443787, "learning_rate": 7.913938260056128e-06, "loss": 0.4634, "step": 846 }, { "epoch": 0.2377877596855699, "grad_norm": 0.7853131294250488, "learning_rate": 7.923292797006548e-06, "loss": 0.4677, "step": 847 }, { "epoch": 0.23806850084222347, "grad_norm": 0.9648452997207642, "learning_rate": 7.932647333956969e-06, "loss": 0.4835, "step": 848 }, { "epoch": 0.23834924199887703, "grad_norm": 0.7550539374351501, "learning_rate": 7.942001870907391e-06, "loss": 0.4879, "step": 849 }, { "epoch": 0.2386299831555306, "grad_norm": 0.8540953397750854, "learning_rate": 7.951356407857812e-06, "loss": 0.49, "step": 850 }, { "epoch": 0.23891072431218416, "grad_norm": 0.9588361978530884, "learning_rate": 7.960710944808233e-06, "loss": 0.4817, "step": 851 }, { "epoch": 0.23919146546883774, "grad_norm": 0.7889480590820312, "learning_rate": 7.970065481758653e-06, "loss": 0.4509, "step": 852 }, { "epoch": 0.2394722066254913, "grad_norm": 0.9453020095825195, "learning_rate": 7.979420018709074e-06, "loss": 0.5955, "step": 853 }, { "epoch": 0.23975294778214487, "grad_norm": 0.9234386682510376, "learning_rate": 7.988774555659495e-06, "loss": 0.4713, "step": 854 }, { "epoch": 0.24003368893879842, "grad_norm": 0.8285678625106812, "learning_rate": 7.998129092609917e-06, "loss": 0.4857, "step": 855 }, { "epoch": 0.240314430095452, "grad_norm": 0.7617980241775513, "learning_rate": 8.007483629560337e-06, "loss": 0.451, "step": 856 }, { "epoch": 0.24059517125210556, "grad_norm": 0.9261062145233154, "learning_rate": 8.016838166510758e-06, "loss": 0.5317, "step": 857 }, { "epoch": 0.24087591240875914, "grad_norm": 0.9025489091873169, "learning_rate": 8.02619270346118e-06, "loss": 0.4408, "step": 858 }, { "epoch": 0.2411566535654127, "grad_norm": 0.8039227724075317, "learning_rate": 8.035547240411601e-06, "loss": 0.4828, "step": 859 }, { "epoch": 0.24143739472206627, "grad_norm": 0.8967242240905762, "learning_rate": 8.04490177736202e-06, "loss": 0.5024, "step": 860 }, { "epoch": 0.24171813587871982, "grad_norm": 0.8802300095558167, "learning_rate": 8.054256314312442e-06, "loss": 0.5036, "step": 861 }, { "epoch": 0.24199887703537337, "grad_norm": 0.8861883878707886, "learning_rate": 8.063610851262863e-06, "loss": 0.5273, "step": 862 }, { "epoch": 0.24227961819202695, "grad_norm": 0.7743239998817444, "learning_rate": 8.072965388213284e-06, "loss": 0.4993, "step": 863 }, { "epoch": 0.2425603593486805, "grad_norm": 0.7340776920318604, "learning_rate": 8.082319925163706e-06, "loss": 0.4608, "step": 864 }, { "epoch": 0.24284110050533408, "grad_norm": 0.7196219563484192, "learning_rate": 8.091674462114127e-06, "loss": 0.4381, "step": 865 }, { "epoch": 0.24312184166198764, "grad_norm": 0.7719977498054504, "learning_rate": 8.101028999064547e-06, "loss": 0.4581, "step": 866 }, { "epoch": 0.24340258281864122, "grad_norm": 0.9021661877632141, "learning_rate": 8.110383536014968e-06, "loss": 0.4692, "step": 867 }, { "epoch": 0.24368332397529477, "grad_norm": 0.7454041242599487, "learning_rate": 8.119738072965388e-06, "loss": 0.4559, "step": 868 }, { "epoch": 0.24396406513194835, "grad_norm": 0.8102445602416992, "learning_rate": 8.129092609915809e-06, "loss": 0.5006, "step": 869 }, { "epoch": 0.2442448062886019, "grad_norm": 0.7215291261672974, "learning_rate": 8.138447146866231e-06, "loss": 0.4426, "step": 870 }, { "epoch": 0.24452554744525548, "grad_norm": 0.7336854934692383, "learning_rate": 8.147801683816652e-06, "loss": 0.4685, "step": 871 }, { "epoch": 0.24480628860190903, "grad_norm": 0.8260940909385681, "learning_rate": 8.157156220767073e-06, "loss": 0.5054, "step": 872 }, { "epoch": 0.2450870297585626, "grad_norm": 0.8100261092185974, "learning_rate": 8.166510757717493e-06, "loss": 0.5025, "step": 873 }, { "epoch": 0.24536777091521617, "grad_norm": 0.6854899525642395, "learning_rate": 8.175865294667916e-06, "loss": 0.5013, "step": 874 }, { "epoch": 0.24564851207186975, "grad_norm": 0.7612625360488892, "learning_rate": 8.185219831618335e-06, "loss": 0.4197, "step": 875 }, { "epoch": 0.2459292532285233, "grad_norm": 0.775981068611145, "learning_rate": 8.194574368568757e-06, "loss": 0.4416, "step": 876 }, { "epoch": 0.24620999438517688, "grad_norm": 0.8787760734558105, "learning_rate": 8.203928905519177e-06, "loss": 0.496, "step": 877 }, { "epoch": 0.24649073554183043, "grad_norm": 0.8954545259475708, "learning_rate": 8.213283442469598e-06, "loss": 0.5222, "step": 878 }, { "epoch": 0.246771476698484, "grad_norm": 0.7083318829536438, "learning_rate": 8.222637979420019e-06, "loss": 0.4849, "step": 879 }, { "epoch": 0.24705221785513756, "grad_norm": 0.7826118469238281, "learning_rate": 8.231992516370441e-06, "loss": 0.4799, "step": 880 }, { "epoch": 0.24733295901179111, "grad_norm": 0.7675488591194153, "learning_rate": 8.241347053320862e-06, "loss": 0.5113, "step": 881 }, { "epoch": 0.2476137001684447, "grad_norm": 0.8048291206359863, "learning_rate": 8.250701590271282e-06, "loss": 0.5162, "step": 882 }, { "epoch": 0.24789444132509825, "grad_norm": 0.8667749166488647, "learning_rate": 8.260056127221703e-06, "loss": 0.4802, "step": 883 }, { "epoch": 0.24817518248175183, "grad_norm": 0.8236709237098694, "learning_rate": 8.269410664172124e-06, "loss": 0.4491, "step": 884 }, { "epoch": 0.24845592363840538, "grad_norm": 0.7885945439338684, "learning_rate": 8.278765201122544e-06, "loss": 0.472, "step": 885 }, { "epoch": 0.24873666479505896, "grad_norm": 0.8309069275856018, "learning_rate": 8.288119738072967e-06, "loss": 0.4879, "step": 886 }, { "epoch": 0.2490174059517125, "grad_norm": 0.9352370500564575, "learning_rate": 8.297474275023387e-06, "loss": 0.5002, "step": 887 }, { "epoch": 0.2492981471083661, "grad_norm": 0.9705796837806702, "learning_rate": 8.306828811973808e-06, "loss": 0.5102, "step": 888 }, { "epoch": 0.24957888826501964, "grad_norm": 0.7391785383224487, "learning_rate": 8.31618334892423e-06, "loss": 0.4565, "step": 889 }, { "epoch": 0.24985962942167322, "grad_norm": 0.859291136264801, "learning_rate": 8.325537885874649e-06, "loss": 0.4779, "step": 890 }, { "epoch": 0.2501403705783268, "grad_norm": 0.7881708145141602, "learning_rate": 8.33489242282507e-06, "loss": 0.4949, "step": 891 }, { "epoch": 0.25042111173498033, "grad_norm": 0.8595026731491089, "learning_rate": 8.344246959775492e-06, "loss": 0.478, "step": 892 }, { "epoch": 0.25070185289163394, "grad_norm": 0.7372920513153076, "learning_rate": 8.353601496725913e-06, "loss": 0.5033, "step": 893 }, { "epoch": 0.2509825940482875, "grad_norm": 0.7447819113731384, "learning_rate": 8.362956033676333e-06, "loss": 0.4458, "step": 894 }, { "epoch": 0.25126333520494104, "grad_norm": 0.9821788668632507, "learning_rate": 8.372310570626756e-06, "loss": 0.5061, "step": 895 }, { "epoch": 0.2515440763615946, "grad_norm": 0.7504124045372009, "learning_rate": 8.381665107577176e-06, "loss": 0.4628, "step": 896 }, { "epoch": 0.2518248175182482, "grad_norm": 0.7859714031219482, "learning_rate": 8.391019644527597e-06, "loss": 0.4936, "step": 897 }, { "epoch": 0.25210555867490175, "grad_norm": 0.8116353154182434, "learning_rate": 8.400374181478017e-06, "loss": 0.4905, "step": 898 }, { "epoch": 0.2523862998315553, "grad_norm": 0.7412129044532776, "learning_rate": 8.409728718428438e-06, "loss": 0.4779, "step": 899 }, { "epoch": 0.25266704098820886, "grad_norm": 0.77248615026474, "learning_rate": 8.419083255378859e-06, "loss": 0.4896, "step": 900 }, { "epoch": 0.2529477821448624, "grad_norm": 0.7527933716773987, "learning_rate": 8.428437792329281e-06, "loss": 0.4932, "step": 901 }, { "epoch": 0.253228523301516, "grad_norm": 0.7819905281066895, "learning_rate": 8.437792329279702e-06, "loss": 0.4309, "step": 902 }, { "epoch": 0.25350926445816957, "grad_norm": 0.7580601572990417, "learning_rate": 8.447146866230122e-06, "loss": 0.5103, "step": 903 }, { "epoch": 0.2537900056148231, "grad_norm": 0.7849022150039673, "learning_rate": 8.456501403180543e-06, "loss": 0.4792, "step": 904 }, { "epoch": 0.2540707467714767, "grad_norm": 0.746990442276001, "learning_rate": 8.465855940130964e-06, "loss": 0.473, "step": 905 }, { "epoch": 0.2543514879281303, "grad_norm": 0.7876943945884705, "learning_rate": 8.475210477081384e-06, "loss": 0.4204, "step": 906 }, { "epoch": 0.25463222908478383, "grad_norm": 0.8317561149597168, "learning_rate": 8.484565014031806e-06, "loss": 0.5359, "step": 907 }, { "epoch": 0.2549129702414374, "grad_norm": 0.7951239943504333, "learning_rate": 8.493919550982227e-06, "loss": 0.4973, "step": 908 }, { "epoch": 0.25519371139809094, "grad_norm": 0.7526887655258179, "learning_rate": 8.503274087932648e-06, "loss": 0.489, "step": 909 }, { "epoch": 0.25547445255474455, "grad_norm": 0.8517107963562012, "learning_rate": 8.512628624883068e-06, "loss": 0.4819, "step": 910 }, { "epoch": 0.2557551937113981, "grad_norm": 0.7782130241394043, "learning_rate": 8.52198316183349e-06, "loss": 0.5018, "step": 911 }, { "epoch": 0.25603593486805165, "grad_norm": 0.8412166833877563, "learning_rate": 8.531337698783911e-06, "loss": 0.5138, "step": 912 }, { "epoch": 0.2563166760247052, "grad_norm": 0.7934753894805908, "learning_rate": 8.540692235734332e-06, "loss": 0.4733, "step": 913 }, { "epoch": 0.2565974171813588, "grad_norm": 0.7621031403541565, "learning_rate": 8.550046772684753e-06, "loss": 0.4957, "step": 914 }, { "epoch": 0.25687815833801236, "grad_norm": 0.7486236691474915, "learning_rate": 8.559401309635173e-06, "loss": 0.5315, "step": 915 }, { "epoch": 0.2571588994946659, "grad_norm": 0.7432385683059692, "learning_rate": 8.568755846585594e-06, "loss": 0.4396, "step": 916 }, { "epoch": 0.25743964065131947, "grad_norm": 0.9275770783424377, "learning_rate": 8.578110383536016e-06, "loss": 0.4905, "step": 917 }, { "epoch": 0.2577203818079731, "grad_norm": 0.7362551689147949, "learning_rate": 8.587464920486437e-06, "loss": 0.425, "step": 918 }, { "epoch": 0.25800112296462663, "grad_norm": 0.8446869254112244, "learning_rate": 8.596819457436857e-06, "loss": 0.5109, "step": 919 }, { "epoch": 0.2582818641212802, "grad_norm": 1.0435951948165894, "learning_rate": 8.60617399438728e-06, "loss": 0.5141, "step": 920 }, { "epoch": 0.25856260527793373, "grad_norm": 0.8806620240211487, "learning_rate": 8.615528531337699e-06, "loss": 0.4957, "step": 921 }, { "epoch": 0.2588433464345873, "grad_norm": 0.8784282207489014, "learning_rate": 8.624883068288121e-06, "loss": 0.4836, "step": 922 }, { "epoch": 0.2591240875912409, "grad_norm": 0.9344772100448608, "learning_rate": 8.634237605238542e-06, "loss": 0.5401, "step": 923 }, { "epoch": 0.25940482874789444, "grad_norm": 0.885509192943573, "learning_rate": 8.643592142188962e-06, "loss": 0.4499, "step": 924 }, { "epoch": 0.259685569904548, "grad_norm": 0.7840343713760376, "learning_rate": 8.652946679139383e-06, "loss": 0.4851, "step": 925 }, { "epoch": 0.25996631106120155, "grad_norm": 0.8780069351196289, "learning_rate": 8.662301216089805e-06, "loss": 0.4678, "step": 926 }, { "epoch": 0.26024705221785516, "grad_norm": 0.7959757447242737, "learning_rate": 8.671655753040226e-06, "loss": 0.4463, "step": 927 }, { "epoch": 0.2605277933745087, "grad_norm": 0.7591774463653564, "learning_rate": 8.681010289990646e-06, "loss": 0.4804, "step": 928 }, { "epoch": 0.26080853453116226, "grad_norm": 0.9297831058502197, "learning_rate": 8.690364826941067e-06, "loss": 0.5108, "step": 929 }, { "epoch": 0.2610892756878158, "grad_norm": 0.7070633769035339, "learning_rate": 8.699719363891488e-06, "loss": 0.434, "step": 930 }, { "epoch": 0.2613700168444694, "grad_norm": 0.903701901435852, "learning_rate": 8.709073900841908e-06, "loss": 0.525, "step": 931 }, { "epoch": 0.261650758001123, "grad_norm": 0.7866299152374268, "learning_rate": 8.71842843779233e-06, "loss": 0.4782, "step": 932 }, { "epoch": 0.2619314991577765, "grad_norm": 0.6943819522857666, "learning_rate": 8.727782974742751e-06, "loss": 0.4678, "step": 933 }, { "epoch": 0.2622122403144301, "grad_norm": 0.845110297203064, "learning_rate": 8.737137511693172e-06, "loss": 0.5255, "step": 934 }, { "epoch": 0.2624929814710837, "grad_norm": 0.7377384901046753, "learning_rate": 8.746492048643593e-06, "loss": 0.5018, "step": 935 }, { "epoch": 0.26277372262773724, "grad_norm": 0.8059272766113281, "learning_rate": 8.755846585594013e-06, "loss": 0.4423, "step": 936 }, { "epoch": 0.2630544637843908, "grad_norm": 0.7214736342430115, "learning_rate": 8.765201122544434e-06, "loss": 0.4988, "step": 937 }, { "epoch": 0.26333520494104434, "grad_norm": 0.7720814347267151, "learning_rate": 8.774555659494856e-06, "loss": 0.5019, "step": 938 }, { "epoch": 0.2636159460976979, "grad_norm": 0.776086151599884, "learning_rate": 8.783910196445277e-06, "loss": 0.4787, "step": 939 }, { "epoch": 0.2638966872543515, "grad_norm": 0.7871274948120117, "learning_rate": 8.793264733395697e-06, "loss": 0.5098, "step": 940 }, { "epoch": 0.26417742841100506, "grad_norm": 0.8371344208717346, "learning_rate": 8.802619270346118e-06, "loss": 0.4995, "step": 941 }, { "epoch": 0.2644581695676586, "grad_norm": 1.0325953960418701, "learning_rate": 8.81197380729654e-06, "loss": 0.5568, "step": 942 }, { "epoch": 0.26473891072431216, "grad_norm": 0.8071467876434326, "learning_rate": 8.821328344246961e-06, "loss": 0.4658, "step": 943 }, { "epoch": 0.26501965188096577, "grad_norm": 0.7698601484298706, "learning_rate": 8.830682881197382e-06, "loss": 0.4532, "step": 944 }, { "epoch": 0.2653003930376193, "grad_norm": 0.9465731382369995, "learning_rate": 8.840037418147802e-06, "loss": 0.4382, "step": 945 }, { "epoch": 0.26558113419427287, "grad_norm": 1.0514284372329712, "learning_rate": 8.849391955098223e-06, "loss": 0.4715, "step": 946 }, { "epoch": 0.2658618753509264, "grad_norm": 0.8537533283233643, "learning_rate": 8.858746492048645e-06, "loss": 0.497, "step": 947 }, { "epoch": 0.26614261650758003, "grad_norm": 0.9736228585243225, "learning_rate": 8.868101028999066e-06, "loss": 0.4909, "step": 948 }, { "epoch": 0.2664233576642336, "grad_norm": 0.8221850991249084, "learning_rate": 8.877455565949486e-06, "loss": 0.4597, "step": 949 }, { "epoch": 0.26670409882088714, "grad_norm": 1.005744218826294, "learning_rate": 8.886810102899907e-06, "loss": 0.4813, "step": 950 }, { "epoch": 0.2669848399775407, "grad_norm": 0.8730907440185547, "learning_rate": 8.896164639850328e-06, "loss": 0.4546, "step": 951 }, { "epoch": 0.2672655811341943, "grad_norm": 0.8560677766799927, "learning_rate": 8.905519176800748e-06, "loss": 0.4836, "step": 952 }, { "epoch": 0.26754632229084785, "grad_norm": 0.8395571112632751, "learning_rate": 8.91487371375117e-06, "loss": 0.4865, "step": 953 }, { "epoch": 0.2678270634475014, "grad_norm": 0.8776417374610901, "learning_rate": 8.924228250701591e-06, "loss": 0.4915, "step": 954 }, { "epoch": 0.26810780460415495, "grad_norm": 0.9050156474113464, "learning_rate": 8.933582787652012e-06, "loss": 0.4671, "step": 955 }, { "epoch": 0.26838854576080856, "grad_norm": 0.7902247309684753, "learning_rate": 8.942937324602433e-06, "loss": 0.4976, "step": 956 }, { "epoch": 0.2686692869174621, "grad_norm": 0.7983662486076355, "learning_rate": 8.952291861552855e-06, "loss": 0.512, "step": 957 }, { "epoch": 0.26895002807411567, "grad_norm": 0.813231348991394, "learning_rate": 8.961646398503275e-06, "loss": 0.45, "step": 958 }, { "epoch": 0.2692307692307692, "grad_norm": 0.8848547339439392, "learning_rate": 8.971000935453696e-06, "loss": 0.4848, "step": 959 }, { "epoch": 0.26951151038742277, "grad_norm": 0.8239362835884094, "learning_rate": 8.980355472404117e-06, "loss": 0.4623, "step": 960 }, { "epoch": 0.2697922515440764, "grad_norm": 0.9324841499328613, "learning_rate": 8.989710009354537e-06, "loss": 0.5013, "step": 961 }, { "epoch": 0.27007299270072993, "grad_norm": 0.871340811252594, "learning_rate": 8.999064546304958e-06, "loss": 0.4507, "step": 962 }, { "epoch": 0.2703537338573835, "grad_norm": 0.9839334487915039, "learning_rate": 9.00841908325538e-06, "loss": 0.5147, "step": 963 }, { "epoch": 0.27063447501403703, "grad_norm": 0.9232575297355652, "learning_rate": 9.017773620205801e-06, "loss": 0.4329, "step": 964 }, { "epoch": 0.27091521617069064, "grad_norm": 1.1634669303894043, "learning_rate": 9.027128157156222e-06, "loss": 0.5702, "step": 965 }, { "epoch": 0.2711959573273442, "grad_norm": 0.8469628691673279, "learning_rate": 9.036482694106642e-06, "loss": 0.4695, "step": 966 }, { "epoch": 0.27147669848399775, "grad_norm": 0.8922038078308105, "learning_rate": 9.045837231057063e-06, "loss": 0.4382, "step": 967 }, { "epoch": 0.2717574396406513, "grad_norm": 0.8791254162788391, "learning_rate": 9.055191768007483e-06, "loss": 0.4659, "step": 968 }, { "epoch": 0.2720381807973049, "grad_norm": 0.839027464389801, "learning_rate": 9.064546304957906e-06, "loss": 0.4501, "step": 969 }, { "epoch": 0.27231892195395846, "grad_norm": 0.9237929582595825, "learning_rate": 9.073900841908326e-06, "loss": 0.4699, "step": 970 }, { "epoch": 0.272599663110612, "grad_norm": 0.9299217462539673, "learning_rate": 9.083255378858747e-06, "loss": 0.4637, "step": 971 }, { "epoch": 0.27288040426726556, "grad_norm": 0.7219536900520325, "learning_rate": 9.09260991580917e-06, "loss": 0.4869, "step": 972 }, { "epoch": 0.27316114542391917, "grad_norm": 0.9024264812469482, "learning_rate": 9.10196445275959e-06, "loss": 0.487, "step": 973 }, { "epoch": 0.2734418865805727, "grad_norm": 0.8910660147666931, "learning_rate": 9.111318989710009e-06, "loss": 0.5116, "step": 974 }, { "epoch": 0.2737226277372263, "grad_norm": 0.8204740285873413, "learning_rate": 9.120673526660431e-06, "loss": 0.4493, "step": 975 }, { "epoch": 0.27400336889387983, "grad_norm": 0.9932129383087158, "learning_rate": 9.130028063610852e-06, "loss": 0.4483, "step": 976 }, { "epoch": 0.2742841100505334, "grad_norm": 0.8457167148590088, "learning_rate": 9.139382600561273e-06, "loss": 0.4244, "step": 977 }, { "epoch": 0.274564851207187, "grad_norm": 1.0124696493148804, "learning_rate": 9.148737137511695e-06, "loss": 0.5319, "step": 978 }, { "epoch": 0.27484559236384054, "grad_norm": 0.8043119311332703, "learning_rate": 9.158091674462115e-06, "loss": 0.4687, "step": 979 }, { "epoch": 0.2751263335204941, "grad_norm": 0.8999631404876709, "learning_rate": 9.167446211412536e-06, "loss": 0.4663, "step": 980 }, { "epoch": 0.27540707467714765, "grad_norm": 0.7950866222381592, "learning_rate": 9.176800748362957e-06, "loss": 0.4648, "step": 981 }, { "epoch": 0.27568781583380125, "grad_norm": 0.8977778553962708, "learning_rate": 9.186155285313377e-06, "loss": 0.4644, "step": 982 }, { "epoch": 0.2759685569904548, "grad_norm": 0.8251086473464966, "learning_rate": 9.195509822263798e-06, "loss": 0.4705, "step": 983 }, { "epoch": 0.27624929814710836, "grad_norm": 0.8819026947021484, "learning_rate": 9.20486435921422e-06, "loss": 0.474, "step": 984 }, { "epoch": 0.2765300393037619, "grad_norm": 0.9170657396316528, "learning_rate": 9.214218896164641e-06, "loss": 0.5067, "step": 985 }, { "epoch": 0.2768107804604155, "grad_norm": 0.9154438972473145, "learning_rate": 9.223573433115062e-06, "loss": 0.433, "step": 986 }, { "epoch": 0.27709152161706907, "grad_norm": 0.8111538887023926, "learning_rate": 9.232927970065482e-06, "loss": 0.484, "step": 987 }, { "epoch": 0.2773722627737226, "grad_norm": 0.808013916015625, "learning_rate": 9.242282507015905e-06, "loss": 0.4879, "step": 988 }, { "epoch": 0.2776530039303762, "grad_norm": 0.9945582747459412, "learning_rate": 9.251637043966323e-06, "loss": 0.4738, "step": 989 }, { "epoch": 0.2779337450870298, "grad_norm": 0.8452256917953491, "learning_rate": 9.260991580916746e-06, "loss": 0.5337, "step": 990 }, { "epoch": 0.27821448624368333, "grad_norm": 0.7302437424659729, "learning_rate": 9.270346117867166e-06, "loss": 0.4836, "step": 991 }, { "epoch": 0.2784952274003369, "grad_norm": 0.8313810229301453, "learning_rate": 9.279700654817587e-06, "loss": 0.5328, "step": 992 }, { "epoch": 0.27877596855699044, "grad_norm": 0.9550543427467346, "learning_rate": 9.289055191768008e-06, "loss": 0.4587, "step": 993 }, { "epoch": 0.27905670971364405, "grad_norm": 0.8457921147346497, "learning_rate": 9.29840972871843e-06, "loss": 0.4718, "step": 994 }, { "epoch": 0.2793374508702976, "grad_norm": 0.8598791360855103, "learning_rate": 9.30776426566885e-06, "loss": 0.4654, "step": 995 }, { "epoch": 0.27961819202695115, "grad_norm": 0.9834221601486206, "learning_rate": 9.317118802619271e-06, "loss": 0.4971, "step": 996 }, { "epoch": 0.2798989331836047, "grad_norm": 0.8372482061386108, "learning_rate": 9.326473339569692e-06, "loss": 0.4551, "step": 997 }, { "epoch": 0.28017967434025826, "grad_norm": 0.92179274559021, "learning_rate": 9.335827876520112e-06, "loss": 0.5081, "step": 998 }, { "epoch": 0.28046041549691186, "grad_norm": 0.8274542689323425, "learning_rate": 9.345182413470533e-06, "loss": 0.5604, "step": 999 }, { "epoch": 0.2807411566535654, "grad_norm": 0.8723477721214294, "learning_rate": 9.354536950420955e-06, "loss": 0.5213, "step": 1000 }, { "epoch": 0.28102189781021897, "grad_norm": 0.975670576095581, "learning_rate": 9.363891487371376e-06, "loss": 0.5363, "step": 1001 }, { "epoch": 0.2813026389668725, "grad_norm": 0.8007491827011108, "learning_rate": 9.373246024321797e-06, "loss": 0.4587, "step": 1002 }, { "epoch": 0.28158338012352613, "grad_norm": 0.9563086032867432, "learning_rate": 9.382600561272219e-06, "loss": 0.5113, "step": 1003 }, { "epoch": 0.2818641212801797, "grad_norm": 0.9842604994773865, "learning_rate": 9.39195509822264e-06, "loss": 0.5133, "step": 1004 }, { "epoch": 0.28214486243683323, "grad_norm": 0.7433784008026123, "learning_rate": 9.401309635173059e-06, "loss": 0.4674, "step": 1005 }, { "epoch": 0.2824256035934868, "grad_norm": 0.8338155746459961, "learning_rate": 9.410664172123481e-06, "loss": 0.51, "step": 1006 }, { "epoch": 0.2827063447501404, "grad_norm": 0.8775511384010315, "learning_rate": 9.420018709073902e-06, "loss": 0.4848, "step": 1007 }, { "epoch": 0.28298708590679394, "grad_norm": 0.8014006614685059, "learning_rate": 9.429373246024322e-06, "loss": 0.5229, "step": 1008 }, { "epoch": 0.2832678270634475, "grad_norm": 0.8549612760543823, "learning_rate": 9.438727782974744e-06, "loss": 0.523, "step": 1009 }, { "epoch": 0.28354856822010105, "grad_norm": 0.9535396099090576, "learning_rate": 9.448082319925165e-06, "loss": 0.5106, "step": 1010 }, { "epoch": 0.28382930937675466, "grad_norm": 0.7287841439247131, "learning_rate": 9.457436856875586e-06, "loss": 0.4796, "step": 1011 }, { "epoch": 0.2841100505334082, "grad_norm": 0.7829171419143677, "learning_rate": 9.466791393826006e-06, "loss": 0.4585, "step": 1012 }, { "epoch": 0.28439079169006176, "grad_norm": 0.8058018684387207, "learning_rate": 9.476145930776427e-06, "loss": 0.435, "step": 1013 }, { "epoch": 0.2846715328467153, "grad_norm": 0.7625716924667358, "learning_rate": 9.485500467726848e-06, "loss": 0.4703, "step": 1014 }, { "epoch": 0.28495227400336887, "grad_norm": 0.7859020829200745, "learning_rate": 9.49485500467727e-06, "loss": 0.4794, "step": 1015 }, { "epoch": 0.2852330151600225, "grad_norm": 0.8216314315795898, "learning_rate": 9.50420954162769e-06, "loss": 0.4536, "step": 1016 }, { "epoch": 0.285513756316676, "grad_norm": 0.709894061088562, "learning_rate": 9.513564078578111e-06, "loss": 0.4608, "step": 1017 }, { "epoch": 0.2857944974733296, "grad_norm": 0.7770177125930786, "learning_rate": 9.522918615528532e-06, "loss": 0.4991, "step": 1018 }, { "epoch": 0.28607523862998313, "grad_norm": 0.8208000063896179, "learning_rate": 9.532273152478954e-06, "loss": 0.5088, "step": 1019 }, { "epoch": 0.28635597978663674, "grad_norm": 0.7777297496795654, "learning_rate": 9.541627689429373e-06, "loss": 0.4546, "step": 1020 }, { "epoch": 0.2866367209432903, "grad_norm": 0.6712039113044739, "learning_rate": 9.550982226379795e-06, "loss": 0.4791, "step": 1021 }, { "epoch": 0.28691746209994384, "grad_norm": 0.7849695682525635, "learning_rate": 9.560336763330216e-06, "loss": 0.4831, "step": 1022 }, { "epoch": 0.2871982032565974, "grad_norm": 0.7336786389350891, "learning_rate": 9.569691300280637e-06, "loss": 0.4849, "step": 1023 }, { "epoch": 0.287478944413251, "grad_norm": 0.7290138602256775, "learning_rate": 9.579045837231057e-06, "loss": 0.4704, "step": 1024 }, { "epoch": 0.28775968556990456, "grad_norm": 0.9773189425468445, "learning_rate": 9.58840037418148e-06, "loss": 0.4793, "step": 1025 }, { "epoch": 0.2880404267265581, "grad_norm": 0.8708929419517517, "learning_rate": 9.5977549111319e-06, "loss": 0.4886, "step": 1026 }, { "epoch": 0.28832116788321166, "grad_norm": 0.7952057719230652, "learning_rate": 9.607109448082321e-06, "loss": 0.4942, "step": 1027 }, { "epoch": 0.28860190903986527, "grad_norm": 0.8154239058494568, "learning_rate": 9.616463985032741e-06, "loss": 0.5241, "step": 1028 }, { "epoch": 0.2888826501965188, "grad_norm": 0.7335970997810364, "learning_rate": 9.625818521983162e-06, "loss": 0.5128, "step": 1029 }, { "epoch": 0.28916339135317237, "grad_norm": 0.701531171798706, "learning_rate": 9.635173058933583e-06, "loss": 0.4949, "step": 1030 }, { "epoch": 0.2894441325098259, "grad_norm": 0.7591395378112793, "learning_rate": 9.644527595884005e-06, "loss": 0.4846, "step": 1031 }, { "epoch": 0.28972487366647953, "grad_norm": 0.8313401341438293, "learning_rate": 9.653882132834426e-06, "loss": 0.5033, "step": 1032 }, { "epoch": 0.2900056148231331, "grad_norm": 0.8509560823440552, "learning_rate": 9.663236669784846e-06, "loss": 0.4974, "step": 1033 }, { "epoch": 0.29028635597978664, "grad_norm": 0.7456276416778564, "learning_rate": 9.672591206735269e-06, "loss": 0.5008, "step": 1034 }, { "epoch": 0.2905670971364402, "grad_norm": 0.8189723491668701, "learning_rate": 9.681945743685688e-06, "loss": 0.4596, "step": 1035 }, { "epoch": 0.29084783829309374, "grad_norm": 0.791438102722168, "learning_rate": 9.691300280636108e-06, "loss": 0.4789, "step": 1036 }, { "epoch": 0.29112857944974735, "grad_norm": 0.69429612159729, "learning_rate": 9.70065481758653e-06, "loss": 0.406, "step": 1037 }, { "epoch": 0.2914093206064009, "grad_norm": 0.7351028919219971, "learning_rate": 9.710009354536951e-06, "loss": 0.4522, "step": 1038 }, { "epoch": 0.29169006176305445, "grad_norm": 0.80379319190979, "learning_rate": 9.719363891487372e-06, "loss": 0.5104, "step": 1039 }, { "epoch": 0.291970802919708, "grad_norm": 0.7888717651367188, "learning_rate": 9.728718428437794e-06, "loss": 0.4552, "step": 1040 }, { "epoch": 0.2922515440763616, "grad_norm": 0.7304790616035461, "learning_rate": 9.738072965388215e-06, "loss": 0.4246, "step": 1041 }, { "epoch": 0.29253228523301517, "grad_norm": 0.875408411026001, "learning_rate": 9.747427502338635e-06, "loss": 0.43, "step": 1042 }, { "epoch": 0.2928130263896687, "grad_norm": 0.8990151286125183, "learning_rate": 9.756782039289056e-06, "loss": 0.4987, "step": 1043 }, { "epoch": 0.29309376754632227, "grad_norm": 0.722625732421875, "learning_rate": 9.766136576239477e-06, "loss": 0.474, "step": 1044 }, { "epoch": 0.2933745087029759, "grad_norm": 0.9640076756477356, "learning_rate": 9.775491113189897e-06, "loss": 0.4753, "step": 1045 }, { "epoch": 0.29365524985962943, "grad_norm": 0.8131601810455322, "learning_rate": 9.78484565014032e-06, "loss": 0.5016, "step": 1046 }, { "epoch": 0.293935991016283, "grad_norm": 0.7900562286376953, "learning_rate": 9.79420018709074e-06, "loss": 0.491, "step": 1047 }, { "epoch": 0.29421673217293653, "grad_norm": 0.8465869426727295, "learning_rate": 9.80355472404116e-06, "loss": 0.4802, "step": 1048 }, { "epoch": 0.29449747332959014, "grad_norm": 0.9690655469894409, "learning_rate": 9.812909260991581e-06, "loss": 0.5153, "step": 1049 }, { "epoch": 0.2947782144862437, "grad_norm": 0.8377925753593445, "learning_rate": 9.822263797942002e-06, "loss": 0.4918, "step": 1050 }, { "epoch": 0.29505895564289725, "grad_norm": 0.772736668586731, "learning_rate": 9.831618334892423e-06, "loss": 0.4505, "step": 1051 }, { "epoch": 0.2953396967995508, "grad_norm": 0.9311164617538452, "learning_rate": 9.840972871842845e-06, "loss": 0.4547, "step": 1052 }, { "epoch": 0.2956204379562044, "grad_norm": 0.9110825061798096, "learning_rate": 9.850327408793266e-06, "loss": 0.4864, "step": 1053 }, { "epoch": 0.29590117911285796, "grad_norm": 0.8068156242370605, "learning_rate": 9.859681945743686e-06, "loss": 0.4701, "step": 1054 }, { "epoch": 0.2961819202695115, "grad_norm": 0.833731472492218, "learning_rate": 9.869036482694107e-06, "loss": 0.455, "step": 1055 }, { "epoch": 0.29646266142616506, "grad_norm": 0.9055677056312561, "learning_rate": 9.87839101964453e-06, "loss": 0.4432, "step": 1056 }, { "epoch": 0.2967434025828186, "grad_norm": 0.9414280652999878, "learning_rate": 9.88774555659495e-06, "loss": 0.5132, "step": 1057 }, { "epoch": 0.2970241437394722, "grad_norm": 0.877994179725647, "learning_rate": 9.89710009354537e-06, "loss": 0.5158, "step": 1058 }, { "epoch": 0.2973048848961258, "grad_norm": 0.8954114317893982, "learning_rate": 9.906454630495791e-06, "loss": 0.5154, "step": 1059 }, { "epoch": 0.29758562605277933, "grad_norm": 0.9333704113960266, "learning_rate": 9.915809167446212e-06, "loss": 0.5306, "step": 1060 }, { "epoch": 0.2978663672094329, "grad_norm": 0.9117119908332825, "learning_rate": 9.925163704396632e-06, "loss": 0.5018, "step": 1061 }, { "epoch": 0.2981471083660865, "grad_norm": 0.8949404358863831, "learning_rate": 9.934518241347055e-06, "loss": 0.4871, "step": 1062 }, { "epoch": 0.29842784952274004, "grad_norm": 0.8439606428146362, "learning_rate": 9.943872778297475e-06, "loss": 0.4553, "step": 1063 }, { "epoch": 0.2987085906793936, "grad_norm": 0.9122105836868286, "learning_rate": 9.953227315247896e-06, "loss": 0.4793, "step": 1064 }, { "epoch": 0.29898933183604715, "grad_norm": 0.7889140844345093, "learning_rate": 9.962581852198318e-06, "loss": 0.4867, "step": 1065 }, { "epoch": 0.29927007299270075, "grad_norm": 0.6639648079872131, "learning_rate": 9.971936389148737e-06, "loss": 0.476, "step": 1066 }, { "epoch": 0.2995508141493543, "grad_norm": 0.77021324634552, "learning_rate": 9.981290926099158e-06, "loss": 0.4622, "step": 1067 }, { "epoch": 0.29983155530600786, "grad_norm": 0.8649744391441345, "learning_rate": 9.99064546304958e-06, "loss": 0.4847, "step": 1068 }, { "epoch": 0.3001122964626614, "grad_norm": 0.7904282808303833, "learning_rate": 1e-05, "loss": 0.4737, "step": 1069 }, { "epoch": 0.300393037619315, "grad_norm": 0.7468514442443848, "learning_rate": 9.999999733215548e-06, "loss": 0.4923, "step": 1070 }, { "epoch": 0.30067377877596857, "grad_norm": 0.9646374583244324, "learning_rate": 9.999998932862217e-06, "loss": 0.5091, "step": 1071 }, { "epoch": 0.3009545199326221, "grad_norm": 0.8719750642776489, "learning_rate": 9.99999759894009e-06, "loss": 0.4564, "step": 1072 }, { "epoch": 0.3012352610892757, "grad_norm": 0.7746025919914246, "learning_rate": 9.999995731449315e-06, "loss": 0.4724, "step": 1073 }, { "epoch": 0.3015160022459292, "grad_norm": 0.7543782591819763, "learning_rate": 9.999993330390085e-06, "loss": 0.5004, "step": 1074 }, { "epoch": 0.30179674340258283, "grad_norm": 0.7941534519195557, "learning_rate": 9.999990395762663e-06, "loss": 0.4597, "step": 1075 }, { "epoch": 0.3020774845592364, "grad_norm": 0.8257895708084106, "learning_rate": 9.999986927567358e-06, "loss": 0.4847, "step": 1076 }, { "epoch": 0.30235822571588994, "grad_norm": 0.9384962320327759, "learning_rate": 9.999982925804541e-06, "loss": 0.5341, "step": 1077 }, { "epoch": 0.3026389668725435, "grad_norm": 0.9379695653915405, "learning_rate": 9.999978390474639e-06, "loss": 0.4905, "step": 1078 }, { "epoch": 0.3029197080291971, "grad_norm": 0.863917350769043, "learning_rate": 9.999973321578136e-06, "loss": 0.4363, "step": 1079 }, { "epoch": 0.30320044918585065, "grad_norm": 0.9006621241569519, "learning_rate": 9.999967719115574e-06, "loss": 0.4703, "step": 1080 }, { "epoch": 0.3034811903425042, "grad_norm": 0.8605801463127136, "learning_rate": 9.999961583087548e-06, "loss": 0.4343, "step": 1081 }, { "epoch": 0.30376193149915776, "grad_norm": 0.8910146355628967, "learning_rate": 9.999954913494713e-06, "loss": 0.5191, "step": 1082 }, { "epoch": 0.30404267265581136, "grad_norm": 0.6636241674423218, "learning_rate": 9.999947710337785e-06, "loss": 0.4793, "step": 1083 }, { "epoch": 0.3043234138124649, "grad_norm": 0.8792728781700134, "learning_rate": 9.99993997361753e-06, "loss": 0.445, "step": 1084 }, { "epoch": 0.30460415496911847, "grad_norm": 0.7850635051727295, "learning_rate": 9.999931703334774e-06, "loss": 0.4996, "step": 1085 }, { "epoch": 0.304884896125772, "grad_norm": 0.8990797996520996, "learning_rate": 9.999922899490396e-06, "loss": 0.502, "step": 1086 }, { "epoch": 0.30516563728242563, "grad_norm": 0.946212649345398, "learning_rate": 9.999913562085342e-06, "loss": 0.4797, "step": 1087 }, { "epoch": 0.3054463784390792, "grad_norm": 0.8670285940170288, "learning_rate": 9.999903691120603e-06, "loss": 0.4525, "step": 1088 }, { "epoch": 0.30572711959573273, "grad_norm": 0.8761336207389832, "learning_rate": 9.999893286597235e-06, "loss": 0.5337, "step": 1089 }, { "epoch": 0.3060078607523863, "grad_norm": 0.8910177946090698, "learning_rate": 9.999882348516348e-06, "loss": 0.4737, "step": 1090 }, { "epoch": 0.3062886019090399, "grad_norm": 1.042477011680603, "learning_rate": 9.99987087687911e-06, "loss": 0.523, "step": 1091 }, { "epoch": 0.30656934306569344, "grad_norm": 0.8660892844200134, "learning_rate": 9.999858871686743e-06, "loss": 0.4171, "step": 1092 }, { "epoch": 0.306850084222347, "grad_norm": 0.8873071074485779, "learning_rate": 9.999846332940528e-06, "loss": 0.463, "step": 1093 }, { "epoch": 0.30713082537900055, "grad_norm": 0.8817505240440369, "learning_rate": 9.999833260641807e-06, "loss": 0.4774, "step": 1094 }, { "epoch": 0.3074115665356541, "grad_norm": 0.840127170085907, "learning_rate": 9.99981965479197e-06, "loss": 0.4881, "step": 1095 }, { "epoch": 0.3076923076923077, "grad_norm": 0.8102737665176392, "learning_rate": 9.999805515392473e-06, "loss": 0.459, "step": 1096 }, { "epoch": 0.30797304884896126, "grad_norm": 0.7922653555870056, "learning_rate": 9.999790842444822e-06, "loss": 0.4728, "step": 1097 }, { "epoch": 0.3082537900056148, "grad_norm": 0.805242121219635, "learning_rate": 9.999775635950584e-06, "loss": 0.503, "step": 1098 }, { "epoch": 0.30853453116226837, "grad_norm": 0.7929728031158447, "learning_rate": 9.999759895911383e-06, "loss": 0.4475, "step": 1099 }, { "epoch": 0.308815272318922, "grad_norm": 0.8671475052833557, "learning_rate": 9.999743622328895e-06, "loss": 0.509, "step": 1100 }, { "epoch": 0.3090960134755755, "grad_norm": 0.9393376111984253, "learning_rate": 9.999726815204862e-06, "loss": 0.4587, "step": 1101 }, { "epoch": 0.3093767546322291, "grad_norm": 0.8894326686859131, "learning_rate": 9.999709474541072e-06, "loss": 0.513, "step": 1102 }, { "epoch": 0.30965749578888263, "grad_norm": 0.7650179266929626, "learning_rate": 9.99969160033938e-06, "loss": 0.4715, "step": 1103 }, { "epoch": 0.30993823694553624, "grad_norm": 0.7481208443641663, "learning_rate": 9.99967319260169e-06, "loss": 0.4853, "step": 1104 }, { "epoch": 0.3102189781021898, "grad_norm": 0.8617684841156006, "learning_rate": 9.999654251329967e-06, "loss": 0.4982, "step": 1105 }, { "epoch": 0.31049971925884334, "grad_norm": 0.6560022830963135, "learning_rate": 9.999634776526234e-06, "loss": 0.4079, "step": 1106 }, { "epoch": 0.3107804604154969, "grad_norm": 0.6817672252655029, "learning_rate": 9.999614768192569e-06, "loss": 0.4814, "step": 1107 }, { "epoch": 0.3110612015721505, "grad_norm": 0.8304376006126404, "learning_rate": 9.999594226331107e-06, "loss": 0.4919, "step": 1108 }, { "epoch": 0.31134194272880406, "grad_norm": 0.7779669165611267, "learning_rate": 9.999573150944039e-06, "loss": 0.4816, "step": 1109 }, { "epoch": 0.3116226838854576, "grad_norm": 0.7018353343009949, "learning_rate": 9.999551542033614e-06, "loss": 0.4522, "step": 1110 }, { "epoch": 0.31190342504211116, "grad_norm": 0.7014132142066956, "learning_rate": 9.999529399602139e-06, "loss": 0.4363, "step": 1111 }, { "epoch": 0.3121841661987647, "grad_norm": 0.7255052328109741, "learning_rate": 9.999506723651976e-06, "loss": 0.452, "step": 1112 }, { "epoch": 0.3124649073554183, "grad_norm": 0.8355602025985718, "learning_rate": 9.999483514185547e-06, "loss": 0.4634, "step": 1113 }, { "epoch": 0.31274564851207187, "grad_norm": 0.9734678864479065, "learning_rate": 9.999459771205324e-06, "loss": 0.5471, "step": 1114 }, { "epoch": 0.3130263896687254, "grad_norm": 0.7540462613105774, "learning_rate": 9.999435494713847e-06, "loss": 0.4628, "step": 1115 }, { "epoch": 0.313307130825379, "grad_norm": 0.7741772532463074, "learning_rate": 9.999410684713701e-06, "loss": 0.4551, "step": 1116 }, { "epoch": 0.3135878719820326, "grad_norm": 0.9902048707008362, "learning_rate": 9.999385341207536e-06, "loss": 0.508, "step": 1117 }, { "epoch": 0.31386861313868614, "grad_norm": 0.8954352736473083, "learning_rate": 9.999359464198059e-06, "loss": 0.4685, "step": 1118 }, { "epoch": 0.3141493542953397, "grad_norm": 0.9386639595031738, "learning_rate": 9.999333053688028e-06, "loss": 0.4448, "step": 1119 }, { "epoch": 0.31443009545199324, "grad_norm": 0.9378950595855713, "learning_rate": 9.999306109680262e-06, "loss": 0.4827, "step": 1120 }, { "epoch": 0.31471083660864685, "grad_norm": 0.8391536474227905, "learning_rate": 9.999278632177635e-06, "loss": 0.4895, "step": 1121 }, { "epoch": 0.3149915777653004, "grad_norm": 0.8170937895774841, "learning_rate": 9.999250621183083e-06, "loss": 0.4735, "step": 1122 }, { "epoch": 0.31527231892195395, "grad_norm": 0.8841730356216431, "learning_rate": 9.999222076699593e-06, "loss": 0.4549, "step": 1123 }, { "epoch": 0.3155530600786075, "grad_norm": 0.9412198066711426, "learning_rate": 9.999192998730211e-06, "loss": 0.4064, "step": 1124 }, { "epoch": 0.3158338012352611, "grad_norm": 0.835308313369751, "learning_rate": 9.999163387278039e-06, "loss": 0.4729, "step": 1125 }, { "epoch": 0.31611454239191467, "grad_norm": 0.7908468842506409, "learning_rate": 9.999133242346239e-06, "loss": 0.4466, "step": 1126 }, { "epoch": 0.3163952835485682, "grad_norm": 0.881558358669281, "learning_rate": 9.999102563938025e-06, "loss": 0.5225, "step": 1127 }, { "epoch": 0.31667602470522177, "grad_norm": 0.85551917552948, "learning_rate": 9.999071352056676e-06, "loss": 0.5211, "step": 1128 }, { "epoch": 0.3169567658618754, "grad_norm": 0.7841977477073669, "learning_rate": 9.999039606705516e-06, "loss": 0.4472, "step": 1129 }, { "epoch": 0.31723750701852893, "grad_norm": 0.9335364699363708, "learning_rate": 9.999007327887939e-06, "loss": 0.5199, "step": 1130 }, { "epoch": 0.3175182481751825, "grad_norm": 0.9114176034927368, "learning_rate": 9.998974515607384e-06, "loss": 0.4696, "step": 1131 }, { "epoch": 0.31779898933183603, "grad_norm": 0.9091213941574097, "learning_rate": 9.998941169867357e-06, "loss": 0.4742, "step": 1132 }, { "epoch": 0.3180797304884896, "grad_norm": 1.1805241107940674, "learning_rate": 9.998907290671415e-06, "loss": 0.5277, "step": 1133 }, { "epoch": 0.3183604716451432, "grad_norm": 1.036816120147705, "learning_rate": 9.998872878023174e-06, "loss": 0.4831, "step": 1134 }, { "epoch": 0.31864121280179675, "grad_norm": 0.8304394483566284, "learning_rate": 9.998837931926304e-06, "loss": 0.4965, "step": 1135 }, { "epoch": 0.3189219539584503, "grad_norm": 0.782892644405365, "learning_rate": 9.998802452384536e-06, "loss": 0.4742, "step": 1136 }, { "epoch": 0.31920269511510385, "grad_norm": 0.9833454489707947, "learning_rate": 9.998766439401655e-06, "loss": 0.429, "step": 1137 }, { "epoch": 0.31948343627175746, "grad_norm": 0.996681809425354, "learning_rate": 9.998729892981505e-06, "loss": 0.4632, "step": 1138 }, { "epoch": 0.319764177428411, "grad_norm": 0.7468957901000977, "learning_rate": 9.998692813127986e-06, "loss": 0.4746, "step": 1139 }, { "epoch": 0.32004491858506456, "grad_norm": 0.9790645837783813, "learning_rate": 9.998655199845055e-06, "loss": 0.5058, "step": 1140 }, { "epoch": 0.3203256597417181, "grad_norm": 0.8107130527496338, "learning_rate": 9.998617053136726e-06, "loss": 0.4848, "step": 1141 }, { "epoch": 0.3206064008983717, "grad_norm": 0.8455003499984741, "learning_rate": 9.998578373007068e-06, "loss": 0.494, "step": 1142 }, { "epoch": 0.3208871420550253, "grad_norm": 0.9056618809700012, "learning_rate": 9.998539159460213e-06, "loss": 0.5063, "step": 1143 }, { "epoch": 0.32116788321167883, "grad_norm": 0.7718350887298584, "learning_rate": 9.998499412500339e-06, "loss": 0.4429, "step": 1144 }, { "epoch": 0.3214486243683324, "grad_norm": 0.8149054646492004, "learning_rate": 9.998459132131695e-06, "loss": 0.4876, "step": 1145 }, { "epoch": 0.321729365524986, "grad_norm": 0.9570802450180054, "learning_rate": 9.998418318358573e-06, "loss": 0.5506, "step": 1146 }, { "epoch": 0.32201010668163954, "grad_norm": 0.7342217564582825, "learning_rate": 9.998376971185333e-06, "loss": 0.5012, "step": 1147 }, { "epoch": 0.3222908478382931, "grad_norm": 0.7465083003044128, "learning_rate": 9.998335090616384e-06, "loss": 0.4983, "step": 1148 }, { "epoch": 0.32257158899494665, "grad_norm": 0.8385224342346191, "learning_rate": 9.998292676656199e-06, "loss": 0.4948, "step": 1149 }, { "epoch": 0.3228523301516002, "grad_norm": 0.7445923686027527, "learning_rate": 9.998249729309299e-06, "loss": 0.5014, "step": 1150 }, { "epoch": 0.3231330713082538, "grad_norm": 0.8759795427322388, "learning_rate": 9.998206248580272e-06, "loss": 0.5285, "step": 1151 }, { "epoch": 0.32341381246490736, "grad_norm": 0.9513266682624817, "learning_rate": 9.998162234473756e-06, "loss": 0.5295, "step": 1152 }, { "epoch": 0.3236945536215609, "grad_norm": 0.763493537902832, "learning_rate": 9.998117686994446e-06, "loss": 0.4679, "step": 1153 }, { "epoch": 0.32397529477821446, "grad_norm": 0.8014282584190369, "learning_rate": 9.9980726061471e-06, "loss": 0.4575, "step": 1154 }, { "epoch": 0.32425603593486807, "grad_norm": 0.8929340243339539, "learning_rate": 9.998026991936525e-06, "loss": 0.4627, "step": 1155 }, { "epoch": 0.3245367770915216, "grad_norm": 0.9553277492523193, "learning_rate": 9.99798084436759e-06, "loss": 0.4817, "step": 1156 }, { "epoch": 0.3248175182481752, "grad_norm": 0.7893467545509338, "learning_rate": 9.99793416344522e-06, "loss": 0.4743, "step": 1157 }, { "epoch": 0.3250982594048287, "grad_norm": 0.8612390756607056, "learning_rate": 9.997886949174397e-06, "loss": 0.5213, "step": 1158 }, { "epoch": 0.32537900056148233, "grad_norm": 0.77967369556427, "learning_rate": 9.997839201560158e-06, "loss": 0.4631, "step": 1159 }, { "epoch": 0.3256597417181359, "grad_norm": 0.7877880930900574, "learning_rate": 9.997790920607597e-06, "loss": 0.5018, "step": 1160 }, { "epoch": 0.32594048287478944, "grad_norm": 0.7800800204277039, "learning_rate": 9.99774210632187e-06, "loss": 0.4838, "step": 1161 }, { "epoch": 0.326221224031443, "grad_norm": 0.8173455595970154, "learning_rate": 9.997692758708186e-06, "loss": 0.453, "step": 1162 }, { "epoch": 0.3265019651880966, "grad_norm": 0.7861931324005127, "learning_rate": 9.997642877771807e-06, "loss": 0.5204, "step": 1163 }, { "epoch": 0.32678270634475015, "grad_norm": 0.7018530368804932, "learning_rate": 9.997592463518059e-06, "loss": 0.4835, "step": 1164 }, { "epoch": 0.3270634475014037, "grad_norm": 0.8089879751205444, "learning_rate": 9.997541515952321e-06, "loss": 0.4532, "step": 1165 }, { "epoch": 0.32734418865805726, "grad_norm": 0.7837215662002563, "learning_rate": 9.99749003508003e-06, "loss": 0.4317, "step": 1166 }, { "epoch": 0.32762492981471086, "grad_norm": 0.8741607069969177, "learning_rate": 9.99743802090668e-06, "loss": 0.5138, "step": 1167 }, { "epoch": 0.3279056709713644, "grad_norm": 0.7507151365280151, "learning_rate": 9.997385473437822e-06, "loss": 0.4047, "step": 1168 }, { "epoch": 0.32818641212801797, "grad_norm": 0.8245847821235657, "learning_rate": 9.997332392679063e-06, "loss": 0.5032, "step": 1169 }, { "epoch": 0.3284671532846715, "grad_norm": 0.905717134475708, "learning_rate": 9.997278778636067e-06, "loss": 0.4344, "step": 1170 }, { "epoch": 0.3287478944413251, "grad_norm": 0.7550010085105896, "learning_rate": 9.997224631314556e-06, "loss": 0.4952, "step": 1171 }, { "epoch": 0.3290286355979787, "grad_norm": 0.8762251734733582, "learning_rate": 9.997169950720307e-06, "loss": 0.4596, "step": 1172 }, { "epoch": 0.32930937675463223, "grad_norm": 0.7687079310417175, "learning_rate": 9.997114736859158e-06, "loss": 0.4804, "step": 1173 }, { "epoch": 0.3295901179112858, "grad_norm": 1.1469238996505737, "learning_rate": 9.997058989736997e-06, "loss": 0.5505, "step": 1174 }, { "epoch": 0.32987085906793934, "grad_norm": 0.9569076299667358, "learning_rate": 9.997002709359776e-06, "loss": 0.4758, "step": 1175 }, { "epoch": 0.33015160022459294, "grad_norm": 0.8654143214225769, "learning_rate": 9.9969458957335e-06, "loss": 0.5084, "step": 1176 }, { "epoch": 0.3304323413812465, "grad_norm": 0.84196537733078, "learning_rate": 9.996888548864234e-06, "loss": 0.4747, "step": 1177 }, { "epoch": 0.33071308253790005, "grad_norm": 0.7889792323112488, "learning_rate": 9.996830668758095e-06, "loss": 0.4473, "step": 1178 }, { "epoch": 0.3309938236945536, "grad_norm": 0.7202876806259155, "learning_rate": 9.99677225542126e-06, "loss": 0.5425, "step": 1179 }, { "epoch": 0.3312745648512072, "grad_norm": 0.6800077557563782, "learning_rate": 9.99671330885996e-06, "loss": 0.4971, "step": 1180 }, { "epoch": 0.33155530600786076, "grad_norm": 0.8296113014221191, "learning_rate": 9.996653829080492e-06, "loss": 0.4586, "step": 1181 }, { "epoch": 0.3318360471645143, "grad_norm": 0.7130815386772156, "learning_rate": 9.996593816089197e-06, "loss": 0.478, "step": 1182 }, { "epoch": 0.33211678832116787, "grad_norm": 0.8269796967506409, "learning_rate": 9.996533269892483e-06, "loss": 0.4493, "step": 1183 }, { "epoch": 0.3323975294778215, "grad_norm": 0.8497306704521179, "learning_rate": 9.99647219049681e-06, "loss": 0.4777, "step": 1184 }, { "epoch": 0.332678270634475, "grad_norm": 0.7915348410606384, "learning_rate": 9.996410577908695e-06, "loss": 0.4862, "step": 1185 }, { "epoch": 0.3329590117911286, "grad_norm": 0.8377079367637634, "learning_rate": 9.996348432134714e-06, "loss": 0.4927, "step": 1186 }, { "epoch": 0.33323975294778213, "grad_norm": 0.8476775884628296, "learning_rate": 9.996285753181499e-06, "loss": 0.5068, "step": 1187 }, { "epoch": 0.3335204941044357, "grad_norm": 0.8414422869682312, "learning_rate": 9.996222541055739e-06, "loss": 0.448, "step": 1188 }, { "epoch": 0.3338012352610893, "grad_norm": 0.8037646412849426, "learning_rate": 9.996158795764177e-06, "loss": 0.4817, "step": 1189 }, { "epoch": 0.33408197641774284, "grad_norm": 0.8846210241317749, "learning_rate": 9.996094517313618e-06, "loss": 0.4698, "step": 1190 }, { "epoch": 0.3343627175743964, "grad_norm": 0.8361542224884033, "learning_rate": 9.996029705710921e-06, "loss": 0.4929, "step": 1191 }, { "epoch": 0.33464345873104995, "grad_norm": 0.8588730096817017, "learning_rate": 9.995964360963003e-06, "loss": 0.48, "step": 1192 }, { "epoch": 0.33492419988770356, "grad_norm": 0.8661556839942932, "learning_rate": 9.995898483076835e-06, "loss": 0.5238, "step": 1193 }, { "epoch": 0.3352049410443571, "grad_norm": 0.9000097513198853, "learning_rate": 9.995832072059449e-06, "loss": 0.5252, "step": 1194 }, { "epoch": 0.33548568220101066, "grad_norm": 0.8409597873687744, "learning_rate": 9.99576512791793e-06, "loss": 0.5082, "step": 1195 }, { "epoch": 0.3357664233576642, "grad_norm": 0.7277405261993408, "learning_rate": 9.995697650659426e-06, "loss": 0.5317, "step": 1196 }, { "epoch": 0.3360471645143178, "grad_norm": 0.7315642833709717, "learning_rate": 9.995629640291132e-06, "loss": 0.4866, "step": 1197 }, { "epoch": 0.33632790567097137, "grad_norm": 0.7630830407142639, "learning_rate": 9.995561096820309e-06, "loss": 0.497, "step": 1198 }, { "epoch": 0.3366086468276249, "grad_norm": 0.7109522819519043, "learning_rate": 9.995492020254271e-06, "loss": 0.5072, "step": 1199 }, { "epoch": 0.3368893879842785, "grad_norm": 0.7166508436203003, "learning_rate": 9.995422410600391e-06, "loss": 0.4433, "step": 1200 }, { "epoch": 0.3371701291409321, "grad_norm": 0.9349340796470642, "learning_rate": 9.995352267866095e-06, "loss": 0.4625, "step": 1201 }, { "epoch": 0.33745087029758564, "grad_norm": 0.702035665512085, "learning_rate": 9.99528159205887e-06, "loss": 0.4314, "step": 1202 }, { "epoch": 0.3377316114542392, "grad_norm": 0.7309128046035767, "learning_rate": 9.995210383186256e-06, "loss": 0.4176, "step": 1203 }, { "epoch": 0.33801235261089274, "grad_norm": 0.8737656474113464, "learning_rate": 9.995138641255853e-06, "loss": 0.5018, "step": 1204 }, { "epoch": 0.33829309376754635, "grad_norm": 0.888888955116272, "learning_rate": 9.995066366275317e-06, "loss": 0.5339, "step": 1205 }, { "epoch": 0.3385738349241999, "grad_norm": 0.8548439741134644, "learning_rate": 9.99499355825236e-06, "loss": 0.4878, "step": 1206 }, { "epoch": 0.33885457608085345, "grad_norm": 0.9180452823638916, "learning_rate": 9.994920217194755e-06, "loss": 0.5312, "step": 1207 }, { "epoch": 0.339135317237507, "grad_norm": 0.8303162455558777, "learning_rate": 9.994846343110323e-06, "loss": 0.4159, "step": 1208 }, { "epoch": 0.33941605839416056, "grad_norm": 0.9149907231330872, "learning_rate": 9.99477193600695e-06, "loss": 0.4983, "step": 1209 }, { "epoch": 0.33969679955081417, "grad_norm": 0.9785766005516052, "learning_rate": 9.99469699589258e-06, "loss": 0.4575, "step": 1210 }, { "epoch": 0.3399775407074677, "grad_norm": 0.8752284049987793, "learning_rate": 9.994621522775201e-06, "loss": 0.4245, "step": 1211 }, { "epoch": 0.34025828186412127, "grad_norm": 0.8960926532745361, "learning_rate": 9.994545516662876e-06, "loss": 0.4675, "step": 1212 }, { "epoch": 0.3405390230207748, "grad_norm": 0.7819463014602661, "learning_rate": 9.994468977563712e-06, "loss": 0.5165, "step": 1213 }, { "epoch": 0.34081976417742843, "grad_norm": 0.8910586833953857, "learning_rate": 9.994391905485879e-06, "loss": 0.4834, "step": 1214 }, { "epoch": 0.341100505334082, "grad_norm": 0.7877460718154907, "learning_rate": 9.994314300437598e-06, "loss": 0.4403, "step": 1215 }, { "epoch": 0.34138124649073553, "grad_norm": 0.7668460011482239, "learning_rate": 9.994236162427152e-06, "loss": 0.518, "step": 1216 }, { "epoch": 0.3416619876473891, "grad_norm": 0.9465773105621338, "learning_rate": 9.99415749146288e-06, "loss": 0.4881, "step": 1217 }, { "epoch": 0.3419427288040427, "grad_norm": 0.9357730746269226, "learning_rate": 9.994078287553179e-06, "loss": 0.4611, "step": 1218 }, { "epoch": 0.34222346996069625, "grad_norm": 0.7182336449623108, "learning_rate": 9.993998550706498e-06, "loss": 0.4418, "step": 1219 }, { "epoch": 0.3425042111173498, "grad_norm": 1.0413166284561157, "learning_rate": 9.993918280931347e-06, "loss": 0.4809, "step": 1220 }, { "epoch": 0.34278495227400335, "grad_norm": 0.9022256731987, "learning_rate": 9.993837478236293e-06, "loss": 0.5131, "step": 1221 }, { "epoch": 0.34306569343065696, "grad_norm": 0.8306872248649597, "learning_rate": 9.993756142629957e-06, "loss": 0.4447, "step": 1222 }, { "epoch": 0.3433464345873105, "grad_norm": 0.8597789406776428, "learning_rate": 9.993674274121018e-06, "loss": 0.4933, "step": 1223 }, { "epoch": 0.34362717574396406, "grad_norm": 0.8198334574699402, "learning_rate": 9.993591872718218e-06, "loss": 0.4617, "step": 1224 }, { "epoch": 0.3439079169006176, "grad_norm": 0.8512240052223206, "learning_rate": 9.993508938430344e-06, "loss": 0.5089, "step": 1225 }, { "epoch": 0.34418865805727117, "grad_norm": 0.8421564102172852, "learning_rate": 9.99342547126625e-06, "loss": 0.4637, "step": 1226 }, { "epoch": 0.3444693992139248, "grad_norm": 0.7788011431694031, "learning_rate": 9.99334147123484e-06, "loss": 0.511, "step": 1227 }, { "epoch": 0.34475014037057833, "grad_norm": 0.7721831202507019, "learning_rate": 9.993256938345082e-06, "loss": 0.4678, "step": 1228 }, { "epoch": 0.3450308815272319, "grad_norm": 0.946810781955719, "learning_rate": 9.993171872605992e-06, "loss": 0.4807, "step": 1229 }, { "epoch": 0.34531162268388543, "grad_norm": 0.818935215473175, "learning_rate": 9.99308627402665e-06, "loss": 0.4654, "step": 1230 }, { "epoch": 0.34559236384053904, "grad_norm": 0.7893509864807129, "learning_rate": 9.993000142616193e-06, "loss": 0.489, "step": 1231 }, { "epoch": 0.3458731049971926, "grad_norm": 0.8334790468215942, "learning_rate": 9.99291347838381e-06, "loss": 0.518, "step": 1232 }, { "epoch": 0.34615384615384615, "grad_norm": 0.8282754421234131, "learning_rate": 9.99282628133875e-06, "loss": 0.4254, "step": 1233 }, { "epoch": 0.3464345873104997, "grad_norm": 0.8387258052825928, "learning_rate": 9.992738551490315e-06, "loss": 0.477, "step": 1234 }, { "epoch": 0.3467153284671533, "grad_norm": 0.8990135788917542, "learning_rate": 9.99265028884787e-06, "loss": 0.4903, "step": 1235 }, { "epoch": 0.34699606962380686, "grad_norm": 0.751071572303772, "learning_rate": 9.992561493420835e-06, "loss": 0.4891, "step": 1236 }, { "epoch": 0.3472768107804604, "grad_norm": 0.8329818248748779, "learning_rate": 9.992472165218685e-06, "loss": 0.4593, "step": 1237 }, { "epoch": 0.34755755193711396, "grad_norm": 0.8205108642578125, "learning_rate": 9.99238230425095e-06, "loss": 0.4817, "step": 1238 }, { "epoch": 0.34783829309376757, "grad_norm": 1.082516074180603, "learning_rate": 9.992291910527223e-06, "loss": 0.5169, "step": 1239 }, { "epoch": 0.3481190342504211, "grad_norm": 0.7027360200881958, "learning_rate": 9.992200984057146e-06, "loss": 0.4638, "step": 1240 }, { "epoch": 0.3483997754070747, "grad_norm": 0.7534340023994446, "learning_rate": 9.992109524850423e-06, "loss": 0.4388, "step": 1241 }, { "epoch": 0.3486805165637282, "grad_norm": 0.8990961909294128, "learning_rate": 9.99201753291682e-06, "loss": 0.5136, "step": 1242 }, { "epoch": 0.34896125772038183, "grad_norm": 0.8145226240158081, "learning_rate": 9.991925008266145e-06, "loss": 0.4261, "step": 1243 }, { "epoch": 0.3492419988770354, "grad_norm": 0.8352614641189575, "learning_rate": 9.991831950908278e-06, "loss": 0.4867, "step": 1244 }, { "epoch": 0.34952274003368894, "grad_norm": 0.7495833039283752, "learning_rate": 9.991738360853147e-06, "loss": 0.4323, "step": 1245 }, { "epoch": 0.3498034811903425, "grad_norm": 0.8435266613960266, "learning_rate": 9.991644238110741e-06, "loss": 0.4659, "step": 1246 }, { "epoch": 0.35008422234699604, "grad_norm": 0.8464834094047546, "learning_rate": 9.9915495826911e-06, "loss": 0.5104, "step": 1247 }, { "epoch": 0.35036496350364965, "grad_norm": 0.9711482524871826, "learning_rate": 9.99145439460433e-06, "loss": 0.5038, "step": 1248 }, { "epoch": 0.3506457046603032, "grad_norm": 1.0533941984176636, "learning_rate": 9.991358673860586e-06, "loss": 0.4762, "step": 1249 }, { "epoch": 0.35092644581695676, "grad_norm": 0.774887204170227, "learning_rate": 9.991262420470086e-06, "loss": 0.4886, "step": 1250 }, { "epoch": 0.3512071869736103, "grad_norm": 0.9104116559028625, "learning_rate": 9.991165634443095e-06, "loss": 0.4336, "step": 1251 }, { "epoch": 0.3514879281302639, "grad_norm": 0.8911571502685547, "learning_rate": 9.991068315789947e-06, "loss": 0.4751, "step": 1252 }, { "epoch": 0.35176866928691747, "grad_norm": 0.7602444887161255, "learning_rate": 9.990970464521026e-06, "loss": 0.4601, "step": 1253 }, { "epoch": 0.352049410443571, "grad_norm": 0.9924885034561157, "learning_rate": 9.990872080646774e-06, "loss": 0.5201, "step": 1254 }, { "epoch": 0.3523301516002246, "grad_norm": 0.748501181602478, "learning_rate": 9.99077316417769e-06, "loss": 0.4751, "step": 1255 }, { "epoch": 0.3526108927568782, "grad_norm": 0.8485338687896729, "learning_rate": 9.990673715124329e-06, "loss": 0.5258, "step": 1256 }, { "epoch": 0.35289163391353173, "grad_norm": 0.9282192587852478, "learning_rate": 9.990573733497305e-06, "loss": 0.4783, "step": 1257 }, { "epoch": 0.3531723750701853, "grad_norm": 0.7793827652931213, "learning_rate": 9.990473219307286e-06, "loss": 0.5003, "step": 1258 }, { "epoch": 0.35345311622683884, "grad_norm": 0.8053379058837891, "learning_rate": 9.990372172564998e-06, "loss": 0.4637, "step": 1259 }, { "epoch": 0.35373385738349244, "grad_norm": 0.8316372632980347, "learning_rate": 9.990270593281225e-06, "loss": 0.4892, "step": 1260 }, { "epoch": 0.354014598540146, "grad_norm": 0.8421614766120911, "learning_rate": 9.990168481466806e-06, "loss": 0.5076, "step": 1261 }, { "epoch": 0.35429533969679955, "grad_norm": 0.7738340497016907, "learning_rate": 9.99006583713264e-06, "loss": 0.501, "step": 1262 }, { "epoch": 0.3545760808534531, "grad_norm": 0.7720453143119812, "learning_rate": 9.989962660289679e-06, "loss": 0.4427, "step": 1263 }, { "epoch": 0.35485682201010665, "grad_norm": 0.6586256623268127, "learning_rate": 9.989858950948934e-06, "loss": 0.4455, "step": 1264 }, { "epoch": 0.35513756316676026, "grad_norm": 0.8287173509597778, "learning_rate": 9.98975470912147e-06, "loss": 0.4571, "step": 1265 }, { "epoch": 0.3554183043234138, "grad_norm": 0.7584671378135681, "learning_rate": 9.989649934818413e-06, "loss": 0.4747, "step": 1266 }, { "epoch": 0.35569904548006737, "grad_norm": 0.872772216796875, "learning_rate": 9.989544628050944e-06, "loss": 0.518, "step": 1267 }, { "epoch": 0.3559797866367209, "grad_norm": 0.789007842540741, "learning_rate": 9.9894387888303e-06, "loss": 0.4963, "step": 1268 }, { "epoch": 0.3562605277933745, "grad_norm": 0.9237364530563354, "learning_rate": 9.989332417167776e-06, "loss": 0.5423, "step": 1269 }, { "epoch": 0.3565412689500281, "grad_norm": 0.793707013130188, "learning_rate": 9.989225513074723e-06, "loss": 0.5002, "step": 1270 }, { "epoch": 0.35682201010668163, "grad_norm": 0.8023290634155273, "learning_rate": 9.989118076562549e-06, "loss": 0.5043, "step": 1271 }, { "epoch": 0.3571027512633352, "grad_norm": 0.6765355467796326, "learning_rate": 9.989010107642718e-06, "loss": 0.4761, "step": 1272 }, { "epoch": 0.3573834924199888, "grad_norm": 0.8727385997772217, "learning_rate": 9.988901606326756e-06, "loss": 0.4965, "step": 1273 }, { "epoch": 0.35766423357664234, "grad_norm": 0.835047721862793, "learning_rate": 9.988792572626236e-06, "loss": 0.4792, "step": 1274 }, { "epoch": 0.3579449747332959, "grad_norm": 0.7939520478248596, "learning_rate": 9.988683006552796e-06, "loss": 0.4364, "step": 1275 }, { "epoch": 0.35822571588994945, "grad_norm": 0.876715898513794, "learning_rate": 9.988572908118129e-06, "loss": 0.4847, "step": 1276 }, { "epoch": 0.35850645704660306, "grad_norm": 0.8438725471496582, "learning_rate": 9.988462277333983e-06, "loss": 0.483, "step": 1277 }, { "epoch": 0.3587871982032566, "grad_norm": 0.804371178150177, "learning_rate": 9.988351114212163e-06, "loss": 0.4601, "step": 1278 }, { "epoch": 0.35906793935991016, "grad_norm": 0.7393758296966553, "learning_rate": 9.988239418764534e-06, "loss": 0.4858, "step": 1279 }, { "epoch": 0.3593486805165637, "grad_norm": 0.7303123474121094, "learning_rate": 9.988127191003011e-06, "loss": 0.4427, "step": 1280 }, { "epoch": 0.3596294216732173, "grad_norm": 0.7270464897155762, "learning_rate": 9.988014430939577e-06, "loss": 0.4911, "step": 1281 }, { "epoch": 0.35991016282987087, "grad_norm": 0.7835894227027893, "learning_rate": 9.98790113858626e-06, "loss": 0.4954, "step": 1282 }, { "epoch": 0.3601909039865244, "grad_norm": 0.8116075396537781, "learning_rate": 9.987787313955151e-06, "loss": 0.4852, "step": 1283 }, { "epoch": 0.360471645143178, "grad_norm": 0.8052205443382263, "learning_rate": 9.987672957058398e-06, "loss": 0.4868, "step": 1284 }, { "epoch": 0.36075238629983153, "grad_norm": 0.6934790015220642, "learning_rate": 9.987558067908203e-06, "loss": 0.4788, "step": 1285 }, { "epoch": 0.36103312745648514, "grad_norm": 0.876417875289917, "learning_rate": 9.987442646516825e-06, "loss": 0.4554, "step": 1286 }, { "epoch": 0.3613138686131387, "grad_norm": 0.7848859429359436, "learning_rate": 9.987326692896584e-06, "loss": 0.492, "step": 1287 }, { "epoch": 0.36159460976979224, "grad_norm": 0.7674251794815063, "learning_rate": 9.987210207059852e-06, "loss": 0.4928, "step": 1288 }, { "epoch": 0.3618753509264458, "grad_norm": 0.7805658578872681, "learning_rate": 9.987093189019058e-06, "loss": 0.4764, "step": 1289 }, { "epoch": 0.3621560920830994, "grad_norm": 0.7619417309761047, "learning_rate": 9.986975638786696e-06, "loss": 0.4562, "step": 1290 }, { "epoch": 0.36243683323975295, "grad_norm": 0.7314834594726562, "learning_rate": 9.986857556375302e-06, "loss": 0.4478, "step": 1291 }, { "epoch": 0.3627175743964065, "grad_norm": 0.7276840209960938, "learning_rate": 9.986738941797482e-06, "loss": 0.4287, "step": 1292 }, { "epoch": 0.36299831555306006, "grad_norm": 0.8957933783531189, "learning_rate": 9.986619795065894e-06, "loss": 0.519, "step": 1293 }, { "epoch": 0.36327905670971367, "grad_norm": 0.7751873731613159, "learning_rate": 9.986500116193249e-06, "loss": 0.4899, "step": 1294 }, { "epoch": 0.3635597978663672, "grad_norm": 0.9643718600273132, "learning_rate": 9.986379905192322e-06, "loss": 0.5239, "step": 1295 }, { "epoch": 0.36384053902302077, "grad_norm": 0.8136892318725586, "learning_rate": 9.98625916207594e-06, "loss": 0.4314, "step": 1296 }, { "epoch": 0.3641212801796743, "grad_norm": 1.0789484977722168, "learning_rate": 9.986137886856988e-06, "loss": 0.5421, "step": 1297 }, { "epoch": 0.36440202133632793, "grad_norm": 0.8058463335037231, "learning_rate": 9.986016079548406e-06, "loss": 0.4646, "step": 1298 }, { "epoch": 0.3646827624929815, "grad_norm": 0.8146299719810486, "learning_rate": 9.985893740163195e-06, "loss": 0.4648, "step": 1299 }, { "epoch": 0.36496350364963503, "grad_norm": 0.8253329992294312, "learning_rate": 9.985770868714409e-06, "loss": 0.5193, "step": 1300 }, { "epoch": 0.3652442448062886, "grad_norm": 0.7895228266716003, "learning_rate": 9.98564746521516e-06, "loss": 0.4948, "step": 1301 }, { "epoch": 0.3655249859629422, "grad_norm": 0.8136559128761292, "learning_rate": 9.985523529678617e-06, "loss": 0.4576, "step": 1302 }, { "epoch": 0.36580572711959575, "grad_norm": 0.7098286747932434, "learning_rate": 9.985399062118006e-06, "loss": 0.4374, "step": 1303 }, { "epoch": 0.3660864682762493, "grad_norm": 0.7789715528488159, "learning_rate": 9.98527406254661e-06, "loss": 0.4676, "step": 1304 }, { "epoch": 0.36636720943290285, "grad_norm": 0.766231894493103, "learning_rate": 9.985148530977767e-06, "loss": 0.4915, "step": 1305 }, { "epoch": 0.3666479505895564, "grad_norm": 0.8277270197868347, "learning_rate": 9.985022467424873e-06, "loss": 0.508, "step": 1306 }, { "epoch": 0.36692869174621, "grad_norm": 0.810585081577301, "learning_rate": 9.984895871901382e-06, "loss": 0.4925, "step": 1307 }, { "epoch": 0.36720943290286356, "grad_norm": 0.7740399837493896, "learning_rate": 9.984768744420802e-06, "loss": 0.502, "step": 1308 }, { "epoch": 0.3674901740595171, "grad_norm": 0.820045530796051, "learning_rate": 9.9846410849967e-06, "loss": 0.517, "step": 1309 }, { "epoch": 0.36777091521617067, "grad_norm": 0.873641848564148, "learning_rate": 9.984512893642699e-06, "loss": 0.5007, "step": 1310 }, { "epoch": 0.3680516563728243, "grad_norm": 1.0452135801315308, "learning_rate": 9.984384170372478e-06, "loss": 0.5194, "step": 1311 }, { "epoch": 0.36833239752947783, "grad_norm": 0.7366434335708618, "learning_rate": 9.984254915199773e-06, "loss": 0.4454, "step": 1312 }, { "epoch": 0.3686131386861314, "grad_norm": 0.7293533086776733, "learning_rate": 9.98412512813838e-06, "loss": 0.4487, "step": 1313 }, { "epoch": 0.36889387984278493, "grad_norm": 0.9081177115440369, "learning_rate": 9.983994809202148e-06, "loss": 0.4958, "step": 1314 }, { "epoch": 0.36917462099943854, "grad_norm": 0.9251126646995544, "learning_rate": 9.983863958404983e-06, "loss": 0.4455, "step": 1315 }, { "epoch": 0.3694553621560921, "grad_norm": 1.0090391635894775, "learning_rate": 9.983732575760849e-06, "loss": 0.426, "step": 1316 }, { "epoch": 0.36973610331274565, "grad_norm": 0.7077220678329468, "learning_rate": 9.983600661283766e-06, "loss": 0.4615, "step": 1317 }, { "epoch": 0.3700168444693992, "grad_norm": 0.8807445168495178, "learning_rate": 9.983468214987812e-06, "loss": 0.5233, "step": 1318 }, { "epoch": 0.3702975856260528, "grad_norm": 0.7944371700286865, "learning_rate": 9.98333523688712e-06, "loss": 0.483, "step": 1319 }, { "epoch": 0.37057832678270636, "grad_norm": 0.7848116159439087, "learning_rate": 9.98320172699588e-06, "loss": 0.4714, "step": 1320 }, { "epoch": 0.3708590679393599, "grad_norm": 0.7266755104064941, "learning_rate": 9.983067685328341e-06, "loss": 0.4354, "step": 1321 }, { "epoch": 0.37113980909601346, "grad_norm": 0.8377553224563599, "learning_rate": 9.982933111898806e-06, "loss": 0.5186, "step": 1322 }, { "epoch": 0.371420550252667, "grad_norm": 0.7535090446472168, "learning_rate": 9.982798006721637e-06, "loss": 0.4874, "step": 1323 }, { "epoch": 0.3717012914093206, "grad_norm": 0.807345986366272, "learning_rate": 9.982662369811249e-06, "loss": 0.5177, "step": 1324 }, { "epoch": 0.3719820325659742, "grad_norm": 0.8962818384170532, "learning_rate": 9.982526201182118e-06, "loss": 0.4823, "step": 1325 }, { "epoch": 0.3722627737226277, "grad_norm": 0.8004371523857117, "learning_rate": 9.982389500848777e-06, "loss": 0.4973, "step": 1326 }, { "epoch": 0.3725435148792813, "grad_norm": 0.8684409856796265, "learning_rate": 9.98225226882581e-06, "loss": 0.5043, "step": 1327 }, { "epoch": 0.3728242560359349, "grad_norm": 0.7550148367881775, "learning_rate": 9.982114505127865e-06, "loss": 0.4859, "step": 1328 }, { "epoch": 0.37310499719258844, "grad_norm": 0.7923074960708618, "learning_rate": 9.981976209769642e-06, "loss": 0.4842, "step": 1329 }, { "epoch": 0.373385738349242, "grad_norm": 0.7989718317985535, "learning_rate": 9.981837382765898e-06, "loss": 0.4885, "step": 1330 }, { "epoch": 0.37366647950589554, "grad_norm": 0.8559819459915161, "learning_rate": 9.981698024131448e-06, "loss": 0.484, "step": 1331 }, { "epoch": 0.37394722066254915, "grad_norm": 0.690994143486023, "learning_rate": 9.981558133881163e-06, "loss": 0.4413, "step": 1332 }, { "epoch": 0.3742279618192027, "grad_norm": 0.811465859413147, "learning_rate": 9.981417712029975e-06, "loss": 0.5188, "step": 1333 }, { "epoch": 0.37450870297585626, "grad_norm": 0.7612215280532837, "learning_rate": 9.981276758592863e-06, "loss": 0.4742, "step": 1334 }, { "epoch": 0.3747894441325098, "grad_norm": 0.7821604013442993, "learning_rate": 9.981135273584875e-06, "loss": 0.4604, "step": 1335 }, { "epoch": 0.3750701852891634, "grad_norm": 0.780653715133667, "learning_rate": 9.980993257021105e-06, "loss": 0.4694, "step": 1336 }, { "epoch": 0.37535092644581697, "grad_norm": 0.7403636574745178, "learning_rate": 9.98085070891671e-06, "loss": 0.5041, "step": 1337 }, { "epoch": 0.3756316676024705, "grad_norm": 0.7289802432060242, "learning_rate": 9.980707629286899e-06, "loss": 0.4881, "step": 1338 }, { "epoch": 0.3759124087591241, "grad_norm": 0.744711697101593, "learning_rate": 9.980564018146944e-06, "loss": 0.4724, "step": 1339 }, { "epoch": 0.3761931499157777, "grad_norm": 0.7679106593132019, "learning_rate": 9.980419875512169e-06, "loss": 0.4773, "step": 1340 }, { "epoch": 0.37647389107243123, "grad_norm": 0.7225204110145569, "learning_rate": 9.980275201397958e-06, "loss": 0.4373, "step": 1341 }, { "epoch": 0.3767546322290848, "grad_norm": 0.8641905784606934, "learning_rate": 9.980129995819745e-06, "loss": 0.5268, "step": 1342 }, { "epoch": 0.37703537338573834, "grad_norm": 0.8561159372329712, "learning_rate": 9.97998425879303e-06, "loss": 0.5046, "step": 1343 }, { "epoch": 0.3773161145423919, "grad_norm": 0.6561315059661865, "learning_rate": 9.979837990333361e-06, "loss": 0.4526, "step": 1344 }, { "epoch": 0.3775968556990455, "grad_norm": 0.8008257746696472, "learning_rate": 9.979691190456352e-06, "loss": 0.5145, "step": 1345 }, { "epoch": 0.37787759685569905, "grad_norm": 0.7799999713897705, "learning_rate": 9.979543859177664e-06, "loss": 0.4722, "step": 1346 }, { "epoch": 0.3781583380123526, "grad_norm": 0.7862280011177063, "learning_rate": 9.979395996513023e-06, "loss": 0.4596, "step": 1347 }, { "epoch": 0.37843907916900615, "grad_norm": 0.7400721311569214, "learning_rate": 9.979247602478204e-06, "loss": 0.4961, "step": 1348 }, { "epoch": 0.37871982032565976, "grad_norm": 0.7986732125282288, "learning_rate": 9.979098677089046e-06, "loss": 0.4756, "step": 1349 }, { "epoch": 0.3790005614823133, "grad_norm": 0.8387327194213867, "learning_rate": 9.97894922036144e-06, "loss": 0.5021, "step": 1350 }, { "epoch": 0.37928130263896687, "grad_norm": 0.8232660889625549, "learning_rate": 9.978799232311336e-06, "loss": 0.4918, "step": 1351 }, { "epoch": 0.3795620437956204, "grad_norm": 0.7406255602836609, "learning_rate": 9.978648712954738e-06, "loss": 0.4329, "step": 1352 }, { "epoch": 0.379842784952274, "grad_norm": 0.87729811668396, "learning_rate": 9.978497662307709e-06, "loss": 0.4685, "step": 1353 }, { "epoch": 0.3801235261089276, "grad_norm": 0.8358006477355957, "learning_rate": 9.978346080386369e-06, "loss": 0.4578, "step": 1354 }, { "epoch": 0.38040426726558113, "grad_norm": 0.8884652853012085, "learning_rate": 9.978193967206895e-06, "loss": 0.4757, "step": 1355 }, { "epoch": 0.3806850084222347, "grad_norm": 0.8674014806747437, "learning_rate": 9.978041322785517e-06, "loss": 0.4737, "step": 1356 }, { "epoch": 0.3809657495788883, "grad_norm": 0.7499932646751404, "learning_rate": 9.977888147138526e-06, "loss": 0.4703, "step": 1357 }, { "epoch": 0.38124649073554184, "grad_norm": 0.8546051979064941, "learning_rate": 9.977734440282267e-06, "loss": 0.4375, "step": 1358 }, { "epoch": 0.3815272318921954, "grad_norm": 0.7380093336105347, "learning_rate": 9.97758020223314e-06, "loss": 0.4808, "step": 1359 }, { "epoch": 0.38180797304884895, "grad_norm": 0.8195316195487976, "learning_rate": 9.977425433007612e-06, "loss": 0.5102, "step": 1360 }, { "epoch": 0.3820887142055025, "grad_norm": 0.8140376806259155, "learning_rate": 9.977270132622193e-06, "loss": 0.4562, "step": 1361 }, { "epoch": 0.3823694553621561, "grad_norm": 0.7844399213790894, "learning_rate": 9.977114301093456e-06, "loss": 0.4587, "step": 1362 }, { "epoch": 0.38265019651880966, "grad_norm": 0.921621561050415, "learning_rate": 9.976957938438033e-06, "loss": 0.5032, "step": 1363 }, { "epoch": 0.3829309376754632, "grad_norm": 0.8237884640693665, "learning_rate": 9.976801044672608e-06, "loss": 0.4569, "step": 1364 }, { "epoch": 0.38321167883211676, "grad_norm": 0.7739622592926025, "learning_rate": 9.976643619813924e-06, "loss": 0.452, "step": 1365 }, { "epoch": 0.38349241998877037, "grad_norm": 0.8974728584289551, "learning_rate": 9.97648566387878e-06, "loss": 0.4814, "step": 1366 }, { "epoch": 0.3837731611454239, "grad_norm": 0.838535726070404, "learning_rate": 9.976327176884034e-06, "loss": 0.4335, "step": 1367 }, { "epoch": 0.3840539023020775, "grad_norm": 0.8377835750579834, "learning_rate": 9.976168158846596e-06, "loss": 0.4787, "step": 1368 }, { "epoch": 0.38433464345873103, "grad_norm": 0.9407914280891418, "learning_rate": 9.976008609783436e-06, "loss": 0.4428, "step": 1369 }, { "epoch": 0.38461538461538464, "grad_norm": 0.9758520722389221, "learning_rate": 9.975848529711583e-06, "loss": 0.4676, "step": 1370 }, { "epoch": 0.3848961257720382, "grad_norm": 0.8121294379234314, "learning_rate": 9.975687918648115e-06, "loss": 0.4446, "step": 1371 }, { "epoch": 0.38517686692869174, "grad_norm": 0.861008882522583, "learning_rate": 9.975526776610178e-06, "loss": 0.5343, "step": 1372 }, { "epoch": 0.3854576080853453, "grad_norm": 1.0093066692352295, "learning_rate": 9.975365103614962e-06, "loss": 0.4832, "step": 1373 }, { "epoch": 0.3857383492419989, "grad_norm": 0.8748912811279297, "learning_rate": 9.97520289967972e-06, "loss": 0.4914, "step": 1374 }, { "epoch": 0.38601909039865245, "grad_norm": 0.8520193099975586, "learning_rate": 9.975040164821767e-06, "loss": 0.4343, "step": 1375 }, { "epoch": 0.386299831555306, "grad_norm": 0.9347444176673889, "learning_rate": 9.974876899058464e-06, "loss": 0.5065, "step": 1376 }, { "epoch": 0.38658057271195956, "grad_norm": 0.7243751287460327, "learning_rate": 9.974713102407234e-06, "loss": 0.4528, "step": 1377 }, { "epoch": 0.38686131386861317, "grad_norm": 0.9694708585739136, "learning_rate": 9.974548774885558e-06, "loss": 0.5738, "step": 1378 }, { "epoch": 0.3871420550252667, "grad_norm": 0.9710278511047363, "learning_rate": 9.974383916510973e-06, "loss": 0.5006, "step": 1379 }, { "epoch": 0.38742279618192027, "grad_norm": 0.8056460022926331, "learning_rate": 9.974218527301067e-06, "loss": 0.4436, "step": 1380 }, { "epoch": 0.3877035373385738, "grad_norm": 0.83726966381073, "learning_rate": 9.974052607273494e-06, "loss": 0.451, "step": 1381 }, { "epoch": 0.3879842784952274, "grad_norm": 0.9945221543312073, "learning_rate": 9.97388615644596e-06, "loss": 0.4864, "step": 1382 }, { "epoch": 0.388265019651881, "grad_norm": 0.8952162861824036, "learning_rate": 9.973719174836224e-06, "loss": 0.483, "step": 1383 }, { "epoch": 0.38854576080853453, "grad_norm": 0.9157962799072266, "learning_rate": 9.973551662462106e-06, "loss": 0.4456, "step": 1384 }, { "epoch": 0.3888265019651881, "grad_norm": 0.9072278738021851, "learning_rate": 9.973383619341486e-06, "loss": 0.4322, "step": 1385 }, { "epoch": 0.38910724312184164, "grad_norm": 0.8230103254318237, "learning_rate": 9.973215045492292e-06, "loss": 0.4803, "step": 1386 }, { "epoch": 0.38938798427849525, "grad_norm": 0.9844091534614563, "learning_rate": 9.973045940932515e-06, "loss": 0.4944, "step": 1387 }, { "epoch": 0.3896687254351488, "grad_norm": 0.733823835849762, "learning_rate": 9.972876305680201e-06, "loss": 0.4699, "step": 1388 }, { "epoch": 0.38994946659180235, "grad_norm": 0.7970441579818726, "learning_rate": 9.97270613975345e-06, "loss": 0.454, "step": 1389 }, { "epoch": 0.3902302077484559, "grad_norm": 0.7929754257202148, "learning_rate": 9.972535443170425e-06, "loss": 0.4799, "step": 1390 }, { "epoch": 0.3905109489051095, "grad_norm": 0.7899355888366699, "learning_rate": 9.972364215949338e-06, "loss": 0.431, "step": 1391 }, { "epoch": 0.39079169006176306, "grad_norm": 0.8784908056259155, "learning_rate": 9.972192458108465e-06, "loss": 0.4708, "step": 1392 }, { "epoch": 0.3910724312184166, "grad_norm": 0.841095507144928, "learning_rate": 9.97202016966613e-06, "loss": 0.4702, "step": 1393 }, { "epoch": 0.39135317237507017, "grad_norm": 0.780816376209259, "learning_rate": 9.971847350640724e-06, "loss": 0.463, "step": 1394 }, { "epoch": 0.3916339135317238, "grad_norm": 0.7796720862388611, "learning_rate": 9.971674001050687e-06, "loss": 0.4648, "step": 1395 }, { "epoch": 0.39191465468837733, "grad_norm": 0.8291729092597961, "learning_rate": 9.971500120914515e-06, "loss": 0.502, "step": 1396 }, { "epoch": 0.3921953958450309, "grad_norm": 0.7778762578964233, "learning_rate": 9.971325710250768e-06, "loss": 0.4338, "step": 1397 }, { "epoch": 0.39247613700168443, "grad_norm": 0.8537712693214417, "learning_rate": 9.971150769078056e-06, "loss": 0.4454, "step": 1398 }, { "epoch": 0.392756878158338, "grad_norm": 0.9105302691459656, "learning_rate": 9.970975297415045e-06, "loss": 0.4964, "step": 1399 }, { "epoch": 0.3930376193149916, "grad_norm": 0.7636066675186157, "learning_rate": 9.970799295280464e-06, "loss": 0.4532, "step": 1400 }, { "epoch": 0.39331836047164515, "grad_norm": 0.8331524729728699, "learning_rate": 9.970622762693093e-06, "loss": 0.4574, "step": 1401 }, { "epoch": 0.3935991016282987, "grad_norm": 0.8816007375717163, "learning_rate": 9.970445699671773e-06, "loss": 0.4654, "step": 1402 }, { "epoch": 0.39387984278495225, "grad_norm": 0.9071862697601318, "learning_rate": 9.970268106235395e-06, "loss": 0.4585, "step": 1403 }, { "epoch": 0.39416058394160586, "grad_norm": 0.7492117881774902, "learning_rate": 9.970089982402915e-06, "loss": 0.4568, "step": 1404 }, { "epoch": 0.3944413250982594, "grad_norm": 1.2280957698822021, "learning_rate": 9.969911328193337e-06, "loss": 0.4999, "step": 1405 }, { "epoch": 0.39472206625491296, "grad_norm": 0.9285614490509033, "learning_rate": 9.969732143625728e-06, "loss": 0.4757, "step": 1406 }, { "epoch": 0.3950028074115665, "grad_norm": 0.823926568031311, "learning_rate": 9.96955242871921e-06, "loss": 0.4654, "step": 1407 }, { "epoch": 0.3952835485682201, "grad_norm": 0.7346882224082947, "learning_rate": 9.96937218349296e-06, "loss": 0.419, "step": 1408 }, { "epoch": 0.3955642897248737, "grad_norm": 0.9443112015724182, "learning_rate": 9.969191407966214e-06, "loss": 0.4837, "step": 1409 }, { "epoch": 0.3958450308815272, "grad_norm": 0.7753540873527527, "learning_rate": 9.969010102158262e-06, "loss": 0.4637, "step": 1410 }, { "epoch": 0.3961257720381808, "grad_norm": 0.8248151540756226, "learning_rate": 9.96882826608845e-06, "loss": 0.4988, "step": 1411 }, { "epoch": 0.3964065131948344, "grad_norm": 0.85450279712677, "learning_rate": 9.968645899776187e-06, "loss": 0.4896, "step": 1412 }, { "epoch": 0.39668725435148794, "grad_norm": 0.9409697651863098, "learning_rate": 9.968463003240931e-06, "loss": 0.4807, "step": 1413 }, { "epoch": 0.3969679955081415, "grad_norm": 0.8878933191299438, "learning_rate": 9.9682795765022e-06, "loss": 0.4162, "step": 1414 }, { "epoch": 0.39724873666479504, "grad_norm": 0.8156781196594238, "learning_rate": 9.96809561957957e-06, "loss": 0.535, "step": 1415 }, { "epoch": 0.39752947782144865, "grad_norm": 0.9529138207435608, "learning_rate": 9.967911132492667e-06, "loss": 0.502, "step": 1416 }, { "epoch": 0.3978102189781022, "grad_norm": 0.9243906736373901, "learning_rate": 9.967726115261183e-06, "loss": 0.4879, "step": 1417 }, { "epoch": 0.39809096013475576, "grad_norm": 0.7522918581962585, "learning_rate": 9.96754056790486e-06, "loss": 0.4685, "step": 1418 }, { "epoch": 0.3983717012914093, "grad_norm": 0.8215938806533813, "learning_rate": 9.967354490443497e-06, "loss": 0.4936, "step": 1419 }, { "epoch": 0.39865244244806286, "grad_norm": 0.9339632987976074, "learning_rate": 9.967167882896956e-06, "loss": 0.4927, "step": 1420 }, { "epoch": 0.39893318360471647, "grad_norm": 0.7769731283187866, "learning_rate": 9.966980745285144e-06, "loss": 0.4361, "step": 1421 }, { "epoch": 0.39921392476137, "grad_norm": 0.8242982029914856, "learning_rate": 9.966793077628037e-06, "loss": 0.4621, "step": 1422 }, { "epoch": 0.3994946659180236, "grad_norm": 0.7599133849143982, "learning_rate": 9.966604879945659e-06, "loss": 0.4679, "step": 1423 }, { "epoch": 0.3997754070746771, "grad_norm": 0.8209311366081238, "learning_rate": 9.966416152258091e-06, "loss": 0.489, "step": 1424 }, { "epoch": 0.40005614823133073, "grad_norm": 0.8286378979682922, "learning_rate": 9.966226894585478e-06, "loss": 0.5006, "step": 1425 }, { "epoch": 0.4003368893879843, "grad_norm": 0.7345553040504456, "learning_rate": 9.966037106948012e-06, "loss": 0.5022, "step": 1426 }, { "epoch": 0.40061763054463784, "grad_norm": 0.8585434556007385, "learning_rate": 9.96584678936595e-06, "loss": 0.5224, "step": 1427 }, { "epoch": 0.4008983717012914, "grad_norm": 0.69450843334198, "learning_rate": 9.965655941859597e-06, "loss": 0.5126, "step": 1428 }, { "epoch": 0.401179112857945, "grad_norm": 0.7183843851089478, "learning_rate": 9.965464564449322e-06, "loss": 0.4258, "step": 1429 }, { "epoch": 0.40145985401459855, "grad_norm": 0.9425116777420044, "learning_rate": 9.965272657155546e-06, "loss": 0.495, "step": 1430 }, { "epoch": 0.4017405951712521, "grad_norm": 0.6672155857086182, "learning_rate": 9.96508021999875e-06, "loss": 0.4726, "step": 1431 }, { "epoch": 0.40202133632790565, "grad_norm": 0.6973167061805725, "learning_rate": 9.96488725299947e-06, "loss": 0.4776, "step": 1432 }, { "epoch": 0.40230207748455926, "grad_norm": 0.8882748484611511, "learning_rate": 9.964693756178295e-06, "loss": 0.4543, "step": 1433 }, { "epoch": 0.4025828186412128, "grad_norm": 0.7760128974914551, "learning_rate": 9.964499729555876e-06, "loss": 0.441, "step": 1434 }, { "epoch": 0.40286355979786637, "grad_norm": 0.7548131942749023, "learning_rate": 9.964305173152919e-06, "loss": 0.4888, "step": 1435 }, { "epoch": 0.4031443009545199, "grad_norm": 0.7253437638282776, "learning_rate": 9.964110086990184e-06, "loss": 0.4973, "step": 1436 }, { "epoch": 0.40342504211117347, "grad_norm": 0.8014014363288879, "learning_rate": 9.96391447108849e-06, "loss": 0.4264, "step": 1437 }, { "epoch": 0.4037057832678271, "grad_norm": 0.8057489395141602, "learning_rate": 9.963718325468712e-06, "loss": 0.4906, "step": 1438 }, { "epoch": 0.40398652442448063, "grad_norm": 0.7576271891593933, "learning_rate": 9.963521650151783e-06, "loss": 0.4648, "step": 1439 }, { "epoch": 0.4042672655811342, "grad_norm": 0.866041898727417, "learning_rate": 9.963324445158688e-06, "loss": 0.4921, "step": 1440 }, { "epoch": 0.40454800673778774, "grad_norm": 0.8300007581710815, "learning_rate": 9.963126710510476e-06, "loss": 0.454, "step": 1441 }, { "epoch": 0.40482874789444134, "grad_norm": 0.7814754247665405, "learning_rate": 9.962928446228241e-06, "loss": 0.4376, "step": 1442 }, { "epoch": 0.4051094890510949, "grad_norm": 0.6946207880973816, "learning_rate": 9.962729652333147e-06, "loss": 0.4209, "step": 1443 }, { "epoch": 0.40539023020774845, "grad_norm": 0.7424529194831848, "learning_rate": 9.962530328846407e-06, "loss": 0.4108, "step": 1444 }, { "epoch": 0.405670971364402, "grad_norm": 0.8224674463272095, "learning_rate": 9.962330475789287e-06, "loss": 0.53, "step": 1445 }, { "epoch": 0.4059517125210556, "grad_norm": 0.714143693447113, "learning_rate": 9.96213009318312e-06, "loss": 0.4943, "step": 1446 }, { "epoch": 0.40623245367770916, "grad_norm": 0.8385562896728516, "learning_rate": 9.961929181049286e-06, "loss": 0.4736, "step": 1447 }, { "epoch": 0.4065131948343627, "grad_norm": 0.6077457070350647, "learning_rate": 9.961727739409226e-06, "loss": 0.4747, "step": 1448 }, { "epoch": 0.40679393599101626, "grad_norm": 0.7630495429039001, "learning_rate": 9.961525768284438e-06, "loss": 0.48, "step": 1449 }, { "epoch": 0.40707467714766987, "grad_norm": 0.7672988772392273, "learning_rate": 9.96132326769647e-06, "loss": 0.4887, "step": 1450 }, { "epoch": 0.4073554183043234, "grad_norm": 0.7725625038146973, "learning_rate": 9.961120237666938e-06, "loss": 0.4883, "step": 1451 }, { "epoch": 0.407636159460977, "grad_norm": 0.749697208404541, "learning_rate": 9.960916678217504e-06, "loss": 0.4646, "step": 1452 }, { "epoch": 0.40791690061763053, "grad_norm": 0.8230544328689575, "learning_rate": 9.960712589369894e-06, "loss": 0.4293, "step": 1453 }, { "epoch": 0.40819764177428414, "grad_norm": 0.7162449359893799, "learning_rate": 9.960507971145884e-06, "loss": 0.534, "step": 1454 }, { "epoch": 0.4084783829309377, "grad_norm": 0.7527154684066772, "learning_rate": 9.960302823567311e-06, "loss": 0.4873, "step": 1455 }, { "epoch": 0.40875912408759124, "grad_norm": 0.7369965314865112, "learning_rate": 9.960097146656065e-06, "loss": 0.482, "step": 1456 }, { "epoch": 0.4090398652442448, "grad_norm": 0.7361452579498291, "learning_rate": 9.959890940434098e-06, "loss": 0.4207, "step": 1457 }, { "epoch": 0.40932060640089835, "grad_norm": 0.8282372951507568, "learning_rate": 9.959684204923415e-06, "loss": 0.506, "step": 1458 }, { "epoch": 0.40960134755755195, "grad_norm": 0.7200673222541809, "learning_rate": 9.959476940146074e-06, "loss": 0.4781, "step": 1459 }, { "epoch": 0.4098820887142055, "grad_norm": 0.7844985723495483, "learning_rate": 9.959269146124195e-06, "loss": 0.4968, "step": 1460 }, { "epoch": 0.41016282987085906, "grad_norm": 0.83040452003479, "learning_rate": 9.959060822879952e-06, "loss": 0.4742, "step": 1461 }, { "epoch": 0.4104435710275126, "grad_norm": 0.8366664052009583, "learning_rate": 9.958851970435576e-06, "loss": 0.4932, "step": 1462 }, { "epoch": 0.4107243121841662, "grad_norm": 0.8647298812866211, "learning_rate": 9.958642588813355e-06, "loss": 0.443, "step": 1463 }, { "epoch": 0.41100505334081977, "grad_norm": 0.929803192615509, "learning_rate": 9.958432678035633e-06, "loss": 0.477, "step": 1464 }, { "epoch": 0.4112857944974733, "grad_norm": 0.76978600025177, "learning_rate": 9.958222238124811e-06, "loss": 0.4367, "step": 1465 }, { "epoch": 0.4115665356541269, "grad_norm": 0.7822108268737793, "learning_rate": 9.958011269103343e-06, "loss": 0.4694, "step": 1466 }, { "epoch": 0.4118472768107805, "grad_norm": 0.7481580376625061, "learning_rate": 9.957799770993746e-06, "loss": 0.472, "step": 1467 }, { "epoch": 0.41212801796743403, "grad_norm": 0.7773211002349854, "learning_rate": 9.957587743818586e-06, "loss": 0.4853, "step": 1468 }, { "epoch": 0.4124087591240876, "grad_norm": 0.7309362292289734, "learning_rate": 9.957375187600493e-06, "loss": 0.4408, "step": 1469 }, { "epoch": 0.41268950028074114, "grad_norm": 0.8492318391799927, "learning_rate": 9.957162102362147e-06, "loss": 0.4844, "step": 1470 }, { "epoch": 0.41297024143739475, "grad_norm": 0.7797303199768066, "learning_rate": 9.95694848812629e-06, "loss": 0.4609, "step": 1471 }, { "epoch": 0.4132509825940483, "grad_norm": 0.8322198987007141, "learning_rate": 9.956734344915713e-06, "loss": 0.5176, "step": 1472 }, { "epoch": 0.41353172375070185, "grad_norm": 0.8106900453567505, "learning_rate": 9.956519672753271e-06, "loss": 0.4792, "step": 1473 }, { "epoch": 0.4138124649073554, "grad_norm": 0.8217254877090454, "learning_rate": 9.956304471661873e-06, "loss": 0.4491, "step": 1474 }, { "epoch": 0.41409320606400896, "grad_norm": 0.8479796648025513, "learning_rate": 9.956088741664483e-06, "loss": 0.4203, "step": 1475 }, { "epoch": 0.41437394722066256, "grad_norm": 0.7619498372077942, "learning_rate": 9.955872482784122e-06, "loss": 0.4971, "step": 1476 }, { "epoch": 0.4146546883773161, "grad_norm": 0.7956091165542603, "learning_rate": 9.955655695043868e-06, "loss": 0.4715, "step": 1477 }, { "epoch": 0.41493542953396967, "grad_norm": 0.7198827266693115, "learning_rate": 9.955438378466855e-06, "loss": 0.4713, "step": 1478 }, { "epoch": 0.4152161706906232, "grad_norm": 0.7029407024383545, "learning_rate": 9.955220533076276e-06, "loss": 0.447, "step": 1479 }, { "epoch": 0.41549691184727683, "grad_norm": 0.7933072447776794, "learning_rate": 9.955002158895374e-06, "loss": 0.4698, "step": 1480 }, { "epoch": 0.4157776530039304, "grad_norm": 0.732537031173706, "learning_rate": 9.954783255947456e-06, "loss": 0.398, "step": 1481 }, { "epoch": 0.41605839416058393, "grad_norm": 0.8646494746208191, "learning_rate": 9.954563824255879e-06, "loss": 0.5254, "step": 1482 }, { "epoch": 0.4163391353172375, "grad_norm": 0.7793468236923218, "learning_rate": 9.954343863844063e-06, "loss": 0.4341, "step": 1483 }, { "epoch": 0.4166198764738911, "grad_norm": 0.705496072769165, "learning_rate": 9.954123374735478e-06, "loss": 0.4845, "step": 1484 }, { "epoch": 0.41690061763054465, "grad_norm": 0.7429677248001099, "learning_rate": 9.953902356953653e-06, "loss": 0.4863, "step": 1485 }, { "epoch": 0.4171813587871982, "grad_norm": 0.7521372437477112, "learning_rate": 9.953680810522178e-06, "loss": 0.504, "step": 1486 }, { "epoch": 0.41746209994385175, "grad_norm": 0.8362607359886169, "learning_rate": 9.953458735464689e-06, "loss": 0.5103, "step": 1487 }, { "epoch": 0.41774284110050536, "grad_norm": 0.979227602481842, "learning_rate": 9.95323613180489e-06, "loss": 0.4921, "step": 1488 }, { "epoch": 0.4180235822571589, "grad_norm": 0.8398364186286926, "learning_rate": 9.95301299956653e-06, "loss": 0.4748, "step": 1489 }, { "epoch": 0.41830432341381246, "grad_norm": 0.8194150328636169, "learning_rate": 9.952789338773423e-06, "loss": 0.4934, "step": 1490 }, { "epoch": 0.418585064570466, "grad_norm": 0.9343910813331604, "learning_rate": 9.95256514944944e-06, "loss": 0.5287, "step": 1491 }, { "epoch": 0.4188658057271196, "grad_norm": 1.1770141124725342, "learning_rate": 9.952340431618502e-06, "loss": 0.4856, "step": 1492 }, { "epoch": 0.4191465468837732, "grad_norm": 0.8844005465507507, "learning_rate": 9.952115185304587e-06, "loss": 0.4633, "step": 1493 }, { "epoch": 0.4194272880404267, "grad_norm": 0.8328038454055786, "learning_rate": 9.951889410531737e-06, "loss": 0.4553, "step": 1494 }, { "epoch": 0.4197080291970803, "grad_norm": 0.8194181323051453, "learning_rate": 9.951663107324042e-06, "loss": 0.4636, "step": 1495 }, { "epoch": 0.41998877035373383, "grad_norm": 0.8475966453552246, "learning_rate": 9.951436275705653e-06, "loss": 0.5128, "step": 1496 }, { "epoch": 0.42026951151038744, "grad_norm": 0.770342230796814, "learning_rate": 9.951208915700776e-06, "loss": 0.4889, "step": 1497 }, { "epoch": 0.420550252667041, "grad_norm": 0.9271401762962341, "learning_rate": 9.950981027333672e-06, "loss": 0.4543, "step": 1498 }, { "epoch": 0.42083099382369454, "grad_norm": 0.7796668410301208, "learning_rate": 9.95075261062866e-06, "loss": 0.4798, "step": 1499 }, { "epoch": 0.4211117349803481, "grad_norm": 0.8748780488967896, "learning_rate": 9.950523665610118e-06, "loss": 0.4667, "step": 1500 }, { "epoch": 0.4213924761370017, "grad_norm": 0.8888767957687378, "learning_rate": 9.950294192302475e-06, "loss": 0.4436, "step": 1501 }, { "epoch": 0.42167321729365526, "grad_norm": 0.8639453649520874, "learning_rate": 9.95006419073022e-06, "loss": 0.4863, "step": 1502 }, { "epoch": 0.4219539584503088, "grad_norm": 0.7428751587867737, "learning_rate": 9.949833660917897e-06, "loss": 0.4854, "step": 1503 }, { "epoch": 0.42223469960696236, "grad_norm": 0.856082022190094, "learning_rate": 9.949602602890107e-06, "loss": 0.4637, "step": 1504 }, { "epoch": 0.42251544076361597, "grad_norm": 0.9224918484687805, "learning_rate": 9.949371016671505e-06, "loss": 0.4895, "step": 1505 }, { "epoch": 0.4227961819202695, "grad_norm": 0.7828034162521362, "learning_rate": 9.949138902286807e-06, "loss": 0.4702, "step": 1506 }, { "epoch": 0.4230769230769231, "grad_norm": 0.9434019327163696, "learning_rate": 9.948906259760785e-06, "loss": 0.5116, "step": 1507 }, { "epoch": 0.4233576642335766, "grad_norm": 0.9723303318023682, "learning_rate": 9.948673089118259e-06, "loss": 0.4614, "step": 1508 }, { "epoch": 0.42363840539023023, "grad_norm": 0.8419533967971802, "learning_rate": 9.948439390384115e-06, "loss": 0.4921, "step": 1509 }, { "epoch": 0.4239191465468838, "grad_norm": 0.8092457056045532, "learning_rate": 9.948205163583292e-06, "loss": 0.4875, "step": 1510 }, { "epoch": 0.42419988770353734, "grad_norm": 0.9529274702072144, "learning_rate": 9.947970408740783e-06, "loss": 0.4838, "step": 1511 }, { "epoch": 0.4244806288601909, "grad_norm": 0.7889905571937561, "learning_rate": 9.947735125881644e-06, "loss": 0.4495, "step": 1512 }, { "epoch": 0.42476137001684444, "grad_norm": 0.7975249290466309, "learning_rate": 9.947499315030979e-06, "loss": 0.4847, "step": 1513 }, { "epoch": 0.42504211117349805, "grad_norm": 0.7952381372451782, "learning_rate": 9.947262976213954e-06, "loss": 0.448, "step": 1514 }, { "epoch": 0.4253228523301516, "grad_norm": 0.8523067235946655, "learning_rate": 9.947026109455789e-06, "loss": 0.5072, "step": 1515 }, { "epoch": 0.42560359348680515, "grad_norm": 0.8707007169723511, "learning_rate": 9.946788714781761e-06, "loss": 0.4931, "step": 1516 }, { "epoch": 0.4258843346434587, "grad_norm": 0.830998420715332, "learning_rate": 9.946550792217204e-06, "loss": 0.4841, "step": 1517 }, { "epoch": 0.4261650758001123, "grad_norm": 0.741875171661377, "learning_rate": 9.946312341787507e-06, "loss": 0.4465, "step": 1518 }, { "epoch": 0.42644581695676587, "grad_norm": 0.9552786350250244, "learning_rate": 9.946073363518115e-06, "loss": 0.468, "step": 1519 }, { "epoch": 0.4267265581134194, "grad_norm": 0.8409485816955566, "learning_rate": 9.945833857434533e-06, "loss": 0.5204, "step": 1520 }, { "epoch": 0.42700729927007297, "grad_norm": 0.6598392724990845, "learning_rate": 9.945593823562316e-06, "loss": 0.4635, "step": 1521 }, { "epoch": 0.4272880404267266, "grad_norm": 0.7886444926261902, "learning_rate": 9.945353261927081e-06, "loss": 0.4528, "step": 1522 }, { "epoch": 0.42756878158338013, "grad_norm": 0.8217816352844238, "learning_rate": 9.9451121725545e-06, "loss": 0.4471, "step": 1523 }, { "epoch": 0.4278495227400337, "grad_norm": 0.8745150566101074, "learning_rate": 9.944870555470298e-06, "loss": 0.4729, "step": 1524 }, { "epoch": 0.42813026389668724, "grad_norm": 0.7911088466644287, "learning_rate": 9.944628410700262e-06, "loss": 0.4849, "step": 1525 }, { "epoch": 0.42841100505334084, "grad_norm": 0.7825942635536194, "learning_rate": 9.94438573827023e-06, "loss": 0.4911, "step": 1526 }, { "epoch": 0.4286917462099944, "grad_norm": 0.8885286450386047, "learning_rate": 9.9441425382061e-06, "loss": 0.4249, "step": 1527 }, { "epoch": 0.42897248736664795, "grad_norm": 0.7094171047210693, "learning_rate": 9.943898810533823e-06, "loss": 0.4342, "step": 1528 }, { "epoch": 0.4292532285233015, "grad_norm": 0.772149920463562, "learning_rate": 9.94365455527941e-06, "loss": 0.4713, "step": 1529 }, { "epoch": 0.4295339696799551, "grad_norm": 0.8422811031341553, "learning_rate": 9.943409772468923e-06, "loss": 0.4715, "step": 1530 }, { "epoch": 0.42981471083660866, "grad_norm": 0.7839064002037048, "learning_rate": 9.943164462128487e-06, "loss": 0.4097, "step": 1531 }, { "epoch": 0.4300954519932622, "grad_norm": 0.7710862159729004, "learning_rate": 9.942918624284282e-06, "loss": 0.4303, "step": 1532 }, { "epoch": 0.43037619314991576, "grad_norm": 0.8447971343994141, "learning_rate": 9.942672258962537e-06, "loss": 0.5089, "step": 1533 }, { "epoch": 0.4306569343065693, "grad_norm": 0.7621978521347046, "learning_rate": 9.942425366189545e-06, "loss": 0.4849, "step": 1534 }, { "epoch": 0.4309376754632229, "grad_norm": 0.6854081153869629, "learning_rate": 9.942177945991652e-06, "loss": 0.4185, "step": 1535 }, { "epoch": 0.4312184166198765, "grad_norm": 0.8562795519828796, "learning_rate": 9.941929998395263e-06, "loss": 0.4907, "step": 1536 }, { "epoch": 0.43149915777653003, "grad_norm": 0.8944031596183777, "learning_rate": 9.941681523426835e-06, "loss": 0.4517, "step": 1537 }, { "epoch": 0.4317798989331836, "grad_norm": 0.7585890293121338, "learning_rate": 9.941432521112887e-06, "loss": 0.446, "step": 1538 }, { "epoch": 0.4320606400898372, "grad_norm": 0.7893147468566895, "learning_rate": 9.94118299147999e-06, "loss": 0.4825, "step": 1539 }, { "epoch": 0.43234138124649074, "grad_norm": 0.8792277574539185, "learning_rate": 9.94093293455477e-06, "loss": 0.4426, "step": 1540 }, { "epoch": 0.4326221224031443, "grad_norm": 0.7946180701255798, "learning_rate": 9.940682350363913e-06, "loss": 0.5026, "step": 1541 }, { "epoch": 0.43290286355979785, "grad_norm": 0.7558562159538269, "learning_rate": 9.940431238934158e-06, "loss": 0.4722, "step": 1542 }, { "epoch": 0.43318360471645145, "grad_norm": 0.7382895946502686, "learning_rate": 9.940179600292305e-06, "loss": 0.4698, "step": 1543 }, { "epoch": 0.433464345873105, "grad_norm": 0.8346493244171143, "learning_rate": 9.939927434465206e-06, "loss": 0.5273, "step": 1544 }, { "epoch": 0.43374508702975856, "grad_norm": 0.7740429043769836, "learning_rate": 9.939674741479772e-06, "loss": 0.4671, "step": 1545 }, { "epoch": 0.4340258281864121, "grad_norm": 0.7033578753471375, "learning_rate": 9.939421521362966e-06, "loss": 0.4802, "step": 1546 }, { "epoch": 0.4343065693430657, "grad_norm": 0.792039692401886, "learning_rate": 9.939167774141811e-06, "loss": 0.4912, "step": 1547 }, { "epoch": 0.43458731049971927, "grad_norm": 0.7619795799255371, "learning_rate": 9.938913499843386e-06, "loss": 0.4564, "step": 1548 }, { "epoch": 0.4348680516563728, "grad_norm": 0.7204170227050781, "learning_rate": 9.938658698494826e-06, "loss": 0.4554, "step": 1549 }, { "epoch": 0.4351487928130264, "grad_norm": 0.7560875415802002, "learning_rate": 9.938403370123321e-06, "loss": 0.4724, "step": 1550 }, { "epoch": 0.43542953396968, "grad_norm": 0.6911844611167908, "learning_rate": 9.938147514756117e-06, "loss": 0.5127, "step": 1551 }, { "epoch": 0.43571027512633353, "grad_norm": 0.8913024663925171, "learning_rate": 9.93789113242052e-06, "loss": 0.4806, "step": 1552 }, { "epoch": 0.4359910162829871, "grad_norm": 0.7620805501937866, "learning_rate": 9.937634223143889e-06, "loss": 0.4561, "step": 1553 }, { "epoch": 0.43627175743964064, "grad_norm": 0.9313696622848511, "learning_rate": 9.937376786953637e-06, "loss": 0.4356, "step": 1554 }, { "epoch": 0.4365524985962942, "grad_norm": 0.6566257476806641, "learning_rate": 9.93711882387724e-06, "loss": 0.4554, "step": 1555 }, { "epoch": 0.4368332397529478, "grad_norm": 0.6699604392051697, "learning_rate": 9.936860333942221e-06, "loss": 0.4352, "step": 1556 }, { "epoch": 0.43711398090960135, "grad_norm": 0.8826926350593567, "learning_rate": 9.93660131717617e-06, "loss": 0.5356, "step": 1557 }, { "epoch": 0.4373947220662549, "grad_norm": 0.8786247372627258, "learning_rate": 9.936341773606723e-06, "loss": 0.4661, "step": 1558 }, { "epoch": 0.43767546322290846, "grad_norm": 0.6881845593452454, "learning_rate": 9.93608170326158e-06, "loss": 0.4566, "step": 1559 }, { "epoch": 0.43795620437956206, "grad_norm": 0.7192972898483276, "learning_rate": 9.935821106168493e-06, "loss": 0.4161, "step": 1560 }, { "epoch": 0.4382369455362156, "grad_norm": 0.8811171054840088, "learning_rate": 9.935559982355271e-06, "loss": 0.4917, "step": 1561 }, { "epoch": 0.43851768669286917, "grad_norm": 0.788429319858551, "learning_rate": 9.935298331849783e-06, "loss": 0.4911, "step": 1562 }, { "epoch": 0.4387984278495227, "grad_norm": 0.8126656413078308, "learning_rate": 9.935036154679945e-06, "loss": 0.4891, "step": 1563 }, { "epoch": 0.43907916900617633, "grad_norm": 0.7749803066253662, "learning_rate": 9.934773450873737e-06, "loss": 0.473, "step": 1564 }, { "epoch": 0.4393599101628299, "grad_norm": 0.8331796526908875, "learning_rate": 9.934510220459193e-06, "loss": 0.4875, "step": 1565 }, { "epoch": 0.43964065131948343, "grad_norm": 0.7581439018249512, "learning_rate": 9.934246463464405e-06, "loss": 0.4686, "step": 1566 }, { "epoch": 0.439921392476137, "grad_norm": 0.7353121042251587, "learning_rate": 9.933982179917519e-06, "loss": 0.4451, "step": 1567 }, { "epoch": 0.4402021336327906, "grad_norm": 0.8095428347587585, "learning_rate": 9.933717369846737e-06, "loss": 0.4955, "step": 1568 }, { "epoch": 0.44048287478944415, "grad_norm": 0.8571195602416992, "learning_rate": 9.933452033280319e-06, "loss": 0.4613, "step": 1569 }, { "epoch": 0.4407636159460977, "grad_norm": 0.6963633894920349, "learning_rate": 9.933186170246579e-06, "loss": 0.4291, "step": 1570 }, { "epoch": 0.44104435710275125, "grad_norm": 0.7224349975585938, "learning_rate": 9.932919780773886e-06, "loss": 0.4578, "step": 1571 }, { "epoch": 0.4413250982594048, "grad_norm": 0.9017399549484253, "learning_rate": 9.932652864890671e-06, "loss": 0.4873, "step": 1572 }, { "epoch": 0.4416058394160584, "grad_norm": 0.7524573802947998, "learning_rate": 9.932385422625418e-06, "loss": 0.4774, "step": 1573 }, { "epoch": 0.44188658057271196, "grad_norm": 0.8817484974861145, "learning_rate": 9.932117454006662e-06, "loss": 0.4847, "step": 1574 }, { "epoch": 0.4421673217293655, "grad_norm": 0.7601636052131653, "learning_rate": 9.931848959063004e-06, "loss": 0.4163, "step": 1575 }, { "epoch": 0.44244806288601907, "grad_norm": 0.8592706918716431, "learning_rate": 9.931579937823094e-06, "loss": 0.4385, "step": 1576 }, { "epoch": 0.4427288040426727, "grad_norm": 0.7996790409088135, "learning_rate": 9.931310390315642e-06, "loss": 0.4685, "step": 1577 }, { "epoch": 0.4430095451993262, "grad_norm": 0.8284817934036255, "learning_rate": 9.931040316569409e-06, "loss": 0.4946, "step": 1578 }, { "epoch": 0.4432902863559798, "grad_norm": 0.8335215449333191, "learning_rate": 9.930769716613218e-06, "loss": 0.4647, "step": 1579 }, { "epoch": 0.44357102751263333, "grad_norm": 0.8053838610649109, "learning_rate": 9.930498590475945e-06, "loss": 0.4935, "step": 1580 }, { "epoch": 0.44385176866928694, "grad_norm": 0.8241704106330872, "learning_rate": 9.930226938186525e-06, "loss": 0.4465, "step": 1581 }, { "epoch": 0.4441325098259405, "grad_norm": 0.7485541701316833, "learning_rate": 9.929954759773943e-06, "loss": 0.4882, "step": 1582 }, { "epoch": 0.44441325098259404, "grad_norm": 0.7574432492256165, "learning_rate": 9.929682055267246e-06, "loss": 0.4345, "step": 1583 }, { "epoch": 0.4446939921392476, "grad_norm": 0.8734549880027771, "learning_rate": 9.929408824695539e-06, "loss": 0.4552, "step": 1584 }, { "epoch": 0.4449747332959012, "grad_norm": 0.769991934299469, "learning_rate": 9.929135068087975e-06, "loss": 0.4565, "step": 1585 }, { "epoch": 0.44525547445255476, "grad_norm": 0.7188234925270081, "learning_rate": 9.928860785473767e-06, "loss": 0.4535, "step": 1586 }, { "epoch": 0.4455362156092083, "grad_norm": 0.8100794553756714, "learning_rate": 9.928585976882187e-06, "loss": 0.4705, "step": 1587 }, { "epoch": 0.44581695676586186, "grad_norm": 0.7685830593109131, "learning_rate": 9.92831064234256e-06, "loss": 0.4515, "step": 1588 }, { "epoch": 0.44609769792251547, "grad_norm": 0.6984787583351135, "learning_rate": 9.92803478188427e-06, "loss": 0.4332, "step": 1589 }, { "epoch": 0.446378439079169, "grad_norm": 0.7266493439674377, "learning_rate": 9.927758395536753e-06, "loss": 0.4594, "step": 1590 }, { "epoch": 0.4466591802358226, "grad_norm": 0.783992350101471, "learning_rate": 9.927481483329503e-06, "loss": 0.4426, "step": 1591 }, { "epoch": 0.4469399213924761, "grad_norm": 0.7979679703712463, "learning_rate": 9.927204045292071e-06, "loss": 0.4821, "step": 1592 }, { "epoch": 0.4472206625491297, "grad_norm": 0.7410790324211121, "learning_rate": 9.926926081454065e-06, "loss": 0.4638, "step": 1593 }, { "epoch": 0.4475014037057833, "grad_norm": 0.7430979013442993, "learning_rate": 9.926647591845144e-06, "loss": 0.4643, "step": 1594 }, { "epoch": 0.44778214486243684, "grad_norm": 0.7869971990585327, "learning_rate": 9.926368576495031e-06, "loss": 0.4221, "step": 1595 }, { "epoch": 0.4480628860190904, "grad_norm": 0.7983207106590271, "learning_rate": 9.926089035433497e-06, "loss": 0.4693, "step": 1596 }, { "epoch": 0.44834362717574394, "grad_norm": 0.8259583115577698, "learning_rate": 9.925808968690376e-06, "loss": 0.5099, "step": 1597 }, { "epoch": 0.44862436833239755, "grad_norm": 0.7925142049789429, "learning_rate": 9.925528376295552e-06, "loss": 0.4541, "step": 1598 }, { "epoch": 0.4489051094890511, "grad_norm": 0.6849550604820251, "learning_rate": 9.92524725827897e-06, "loss": 0.4452, "step": 1599 }, { "epoch": 0.44918585064570465, "grad_norm": 0.9882868528366089, "learning_rate": 9.924965614670629e-06, "loss": 0.4764, "step": 1600 }, { "epoch": 0.4494665918023582, "grad_norm": 0.8814857602119446, "learning_rate": 9.924683445500584e-06, "loss": 0.4699, "step": 1601 }, { "epoch": 0.4497473329590118, "grad_norm": 0.7847899794578552, "learning_rate": 9.924400750798947e-06, "loss": 0.4377, "step": 1602 }, { "epoch": 0.45002807411566537, "grad_norm": 0.7930905222892761, "learning_rate": 9.924117530595882e-06, "loss": 0.4912, "step": 1603 }, { "epoch": 0.4503088152723189, "grad_norm": 0.810623288154602, "learning_rate": 9.923833784921617e-06, "loss": 0.4587, "step": 1604 }, { "epoch": 0.45058955642897247, "grad_norm": 0.9229034185409546, "learning_rate": 9.92354951380643e-06, "loss": 0.4293, "step": 1605 }, { "epoch": 0.4508702975856261, "grad_norm": 0.9106329679489136, "learning_rate": 9.923264717280656e-06, "loss": 0.5011, "step": 1606 }, { "epoch": 0.45115103874227963, "grad_norm": 0.8749147057533264, "learning_rate": 9.922979395374687e-06, "loss": 0.4763, "step": 1607 }, { "epoch": 0.4514317798989332, "grad_norm": 0.8200501799583435, "learning_rate": 9.92269354811897e-06, "loss": 0.4594, "step": 1608 }, { "epoch": 0.45171252105558674, "grad_norm": 0.9100151658058167, "learning_rate": 9.922407175544012e-06, "loss": 0.4603, "step": 1609 }, { "epoch": 0.4519932622122403, "grad_norm": 0.911439836025238, "learning_rate": 9.922120277680369e-06, "loss": 0.4665, "step": 1610 }, { "epoch": 0.4522740033688939, "grad_norm": 0.7262864112854004, "learning_rate": 9.92183285455866e-06, "loss": 0.5072, "step": 1611 }, { "epoch": 0.45255474452554745, "grad_norm": 0.8657403588294983, "learning_rate": 9.921544906209554e-06, "loss": 0.5121, "step": 1612 }, { "epoch": 0.452835485682201, "grad_norm": 0.8669281005859375, "learning_rate": 9.921256432663781e-06, "loss": 0.4388, "step": 1613 }, { "epoch": 0.45311622683885455, "grad_norm": 0.9119303822517395, "learning_rate": 9.920967433952126e-06, "loss": 0.5122, "step": 1614 }, { "epoch": 0.45339696799550816, "grad_norm": 0.7300405502319336, "learning_rate": 9.920677910105428e-06, "loss": 0.5011, "step": 1615 }, { "epoch": 0.4536777091521617, "grad_norm": 0.7571413516998291, "learning_rate": 9.92038786115458e-06, "loss": 0.4831, "step": 1616 }, { "epoch": 0.45395845030881526, "grad_norm": 0.9800949692726135, "learning_rate": 9.92009728713054e-06, "loss": 0.4989, "step": 1617 }, { "epoch": 0.4542391914654688, "grad_norm": 0.8586767911911011, "learning_rate": 9.919806188064314e-06, "loss": 0.5118, "step": 1618 }, { "epoch": 0.4545199326221224, "grad_norm": 0.7775992155075073, "learning_rate": 9.919514563986965e-06, "loss": 0.4514, "step": 1619 }, { "epoch": 0.454800673778776, "grad_norm": 0.8338137865066528, "learning_rate": 9.919222414929614e-06, "loss": 0.4799, "step": 1620 }, { "epoch": 0.45508141493542953, "grad_norm": 0.7976445555686951, "learning_rate": 9.91892974092344e-06, "loss": 0.4953, "step": 1621 }, { "epoch": 0.4553621560920831, "grad_norm": 0.7487460970878601, "learning_rate": 9.91863654199967e-06, "loss": 0.4395, "step": 1622 }, { "epoch": 0.4556428972487367, "grad_norm": 0.7499296069145203, "learning_rate": 9.918342818189594e-06, "loss": 0.4953, "step": 1623 }, { "epoch": 0.45592363840539024, "grad_norm": 0.7772714495658875, "learning_rate": 9.91804856952456e-06, "loss": 0.4385, "step": 1624 }, { "epoch": 0.4562043795620438, "grad_norm": 0.7606932520866394, "learning_rate": 9.917753796035965e-06, "loss": 0.4545, "step": 1625 }, { "epoch": 0.45648512071869735, "grad_norm": 0.7545024752616882, "learning_rate": 9.917458497755267e-06, "loss": 0.4663, "step": 1626 }, { "epoch": 0.45676586187535095, "grad_norm": 0.8371989130973816, "learning_rate": 9.917162674713979e-06, "loss": 0.4154, "step": 1627 }, { "epoch": 0.4570466030320045, "grad_norm": 0.7079052925109863, "learning_rate": 9.916866326943666e-06, "loss": 0.4643, "step": 1628 }, { "epoch": 0.45732734418865806, "grad_norm": 0.8473893404006958, "learning_rate": 9.916569454475954e-06, "loss": 0.4883, "step": 1629 }, { "epoch": 0.4576080853453116, "grad_norm": 0.9153130650520325, "learning_rate": 9.916272057342527e-06, "loss": 0.4841, "step": 1630 }, { "epoch": 0.45788882650196516, "grad_norm": 0.8404461741447449, "learning_rate": 9.915974135575115e-06, "loss": 0.4754, "step": 1631 }, { "epoch": 0.45816956765861877, "grad_norm": 0.7715436220169067, "learning_rate": 9.915675689205516e-06, "loss": 0.469, "step": 1632 }, { "epoch": 0.4584503088152723, "grad_norm": 0.8396292924880981, "learning_rate": 9.915376718265575e-06, "loss": 0.45, "step": 1633 }, { "epoch": 0.4587310499719259, "grad_norm": 0.7500838041305542, "learning_rate": 9.915077222787197e-06, "loss": 0.4445, "step": 1634 }, { "epoch": 0.4590117911285794, "grad_norm": 0.8071966171264648, "learning_rate": 9.914777202802343e-06, "loss": 0.4591, "step": 1635 }, { "epoch": 0.45929253228523303, "grad_norm": 0.758062481880188, "learning_rate": 9.91447665834303e-06, "loss": 0.4708, "step": 1636 }, { "epoch": 0.4595732734418866, "grad_norm": 0.7524632811546326, "learning_rate": 9.914175589441328e-06, "loss": 0.4156, "step": 1637 }, { "epoch": 0.45985401459854014, "grad_norm": 0.8921982049942017, "learning_rate": 9.913873996129367e-06, "loss": 0.4928, "step": 1638 }, { "epoch": 0.4601347557551937, "grad_norm": 0.788564920425415, "learning_rate": 9.91357187843933e-06, "loss": 0.4954, "step": 1639 }, { "epoch": 0.4604154969118473, "grad_norm": 0.8892732262611389, "learning_rate": 9.913269236403457e-06, "loss": 0.5074, "step": 1640 }, { "epoch": 0.46069623806850085, "grad_norm": 0.9354593753814697, "learning_rate": 9.912966070054045e-06, "loss": 0.4672, "step": 1641 }, { "epoch": 0.4609769792251544, "grad_norm": 0.7764392495155334, "learning_rate": 9.912662379423447e-06, "loss": 0.5, "step": 1642 }, { "epoch": 0.46125772038180796, "grad_norm": 0.8332095742225647, "learning_rate": 9.912358164544068e-06, "loss": 0.4983, "step": 1643 }, { "epoch": 0.46153846153846156, "grad_norm": 0.8388085961341858, "learning_rate": 9.912053425448375e-06, "loss": 0.486, "step": 1644 }, { "epoch": 0.4618192026951151, "grad_norm": 0.7964088916778564, "learning_rate": 9.911748162168886e-06, "loss": 0.4782, "step": 1645 }, { "epoch": 0.46209994385176867, "grad_norm": 0.684843122959137, "learning_rate": 9.911442374738177e-06, "loss": 0.4474, "step": 1646 }, { "epoch": 0.4623806850084222, "grad_norm": 0.8008718490600586, "learning_rate": 9.911136063188879e-06, "loss": 0.4766, "step": 1647 }, { "epoch": 0.4626614261650758, "grad_norm": 0.7822255492210388, "learning_rate": 9.910829227553683e-06, "loss": 0.4693, "step": 1648 }, { "epoch": 0.4629421673217294, "grad_norm": 0.7523805499076843, "learning_rate": 9.91052186786533e-06, "loss": 0.4622, "step": 1649 }, { "epoch": 0.46322290847838293, "grad_norm": 0.8271676301956177, "learning_rate": 9.910213984156618e-06, "loss": 0.467, "step": 1650 }, { "epoch": 0.4635036496350365, "grad_norm": 0.714205801486969, "learning_rate": 9.909905576460405e-06, "loss": 0.4825, "step": 1651 }, { "epoch": 0.46378439079169004, "grad_norm": 0.6896019577980042, "learning_rate": 9.909596644809601e-06, "loss": 0.4256, "step": 1652 }, { "epoch": 0.46406513194834365, "grad_norm": 0.7542632222175598, "learning_rate": 9.909287189237175e-06, "loss": 0.434, "step": 1653 }, { "epoch": 0.4643458731049972, "grad_norm": 0.8367012143135071, "learning_rate": 9.908977209776149e-06, "loss": 0.4808, "step": 1654 }, { "epoch": 0.46462661426165075, "grad_norm": 0.790479302406311, "learning_rate": 9.908666706459602e-06, "loss": 0.464, "step": 1655 }, { "epoch": 0.4649073554183043, "grad_norm": 0.6635505557060242, "learning_rate": 9.908355679320668e-06, "loss": 0.4576, "step": 1656 }, { "epoch": 0.4651880965749579, "grad_norm": 0.8331106305122375, "learning_rate": 9.90804412839254e-06, "loss": 0.4516, "step": 1657 }, { "epoch": 0.46546883773161146, "grad_norm": 0.7288654446601868, "learning_rate": 9.907732053708463e-06, "loss": 0.4304, "step": 1658 }, { "epoch": 0.465749578888265, "grad_norm": 0.8938093781471252, "learning_rate": 9.90741945530174e-06, "loss": 0.4573, "step": 1659 }, { "epoch": 0.46603032004491857, "grad_norm": 0.9637454152107239, "learning_rate": 9.907106333205731e-06, "loss": 0.4768, "step": 1660 }, { "epoch": 0.4663110612015722, "grad_norm": 0.7337474822998047, "learning_rate": 9.90679268745385e-06, "loss": 0.455, "step": 1661 }, { "epoch": 0.4665918023582257, "grad_norm": 0.7972109317779541, "learning_rate": 9.906478518079564e-06, "loss": 0.4839, "step": 1662 }, { "epoch": 0.4668725435148793, "grad_norm": 0.7869675159454346, "learning_rate": 9.906163825116406e-06, "loss": 0.4418, "step": 1663 }, { "epoch": 0.46715328467153283, "grad_norm": 0.7835783958435059, "learning_rate": 9.90584860859795e-06, "loss": 0.473, "step": 1664 }, { "epoch": 0.46743402582818644, "grad_norm": 0.8191174268722534, "learning_rate": 9.905532868557841e-06, "loss": 0.4685, "step": 1665 }, { "epoch": 0.46771476698484, "grad_norm": 0.8050325512886047, "learning_rate": 9.905216605029768e-06, "loss": 0.4777, "step": 1666 }, { "epoch": 0.46799550814149354, "grad_norm": 0.8423100709915161, "learning_rate": 9.904899818047483e-06, "loss": 0.4874, "step": 1667 }, { "epoch": 0.4682762492981471, "grad_norm": 0.8271785378456116, "learning_rate": 9.904582507644791e-06, "loss": 0.4673, "step": 1668 }, { "epoch": 0.46855699045480065, "grad_norm": 0.8468924164772034, "learning_rate": 9.904264673855553e-06, "loss": 0.4361, "step": 1669 }, { "epoch": 0.46883773161145426, "grad_norm": 0.8870489597320557, "learning_rate": 9.903946316713687e-06, "loss": 0.4564, "step": 1670 }, { "epoch": 0.4691184727681078, "grad_norm": 0.8100656867027283, "learning_rate": 9.903627436253166e-06, "loss": 0.4289, "step": 1671 }, { "epoch": 0.46939921392476136, "grad_norm": 0.7862449288368225, "learning_rate": 9.903308032508019e-06, "loss": 0.4387, "step": 1672 }, { "epoch": 0.4696799550814149, "grad_norm": 0.8773373365402222, "learning_rate": 9.902988105512328e-06, "loss": 0.4436, "step": 1673 }, { "epoch": 0.4699606962380685, "grad_norm": 0.8158847093582153, "learning_rate": 9.902667655300237e-06, "loss": 0.4848, "step": 1674 }, { "epoch": 0.4702414373947221, "grad_norm": 0.7537901997566223, "learning_rate": 9.902346681905944e-06, "loss": 0.4255, "step": 1675 }, { "epoch": 0.4705221785513756, "grad_norm": 0.8028502464294434, "learning_rate": 9.902025185363697e-06, "loss": 0.4752, "step": 1676 }, { "epoch": 0.4708029197080292, "grad_norm": 0.7397227883338928, "learning_rate": 9.901703165707805e-06, "loss": 0.4505, "step": 1677 }, { "epoch": 0.4710836608646828, "grad_norm": 0.7525709271430969, "learning_rate": 9.901380622972634e-06, "loss": 0.439, "step": 1678 }, { "epoch": 0.47136440202133634, "grad_norm": 0.9410326480865479, "learning_rate": 9.901057557192603e-06, "loss": 0.4574, "step": 1679 }, { "epoch": 0.4716451431779899, "grad_norm": 0.6887338757514954, "learning_rate": 9.900733968402186e-06, "loss": 0.4515, "step": 1680 }, { "epoch": 0.47192588433464344, "grad_norm": 0.7193292379379272, "learning_rate": 9.900409856635916e-06, "loss": 0.4393, "step": 1681 }, { "epoch": 0.47220662549129705, "grad_norm": 0.7619711756706238, "learning_rate": 9.90008522192838e-06, "loss": 0.4356, "step": 1682 }, { "epoch": 0.4724873666479506, "grad_norm": 0.8705488443374634, "learning_rate": 9.899760064314221e-06, "loss": 0.5041, "step": 1683 }, { "epoch": 0.47276810780460415, "grad_norm": 0.733539342880249, "learning_rate": 9.899434383828137e-06, "loss": 0.4908, "step": 1684 }, { "epoch": 0.4730488489612577, "grad_norm": 0.8176860809326172, "learning_rate": 9.899108180504883e-06, "loss": 0.4769, "step": 1685 }, { "epoch": 0.47332959011791126, "grad_norm": 0.8628261685371399, "learning_rate": 9.89878145437927e-06, "loss": 0.467, "step": 1686 }, { "epoch": 0.47361033127456487, "grad_norm": 0.7747090458869934, "learning_rate": 9.898454205486164e-06, "loss": 0.5571, "step": 1687 }, { "epoch": 0.4738910724312184, "grad_norm": 0.7447236776351929, "learning_rate": 9.898126433860484e-06, "loss": 0.4442, "step": 1688 }, { "epoch": 0.47417181358787197, "grad_norm": 0.7353898882865906, "learning_rate": 9.897798139537214e-06, "loss": 0.3964, "step": 1689 }, { "epoch": 0.4744525547445255, "grad_norm": 0.7461681365966797, "learning_rate": 9.897469322551381e-06, "loss": 0.4388, "step": 1690 }, { "epoch": 0.47473329590117913, "grad_norm": 0.8284276127815247, "learning_rate": 9.89713998293808e-06, "loss": 0.5372, "step": 1691 }, { "epoch": 0.4750140370578327, "grad_norm": 0.7964382171630859, "learning_rate": 9.896810120732452e-06, "loss": 0.4263, "step": 1692 }, { "epoch": 0.47529477821448624, "grad_norm": 0.8379276394844055, "learning_rate": 9.8964797359697e-06, "loss": 0.4609, "step": 1693 }, { "epoch": 0.4755755193711398, "grad_norm": 0.7556933164596558, "learning_rate": 9.896148828685077e-06, "loss": 0.4225, "step": 1694 }, { "epoch": 0.4758562605277934, "grad_norm": 0.8176212310791016, "learning_rate": 9.895817398913901e-06, "loss": 0.4548, "step": 1695 }, { "epoch": 0.47613700168444695, "grad_norm": 0.8952212929725647, "learning_rate": 9.895485446691537e-06, "loss": 0.4612, "step": 1696 }, { "epoch": 0.4764177428411005, "grad_norm": 0.7643344402313232, "learning_rate": 9.895152972053408e-06, "loss": 0.4882, "step": 1697 }, { "epoch": 0.47669848399775405, "grad_norm": 0.7792640328407288, "learning_rate": 9.894819975034995e-06, "loss": 0.4505, "step": 1698 }, { "epoch": 0.47697922515440766, "grad_norm": 0.7249532341957092, "learning_rate": 9.894486455671833e-06, "loss": 0.4439, "step": 1699 }, { "epoch": 0.4772599663110612, "grad_norm": 0.8037902116775513, "learning_rate": 9.894152413999514e-06, "loss": 0.4635, "step": 1700 }, { "epoch": 0.47754070746771476, "grad_norm": 0.8761308789253235, "learning_rate": 9.893817850053683e-06, "loss": 0.4588, "step": 1701 }, { "epoch": 0.4778214486243683, "grad_norm": 0.7966299057006836, "learning_rate": 9.893482763870046e-06, "loss": 0.4748, "step": 1702 }, { "epoch": 0.4781021897810219, "grad_norm": 0.6806185841560364, "learning_rate": 9.893147155484357e-06, "loss": 0.4698, "step": 1703 }, { "epoch": 0.4783829309376755, "grad_norm": 0.8648146986961365, "learning_rate": 9.892811024932433e-06, "loss": 0.4815, "step": 1704 }, { "epoch": 0.47866367209432903, "grad_norm": 0.8874828815460205, "learning_rate": 9.892474372250141e-06, "loss": 0.4552, "step": 1705 }, { "epoch": 0.4789444132509826, "grad_norm": 0.7632767558097839, "learning_rate": 9.89213719747341e-06, "loss": 0.4308, "step": 1706 }, { "epoch": 0.47922515440763613, "grad_norm": 0.7456077337265015, "learning_rate": 9.89179950063822e-06, "loss": 0.4683, "step": 1707 }, { "epoch": 0.47950589556428974, "grad_norm": 0.8706619739532471, "learning_rate": 9.891461281780606e-06, "loss": 0.4552, "step": 1708 }, { "epoch": 0.4797866367209433, "grad_norm": 0.724503219127655, "learning_rate": 9.891122540936661e-06, "loss": 0.4558, "step": 1709 }, { "epoch": 0.48006737787759685, "grad_norm": 0.9172972440719604, "learning_rate": 9.890783278142537e-06, "loss": 0.4365, "step": 1710 }, { "epoch": 0.4803481190342504, "grad_norm": 0.796964168548584, "learning_rate": 9.890443493434434e-06, "loss": 0.4908, "step": 1711 }, { "epoch": 0.480628860190904, "grad_norm": 0.7678824067115784, "learning_rate": 9.890103186848614e-06, "loss": 0.4613, "step": 1712 }, { "epoch": 0.48090960134755756, "grad_norm": 0.9513916969299316, "learning_rate": 9.88976235842139e-06, "loss": 0.4717, "step": 1713 }, { "epoch": 0.4811903425042111, "grad_norm": 0.9423227906227112, "learning_rate": 9.889421008189135e-06, "loss": 0.4948, "step": 1714 }, { "epoch": 0.48147108366086466, "grad_norm": 0.7791553735733032, "learning_rate": 9.889079136188276e-06, "loss": 0.4246, "step": 1715 }, { "epoch": 0.48175182481751827, "grad_norm": 0.8257462382316589, "learning_rate": 9.888736742455294e-06, "loss": 0.4384, "step": 1716 }, { "epoch": 0.4820325659741718, "grad_norm": 0.8112425804138184, "learning_rate": 9.888393827026728e-06, "loss": 0.4939, "step": 1717 }, { "epoch": 0.4823133071308254, "grad_norm": 0.7351109385490417, "learning_rate": 9.888050389939172e-06, "loss": 0.4958, "step": 1718 }, { "epoch": 0.4825940482874789, "grad_norm": 0.7306787371635437, "learning_rate": 9.887706431229274e-06, "loss": 0.4605, "step": 1719 }, { "epoch": 0.48287478944413254, "grad_norm": 0.7030362486839294, "learning_rate": 9.887361950933742e-06, "loss": 0.4529, "step": 1720 }, { "epoch": 0.4831555306007861, "grad_norm": 0.6981697082519531, "learning_rate": 9.887016949089334e-06, "loss": 0.4915, "step": 1721 }, { "epoch": 0.48343627175743964, "grad_norm": 0.7343105673789978, "learning_rate": 9.886671425732868e-06, "loss": 0.5037, "step": 1722 }, { "epoch": 0.4837170129140932, "grad_norm": 0.6748523116111755, "learning_rate": 9.886325380901217e-06, "loss": 0.4758, "step": 1723 }, { "epoch": 0.48399775407074674, "grad_norm": 0.7729491591453552, "learning_rate": 9.885978814631306e-06, "loss": 0.4684, "step": 1724 }, { "epoch": 0.48427849522740035, "grad_norm": 0.8255395889282227, "learning_rate": 9.885631726960119e-06, "loss": 0.4684, "step": 1725 }, { "epoch": 0.4845592363840539, "grad_norm": 0.7924033403396606, "learning_rate": 9.885284117924696e-06, "loss": 0.4562, "step": 1726 }, { "epoch": 0.48483997754070746, "grad_norm": 0.6522700786590576, "learning_rate": 9.884935987562132e-06, "loss": 0.418, "step": 1727 }, { "epoch": 0.485120718697361, "grad_norm": 0.7519421577453613, "learning_rate": 9.884587335909577e-06, "loss": 0.4698, "step": 1728 }, { "epoch": 0.4854014598540146, "grad_norm": 0.7885614633560181, "learning_rate": 9.884238163004237e-06, "loss": 0.5158, "step": 1729 }, { "epoch": 0.48568220101066817, "grad_norm": 0.790217399597168, "learning_rate": 9.883888468883373e-06, "loss": 0.5119, "step": 1730 }, { "epoch": 0.4859629421673217, "grad_norm": 0.8577813506126404, "learning_rate": 9.883538253584302e-06, "loss": 0.4703, "step": 1731 }, { "epoch": 0.4862436833239753, "grad_norm": 0.7423000931739807, "learning_rate": 9.883187517144397e-06, "loss": 0.4507, "step": 1732 }, { "epoch": 0.4865244244806289, "grad_norm": 0.6888643503189087, "learning_rate": 9.882836259601087e-06, "loss": 0.4063, "step": 1733 }, { "epoch": 0.48680516563728243, "grad_norm": 0.82935631275177, "learning_rate": 9.882484480991857e-06, "loss": 0.4997, "step": 1734 }, { "epoch": 0.487085906793936, "grad_norm": 0.9646106958389282, "learning_rate": 9.882132181354244e-06, "loss": 0.489, "step": 1735 }, { "epoch": 0.48736664795058954, "grad_norm": 0.7377066612243652, "learning_rate": 9.881779360725847e-06, "loss": 0.4142, "step": 1736 }, { "epoch": 0.48764738910724315, "grad_norm": 0.9286917448043823, "learning_rate": 9.881426019144311e-06, "loss": 0.4301, "step": 1737 }, { "epoch": 0.4879281302638967, "grad_norm": 0.8288553953170776, "learning_rate": 9.881072156647347e-06, "loss": 0.4988, "step": 1738 }, { "epoch": 0.48820887142055025, "grad_norm": 0.7531358599662781, "learning_rate": 9.880717773272717e-06, "loss": 0.4544, "step": 1739 }, { "epoch": 0.4884896125772038, "grad_norm": 0.9587254524230957, "learning_rate": 9.880362869058237e-06, "loss": 0.4704, "step": 1740 }, { "epoch": 0.4887703537338574, "grad_norm": 0.8731741905212402, "learning_rate": 9.880007444041779e-06, "loss": 0.4812, "step": 1741 }, { "epoch": 0.48905109489051096, "grad_norm": 0.760473370552063, "learning_rate": 9.879651498261275e-06, "loss": 0.4975, "step": 1742 }, { "epoch": 0.4893318360471645, "grad_norm": 0.8611308932304382, "learning_rate": 9.879295031754708e-06, "loss": 0.4463, "step": 1743 }, { "epoch": 0.48961257720381807, "grad_norm": 0.7820237874984741, "learning_rate": 9.878938044560117e-06, "loss": 0.445, "step": 1744 }, { "epoch": 0.4898933183604716, "grad_norm": 0.7753906846046448, "learning_rate": 9.878580536715597e-06, "loss": 0.464, "step": 1745 }, { "epoch": 0.4901740595171252, "grad_norm": 0.7010873556137085, "learning_rate": 9.878222508259301e-06, "loss": 0.4784, "step": 1746 }, { "epoch": 0.4904548006737788, "grad_norm": 0.8321229219436646, "learning_rate": 9.877863959229435e-06, "loss": 0.4658, "step": 1747 }, { "epoch": 0.49073554183043233, "grad_norm": 0.7034891843795776, "learning_rate": 9.87750488966426e-06, "loss": 0.4785, "step": 1748 }, { "epoch": 0.4910162829870859, "grad_norm": 0.7781282067298889, "learning_rate": 9.877145299602093e-06, "loss": 0.4487, "step": 1749 }, { "epoch": 0.4912970241437395, "grad_norm": 0.7160866856575012, "learning_rate": 9.87678518908131e-06, "loss": 0.4861, "step": 1750 }, { "epoch": 0.49157776530039304, "grad_norm": 0.8705825805664062, "learning_rate": 9.876424558140338e-06, "loss": 0.4492, "step": 1751 }, { "epoch": 0.4918585064570466, "grad_norm": 0.8751598000526428, "learning_rate": 9.876063406817664e-06, "loss": 0.4853, "step": 1752 }, { "epoch": 0.49213924761370015, "grad_norm": 0.8282373547554016, "learning_rate": 9.875701735151823e-06, "loss": 0.4529, "step": 1753 }, { "epoch": 0.49241998877035376, "grad_norm": 0.8029273152351379, "learning_rate": 9.875339543181413e-06, "loss": 0.4559, "step": 1754 }, { "epoch": 0.4927007299270073, "grad_norm": 0.8402436971664429, "learning_rate": 9.874976830945085e-06, "loss": 0.4225, "step": 1755 }, { "epoch": 0.49298147108366086, "grad_norm": 0.8531173467636108, "learning_rate": 9.874613598481545e-06, "loss": 0.4886, "step": 1756 }, { "epoch": 0.4932622122403144, "grad_norm": 0.8536921143531799, "learning_rate": 9.874249845829556e-06, "loss": 0.4655, "step": 1757 }, { "epoch": 0.493542953396968, "grad_norm": 0.7912077307701111, "learning_rate": 9.873885573027932e-06, "loss": 0.4587, "step": 1758 }, { "epoch": 0.4938236945536216, "grad_norm": 0.7647911906242371, "learning_rate": 9.87352078011555e-06, "loss": 0.4748, "step": 1759 }, { "epoch": 0.4941044357102751, "grad_norm": 0.8365086317062378, "learning_rate": 9.873155467131336e-06, "loss": 0.4741, "step": 1760 }, { "epoch": 0.4943851768669287, "grad_norm": 0.7844578623771667, "learning_rate": 9.872789634114277e-06, "loss": 0.441, "step": 1761 }, { "epoch": 0.49466591802358223, "grad_norm": 0.9620003700256348, "learning_rate": 9.872423281103406e-06, "loss": 0.4754, "step": 1762 }, { "epoch": 0.49494665918023584, "grad_norm": 0.7750184535980225, "learning_rate": 9.872056408137826e-06, "loss": 0.4352, "step": 1763 }, { "epoch": 0.4952274003368894, "grad_norm": 0.7962160110473633, "learning_rate": 9.871689015256682e-06, "loss": 0.4589, "step": 1764 }, { "epoch": 0.49550814149354294, "grad_norm": 0.9381359219551086, "learning_rate": 9.871321102499181e-06, "loss": 0.5046, "step": 1765 }, { "epoch": 0.4957888826501965, "grad_norm": 0.8677588701248169, "learning_rate": 9.870952669904585e-06, "loss": 0.4957, "step": 1766 }, { "epoch": 0.4960696238068501, "grad_norm": 0.7756445407867432, "learning_rate": 9.87058371751221e-06, "loss": 0.4448, "step": 1767 }, { "epoch": 0.49635036496350365, "grad_norm": 0.8854823708534241, "learning_rate": 9.870214245361429e-06, "loss": 0.4712, "step": 1768 }, { "epoch": 0.4966311061201572, "grad_norm": 0.8065089583396912, "learning_rate": 9.869844253491669e-06, "loss": 0.4698, "step": 1769 }, { "epoch": 0.49691184727681076, "grad_norm": 0.8647491335868835, "learning_rate": 9.869473741942415e-06, "loss": 0.499, "step": 1770 }, { "epoch": 0.49719258843346437, "grad_norm": 0.7648187875747681, "learning_rate": 9.869102710753204e-06, "loss": 0.4492, "step": 1771 }, { "epoch": 0.4974733295901179, "grad_norm": 0.7908899784088135, "learning_rate": 9.868731159963631e-06, "loss": 0.4415, "step": 1772 }, { "epoch": 0.49775407074677147, "grad_norm": 0.7534183859825134, "learning_rate": 9.868359089613346e-06, "loss": 0.4815, "step": 1773 }, { "epoch": 0.498034811903425, "grad_norm": 0.7666630148887634, "learning_rate": 9.867986499742052e-06, "loss": 0.4592, "step": 1774 }, { "epoch": 0.49831555306007863, "grad_norm": 0.7794538736343384, "learning_rate": 9.86761339038951e-06, "loss": 0.4911, "step": 1775 }, { "epoch": 0.4985962942167322, "grad_norm": 0.7326319217681885, "learning_rate": 9.86723976159554e-06, "loss": 0.4299, "step": 1776 }, { "epoch": 0.49887703537338574, "grad_norm": 0.8433898687362671, "learning_rate": 9.866865613400008e-06, "loss": 0.455, "step": 1777 }, { "epoch": 0.4991577765300393, "grad_norm": 0.8451767563819885, "learning_rate": 9.866490945842841e-06, "loss": 0.5088, "step": 1778 }, { "epoch": 0.4994385176866929, "grad_norm": 0.8145613074302673, "learning_rate": 9.866115758964026e-06, "loss": 0.4789, "step": 1779 }, { "epoch": 0.49971925884334645, "grad_norm": 0.6962898969650269, "learning_rate": 9.865740052803596e-06, "loss": 0.3938, "step": 1780 }, { "epoch": 0.5, "grad_norm": 0.8139388561248779, "learning_rate": 9.865363827401643e-06, "loss": 0.4132, "step": 1781 }, { "epoch": 0.5002807411566536, "grad_norm": 0.7577467560768127, "learning_rate": 9.864987082798323e-06, "loss": 0.4542, "step": 1782 }, { "epoch": 0.5005614823133071, "grad_norm": 0.7806551456451416, "learning_rate": 9.864609819033833e-06, "loss": 0.5121, "step": 1783 }, { "epoch": 0.5008422234699607, "grad_norm": 0.852153480052948, "learning_rate": 9.864232036148434e-06, "loss": 0.4866, "step": 1784 }, { "epoch": 0.5011229646266142, "grad_norm": 0.7819837927818298, "learning_rate": 9.86385373418244e-06, "loss": 0.45, "step": 1785 }, { "epoch": 0.5014037057832679, "grad_norm": 0.6908717751502991, "learning_rate": 9.863474913176222e-06, "loss": 0.5197, "step": 1786 }, { "epoch": 0.5016844469399214, "grad_norm": 0.7145721912384033, "learning_rate": 9.863095573170206e-06, "loss": 0.4477, "step": 1787 }, { "epoch": 0.501965188096575, "grad_norm": 0.8932965397834778, "learning_rate": 9.862715714204872e-06, "loss": 0.4559, "step": 1788 }, { "epoch": 0.5022459292532285, "grad_norm": 0.8007551431655884, "learning_rate": 9.862335336320753e-06, "loss": 0.4498, "step": 1789 }, { "epoch": 0.5025266704098821, "grad_norm": 0.76729416847229, "learning_rate": 9.861954439558448e-06, "loss": 0.4409, "step": 1790 }, { "epoch": 0.5028074115665356, "grad_norm": 0.8064449429512024, "learning_rate": 9.8615730239586e-06, "loss": 0.4683, "step": 1791 }, { "epoch": 0.5030881527231892, "grad_norm": 0.7141784429550171, "learning_rate": 9.861191089561908e-06, "loss": 0.4319, "step": 1792 }, { "epoch": 0.5033688938798427, "grad_norm": 0.8147256970405579, "learning_rate": 9.860808636409134e-06, "loss": 0.4517, "step": 1793 }, { "epoch": 0.5036496350364964, "grad_norm": 0.7154982089996338, "learning_rate": 9.86042566454109e-06, "loss": 0.4578, "step": 1794 }, { "epoch": 0.50393037619315, "grad_norm": 0.8301019072532654, "learning_rate": 9.860042173998643e-06, "loss": 0.4388, "step": 1795 }, { "epoch": 0.5042111173498035, "grad_norm": 0.8481314778327942, "learning_rate": 9.859658164822718e-06, "loss": 0.489, "step": 1796 }, { "epoch": 0.5044918585064571, "grad_norm": 0.8445191383361816, "learning_rate": 9.859273637054295e-06, "loss": 0.52, "step": 1797 }, { "epoch": 0.5047725996631106, "grad_norm": 0.8490387201309204, "learning_rate": 9.858888590734406e-06, "loss": 0.4881, "step": 1798 }, { "epoch": 0.5050533408197642, "grad_norm": 0.812320351600647, "learning_rate": 9.858503025904143e-06, "loss": 0.4528, "step": 1799 }, { "epoch": 0.5053340819764177, "grad_norm": 0.8201896548271179, "learning_rate": 9.858116942604649e-06, "loss": 0.4681, "step": 1800 }, { "epoch": 0.5056148231330713, "grad_norm": 0.9138181805610657, "learning_rate": 9.857730340877128e-06, "loss": 0.4774, "step": 1801 }, { "epoch": 0.5058955642897248, "grad_norm": 0.6980558633804321, "learning_rate": 9.857343220762831e-06, "loss": 0.4542, "step": 1802 }, { "epoch": 0.5061763054463785, "grad_norm": 0.7874338626861572, "learning_rate": 9.856955582303072e-06, "loss": 0.5028, "step": 1803 }, { "epoch": 0.506457046603032, "grad_norm": 0.7695236802101135, "learning_rate": 9.856567425539217e-06, "loss": 0.4738, "step": 1804 }, { "epoch": 0.5067377877596856, "grad_norm": 0.8767713904380798, "learning_rate": 9.856178750512688e-06, "loss": 0.4734, "step": 1805 }, { "epoch": 0.5070185289163391, "grad_norm": 0.6862586736679077, "learning_rate": 9.85578955726496e-06, "loss": 0.512, "step": 1806 }, { "epoch": 0.5072992700729927, "grad_norm": 0.9676357507705688, "learning_rate": 9.855399845837565e-06, "loss": 0.4657, "step": 1807 }, { "epoch": 0.5075800112296462, "grad_norm": 0.8110551834106445, "learning_rate": 9.855009616272095e-06, "loss": 0.4411, "step": 1808 }, { "epoch": 0.5078607523862998, "grad_norm": 0.8683038353919983, "learning_rate": 9.854618868610188e-06, "loss": 0.5059, "step": 1809 }, { "epoch": 0.5081414935429533, "grad_norm": 0.9837791323661804, "learning_rate": 9.854227602893547e-06, "loss": 0.458, "step": 1810 }, { "epoch": 0.508422234699607, "grad_norm": 0.8323397040367126, "learning_rate": 9.853835819163921e-06, "loss": 0.4763, "step": 1811 }, { "epoch": 0.5087029758562606, "grad_norm": 0.7720654010772705, "learning_rate": 9.85344351746312e-06, "loss": 0.5114, "step": 1812 }, { "epoch": 0.5089837170129141, "grad_norm": 0.7435499429702759, "learning_rate": 9.853050697833009e-06, "loss": 0.4489, "step": 1813 }, { "epoch": 0.5092644581695677, "grad_norm": 0.7995089292526245, "learning_rate": 9.852657360315505e-06, "loss": 0.4712, "step": 1814 }, { "epoch": 0.5095451993262212, "grad_norm": 0.7547017931938171, "learning_rate": 9.852263504952585e-06, "loss": 0.4818, "step": 1815 }, { "epoch": 0.5098259404828748, "grad_norm": 0.7278504967689514, "learning_rate": 9.85186913178628e-06, "loss": 0.5011, "step": 1816 }, { "epoch": 0.5101066816395283, "grad_norm": 0.8858345150947571, "learning_rate": 9.851474240858671e-06, "loss": 0.4514, "step": 1817 }, { "epoch": 0.5103874227961819, "grad_norm": 0.8252277970314026, "learning_rate": 9.851078832211903e-06, "loss": 0.4764, "step": 1818 }, { "epoch": 0.5106681639528355, "grad_norm": 0.836026668548584, "learning_rate": 9.850682905888165e-06, "loss": 0.5051, "step": 1819 }, { "epoch": 0.5109489051094891, "grad_norm": 0.6830624938011169, "learning_rate": 9.850286461929714e-06, "loss": 0.4316, "step": 1820 }, { "epoch": 0.5112296462661426, "grad_norm": 0.6491891741752625, "learning_rate": 9.849889500378854e-06, "loss": 0.4308, "step": 1821 }, { "epoch": 0.5115103874227962, "grad_norm": 0.7383559942245483, "learning_rate": 9.849492021277946e-06, "loss": 0.4704, "step": 1822 }, { "epoch": 0.5117911285794498, "grad_norm": 0.7036583423614502, "learning_rate": 9.849094024669405e-06, "loss": 0.4578, "step": 1823 }, { "epoch": 0.5120718697361033, "grad_norm": 0.7398111820220947, "learning_rate": 9.848695510595705e-06, "loss": 0.4467, "step": 1824 }, { "epoch": 0.5123526108927569, "grad_norm": 0.6627861857414246, "learning_rate": 9.848296479099373e-06, "loss": 0.4478, "step": 1825 }, { "epoch": 0.5126333520494104, "grad_norm": 0.7219253182411194, "learning_rate": 9.847896930222989e-06, "loss": 0.4185, "step": 1826 }, { "epoch": 0.512914093206064, "grad_norm": 0.84356689453125, "learning_rate": 9.847496864009191e-06, "loss": 0.5129, "step": 1827 }, { "epoch": 0.5131948343627176, "grad_norm": 0.765963077545166, "learning_rate": 9.847096280500675e-06, "loss": 0.4656, "step": 1828 }, { "epoch": 0.5134755755193712, "grad_norm": 0.7722097039222717, "learning_rate": 9.846695179740184e-06, "loss": 0.4865, "step": 1829 }, { "epoch": 0.5137563166760247, "grad_norm": 0.7085173726081848, "learning_rate": 9.846293561770523e-06, "loss": 0.4209, "step": 1830 }, { "epoch": 0.5140370578326783, "grad_norm": 0.6874461770057678, "learning_rate": 9.845891426634551e-06, "loss": 0.4673, "step": 1831 }, { "epoch": 0.5143177989893318, "grad_norm": 0.8429763317108154, "learning_rate": 9.84548877437518e-06, "loss": 0.4672, "step": 1832 }, { "epoch": 0.5145985401459854, "grad_norm": 0.6847505569458008, "learning_rate": 9.84508560503538e-06, "loss": 0.4489, "step": 1833 }, { "epoch": 0.5148792813026389, "grad_norm": 0.6943210959434509, "learning_rate": 9.844681918658172e-06, "loss": 0.4422, "step": 1834 }, { "epoch": 0.5151600224592925, "grad_norm": 0.8705496191978455, "learning_rate": 9.844277715286639e-06, "loss": 0.4773, "step": 1835 }, { "epoch": 0.5154407636159462, "grad_norm": 0.6923941373825073, "learning_rate": 9.843872994963912e-06, "loss": 0.4355, "step": 1836 }, { "epoch": 0.5157215047725997, "grad_norm": 0.7502871155738831, "learning_rate": 9.84346775773318e-06, "loss": 0.4859, "step": 1837 }, { "epoch": 0.5160022459292533, "grad_norm": 0.736991822719574, "learning_rate": 9.84306200363769e-06, "loss": 0.4332, "step": 1838 }, { "epoch": 0.5162829870859068, "grad_norm": 0.7772122621536255, "learning_rate": 9.842655732720738e-06, "loss": 0.4416, "step": 1839 }, { "epoch": 0.5165637282425604, "grad_norm": 0.6768174171447754, "learning_rate": 9.842248945025682e-06, "loss": 0.4348, "step": 1840 }, { "epoch": 0.5168444693992139, "grad_norm": 0.7615413069725037, "learning_rate": 9.84184164059593e-06, "loss": 0.5154, "step": 1841 }, { "epoch": 0.5171252105558675, "grad_norm": 0.8892810940742493, "learning_rate": 9.84143381947495e-06, "loss": 0.5142, "step": 1842 }, { "epoch": 0.517405951712521, "grad_norm": 0.8347628712654114, "learning_rate": 9.841025481706256e-06, "loss": 0.4566, "step": 1843 }, { "epoch": 0.5176866928691746, "grad_norm": 0.7237997055053711, "learning_rate": 9.84061662733343e-06, "loss": 0.4733, "step": 1844 }, { "epoch": 0.5179674340258282, "grad_norm": 0.7402610778808594, "learning_rate": 9.840207256400097e-06, "loss": 0.4682, "step": 1845 }, { "epoch": 0.5182481751824818, "grad_norm": 0.7389965653419495, "learning_rate": 9.839797368949946e-06, "loss": 0.421, "step": 1846 }, { "epoch": 0.5185289163391353, "grad_norm": 0.7180278897285461, "learning_rate": 9.839386965026716e-06, "loss": 0.4503, "step": 1847 }, { "epoch": 0.5188096574957889, "grad_norm": 0.7654619812965393, "learning_rate": 9.838976044674204e-06, "loss": 0.4503, "step": 1848 }, { "epoch": 0.5190903986524424, "grad_norm": 0.8214221000671387, "learning_rate": 9.838564607936259e-06, "loss": 0.4682, "step": 1849 }, { "epoch": 0.519371139809096, "grad_norm": 0.7136037349700928, "learning_rate": 9.83815265485679e-06, "loss": 0.4364, "step": 1850 }, { "epoch": 0.5196518809657495, "grad_norm": 0.7680732607841492, "learning_rate": 9.837740185479755e-06, "loss": 0.5183, "step": 1851 }, { "epoch": 0.5199326221224031, "grad_norm": 0.7541682124137878, "learning_rate": 9.83732719984917e-06, "loss": 0.4879, "step": 1852 }, { "epoch": 0.5202133632790568, "grad_norm": 0.7221009731292725, "learning_rate": 9.836913698009109e-06, "loss": 0.4726, "step": 1853 }, { "epoch": 0.5204941044357103, "grad_norm": 0.6487541794776917, "learning_rate": 9.836499680003697e-06, "loss": 0.4558, "step": 1854 }, { "epoch": 0.5207748455923639, "grad_norm": 0.7128645181655884, "learning_rate": 9.836085145877115e-06, "loss": 0.4665, "step": 1855 }, { "epoch": 0.5210555867490174, "grad_norm": 0.8577792644500732, "learning_rate": 9.835670095673599e-06, "loss": 0.5188, "step": 1856 }, { "epoch": 0.521336327905671, "grad_norm": 0.8320954442024231, "learning_rate": 9.835254529437444e-06, "loss": 0.458, "step": 1857 }, { "epoch": 0.5216170690623245, "grad_norm": 0.7644145488739014, "learning_rate": 9.834838447212991e-06, "loss": 0.4492, "step": 1858 }, { "epoch": 0.5218978102189781, "grad_norm": 0.7536954283714294, "learning_rate": 9.834421849044646e-06, "loss": 0.4481, "step": 1859 }, { "epoch": 0.5221785513756316, "grad_norm": 0.7009553909301758, "learning_rate": 9.834004734976865e-06, "loss": 0.4779, "step": 1860 }, { "epoch": 0.5224592925322852, "grad_norm": 0.7501538991928101, "learning_rate": 9.83358710505416e-06, "loss": 0.4499, "step": 1861 }, { "epoch": 0.5227400336889388, "grad_norm": 0.7410726547241211, "learning_rate": 9.833168959321097e-06, "loss": 0.4466, "step": 1862 }, { "epoch": 0.5230207748455924, "grad_norm": 0.7580085396766663, "learning_rate": 9.832750297822298e-06, "loss": 0.454, "step": 1863 }, { "epoch": 0.523301516002246, "grad_norm": 0.810674250125885, "learning_rate": 9.832331120602439e-06, "loss": 0.4561, "step": 1864 }, { "epoch": 0.5235822571588995, "grad_norm": 0.7958224415779114, "learning_rate": 9.831911427706253e-06, "loss": 0.4859, "step": 1865 }, { "epoch": 0.523862998315553, "grad_norm": 0.7352412939071655, "learning_rate": 9.831491219178528e-06, "loss": 0.4653, "step": 1866 }, { "epoch": 0.5241437394722066, "grad_norm": 0.7566716074943542, "learning_rate": 9.831070495064106e-06, "loss": 0.4384, "step": 1867 }, { "epoch": 0.5244244806288602, "grad_norm": 0.7479974031448364, "learning_rate": 9.830649255407882e-06, "loss": 0.4641, "step": 1868 }, { "epoch": 0.5247052217855137, "grad_norm": 1.0138598680496216, "learning_rate": 9.83022750025481e-06, "loss": 0.4781, "step": 1869 }, { "epoch": 0.5249859629421674, "grad_norm": 0.7030500769615173, "learning_rate": 9.829805229649896e-06, "loss": 0.4739, "step": 1870 }, { "epoch": 0.5252667040988209, "grad_norm": 0.7876224517822266, "learning_rate": 9.829382443638202e-06, "loss": 0.4826, "step": 1871 }, { "epoch": 0.5255474452554745, "grad_norm": 0.8127958178520203, "learning_rate": 9.828959142264845e-06, "loss": 0.4795, "step": 1872 }, { "epoch": 0.525828186412128, "grad_norm": 0.8334431648254395, "learning_rate": 9.828535325574999e-06, "loss": 0.4765, "step": 1873 }, { "epoch": 0.5261089275687816, "grad_norm": 0.8717887997627258, "learning_rate": 9.82811099361389e-06, "loss": 0.4354, "step": 1874 }, { "epoch": 0.5263896687254351, "grad_norm": 0.7463579773902893, "learning_rate": 9.827686146426798e-06, "loss": 0.4283, "step": 1875 }, { "epoch": 0.5266704098820887, "grad_norm": 0.8334963917732239, "learning_rate": 9.827260784059064e-06, "loss": 0.4554, "step": 1876 }, { "epoch": 0.5269511510387422, "grad_norm": 0.6488940715789795, "learning_rate": 9.826834906556077e-06, "loss": 0.4422, "step": 1877 }, { "epoch": 0.5272318921953958, "grad_norm": 0.6755518913269043, "learning_rate": 9.826408513963283e-06, "loss": 0.4323, "step": 1878 }, { "epoch": 0.5275126333520495, "grad_norm": 0.7829365730285645, "learning_rate": 9.825981606326189e-06, "loss": 0.5051, "step": 1879 }, { "epoch": 0.527793374508703, "grad_norm": 0.7612887620925903, "learning_rate": 9.825554183690347e-06, "loss": 0.5031, "step": 1880 }, { "epoch": 0.5280741156653566, "grad_norm": 0.700315535068512, "learning_rate": 9.82512624610137e-06, "loss": 0.4804, "step": 1881 }, { "epoch": 0.5283548568220101, "grad_norm": 0.7135051488876343, "learning_rate": 9.824697793604929e-06, "loss": 0.474, "step": 1882 }, { "epoch": 0.5286355979786637, "grad_norm": 0.911785900592804, "learning_rate": 9.82426882624674e-06, "loss": 0.5015, "step": 1883 }, { "epoch": 0.5289163391353172, "grad_norm": 0.8162867426872253, "learning_rate": 9.823839344072582e-06, "loss": 0.4659, "step": 1884 }, { "epoch": 0.5291970802919708, "grad_norm": 0.7398881316184998, "learning_rate": 9.823409347128286e-06, "loss": 0.4696, "step": 1885 }, { "epoch": 0.5294778214486243, "grad_norm": 0.8000018000602722, "learning_rate": 9.822978835459738e-06, "loss": 0.4745, "step": 1886 }, { "epoch": 0.529758562605278, "grad_norm": 0.7813568711280823, "learning_rate": 9.822547809112883e-06, "loss": 0.4689, "step": 1887 }, { "epoch": 0.5300393037619315, "grad_norm": 0.7030794620513916, "learning_rate": 9.822116268133715e-06, "loss": 0.4661, "step": 1888 }, { "epoch": 0.5303200449185851, "grad_norm": 0.7161295413970947, "learning_rate": 9.821684212568286e-06, "loss": 0.4725, "step": 1889 }, { "epoch": 0.5306007860752386, "grad_norm": 0.7135481834411621, "learning_rate": 9.821251642462701e-06, "loss": 0.4403, "step": 1890 }, { "epoch": 0.5308815272318922, "grad_norm": 0.7595284581184387, "learning_rate": 9.820818557863123e-06, "loss": 0.4175, "step": 1891 }, { "epoch": 0.5311622683885457, "grad_norm": 0.7693842053413391, "learning_rate": 9.820384958815766e-06, "loss": 0.4789, "step": 1892 }, { "epoch": 0.5314430095451993, "grad_norm": 0.7659890651702881, "learning_rate": 9.819950845366904e-06, "loss": 0.4893, "step": 1893 }, { "epoch": 0.5317237507018528, "grad_norm": 0.8019644021987915, "learning_rate": 9.819516217562859e-06, "loss": 0.4266, "step": 1894 }, { "epoch": 0.5320044918585065, "grad_norm": 0.7651217579841614, "learning_rate": 9.819081075450014e-06, "loss": 0.5205, "step": 1895 }, { "epoch": 0.5322852330151601, "grad_norm": 0.7626285552978516, "learning_rate": 9.818645419074807e-06, "loss": 0.4727, "step": 1896 }, { "epoch": 0.5325659741718136, "grad_norm": 0.8264485001564026, "learning_rate": 9.818209248483724e-06, "loss": 0.416, "step": 1897 }, { "epoch": 0.5328467153284672, "grad_norm": 0.7829334139823914, "learning_rate": 9.817772563723313e-06, "loss": 0.4942, "step": 1898 }, { "epoch": 0.5331274564851207, "grad_norm": 0.7674833536148071, "learning_rate": 9.817335364840173e-06, "loss": 0.3953, "step": 1899 }, { "epoch": 0.5334081976417743, "grad_norm": 0.7134654521942139, "learning_rate": 9.816897651880962e-06, "loss": 0.408, "step": 1900 }, { "epoch": 0.5336889387984278, "grad_norm": 0.7987240552902222, "learning_rate": 9.816459424892385e-06, "loss": 0.4509, "step": 1901 }, { "epoch": 0.5339696799550814, "grad_norm": 0.8815852403640747, "learning_rate": 9.81602068392121e-06, "loss": 0.4644, "step": 1902 }, { "epoch": 0.5342504211117349, "grad_norm": 0.889531135559082, "learning_rate": 9.815581429014259e-06, "loss": 0.4997, "step": 1903 }, { "epoch": 0.5345311622683886, "grad_norm": 0.896373450756073, "learning_rate": 9.815141660218402e-06, "loss": 0.4973, "step": 1904 }, { "epoch": 0.5348119034250421, "grad_norm": 0.7559667825698853, "learning_rate": 9.814701377580571e-06, "loss": 0.457, "step": 1905 }, { "epoch": 0.5350926445816957, "grad_norm": 0.9544247984886169, "learning_rate": 9.814260581147749e-06, "loss": 0.4698, "step": 1906 }, { "epoch": 0.5353733857383493, "grad_norm": 0.8227559328079224, "learning_rate": 9.813819270966978e-06, "loss": 0.4788, "step": 1907 }, { "epoch": 0.5356541268950028, "grad_norm": 0.6959999203681946, "learning_rate": 9.813377447085347e-06, "loss": 0.4957, "step": 1908 }, { "epoch": 0.5359348680516564, "grad_norm": 0.8869747519493103, "learning_rate": 9.812935109550008e-06, "loss": 0.4636, "step": 1909 }, { "epoch": 0.5362156092083099, "grad_norm": 0.8593977689743042, "learning_rate": 9.812492258408164e-06, "loss": 0.4438, "step": 1910 }, { "epoch": 0.5364963503649635, "grad_norm": 0.8060788512229919, "learning_rate": 9.812048893707073e-06, "loss": 0.465, "step": 1911 }, { "epoch": 0.5367770915216171, "grad_norm": 0.7863687872886658, "learning_rate": 9.811605015494048e-06, "loss": 0.4348, "step": 1912 }, { "epoch": 0.5370578326782707, "grad_norm": 0.8226597905158997, "learning_rate": 9.811160623816458e-06, "loss": 0.4424, "step": 1913 }, { "epoch": 0.5373385738349242, "grad_norm": 0.9052649140357971, "learning_rate": 9.810715718721723e-06, "loss": 0.4204, "step": 1914 }, { "epoch": 0.5376193149915778, "grad_norm": 1.0391721725463867, "learning_rate": 9.810270300257325e-06, "loss": 0.4733, "step": 1915 }, { "epoch": 0.5379000561482313, "grad_norm": 0.8370290994644165, "learning_rate": 9.809824368470794e-06, "loss": 0.4671, "step": 1916 }, { "epoch": 0.5381807973048849, "grad_norm": 0.9258934855461121, "learning_rate": 9.809377923409713e-06, "loss": 0.4404, "step": 1917 }, { "epoch": 0.5384615384615384, "grad_norm": 0.9329887628555298, "learning_rate": 9.80893096512173e-06, "loss": 0.4897, "step": 1918 }, { "epoch": 0.538742279618192, "grad_norm": 0.9769116640090942, "learning_rate": 9.80848349365454e-06, "loss": 0.4841, "step": 1919 }, { "epoch": 0.5390230207748455, "grad_norm": 0.6780902743339539, "learning_rate": 9.80803550905589e-06, "loss": 0.4816, "step": 1920 }, { "epoch": 0.5393037619314992, "grad_norm": 0.9711014628410339, "learning_rate": 9.807587011373594e-06, "loss": 0.4728, "step": 1921 }, { "epoch": 0.5395845030881528, "grad_norm": 1.0224378108978271, "learning_rate": 9.807138000655506e-06, "loss": 0.4554, "step": 1922 }, { "epoch": 0.5398652442448063, "grad_norm": 0.7928524613380432, "learning_rate": 9.806688476949544e-06, "loss": 0.4499, "step": 1923 }, { "epoch": 0.5401459854014599, "grad_norm": 0.7543311715126038, "learning_rate": 9.806238440303679e-06, "loss": 0.4302, "step": 1924 }, { "epoch": 0.5404267265581134, "grad_norm": 1.0170397758483887, "learning_rate": 9.805787890765937e-06, "loss": 0.4592, "step": 1925 }, { "epoch": 0.540707467714767, "grad_norm": 0.8489850163459778, "learning_rate": 9.805336828384395e-06, "loss": 0.4695, "step": 1926 }, { "epoch": 0.5409882088714205, "grad_norm": 0.797697126865387, "learning_rate": 9.80488525320719e-06, "loss": 0.4918, "step": 1927 }, { "epoch": 0.5412689500280741, "grad_norm": 0.8735690712928772, "learning_rate": 9.80443316528251e-06, "loss": 0.4745, "step": 1928 }, { "epoch": 0.5415496911847277, "grad_norm": 0.8410695195198059, "learning_rate": 9.8039805646586e-06, "loss": 0.4697, "step": 1929 }, { "epoch": 0.5418304323413813, "grad_norm": 0.817071259021759, "learning_rate": 9.803527451383757e-06, "loss": 0.4678, "step": 1930 }, { "epoch": 0.5421111734980348, "grad_norm": 0.7217406630516052, "learning_rate": 9.803073825506336e-06, "loss": 0.4205, "step": 1931 }, { "epoch": 0.5423919146546884, "grad_norm": 0.8115424513816833, "learning_rate": 9.802619687074743e-06, "loss": 0.4728, "step": 1932 }, { "epoch": 0.5426726558113419, "grad_norm": 0.8847445249557495, "learning_rate": 9.802165036137446e-06, "loss": 0.5106, "step": 1933 }, { "epoch": 0.5429533969679955, "grad_norm": 0.892156183719635, "learning_rate": 9.801709872742958e-06, "loss": 0.4993, "step": 1934 }, { "epoch": 0.543234138124649, "grad_norm": 0.784192681312561, "learning_rate": 9.80125419693985e-06, "loss": 0.4765, "step": 1935 }, { "epoch": 0.5435148792813026, "grad_norm": 0.8306336402893066, "learning_rate": 9.800798008776753e-06, "loss": 0.5158, "step": 1936 }, { "epoch": 0.5437956204379562, "grad_norm": 0.6939802169799805, "learning_rate": 9.800341308302346e-06, "loss": 0.4275, "step": 1937 }, { "epoch": 0.5440763615946098, "grad_norm": 0.7448406219482422, "learning_rate": 9.799884095565366e-06, "loss": 0.441, "step": 1938 }, { "epoch": 0.5443571027512634, "grad_norm": 0.7934439778327942, "learning_rate": 9.799426370614605e-06, "loss": 0.4582, "step": 1939 }, { "epoch": 0.5446378439079169, "grad_norm": 0.764974057674408, "learning_rate": 9.798968133498906e-06, "loss": 0.4403, "step": 1940 }, { "epoch": 0.5449185850645705, "grad_norm": 0.7812139987945557, "learning_rate": 9.798509384267172e-06, "loss": 0.4652, "step": 1941 }, { "epoch": 0.545199326221224, "grad_norm": 0.7582302689552307, "learning_rate": 9.798050122968354e-06, "loss": 0.4388, "step": 1942 }, { "epoch": 0.5454800673778776, "grad_norm": 1.008747935295105, "learning_rate": 9.797590349651467e-06, "loss": 0.4973, "step": 1943 }, { "epoch": 0.5457608085345311, "grad_norm": 0.8279508948326111, "learning_rate": 9.79713006436557e-06, "loss": 0.4885, "step": 1944 }, { "epoch": 0.5460415496911847, "grad_norm": 0.689279317855835, "learning_rate": 9.796669267159784e-06, "loss": 0.4725, "step": 1945 }, { "epoch": 0.5463222908478383, "grad_norm": 0.6917114853858948, "learning_rate": 9.796207958083283e-06, "loss": 0.4561, "step": 1946 }, { "epoch": 0.5466030320044919, "grad_norm": 0.8557814359664917, "learning_rate": 9.795746137185296e-06, "loss": 0.4853, "step": 1947 }, { "epoch": 0.5468837731611454, "grad_norm": 0.8069921135902405, "learning_rate": 9.795283804515101e-06, "loss": 0.4525, "step": 1948 }, { "epoch": 0.547164514317799, "grad_norm": 0.8353968262672424, "learning_rate": 9.79482096012204e-06, "loss": 0.4396, "step": 1949 }, { "epoch": 0.5474452554744526, "grad_norm": 0.7278546690940857, "learning_rate": 9.794357604055502e-06, "loss": 0.4545, "step": 1950 }, { "epoch": 0.5477259966311061, "grad_norm": 0.8496597409248352, "learning_rate": 9.793893736364937e-06, "loss": 0.4843, "step": 1951 }, { "epoch": 0.5480067377877597, "grad_norm": 0.859420895576477, "learning_rate": 9.793429357099842e-06, "loss": 0.4501, "step": 1952 }, { "epoch": 0.5482874789444132, "grad_norm": 0.7816826701164246, "learning_rate": 9.792964466309773e-06, "loss": 0.4634, "step": 1953 }, { "epoch": 0.5485682201010668, "grad_norm": 0.8437268733978271, "learning_rate": 9.792499064044343e-06, "loss": 0.4397, "step": 1954 }, { "epoch": 0.5488489612577204, "grad_norm": 0.7572882175445557, "learning_rate": 9.792033150353216e-06, "loss": 0.4853, "step": 1955 }, { "epoch": 0.549129702414374, "grad_norm": 0.6619787216186523, "learning_rate": 9.79156672528611e-06, "loss": 0.445, "step": 1956 }, { "epoch": 0.5494104435710275, "grad_norm": 0.8600680232048035, "learning_rate": 9.791099788892801e-06, "loss": 0.4568, "step": 1957 }, { "epoch": 0.5496911847276811, "grad_norm": 0.6911255717277527, "learning_rate": 9.790632341223116e-06, "loss": 0.4255, "step": 1958 }, { "epoch": 0.5499719258843346, "grad_norm": 0.6408498883247375, "learning_rate": 9.790164382326938e-06, "loss": 0.4571, "step": 1959 }, { "epoch": 0.5502526670409882, "grad_norm": 0.6756793260574341, "learning_rate": 9.789695912254206e-06, "loss": 0.4194, "step": 1960 }, { "epoch": 0.5505334081976417, "grad_norm": 0.6890113353729248, "learning_rate": 9.789226931054911e-06, "loss": 0.4173, "step": 1961 }, { "epoch": 0.5508141493542953, "grad_norm": 0.6933071613311768, "learning_rate": 9.7887574387791e-06, "loss": 0.4548, "step": 1962 }, { "epoch": 0.551094890510949, "grad_norm": 0.8239140510559082, "learning_rate": 9.788287435476874e-06, "loss": 0.4757, "step": 1963 }, { "epoch": 0.5513756316676025, "grad_norm": 0.7228875160217285, "learning_rate": 9.78781692119839e-06, "loss": 0.4604, "step": 1964 }, { "epoch": 0.5516563728242561, "grad_norm": 0.8217010498046875, "learning_rate": 9.787345895993857e-06, "loss": 0.4938, "step": 1965 }, { "epoch": 0.5519371139809096, "grad_norm": 0.8807656168937683, "learning_rate": 9.78687435991354e-06, "loss": 0.4698, "step": 1966 }, { "epoch": 0.5522178551375632, "grad_norm": 0.8089063167572021, "learning_rate": 9.786402313007762e-06, "loss": 0.4458, "step": 1967 }, { "epoch": 0.5524985962942167, "grad_norm": 0.7698459625244141, "learning_rate": 9.78592975532689e-06, "loss": 0.4661, "step": 1968 }, { "epoch": 0.5527793374508703, "grad_norm": 0.7129600048065186, "learning_rate": 9.785456686921358e-06, "loss": 0.4336, "step": 1969 }, { "epoch": 0.5530600786075238, "grad_norm": 0.8214575052261353, "learning_rate": 9.784983107841649e-06, "loss": 0.4605, "step": 1970 }, { "epoch": 0.5533408197641775, "grad_norm": 0.8851922750473022, "learning_rate": 9.784509018138295e-06, "loss": 0.4713, "step": 1971 }, { "epoch": 0.553621560920831, "grad_norm": 0.9047035574913025, "learning_rate": 9.784034417861893e-06, "loss": 0.4655, "step": 1972 }, { "epoch": 0.5539023020774846, "grad_norm": 0.8648396730422974, "learning_rate": 9.78355930706309e-06, "loss": 0.483, "step": 1973 }, { "epoch": 0.5541830432341381, "grad_norm": 0.8606545925140381, "learning_rate": 9.78308368579258e-06, "loss": 0.4571, "step": 1974 }, { "epoch": 0.5544637843907917, "grad_norm": 0.8742926716804504, "learning_rate": 9.782607554101127e-06, "loss": 0.4574, "step": 1975 }, { "epoch": 0.5547445255474452, "grad_norm": 0.8614062070846558, "learning_rate": 9.782130912039535e-06, "loss": 0.4481, "step": 1976 }, { "epoch": 0.5550252667040988, "grad_norm": 0.8186282515525818, "learning_rate": 9.781653759658671e-06, "loss": 0.4827, "step": 1977 }, { "epoch": 0.5553060078607523, "grad_norm": 0.8074194192886353, "learning_rate": 9.781176097009453e-06, "loss": 0.4917, "step": 1978 }, { "epoch": 0.5555867490174059, "grad_norm": 0.7881940603256226, "learning_rate": 9.780697924142854e-06, "loss": 0.4195, "step": 1979 }, { "epoch": 0.5558674901740596, "grad_norm": 0.8685644865036011, "learning_rate": 9.7802192411099e-06, "loss": 0.4868, "step": 1980 }, { "epoch": 0.5561482313307131, "grad_norm": 0.8442627191543579, "learning_rate": 9.779740047961677e-06, "loss": 0.453, "step": 1981 }, { "epoch": 0.5564289724873667, "grad_norm": 0.8148524761199951, "learning_rate": 9.77926034474932e-06, "loss": 0.5291, "step": 1982 }, { "epoch": 0.5567097136440202, "grad_norm": 0.8087888360023499, "learning_rate": 9.778780131524017e-06, "loss": 0.4746, "step": 1983 }, { "epoch": 0.5569904548006738, "grad_norm": 0.8510912656784058, "learning_rate": 9.778299408337018e-06, "loss": 0.4666, "step": 1984 }, { "epoch": 0.5572711959573273, "grad_norm": 0.9262344837188721, "learning_rate": 9.777818175239618e-06, "loss": 0.4475, "step": 1985 }, { "epoch": 0.5575519371139809, "grad_norm": 0.7814247012138367, "learning_rate": 9.777336432283175e-06, "loss": 0.4555, "step": 1986 }, { "epoch": 0.5578326782706344, "grad_norm": 0.8396809101104736, "learning_rate": 9.776854179519096e-06, "loss": 0.4374, "step": 1987 }, { "epoch": 0.5581134194272881, "grad_norm": 0.7959112524986267, "learning_rate": 9.776371416998844e-06, "loss": 0.4514, "step": 1988 }, { "epoch": 0.5583941605839416, "grad_norm": 0.760784387588501, "learning_rate": 9.775888144773937e-06, "loss": 0.4003, "step": 1989 }, { "epoch": 0.5586749017405952, "grad_norm": 0.7153075933456421, "learning_rate": 9.775404362895946e-06, "loss": 0.4582, "step": 1990 }, { "epoch": 0.5589556428972488, "grad_norm": 0.7210965752601624, "learning_rate": 9.774920071416499e-06, "loss": 0.4741, "step": 1991 }, { "epoch": 0.5592363840539023, "grad_norm": 0.7483850121498108, "learning_rate": 9.774435270387274e-06, "loss": 0.4659, "step": 1992 }, { "epoch": 0.5595171252105559, "grad_norm": 0.6827176809310913, "learning_rate": 9.773949959860008e-06, "loss": 0.4772, "step": 1993 }, { "epoch": 0.5597978663672094, "grad_norm": 0.851970911026001, "learning_rate": 9.773464139886489e-06, "loss": 0.5254, "step": 1994 }, { "epoch": 0.560078607523863, "grad_norm": 0.7237651944160461, "learning_rate": 9.77297781051856e-06, "loss": 0.517, "step": 1995 }, { "epoch": 0.5603593486805165, "grad_norm": 0.7680930495262146, "learning_rate": 9.772490971808122e-06, "loss": 0.4433, "step": 1996 }, { "epoch": 0.5606400898371702, "grad_norm": 0.817929744720459, "learning_rate": 9.772003623807125e-06, "loss": 0.4211, "step": 1997 }, { "epoch": 0.5609208309938237, "grad_norm": 0.7939580082893372, "learning_rate": 9.771515766567576e-06, "loss": 0.4668, "step": 1998 }, { "epoch": 0.5612015721504773, "grad_norm": 0.7887587547302246, "learning_rate": 9.771027400141538e-06, "loss": 0.4788, "step": 1999 }, { "epoch": 0.5614823133071308, "grad_norm": 0.809152364730835, "learning_rate": 9.770538524581124e-06, "loss": 0.4576, "step": 2000 }, { "epoch": 0.5617630544637844, "grad_norm": 0.8647423982620239, "learning_rate": 9.770049139938505e-06, "loss": 0.4745, "step": 2001 }, { "epoch": 0.5620437956204379, "grad_norm": 0.7462626695632935, "learning_rate": 9.769559246265903e-06, "loss": 0.4547, "step": 2002 }, { "epoch": 0.5623245367770915, "grad_norm": 0.7057831287384033, "learning_rate": 9.7690688436156e-06, "loss": 0.4267, "step": 2003 }, { "epoch": 0.562605277933745, "grad_norm": 0.7603277564048767, "learning_rate": 9.768577932039927e-06, "loss": 0.4768, "step": 2004 }, { "epoch": 0.5628860190903987, "grad_norm": 0.8108817338943481, "learning_rate": 9.76808651159127e-06, "loss": 0.4681, "step": 2005 }, { "epoch": 0.5631667602470523, "grad_norm": 0.7390596866607666, "learning_rate": 9.767594582322071e-06, "loss": 0.4408, "step": 2006 }, { "epoch": 0.5634475014037058, "grad_norm": 0.7935249209403992, "learning_rate": 9.767102144284826e-06, "loss": 0.4857, "step": 2007 }, { "epoch": 0.5637282425603594, "grad_norm": 0.7647663950920105, "learning_rate": 9.766609197532087e-06, "loss": 0.4658, "step": 2008 }, { "epoch": 0.5640089837170129, "grad_norm": 0.780458927154541, "learning_rate": 9.766115742116454e-06, "loss": 0.4893, "step": 2009 }, { "epoch": 0.5642897248736665, "grad_norm": 0.842208981513977, "learning_rate": 9.765621778090587e-06, "loss": 0.5028, "step": 2010 }, { "epoch": 0.56457046603032, "grad_norm": 0.7980358600616455, "learning_rate": 9.765127305507201e-06, "loss": 0.4295, "step": 2011 }, { "epoch": 0.5648512071869736, "grad_norm": 0.7977677583694458, "learning_rate": 9.76463232441906e-06, "loss": 0.4384, "step": 2012 }, { "epoch": 0.5651319483436271, "grad_norm": 0.7563501596450806, "learning_rate": 9.764136834878987e-06, "loss": 0.4187, "step": 2013 }, { "epoch": 0.5654126895002808, "grad_norm": 0.7552735805511475, "learning_rate": 9.763640836939857e-06, "loss": 0.4276, "step": 2014 }, { "epoch": 0.5656934306569343, "grad_norm": 0.7085328698158264, "learning_rate": 9.7631443306546e-06, "loss": 0.48, "step": 2015 }, { "epoch": 0.5659741718135879, "grad_norm": 0.7801918387413025, "learning_rate": 9.762647316076201e-06, "loss": 0.432, "step": 2016 }, { "epoch": 0.5662549129702414, "grad_norm": 0.7970189452171326, "learning_rate": 9.762149793257695e-06, "loss": 0.4282, "step": 2017 }, { "epoch": 0.566535654126895, "grad_norm": 0.7315121293067932, "learning_rate": 9.76165176225218e-06, "loss": 0.4477, "step": 2018 }, { "epoch": 0.5668163952835485, "grad_norm": 0.7222184538841248, "learning_rate": 9.761153223112799e-06, "loss": 0.4448, "step": 2019 }, { "epoch": 0.5670971364402021, "grad_norm": 0.8473417162895203, "learning_rate": 9.760654175892751e-06, "loss": 0.4348, "step": 2020 }, { "epoch": 0.5673778775968557, "grad_norm": 0.8478591442108154, "learning_rate": 9.760154620645297e-06, "loss": 0.4659, "step": 2021 }, { "epoch": 0.5676586187535093, "grad_norm": 0.8104862570762634, "learning_rate": 9.759654557423743e-06, "loss": 0.454, "step": 2022 }, { "epoch": 0.5679393599101629, "grad_norm": 0.6937376260757446, "learning_rate": 9.759153986281452e-06, "loss": 0.4603, "step": 2023 }, { "epoch": 0.5682201010668164, "grad_norm": 0.8246472477912903, "learning_rate": 9.758652907271842e-06, "loss": 0.4474, "step": 2024 }, { "epoch": 0.56850084222347, "grad_norm": 0.899146318435669, "learning_rate": 9.758151320448388e-06, "loss": 0.4948, "step": 2025 }, { "epoch": 0.5687815833801235, "grad_norm": 0.8296086192131042, "learning_rate": 9.757649225864612e-06, "loss": 0.4731, "step": 2026 }, { "epoch": 0.5690623245367771, "grad_norm": 0.7202047109603882, "learning_rate": 9.757146623574098e-06, "loss": 0.436, "step": 2027 }, { "epoch": 0.5693430656934306, "grad_norm": 0.7963749766349792, "learning_rate": 9.75664351363048e-06, "loss": 0.4491, "step": 2028 }, { "epoch": 0.5696238068500842, "grad_norm": 0.709155261516571, "learning_rate": 9.756139896087444e-06, "loss": 0.4421, "step": 2029 }, { "epoch": 0.5699045480067377, "grad_norm": 0.8129062056541443, "learning_rate": 9.755635770998734e-06, "loss": 0.449, "step": 2030 }, { "epoch": 0.5701852891633914, "grad_norm": 0.7801936268806458, "learning_rate": 9.755131138418149e-06, "loss": 0.4348, "step": 2031 }, { "epoch": 0.570466030320045, "grad_norm": 0.8874030709266663, "learning_rate": 9.754625998399539e-06, "loss": 0.5274, "step": 2032 }, { "epoch": 0.5707467714766985, "grad_norm": 0.7172839641571045, "learning_rate": 9.75412035099681e-06, "loss": 0.4733, "step": 2033 }, { "epoch": 0.571027512633352, "grad_norm": 0.7903905510902405, "learning_rate": 9.753614196263921e-06, "loss": 0.4621, "step": 2034 }, { "epoch": 0.5713082537900056, "grad_norm": 0.8660666346549988, "learning_rate": 9.753107534254885e-06, "loss": 0.4448, "step": 2035 }, { "epoch": 0.5715889949466592, "grad_norm": 0.6763301491737366, "learning_rate": 9.75260036502377e-06, "loss": 0.4458, "step": 2036 }, { "epoch": 0.5718697361033127, "grad_norm": 0.7507290840148926, "learning_rate": 9.752092688624702e-06, "loss": 0.4788, "step": 2037 }, { "epoch": 0.5721504772599663, "grad_norm": 0.7282779216766357, "learning_rate": 9.75158450511185e-06, "loss": 0.4729, "step": 2038 }, { "epoch": 0.5724312184166199, "grad_norm": 0.8436610698699951, "learning_rate": 9.751075814539448e-06, "loss": 0.4363, "step": 2039 }, { "epoch": 0.5727119595732735, "grad_norm": 0.850831151008606, "learning_rate": 9.750566616961782e-06, "loss": 0.4677, "step": 2040 }, { "epoch": 0.572992700729927, "grad_norm": 0.7979633808135986, "learning_rate": 9.750056912433187e-06, "loss": 0.5169, "step": 2041 }, { "epoch": 0.5732734418865806, "grad_norm": 0.7856271266937256, "learning_rate": 9.749546701008056e-06, "loss": 0.4459, "step": 2042 }, { "epoch": 0.5735541830432341, "grad_norm": 0.8156360983848572, "learning_rate": 9.74903598274084e-06, "loss": 0.4496, "step": 2043 }, { "epoch": 0.5738349241998877, "grad_norm": 0.8404834866523743, "learning_rate": 9.748524757686034e-06, "loss": 0.5005, "step": 2044 }, { "epoch": 0.5741156653565412, "grad_norm": 0.7475012540817261, "learning_rate": 9.748013025898196e-06, "loss": 0.4495, "step": 2045 }, { "epoch": 0.5743964065131948, "grad_norm": 0.822589635848999, "learning_rate": 9.747500787431932e-06, "loss": 0.4905, "step": 2046 }, { "epoch": 0.5746771476698485, "grad_norm": 0.7559536099433899, "learning_rate": 9.746988042341907e-06, "loss": 0.439, "step": 2047 }, { "epoch": 0.574957888826502, "grad_norm": 0.7868804335594177, "learning_rate": 9.746474790682838e-06, "loss": 0.4429, "step": 2048 }, { "epoch": 0.5752386299831556, "grad_norm": 0.8868182301521301, "learning_rate": 9.745961032509497e-06, "loss": 0.4609, "step": 2049 }, { "epoch": 0.5755193711398091, "grad_norm": 0.8592252135276794, "learning_rate": 9.745446767876708e-06, "loss": 0.5002, "step": 2050 }, { "epoch": 0.5758001122964627, "grad_norm": 0.8985242247581482, "learning_rate": 9.744931996839347e-06, "loss": 0.4661, "step": 2051 }, { "epoch": 0.5760808534531162, "grad_norm": 0.8594610691070557, "learning_rate": 9.744416719452352e-06, "loss": 0.4634, "step": 2052 }, { "epoch": 0.5763615946097698, "grad_norm": 0.827816903591156, "learning_rate": 9.743900935770709e-06, "loss": 0.478, "step": 2053 }, { "epoch": 0.5766423357664233, "grad_norm": 0.6995730400085449, "learning_rate": 9.743384645849456e-06, "loss": 0.4301, "step": 2054 }, { "epoch": 0.5769230769230769, "grad_norm": 0.6672750115394592, "learning_rate": 9.742867849743694e-06, "loss": 0.485, "step": 2055 }, { "epoch": 0.5772038180797305, "grad_norm": 0.8091338276863098, "learning_rate": 9.742350547508568e-06, "loss": 0.4236, "step": 2056 }, { "epoch": 0.5774845592363841, "grad_norm": 0.889529824256897, "learning_rate": 9.741832739199281e-06, "loss": 0.4448, "step": 2057 }, { "epoch": 0.5777653003930376, "grad_norm": 0.8087449669837952, "learning_rate": 9.741314424871092e-06, "loss": 0.4384, "step": 2058 }, { "epoch": 0.5780460415496912, "grad_norm": 0.7608034014701843, "learning_rate": 9.740795604579312e-06, "loss": 0.4592, "step": 2059 }, { "epoch": 0.5783267827063447, "grad_norm": 0.9347120523452759, "learning_rate": 9.740276278379306e-06, "loss": 0.4603, "step": 2060 }, { "epoch": 0.5786075238629983, "grad_norm": 0.8066372871398926, "learning_rate": 9.739756446326494e-06, "loss": 0.4778, "step": 2061 }, { "epoch": 0.5788882650196518, "grad_norm": 0.8516121506690979, "learning_rate": 9.739236108476348e-06, "loss": 0.4046, "step": 2062 }, { "epoch": 0.5791690061763054, "grad_norm": 0.8246614336967468, "learning_rate": 9.738715264884397e-06, "loss": 0.4899, "step": 2063 }, { "epoch": 0.5794497473329591, "grad_norm": 0.7649110555648804, "learning_rate": 9.73819391560622e-06, "loss": 0.4569, "step": 2064 }, { "epoch": 0.5797304884896126, "grad_norm": 0.7903653979301453, "learning_rate": 9.737672060697454e-06, "loss": 0.4789, "step": 2065 }, { "epoch": 0.5800112296462662, "grad_norm": 0.9227196574211121, "learning_rate": 9.737149700213787e-06, "loss": 0.4672, "step": 2066 }, { "epoch": 0.5802919708029197, "grad_norm": 0.7712948322296143, "learning_rate": 9.736626834210963e-06, "loss": 0.4367, "step": 2067 }, { "epoch": 0.5805727119595733, "grad_norm": 0.8210005164146423, "learning_rate": 9.736103462744776e-06, "loss": 0.4837, "step": 2068 }, { "epoch": 0.5808534531162268, "grad_norm": 0.8951388001441956, "learning_rate": 9.735579585871081e-06, "loss": 0.5036, "step": 2069 }, { "epoch": 0.5811341942728804, "grad_norm": 0.9867268800735474, "learning_rate": 9.735055203645782e-06, "loss": 0.4101, "step": 2070 }, { "epoch": 0.5814149354295339, "grad_norm": 0.7119899392127991, "learning_rate": 9.734530316124836e-06, "loss": 0.416, "step": 2071 }, { "epoch": 0.5816956765861875, "grad_norm": 0.8316327929496765, "learning_rate": 9.734004923364258e-06, "loss": 0.4133, "step": 2072 }, { "epoch": 0.5819764177428411, "grad_norm": 1.04079008102417, "learning_rate": 9.733479025420111e-06, "loss": 0.4902, "step": 2073 }, { "epoch": 0.5822571588994947, "grad_norm": 0.7793975472450256, "learning_rate": 9.732952622348519e-06, "loss": 0.4251, "step": 2074 }, { "epoch": 0.5825379000561483, "grad_norm": 0.6976045966148376, "learning_rate": 9.732425714205657e-06, "loss": 0.4479, "step": 2075 }, { "epoch": 0.5828186412128018, "grad_norm": 0.873542845249176, "learning_rate": 9.731898301047751e-06, "loss": 0.4219, "step": 2076 }, { "epoch": 0.5830993823694554, "grad_norm": 0.8051063418388367, "learning_rate": 9.731370382931082e-06, "loss": 0.4599, "step": 2077 }, { "epoch": 0.5833801235261089, "grad_norm": 0.9318996071815491, "learning_rate": 9.73084195991199e-06, "loss": 0.448, "step": 2078 }, { "epoch": 0.5836608646827625, "grad_norm": 0.7169986367225647, "learning_rate": 9.730313032046863e-06, "loss": 0.4181, "step": 2079 }, { "epoch": 0.583941605839416, "grad_norm": 0.7468517422676086, "learning_rate": 9.729783599392147e-06, "loss": 0.4413, "step": 2080 }, { "epoch": 0.5842223469960697, "grad_norm": 0.810914933681488, "learning_rate": 9.729253662004334e-06, "loss": 0.4867, "step": 2081 }, { "epoch": 0.5845030881527232, "grad_norm": 0.8537750244140625, "learning_rate": 9.728723219939982e-06, "loss": 0.4792, "step": 2082 }, { "epoch": 0.5847838293093768, "grad_norm": 0.8743634223937988, "learning_rate": 9.728192273255693e-06, "loss": 0.521, "step": 2083 }, { "epoch": 0.5850645704660303, "grad_norm": 0.7106368541717529, "learning_rate": 9.727660822008129e-06, "loss": 0.4415, "step": 2084 }, { "epoch": 0.5853453116226839, "grad_norm": 0.8619228601455688, "learning_rate": 9.727128866253999e-06, "loss": 0.5018, "step": 2085 }, { "epoch": 0.5856260527793374, "grad_norm": 0.8768312931060791, "learning_rate": 9.726596406050073e-06, "loss": 0.5083, "step": 2086 }, { "epoch": 0.585906793935991, "grad_norm": 0.6641687154769897, "learning_rate": 9.726063441453173e-06, "loss": 0.4249, "step": 2087 }, { "epoch": 0.5861875350926445, "grad_norm": 0.7184276580810547, "learning_rate": 9.725529972520172e-06, "loss": 0.4645, "step": 2088 }, { "epoch": 0.5864682762492981, "grad_norm": 0.7163186073303223, "learning_rate": 9.724995999307996e-06, "loss": 0.4566, "step": 2089 }, { "epoch": 0.5867490174059518, "grad_norm": 0.8219218254089355, "learning_rate": 9.724461521873631e-06, "loss": 0.4296, "step": 2090 }, { "epoch": 0.5870297585626053, "grad_norm": 0.8303726315498352, "learning_rate": 9.723926540274112e-06, "loss": 0.4496, "step": 2091 }, { "epoch": 0.5873104997192589, "grad_norm": 0.8900569081306458, "learning_rate": 9.72339105456653e-06, "loss": 0.5211, "step": 2092 }, { "epoch": 0.5875912408759124, "grad_norm": 0.6788725256919861, "learning_rate": 9.722855064808026e-06, "loss": 0.4641, "step": 2093 }, { "epoch": 0.587871982032566, "grad_norm": 0.7321316599845886, "learning_rate": 9.722318571055799e-06, "loss": 0.4799, "step": 2094 }, { "epoch": 0.5881527231892195, "grad_norm": 0.7232583165168762, "learning_rate": 9.721781573367099e-06, "loss": 0.4372, "step": 2095 }, { "epoch": 0.5884334643458731, "grad_norm": 0.6985263228416443, "learning_rate": 9.721244071799235e-06, "loss": 0.4282, "step": 2096 }, { "epoch": 0.5887142055025266, "grad_norm": 0.8235869407653809, "learning_rate": 9.720706066409561e-06, "loss": 0.4714, "step": 2097 }, { "epoch": 0.5889949466591803, "grad_norm": 0.7926880121231079, "learning_rate": 9.720167557255494e-06, "loss": 0.4511, "step": 2098 }, { "epoch": 0.5892756878158338, "grad_norm": 0.7409059405326843, "learning_rate": 9.719628544394497e-06, "loss": 0.41, "step": 2099 }, { "epoch": 0.5895564289724874, "grad_norm": 0.8219630122184753, "learning_rate": 9.71908902788409e-06, "loss": 0.4475, "step": 2100 }, { "epoch": 0.5898371701291409, "grad_norm": 0.853156328201294, "learning_rate": 9.71854900778185e-06, "loss": 0.4624, "step": 2101 }, { "epoch": 0.5901179112857945, "grad_norm": 0.7232211232185364, "learning_rate": 9.7180084841454e-06, "loss": 0.4223, "step": 2102 }, { "epoch": 0.590398652442448, "grad_norm": 0.824863612651825, "learning_rate": 9.717467457032425e-06, "loss": 0.4398, "step": 2103 }, { "epoch": 0.5906793935991016, "grad_norm": 0.7342715263366699, "learning_rate": 9.71692592650066e-06, "loss": 0.4608, "step": 2104 }, { "epoch": 0.5909601347557552, "grad_norm": 0.7489803433418274, "learning_rate": 9.716383892607893e-06, "loss": 0.4639, "step": 2105 }, { "epoch": 0.5912408759124088, "grad_norm": 0.7693859934806824, "learning_rate": 9.715841355411965e-06, "loss": 0.4514, "step": 2106 }, { "epoch": 0.5915216170690624, "grad_norm": 0.7569899559020996, "learning_rate": 9.715298314970775e-06, "loss": 0.4303, "step": 2107 }, { "epoch": 0.5918023582257159, "grad_norm": 0.8443773984909058, "learning_rate": 9.71475477134227e-06, "loss": 0.4189, "step": 2108 }, { "epoch": 0.5920830993823695, "grad_norm": 0.8607834577560425, "learning_rate": 9.714210724584455e-06, "loss": 0.445, "step": 2109 }, { "epoch": 0.592363840539023, "grad_norm": 0.8464186191558838, "learning_rate": 9.713666174755388e-06, "loss": 0.4633, "step": 2110 }, { "epoch": 0.5926445816956766, "grad_norm": 0.8905773758888245, "learning_rate": 9.713121121913179e-06, "loss": 0.4576, "step": 2111 }, { "epoch": 0.5929253228523301, "grad_norm": 0.9899441599845886, "learning_rate": 9.712575566115992e-06, "loss": 0.4186, "step": 2112 }, { "epoch": 0.5932060640089837, "grad_norm": 0.7846997976303101, "learning_rate": 9.712029507422045e-06, "loss": 0.4573, "step": 2113 }, { "epoch": 0.5934868051656372, "grad_norm": 0.7968783378601074, "learning_rate": 9.711482945889613e-06, "loss": 0.4326, "step": 2114 }, { "epoch": 0.5937675463222909, "grad_norm": 0.9178119897842407, "learning_rate": 9.710935881577019e-06, "loss": 0.5156, "step": 2115 }, { "epoch": 0.5940482874789444, "grad_norm": 0.8109418749809265, "learning_rate": 9.710388314542644e-06, "loss": 0.4439, "step": 2116 }, { "epoch": 0.594329028635598, "grad_norm": 0.8619762659072876, "learning_rate": 9.70984024484492e-06, "loss": 0.4981, "step": 2117 }, { "epoch": 0.5946097697922516, "grad_norm": 0.8317869901657104, "learning_rate": 9.709291672542333e-06, "loss": 0.4395, "step": 2118 }, { "epoch": 0.5948905109489051, "grad_norm": 0.9120360612869263, "learning_rate": 9.708742597693425e-06, "loss": 0.5175, "step": 2119 }, { "epoch": 0.5951712521055587, "grad_norm": 0.7006910443305969, "learning_rate": 9.708193020356787e-06, "loss": 0.4377, "step": 2120 }, { "epoch": 0.5954519932622122, "grad_norm": 0.8110869526863098, "learning_rate": 9.707642940591068e-06, "loss": 0.4409, "step": 2121 }, { "epoch": 0.5957327344188658, "grad_norm": 0.7683268785476685, "learning_rate": 9.707092358454972e-06, "loss": 0.4287, "step": 2122 }, { "epoch": 0.5960134755755194, "grad_norm": 0.8027356863021851, "learning_rate": 9.706541274007249e-06, "loss": 0.5123, "step": 2123 }, { "epoch": 0.596294216732173, "grad_norm": 0.6715095043182373, "learning_rate": 9.70598968730671e-06, "loss": 0.3924, "step": 2124 }, { "epoch": 0.5965749578888265, "grad_norm": 0.7607294321060181, "learning_rate": 9.705437598412216e-06, "loss": 0.4335, "step": 2125 }, { "epoch": 0.5968556990454801, "grad_norm": 0.7370964288711548, "learning_rate": 9.704885007382681e-06, "loss": 0.4457, "step": 2126 }, { "epoch": 0.5971364402021336, "grad_norm": 0.7293221950531006, "learning_rate": 9.704331914277078e-06, "loss": 0.4502, "step": 2127 }, { "epoch": 0.5974171813587872, "grad_norm": 0.7145909070968628, "learning_rate": 9.703778319154427e-06, "loss": 0.4449, "step": 2128 }, { "epoch": 0.5976979225154407, "grad_norm": 0.8036126494407654, "learning_rate": 9.703224222073803e-06, "loss": 0.5145, "step": 2129 }, { "epoch": 0.5979786636720943, "grad_norm": 0.8295819759368896, "learning_rate": 9.702669623094339e-06, "loss": 0.4813, "step": 2130 }, { "epoch": 0.5982594048287478, "grad_norm": 0.9092859625816345, "learning_rate": 9.702114522275216e-06, "loss": 0.4847, "step": 2131 }, { "epoch": 0.5985401459854015, "grad_norm": 0.7231720685958862, "learning_rate": 9.701558919675672e-06, "loss": 0.4356, "step": 2132 }, { "epoch": 0.5988208871420551, "grad_norm": 0.719670295715332, "learning_rate": 9.701002815354999e-06, "loss": 0.5005, "step": 2133 }, { "epoch": 0.5991016282987086, "grad_norm": 0.750986635684967, "learning_rate": 9.700446209372537e-06, "loss": 0.4747, "step": 2134 }, { "epoch": 0.5993823694553622, "grad_norm": 1.015622854232788, "learning_rate": 9.699889101787687e-06, "loss": 0.5376, "step": 2135 }, { "epoch": 0.5996631106120157, "grad_norm": 0.7454087734222412, "learning_rate": 9.699331492659897e-06, "loss": 0.4398, "step": 2136 }, { "epoch": 0.5999438517686693, "grad_norm": 0.7963955998420715, "learning_rate": 9.698773382048673e-06, "loss": 0.4509, "step": 2137 }, { "epoch": 0.6002245929253228, "grad_norm": 0.7308932542800903, "learning_rate": 9.698214770013576e-06, "loss": 0.4476, "step": 2138 }, { "epoch": 0.6005053340819764, "grad_norm": 0.7614893913269043, "learning_rate": 9.697655656614214e-06, "loss": 0.414, "step": 2139 }, { "epoch": 0.60078607523863, "grad_norm": 0.6617187261581421, "learning_rate": 9.697096041910251e-06, "loss": 0.4323, "step": 2140 }, { "epoch": 0.6010668163952836, "grad_norm": 0.7306409478187561, "learning_rate": 9.69653592596141e-06, "loss": 0.4525, "step": 2141 }, { "epoch": 0.6013475575519371, "grad_norm": 0.7639381289482117, "learning_rate": 9.69597530882746e-06, "loss": 0.4925, "step": 2142 }, { "epoch": 0.6016282987085907, "grad_norm": 0.7443253993988037, "learning_rate": 9.695414190568229e-06, "loss": 0.4536, "step": 2143 }, { "epoch": 0.6019090398652442, "grad_norm": 0.7563796639442444, "learning_rate": 9.694852571243593e-06, "loss": 0.4673, "step": 2144 }, { "epoch": 0.6021897810218978, "grad_norm": 0.8298394083976746, "learning_rate": 9.694290450913486e-06, "loss": 0.4802, "step": 2145 }, { "epoch": 0.6024705221785513, "grad_norm": 0.8806003928184509, "learning_rate": 9.693727829637895e-06, "loss": 0.5414, "step": 2146 }, { "epoch": 0.6027512633352049, "grad_norm": 0.635846734046936, "learning_rate": 9.693164707476856e-06, "loss": 0.4635, "step": 2147 }, { "epoch": 0.6030320044918585, "grad_norm": 0.6990854740142822, "learning_rate": 9.692601084490468e-06, "loss": 0.4309, "step": 2148 }, { "epoch": 0.6033127456485121, "grad_norm": 0.9040673971176147, "learning_rate": 9.69203696073887e-06, "loss": 0.4853, "step": 2149 }, { "epoch": 0.6035934868051657, "grad_norm": 0.6646368503570557, "learning_rate": 9.691472336282267e-06, "loss": 0.4132, "step": 2150 }, { "epoch": 0.6038742279618192, "grad_norm": 0.8943970799446106, "learning_rate": 9.690907211180909e-06, "loss": 0.4666, "step": 2151 }, { "epoch": 0.6041549691184728, "grad_norm": 0.9120403528213501, "learning_rate": 9.690341585495107e-06, "loss": 0.4754, "step": 2152 }, { "epoch": 0.6044357102751263, "grad_norm": 0.845994234085083, "learning_rate": 9.689775459285216e-06, "loss": 0.4911, "step": 2153 }, { "epoch": 0.6047164514317799, "grad_norm": 0.7136091589927673, "learning_rate": 9.689208832611653e-06, "loss": 0.4524, "step": 2154 }, { "epoch": 0.6049971925884334, "grad_norm": 0.8505575060844421, "learning_rate": 9.688641705534883e-06, "loss": 0.4301, "step": 2155 }, { "epoch": 0.605277933745087, "grad_norm": 0.8498470783233643, "learning_rate": 9.688074078115428e-06, "loss": 0.4616, "step": 2156 }, { "epoch": 0.6055586749017406, "grad_norm": 0.9020117521286011, "learning_rate": 9.687505950413861e-06, "loss": 0.4896, "step": 2157 }, { "epoch": 0.6058394160583942, "grad_norm": 0.8544296622276306, "learning_rate": 9.686937322490806e-06, "loss": 0.4291, "step": 2158 }, { "epoch": 0.6061201572150478, "grad_norm": 0.7850180864334106, "learning_rate": 9.686368194406948e-06, "loss": 0.3939, "step": 2159 }, { "epoch": 0.6064008983717013, "grad_norm": 1.0482336282730103, "learning_rate": 9.685798566223018e-06, "loss": 0.4647, "step": 2160 }, { "epoch": 0.6066816395283549, "grad_norm": 0.8706359267234802, "learning_rate": 9.685228437999805e-06, "loss": 0.4345, "step": 2161 }, { "epoch": 0.6069623806850084, "grad_norm": 0.7320350408554077, "learning_rate": 9.684657809798148e-06, "loss": 0.4501, "step": 2162 }, { "epoch": 0.607243121841662, "grad_norm": 0.9550179243087769, "learning_rate": 9.68408668167894e-06, "loss": 0.5091, "step": 2163 }, { "epoch": 0.6075238629983155, "grad_norm": 1.0302115678787231, "learning_rate": 9.683515053703133e-06, "loss": 0.4659, "step": 2164 }, { "epoch": 0.6078046041549691, "grad_norm": 0.7839834690093994, "learning_rate": 9.682942925931722e-06, "loss": 0.5299, "step": 2165 }, { "epoch": 0.6080853453116227, "grad_norm": 0.8000436425209045, "learning_rate": 9.682370298425766e-06, "loss": 0.4884, "step": 2166 }, { "epoch": 0.6083660864682763, "grad_norm": 0.8380921483039856, "learning_rate": 9.681797171246365e-06, "loss": 0.4929, "step": 2167 }, { "epoch": 0.6086468276249298, "grad_norm": 0.8474700450897217, "learning_rate": 9.681223544454687e-06, "loss": 0.4347, "step": 2168 }, { "epoch": 0.6089275687815834, "grad_norm": 0.8120713829994202, "learning_rate": 9.680649418111942e-06, "loss": 0.4552, "step": 2169 }, { "epoch": 0.6092083099382369, "grad_norm": 0.7763919234275818, "learning_rate": 9.680074792279399e-06, "loss": 0.4434, "step": 2170 }, { "epoch": 0.6094890510948905, "grad_norm": 0.8411002159118652, "learning_rate": 9.679499667018376e-06, "loss": 0.4451, "step": 2171 }, { "epoch": 0.609769792251544, "grad_norm": 0.8674899339675903, "learning_rate": 9.678924042390252e-06, "loss": 0.4619, "step": 2172 }, { "epoch": 0.6100505334081976, "grad_norm": 0.7417405247688293, "learning_rate": 9.678347918456448e-06, "loss": 0.4515, "step": 2173 }, { "epoch": 0.6103312745648513, "grad_norm": 0.7583059668540955, "learning_rate": 9.677771295278446e-06, "loss": 0.4528, "step": 2174 }, { "epoch": 0.6106120157215048, "grad_norm": 0.8104037046432495, "learning_rate": 9.677194172917781e-06, "loss": 0.5128, "step": 2175 }, { "epoch": 0.6108927568781584, "grad_norm": 0.792428731918335, "learning_rate": 9.676616551436042e-06, "loss": 0.4191, "step": 2176 }, { "epoch": 0.6111734980348119, "grad_norm": 0.72653728723526, "learning_rate": 9.676038430894863e-06, "loss": 0.4401, "step": 2177 }, { "epoch": 0.6114542391914655, "grad_norm": 0.6194968819618225, "learning_rate": 9.675459811355944e-06, "loss": 0.4254, "step": 2178 }, { "epoch": 0.611734980348119, "grad_norm": 0.7762696743011475, "learning_rate": 9.674880692881026e-06, "loss": 0.4652, "step": 2179 }, { "epoch": 0.6120157215047726, "grad_norm": 0.6617141366004944, "learning_rate": 9.674301075531913e-06, "loss": 0.4376, "step": 2180 }, { "epoch": 0.6122964626614261, "grad_norm": 0.6666786670684814, "learning_rate": 9.673720959370458e-06, "loss": 0.4386, "step": 2181 }, { "epoch": 0.6125772038180798, "grad_norm": 0.8252989649772644, "learning_rate": 9.673140344458565e-06, "loss": 0.4881, "step": 2182 }, { "epoch": 0.6128579449747333, "grad_norm": 0.7981967329978943, "learning_rate": 9.672559230858194e-06, "loss": 0.4417, "step": 2183 }, { "epoch": 0.6131386861313869, "grad_norm": 0.6259914040565491, "learning_rate": 9.671977618631359e-06, "loss": 0.4676, "step": 2184 }, { "epoch": 0.6134194272880404, "grad_norm": 0.7408134937286377, "learning_rate": 9.671395507840126e-06, "loss": 0.4635, "step": 2185 }, { "epoch": 0.613700168444694, "grad_norm": 0.818143367767334, "learning_rate": 9.670812898546613e-06, "loss": 0.4538, "step": 2186 }, { "epoch": 0.6139809096013475, "grad_norm": 0.7853017449378967, "learning_rate": 9.670229790812994e-06, "loss": 0.4679, "step": 2187 }, { "epoch": 0.6142616507580011, "grad_norm": 0.6382354497909546, "learning_rate": 9.669646184701494e-06, "loss": 0.4307, "step": 2188 }, { "epoch": 0.6145423919146547, "grad_norm": 0.6988775730133057, "learning_rate": 9.669062080274391e-06, "loss": 0.4371, "step": 2189 }, { "epoch": 0.6148231330713082, "grad_norm": 0.794001579284668, "learning_rate": 9.668477477594021e-06, "loss": 0.4761, "step": 2190 }, { "epoch": 0.6151038742279619, "grad_norm": 0.8090159296989441, "learning_rate": 9.667892376722763e-06, "loss": 0.4631, "step": 2191 }, { "epoch": 0.6153846153846154, "grad_norm": 0.7847395539283752, "learning_rate": 9.667306777723058e-06, "loss": 0.4903, "step": 2192 }, { "epoch": 0.615665356541269, "grad_norm": 0.8224895000457764, "learning_rate": 9.666720680657399e-06, "loss": 0.4868, "step": 2193 }, { "epoch": 0.6159460976979225, "grad_norm": 0.8005455732345581, "learning_rate": 9.666134085588329e-06, "loss": 0.441, "step": 2194 }, { "epoch": 0.6162268388545761, "grad_norm": 0.8278933167457581, "learning_rate": 9.665546992578446e-06, "loss": 0.4662, "step": 2195 }, { "epoch": 0.6165075800112296, "grad_norm": 0.8037208318710327, "learning_rate": 9.6649594016904e-06, "loss": 0.4814, "step": 2196 }, { "epoch": 0.6167883211678832, "grad_norm": 0.8241268396377563, "learning_rate": 9.6643713129869e-06, "loss": 0.4458, "step": 2197 }, { "epoch": 0.6170690623245367, "grad_norm": 0.7292011380195618, "learning_rate": 9.663782726530696e-06, "loss": 0.4591, "step": 2198 }, { "epoch": 0.6173498034811904, "grad_norm": 0.6113244891166687, "learning_rate": 9.6631936423846e-06, "loss": 0.4147, "step": 2199 }, { "epoch": 0.617630544637844, "grad_norm": 0.6649948358535767, "learning_rate": 9.66260406061148e-06, "loss": 0.4117, "step": 2200 }, { "epoch": 0.6179112857944975, "grad_norm": 0.8504791855812073, "learning_rate": 9.66201398127425e-06, "loss": 0.5089, "step": 2201 }, { "epoch": 0.618192026951151, "grad_norm": 0.7351915240287781, "learning_rate": 9.661423404435877e-06, "loss": 0.4125, "step": 2202 }, { "epoch": 0.6184727681078046, "grad_norm": 0.7622799277305603, "learning_rate": 9.660832330159387e-06, "loss": 0.4494, "step": 2203 }, { "epoch": 0.6187535092644582, "grad_norm": 0.7854540944099426, "learning_rate": 9.660240758507852e-06, "loss": 0.4603, "step": 2204 }, { "epoch": 0.6190342504211117, "grad_norm": 0.861838161945343, "learning_rate": 9.659648689544406e-06, "loss": 0.4706, "step": 2205 }, { "epoch": 0.6193149915777653, "grad_norm": 0.7758306860923767, "learning_rate": 9.659056123332229e-06, "loss": 0.4796, "step": 2206 }, { "epoch": 0.6195957327344188, "grad_norm": 0.7804763317108154, "learning_rate": 9.658463059934553e-06, "loss": 0.3905, "step": 2207 }, { "epoch": 0.6198764738910725, "grad_norm": 0.7302863001823425, "learning_rate": 9.657869499414669e-06, "loss": 0.4803, "step": 2208 }, { "epoch": 0.620157215047726, "grad_norm": 0.7502059936523438, "learning_rate": 9.657275441835919e-06, "loss": 0.4721, "step": 2209 }, { "epoch": 0.6204379562043796, "grad_norm": 0.7972210645675659, "learning_rate": 9.656680887261693e-06, "loss": 0.4521, "step": 2210 }, { "epoch": 0.6207186973610331, "grad_norm": 0.690517008304596, "learning_rate": 9.656085835755442e-06, "loss": 0.4554, "step": 2211 }, { "epoch": 0.6209994385176867, "grad_norm": 0.6880694031715393, "learning_rate": 9.655490287380664e-06, "loss": 0.4203, "step": 2212 }, { "epoch": 0.6212801796743402, "grad_norm": 0.8624227643013, "learning_rate": 9.654894242200914e-06, "loss": 0.4467, "step": 2213 }, { "epoch": 0.6215609208309938, "grad_norm": 0.7833415865898132, "learning_rate": 9.654297700279798e-06, "loss": 0.468, "step": 2214 }, { "epoch": 0.6218416619876473, "grad_norm": 0.7033445835113525, "learning_rate": 9.653700661680973e-06, "loss": 0.493, "step": 2215 }, { "epoch": 0.622122403144301, "grad_norm": 0.9222918748855591, "learning_rate": 9.653103126468154e-06, "loss": 0.4662, "step": 2216 }, { "epoch": 0.6224031443009546, "grad_norm": 0.7879701852798462, "learning_rate": 9.652505094705105e-06, "loss": 0.5095, "step": 2217 }, { "epoch": 0.6226838854576081, "grad_norm": 0.6616983413696289, "learning_rate": 9.651906566455645e-06, "loss": 0.4761, "step": 2218 }, { "epoch": 0.6229646266142617, "grad_norm": 0.8261833190917969, "learning_rate": 9.651307541783643e-06, "loss": 0.4322, "step": 2219 }, { "epoch": 0.6232453677709152, "grad_norm": 0.8467888236045837, "learning_rate": 9.650708020753025e-06, "loss": 0.4686, "step": 2220 }, { "epoch": 0.6235261089275688, "grad_norm": 0.7467922568321228, "learning_rate": 9.650108003427767e-06, "loss": 0.431, "step": 2221 }, { "epoch": 0.6238068500842223, "grad_norm": 0.9102432131767273, "learning_rate": 9.649507489871902e-06, "loss": 0.4263, "step": 2222 }, { "epoch": 0.6240875912408759, "grad_norm": 0.9594277143478394, "learning_rate": 9.64890648014951e-06, "loss": 0.4684, "step": 2223 }, { "epoch": 0.6243683323975294, "grad_norm": 0.7878795266151428, "learning_rate": 9.64830497432473e-06, "loss": 0.4443, "step": 2224 }, { "epoch": 0.6246490735541831, "grad_norm": 0.8136244416236877, "learning_rate": 9.647702972461745e-06, "loss": 0.4573, "step": 2225 }, { "epoch": 0.6249298147108366, "grad_norm": 0.8782902956008911, "learning_rate": 9.647100474624805e-06, "loss": 0.4236, "step": 2226 }, { "epoch": 0.6252105558674902, "grad_norm": 0.7650416493415833, "learning_rate": 9.646497480878199e-06, "loss": 0.4489, "step": 2227 }, { "epoch": 0.6254912970241437, "grad_norm": 0.8226631879806519, "learning_rate": 9.645893991286276e-06, "loss": 0.4495, "step": 2228 }, { "epoch": 0.6257720381807973, "grad_norm": 0.7743797302246094, "learning_rate": 9.645290005913437e-06, "loss": 0.4519, "step": 2229 }, { "epoch": 0.6260527793374508, "grad_norm": 0.6662293672561646, "learning_rate": 9.644685524824137e-06, "loss": 0.4652, "step": 2230 }, { "epoch": 0.6263335204941044, "grad_norm": 0.6626827716827393, "learning_rate": 9.64408054808288e-06, "loss": 0.4685, "step": 2231 }, { "epoch": 0.626614261650758, "grad_norm": 0.7970080375671387, "learning_rate": 9.643475075754227e-06, "loss": 0.476, "step": 2232 }, { "epoch": 0.6268950028074116, "grad_norm": 0.7575878500938416, "learning_rate": 9.642869107902791e-06, "loss": 0.4162, "step": 2233 }, { "epoch": 0.6271757439640652, "grad_norm": 0.7158808708190918, "learning_rate": 9.642262644593235e-06, "loss": 0.4474, "step": 2234 }, { "epoch": 0.6274564851207187, "grad_norm": 0.7752962112426758, "learning_rate": 9.641655685890277e-06, "loss": 0.4456, "step": 2235 }, { "epoch": 0.6277372262773723, "grad_norm": 0.7573445439338684, "learning_rate": 9.641048231858689e-06, "loss": 0.4133, "step": 2236 }, { "epoch": 0.6280179674340258, "grad_norm": 0.733131468296051, "learning_rate": 9.640440282563294e-06, "loss": 0.4745, "step": 2237 }, { "epoch": 0.6282987085906794, "grad_norm": 0.8936368227005005, "learning_rate": 9.639831838068972e-06, "loss": 0.4777, "step": 2238 }, { "epoch": 0.6285794497473329, "grad_norm": 0.737271249294281, "learning_rate": 9.639222898440647e-06, "loss": 0.4595, "step": 2239 }, { "epoch": 0.6288601909039865, "grad_norm": 0.7406973838806152, "learning_rate": 9.638613463743303e-06, "loss": 0.4238, "step": 2240 }, { "epoch": 0.62914093206064, "grad_norm": 0.7626855969429016, "learning_rate": 9.638003534041977e-06, "loss": 0.45, "step": 2241 }, { "epoch": 0.6294216732172937, "grad_norm": 0.7607765197753906, "learning_rate": 9.637393109401755e-06, "loss": 0.4457, "step": 2242 }, { "epoch": 0.6297024143739473, "grad_norm": 0.7057299613952637, "learning_rate": 9.63678218988778e-06, "loss": 0.4707, "step": 2243 }, { "epoch": 0.6299831555306008, "grad_norm": 0.8158141374588013, "learning_rate": 9.636170775565243e-06, "loss": 0.4902, "step": 2244 }, { "epoch": 0.6302638966872544, "grad_norm": 0.8315247893333435, "learning_rate": 9.63555886649939e-06, "loss": 0.5091, "step": 2245 }, { "epoch": 0.6305446378439079, "grad_norm": 0.7286932468414307, "learning_rate": 9.634946462755523e-06, "loss": 0.5043, "step": 2246 }, { "epoch": 0.6308253790005615, "grad_norm": 0.6606234312057495, "learning_rate": 9.634333564398992e-06, "loss": 0.4394, "step": 2247 }, { "epoch": 0.631106120157215, "grad_norm": 0.8718246817588806, "learning_rate": 9.633720171495202e-06, "loss": 0.4927, "step": 2248 }, { "epoch": 0.6313868613138686, "grad_norm": 0.753736138343811, "learning_rate": 9.633106284109612e-06, "loss": 0.4545, "step": 2249 }, { "epoch": 0.6316676024705222, "grad_norm": 0.7505636215209961, "learning_rate": 9.632491902307727e-06, "loss": 0.4711, "step": 2250 }, { "epoch": 0.6319483436271758, "grad_norm": 0.7484976649284363, "learning_rate": 9.631877026155118e-06, "loss": 0.4726, "step": 2251 }, { "epoch": 0.6322290847838293, "grad_norm": 0.9118866920471191, "learning_rate": 9.631261655717394e-06, "loss": 0.5145, "step": 2252 }, { "epoch": 0.6325098259404829, "grad_norm": 0.6723572015762329, "learning_rate": 9.630645791060226e-06, "loss": 0.4593, "step": 2253 }, { "epoch": 0.6327905670971364, "grad_norm": 0.6758120059967041, "learning_rate": 9.630029432249336e-06, "loss": 0.3962, "step": 2254 }, { "epoch": 0.63307130825379, "grad_norm": 0.7874472141265869, "learning_rate": 9.629412579350496e-06, "loss": 0.4346, "step": 2255 }, { "epoch": 0.6333520494104435, "grad_norm": 0.7685884833335876, "learning_rate": 9.628795232429535e-06, "loss": 0.4687, "step": 2256 }, { "epoch": 0.6336327905670971, "grad_norm": 0.7021225094795227, "learning_rate": 9.628177391552333e-06, "loss": 0.4444, "step": 2257 }, { "epoch": 0.6339135317237508, "grad_norm": 0.7794039249420166, "learning_rate": 9.627559056784818e-06, "loss": 0.4489, "step": 2258 }, { "epoch": 0.6341942728804043, "grad_norm": 0.9281190633773804, "learning_rate": 9.626940228192979e-06, "loss": 0.4624, "step": 2259 }, { "epoch": 0.6344750140370579, "grad_norm": 0.643397331237793, "learning_rate": 9.626320905842849e-06, "loss": 0.4257, "step": 2260 }, { "epoch": 0.6347557551937114, "grad_norm": 0.7969174981117249, "learning_rate": 9.625701089800525e-06, "loss": 0.4429, "step": 2261 }, { "epoch": 0.635036496350365, "grad_norm": 0.9377960562705994, "learning_rate": 9.625080780132143e-06, "loss": 0.4882, "step": 2262 }, { "epoch": 0.6353172375070185, "grad_norm": 0.728842556476593, "learning_rate": 9.624459976903903e-06, "loss": 0.4482, "step": 2263 }, { "epoch": 0.6355979786636721, "grad_norm": 0.683595597743988, "learning_rate": 9.623838680182051e-06, "loss": 0.451, "step": 2264 }, { "epoch": 0.6358787198203256, "grad_norm": 0.8157421350479126, "learning_rate": 9.623216890032892e-06, "loss": 0.4768, "step": 2265 }, { "epoch": 0.6361594609769792, "grad_norm": 0.6925378441810608, "learning_rate": 9.622594606522772e-06, "loss": 0.4301, "step": 2266 }, { "epoch": 0.6364402021336328, "grad_norm": 0.8240598440170288, "learning_rate": 9.621971829718104e-06, "loss": 0.4379, "step": 2267 }, { "epoch": 0.6367209432902864, "grad_norm": 0.737712562084198, "learning_rate": 9.621348559685345e-06, "loss": 0.4519, "step": 2268 }, { "epoch": 0.6370016844469399, "grad_norm": 0.845904529094696, "learning_rate": 9.620724796491004e-06, "loss": 0.4489, "step": 2269 }, { "epoch": 0.6372824256035935, "grad_norm": 0.7575750350952148, "learning_rate": 9.620100540201648e-06, "loss": 0.4642, "step": 2270 }, { "epoch": 0.637563166760247, "grad_norm": 0.7919850945472717, "learning_rate": 9.619475790883894e-06, "loss": 0.4513, "step": 2271 }, { "epoch": 0.6378439079169006, "grad_norm": 0.856126606464386, "learning_rate": 9.618850548604409e-06, "loss": 0.4749, "step": 2272 }, { "epoch": 0.6381246490735542, "grad_norm": 0.7882614135742188, "learning_rate": 9.618224813429916e-06, "loss": 0.4168, "step": 2273 }, { "epoch": 0.6384053902302077, "grad_norm": 0.7316656112670898, "learning_rate": 9.61759858542719e-06, "loss": 0.4741, "step": 2274 }, { "epoch": 0.6386861313868614, "grad_norm": 0.7819839119911194, "learning_rate": 9.616971864663059e-06, "loss": 0.4935, "step": 2275 }, { "epoch": 0.6389668725435149, "grad_norm": 0.8177666664123535, "learning_rate": 9.616344651204398e-06, "loss": 0.4671, "step": 2276 }, { "epoch": 0.6392476137001685, "grad_norm": 0.6885898113250732, "learning_rate": 9.615716945118147e-06, "loss": 0.4029, "step": 2277 }, { "epoch": 0.639528354856822, "grad_norm": 0.8757466673851013, "learning_rate": 9.615088746471286e-06, "loss": 0.4449, "step": 2278 }, { "epoch": 0.6398090960134756, "grad_norm": 0.7296600937843323, "learning_rate": 9.614460055330852e-06, "loss": 0.4331, "step": 2279 }, { "epoch": 0.6400898371701291, "grad_norm": 0.8537064790725708, "learning_rate": 9.613830871763939e-06, "loss": 0.4844, "step": 2280 }, { "epoch": 0.6403705783267827, "grad_norm": 0.7637564539909363, "learning_rate": 9.613201195837684e-06, "loss": 0.4167, "step": 2281 }, { "epoch": 0.6406513194834362, "grad_norm": 0.8047333359718323, "learning_rate": 9.612571027619287e-06, "loss": 0.4301, "step": 2282 }, { "epoch": 0.6409320606400898, "grad_norm": 0.7688471674919128, "learning_rate": 9.611940367175992e-06, "loss": 0.4373, "step": 2283 }, { "epoch": 0.6412128017967434, "grad_norm": 0.7930221557617188, "learning_rate": 9.611309214575103e-06, "loss": 0.4407, "step": 2284 }, { "epoch": 0.641493542953397, "grad_norm": 0.7173995971679688, "learning_rate": 9.610677569883967e-06, "loss": 0.4651, "step": 2285 }, { "epoch": 0.6417742841100506, "grad_norm": 0.6970399022102356, "learning_rate": 9.610045433169994e-06, "loss": 0.4458, "step": 2286 }, { "epoch": 0.6420550252667041, "grad_norm": 0.7596144676208496, "learning_rate": 9.609412804500642e-06, "loss": 0.4504, "step": 2287 }, { "epoch": 0.6423357664233577, "grad_norm": 0.8646596074104309, "learning_rate": 9.608779683943417e-06, "loss": 0.4748, "step": 2288 }, { "epoch": 0.6426165075800112, "grad_norm": 0.7043774127960205, "learning_rate": 9.608146071565888e-06, "loss": 0.4651, "step": 2289 }, { "epoch": 0.6428972487366648, "grad_norm": 0.6364893913269043, "learning_rate": 9.607511967435663e-06, "loss": 0.4205, "step": 2290 }, { "epoch": 0.6431779898933183, "grad_norm": 0.7925765514373779, "learning_rate": 9.606877371620413e-06, "loss": 0.4129, "step": 2291 }, { "epoch": 0.643458731049972, "grad_norm": 0.7484250068664551, "learning_rate": 9.606242284187861e-06, "loss": 0.4414, "step": 2292 }, { "epoch": 0.6437394722066255, "grad_norm": 0.7754836082458496, "learning_rate": 9.605606705205774e-06, "loss": 0.442, "step": 2293 }, { "epoch": 0.6440202133632791, "grad_norm": 0.6823911070823669, "learning_rate": 9.604970634741981e-06, "loss": 0.4544, "step": 2294 }, { "epoch": 0.6443009545199326, "grad_norm": 0.6883746981620789, "learning_rate": 9.604334072864358e-06, "loss": 0.427, "step": 2295 }, { "epoch": 0.6445816956765862, "grad_norm": 0.7669408917427063, "learning_rate": 9.603697019640837e-06, "loss": 0.4852, "step": 2296 }, { "epoch": 0.6448624368332397, "grad_norm": 0.7926437258720398, "learning_rate": 9.603059475139395e-06, "loss": 0.4602, "step": 2297 }, { "epoch": 0.6451431779898933, "grad_norm": 0.6788368225097656, "learning_rate": 9.602421439428073e-06, "loss": 0.4353, "step": 2298 }, { "epoch": 0.6454239191465468, "grad_norm": 0.791560709476471, "learning_rate": 9.601782912574955e-06, "loss": 0.5169, "step": 2299 }, { "epoch": 0.6457046603032004, "grad_norm": 0.7834374904632568, "learning_rate": 9.601143894648182e-06, "loss": 0.4565, "step": 2300 }, { "epoch": 0.6459854014598541, "grad_norm": 0.8093973994255066, "learning_rate": 9.600504385715943e-06, "loss": 0.4327, "step": 2301 }, { "epoch": 0.6462661426165076, "grad_norm": 0.7137249112129211, "learning_rate": 9.599864385846487e-06, "loss": 0.4228, "step": 2302 }, { "epoch": 0.6465468837731612, "grad_norm": 0.7782304286956787, "learning_rate": 9.599223895108107e-06, "loss": 0.4197, "step": 2303 }, { "epoch": 0.6468276249298147, "grad_norm": 0.8250675797462463, "learning_rate": 9.598582913569153e-06, "loss": 0.5315, "step": 2304 }, { "epoch": 0.6471083660864683, "grad_norm": 0.8861746788024902, "learning_rate": 9.597941441298028e-06, "loss": 0.5203, "step": 2305 }, { "epoch": 0.6473891072431218, "grad_norm": 0.7449389100074768, "learning_rate": 9.597299478363186e-06, "loss": 0.4792, "step": 2306 }, { "epoch": 0.6476698483997754, "grad_norm": 0.6926552057266235, "learning_rate": 9.596657024833132e-06, "loss": 0.4126, "step": 2307 }, { "epoch": 0.6479505895564289, "grad_norm": 0.8158414959907532, "learning_rate": 9.596014080776424e-06, "loss": 0.4573, "step": 2308 }, { "epoch": 0.6482313307130826, "grad_norm": 0.7573313117027283, "learning_rate": 9.595370646261674e-06, "loss": 0.4671, "step": 2309 }, { "epoch": 0.6485120718697361, "grad_norm": 0.7863513827323914, "learning_rate": 9.594726721357545e-06, "loss": 0.4695, "step": 2310 }, { "epoch": 0.6487928130263897, "grad_norm": 0.8010240793228149, "learning_rate": 9.594082306132755e-06, "loss": 0.5111, "step": 2311 }, { "epoch": 0.6490735541830432, "grad_norm": 0.8068008422851562, "learning_rate": 9.593437400656069e-06, "loss": 0.4253, "step": 2312 }, { "epoch": 0.6493542953396968, "grad_norm": 0.7970138788223267, "learning_rate": 9.592792004996307e-06, "loss": 0.4752, "step": 2313 }, { "epoch": 0.6496350364963503, "grad_norm": 0.7540660500526428, "learning_rate": 9.592146119222345e-06, "loss": 0.4656, "step": 2314 }, { "epoch": 0.6499157776530039, "grad_norm": 0.7930955290794373, "learning_rate": 9.591499743403105e-06, "loss": 0.4496, "step": 2315 }, { "epoch": 0.6501965188096575, "grad_norm": 0.9169591069221497, "learning_rate": 9.590852877607566e-06, "loss": 0.4957, "step": 2316 }, { "epoch": 0.6504772599663111, "grad_norm": 0.815403938293457, "learning_rate": 9.590205521904753e-06, "loss": 0.4478, "step": 2317 }, { "epoch": 0.6507580011229647, "grad_norm": 0.7266177535057068, "learning_rate": 9.589557676363755e-06, "loss": 0.4439, "step": 2318 }, { "epoch": 0.6510387422796182, "grad_norm": 0.7989054918289185, "learning_rate": 9.588909341053702e-06, "loss": 0.4838, "step": 2319 }, { "epoch": 0.6513194834362718, "grad_norm": 0.8077318072319031, "learning_rate": 9.58826051604378e-06, "loss": 0.4566, "step": 2320 }, { "epoch": 0.6516002245929253, "grad_norm": 0.7144681215286255, "learning_rate": 9.587611201403228e-06, "loss": 0.4249, "step": 2321 }, { "epoch": 0.6518809657495789, "grad_norm": 0.7660030126571655, "learning_rate": 9.586961397201338e-06, "loss": 0.4477, "step": 2322 }, { "epoch": 0.6521617069062324, "grad_norm": 0.7299714088439941, "learning_rate": 9.58631110350745e-06, "loss": 0.4455, "step": 2323 }, { "epoch": 0.652442448062886, "grad_norm": 0.8112562298774719, "learning_rate": 9.585660320390964e-06, "loss": 0.4506, "step": 2324 }, { "epoch": 0.6527231892195395, "grad_norm": 0.8056146502494812, "learning_rate": 9.585009047921323e-06, "loss": 0.425, "step": 2325 }, { "epoch": 0.6530039303761932, "grad_norm": 0.7698877453804016, "learning_rate": 9.58435728616803e-06, "loss": 0.4735, "step": 2326 }, { "epoch": 0.6532846715328468, "grad_norm": 0.6862674355506897, "learning_rate": 9.583705035200634e-06, "loss": 0.4949, "step": 2327 }, { "epoch": 0.6535654126895003, "grad_norm": 0.84004145860672, "learning_rate": 9.583052295088742e-06, "loss": 0.5078, "step": 2328 }, { "epoch": 0.6538461538461539, "grad_norm": 0.7269312739372253, "learning_rate": 9.582399065902008e-06, "loss": 0.4443, "step": 2329 }, { "epoch": 0.6541268950028074, "grad_norm": 0.8159030079841614, "learning_rate": 9.581745347710143e-06, "loss": 0.4615, "step": 2330 }, { "epoch": 0.654407636159461, "grad_norm": 0.7063059210777283, "learning_rate": 9.581091140582906e-06, "loss": 0.4521, "step": 2331 }, { "epoch": 0.6546883773161145, "grad_norm": 0.6708444952964783, "learning_rate": 9.58043644459011e-06, "loss": 0.4062, "step": 2332 }, { "epoch": 0.6549691184727681, "grad_norm": 0.7174159288406372, "learning_rate": 9.579781259801623e-06, "loss": 0.4654, "step": 2333 }, { "epoch": 0.6552498596294217, "grad_norm": 0.6760228276252747, "learning_rate": 9.579125586287357e-06, "loss": 0.4308, "step": 2334 }, { "epoch": 0.6555306007860753, "grad_norm": 0.8395696878433228, "learning_rate": 9.578469424117284e-06, "loss": 0.4963, "step": 2335 }, { "epoch": 0.6558113419427288, "grad_norm": 0.6900395750999451, "learning_rate": 9.577812773361428e-06, "loss": 0.4627, "step": 2336 }, { "epoch": 0.6560920830993824, "grad_norm": 0.6162226796150208, "learning_rate": 9.57715563408986e-06, "loss": 0.4273, "step": 2337 }, { "epoch": 0.6563728242560359, "grad_norm": 0.6915835738182068, "learning_rate": 9.576498006372705e-06, "loss": 0.4195, "step": 2338 }, { "epoch": 0.6566535654126895, "grad_norm": 0.7447922825813293, "learning_rate": 9.575839890280141e-06, "loss": 0.4395, "step": 2339 }, { "epoch": 0.656934306569343, "grad_norm": 0.6708216071128845, "learning_rate": 9.5751812858824e-06, "loss": 0.4265, "step": 2340 }, { "epoch": 0.6572150477259966, "grad_norm": 0.7260141968727112, "learning_rate": 9.574522193249764e-06, "loss": 0.4462, "step": 2341 }, { "epoch": 0.6574957888826501, "grad_norm": 0.8416393995285034, "learning_rate": 9.573862612452567e-06, "loss": 0.436, "step": 2342 }, { "epoch": 0.6577765300393038, "grad_norm": 0.8120285272598267, "learning_rate": 9.573202543561195e-06, "loss": 0.4726, "step": 2343 }, { "epoch": 0.6580572711959574, "grad_norm": 0.7599145770072937, "learning_rate": 9.572541986646087e-06, "loss": 0.4428, "step": 2344 }, { "epoch": 0.6583380123526109, "grad_norm": 0.7690832018852234, "learning_rate": 9.571880941777732e-06, "loss": 0.438, "step": 2345 }, { "epoch": 0.6586187535092645, "grad_norm": 0.8145992755889893, "learning_rate": 9.571219409026672e-06, "loss": 0.4551, "step": 2346 }, { "epoch": 0.658899494665918, "grad_norm": 0.7256839871406555, "learning_rate": 9.570557388463504e-06, "loss": 0.406, "step": 2347 }, { "epoch": 0.6591802358225716, "grad_norm": 0.7386422157287598, "learning_rate": 9.569894880158876e-06, "loss": 0.4488, "step": 2348 }, { "epoch": 0.6594609769792251, "grad_norm": 0.8676342964172363, "learning_rate": 9.569231884183483e-06, "loss": 0.5044, "step": 2349 }, { "epoch": 0.6597417181358787, "grad_norm": 0.7732373476028442, "learning_rate": 9.568568400608079e-06, "loss": 0.4671, "step": 2350 }, { "epoch": 0.6600224592925323, "grad_norm": 0.8180080652236938, "learning_rate": 9.567904429503463e-06, "loss": 0.4935, "step": 2351 }, { "epoch": 0.6603032004491859, "grad_norm": 0.7789162397384644, "learning_rate": 9.567239970940492e-06, "loss": 0.4285, "step": 2352 }, { "epoch": 0.6605839416058394, "grad_norm": 0.7517969608306885, "learning_rate": 9.566575024990075e-06, "loss": 0.4401, "step": 2353 }, { "epoch": 0.660864682762493, "grad_norm": 0.8047889471054077, "learning_rate": 9.565909591723169e-06, "loss": 0.4281, "step": 2354 }, { "epoch": 0.6611454239191465, "grad_norm": 0.6843998432159424, "learning_rate": 9.565243671210783e-06, "loss": 0.4996, "step": 2355 }, { "epoch": 0.6614261650758001, "grad_norm": 0.7578331232070923, "learning_rate": 9.564577263523985e-06, "loss": 0.4404, "step": 2356 }, { "epoch": 0.6617069062324537, "grad_norm": 0.7489643096923828, "learning_rate": 9.563910368733883e-06, "loss": 0.4583, "step": 2357 }, { "epoch": 0.6619876473891072, "grad_norm": 0.8898212313652039, "learning_rate": 9.56324298691165e-06, "loss": 0.4822, "step": 2358 }, { "epoch": 0.6622683885457608, "grad_norm": 0.7135800123214722, "learning_rate": 9.562575118128501e-06, "loss": 0.4378, "step": 2359 }, { "epoch": 0.6625491297024144, "grad_norm": 0.7930676937103271, "learning_rate": 9.561906762455708e-06, "loss": 0.4626, "step": 2360 }, { "epoch": 0.662829870859068, "grad_norm": 0.6866939663887024, "learning_rate": 9.561237919964595e-06, "loss": 0.3879, "step": 2361 }, { "epoch": 0.6631106120157215, "grad_norm": 0.8476772904396057, "learning_rate": 9.560568590726536e-06, "loss": 0.4268, "step": 2362 }, { "epoch": 0.6633913531723751, "grad_norm": 0.7971569895744324, "learning_rate": 9.559898774812957e-06, "loss": 0.4879, "step": 2363 }, { "epoch": 0.6636720943290286, "grad_norm": 0.8160806894302368, "learning_rate": 9.559228472295336e-06, "loss": 0.5194, "step": 2364 }, { "epoch": 0.6639528354856822, "grad_norm": 0.7013716697692871, "learning_rate": 9.558557683245204e-06, "loss": 0.4323, "step": 2365 }, { "epoch": 0.6642335766423357, "grad_norm": 0.7933659553527832, "learning_rate": 9.557886407734145e-06, "loss": 0.4903, "step": 2366 }, { "epoch": 0.6645143177989893, "grad_norm": 0.7558932900428772, "learning_rate": 9.557214645833792e-06, "loss": 0.4799, "step": 2367 }, { "epoch": 0.664795058955643, "grad_norm": 0.743954062461853, "learning_rate": 9.556542397615831e-06, "loss": 0.4406, "step": 2368 }, { "epoch": 0.6650758001122965, "grad_norm": 0.7228794097900391, "learning_rate": 9.555869663152003e-06, "loss": 0.4065, "step": 2369 }, { "epoch": 0.66535654126895, "grad_norm": 0.7404186129570007, "learning_rate": 9.555196442514091e-06, "loss": 0.4371, "step": 2370 }, { "epoch": 0.6656372824256036, "grad_norm": 0.7843636274337769, "learning_rate": 9.554522735773946e-06, "loss": 0.4449, "step": 2371 }, { "epoch": 0.6659180235822572, "grad_norm": 0.7291850447654724, "learning_rate": 9.553848543003454e-06, "loss": 0.4812, "step": 2372 }, { "epoch": 0.6661987647389107, "grad_norm": 0.7260723114013672, "learning_rate": 9.553173864274567e-06, "loss": 0.5139, "step": 2373 }, { "epoch": 0.6664795058955643, "grad_norm": 0.7064835429191589, "learning_rate": 9.552498699659279e-06, "loss": 0.468, "step": 2374 }, { "epoch": 0.6667602470522178, "grad_norm": 0.7111903429031372, "learning_rate": 9.551823049229638e-06, "loss": 0.475, "step": 2375 }, { "epoch": 0.6670409882088714, "grad_norm": 0.8541937470436096, "learning_rate": 9.551146913057747e-06, "loss": 0.4862, "step": 2376 }, { "epoch": 0.667321729365525, "grad_norm": 0.8285909295082092, "learning_rate": 9.55047029121576e-06, "loss": 0.4276, "step": 2377 }, { "epoch": 0.6676024705221786, "grad_norm": 0.6595309972763062, "learning_rate": 9.549793183775882e-06, "loss": 0.4413, "step": 2378 }, { "epoch": 0.6678832116788321, "grad_norm": 0.7900859713554382, "learning_rate": 9.549115590810369e-06, "loss": 0.4404, "step": 2379 }, { "epoch": 0.6681639528354857, "grad_norm": 0.8686185479164124, "learning_rate": 9.548437512391527e-06, "loss": 0.4782, "step": 2380 }, { "epoch": 0.6684446939921392, "grad_norm": 0.8771662712097168, "learning_rate": 9.54775894859172e-06, "loss": 0.4371, "step": 2381 }, { "epoch": 0.6687254351487928, "grad_norm": 0.7942057251930237, "learning_rate": 9.547079899483358e-06, "loss": 0.464, "step": 2382 }, { "epoch": 0.6690061763054463, "grad_norm": 0.7294292449951172, "learning_rate": 9.546400365138906e-06, "loss": 0.44, "step": 2383 }, { "epoch": 0.6692869174620999, "grad_norm": 0.7692420482635498, "learning_rate": 9.54572034563088e-06, "loss": 0.4585, "step": 2384 }, { "epoch": 0.6695676586187536, "grad_norm": 0.7228407859802246, "learning_rate": 9.545039841031845e-06, "loss": 0.4761, "step": 2385 }, { "epoch": 0.6698483997754071, "grad_norm": 0.7915753126144409, "learning_rate": 9.544358851414423e-06, "loss": 0.3975, "step": 2386 }, { "epoch": 0.6701291409320607, "grad_norm": 0.81806480884552, "learning_rate": 9.543677376851284e-06, "loss": 0.4716, "step": 2387 }, { "epoch": 0.6704098820887142, "grad_norm": 0.702312707901001, "learning_rate": 9.542995417415151e-06, "loss": 0.4594, "step": 2388 }, { "epoch": 0.6706906232453678, "grad_norm": 0.7722580432891846, "learning_rate": 9.542312973178797e-06, "loss": 0.4851, "step": 2389 }, { "epoch": 0.6709713644020213, "grad_norm": 0.7912092208862305, "learning_rate": 9.54163004421505e-06, "loss": 0.4423, "step": 2390 }, { "epoch": 0.6712521055586749, "grad_norm": 0.690192461013794, "learning_rate": 9.540946630596786e-06, "loss": 0.4178, "step": 2391 }, { "epoch": 0.6715328467153284, "grad_norm": 0.7059841752052307, "learning_rate": 9.540262732396936e-06, "loss": 0.4542, "step": 2392 }, { "epoch": 0.6718135878719821, "grad_norm": 0.6695817112922668, "learning_rate": 9.539578349688483e-06, "loss": 0.4384, "step": 2393 }, { "epoch": 0.6720943290286356, "grad_norm": 0.8164772987365723, "learning_rate": 9.538893482544457e-06, "loss": 0.4891, "step": 2394 }, { "epoch": 0.6723750701852892, "grad_norm": 0.8997693657875061, "learning_rate": 9.538208131037945e-06, "loss": 0.4789, "step": 2395 }, { "epoch": 0.6726558113419427, "grad_norm": 0.7119197249412537, "learning_rate": 9.53752229524208e-06, "loss": 0.429, "step": 2396 }, { "epoch": 0.6729365524985963, "grad_norm": 0.7313930988311768, "learning_rate": 9.536835975230055e-06, "loss": 0.468, "step": 2397 }, { "epoch": 0.6732172936552498, "grad_norm": 0.8206318616867065, "learning_rate": 9.536149171075106e-06, "loss": 0.462, "step": 2398 }, { "epoch": 0.6734980348119034, "grad_norm": 0.671920120716095, "learning_rate": 9.535461882850527e-06, "loss": 0.4192, "step": 2399 }, { "epoch": 0.673778775968557, "grad_norm": 0.6229581832885742, "learning_rate": 9.534774110629661e-06, "loss": 0.4273, "step": 2400 }, { "epoch": 0.6740595171252105, "grad_norm": 0.7329521179199219, "learning_rate": 9.5340858544859e-06, "loss": 0.4996, "step": 2401 }, { "epoch": 0.6743402582818642, "grad_norm": 0.7659728527069092, "learning_rate": 9.533397114492692e-06, "loss": 0.4524, "step": 2402 }, { "epoch": 0.6746209994385177, "grad_norm": 0.7722841501235962, "learning_rate": 9.532707890723537e-06, "loss": 0.4457, "step": 2403 }, { "epoch": 0.6749017405951713, "grad_norm": 0.791482150554657, "learning_rate": 9.532018183251984e-06, "loss": 0.4838, "step": 2404 }, { "epoch": 0.6751824817518248, "grad_norm": 0.8250871300697327, "learning_rate": 9.53132799215163e-06, "loss": 0.4715, "step": 2405 }, { "epoch": 0.6754632229084784, "grad_norm": 0.9840292930603027, "learning_rate": 9.530637317496132e-06, "loss": 0.5335, "step": 2406 }, { "epoch": 0.6757439640651319, "grad_norm": 0.7690650820732117, "learning_rate": 9.529946159359194e-06, "loss": 0.4639, "step": 2407 }, { "epoch": 0.6760247052217855, "grad_norm": 0.8268058896064758, "learning_rate": 9.529254517814573e-06, "loss": 0.4876, "step": 2408 }, { "epoch": 0.676305446378439, "grad_norm": 0.7992638349533081, "learning_rate": 9.528562392936074e-06, "loss": 0.4807, "step": 2409 }, { "epoch": 0.6765861875350927, "grad_norm": 0.9305452108383179, "learning_rate": 9.527869784797558e-06, "loss": 0.4933, "step": 2410 }, { "epoch": 0.6768669286917463, "grad_norm": 0.8309610486030579, "learning_rate": 9.527176693472935e-06, "loss": 0.4476, "step": 2411 }, { "epoch": 0.6771476698483998, "grad_norm": 0.7115912437438965, "learning_rate": 9.526483119036169e-06, "loss": 0.4714, "step": 2412 }, { "epoch": 0.6774284110050534, "grad_norm": 0.7805620431900024, "learning_rate": 9.525789061561273e-06, "loss": 0.4416, "step": 2413 }, { "epoch": 0.6777091521617069, "grad_norm": 0.7787017226219177, "learning_rate": 9.525094521122311e-06, "loss": 0.4346, "step": 2414 }, { "epoch": 0.6779898933183605, "grad_norm": 0.7078654170036316, "learning_rate": 9.524399497793401e-06, "loss": 0.4637, "step": 2415 }, { "epoch": 0.678270634475014, "grad_norm": 0.7167945504188538, "learning_rate": 9.523703991648713e-06, "loss": 0.4513, "step": 2416 }, { "epoch": 0.6785513756316676, "grad_norm": 0.7387191653251648, "learning_rate": 9.523008002762468e-06, "loss": 0.4733, "step": 2417 }, { "epoch": 0.6788321167883211, "grad_norm": 0.7310633659362793, "learning_rate": 9.522311531208932e-06, "loss": 0.495, "step": 2418 }, { "epoch": 0.6791128579449748, "grad_norm": 0.6858799457550049, "learning_rate": 9.521614577062434e-06, "loss": 0.4783, "step": 2419 }, { "epoch": 0.6793935991016283, "grad_norm": 0.6893883347511292, "learning_rate": 9.520917140397346e-06, "loss": 0.4738, "step": 2420 }, { "epoch": 0.6796743402582819, "grad_norm": 0.770002007484436, "learning_rate": 9.520219221288095e-06, "loss": 0.4545, "step": 2421 }, { "epoch": 0.6799550814149354, "grad_norm": 0.7189306616783142, "learning_rate": 9.519520819809158e-06, "loss": 0.4528, "step": 2422 }, { "epoch": 0.680235822571589, "grad_norm": 0.9201745390892029, "learning_rate": 9.518821936035063e-06, "loss": 0.5134, "step": 2423 }, { "epoch": 0.6805165637282425, "grad_norm": 0.739543616771698, "learning_rate": 9.518122570040393e-06, "loss": 0.4746, "step": 2424 }, { "epoch": 0.6807973048848961, "grad_norm": 0.8252633810043335, "learning_rate": 9.517422721899779e-06, "loss": 0.4553, "step": 2425 }, { "epoch": 0.6810780460415496, "grad_norm": 0.8291049003601074, "learning_rate": 9.516722391687903e-06, "loss": 0.4629, "step": 2426 }, { "epoch": 0.6813587871982033, "grad_norm": 0.7182469964027405, "learning_rate": 9.5160215794795e-06, "loss": 0.4784, "step": 2427 }, { "epoch": 0.6816395283548569, "grad_norm": 0.6084442138671875, "learning_rate": 9.515320285349359e-06, "loss": 0.4337, "step": 2428 }, { "epoch": 0.6819202695115104, "grad_norm": 0.8051382303237915, "learning_rate": 9.514618509372315e-06, "loss": 0.4674, "step": 2429 }, { "epoch": 0.682201010668164, "grad_norm": 0.9197446703910828, "learning_rate": 9.513916251623259e-06, "loss": 0.5006, "step": 2430 }, { "epoch": 0.6824817518248175, "grad_norm": 0.809824526309967, "learning_rate": 9.513213512177131e-06, "loss": 0.4441, "step": 2431 }, { "epoch": 0.6827624929814711, "grad_norm": 0.7757403254508972, "learning_rate": 9.512510291108924e-06, "loss": 0.4671, "step": 2432 }, { "epoch": 0.6830432341381246, "grad_norm": 0.9004009962081909, "learning_rate": 9.511806588493678e-06, "loss": 0.4431, "step": 2433 }, { "epoch": 0.6833239752947782, "grad_norm": 0.8466460108757019, "learning_rate": 9.51110240440649e-06, "loss": 0.4636, "step": 2434 }, { "epoch": 0.6836047164514317, "grad_norm": 0.825817883014679, "learning_rate": 9.510397738922508e-06, "loss": 0.4396, "step": 2435 }, { "epoch": 0.6838854576080854, "grad_norm": 0.6038423180580139, "learning_rate": 9.509692592116926e-06, "loss": 0.4766, "step": 2436 }, { "epoch": 0.6841661987647389, "grad_norm": 0.6919887661933899, "learning_rate": 9.508986964064994e-06, "loss": 0.4299, "step": 2437 }, { "epoch": 0.6844469399213925, "grad_norm": 0.8460870385169983, "learning_rate": 9.508280854842014e-06, "loss": 0.4623, "step": 2438 }, { "epoch": 0.684727681078046, "grad_norm": 0.9004149436950684, "learning_rate": 9.507574264523337e-06, "loss": 0.4913, "step": 2439 }, { "epoch": 0.6850084222346996, "grad_norm": 0.6217430233955383, "learning_rate": 9.506867193184363e-06, "loss": 0.4478, "step": 2440 }, { "epoch": 0.6852891633913532, "grad_norm": 0.8997867703437805, "learning_rate": 9.50615964090055e-06, "loss": 0.4657, "step": 2441 }, { "epoch": 0.6855699045480067, "grad_norm": 0.9446470737457275, "learning_rate": 9.505451607747402e-06, "loss": 0.4673, "step": 2442 }, { "epoch": 0.6858506457046603, "grad_norm": 0.6886206269264221, "learning_rate": 9.504743093800474e-06, "loss": 0.4502, "step": 2443 }, { "epoch": 0.6861313868613139, "grad_norm": 0.764959990978241, "learning_rate": 9.50403409913538e-06, "loss": 0.4802, "step": 2444 }, { "epoch": 0.6864121280179675, "grad_norm": 0.8891680240631104, "learning_rate": 9.503324623827773e-06, "loss": 0.5012, "step": 2445 }, { "epoch": 0.686692869174621, "grad_norm": 0.930833101272583, "learning_rate": 9.502614667953366e-06, "loss": 0.4822, "step": 2446 }, { "epoch": 0.6869736103312746, "grad_norm": 0.7132047414779663, "learning_rate": 9.501904231587924e-06, "loss": 0.4728, "step": 2447 }, { "epoch": 0.6872543514879281, "grad_norm": 0.9185259342193604, "learning_rate": 9.501193314807256e-06, "loss": 0.4525, "step": 2448 }, { "epoch": 0.6875350926445817, "grad_norm": 0.7687292695045471, "learning_rate": 9.50048191768723e-06, "loss": 0.4711, "step": 2449 }, { "epoch": 0.6878158338012352, "grad_norm": 0.6602697372436523, "learning_rate": 9.499770040303759e-06, "loss": 0.4476, "step": 2450 }, { "epoch": 0.6880965749578888, "grad_norm": 0.7810957431793213, "learning_rate": 9.499057682732812e-06, "loss": 0.4447, "step": 2451 }, { "epoch": 0.6883773161145423, "grad_norm": 0.7534429430961609, "learning_rate": 9.498344845050406e-06, "loss": 0.479, "step": 2452 }, { "epoch": 0.688658057271196, "grad_norm": 0.6799314618110657, "learning_rate": 9.497631527332613e-06, "loss": 0.4915, "step": 2453 }, { "epoch": 0.6889387984278496, "grad_norm": 0.7317137122154236, "learning_rate": 9.496917729655552e-06, "loss": 0.4033, "step": 2454 }, { "epoch": 0.6892195395845031, "grad_norm": 0.7076913118362427, "learning_rate": 9.496203452095395e-06, "loss": 0.4123, "step": 2455 }, { "epoch": 0.6895002807411567, "grad_norm": 0.7963237166404724, "learning_rate": 9.495488694728366e-06, "loss": 0.4317, "step": 2456 }, { "epoch": 0.6897810218978102, "grad_norm": 0.7819330096244812, "learning_rate": 9.494773457630738e-06, "loss": 0.4786, "step": 2457 }, { "epoch": 0.6900617630544638, "grad_norm": 0.9064457416534424, "learning_rate": 9.494057740878838e-06, "loss": 0.471, "step": 2458 }, { "epoch": 0.6903425042111173, "grad_norm": 0.7059385180473328, "learning_rate": 9.493341544549044e-06, "loss": 0.5004, "step": 2459 }, { "epoch": 0.6906232453677709, "grad_norm": 0.7412230968475342, "learning_rate": 9.492624868717782e-06, "loss": 0.4656, "step": 2460 }, { "epoch": 0.6909039865244245, "grad_norm": 0.8034632205963135, "learning_rate": 9.49190771346153e-06, "loss": 0.4688, "step": 2461 }, { "epoch": 0.6911847276810781, "grad_norm": 0.8217840790748596, "learning_rate": 9.491190078856822e-06, "loss": 0.4753, "step": 2462 }, { "epoch": 0.6914654688377316, "grad_norm": 0.7424009442329407, "learning_rate": 9.490471964980236e-06, "loss": 0.4754, "step": 2463 }, { "epoch": 0.6917462099943852, "grad_norm": 0.7725020051002502, "learning_rate": 9.48975337190841e-06, "loss": 0.452, "step": 2464 }, { "epoch": 0.6920269511510387, "grad_norm": 0.8121838569641113, "learning_rate": 9.48903429971802e-06, "loss": 0.5129, "step": 2465 }, { "epoch": 0.6923076923076923, "grad_norm": 0.8661502003669739, "learning_rate": 9.488314748485807e-06, "loss": 0.4799, "step": 2466 }, { "epoch": 0.6925884334643458, "grad_norm": 1.0192327499389648, "learning_rate": 9.487594718288555e-06, "loss": 0.4925, "step": 2467 }, { "epoch": 0.6928691746209994, "grad_norm": 0.6838007569313049, "learning_rate": 9.4868742092031e-06, "loss": 0.4564, "step": 2468 }, { "epoch": 0.6931499157776531, "grad_norm": 0.9444794058799744, "learning_rate": 9.486153221306333e-06, "loss": 0.4609, "step": 2469 }, { "epoch": 0.6934306569343066, "grad_norm": 0.7631643414497375, "learning_rate": 9.485431754675192e-06, "loss": 0.4527, "step": 2470 }, { "epoch": 0.6937113980909602, "grad_norm": 0.8071423172950745, "learning_rate": 9.484709809386667e-06, "loss": 0.4956, "step": 2471 }, { "epoch": 0.6939921392476137, "grad_norm": 0.7490708827972412, "learning_rate": 9.483987385517798e-06, "loss": 0.4774, "step": 2472 }, { "epoch": 0.6942728804042673, "grad_norm": 0.7812394499778748, "learning_rate": 9.48326448314568e-06, "loss": 0.4388, "step": 2473 }, { "epoch": 0.6945536215609208, "grad_norm": 0.6985755562782288, "learning_rate": 9.482541102347455e-06, "loss": 0.4439, "step": 2474 }, { "epoch": 0.6948343627175744, "grad_norm": 0.6900184750556946, "learning_rate": 9.481817243200321e-06, "loss": 0.4162, "step": 2475 }, { "epoch": 0.6951151038742279, "grad_norm": 0.6658081412315369, "learning_rate": 9.481092905781522e-06, "loss": 0.4384, "step": 2476 }, { "epoch": 0.6953958450308815, "grad_norm": 0.6750561594963074, "learning_rate": 9.48036809016835e-06, "loss": 0.4016, "step": 2477 }, { "epoch": 0.6956765861875351, "grad_norm": 0.7680978178977966, "learning_rate": 9.47964279643816e-06, "loss": 0.4465, "step": 2478 }, { "epoch": 0.6959573273441887, "grad_norm": 0.7434172630310059, "learning_rate": 9.478917024668349e-06, "loss": 0.4862, "step": 2479 }, { "epoch": 0.6962380685008422, "grad_norm": 0.7565758228302002, "learning_rate": 9.478190774936362e-06, "loss": 0.4587, "step": 2480 }, { "epoch": 0.6965188096574958, "grad_norm": 0.6876004338264465, "learning_rate": 9.477464047319706e-06, "loss": 0.4514, "step": 2481 }, { "epoch": 0.6967995508141493, "grad_norm": 0.7988496422767639, "learning_rate": 9.47673684189593e-06, "loss": 0.498, "step": 2482 }, { "epoch": 0.6970802919708029, "grad_norm": 0.6999518871307373, "learning_rate": 9.476009158742638e-06, "loss": 0.4252, "step": 2483 }, { "epoch": 0.6973610331274565, "grad_norm": 0.6802830696105957, "learning_rate": 9.475280997937482e-06, "loss": 0.4479, "step": 2484 }, { "epoch": 0.69764177428411, "grad_norm": 0.7858578562736511, "learning_rate": 9.474552359558167e-06, "loss": 0.4592, "step": 2485 }, { "epoch": 0.6979225154407637, "grad_norm": 0.7689603567123413, "learning_rate": 9.473823243682451e-06, "loss": 0.4533, "step": 2486 }, { "epoch": 0.6982032565974172, "grad_norm": 0.7005722522735596, "learning_rate": 9.473093650388138e-06, "loss": 0.458, "step": 2487 }, { "epoch": 0.6984839977540708, "grad_norm": 0.7821038961410522, "learning_rate": 9.472363579753088e-06, "loss": 0.4634, "step": 2488 }, { "epoch": 0.6987647389107243, "grad_norm": 0.8071959018707275, "learning_rate": 9.471633031855208e-06, "loss": 0.5033, "step": 2489 }, { "epoch": 0.6990454800673779, "grad_norm": 0.7244101762771606, "learning_rate": 9.47090200677246e-06, "loss": 0.416, "step": 2490 }, { "epoch": 0.6993262212240314, "grad_norm": 0.7001928091049194, "learning_rate": 9.47017050458285e-06, "loss": 0.4284, "step": 2491 }, { "epoch": 0.699606962380685, "grad_norm": 0.6796762943267822, "learning_rate": 9.469438525364442e-06, "loss": 0.4183, "step": 2492 }, { "epoch": 0.6998877035373385, "grad_norm": 0.6669578552246094, "learning_rate": 9.46870606919535e-06, "loss": 0.4371, "step": 2493 }, { "epoch": 0.7001684446939921, "grad_norm": 0.7107380032539368, "learning_rate": 9.467973136153734e-06, "loss": 0.4376, "step": 2494 }, { "epoch": 0.7004491858506458, "grad_norm": 0.6801512241363525, "learning_rate": 9.467239726317811e-06, "loss": 0.4525, "step": 2495 }, { "epoch": 0.7007299270072993, "grad_norm": 0.7749803066253662, "learning_rate": 9.466505839765842e-06, "loss": 0.4208, "step": 2496 }, { "epoch": 0.7010106681639529, "grad_norm": 0.7285341620445251, "learning_rate": 9.465771476576146e-06, "loss": 0.4433, "step": 2497 }, { "epoch": 0.7012914093206064, "grad_norm": 0.8107213377952576, "learning_rate": 9.46503663682709e-06, "loss": 0.5056, "step": 2498 }, { "epoch": 0.70157215047726, "grad_norm": 0.7753007411956787, "learning_rate": 9.46430132059709e-06, "loss": 0.4399, "step": 2499 }, { "epoch": 0.7018528916339135, "grad_norm": 0.8164193630218506, "learning_rate": 9.463565527964612e-06, "loss": 0.4448, "step": 2500 }, { "epoch": 0.7021336327905671, "grad_norm": 0.6882041096687317, "learning_rate": 9.462829259008182e-06, "loss": 0.4339, "step": 2501 }, { "epoch": 0.7024143739472206, "grad_norm": 0.8022362589836121, "learning_rate": 9.462092513806364e-06, "loss": 0.4391, "step": 2502 }, { "epoch": 0.7026951151038743, "grad_norm": 0.6818189024925232, "learning_rate": 9.461355292437782e-06, "loss": 0.4583, "step": 2503 }, { "epoch": 0.7029758562605278, "grad_norm": 0.8273773193359375, "learning_rate": 9.460617594981104e-06, "loss": 0.5037, "step": 2504 }, { "epoch": 0.7032565974171814, "grad_norm": 0.7536837458610535, "learning_rate": 9.459879421515057e-06, "loss": 0.498, "step": 2505 }, { "epoch": 0.7035373385738349, "grad_norm": 0.6416440606117249, "learning_rate": 9.45914077211841e-06, "loss": 0.44, "step": 2506 }, { "epoch": 0.7038180797304885, "grad_norm": 0.7490890622138977, "learning_rate": 9.458401646869992e-06, "loss": 0.4497, "step": 2507 }, { "epoch": 0.704098820887142, "grad_norm": 0.9517717361450195, "learning_rate": 9.457662045848674e-06, "loss": 0.4104, "step": 2508 }, { "epoch": 0.7043795620437956, "grad_norm": 0.7885116338729858, "learning_rate": 9.456921969133385e-06, "loss": 0.4445, "step": 2509 }, { "epoch": 0.7046603032004491, "grad_norm": 0.8300957083702087, "learning_rate": 9.456181416803096e-06, "loss": 0.4213, "step": 2510 }, { "epoch": 0.7049410443571027, "grad_norm": 0.8503570556640625, "learning_rate": 9.45544038893684e-06, "loss": 0.4574, "step": 2511 }, { "epoch": 0.7052217855137564, "grad_norm": 0.6946449875831604, "learning_rate": 9.454698885613691e-06, "loss": 0.4484, "step": 2512 }, { "epoch": 0.7055025266704099, "grad_norm": 0.7207089066505432, "learning_rate": 9.453956906912779e-06, "loss": 0.4673, "step": 2513 }, { "epoch": 0.7057832678270635, "grad_norm": 0.8768999576568604, "learning_rate": 9.453214452913284e-06, "loss": 0.44, "step": 2514 }, { "epoch": 0.706064008983717, "grad_norm": 0.8195427656173706, "learning_rate": 9.452471523694434e-06, "loss": 0.4721, "step": 2515 }, { "epoch": 0.7063447501403706, "grad_norm": 0.7296256422996521, "learning_rate": 9.45172811933551e-06, "loss": 0.4288, "step": 2516 }, { "epoch": 0.7066254912970241, "grad_norm": 0.7585498094558716, "learning_rate": 9.450984239915848e-06, "loss": 0.4691, "step": 2517 }, { "epoch": 0.7069062324536777, "grad_norm": 0.7398808598518372, "learning_rate": 9.450239885514824e-06, "loss": 0.4337, "step": 2518 }, { "epoch": 0.7071869736103312, "grad_norm": 0.8726441264152527, "learning_rate": 9.449495056211874e-06, "loss": 0.466, "step": 2519 }, { "epoch": 0.7074677147669849, "grad_norm": 0.7603203654289246, "learning_rate": 9.448749752086482e-06, "loss": 0.4295, "step": 2520 }, { "epoch": 0.7077484559236384, "grad_norm": 0.8504489660263062, "learning_rate": 9.448003973218181e-06, "loss": 0.4655, "step": 2521 }, { "epoch": 0.708029197080292, "grad_norm": 0.9464203715324402, "learning_rate": 9.447257719686557e-06, "loss": 0.4404, "step": 2522 }, { "epoch": 0.7083099382369455, "grad_norm": 0.8075183033943176, "learning_rate": 9.446510991571243e-06, "loss": 0.4548, "step": 2523 }, { "epoch": 0.7085906793935991, "grad_norm": 0.7006256580352783, "learning_rate": 9.44576378895193e-06, "loss": 0.4606, "step": 2524 }, { "epoch": 0.7088714205502527, "grad_norm": 0.6948923468589783, "learning_rate": 9.445016111908349e-06, "loss": 0.4215, "step": 2525 }, { "epoch": 0.7091521617069062, "grad_norm": 0.8745613694190979, "learning_rate": 9.444267960520292e-06, "loss": 0.4809, "step": 2526 }, { "epoch": 0.7094329028635598, "grad_norm": 0.8244311809539795, "learning_rate": 9.443519334867595e-06, "loss": 0.5293, "step": 2527 }, { "epoch": 0.7097136440202133, "grad_norm": 0.750098705291748, "learning_rate": 9.442770235030145e-06, "loss": 0.4484, "step": 2528 }, { "epoch": 0.709994385176867, "grad_norm": 0.8122600317001343, "learning_rate": 9.442020661087885e-06, "loss": 0.4568, "step": 2529 }, { "epoch": 0.7102751263335205, "grad_norm": 0.7557501792907715, "learning_rate": 9.441270613120803e-06, "loss": 0.4357, "step": 2530 }, { "epoch": 0.7105558674901741, "grad_norm": 0.820770263671875, "learning_rate": 9.44052009120894e-06, "loss": 0.4499, "step": 2531 }, { "epoch": 0.7108366086468276, "grad_norm": 0.664503276348114, "learning_rate": 9.439769095432387e-06, "loss": 0.4077, "step": 2532 }, { "epoch": 0.7111173498034812, "grad_norm": 0.7776325345039368, "learning_rate": 9.439017625871285e-06, "loss": 0.4862, "step": 2533 }, { "epoch": 0.7113980909601347, "grad_norm": 0.7743126153945923, "learning_rate": 9.438265682605826e-06, "loss": 0.4426, "step": 2534 }, { "epoch": 0.7116788321167883, "grad_norm": 0.8332822918891907, "learning_rate": 9.437513265716253e-06, "loss": 0.4954, "step": 2535 }, { "epoch": 0.7119595732734418, "grad_norm": 0.7532265186309814, "learning_rate": 9.436760375282858e-06, "loss": 0.4424, "step": 2536 }, { "epoch": 0.7122403144300955, "grad_norm": 0.6855920553207397, "learning_rate": 9.436007011385988e-06, "loss": 0.4272, "step": 2537 }, { "epoch": 0.712521055586749, "grad_norm": 0.6882874965667725, "learning_rate": 9.435253174106036e-06, "loss": 0.4638, "step": 2538 }, { "epoch": 0.7128017967434026, "grad_norm": 0.8163425326347351, "learning_rate": 9.434498863523444e-06, "loss": 0.4793, "step": 2539 }, { "epoch": 0.7130825379000562, "grad_norm": 0.762488842010498, "learning_rate": 9.433744079718712e-06, "loss": 0.4574, "step": 2540 }, { "epoch": 0.7133632790567097, "grad_norm": 0.6940890550613403, "learning_rate": 9.432988822772382e-06, "loss": 0.4628, "step": 2541 }, { "epoch": 0.7136440202133633, "grad_norm": 0.6820698976516724, "learning_rate": 9.432233092765052e-06, "loss": 0.4578, "step": 2542 }, { "epoch": 0.7139247613700168, "grad_norm": 0.8692309856414795, "learning_rate": 9.43147688977737e-06, "loss": 0.5115, "step": 2543 }, { "epoch": 0.7142055025266704, "grad_norm": 0.654083251953125, "learning_rate": 9.43072021389003e-06, "loss": 0.4836, "step": 2544 }, { "epoch": 0.714486243683324, "grad_norm": 0.6994615793228149, "learning_rate": 9.429963065183781e-06, "loss": 0.4982, "step": 2545 }, { "epoch": 0.7147669848399776, "grad_norm": 0.8428835272789001, "learning_rate": 9.429205443739424e-06, "loss": 0.4899, "step": 2546 }, { "epoch": 0.7150477259966311, "grad_norm": 0.7471094727516174, "learning_rate": 9.428447349637804e-06, "loss": 0.4135, "step": 2547 }, { "epoch": 0.7153284671532847, "grad_norm": 0.7090533375740051, "learning_rate": 9.427688782959821e-06, "loss": 0.4321, "step": 2548 }, { "epoch": 0.7156092083099382, "grad_norm": 0.8219285011291504, "learning_rate": 9.426929743786426e-06, "loss": 0.4222, "step": 2549 }, { "epoch": 0.7158899494665918, "grad_norm": 0.7486048936843872, "learning_rate": 9.42617023219862e-06, "loss": 0.4577, "step": 2550 }, { "epoch": 0.7161706906232453, "grad_norm": 0.751075029373169, "learning_rate": 9.42541024827745e-06, "loss": 0.5137, "step": 2551 }, { "epoch": 0.7164514317798989, "grad_norm": 0.7988788485527039, "learning_rate": 9.424649792104016e-06, "loss": 0.4987, "step": 2552 }, { "epoch": 0.7167321729365524, "grad_norm": 0.6931146383285522, "learning_rate": 9.423888863759473e-06, "loss": 0.4494, "step": 2553 }, { "epoch": 0.7170129140932061, "grad_norm": 0.7630744576454163, "learning_rate": 9.42312746332502e-06, "loss": 0.4668, "step": 2554 }, { "epoch": 0.7172936552498597, "grad_norm": 0.928024411201477, "learning_rate": 9.42236559088191e-06, "loss": 0.4995, "step": 2555 }, { "epoch": 0.7175743964065132, "grad_norm": 0.7794921398162842, "learning_rate": 9.421603246511446e-06, "loss": 0.4671, "step": 2556 }, { "epoch": 0.7178551375631668, "grad_norm": 0.7536506056785583, "learning_rate": 9.42084043029498e-06, "loss": 0.4747, "step": 2557 }, { "epoch": 0.7181358787198203, "grad_norm": 0.7516429424285889, "learning_rate": 9.420077142313912e-06, "loss": 0.4614, "step": 2558 }, { "epoch": 0.7184166198764739, "grad_norm": 0.8014383912086487, "learning_rate": 9.419313382649699e-06, "loss": 0.4381, "step": 2559 }, { "epoch": 0.7186973610331274, "grad_norm": 0.8374093770980835, "learning_rate": 9.418549151383846e-06, "loss": 0.4249, "step": 2560 }, { "epoch": 0.718978102189781, "grad_norm": 0.8145158886909485, "learning_rate": 9.417784448597901e-06, "loss": 0.4368, "step": 2561 }, { "epoch": 0.7192588433464346, "grad_norm": 0.7724982500076294, "learning_rate": 9.417019274373476e-06, "loss": 0.4103, "step": 2562 }, { "epoch": 0.7195395845030882, "grad_norm": 0.7369022965431213, "learning_rate": 9.41625362879222e-06, "loss": 0.4599, "step": 2563 }, { "epoch": 0.7198203256597417, "grad_norm": 0.9183072447776794, "learning_rate": 9.415487511935838e-06, "loss": 0.416, "step": 2564 }, { "epoch": 0.7201010668163953, "grad_norm": 0.8330563902854919, "learning_rate": 9.41472092388609e-06, "loss": 0.4918, "step": 2565 }, { "epoch": 0.7203818079730488, "grad_norm": 0.7774697542190552, "learning_rate": 9.413953864724777e-06, "loss": 0.4765, "step": 2566 }, { "epoch": 0.7206625491297024, "grad_norm": 0.8402326703071594, "learning_rate": 9.413186334533755e-06, "loss": 0.446, "step": 2567 }, { "epoch": 0.720943290286356, "grad_norm": 0.8870805501937866, "learning_rate": 9.41241833339493e-06, "loss": 0.4717, "step": 2568 }, { "epoch": 0.7212240314430095, "grad_norm": 0.7984758615493774, "learning_rate": 9.411649861390261e-06, "loss": 0.4089, "step": 2569 }, { "epoch": 0.7215047725996631, "grad_norm": 0.8126974105834961, "learning_rate": 9.410880918601755e-06, "loss": 0.4458, "step": 2570 }, { "epoch": 0.7217855137563167, "grad_norm": 0.6914010643959045, "learning_rate": 9.410111505111466e-06, "loss": 0.4407, "step": 2571 }, { "epoch": 0.7220662549129703, "grad_norm": 0.7459649443626404, "learning_rate": 9.4093416210015e-06, "loss": 0.4435, "step": 2572 }, { "epoch": 0.7223469960696238, "grad_norm": 0.7722095847129822, "learning_rate": 9.408571266354017e-06, "loss": 0.4375, "step": 2573 }, { "epoch": 0.7226277372262774, "grad_norm": 0.8616966009140015, "learning_rate": 9.407800441251224e-06, "loss": 0.4755, "step": 2574 }, { "epoch": 0.7229084783829309, "grad_norm": 0.7333300709724426, "learning_rate": 9.407029145775377e-06, "loss": 0.4368, "step": 2575 }, { "epoch": 0.7231892195395845, "grad_norm": 0.805834174156189, "learning_rate": 9.406257380008788e-06, "loss": 0.4841, "step": 2576 }, { "epoch": 0.723469960696238, "grad_norm": 0.8962896466255188, "learning_rate": 9.40548514403381e-06, "loss": 0.4578, "step": 2577 }, { "epoch": 0.7237507018528916, "grad_norm": 0.8885279893875122, "learning_rate": 9.404712437932852e-06, "loss": 0.476, "step": 2578 }, { "epoch": 0.7240314430095453, "grad_norm": 0.8999950289726257, "learning_rate": 9.403939261788375e-06, "loss": 0.4839, "step": 2579 }, { "epoch": 0.7243121841661988, "grad_norm": 0.7031996250152588, "learning_rate": 9.403165615682888e-06, "loss": 0.4066, "step": 2580 }, { "epoch": 0.7245929253228524, "grad_norm": 0.8968575596809387, "learning_rate": 9.402391499698946e-06, "loss": 0.4359, "step": 2581 }, { "epoch": 0.7248736664795059, "grad_norm": 0.8842137455940247, "learning_rate": 9.40161691391916e-06, "loss": 0.5041, "step": 2582 }, { "epoch": 0.7251544076361595, "grad_norm": 0.7458582520484924, "learning_rate": 9.400841858426188e-06, "loss": 0.3874, "step": 2583 }, { "epoch": 0.725435148792813, "grad_norm": 0.912441611289978, "learning_rate": 9.400066333302742e-06, "loss": 0.5015, "step": 2584 }, { "epoch": 0.7257158899494666, "grad_norm": 0.8796662092208862, "learning_rate": 9.399290338631578e-06, "loss": 0.4423, "step": 2585 }, { "epoch": 0.7259966311061201, "grad_norm": 0.8011311292648315, "learning_rate": 9.398513874495506e-06, "loss": 0.4452, "step": 2586 }, { "epoch": 0.7262773722627737, "grad_norm": 0.8862401843070984, "learning_rate": 9.397736940977387e-06, "loss": 0.4832, "step": 2587 }, { "epoch": 0.7265581134194273, "grad_norm": 0.7400742173194885, "learning_rate": 9.39695953816013e-06, "loss": 0.4102, "step": 2588 }, { "epoch": 0.7268388545760809, "grad_norm": 0.6960920691490173, "learning_rate": 9.39618166612669e-06, "loss": 0.4858, "step": 2589 }, { "epoch": 0.7271195957327344, "grad_norm": 0.7522947788238525, "learning_rate": 9.395403324960084e-06, "loss": 0.4098, "step": 2590 }, { "epoch": 0.727400336889388, "grad_norm": 0.8645053505897522, "learning_rate": 9.394624514743368e-06, "loss": 0.459, "step": 2591 }, { "epoch": 0.7276810780460415, "grad_norm": 0.787739634513855, "learning_rate": 9.39384523555965e-06, "loss": 0.4518, "step": 2592 }, { "epoch": 0.7279618192026951, "grad_norm": 0.6793380379676819, "learning_rate": 9.393065487492095e-06, "loss": 0.4389, "step": 2593 }, { "epoch": 0.7282425603593486, "grad_norm": 0.9285565614700317, "learning_rate": 9.392285270623908e-06, "loss": 0.4026, "step": 2594 }, { "epoch": 0.7285233015160022, "grad_norm": 0.7372851371765137, "learning_rate": 9.391504585038353e-06, "loss": 0.4188, "step": 2595 }, { "epoch": 0.7288040426726559, "grad_norm": 0.763027012348175, "learning_rate": 9.390723430818735e-06, "loss": 0.4319, "step": 2596 }, { "epoch": 0.7290847838293094, "grad_norm": 0.7152758836746216, "learning_rate": 9.389941808048417e-06, "loss": 0.4743, "step": 2597 }, { "epoch": 0.729365524985963, "grad_norm": 0.7588361501693726, "learning_rate": 9.38915971681081e-06, "loss": 0.4491, "step": 2598 }, { "epoch": 0.7296462661426165, "grad_norm": 0.7377399206161499, "learning_rate": 9.388377157189373e-06, "loss": 0.432, "step": 2599 }, { "epoch": 0.7299270072992701, "grad_norm": 0.7724609375, "learning_rate": 9.387594129267612e-06, "loss": 0.4394, "step": 2600 }, { "epoch": 0.7302077484559236, "grad_norm": 0.7818425297737122, "learning_rate": 9.386810633129093e-06, "loss": 0.4068, "step": 2601 }, { "epoch": 0.7304884896125772, "grad_norm": 0.814519464969635, "learning_rate": 9.386026668857421e-06, "loss": 0.4305, "step": 2602 }, { "epoch": 0.7307692307692307, "grad_norm": 0.7100637555122375, "learning_rate": 9.385242236536259e-06, "loss": 0.4704, "step": 2603 }, { "epoch": 0.7310499719258844, "grad_norm": 0.8754744529724121, "learning_rate": 9.384457336249316e-06, "loss": 0.4577, "step": 2604 }, { "epoch": 0.7313307130825379, "grad_norm": 0.6805042624473572, "learning_rate": 9.38367196808035e-06, "loss": 0.4422, "step": 2605 }, { "epoch": 0.7316114542391915, "grad_norm": 0.9314867258071899, "learning_rate": 9.382886132113172e-06, "loss": 0.4575, "step": 2606 }, { "epoch": 0.731892195395845, "grad_norm": 0.7781674265861511, "learning_rate": 9.382099828431643e-06, "loss": 0.4258, "step": 2607 }, { "epoch": 0.7321729365524986, "grad_norm": 0.7923882603645325, "learning_rate": 9.381313057119669e-06, "loss": 0.497, "step": 2608 }, { "epoch": 0.7324536777091522, "grad_norm": 0.7565465569496155, "learning_rate": 9.380525818261211e-06, "loss": 0.466, "step": 2609 }, { "epoch": 0.7327344188658057, "grad_norm": 0.7454366087913513, "learning_rate": 9.37973811194028e-06, "loss": 0.4508, "step": 2610 }, { "epoch": 0.7330151600224593, "grad_norm": 0.8961068987846375, "learning_rate": 9.378949938240932e-06, "loss": 0.4454, "step": 2611 }, { "epoch": 0.7332959011791128, "grad_norm": 0.7383410930633545, "learning_rate": 9.378161297247278e-06, "loss": 0.4532, "step": 2612 }, { "epoch": 0.7335766423357665, "grad_norm": 0.6697604060173035, "learning_rate": 9.377372189043477e-06, "loss": 0.4546, "step": 2613 }, { "epoch": 0.73385738349242, "grad_norm": 0.7382734417915344, "learning_rate": 9.376582613713736e-06, "loss": 0.4346, "step": 2614 }, { "epoch": 0.7341381246490736, "grad_norm": 0.7783324718475342, "learning_rate": 9.375792571342314e-06, "loss": 0.4422, "step": 2615 }, { "epoch": 0.7344188658057271, "grad_norm": 0.6934186816215515, "learning_rate": 9.375002062013521e-06, "loss": 0.4458, "step": 2616 }, { "epoch": 0.7346996069623807, "grad_norm": 0.7689214944839478, "learning_rate": 9.374211085811714e-06, "loss": 0.4635, "step": 2617 }, { "epoch": 0.7349803481190342, "grad_norm": 0.7955026030540466, "learning_rate": 9.373419642821302e-06, "loss": 0.4586, "step": 2618 }, { "epoch": 0.7352610892756878, "grad_norm": 0.7794889211654663, "learning_rate": 9.372627733126743e-06, "loss": 0.4527, "step": 2619 }, { "epoch": 0.7355418304323413, "grad_norm": 0.7205256223678589, "learning_rate": 9.371835356812542e-06, "loss": 0.4143, "step": 2620 }, { "epoch": 0.735822571588995, "grad_norm": 0.8978113532066345, "learning_rate": 9.37104251396326e-06, "loss": 0.4683, "step": 2621 }, { "epoch": 0.7361033127456486, "grad_norm": 0.7564862966537476, "learning_rate": 9.370249204663502e-06, "loss": 0.4187, "step": 2622 }, { "epoch": 0.7363840539023021, "grad_norm": 0.7632102966308594, "learning_rate": 9.369455428997925e-06, "loss": 0.4575, "step": 2623 }, { "epoch": 0.7366647950589557, "grad_norm": 0.8099504113197327, "learning_rate": 9.368661187051238e-06, "loss": 0.4418, "step": 2624 }, { "epoch": 0.7369455362156092, "grad_norm": 0.8187891244888306, "learning_rate": 9.367866478908194e-06, "loss": 0.4875, "step": 2625 }, { "epoch": 0.7372262773722628, "grad_norm": 0.6581400632858276, "learning_rate": 9.367071304653603e-06, "loss": 0.399, "step": 2626 }, { "epoch": 0.7375070185289163, "grad_norm": 0.8173044323921204, "learning_rate": 9.366275664372317e-06, "loss": 0.4899, "step": 2627 }, { "epoch": 0.7377877596855699, "grad_norm": 0.8202473521232605, "learning_rate": 9.365479558149246e-06, "loss": 0.4487, "step": 2628 }, { "epoch": 0.7380685008422234, "grad_norm": 0.7705366015434265, "learning_rate": 9.364682986069344e-06, "loss": 0.5156, "step": 2629 }, { "epoch": 0.7383492419988771, "grad_norm": 0.6788835525512695, "learning_rate": 9.363885948217615e-06, "loss": 0.4232, "step": 2630 }, { "epoch": 0.7386299831555306, "grad_norm": 0.8718878030776978, "learning_rate": 9.363088444679116e-06, "loss": 0.4578, "step": 2631 }, { "epoch": 0.7389107243121842, "grad_norm": 0.7273135781288147, "learning_rate": 9.36229047553895e-06, "loss": 0.4215, "step": 2632 }, { "epoch": 0.7391914654688377, "grad_norm": 0.789441704750061, "learning_rate": 9.361492040882272e-06, "loss": 0.4474, "step": 2633 }, { "epoch": 0.7394722066254913, "grad_norm": 0.8108557462692261, "learning_rate": 9.360693140794285e-06, "loss": 0.4684, "step": 2634 }, { "epoch": 0.7397529477821448, "grad_norm": 0.6832651495933533, "learning_rate": 9.359893775360244e-06, "loss": 0.4142, "step": 2635 }, { "epoch": 0.7400336889387984, "grad_norm": 0.8333257436752319, "learning_rate": 9.35909394466545e-06, "loss": 0.4751, "step": 2636 }, { "epoch": 0.740314430095452, "grad_norm": 0.8688099980354309, "learning_rate": 9.35829364879526e-06, "loss": 0.5005, "step": 2637 }, { "epoch": 0.7405951712521056, "grad_norm": 0.7191597819328308, "learning_rate": 9.357492887835073e-06, "loss": 0.426, "step": 2638 }, { "epoch": 0.7408759124087592, "grad_norm": 0.8580526113510132, "learning_rate": 9.356691661870342e-06, "loss": 0.4623, "step": 2639 }, { "epoch": 0.7411566535654127, "grad_norm": 0.80132657289505, "learning_rate": 9.355889970986571e-06, "loss": 0.4536, "step": 2640 }, { "epoch": 0.7414373947220663, "grad_norm": 0.7642393708229065, "learning_rate": 9.355087815269307e-06, "loss": 0.4646, "step": 2641 }, { "epoch": 0.7417181358787198, "grad_norm": 0.7316069006919861, "learning_rate": 9.354285194804156e-06, "loss": 0.4471, "step": 2642 }, { "epoch": 0.7419988770353734, "grad_norm": 0.7916228175163269, "learning_rate": 9.353482109676767e-06, "loss": 0.4837, "step": 2643 }, { "epoch": 0.7422796181920269, "grad_norm": 0.7023115158081055, "learning_rate": 9.352678559972839e-06, "loss": 0.451, "step": 2644 }, { "epoch": 0.7425603593486805, "grad_norm": 0.7417224049568176, "learning_rate": 9.35187454577812e-06, "loss": 0.437, "step": 2645 }, { "epoch": 0.742841100505334, "grad_norm": 0.8513741493225098, "learning_rate": 9.351070067178416e-06, "loss": 0.4638, "step": 2646 }, { "epoch": 0.7431218416619877, "grad_norm": 0.758176326751709, "learning_rate": 9.350265124259571e-06, "loss": 0.4392, "step": 2647 }, { "epoch": 0.7434025828186412, "grad_norm": 0.7236112952232361, "learning_rate": 9.349459717107484e-06, "loss": 0.4508, "step": 2648 }, { "epoch": 0.7436833239752948, "grad_norm": 0.7783223390579224, "learning_rate": 9.348653845808103e-06, "loss": 0.4326, "step": 2649 }, { "epoch": 0.7439640651319483, "grad_norm": 0.8385047316551208, "learning_rate": 9.347847510447427e-06, "loss": 0.4438, "step": 2650 }, { "epoch": 0.7442448062886019, "grad_norm": 0.7798228859901428, "learning_rate": 9.347040711111501e-06, "loss": 0.4449, "step": 2651 }, { "epoch": 0.7445255474452555, "grad_norm": 0.7581650018692017, "learning_rate": 9.346233447886424e-06, "loss": 0.4328, "step": 2652 }, { "epoch": 0.744806288601909, "grad_norm": 0.8718856573104858, "learning_rate": 9.34542572085834e-06, "loss": 0.5039, "step": 2653 }, { "epoch": 0.7450870297585626, "grad_norm": 0.9271716475486755, "learning_rate": 9.344617530113446e-06, "loss": 0.4096, "step": 2654 }, { "epoch": 0.7453677709152162, "grad_norm": 0.8231848478317261, "learning_rate": 9.343808875737985e-06, "loss": 0.5526, "step": 2655 }, { "epoch": 0.7456485120718698, "grad_norm": 0.8132502436637878, "learning_rate": 9.342999757818256e-06, "loss": 0.4904, "step": 2656 }, { "epoch": 0.7459292532285233, "grad_norm": 0.7610129714012146, "learning_rate": 9.342190176440598e-06, "loss": 0.4681, "step": 2657 }, { "epoch": 0.7462099943851769, "grad_norm": 0.6845329999923706, "learning_rate": 9.341380131691406e-06, "loss": 0.4222, "step": 2658 }, { "epoch": 0.7464907355418304, "grad_norm": 0.7476111650466919, "learning_rate": 9.340569623657127e-06, "loss": 0.4427, "step": 2659 }, { "epoch": 0.746771476698484, "grad_norm": 0.9219915866851807, "learning_rate": 9.339758652424246e-06, "loss": 0.4787, "step": 2660 }, { "epoch": 0.7470522178551375, "grad_norm": 0.8040258884429932, "learning_rate": 9.338947218079312e-06, "loss": 0.4732, "step": 2661 }, { "epoch": 0.7473329590117911, "grad_norm": 0.7521624565124512, "learning_rate": 9.338135320708912e-06, "loss": 0.5128, "step": 2662 }, { "epoch": 0.7476137001684446, "grad_norm": 0.810127854347229, "learning_rate": 9.337322960399686e-06, "loss": 0.3961, "step": 2663 }, { "epoch": 0.7478944413250983, "grad_norm": 0.8981438279151917, "learning_rate": 9.336510137238328e-06, "loss": 0.4779, "step": 2664 }, { "epoch": 0.7481751824817519, "grad_norm": 0.8440060615539551, "learning_rate": 9.335696851311573e-06, "loss": 0.4866, "step": 2665 }, { "epoch": 0.7484559236384054, "grad_norm": 1.0122367143630981, "learning_rate": 9.334883102706214e-06, "loss": 0.491, "step": 2666 }, { "epoch": 0.748736664795059, "grad_norm": 0.8927567601203918, "learning_rate": 9.334068891509087e-06, "loss": 0.51, "step": 2667 }, { "epoch": 0.7490174059517125, "grad_norm": 0.7860920429229736, "learning_rate": 9.333254217807079e-06, "loss": 0.4393, "step": 2668 }, { "epoch": 0.7492981471083661, "grad_norm": 0.8904474973678589, "learning_rate": 9.332439081687128e-06, "loss": 0.4185, "step": 2669 }, { "epoch": 0.7495788882650196, "grad_norm": 0.839434802532196, "learning_rate": 9.331623483236218e-06, "loss": 0.4436, "step": 2670 }, { "epoch": 0.7498596294216732, "grad_norm": 0.8348051309585571, "learning_rate": 9.330807422541388e-06, "loss": 0.3844, "step": 2671 }, { "epoch": 0.7501403705783268, "grad_norm": 0.8297469615936279, "learning_rate": 9.329990899689723e-06, "loss": 0.4686, "step": 2672 }, { "epoch": 0.7504211117349804, "grad_norm": 0.7008973956108093, "learning_rate": 9.329173914768352e-06, "loss": 0.4336, "step": 2673 }, { "epoch": 0.7507018528916339, "grad_norm": 0.7615330219268799, "learning_rate": 9.328356467864466e-06, "loss": 0.4789, "step": 2674 }, { "epoch": 0.7509825940482875, "grad_norm": 0.850792407989502, "learning_rate": 9.327538559065292e-06, "loss": 0.4828, "step": 2675 }, { "epoch": 0.751263335204941, "grad_norm": 0.7741012573242188, "learning_rate": 9.326720188458113e-06, "loss": 0.4609, "step": 2676 }, { "epoch": 0.7515440763615946, "grad_norm": 0.6990548372268677, "learning_rate": 9.325901356130262e-06, "loss": 0.4384, "step": 2677 }, { "epoch": 0.7518248175182481, "grad_norm": 0.7923767566680908, "learning_rate": 9.325082062169122e-06, "loss": 0.4596, "step": 2678 }, { "epoch": 0.7521055586749017, "grad_norm": 0.7546195387840271, "learning_rate": 9.324262306662118e-06, "loss": 0.4544, "step": 2679 }, { "epoch": 0.7523862998315554, "grad_norm": 0.7449183464050293, "learning_rate": 9.323442089696731e-06, "loss": 0.4462, "step": 2680 }, { "epoch": 0.7526670409882089, "grad_norm": 0.7192502021789551, "learning_rate": 9.322621411360492e-06, "loss": 0.4377, "step": 2681 }, { "epoch": 0.7529477821448625, "grad_norm": 0.822742223739624, "learning_rate": 9.321800271740974e-06, "loss": 0.4444, "step": 2682 }, { "epoch": 0.753228523301516, "grad_norm": 0.7490763664245605, "learning_rate": 9.320978670925808e-06, "loss": 0.4186, "step": 2683 }, { "epoch": 0.7535092644581696, "grad_norm": 0.8998096585273743, "learning_rate": 9.320156609002668e-06, "loss": 0.4851, "step": 2684 }, { "epoch": 0.7537900056148231, "grad_norm": 0.7903598546981812, "learning_rate": 9.319334086059281e-06, "loss": 0.4381, "step": 2685 }, { "epoch": 0.7540707467714767, "grad_norm": 0.7860065698623657, "learning_rate": 9.318511102183421e-06, "loss": 0.3971, "step": 2686 }, { "epoch": 0.7543514879281302, "grad_norm": 0.7966722846031189, "learning_rate": 9.31768765746291e-06, "loss": 0.4809, "step": 2687 }, { "epoch": 0.7546322290847838, "grad_norm": 0.7666751146316528, "learning_rate": 9.316863751985621e-06, "loss": 0.4592, "step": 2688 }, { "epoch": 0.7549129702414374, "grad_norm": 0.8527171015739441, "learning_rate": 9.31603938583948e-06, "loss": 0.4475, "step": 2689 }, { "epoch": 0.755193711398091, "grad_norm": 0.705956757068634, "learning_rate": 9.315214559112454e-06, "loss": 0.4585, "step": 2690 }, { "epoch": 0.7554744525547445, "grad_norm": 0.7720552086830139, "learning_rate": 9.314389271892563e-06, "loss": 0.4396, "step": 2691 }, { "epoch": 0.7557551937113981, "grad_norm": 0.8717186450958252, "learning_rate": 9.31356352426788e-06, "loss": 0.4567, "step": 2692 }, { "epoch": 0.7560359348680517, "grad_norm": 0.8001463413238525, "learning_rate": 9.312737316326524e-06, "loss": 0.436, "step": 2693 }, { "epoch": 0.7563166760247052, "grad_norm": 0.6348888278007507, "learning_rate": 9.311910648156657e-06, "loss": 0.4088, "step": 2694 }, { "epoch": 0.7565974171813588, "grad_norm": 0.6831598281860352, "learning_rate": 9.311083519846502e-06, "loss": 0.4595, "step": 2695 }, { "epoch": 0.7568781583380123, "grad_norm": 0.8067934513092041, "learning_rate": 9.310255931484322e-06, "loss": 0.4339, "step": 2696 }, { "epoch": 0.757158899494666, "grad_norm": 0.6594023108482361, "learning_rate": 9.309427883158433e-06, "loss": 0.4293, "step": 2697 }, { "epoch": 0.7574396406513195, "grad_norm": 0.7691739201545715, "learning_rate": 9.308599374957198e-06, "loss": 0.4536, "step": 2698 }, { "epoch": 0.7577203818079731, "grad_norm": 0.7178975939750671, "learning_rate": 9.307770406969032e-06, "loss": 0.4255, "step": 2699 }, { "epoch": 0.7580011229646266, "grad_norm": 0.698609471321106, "learning_rate": 9.306940979282395e-06, "loss": 0.429, "step": 2700 }, { "epoch": 0.7582818641212802, "grad_norm": 0.7874751091003418, "learning_rate": 9.306111091985802e-06, "loss": 0.4856, "step": 2701 }, { "epoch": 0.7585626052779337, "grad_norm": 0.8140864968299866, "learning_rate": 9.305280745167809e-06, "loss": 0.4576, "step": 2702 }, { "epoch": 0.7588433464345873, "grad_norm": 0.872873067855835, "learning_rate": 9.304449938917029e-06, "loss": 0.4807, "step": 2703 }, { "epoch": 0.7591240875912408, "grad_norm": 0.7322248816490173, "learning_rate": 9.303618673322119e-06, "loss": 0.463, "step": 2704 }, { "epoch": 0.7594048287478944, "grad_norm": 0.6696175932884216, "learning_rate": 9.302786948471787e-06, "loss": 0.4232, "step": 2705 }, { "epoch": 0.759685569904548, "grad_norm": 0.7476364970207214, "learning_rate": 9.301954764454788e-06, "loss": 0.4252, "step": 2706 }, { "epoch": 0.7599663110612016, "grad_norm": 0.8410789370536804, "learning_rate": 9.30112212135993e-06, "loss": 0.4681, "step": 2707 }, { "epoch": 0.7602470522178552, "grad_norm": 0.8708897233009338, "learning_rate": 9.300289019276066e-06, "loss": 0.457, "step": 2708 }, { "epoch": 0.7605277933745087, "grad_norm": 0.7599568367004395, "learning_rate": 9.299455458292097e-06, "loss": 0.4662, "step": 2709 }, { "epoch": 0.7608085345311623, "grad_norm": 0.7744013667106628, "learning_rate": 9.29862143849698e-06, "loss": 0.4816, "step": 2710 }, { "epoch": 0.7610892756878158, "grad_norm": 0.8597412109375, "learning_rate": 9.297786959979715e-06, "loss": 0.4601, "step": 2711 }, { "epoch": 0.7613700168444694, "grad_norm": 0.8001948595046997, "learning_rate": 9.29695202282935e-06, "loss": 0.461, "step": 2712 }, { "epoch": 0.7616507580011229, "grad_norm": 0.7523707747459412, "learning_rate": 9.296116627134988e-06, "loss": 0.4746, "step": 2713 }, { "epoch": 0.7619314991577766, "grad_norm": 0.8415935039520264, "learning_rate": 9.295280772985775e-06, "loss": 0.4287, "step": 2714 }, { "epoch": 0.7622122403144301, "grad_norm": 0.7757467031478882, "learning_rate": 9.294444460470909e-06, "loss": 0.4568, "step": 2715 }, { "epoch": 0.7624929814710837, "grad_norm": 0.8341929912567139, "learning_rate": 9.293607689679633e-06, "loss": 0.4796, "step": 2716 }, { "epoch": 0.7627737226277372, "grad_norm": 0.6295900940895081, "learning_rate": 9.292770460701247e-06, "loss": 0.3763, "step": 2717 }, { "epoch": 0.7630544637843908, "grad_norm": 0.6859361529350281, "learning_rate": 9.291932773625092e-06, "loss": 0.4721, "step": 2718 }, { "epoch": 0.7633352049410443, "grad_norm": 0.7676633596420288, "learning_rate": 9.29109462854056e-06, "loss": 0.4323, "step": 2719 }, { "epoch": 0.7636159460976979, "grad_norm": 0.7976534366607666, "learning_rate": 9.290256025537096e-06, "loss": 0.4654, "step": 2720 }, { "epoch": 0.7638966872543514, "grad_norm": 0.7481630444526672, "learning_rate": 9.289416964704186e-06, "loss": 0.4683, "step": 2721 }, { "epoch": 0.764177428411005, "grad_norm": 0.6892101168632507, "learning_rate": 9.288577446131372e-06, "loss": 0.4738, "step": 2722 }, { "epoch": 0.7644581695676587, "grad_norm": 0.6679108738899231, "learning_rate": 9.287737469908243e-06, "loss": 0.4585, "step": 2723 }, { "epoch": 0.7647389107243122, "grad_norm": 0.7333400249481201, "learning_rate": 9.286897036124435e-06, "loss": 0.4694, "step": 2724 }, { "epoch": 0.7650196518809658, "grad_norm": 0.866708517074585, "learning_rate": 9.286056144869633e-06, "loss": 0.4638, "step": 2725 }, { "epoch": 0.7653003930376193, "grad_norm": 0.7348212003707886, "learning_rate": 9.285214796233573e-06, "loss": 0.46, "step": 2726 }, { "epoch": 0.7655811341942729, "grad_norm": 0.9280748963356018, "learning_rate": 9.284372990306036e-06, "loss": 0.4356, "step": 2727 }, { "epoch": 0.7658618753509264, "grad_norm": 0.6627675890922546, "learning_rate": 9.283530727176857e-06, "loss": 0.4305, "step": 2728 }, { "epoch": 0.76614261650758, "grad_norm": 0.7102477550506592, "learning_rate": 9.282688006935918e-06, "loss": 0.4492, "step": 2729 }, { "epoch": 0.7664233576642335, "grad_norm": 0.7039529085159302, "learning_rate": 9.281844829673146e-06, "loss": 0.3993, "step": 2730 }, { "epoch": 0.7667040988208872, "grad_norm": 0.8434668779373169, "learning_rate": 9.281001195478522e-06, "loss": 0.4584, "step": 2731 }, { "epoch": 0.7669848399775407, "grad_norm": 0.6976370811462402, "learning_rate": 9.280157104442072e-06, "loss": 0.4196, "step": 2732 }, { "epoch": 0.7672655811341943, "grad_norm": 0.764058530330658, "learning_rate": 9.27931255665387e-06, "loss": 0.4895, "step": 2733 }, { "epoch": 0.7675463222908478, "grad_norm": 0.8501991629600525, "learning_rate": 9.278467552204045e-06, "loss": 0.5196, "step": 2734 }, { "epoch": 0.7678270634475014, "grad_norm": 0.6693174242973328, "learning_rate": 9.277622091182769e-06, "loss": 0.4278, "step": 2735 }, { "epoch": 0.768107804604155, "grad_norm": 0.8203902244567871, "learning_rate": 9.276776173680264e-06, "loss": 0.427, "step": 2736 }, { "epoch": 0.7683885457608085, "grad_norm": 0.864119291305542, "learning_rate": 9.275929799786801e-06, "loss": 0.441, "step": 2737 }, { "epoch": 0.7686692869174621, "grad_norm": 0.7313922047615051, "learning_rate": 9.2750829695927e-06, "loss": 0.4288, "step": 2738 }, { "epoch": 0.7689500280741156, "grad_norm": 0.7067921161651611, "learning_rate": 9.27423568318833e-06, "loss": 0.4531, "step": 2739 }, { "epoch": 0.7692307692307693, "grad_norm": 0.858953595161438, "learning_rate": 9.273387940664108e-06, "loss": 0.4526, "step": 2740 }, { "epoch": 0.7695115103874228, "grad_norm": 0.822174608707428, "learning_rate": 9.272539742110498e-06, "loss": 0.4631, "step": 2741 }, { "epoch": 0.7697922515440764, "grad_norm": 0.7382689118385315, "learning_rate": 9.271691087618015e-06, "loss": 0.4529, "step": 2742 }, { "epoch": 0.7700729927007299, "grad_norm": 0.9063779711723328, "learning_rate": 9.270841977277226e-06, "loss": 0.4479, "step": 2743 }, { "epoch": 0.7703537338573835, "grad_norm": 0.8486456274986267, "learning_rate": 9.269992411178738e-06, "loss": 0.4504, "step": 2744 }, { "epoch": 0.770634475014037, "grad_norm": 0.8725316524505615, "learning_rate": 9.269142389413213e-06, "loss": 0.474, "step": 2745 }, { "epoch": 0.7709152161706906, "grad_norm": 0.8478258848190308, "learning_rate": 9.268291912071362e-06, "loss": 0.4731, "step": 2746 }, { "epoch": 0.7711959573273441, "grad_norm": 1.1193541288375854, "learning_rate": 9.26744097924394e-06, "loss": 0.4823, "step": 2747 }, { "epoch": 0.7714766984839978, "grad_norm": 0.8588424921035767, "learning_rate": 9.266589591021752e-06, "loss": 0.4613, "step": 2748 }, { "epoch": 0.7717574396406514, "grad_norm": 0.7786567211151123, "learning_rate": 9.265737747495657e-06, "loss": 0.4995, "step": 2749 }, { "epoch": 0.7720381807973049, "grad_norm": 1.015882134437561, "learning_rate": 9.264885448756557e-06, "loss": 0.4784, "step": 2750 }, { "epoch": 0.7723189219539585, "grad_norm": 0.9489424824714661, "learning_rate": 9.264032694895404e-06, "loss": 0.4435, "step": 2751 }, { "epoch": 0.772599663110612, "grad_norm": 0.7556193470954895, "learning_rate": 9.263179486003196e-06, "loss": 0.4302, "step": 2752 }, { "epoch": 0.7728804042672656, "grad_norm": 0.8387235999107361, "learning_rate": 9.262325822170985e-06, "loss": 0.4245, "step": 2753 }, { "epoch": 0.7731611454239191, "grad_norm": 1.0417672395706177, "learning_rate": 9.261471703489869e-06, "loss": 0.4428, "step": 2754 }, { "epoch": 0.7734418865805727, "grad_norm": 0.7097517848014832, "learning_rate": 9.260617130050991e-06, "loss": 0.4779, "step": 2755 }, { "epoch": 0.7737226277372263, "grad_norm": 0.7087475657463074, "learning_rate": 9.25976210194555e-06, "loss": 0.3939, "step": 2756 }, { "epoch": 0.7740033688938799, "grad_norm": 0.7660328149795532, "learning_rate": 9.258906619264785e-06, "loss": 0.4167, "step": 2757 }, { "epoch": 0.7742841100505334, "grad_norm": 0.696075439453125, "learning_rate": 9.258050682099993e-06, "loss": 0.3943, "step": 2758 }, { "epoch": 0.774564851207187, "grad_norm": 0.7289900183677673, "learning_rate": 9.257194290542508e-06, "loss": 0.4311, "step": 2759 }, { "epoch": 0.7748455923638405, "grad_norm": 0.7284126877784729, "learning_rate": 9.256337444683725e-06, "loss": 0.4583, "step": 2760 }, { "epoch": 0.7751263335204941, "grad_norm": 0.7524017691612244, "learning_rate": 9.255480144615077e-06, "loss": 0.4505, "step": 2761 }, { "epoch": 0.7754070746771476, "grad_norm": 0.7010413408279419, "learning_rate": 9.25462239042805e-06, "loss": 0.4214, "step": 2762 }, { "epoch": 0.7756878158338012, "grad_norm": 0.8451077938079834, "learning_rate": 9.25376418221418e-06, "loss": 0.4309, "step": 2763 }, { "epoch": 0.7759685569904547, "grad_norm": 0.8602747917175293, "learning_rate": 9.25290552006505e-06, "loss": 0.4438, "step": 2764 }, { "epoch": 0.7762492981471084, "grad_norm": 0.7057498693466187, "learning_rate": 9.252046404072288e-06, "loss": 0.4528, "step": 2765 }, { "epoch": 0.776530039303762, "grad_norm": 0.9576298594474792, "learning_rate": 9.251186834327577e-06, "loss": 0.4671, "step": 2766 }, { "epoch": 0.7768107804604155, "grad_norm": 0.8898288011550903, "learning_rate": 9.250326810922643e-06, "loss": 0.4412, "step": 2767 }, { "epoch": 0.7770915216170691, "grad_norm": 0.6958866715431213, "learning_rate": 9.249466333949264e-06, "loss": 0.4981, "step": 2768 }, { "epoch": 0.7773722627737226, "grad_norm": 0.9550066590309143, "learning_rate": 9.248605403499262e-06, "loss": 0.4692, "step": 2769 }, { "epoch": 0.7776530039303762, "grad_norm": 1.0029186010360718, "learning_rate": 9.24774401966451e-06, "loss": 0.5155, "step": 2770 }, { "epoch": 0.7779337450870297, "grad_norm": 0.8125027418136597, "learning_rate": 9.246882182536935e-06, "loss": 0.4989, "step": 2771 }, { "epoch": 0.7782144862436833, "grad_norm": 0.7664905190467834, "learning_rate": 9.2460198922085e-06, "loss": 0.4757, "step": 2772 }, { "epoch": 0.7784952274003369, "grad_norm": 0.7412874102592468, "learning_rate": 9.245157148771229e-06, "loss": 0.403, "step": 2773 }, { "epoch": 0.7787759685569905, "grad_norm": 0.8610168695449829, "learning_rate": 9.244293952317184e-06, "loss": 0.4119, "step": 2774 }, { "epoch": 0.779056709713644, "grad_norm": 0.830284059047699, "learning_rate": 9.243430302938483e-06, "loss": 0.4354, "step": 2775 }, { "epoch": 0.7793374508702976, "grad_norm": 0.7288861870765686, "learning_rate": 9.242566200727288e-06, "loss": 0.4577, "step": 2776 }, { "epoch": 0.7796181920269512, "grad_norm": 0.8208156824111938, "learning_rate": 9.24170164577581e-06, "loss": 0.4942, "step": 2777 }, { "epoch": 0.7798989331836047, "grad_norm": 0.8003494143486023, "learning_rate": 9.24083663817631e-06, "loss": 0.4731, "step": 2778 }, { "epoch": 0.7801796743402583, "grad_norm": 0.8178521990776062, "learning_rate": 9.239971178021096e-06, "loss": 0.4764, "step": 2779 }, { "epoch": 0.7804604154969118, "grad_norm": 0.7557169198989868, "learning_rate": 9.239105265402525e-06, "loss": 0.4152, "step": 2780 }, { "epoch": 0.7807411566535654, "grad_norm": 0.7637736797332764, "learning_rate": 9.238238900413e-06, "loss": 0.4447, "step": 2781 }, { "epoch": 0.781021897810219, "grad_norm": 0.7243945598602295, "learning_rate": 9.237372083144977e-06, "loss": 0.4095, "step": 2782 }, { "epoch": 0.7813026389668726, "grad_norm": 0.7808176279067993, "learning_rate": 9.236504813690957e-06, "loss": 0.4295, "step": 2783 }, { "epoch": 0.7815833801235261, "grad_norm": 0.874909520149231, "learning_rate": 9.235637092143486e-06, "loss": 0.4434, "step": 2784 }, { "epoch": 0.7818641212801797, "grad_norm": 0.7889077663421631, "learning_rate": 9.234768918595165e-06, "loss": 0.4465, "step": 2785 }, { "epoch": 0.7821448624368332, "grad_norm": 0.8345769047737122, "learning_rate": 9.23390029313864e-06, "loss": 0.4444, "step": 2786 }, { "epoch": 0.7824256035934868, "grad_norm": 0.8471890687942505, "learning_rate": 9.233031215866603e-06, "loss": 0.4704, "step": 2787 }, { "epoch": 0.7827063447501403, "grad_norm": 0.8265674710273743, "learning_rate": 9.232161686871798e-06, "loss": 0.4182, "step": 2788 }, { "epoch": 0.7829870859067939, "grad_norm": 0.8616026639938354, "learning_rate": 9.231291706247018e-06, "loss": 0.4536, "step": 2789 }, { "epoch": 0.7832678270634476, "grad_norm": 0.8215060830116272, "learning_rate": 9.2304212740851e-06, "loss": 0.456, "step": 2790 }, { "epoch": 0.7835485682201011, "grad_norm": 0.7380470633506775, "learning_rate": 9.229550390478928e-06, "loss": 0.4337, "step": 2791 }, { "epoch": 0.7838293093767547, "grad_norm": 0.709854245185852, "learning_rate": 9.228679055521442e-06, "loss": 0.4849, "step": 2792 }, { "epoch": 0.7841100505334082, "grad_norm": 0.9454172253608704, "learning_rate": 9.227807269305624e-06, "loss": 0.5318, "step": 2793 }, { "epoch": 0.7843907916900618, "grad_norm": 0.7940680980682373, "learning_rate": 9.226935031924505e-06, "loss": 0.4719, "step": 2794 }, { "epoch": 0.7846715328467153, "grad_norm": 0.6572303771972656, "learning_rate": 9.226062343471165e-06, "loss": 0.4582, "step": 2795 }, { "epoch": 0.7849522740033689, "grad_norm": 0.7868558168411255, "learning_rate": 9.225189204038728e-06, "loss": 0.4732, "step": 2796 }, { "epoch": 0.7852330151600224, "grad_norm": 0.7322407364845276, "learning_rate": 9.224315613720378e-06, "loss": 0.4402, "step": 2797 }, { "epoch": 0.785513756316676, "grad_norm": 0.7923116087913513, "learning_rate": 9.223441572609335e-06, "loss": 0.4362, "step": 2798 }, { "epoch": 0.7857944974733296, "grad_norm": 0.6431361436843872, "learning_rate": 9.222567080798871e-06, "loss": 0.452, "step": 2799 }, { "epoch": 0.7860752386299832, "grad_norm": 0.7228332161903381, "learning_rate": 9.221692138382305e-06, "loss": 0.4618, "step": 2800 }, { "epoch": 0.7863559797866367, "grad_norm": 0.7577100396156311, "learning_rate": 9.22081674545301e-06, "loss": 0.4192, "step": 2801 }, { "epoch": 0.7866367209432903, "grad_norm": 0.7033113837242126, "learning_rate": 9.219940902104396e-06, "loss": 0.4457, "step": 2802 }, { "epoch": 0.7869174620999438, "grad_norm": 0.7653865814208984, "learning_rate": 9.219064608429932e-06, "loss": 0.4921, "step": 2803 }, { "epoch": 0.7871982032565974, "grad_norm": 0.7377403974533081, "learning_rate": 9.218187864523131e-06, "loss": 0.4363, "step": 2804 }, { "epoch": 0.787478944413251, "grad_norm": 0.7712346911430359, "learning_rate": 9.21731067047755e-06, "loss": 0.5021, "step": 2805 }, { "epoch": 0.7877596855699045, "grad_norm": 0.7734142541885376, "learning_rate": 9.216433026386802e-06, "loss": 0.4691, "step": 2806 }, { "epoch": 0.7880404267265582, "grad_norm": 0.7538473010063171, "learning_rate": 9.215554932344542e-06, "loss": 0.4895, "step": 2807 }, { "epoch": 0.7883211678832117, "grad_norm": 0.7117080092430115, "learning_rate": 9.214676388444472e-06, "loss": 0.4257, "step": 2808 }, { "epoch": 0.7886019090398653, "grad_norm": 0.7666523456573486, "learning_rate": 9.213797394780349e-06, "loss": 0.4409, "step": 2809 }, { "epoch": 0.7888826501965188, "grad_norm": 0.6807367205619812, "learning_rate": 9.212917951445971e-06, "loss": 0.4444, "step": 2810 }, { "epoch": 0.7891633913531724, "grad_norm": 0.7090035676956177, "learning_rate": 9.212038058535189e-06, "loss": 0.4526, "step": 2811 }, { "epoch": 0.7894441325098259, "grad_norm": 0.854701817035675, "learning_rate": 9.211157716141896e-06, "loss": 0.4461, "step": 2812 }, { "epoch": 0.7897248736664795, "grad_norm": 0.7089012265205383, "learning_rate": 9.21027692436004e-06, "loss": 0.4842, "step": 2813 }, { "epoch": 0.790005614823133, "grad_norm": 0.8200495839118958, "learning_rate": 9.209395683283615e-06, "loss": 0.4178, "step": 2814 }, { "epoch": 0.7902863559797867, "grad_norm": 0.6886423230171204, "learning_rate": 9.208513993006655e-06, "loss": 0.4509, "step": 2815 }, { "epoch": 0.7905670971364402, "grad_norm": 0.8361442685127258, "learning_rate": 9.207631853623256e-06, "loss": 0.481, "step": 2816 }, { "epoch": 0.7908478382930938, "grad_norm": 0.6882129907608032, "learning_rate": 9.206749265227551e-06, "loss": 0.4046, "step": 2817 }, { "epoch": 0.7911285794497473, "grad_norm": 0.7281439900398254, "learning_rate": 9.205866227913723e-06, "loss": 0.4241, "step": 2818 }, { "epoch": 0.7914093206064009, "grad_norm": 0.7652041912078857, "learning_rate": 9.204982741776005e-06, "loss": 0.4264, "step": 2819 }, { "epoch": 0.7916900617630545, "grad_norm": 0.8479816913604736, "learning_rate": 9.20409880690868e-06, "loss": 0.4743, "step": 2820 }, { "epoch": 0.791970802919708, "grad_norm": 0.741902768611908, "learning_rate": 9.203214423406073e-06, "loss": 0.4311, "step": 2821 }, { "epoch": 0.7922515440763616, "grad_norm": 0.6678258776664734, "learning_rate": 9.20232959136256e-06, "loss": 0.4046, "step": 2822 }, { "epoch": 0.7925322852330151, "grad_norm": 0.7125440835952759, "learning_rate": 9.201444310872566e-06, "loss": 0.4241, "step": 2823 }, { "epoch": 0.7928130263896688, "grad_norm": 0.7699744701385498, "learning_rate": 9.200558582030563e-06, "loss": 0.4255, "step": 2824 }, { "epoch": 0.7930937675463223, "grad_norm": 0.6562171578407288, "learning_rate": 9.199672404931068e-06, "loss": 0.4814, "step": 2825 }, { "epoch": 0.7933745087029759, "grad_norm": 0.7204014658927917, "learning_rate": 9.198785779668652e-06, "loss": 0.4513, "step": 2826 }, { "epoch": 0.7936552498596294, "grad_norm": 0.7226943373680115, "learning_rate": 9.197898706337927e-06, "loss": 0.4542, "step": 2827 }, { "epoch": 0.793935991016283, "grad_norm": 0.7600888013839722, "learning_rate": 9.197011185033558e-06, "loss": 0.4457, "step": 2828 }, { "epoch": 0.7942167321729365, "grad_norm": 0.7142552733421326, "learning_rate": 9.196123215850254e-06, "loss": 0.4777, "step": 2829 }, { "epoch": 0.7944974733295901, "grad_norm": 0.713534414768219, "learning_rate": 9.195234798882774e-06, "loss": 0.4491, "step": 2830 }, { "epoch": 0.7947782144862436, "grad_norm": 0.6138408780097961, "learning_rate": 9.194345934225925e-06, "loss": 0.4308, "step": 2831 }, { "epoch": 0.7950589556428973, "grad_norm": 0.6582531929016113, "learning_rate": 9.19345662197456e-06, "loss": 0.4151, "step": 2832 }, { "epoch": 0.7953396967995509, "grad_norm": 0.7898809313774109, "learning_rate": 9.192566862223585e-06, "loss": 0.4682, "step": 2833 }, { "epoch": 0.7956204379562044, "grad_norm": 0.6748396158218384, "learning_rate": 9.191676655067944e-06, "loss": 0.3966, "step": 2834 }, { "epoch": 0.795901179112858, "grad_norm": 0.7965052127838135, "learning_rate": 9.190786000602635e-06, "loss": 0.5002, "step": 2835 }, { "epoch": 0.7961819202695115, "grad_norm": 0.7563027739524841, "learning_rate": 9.189894898922708e-06, "loss": 0.4898, "step": 2836 }, { "epoch": 0.7964626614261651, "grad_norm": 0.7985860705375671, "learning_rate": 9.189003350123252e-06, "loss": 0.4847, "step": 2837 }, { "epoch": 0.7967434025828186, "grad_norm": 0.7177184820175171, "learning_rate": 9.188111354299407e-06, "loss": 0.4486, "step": 2838 }, { "epoch": 0.7970241437394722, "grad_norm": 0.679217517375946, "learning_rate": 9.187218911546363e-06, "loss": 0.4346, "step": 2839 }, { "epoch": 0.7973048848961257, "grad_norm": 0.6719887256622314, "learning_rate": 9.186326021959354e-06, "loss": 0.4408, "step": 2840 }, { "epoch": 0.7975856260527794, "grad_norm": 0.6565520763397217, "learning_rate": 9.185432685633666e-06, "loss": 0.4355, "step": 2841 }, { "epoch": 0.7978663672094329, "grad_norm": 0.69264817237854, "learning_rate": 9.184538902664628e-06, "loss": 0.4126, "step": 2842 }, { "epoch": 0.7981471083660865, "grad_norm": 0.7278082966804504, "learning_rate": 9.183644673147622e-06, "loss": 0.448, "step": 2843 }, { "epoch": 0.79842784952274, "grad_norm": 0.7132008075714111, "learning_rate": 9.182749997178074e-06, "loss": 0.46, "step": 2844 }, { "epoch": 0.7987085906793936, "grad_norm": 0.6451550126075745, "learning_rate": 9.181854874851454e-06, "loss": 0.4441, "step": 2845 }, { "epoch": 0.7989893318360471, "grad_norm": 0.7662902474403381, "learning_rate": 9.18095930626329e-06, "loss": 0.4358, "step": 2846 }, { "epoch": 0.7992700729927007, "grad_norm": 0.6235150098800659, "learning_rate": 9.180063291509148e-06, "loss": 0.4111, "step": 2847 }, { "epoch": 0.7995508141493542, "grad_norm": 0.7264047861099243, "learning_rate": 9.179166830684643e-06, "loss": 0.4382, "step": 2848 }, { "epoch": 0.7998315553060079, "grad_norm": 0.6406546831130981, "learning_rate": 9.178269923885444e-06, "loss": 0.432, "step": 2849 }, { "epoch": 0.8001122964626615, "grad_norm": 0.7896562218666077, "learning_rate": 9.17737257120726e-06, "loss": 0.4674, "step": 2850 }, { "epoch": 0.800393037619315, "grad_norm": 0.698302686214447, "learning_rate": 9.176474772745855e-06, "loss": 0.419, "step": 2851 }, { "epoch": 0.8006737787759686, "grad_norm": 0.7514543533325195, "learning_rate": 9.175576528597035e-06, "loss": 0.4471, "step": 2852 }, { "epoch": 0.8009545199326221, "grad_norm": 0.8828864097595215, "learning_rate": 9.174677838856651e-06, "loss": 0.4819, "step": 2853 }, { "epoch": 0.8012352610892757, "grad_norm": 0.6965618133544922, "learning_rate": 9.17377870362061e-06, "loss": 0.4305, "step": 2854 }, { "epoch": 0.8015160022459292, "grad_norm": 0.8631529211997986, "learning_rate": 9.172879122984861e-06, "loss": 0.4782, "step": 2855 }, { "epoch": 0.8017967434025828, "grad_norm": 0.9167166948318481, "learning_rate": 9.171979097045402e-06, "loss": 0.4496, "step": 2856 }, { "epoch": 0.8020774845592363, "grad_norm": 0.803749144077301, "learning_rate": 9.171078625898277e-06, "loss": 0.3924, "step": 2857 }, { "epoch": 0.80235822571589, "grad_norm": 0.7713818550109863, "learning_rate": 9.170177709639578e-06, "loss": 0.4555, "step": 2858 }, { "epoch": 0.8026389668725435, "grad_norm": 0.7084963321685791, "learning_rate": 9.169276348365447e-06, "loss": 0.4507, "step": 2859 }, { "epoch": 0.8029197080291971, "grad_norm": 0.8877184391021729, "learning_rate": 9.168374542172073e-06, "loss": 0.445, "step": 2860 }, { "epoch": 0.8032004491858507, "grad_norm": 0.9707546830177307, "learning_rate": 9.167472291155688e-06, "loss": 0.4303, "step": 2861 }, { "epoch": 0.8034811903425042, "grad_norm": 0.7353629469871521, "learning_rate": 9.166569595412576e-06, "loss": 0.4691, "step": 2862 }, { "epoch": 0.8037619314991578, "grad_norm": 0.725790798664093, "learning_rate": 9.165666455039065e-06, "loss": 0.4713, "step": 2863 }, { "epoch": 0.8040426726558113, "grad_norm": 0.7861348986625671, "learning_rate": 9.164762870131538e-06, "loss": 0.458, "step": 2864 }, { "epoch": 0.8043234138124649, "grad_norm": 0.8006153702735901, "learning_rate": 9.163858840786415e-06, "loss": 0.4518, "step": 2865 }, { "epoch": 0.8046041549691185, "grad_norm": 0.7562771439552307, "learning_rate": 9.162954367100169e-06, "loss": 0.4325, "step": 2866 }, { "epoch": 0.8048848961257721, "grad_norm": 0.6728021502494812, "learning_rate": 9.162049449169321e-06, "loss": 0.4948, "step": 2867 }, { "epoch": 0.8051656372824256, "grad_norm": 0.8100345134735107, "learning_rate": 9.161144087090438e-06, "loss": 0.4912, "step": 2868 }, { "epoch": 0.8054463784390792, "grad_norm": 0.6807064414024353, "learning_rate": 9.160238280960134e-06, "loss": 0.4895, "step": 2869 }, { "epoch": 0.8057271195957327, "grad_norm": 0.7116173505783081, "learning_rate": 9.159332030875072e-06, "loss": 0.4396, "step": 2870 }, { "epoch": 0.8060078607523863, "grad_norm": 0.7416521310806274, "learning_rate": 9.158425336931961e-06, "loss": 0.4543, "step": 2871 }, { "epoch": 0.8062886019090398, "grad_norm": 0.6447952389717102, "learning_rate": 9.157518199227558e-06, "loss": 0.4139, "step": 2872 }, { "epoch": 0.8065693430656934, "grad_norm": 0.7108721733093262, "learning_rate": 9.156610617858665e-06, "loss": 0.4872, "step": 2873 }, { "epoch": 0.8068500842223469, "grad_norm": 0.7568307518959045, "learning_rate": 9.155702592922138e-06, "loss": 0.4733, "step": 2874 }, { "epoch": 0.8071308253790006, "grad_norm": 0.7475255131721497, "learning_rate": 9.15479412451487e-06, "loss": 0.4626, "step": 2875 }, { "epoch": 0.8074115665356542, "grad_norm": 0.7216377854347229, "learning_rate": 9.15388521273381e-06, "loss": 0.4544, "step": 2876 }, { "epoch": 0.8076923076923077, "grad_norm": 0.7445193529129028, "learning_rate": 9.152975857675954e-06, "loss": 0.4728, "step": 2877 }, { "epoch": 0.8079730488489613, "grad_norm": 0.7817115783691406, "learning_rate": 9.152066059438339e-06, "loss": 0.4119, "step": 2878 }, { "epoch": 0.8082537900056148, "grad_norm": 0.5849059820175171, "learning_rate": 9.151155818118055e-06, "loss": 0.4306, "step": 2879 }, { "epoch": 0.8085345311622684, "grad_norm": 0.7628145813941956, "learning_rate": 9.150245133812233e-06, "loss": 0.4423, "step": 2880 }, { "epoch": 0.8088152723189219, "grad_norm": 0.8405887484550476, "learning_rate": 9.149334006618062e-06, "loss": 0.4941, "step": 2881 }, { "epoch": 0.8090960134755755, "grad_norm": 0.6933674216270447, "learning_rate": 9.148422436632768e-06, "loss": 0.4865, "step": 2882 }, { "epoch": 0.8093767546322291, "grad_norm": 0.6658686995506287, "learning_rate": 9.147510423953628e-06, "loss": 0.4648, "step": 2883 }, { "epoch": 0.8096574957888827, "grad_norm": 0.7244076728820801, "learning_rate": 9.146597968677968e-06, "loss": 0.4654, "step": 2884 }, { "epoch": 0.8099382369455362, "grad_norm": 0.6563810110092163, "learning_rate": 9.145685070903158e-06, "loss": 0.4436, "step": 2885 }, { "epoch": 0.8102189781021898, "grad_norm": 0.7178528904914856, "learning_rate": 9.14477173072662e-06, "loss": 0.4505, "step": 2886 }, { "epoch": 0.8104997192588433, "grad_norm": 0.7493821978569031, "learning_rate": 9.143857948245815e-06, "loss": 0.4372, "step": 2887 }, { "epoch": 0.8107804604154969, "grad_norm": 0.7121507525444031, "learning_rate": 9.14294372355826e-06, "loss": 0.4815, "step": 2888 }, { "epoch": 0.8110612015721504, "grad_norm": 0.7137668132781982, "learning_rate": 9.142029056761515e-06, "loss": 0.4552, "step": 2889 }, { "epoch": 0.811341942728804, "grad_norm": 0.7351555824279785, "learning_rate": 9.141113947953184e-06, "loss": 0.5112, "step": 2890 }, { "epoch": 0.8116226838854577, "grad_norm": 0.7549033164978027, "learning_rate": 9.140198397230926e-06, "loss": 0.4575, "step": 2891 }, { "epoch": 0.8119034250421112, "grad_norm": 0.7129078507423401, "learning_rate": 9.139282404692442e-06, "loss": 0.5095, "step": 2892 }, { "epoch": 0.8121841661987648, "grad_norm": 0.667210042476654, "learning_rate": 9.13836597043548e-06, "loss": 0.469, "step": 2893 }, { "epoch": 0.8124649073554183, "grad_norm": 0.8358027338981628, "learning_rate": 9.137449094557834e-06, "loss": 0.431, "step": 2894 }, { "epoch": 0.8127456485120719, "grad_norm": 0.8525927662849426, "learning_rate": 9.136531777157352e-06, "loss": 0.3989, "step": 2895 }, { "epoch": 0.8130263896687254, "grad_norm": 0.6969891786575317, "learning_rate": 9.135614018331922e-06, "loss": 0.4646, "step": 2896 }, { "epoch": 0.813307130825379, "grad_norm": 0.6864852905273438, "learning_rate": 9.13469581817948e-06, "loss": 0.389, "step": 2897 }, { "epoch": 0.8135878719820325, "grad_norm": 0.8523910045623779, "learning_rate": 9.133777176798013e-06, "loss": 0.4372, "step": 2898 }, { "epoch": 0.8138686131386861, "grad_norm": 0.7501009106636047, "learning_rate": 9.132858094285554e-06, "loss": 0.4855, "step": 2899 }, { "epoch": 0.8141493542953397, "grad_norm": 0.7910122871398926, "learning_rate": 9.131938570740177e-06, "loss": 0.4781, "step": 2900 }, { "epoch": 0.8144300954519933, "grad_norm": 0.8193109631538391, "learning_rate": 9.131018606260012e-06, "loss": 0.4149, "step": 2901 }, { "epoch": 0.8147108366086468, "grad_norm": 0.6814030408859253, "learning_rate": 9.13009820094323e-06, "loss": 0.4362, "step": 2902 }, { "epoch": 0.8149915777653004, "grad_norm": 0.7575172781944275, "learning_rate": 9.129177354888053e-06, "loss": 0.4787, "step": 2903 }, { "epoch": 0.815272318921954, "grad_norm": 0.6326907277107239, "learning_rate": 9.128256068192744e-06, "loss": 0.4439, "step": 2904 }, { "epoch": 0.8155530600786075, "grad_norm": 0.9037715792655945, "learning_rate": 9.12733434095562e-06, "loss": 0.465, "step": 2905 }, { "epoch": 0.8158338012352611, "grad_norm": 0.8178196549415588, "learning_rate": 9.12641217327504e-06, "loss": 0.4422, "step": 2906 }, { "epoch": 0.8161145423919146, "grad_norm": 0.7674875259399414, "learning_rate": 9.125489565249417e-06, "loss": 0.4536, "step": 2907 }, { "epoch": 0.8163952835485683, "grad_norm": 0.6643429398536682, "learning_rate": 9.1245665169772e-06, "loss": 0.4268, "step": 2908 }, { "epoch": 0.8166760247052218, "grad_norm": 0.683995246887207, "learning_rate": 9.123643028556894e-06, "loss": 0.4596, "step": 2909 }, { "epoch": 0.8169567658618754, "grad_norm": 0.8194730281829834, "learning_rate": 9.122719100087045e-06, "loss": 0.475, "step": 2910 }, { "epoch": 0.8172375070185289, "grad_norm": 0.7791884541511536, "learning_rate": 9.121794731666253e-06, "loss": 0.4867, "step": 2911 }, { "epoch": 0.8175182481751825, "grad_norm": 0.6854553818702698, "learning_rate": 9.12086992339316e-06, "loss": 0.4707, "step": 2912 }, { "epoch": 0.817798989331836, "grad_norm": 0.8855614066123962, "learning_rate": 9.119944675366453e-06, "loss": 0.4906, "step": 2913 }, { "epoch": 0.8180797304884896, "grad_norm": 0.7338504791259766, "learning_rate": 9.119018987684872e-06, "loss": 0.4181, "step": 2914 }, { "epoch": 0.8183604716451431, "grad_norm": 0.7446621656417847, "learning_rate": 9.118092860447198e-06, "loss": 0.4713, "step": 2915 }, { "epoch": 0.8186412128017967, "grad_norm": 0.7724880576133728, "learning_rate": 9.117166293752263e-06, "loss": 0.4471, "step": 2916 }, { "epoch": 0.8189219539584504, "grad_norm": 0.7833882570266724, "learning_rate": 9.116239287698944e-06, "loss": 0.4616, "step": 2917 }, { "epoch": 0.8192026951151039, "grad_norm": 0.7517616152763367, "learning_rate": 9.115311842386166e-06, "loss": 0.4527, "step": 2918 }, { "epoch": 0.8194834362717575, "grad_norm": 0.6753953099250793, "learning_rate": 9.114383957912898e-06, "loss": 0.4699, "step": 2919 }, { "epoch": 0.819764177428411, "grad_norm": 0.7758336067199707, "learning_rate": 9.11345563437816e-06, "loss": 0.473, "step": 2920 }, { "epoch": 0.8200449185850646, "grad_norm": 0.6592584252357483, "learning_rate": 9.112526871881019e-06, "loss": 0.4522, "step": 2921 }, { "epoch": 0.8203256597417181, "grad_norm": 0.6775717735290527, "learning_rate": 9.111597670520583e-06, "loss": 0.4808, "step": 2922 }, { "epoch": 0.8206064008983717, "grad_norm": 0.6942693591117859, "learning_rate": 9.110668030396011e-06, "loss": 0.4266, "step": 2923 }, { "epoch": 0.8208871420550252, "grad_norm": 0.6692604422569275, "learning_rate": 9.10973795160651e-06, "loss": 0.44, "step": 2924 }, { "epoch": 0.8211678832116789, "grad_norm": 0.6583316326141357, "learning_rate": 9.108807434251331e-06, "loss": 0.4197, "step": 2925 }, { "epoch": 0.8214486243683324, "grad_norm": 0.7477753162384033, "learning_rate": 9.107876478429773e-06, "loss": 0.4533, "step": 2926 }, { "epoch": 0.821729365524986, "grad_norm": 0.6819732189178467, "learning_rate": 9.106945084241185e-06, "loss": 0.4749, "step": 2927 }, { "epoch": 0.8220101066816395, "grad_norm": 0.6898617744445801, "learning_rate": 9.106013251784956e-06, "loss": 0.4463, "step": 2928 }, { "epoch": 0.8222908478382931, "grad_norm": 0.7254061698913574, "learning_rate": 9.105080981160525e-06, "loss": 0.4259, "step": 2929 }, { "epoch": 0.8225715889949466, "grad_norm": 0.8229217529296875, "learning_rate": 9.104148272467381e-06, "loss": 0.4616, "step": 2930 }, { "epoch": 0.8228523301516002, "grad_norm": 0.7421444654464722, "learning_rate": 9.103215125805054e-06, "loss": 0.4527, "step": 2931 }, { "epoch": 0.8231330713082537, "grad_norm": 0.6365233063697815, "learning_rate": 9.102281541273126e-06, "loss": 0.4251, "step": 2932 }, { "epoch": 0.8234138124649073, "grad_norm": 0.6618478298187256, "learning_rate": 9.101347518971223e-06, "loss": 0.4721, "step": 2933 }, { "epoch": 0.823694553621561, "grad_norm": 0.6431057453155518, "learning_rate": 9.100413058999015e-06, "loss": 0.4725, "step": 2934 }, { "epoch": 0.8239752947782145, "grad_norm": 0.6888259053230286, "learning_rate": 9.099478161456226e-06, "loss": 0.4443, "step": 2935 }, { "epoch": 0.8242560359348681, "grad_norm": 0.693355143070221, "learning_rate": 9.09854282644262e-06, "loss": 0.4585, "step": 2936 }, { "epoch": 0.8245367770915216, "grad_norm": 0.6703828573226929, "learning_rate": 9.09760705405801e-06, "loss": 0.4661, "step": 2937 }, { "epoch": 0.8248175182481752, "grad_norm": 0.6152010560035706, "learning_rate": 9.09667084440226e-06, "loss": 0.4251, "step": 2938 }, { "epoch": 0.8250982594048287, "grad_norm": 0.6722482442855835, "learning_rate": 9.09573419757527e-06, "loss": 0.4764, "step": 2939 }, { "epoch": 0.8253790005614823, "grad_norm": 0.8198792934417725, "learning_rate": 9.094797113676997e-06, "loss": 0.4774, "step": 2940 }, { "epoch": 0.8256597417181358, "grad_norm": 0.7244009971618652, "learning_rate": 9.093859592807439e-06, "loss": 0.4225, "step": 2941 }, { "epoch": 0.8259404828747895, "grad_norm": 0.66555255651474, "learning_rate": 9.092921635066643e-06, "loss": 0.4377, "step": 2942 }, { "epoch": 0.826221224031443, "grad_norm": 0.6388446092605591, "learning_rate": 9.091983240554703e-06, "loss": 0.4317, "step": 2943 }, { "epoch": 0.8265019651880966, "grad_norm": 0.6635749340057373, "learning_rate": 9.091044409371759e-06, "loss": 0.4841, "step": 2944 }, { "epoch": 0.8267827063447502, "grad_norm": 0.6262797117233276, "learning_rate": 9.090105141617995e-06, "loss": 0.4157, "step": 2945 }, { "epoch": 0.8270634475014037, "grad_norm": 0.7906641960144043, "learning_rate": 9.089165437393645e-06, "loss": 0.4703, "step": 2946 }, { "epoch": 0.8273441886580573, "grad_norm": 0.7401872277259827, "learning_rate": 9.08822529679899e-06, "loss": 0.444, "step": 2947 }, { "epoch": 0.8276249298147108, "grad_norm": 0.6760455965995789, "learning_rate": 9.08728471993435e-06, "loss": 0.4674, "step": 2948 }, { "epoch": 0.8279056709713644, "grad_norm": 0.6818065047264099, "learning_rate": 9.086343706900105e-06, "loss": 0.4739, "step": 2949 }, { "epoch": 0.8281864121280179, "grad_norm": 0.7495522499084473, "learning_rate": 9.085402257796671e-06, "loss": 0.4641, "step": 2950 }, { "epoch": 0.8284671532846716, "grad_norm": 0.6967856884002686, "learning_rate": 9.084460372724514e-06, "loss": 0.4173, "step": 2951 }, { "epoch": 0.8287478944413251, "grad_norm": 0.7353311777114868, "learning_rate": 9.083518051784143e-06, "loss": 0.4236, "step": 2952 }, { "epoch": 0.8290286355979787, "grad_norm": 0.7496944069862366, "learning_rate": 9.082575295076121e-06, "loss": 0.4191, "step": 2953 }, { "epoch": 0.8293093767546322, "grad_norm": 0.7949837446212769, "learning_rate": 9.081632102701053e-06, "loss": 0.4701, "step": 2954 }, { "epoch": 0.8295901179112858, "grad_norm": 0.7971872687339783, "learning_rate": 9.080688474759587e-06, "loss": 0.4395, "step": 2955 }, { "epoch": 0.8298708590679393, "grad_norm": 0.7470804452896118, "learning_rate": 9.079744411352422e-06, "loss": 0.4189, "step": 2956 }, { "epoch": 0.8301516002245929, "grad_norm": 0.8063899874687195, "learning_rate": 9.078799912580305e-06, "loss": 0.541, "step": 2957 }, { "epoch": 0.8304323413812464, "grad_norm": 0.6667802333831787, "learning_rate": 9.077854978544027e-06, "loss": 0.4142, "step": 2958 }, { "epoch": 0.8307130825379001, "grad_norm": 0.8121907114982605, "learning_rate": 9.076909609344422e-06, "loss": 0.4515, "step": 2959 }, { "epoch": 0.8309938236945537, "grad_norm": 0.8178734183311462, "learning_rate": 9.075963805082378e-06, "loss": 0.4538, "step": 2960 }, { "epoch": 0.8312745648512072, "grad_norm": 0.7481734752655029, "learning_rate": 9.075017565858822e-06, "loss": 0.4633, "step": 2961 }, { "epoch": 0.8315553060078608, "grad_norm": 0.6679221391677856, "learning_rate": 9.074070891774733e-06, "loss": 0.4215, "step": 2962 }, { "epoch": 0.8318360471645143, "grad_norm": 0.9229344725608826, "learning_rate": 9.073123782931133e-06, "loss": 0.4561, "step": 2963 }, { "epoch": 0.8321167883211679, "grad_norm": 0.7118955254554749, "learning_rate": 9.07217623942909e-06, "loss": 0.4186, "step": 2964 }, { "epoch": 0.8323975294778214, "grad_norm": 0.6958334445953369, "learning_rate": 9.071228261369726e-06, "loss": 0.3944, "step": 2965 }, { "epoch": 0.832678270634475, "grad_norm": 0.718093752861023, "learning_rate": 9.070279848854198e-06, "loss": 0.4287, "step": 2966 }, { "epoch": 0.8329590117911286, "grad_norm": 0.91909259557724, "learning_rate": 9.069331001983715e-06, "loss": 0.4863, "step": 2967 }, { "epoch": 0.8332397529477822, "grad_norm": 0.8083088397979736, "learning_rate": 9.068381720859532e-06, "loss": 0.4631, "step": 2968 }, { "epoch": 0.8335204941044357, "grad_norm": 0.710812509059906, "learning_rate": 9.067432005582953e-06, "loss": 0.4502, "step": 2969 }, { "epoch": 0.8338012352610893, "grad_norm": 0.8467698693275452, "learning_rate": 9.066481856255323e-06, "loss": 0.505, "step": 2970 }, { "epoch": 0.8340819764177428, "grad_norm": 0.7723353505134583, "learning_rate": 9.065531272978039e-06, "loss": 0.4744, "step": 2971 }, { "epoch": 0.8343627175743964, "grad_norm": 0.9657412171363831, "learning_rate": 9.064580255852537e-06, "loss": 0.5081, "step": 2972 }, { "epoch": 0.83464345873105, "grad_norm": 0.7016030550003052, "learning_rate": 9.063628804980308e-06, "loss": 0.445, "step": 2973 }, { "epoch": 0.8349241998877035, "grad_norm": 0.7023826241493225, "learning_rate": 9.062676920462882e-06, "loss": 0.4317, "step": 2974 }, { "epoch": 0.835204941044357, "grad_norm": 0.7801896929740906, "learning_rate": 9.061724602401838e-06, "loss": 0.461, "step": 2975 }, { "epoch": 0.8354856822010107, "grad_norm": 0.7139474153518677, "learning_rate": 9.060771850898806e-06, "loss": 0.4247, "step": 2976 }, { "epoch": 0.8357664233576643, "grad_norm": 0.7815770506858826, "learning_rate": 9.05981866605545e-06, "loss": 0.4165, "step": 2977 }, { "epoch": 0.8360471645143178, "grad_norm": 0.7460588216781616, "learning_rate": 9.058865047973495e-06, "loss": 0.4716, "step": 2978 }, { "epoch": 0.8363279056709714, "grad_norm": 0.6795597076416016, "learning_rate": 9.057910996754704e-06, "loss": 0.4401, "step": 2979 }, { "epoch": 0.8366086468276249, "grad_norm": 0.6955469846725464, "learning_rate": 9.056956512500882e-06, "loss": 0.4591, "step": 2980 }, { "epoch": 0.8368893879842785, "grad_norm": 0.6433377265930176, "learning_rate": 9.056001595313892e-06, "loss": 0.476, "step": 2981 }, { "epoch": 0.837170129140932, "grad_norm": 0.7584505677223206, "learning_rate": 9.055046245295634e-06, "loss": 0.4811, "step": 2982 }, { "epoch": 0.8374508702975856, "grad_norm": 0.6828482747077942, "learning_rate": 9.054090462548058e-06, "loss": 0.4782, "step": 2983 }, { "epoch": 0.8377316114542392, "grad_norm": 0.7746155858039856, "learning_rate": 9.053134247173158e-06, "loss": 0.4278, "step": 2984 }, { "epoch": 0.8380123526108928, "grad_norm": 0.6910436749458313, "learning_rate": 9.052177599272976e-06, "loss": 0.4889, "step": 2985 }, { "epoch": 0.8382930937675463, "grad_norm": 0.742337167263031, "learning_rate": 9.051220518949598e-06, "loss": 0.4337, "step": 2986 }, { "epoch": 0.8385738349241999, "grad_norm": 0.5568944215774536, "learning_rate": 9.05026300630516e-06, "loss": 0.4366, "step": 2987 }, { "epoch": 0.8388545760808535, "grad_norm": 0.7992352247238159, "learning_rate": 9.049305061441842e-06, "loss": 0.4724, "step": 2988 }, { "epoch": 0.839135317237507, "grad_norm": 0.800186038017273, "learning_rate": 9.048346684461867e-06, "loss": 0.4655, "step": 2989 }, { "epoch": 0.8394160583941606, "grad_norm": 0.7031164169311523, "learning_rate": 9.04738787546751e-06, "loss": 0.4117, "step": 2990 }, { "epoch": 0.8396967995508141, "grad_norm": 0.6729116439819336, "learning_rate": 9.046428634561089e-06, "loss": 0.4126, "step": 2991 }, { "epoch": 0.8399775407074677, "grad_norm": 0.7558688521385193, "learning_rate": 9.045468961844966e-06, "loss": 0.4166, "step": 2992 }, { "epoch": 0.8402582818641213, "grad_norm": 0.7242925763130188, "learning_rate": 9.044508857421552e-06, "loss": 0.4336, "step": 2993 }, { "epoch": 0.8405390230207749, "grad_norm": 0.7024223804473877, "learning_rate": 9.043548321393305e-06, "loss": 0.4303, "step": 2994 }, { "epoch": 0.8408197641774284, "grad_norm": 0.7583803534507751, "learning_rate": 9.042587353862723e-06, "loss": 0.4541, "step": 2995 }, { "epoch": 0.841100505334082, "grad_norm": 0.7492231726646423, "learning_rate": 9.041625954932363e-06, "loss": 0.4946, "step": 2996 }, { "epoch": 0.8413812464907355, "grad_norm": 0.8081197738647461, "learning_rate": 9.04066412470481e-06, "loss": 0.4623, "step": 2997 }, { "epoch": 0.8416619876473891, "grad_norm": 0.7065810561180115, "learning_rate": 9.03970186328271e-06, "loss": 0.4022, "step": 2998 }, { "epoch": 0.8419427288040426, "grad_norm": 0.8383647799491882, "learning_rate": 9.038739170768751e-06, "loss": 0.4854, "step": 2999 }, { "epoch": 0.8422234699606962, "grad_norm": 0.6942509412765503, "learning_rate": 9.037776047265661e-06, "loss": 0.4587, "step": 3000 }, { "epoch": 0.8425042111173499, "grad_norm": 0.8069538474082947, "learning_rate": 9.03681249287622e-06, "loss": 0.4329, "step": 3001 }, { "epoch": 0.8427849522740034, "grad_norm": 0.8150032162666321, "learning_rate": 9.035848507703253e-06, "loss": 0.4601, "step": 3002 }, { "epoch": 0.843065693430657, "grad_norm": 0.7180718779563904, "learning_rate": 9.034884091849632e-06, "loss": 0.4661, "step": 3003 }, { "epoch": 0.8433464345873105, "grad_norm": 0.7106649875640869, "learning_rate": 9.03391924541827e-06, "loss": 0.4342, "step": 3004 }, { "epoch": 0.8436271757439641, "grad_norm": 0.778570294380188, "learning_rate": 9.032953968512132e-06, "loss": 0.4594, "step": 3005 }, { "epoch": 0.8439079169006176, "grad_norm": 0.7676113843917847, "learning_rate": 9.031988261234226e-06, "loss": 0.4634, "step": 3006 }, { "epoch": 0.8441886580572712, "grad_norm": 0.6679142117500305, "learning_rate": 9.031022123687607e-06, "loss": 0.4497, "step": 3007 }, { "epoch": 0.8444693992139247, "grad_norm": 0.8467196822166443, "learning_rate": 9.030055555975373e-06, "loss": 0.4472, "step": 3008 }, { "epoch": 0.8447501403705783, "grad_norm": 0.8035460710525513, "learning_rate": 9.029088558200672e-06, "loss": 0.432, "step": 3009 }, { "epoch": 0.8450308815272319, "grad_norm": 0.6552834510803223, "learning_rate": 9.028121130466696e-06, "loss": 0.4093, "step": 3010 }, { "epoch": 0.8453116226838855, "grad_norm": 0.7501563429832458, "learning_rate": 9.02715327287668e-06, "loss": 0.4549, "step": 3011 }, { "epoch": 0.845592363840539, "grad_norm": 0.6984012126922607, "learning_rate": 9.026184985533913e-06, "loss": 0.4796, "step": 3012 }, { "epoch": 0.8458731049971926, "grad_norm": 0.6520709991455078, "learning_rate": 9.02521626854172e-06, "loss": 0.4339, "step": 3013 }, { "epoch": 0.8461538461538461, "grad_norm": 0.742888331413269, "learning_rate": 9.024247122003477e-06, "loss": 0.4433, "step": 3014 }, { "epoch": 0.8464345873104997, "grad_norm": 0.8158312439918518, "learning_rate": 9.023277546022606e-06, "loss": 0.4798, "step": 3015 }, { "epoch": 0.8467153284671532, "grad_norm": 0.8237171769142151, "learning_rate": 9.022307540702576e-06, "loss": 0.4696, "step": 3016 }, { "epoch": 0.8469960696238068, "grad_norm": 0.7952943444252014, "learning_rate": 9.021337106146898e-06, "loss": 0.5073, "step": 3017 }, { "epoch": 0.8472768107804605, "grad_norm": 0.7563936710357666, "learning_rate": 9.02036624245913e-06, "loss": 0.4126, "step": 3018 }, { "epoch": 0.847557551937114, "grad_norm": 0.675305187702179, "learning_rate": 9.019394949742879e-06, "loss": 0.4364, "step": 3019 }, { "epoch": 0.8478382930937676, "grad_norm": 0.8370843529701233, "learning_rate": 9.018423228101793e-06, "loss": 0.4709, "step": 3020 }, { "epoch": 0.8481190342504211, "grad_norm": 0.749310314655304, "learning_rate": 9.017451077639569e-06, "loss": 0.4018, "step": 3021 }, { "epoch": 0.8483997754070747, "grad_norm": 0.6608916521072388, "learning_rate": 9.01647849845995e-06, "loss": 0.4688, "step": 3022 }, { "epoch": 0.8486805165637282, "grad_norm": 0.7035723924636841, "learning_rate": 9.015505490666722e-06, "loss": 0.457, "step": 3023 }, { "epoch": 0.8489612577203818, "grad_norm": 0.771713137626648, "learning_rate": 9.014532054363719e-06, "loss": 0.4467, "step": 3024 }, { "epoch": 0.8492419988770353, "grad_norm": 0.7923526167869568, "learning_rate": 9.013558189654819e-06, "loss": 0.5159, "step": 3025 }, { "epoch": 0.8495227400336889, "grad_norm": 0.6970590949058533, "learning_rate": 9.012583896643949e-06, "loss": 0.4514, "step": 3026 }, { "epoch": 0.8498034811903425, "grad_norm": 0.6298572421073914, "learning_rate": 9.011609175435077e-06, "loss": 0.4585, "step": 3027 }, { "epoch": 0.8500842223469961, "grad_norm": 0.7198712825775146, "learning_rate": 9.01063402613222e-06, "loss": 0.516, "step": 3028 }, { "epoch": 0.8503649635036497, "grad_norm": 0.8174310922622681, "learning_rate": 9.009658448839441e-06, "loss": 0.4829, "step": 3029 }, { "epoch": 0.8506457046603032, "grad_norm": 0.660001277923584, "learning_rate": 9.008682443660848e-06, "loss": 0.4184, "step": 3030 }, { "epoch": 0.8509264458169568, "grad_norm": 0.8710374236106873, "learning_rate": 9.00770601070059e-06, "loss": 0.4829, "step": 3031 }, { "epoch": 0.8512071869736103, "grad_norm": 0.7065457105636597, "learning_rate": 9.00672915006287e-06, "loss": 0.4596, "step": 3032 }, { "epoch": 0.8514879281302639, "grad_norm": 0.8196362257003784, "learning_rate": 9.005751861851933e-06, "loss": 0.5039, "step": 3033 }, { "epoch": 0.8517686692869174, "grad_norm": 0.6606584191322327, "learning_rate": 9.004774146172067e-06, "loss": 0.4225, "step": 3034 }, { "epoch": 0.8520494104435711, "grad_norm": 0.7076795697212219, "learning_rate": 9.003796003127606e-06, "loss": 0.4868, "step": 3035 }, { "epoch": 0.8523301516002246, "grad_norm": 0.6806330680847168, "learning_rate": 9.002817432822934e-06, "loss": 0.4269, "step": 3036 }, { "epoch": 0.8526108927568782, "grad_norm": 0.6327815651893616, "learning_rate": 9.00183843536248e-06, "loss": 0.4746, "step": 3037 }, { "epoch": 0.8528916339135317, "grad_norm": 0.7250660061836243, "learning_rate": 9.00085901085071e-06, "loss": 0.4664, "step": 3038 }, { "epoch": 0.8531723750701853, "grad_norm": 0.6600928902626038, "learning_rate": 8.99987915939215e-06, "loss": 0.3859, "step": 3039 }, { "epoch": 0.8534531162268388, "grad_norm": 0.7053912281990051, "learning_rate": 8.998898881091358e-06, "loss": 0.4645, "step": 3040 }, { "epoch": 0.8537338573834924, "grad_norm": 0.688148558139801, "learning_rate": 8.997918176052945e-06, "loss": 0.4288, "step": 3041 }, { "epoch": 0.8540145985401459, "grad_norm": 0.7710590362548828, "learning_rate": 8.996937044381565e-06, "loss": 0.4707, "step": 3042 }, { "epoch": 0.8542953396967996, "grad_norm": 0.7533964514732361, "learning_rate": 8.99595548618192e-06, "loss": 0.4476, "step": 3043 }, { "epoch": 0.8545760808534532, "grad_norm": 0.6575049161911011, "learning_rate": 8.994973501558754e-06, "loss": 0.4097, "step": 3044 }, { "epoch": 0.8548568220101067, "grad_norm": 0.7209513187408447, "learning_rate": 8.99399109061686e-06, "loss": 0.4277, "step": 3045 }, { "epoch": 0.8551375631667603, "grad_norm": 0.744247317314148, "learning_rate": 8.993008253461073e-06, "loss": 0.4202, "step": 3046 }, { "epoch": 0.8554183043234138, "grad_norm": 0.7632492780685425, "learning_rate": 8.992024990196276e-06, "loss": 0.4748, "step": 3047 }, { "epoch": 0.8556990454800674, "grad_norm": 0.7649577856063843, "learning_rate": 8.991041300927397e-06, "loss": 0.4327, "step": 3048 }, { "epoch": 0.8559797866367209, "grad_norm": 0.7533644437789917, "learning_rate": 8.990057185759409e-06, "loss": 0.468, "step": 3049 }, { "epoch": 0.8562605277933745, "grad_norm": 0.7657700777053833, "learning_rate": 8.98907264479733e-06, "loss": 0.4726, "step": 3050 }, { "epoch": 0.856541268950028, "grad_norm": 0.74150550365448, "learning_rate": 8.988087678146225e-06, "loss": 0.4649, "step": 3051 }, { "epoch": 0.8568220101066817, "grad_norm": 0.7767965793609619, "learning_rate": 8.987102285911204e-06, "loss": 0.4911, "step": 3052 }, { "epoch": 0.8571027512633352, "grad_norm": 0.8834661841392517, "learning_rate": 8.986116468197422e-06, "loss": 0.5185, "step": 3053 }, { "epoch": 0.8573834924199888, "grad_norm": 0.8438416719436646, "learning_rate": 8.985130225110077e-06, "loss": 0.473, "step": 3054 }, { "epoch": 0.8576642335766423, "grad_norm": 0.6546692848205566, "learning_rate": 8.984143556754416e-06, "loss": 0.4466, "step": 3055 }, { "epoch": 0.8579449747332959, "grad_norm": 0.8710799217224121, "learning_rate": 8.983156463235731e-06, "loss": 0.4892, "step": 3056 }, { "epoch": 0.8582257158899494, "grad_norm": 0.6808534264564514, "learning_rate": 8.982168944659359e-06, "loss": 0.4673, "step": 3057 }, { "epoch": 0.858506457046603, "grad_norm": 0.6909962892532349, "learning_rate": 8.981181001130678e-06, "loss": 0.4454, "step": 3058 }, { "epoch": 0.8587871982032566, "grad_norm": 0.7451123595237732, "learning_rate": 8.98019263275512e-06, "loss": 0.4541, "step": 3059 }, { "epoch": 0.8590679393599102, "grad_norm": 0.7801520824432373, "learning_rate": 8.979203839638155e-06, "loss": 0.3956, "step": 3060 }, { "epoch": 0.8593486805165638, "grad_norm": 0.781225323677063, "learning_rate": 8.978214621885301e-06, "loss": 0.4904, "step": 3061 }, { "epoch": 0.8596294216732173, "grad_norm": 0.833699107170105, "learning_rate": 8.977224979602123e-06, "loss": 0.4668, "step": 3062 }, { "epoch": 0.8599101628298709, "grad_norm": 0.8141199350357056, "learning_rate": 8.976234912894226e-06, "loss": 0.5174, "step": 3063 }, { "epoch": 0.8601909039865244, "grad_norm": 0.7246940732002258, "learning_rate": 8.975244421867267e-06, "loss": 0.4553, "step": 3064 }, { "epoch": 0.860471645143178, "grad_norm": 0.6886260509490967, "learning_rate": 8.974253506626943e-06, "loss": 0.4545, "step": 3065 }, { "epoch": 0.8607523862998315, "grad_norm": 0.7634584307670593, "learning_rate": 8.973262167278997e-06, "loss": 0.4305, "step": 3066 }, { "epoch": 0.8610331274564851, "grad_norm": 0.6877836585044861, "learning_rate": 8.972270403929223e-06, "loss": 0.4367, "step": 3067 }, { "epoch": 0.8613138686131386, "grad_norm": 0.7027422785758972, "learning_rate": 8.971278216683454e-06, "loss": 0.4301, "step": 3068 }, { "epoch": 0.8615946097697923, "grad_norm": 0.6292520761489868, "learning_rate": 8.970285605647568e-06, "loss": 0.4273, "step": 3069 }, { "epoch": 0.8618753509264458, "grad_norm": 0.6710954904556274, "learning_rate": 8.969292570927493e-06, "loss": 0.4557, "step": 3070 }, { "epoch": 0.8621560920830994, "grad_norm": 0.7669581770896912, "learning_rate": 8.968299112629196e-06, "loss": 0.4891, "step": 3071 }, { "epoch": 0.862436833239753, "grad_norm": 0.7548537254333496, "learning_rate": 8.967305230858696e-06, "loss": 0.4537, "step": 3072 }, { "epoch": 0.8627175743964065, "grad_norm": 0.6805199384689331, "learning_rate": 8.966310925722054e-06, "loss": 0.4135, "step": 3073 }, { "epoch": 0.8629983155530601, "grad_norm": 0.714707612991333, "learning_rate": 8.965316197325374e-06, "loss": 0.4524, "step": 3074 }, { "epoch": 0.8632790567097136, "grad_norm": 0.6804215908050537, "learning_rate": 8.964321045774808e-06, "loss": 0.4432, "step": 3075 }, { "epoch": 0.8635597978663672, "grad_norm": 0.789669394493103, "learning_rate": 8.963325471176552e-06, "loss": 0.4624, "step": 3076 }, { "epoch": 0.8638405390230208, "grad_norm": 0.9625343084335327, "learning_rate": 8.962329473636848e-06, "loss": 0.4467, "step": 3077 }, { "epoch": 0.8641212801796744, "grad_norm": 0.8646829128265381, "learning_rate": 8.961333053261984e-06, "loss": 0.4651, "step": 3078 }, { "epoch": 0.8644020213363279, "grad_norm": 0.7739148736000061, "learning_rate": 8.96033621015829e-06, "loss": 0.4417, "step": 3079 }, { "epoch": 0.8646827624929815, "grad_norm": 0.9266568422317505, "learning_rate": 8.959338944432144e-06, "loss": 0.4283, "step": 3080 }, { "epoch": 0.864963503649635, "grad_norm": 0.8844408392906189, "learning_rate": 8.958341256189966e-06, "loss": 0.5005, "step": 3081 }, { "epoch": 0.8652442448062886, "grad_norm": 0.7739181518554688, "learning_rate": 8.957343145538225e-06, "loss": 0.4523, "step": 3082 }, { "epoch": 0.8655249859629421, "grad_norm": 0.712052047252655, "learning_rate": 8.956344612583433e-06, "loss": 0.4531, "step": 3083 }, { "epoch": 0.8658057271195957, "grad_norm": 0.7869881987571716, "learning_rate": 8.955345657432144e-06, "loss": 0.4482, "step": 3084 }, { "epoch": 0.8660864682762492, "grad_norm": 0.8225265741348267, "learning_rate": 8.954346280190966e-06, "loss": 0.4773, "step": 3085 }, { "epoch": 0.8663672094329029, "grad_norm": 0.7096180319786072, "learning_rate": 8.953346480966543e-06, "loss": 0.4322, "step": 3086 }, { "epoch": 0.8666479505895565, "grad_norm": 0.7668026089668274, "learning_rate": 8.952346259865568e-06, "loss": 0.4447, "step": 3087 }, { "epoch": 0.86692869174621, "grad_norm": 0.7819424271583557, "learning_rate": 8.951345616994777e-06, "loss": 0.4906, "step": 3088 }, { "epoch": 0.8672094329028636, "grad_norm": 0.7087759971618652, "learning_rate": 8.950344552460955e-06, "loss": 0.4261, "step": 3089 }, { "epoch": 0.8674901740595171, "grad_norm": 0.6854142546653748, "learning_rate": 8.949343066370927e-06, "loss": 0.4386, "step": 3090 }, { "epoch": 0.8677709152161707, "grad_norm": 0.8375086784362793, "learning_rate": 8.948341158831565e-06, "loss": 0.4754, "step": 3091 }, { "epoch": 0.8680516563728242, "grad_norm": 0.7688049077987671, "learning_rate": 8.947338829949789e-06, "loss": 0.4668, "step": 3092 }, { "epoch": 0.8683323975294778, "grad_norm": 0.6722404956817627, "learning_rate": 8.946336079832562e-06, "loss": 0.4403, "step": 3093 }, { "epoch": 0.8686131386861314, "grad_norm": 0.7951295375823975, "learning_rate": 8.945332908586887e-06, "loss": 0.409, "step": 3094 }, { "epoch": 0.868893879842785, "grad_norm": 0.8671542406082153, "learning_rate": 8.944329316319819e-06, "loss": 0.4759, "step": 3095 }, { "epoch": 0.8691746209994385, "grad_norm": 0.6443971991539001, "learning_rate": 8.943325303138455e-06, "loss": 0.4458, "step": 3096 }, { "epoch": 0.8694553621560921, "grad_norm": 0.7402511835098267, "learning_rate": 8.942320869149933e-06, "loss": 0.4943, "step": 3097 }, { "epoch": 0.8697361033127456, "grad_norm": 0.7752165198326111, "learning_rate": 8.941316014461448e-06, "loss": 0.474, "step": 3098 }, { "epoch": 0.8700168444693992, "grad_norm": 0.6883082389831543, "learning_rate": 8.940310739180227e-06, "loss": 0.4641, "step": 3099 }, { "epoch": 0.8702975856260527, "grad_norm": 0.7413114309310913, "learning_rate": 8.939305043413543e-06, "loss": 0.4447, "step": 3100 }, { "epoch": 0.8705783267827063, "grad_norm": 0.7216244339942932, "learning_rate": 8.938298927268728e-06, "loss": 0.4962, "step": 3101 }, { "epoch": 0.87085906793936, "grad_norm": 0.6698883771896362, "learning_rate": 8.937292390853136e-06, "loss": 0.4559, "step": 3102 }, { "epoch": 0.8711398090960135, "grad_norm": 0.6504644155502319, "learning_rate": 8.936285434274189e-06, "loss": 0.4342, "step": 3103 }, { "epoch": 0.8714205502526671, "grad_norm": 0.7135512232780457, "learning_rate": 8.935278057639336e-06, "loss": 0.4597, "step": 3104 }, { "epoch": 0.8717012914093206, "grad_norm": 0.6523886322975159, "learning_rate": 8.934270261056081e-06, "loss": 0.4278, "step": 3105 }, { "epoch": 0.8719820325659742, "grad_norm": 0.6546775698661804, "learning_rate": 8.93326204463197e-06, "loss": 0.4594, "step": 3106 }, { "epoch": 0.8722627737226277, "grad_norm": 0.6894417405128479, "learning_rate": 8.932253408474592e-06, "loss": 0.4453, "step": 3107 }, { "epoch": 0.8725435148792813, "grad_norm": 0.6604836583137512, "learning_rate": 8.931244352691584e-06, "loss": 0.4097, "step": 3108 }, { "epoch": 0.8728242560359348, "grad_norm": 0.731212854385376, "learning_rate": 8.930234877390626e-06, "loss": 0.4184, "step": 3109 }, { "epoch": 0.8731049971925884, "grad_norm": 0.6962615847587585, "learning_rate": 8.929224982679441e-06, "loss": 0.459, "step": 3110 }, { "epoch": 0.873385738349242, "grad_norm": 0.6900382041931152, "learning_rate": 8.928214668665802e-06, "loss": 0.4406, "step": 3111 }, { "epoch": 0.8736664795058956, "grad_norm": 0.6988445520401001, "learning_rate": 8.92720393545752e-06, "loss": 0.4368, "step": 3112 }, { "epoch": 0.8739472206625492, "grad_norm": 0.6465603113174438, "learning_rate": 8.926192783162456e-06, "loss": 0.4174, "step": 3113 }, { "epoch": 0.8742279618192027, "grad_norm": 0.6586212515830994, "learning_rate": 8.925181211888514e-06, "loss": 0.4466, "step": 3114 }, { "epoch": 0.8745087029758563, "grad_norm": 0.6625884771347046, "learning_rate": 8.92416922174364e-06, "loss": 0.4329, "step": 3115 }, { "epoch": 0.8747894441325098, "grad_norm": 0.7261552214622498, "learning_rate": 8.923156812835831e-06, "loss": 0.4247, "step": 3116 }, { "epoch": 0.8750701852891634, "grad_norm": 0.614919900894165, "learning_rate": 8.922143985273125e-06, "loss": 0.4021, "step": 3117 }, { "epoch": 0.8753509264458169, "grad_norm": 0.7092061042785645, "learning_rate": 8.921130739163602e-06, "loss": 0.4417, "step": 3118 }, { "epoch": 0.8756316676024706, "grad_norm": 0.6687312126159668, "learning_rate": 8.92011707461539e-06, "loss": 0.4381, "step": 3119 }, { "epoch": 0.8759124087591241, "grad_norm": 0.8217565417289734, "learning_rate": 8.91910299173666e-06, "loss": 0.4336, "step": 3120 }, { "epoch": 0.8761931499157777, "grad_norm": 0.6277878284454346, "learning_rate": 8.918088490635633e-06, "loss": 0.4055, "step": 3121 }, { "epoch": 0.8764738910724312, "grad_norm": 0.6974098086357117, "learning_rate": 8.917073571420565e-06, "loss": 0.4409, "step": 3122 }, { "epoch": 0.8767546322290848, "grad_norm": 0.757596492767334, "learning_rate": 8.916058234199766e-06, "loss": 0.4763, "step": 3123 }, { "epoch": 0.8770353733857383, "grad_norm": 0.7230308651924133, "learning_rate": 8.915042479081584e-06, "loss": 0.4771, "step": 3124 }, { "epoch": 0.8773161145423919, "grad_norm": 0.667575478553772, "learning_rate": 8.914026306174413e-06, "loss": 0.4139, "step": 3125 }, { "epoch": 0.8775968556990454, "grad_norm": 0.6815164089202881, "learning_rate": 8.913009715586695e-06, "loss": 0.4136, "step": 3126 }, { "epoch": 0.877877596855699, "grad_norm": 0.7295694351196289, "learning_rate": 8.911992707426915e-06, "loss": 0.4365, "step": 3127 }, { "epoch": 0.8781583380123527, "grad_norm": 0.6554813385009766, "learning_rate": 8.910975281803599e-06, "loss": 0.4426, "step": 3128 }, { "epoch": 0.8784390791690062, "grad_norm": 0.6871294379234314, "learning_rate": 8.909957438825324e-06, "loss": 0.4543, "step": 3129 }, { "epoch": 0.8787198203256598, "grad_norm": 0.7401013374328613, "learning_rate": 8.908939178600702e-06, "loss": 0.4397, "step": 3130 }, { "epoch": 0.8790005614823133, "grad_norm": 0.7382338047027588, "learning_rate": 8.907920501238402e-06, "loss": 0.447, "step": 3131 }, { "epoch": 0.8792813026389669, "grad_norm": 0.8176373839378357, "learning_rate": 8.906901406847127e-06, "loss": 0.4459, "step": 3132 }, { "epoch": 0.8795620437956204, "grad_norm": 0.7015848159790039, "learning_rate": 8.905881895535628e-06, "loss": 0.4162, "step": 3133 }, { "epoch": 0.879842784952274, "grad_norm": 0.6791051030158997, "learning_rate": 8.904861967412702e-06, "loss": 0.4037, "step": 3134 }, { "epoch": 0.8801235261089275, "grad_norm": 0.7253435254096985, "learning_rate": 8.90384162258719e-06, "loss": 0.4687, "step": 3135 }, { "epoch": 0.8804042672655812, "grad_norm": 0.732147753238678, "learning_rate": 8.902820861167978e-06, "loss": 0.4808, "step": 3136 }, { "epoch": 0.8806850084222347, "grad_norm": 0.8130166530609131, "learning_rate": 8.901799683263993e-06, "loss": 0.4357, "step": 3137 }, { "epoch": 0.8809657495788883, "grad_norm": 0.8349103927612305, "learning_rate": 8.90077808898421e-06, "loss": 0.4633, "step": 3138 }, { "epoch": 0.8812464907355418, "grad_norm": 0.7550891637802124, "learning_rate": 8.899756078437645e-06, "loss": 0.436, "step": 3139 }, { "epoch": 0.8815272318921954, "grad_norm": 0.833274245262146, "learning_rate": 8.898733651733362e-06, "loss": 0.4437, "step": 3140 }, { "epoch": 0.881807973048849, "grad_norm": 0.8202539682388306, "learning_rate": 8.897710808980472e-06, "loss": 0.4464, "step": 3141 }, { "epoch": 0.8820887142055025, "grad_norm": 0.7455984354019165, "learning_rate": 8.896687550288119e-06, "loss": 0.3987, "step": 3142 }, { "epoch": 0.882369455362156, "grad_norm": 0.9817519783973694, "learning_rate": 8.895663875765503e-06, "loss": 0.456, "step": 3143 }, { "epoch": 0.8826501965188096, "grad_norm": 0.7784855961799622, "learning_rate": 8.894639785521866e-06, "loss": 0.4575, "step": 3144 }, { "epoch": 0.8829309376754633, "grad_norm": 0.7350506782531738, "learning_rate": 8.893615279666488e-06, "loss": 0.4711, "step": 3145 }, { "epoch": 0.8832116788321168, "grad_norm": 0.8772125840187073, "learning_rate": 8.8925903583087e-06, "loss": 0.467, "step": 3146 }, { "epoch": 0.8834924199887704, "grad_norm": 0.8276565670967102, "learning_rate": 8.891565021557877e-06, "loss": 0.4205, "step": 3147 }, { "epoch": 0.8837731611454239, "grad_norm": 0.7918325066566467, "learning_rate": 8.890539269523435e-06, "loss": 0.4498, "step": 3148 }, { "epoch": 0.8840539023020775, "grad_norm": 0.8533987998962402, "learning_rate": 8.889513102314833e-06, "loss": 0.4483, "step": 3149 }, { "epoch": 0.884334643458731, "grad_norm": 0.7781782746315002, "learning_rate": 8.888486520041583e-06, "loss": 0.4347, "step": 3150 }, { "epoch": 0.8846153846153846, "grad_norm": 0.7314184904098511, "learning_rate": 8.887459522813232e-06, "loss": 0.4787, "step": 3151 }, { "epoch": 0.8848961257720381, "grad_norm": 0.6887636780738831, "learning_rate": 8.886432110739374e-06, "loss": 0.4403, "step": 3152 }, { "epoch": 0.8851768669286918, "grad_norm": 0.7512714862823486, "learning_rate": 8.885404283929651e-06, "loss": 0.432, "step": 3153 }, { "epoch": 0.8854576080853453, "grad_norm": 0.8041152954101562, "learning_rate": 8.884376042493742e-06, "loss": 0.4581, "step": 3154 }, { "epoch": 0.8857383492419989, "grad_norm": 0.7227070927619934, "learning_rate": 8.883347386541378e-06, "loss": 0.4137, "step": 3155 }, { "epoch": 0.8860190903986525, "grad_norm": 0.6934637427330017, "learning_rate": 8.882318316182333e-06, "loss": 0.449, "step": 3156 }, { "epoch": 0.886299831555306, "grad_norm": 0.7745835781097412, "learning_rate": 8.881288831526416e-06, "loss": 0.4566, "step": 3157 }, { "epoch": 0.8865805727119596, "grad_norm": 0.6885500550270081, "learning_rate": 8.880258932683493e-06, "loss": 0.4156, "step": 3158 }, { "epoch": 0.8868613138686131, "grad_norm": 0.7272955179214478, "learning_rate": 8.879228619763467e-06, "loss": 0.4832, "step": 3159 }, { "epoch": 0.8871420550252667, "grad_norm": 0.6531155109405518, "learning_rate": 8.878197892876284e-06, "loss": 0.4589, "step": 3160 }, { "epoch": 0.8874227961819202, "grad_norm": 0.7912184000015259, "learning_rate": 8.877166752131939e-06, "loss": 0.5203, "step": 3161 }, { "epoch": 0.8877035373385739, "grad_norm": 0.7932146191596985, "learning_rate": 8.87613519764047e-06, "loss": 0.4992, "step": 3162 }, { "epoch": 0.8879842784952274, "grad_norm": 0.8007762432098389, "learning_rate": 8.875103229511957e-06, "loss": 0.4631, "step": 3163 }, { "epoch": 0.888265019651881, "grad_norm": 0.7280421257019043, "learning_rate": 8.874070847856524e-06, "loss": 0.4738, "step": 3164 }, { "epoch": 0.8885457608085345, "grad_norm": 0.6544811725616455, "learning_rate": 8.87303805278434e-06, "loss": 0.4413, "step": 3165 }, { "epoch": 0.8888265019651881, "grad_norm": 0.67585688829422, "learning_rate": 8.872004844405622e-06, "loss": 0.4353, "step": 3166 }, { "epoch": 0.8891072431218416, "grad_norm": 0.9749636054039001, "learning_rate": 8.870971222830624e-06, "loss": 0.5339, "step": 3167 }, { "epoch": 0.8893879842784952, "grad_norm": 0.8017314076423645, "learning_rate": 8.86993718816965e-06, "loss": 0.4825, "step": 3168 }, { "epoch": 0.8896687254351487, "grad_norm": 0.6650331020355225, "learning_rate": 8.868902740533045e-06, "loss": 0.4365, "step": 3169 }, { "epoch": 0.8899494665918024, "grad_norm": 0.8409380316734314, "learning_rate": 8.8678678800312e-06, "loss": 0.3997, "step": 3170 }, { "epoch": 0.890230207748456, "grad_norm": 0.8529126644134521, "learning_rate": 8.866832606774544e-06, "loss": 0.4469, "step": 3171 }, { "epoch": 0.8905109489051095, "grad_norm": 0.6713091731071472, "learning_rate": 8.865796920873561e-06, "loss": 0.4477, "step": 3172 }, { "epoch": 0.8907916900617631, "grad_norm": 0.7690662741661072, "learning_rate": 8.864760822438769e-06, "loss": 0.4449, "step": 3173 }, { "epoch": 0.8910724312184166, "grad_norm": 0.8488250374794006, "learning_rate": 8.863724311580738e-06, "loss": 0.4371, "step": 3174 }, { "epoch": 0.8913531723750702, "grad_norm": 0.7574658989906311, "learning_rate": 8.862687388410073e-06, "loss": 0.4491, "step": 3175 }, { "epoch": 0.8916339135317237, "grad_norm": 0.8288156986236572, "learning_rate": 8.86165005303743e-06, "loss": 0.4468, "step": 3176 }, { "epoch": 0.8919146546883773, "grad_norm": 0.8906120657920837, "learning_rate": 8.860612305573508e-06, "loss": 0.479, "step": 3177 }, { "epoch": 0.8921953958450309, "grad_norm": 0.7341766953468323, "learning_rate": 8.85957414612905e-06, "loss": 0.4704, "step": 3178 }, { "epoch": 0.8924761370016845, "grad_norm": 0.7411095499992371, "learning_rate": 8.858535574814838e-06, "loss": 0.4399, "step": 3179 }, { "epoch": 0.892756878158338, "grad_norm": 0.7041527628898621, "learning_rate": 8.857496591741705e-06, "loss": 0.4901, "step": 3180 }, { "epoch": 0.8930376193149916, "grad_norm": 0.7300823330879211, "learning_rate": 8.856457197020526e-06, "loss": 0.4674, "step": 3181 }, { "epoch": 0.8933183604716451, "grad_norm": 0.6751479506492615, "learning_rate": 8.855417390762212e-06, "loss": 0.4461, "step": 3182 }, { "epoch": 0.8935991016282987, "grad_norm": 0.7834952473640442, "learning_rate": 8.854377173077733e-06, "loss": 0.4578, "step": 3183 }, { "epoch": 0.8938798427849522, "grad_norm": 0.5938035249710083, "learning_rate": 8.853336544078089e-06, "loss": 0.4061, "step": 3184 }, { "epoch": 0.8941605839416058, "grad_norm": 0.8587839007377625, "learning_rate": 8.852295503874331e-06, "loss": 0.4948, "step": 3185 }, { "epoch": 0.8944413250982594, "grad_norm": 0.7783791422843933, "learning_rate": 8.851254052577555e-06, "loss": 0.4443, "step": 3186 }, { "epoch": 0.894722066254913, "grad_norm": 0.6832489967346191, "learning_rate": 8.850212190298894e-06, "loss": 0.4167, "step": 3187 }, { "epoch": 0.8950028074115666, "grad_norm": 0.6670812368392944, "learning_rate": 8.849169917149532e-06, "loss": 0.427, "step": 3188 }, { "epoch": 0.8952835485682201, "grad_norm": 0.8351960778236389, "learning_rate": 8.848127233240693e-06, "loss": 0.4807, "step": 3189 }, { "epoch": 0.8955642897248737, "grad_norm": 0.7806240320205688, "learning_rate": 8.847084138683644e-06, "loss": 0.4548, "step": 3190 }, { "epoch": 0.8958450308815272, "grad_norm": 0.6731281280517578, "learning_rate": 8.8460406335897e-06, "loss": 0.4007, "step": 3191 }, { "epoch": 0.8961257720381808, "grad_norm": 0.6744766235351562, "learning_rate": 8.844996718070218e-06, "loss": 0.4357, "step": 3192 }, { "epoch": 0.8964065131948343, "grad_norm": 0.8585912585258484, "learning_rate": 8.843952392236595e-06, "loss": 0.4865, "step": 3193 }, { "epoch": 0.8966872543514879, "grad_norm": 0.8856034278869629, "learning_rate": 8.842907656200277e-06, "loss": 0.45, "step": 3194 }, { "epoch": 0.8969679955081415, "grad_norm": 0.698112428188324, "learning_rate": 8.841862510072751e-06, "loss": 0.427, "step": 3195 }, { "epoch": 0.8972487366647951, "grad_norm": 0.750064492225647, "learning_rate": 8.84081695396555e-06, "loss": 0.4826, "step": 3196 }, { "epoch": 0.8975294778214487, "grad_norm": 0.6911453008651733, "learning_rate": 8.839770987990245e-06, "loss": 0.4264, "step": 3197 }, { "epoch": 0.8978102189781022, "grad_norm": 0.744818925857544, "learning_rate": 8.838724612258462e-06, "loss": 0.4018, "step": 3198 }, { "epoch": 0.8980909601347558, "grad_norm": 0.8290770053863525, "learning_rate": 8.837677826881858e-06, "loss": 0.4159, "step": 3199 }, { "epoch": 0.8983717012914093, "grad_norm": 0.6693921089172363, "learning_rate": 8.836630631972142e-06, "loss": 0.4308, "step": 3200 }, { "epoch": 0.8986524424480629, "grad_norm": 0.7737688422203064, "learning_rate": 8.835583027641064e-06, "loss": 0.4225, "step": 3201 }, { "epoch": 0.8989331836047164, "grad_norm": 0.9004019498825073, "learning_rate": 8.834535014000417e-06, "loss": 0.4812, "step": 3202 }, { "epoch": 0.89921392476137, "grad_norm": 0.6781492233276367, "learning_rate": 8.833486591162037e-06, "loss": 0.4397, "step": 3203 }, { "epoch": 0.8994946659180236, "grad_norm": 0.6477671265602112, "learning_rate": 8.832437759237808e-06, "loss": 0.3967, "step": 3204 }, { "epoch": 0.8997754070746772, "grad_norm": 0.672127902507782, "learning_rate": 8.831388518339652e-06, "loss": 0.4118, "step": 3205 }, { "epoch": 0.9000561482313307, "grad_norm": 0.7684900760650635, "learning_rate": 8.830338868579542e-06, "loss": 0.475, "step": 3206 }, { "epoch": 0.9003368893879843, "grad_norm": 0.7863741517066956, "learning_rate": 8.829288810069486e-06, "loss": 0.4807, "step": 3207 }, { "epoch": 0.9006176305446378, "grad_norm": 0.7152606844902039, "learning_rate": 8.82823834292154e-06, "loss": 0.4412, "step": 3208 }, { "epoch": 0.9008983717012914, "grad_norm": 0.7831460237503052, "learning_rate": 8.827187467247806e-06, "loss": 0.4698, "step": 3209 }, { "epoch": 0.9011791128579449, "grad_norm": 0.7695897817611694, "learning_rate": 8.826136183160424e-06, "loss": 0.4737, "step": 3210 }, { "epoch": 0.9014598540145985, "grad_norm": 0.7289673089981079, "learning_rate": 8.825084490771583e-06, "loss": 0.4536, "step": 3211 }, { "epoch": 0.9017405951712522, "grad_norm": 0.7347771525382996, "learning_rate": 8.82403239019351e-06, "loss": 0.4325, "step": 3212 }, { "epoch": 0.9020213363279057, "grad_norm": 0.7328828573226929, "learning_rate": 8.822979881538482e-06, "loss": 0.4169, "step": 3213 }, { "epoch": 0.9023020774845593, "grad_norm": 0.8392121195793152, "learning_rate": 8.821926964918814e-06, "loss": 0.4613, "step": 3214 }, { "epoch": 0.9025828186412128, "grad_norm": 0.7489368915557861, "learning_rate": 8.820873640446866e-06, "loss": 0.461, "step": 3215 }, { "epoch": 0.9028635597978664, "grad_norm": 0.6864010691642761, "learning_rate": 8.819819908235045e-06, "loss": 0.4441, "step": 3216 }, { "epoch": 0.9031443009545199, "grad_norm": 0.8161920309066772, "learning_rate": 8.818765768395796e-06, "loss": 0.5073, "step": 3217 }, { "epoch": 0.9034250421111735, "grad_norm": 0.7584202289581299, "learning_rate": 8.817711221041613e-06, "loss": 0.4337, "step": 3218 }, { "epoch": 0.903705783267827, "grad_norm": 0.7495251297950745, "learning_rate": 8.816656266285028e-06, "loss": 0.4185, "step": 3219 }, { "epoch": 0.9039865244244806, "grad_norm": 0.6710088849067688, "learning_rate": 8.815600904238623e-06, "loss": 0.4733, "step": 3220 }, { "epoch": 0.9042672655811342, "grad_norm": 0.8690589666366577, "learning_rate": 8.814545135015015e-06, "loss": 0.4668, "step": 3221 }, { "epoch": 0.9045480067377878, "grad_norm": 0.8617925643920898, "learning_rate": 8.813488958726872e-06, "loss": 0.5133, "step": 3222 }, { "epoch": 0.9048287478944413, "grad_norm": 0.8535025119781494, "learning_rate": 8.812432375486902e-06, "loss": 0.4717, "step": 3223 }, { "epoch": 0.9051094890510949, "grad_norm": 0.6097338199615479, "learning_rate": 8.811375385407855e-06, "loss": 0.4367, "step": 3224 }, { "epoch": 0.9053902302077484, "grad_norm": 0.764920711517334, "learning_rate": 8.81031798860253e-06, "loss": 0.4476, "step": 3225 }, { "epoch": 0.905670971364402, "grad_norm": 0.9842613935470581, "learning_rate": 8.809260185183763e-06, "loss": 0.4683, "step": 3226 }, { "epoch": 0.9059517125210556, "grad_norm": 0.7564629912376404, "learning_rate": 8.80820197526444e-06, "loss": 0.4318, "step": 3227 }, { "epoch": 0.9062324536777091, "grad_norm": 0.6793269515037537, "learning_rate": 8.80714335895748e-06, "loss": 0.4297, "step": 3228 }, { "epoch": 0.9065131948343628, "grad_norm": 0.8259584307670593, "learning_rate": 8.806084336375857e-06, "loss": 0.4628, "step": 3229 }, { "epoch": 0.9067939359910163, "grad_norm": 0.8333523273468018, "learning_rate": 8.805024907632585e-06, "loss": 0.4539, "step": 3230 }, { "epoch": 0.9070746771476699, "grad_norm": 0.6882826089859009, "learning_rate": 8.803965072840713e-06, "loss": 0.42, "step": 3231 }, { "epoch": 0.9073554183043234, "grad_norm": 0.6492483019828796, "learning_rate": 8.802904832113345e-06, "loss": 0.4766, "step": 3232 }, { "epoch": 0.907636159460977, "grad_norm": 0.7343829274177551, "learning_rate": 8.801844185563622e-06, "loss": 0.4461, "step": 3233 }, { "epoch": 0.9079169006176305, "grad_norm": 0.7494798898696899, "learning_rate": 8.800783133304731e-06, "loss": 0.4169, "step": 3234 }, { "epoch": 0.9081976417742841, "grad_norm": 0.7179716229438782, "learning_rate": 8.799721675449897e-06, "loss": 0.4718, "step": 3235 }, { "epoch": 0.9084783829309376, "grad_norm": 0.6898223757743835, "learning_rate": 8.798659812112397e-06, "loss": 0.4273, "step": 3236 }, { "epoch": 0.9087591240875912, "grad_norm": 0.6938149929046631, "learning_rate": 8.797597543405543e-06, "loss": 0.423, "step": 3237 }, { "epoch": 0.9090398652442448, "grad_norm": 0.8423193097114563, "learning_rate": 8.796534869442694e-06, "loss": 0.4652, "step": 3238 }, { "epoch": 0.9093206064008984, "grad_norm": 0.8559891581535339, "learning_rate": 8.795471790337256e-06, "loss": 0.4867, "step": 3239 }, { "epoch": 0.909601347557552, "grad_norm": 0.6462901830673218, "learning_rate": 8.794408306202668e-06, "loss": 0.4446, "step": 3240 }, { "epoch": 0.9098820887142055, "grad_norm": 0.7510899305343628, "learning_rate": 8.793344417152423e-06, "loss": 0.4457, "step": 3241 }, { "epoch": 0.9101628298708591, "grad_norm": 0.7380796074867249, "learning_rate": 8.79228012330005e-06, "loss": 0.4372, "step": 3242 }, { "epoch": 0.9104435710275126, "grad_norm": 0.7024365067481995, "learning_rate": 8.791215424759126e-06, "loss": 0.4301, "step": 3243 }, { "epoch": 0.9107243121841662, "grad_norm": 0.6903531551361084, "learning_rate": 8.790150321643266e-06, "loss": 0.4035, "step": 3244 }, { "epoch": 0.9110050533408197, "grad_norm": 0.7847535610198975, "learning_rate": 8.789084814066133e-06, "loss": 0.4378, "step": 3245 }, { "epoch": 0.9112857944974734, "grad_norm": 0.7383411526679993, "learning_rate": 8.788018902141435e-06, "loss": 0.3852, "step": 3246 }, { "epoch": 0.9115665356541269, "grad_norm": 0.7784723043441772, "learning_rate": 8.786952585982913e-06, "loss": 0.4729, "step": 3247 }, { "epoch": 0.9118472768107805, "grad_norm": 0.7858310341835022, "learning_rate": 8.78588586570436e-06, "loss": 0.4539, "step": 3248 }, { "epoch": 0.912128017967434, "grad_norm": 0.7449018955230713, "learning_rate": 8.784818741419611e-06, "loss": 0.4474, "step": 3249 }, { "epoch": 0.9124087591240876, "grad_norm": 0.847886323928833, "learning_rate": 8.783751213242543e-06, "loss": 0.498, "step": 3250 }, { "epoch": 0.9126895002807411, "grad_norm": 0.8486092686653137, "learning_rate": 8.782683281287075e-06, "loss": 0.4109, "step": 3251 }, { "epoch": 0.9129702414373947, "grad_norm": 0.8215112090110779, "learning_rate": 8.78161494566717e-06, "loss": 0.4399, "step": 3252 }, { "epoch": 0.9132509825940482, "grad_norm": 0.8668664693832397, "learning_rate": 8.780546206496833e-06, "loss": 0.4448, "step": 3253 }, { "epoch": 0.9135317237507019, "grad_norm": 0.8881429433822632, "learning_rate": 8.779477063890116e-06, "loss": 0.5114, "step": 3254 }, { "epoch": 0.9138124649073555, "grad_norm": 0.6719093918800354, "learning_rate": 8.77840751796111e-06, "loss": 0.4325, "step": 3255 }, { "epoch": 0.914093206064009, "grad_norm": 0.7087459564208984, "learning_rate": 8.777337568823948e-06, "loss": 0.3946, "step": 3256 }, { "epoch": 0.9143739472206626, "grad_norm": 0.8789963126182556, "learning_rate": 8.776267216592814e-06, "loss": 0.4345, "step": 3257 }, { "epoch": 0.9146546883773161, "grad_norm": 0.7974157929420471, "learning_rate": 8.775196461381922e-06, "loss": 0.49, "step": 3258 }, { "epoch": 0.9149354295339697, "grad_norm": 0.7480311393737793, "learning_rate": 8.774125303305542e-06, "loss": 0.4538, "step": 3259 }, { "epoch": 0.9152161706906232, "grad_norm": 0.7599238157272339, "learning_rate": 8.773053742477979e-06, "loss": 0.4249, "step": 3260 }, { "epoch": 0.9154969118472768, "grad_norm": 0.790607213973999, "learning_rate": 8.771981779013582e-06, "loss": 0.4063, "step": 3261 }, { "epoch": 0.9157776530039303, "grad_norm": 0.6766489148139954, "learning_rate": 8.770909413026749e-06, "loss": 0.4336, "step": 3262 }, { "epoch": 0.916058394160584, "grad_norm": 0.8385114669799805, "learning_rate": 8.769836644631911e-06, "loss": 0.4367, "step": 3263 }, { "epoch": 0.9163391353172375, "grad_norm": 0.7480401396751404, "learning_rate": 8.76876347394355e-06, "loss": 0.4692, "step": 3264 }, { "epoch": 0.9166198764738911, "grad_norm": 0.8067586421966553, "learning_rate": 8.767689901076188e-06, "loss": 0.4456, "step": 3265 }, { "epoch": 0.9169006176305446, "grad_norm": 0.7194135189056396, "learning_rate": 8.766615926144389e-06, "loss": 0.4543, "step": 3266 }, { "epoch": 0.9171813587871982, "grad_norm": 0.8041672110557556, "learning_rate": 8.765541549262762e-06, "loss": 0.4115, "step": 3267 }, { "epoch": 0.9174620999438517, "grad_norm": 0.8199875354766846, "learning_rate": 8.764466770545956e-06, "loss": 0.4521, "step": 3268 }, { "epoch": 0.9177428411005053, "grad_norm": 0.7689574360847473, "learning_rate": 8.763391590108666e-06, "loss": 0.4925, "step": 3269 }, { "epoch": 0.9180235822571589, "grad_norm": 0.6917686462402344, "learning_rate": 8.762316008065629e-06, "loss": 0.4152, "step": 3270 }, { "epoch": 0.9183043234138125, "grad_norm": 0.747933566570282, "learning_rate": 8.761240024531624e-06, "loss": 0.4525, "step": 3271 }, { "epoch": 0.9185850645704661, "grad_norm": 0.7590529918670654, "learning_rate": 8.760163639621473e-06, "loss": 0.432, "step": 3272 }, { "epoch": 0.9188658057271196, "grad_norm": 0.6946105360984802, "learning_rate": 8.759086853450042e-06, "loss": 0.4743, "step": 3273 }, { "epoch": 0.9191465468837732, "grad_norm": 0.8134940266609192, "learning_rate": 8.758009666132237e-06, "loss": 0.4833, "step": 3274 }, { "epoch": 0.9194272880404267, "grad_norm": 0.7616158723831177, "learning_rate": 8.75693207778301e-06, "loss": 0.47, "step": 3275 }, { "epoch": 0.9197080291970803, "grad_norm": 0.762461245059967, "learning_rate": 8.755854088517356e-06, "loss": 0.4368, "step": 3276 }, { "epoch": 0.9199887703537338, "grad_norm": 0.7236607074737549, "learning_rate": 8.754775698450308e-06, "loss": 0.4555, "step": 3277 }, { "epoch": 0.9202695115103874, "grad_norm": 0.7588319778442383, "learning_rate": 8.753696907696948e-06, "loss": 0.5008, "step": 3278 }, { "epoch": 0.9205502526670409, "grad_norm": 0.923145592212677, "learning_rate": 8.752617716372397e-06, "loss": 0.4782, "step": 3279 }, { "epoch": 0.9208309938236946, "grad_norm": 0.7173494100570679, "learning_rate": 8.75153812459182e-06, "loss": 0.4647, "step": 3280 }, { "epoch": 0.9211117349803482, "grad_norm": 0.6904281973838806, "learning_rate": 8.75045813247042e-06, "loss": 0.4444, "step": 3281 }, { "epoch": 0.9213924761370017, "grad_norm": 0.7474949359893799, "learning_rate": 8.749377740123454e-06, "loss": 0.4691, "step": 3282 }, { "epoch": 0.9216732172936553, "grad_norm": 0.761879563331604, "learning_rate": 8.74829694766621e-06, "loss": 0.4991, "step": 3283 }, { "epoch": 0.9219539584503088, "grad_norm": 0.7057278752326965, "learning_rate": 8.747215755214024e-06, "loss": 0.4316, "step": 3284 }, { "epoch": 0.9222346996069624, "grad_norm": 0.7089288830757141, "learning_rate": 8.746134162882278e-06, "loss": 0.4484, "step": 3285 }, { "epoch": 0.9225154407636159, "grad_norm": 0.6737016439437866, "learning_rate": 8.745052170786388e-06, "loss": 0.44, "step": 3286 }, { "epoch": 0.9227961819202695, "grad_norm": 0.772479772567749, "learning_rate": 8.743969779041819e-06, "loss": 0.4764, "step": 3287 }, { "epoch": 0.9230769230769231, "grad_norm": 0.7977652549743652, "learning_rate": 8.742886987764077e-06, "loss": 0.4845, "step": 3288 }, { "epoch": 0.9233576642335767, "grad_norm": 0.6931220293045044, "learning_rate": 8.741803797068713e-06, "loss": 0.4234, "step": 3289 }, { "epoch": 0.9236384053902302, "grad_norm": 0.8048646450042725, "learning_rate": 8.740720207071316e-06, "loss": 0.4754, "step": 3290 }, { "epoch": 0.9239191465468838, "grad_norm": 0.6938891410827637, "learning_rate": 8.73963621788752e-06, "loss": 0.4526, "step": 3291 }, { "epoch": 0.9241998877035373, "grad_norm": 0.7346242070198059, "learning_rate": 8.738551829633e-06, "loss": 0.4533, "step": 3292 }, { "epoch": 0.9244806288601909, "grad_norm": 0.7148845791816711, "learning_rate": 8.73746704242348e-06, "loss": 0.4376, "step": 3293 }, { "epoch": 0.9247613700168444, "grad_norm": 0.7251663208007812, "learning_rate": 8.736381856374719e-06, "loss": 0.4273, "step": 3294 }, { "epoch": 0.925042111173498, "grad_norm": 0.6632020473480225, "learning_rate": 8.73529627160252e-06, "loss": 0.4658, "step": 3295 }, { "epoch": 0.9253228523301515, "grad_norm": 0.7892797589302063, "learning_rate": 8.734210288222733e-06, "loss": 0.4717, "step": 3296 }, { "epoch": 0.9256035934868052, "grad_norm": 0.7627503871917725, "learning_rate": 8.733123906351243e-06, "loss": 0.4288, "step": 3297 }, { "epoch": 0.9258843346434588, "grad_norm": 0.775695264339447, "learning_rate": 8.732037126103987e-06, "loss": 0.4468, "step": 3298 }, { "epoch": 0.9261650758001123, "grad_norm": 0.78183913230896, "learning_rate": 8.730949947596934e-06, "loss": 0.4671, "step": 3299 }, { "epoch": 0.9264458169567659, "grad_norm": 0.7109136581420898, "learning_rate": 8.729862370946106e-06, "loss": 0.4143, "step": 3300 }, { "epoch": 0.9267265581134194, "grad_norm": 0.7258944511413574, "learning_rate": 8.72877439626756e-06, "loss": 0.4268, "step": 3301 }, { "epoch": 0.927007299270073, "grad_norm": 0.667835533618927, "learning_rate": 8.727686023677398e-06, "loss": 0.4377, "step": 3302 }, { "epoch": 0.9272880404267265, "grad_norm": 0.7879599928855896, "learning_rate": 8.726597253291764e-06, "loss": 0.4493, "step": 3303 }, { "epoch": 0.9275687815833801, "grad_norm": 0.7433174252510071, "learning_rate": 8.725508085226846e-06, "loss": 0.4511, "step": 3304 }, { "epoch": 0.9278495227400337, "grad_norm": 0.8071883916854858, "learning_rate": 8.724418519598872e-06, "loss": 0.4901, "step": 3305 }, { "epoch": 0.9281302638966873, "grad_norm": 0.6438072919845581, "learning_rate": 8.723328556524116e-06, "loss": 0.4447, "step": 3306 }, { "epoch": 0.9284110050533408, "grad_norm": 0.7367581725120544, "learning_rate": 8.722238196118888e-06, "loss": 0.4168, "step": 3307 }, { "epoch": 0.9286917462099944, "grad_norm": 0.6820290684700012, "learning_rate": 8.721147438499547e-06, "loss": 0.4152, "step": 3308 }, { "epoch": 0.928972487366648, "grad_norm": 0.6569641828536987, "learning_rate": 8.720056283782491e-06, "loss": 0.4201, "step": 3309 }, { "epoch": 0.9292532285233015, "grad_norm": 0.6963849067687988, "learning_rate": 8.718964732084165e-06, "loss": 0.4673, "step": 3310 }, { "epoch": 0.929533969679955, "grad_norm": 0.6471827626228333, "learning_rate": 8.717872783521048e-06, "loss": 0.4461, "step": 3311 }, { "epoch": 0.9298147108366086, "grad_norm": 0.8178622722625732, "learning_rate": 8.716780438209666e-06, "loss": 0.4265, "step": 3312 }, { "epoch": 0.9300954519932623, "grad_norm": 0.7103288769721985, "learning_rate": 8.71568769626659e-06, "loss": 0.4216, "step": 3313 }, { "epoch": 0.9303761931499158, "grad_norm": 0.6218788623809814, "learning_rate": 8.71459455780843e-06, "loss": 0.4347, "step": 3314 }, { "epoch": 0.9306569343065694, "grad_norm": 0.8654505014419556, "learning_rate": 8.713501022951838e-06, "loss": 0.4317, "step": 3315 }, { "epoch": 0.9309376754632229, "grad_norm": 0.7331998348236084, "learning_rate": 8.712407091813508e-06, "loss": 0.4676, "step": 3316 }, { "epoch": 0.9312184166198765, "grad_norm": 0.7468166351318359, "learning_rate": 8.71131276451018e-06, "loss": 0.4631, "step": 3317 }, { "epoch": 0.93149915777653, "grad_norm": 0.6815680265426636, "learning_rate": 8.710218041158633e-06, "loss": 0.4527, "step": 3318 }, { "epoch": 0.9317798989331836, "grad_norm": 0.8265864849090576, "learning_rate": 8.70912292187569e-06, "loss": 0.4583, "step": 3319 }, { "epoch": 0.9320606400898371, "grad_norm": 0.853699803352356, "learning_rate": 8.708027406778214e-06, "loss": 0.4325, "step": 3320 }, { "epoch": 0.9323413812464907, "grad_norm": 0.8384313583374023, "learning_rate": 8.706931495983111e-06, "loss": 0.4675, "step": 3321 }, { "epoch": 0.9326221224031443, "grad_norm": 0.8724325895309448, "learning_rate": 8.70583518960733e-06, "loss": 0.4428, "step": 3322 }, { "epoch": 0.9329028635597979, "grad_norm": 0.9371477365493774, "learning_rate": 8.704738487767864e-06, "loss": 0.4765, "step": 3323 }, { "epoch": 0.9331836047164515, "grad_norm": 0.7754281163215637, "learning_rate": 8.703641390581745e-06, "loss": 0.4344, "step": 3324 }, { "epoch": 0.933464345873105, "grad_norm": 0.754790723323822, "learning_rate": 8.702543898166047e-06, "loss": 0.4562, "step": 3325 }, { "epoch": 0.9337450870297586, "grad_norm": 0.7967555522918701, "learning_rate": 8.701446010637889e-06, "loss": 0.4094, "step": 3326 }, { "epoch": 0.9340258281864121, "grad_norm": 0.7168818712234497, "learning_rate": 8.700347728114431e-06, "loss": 0.4081, "step": 3327 }, { "epoch": 0.9343065693430657, "grad_norm": 0.7467126250267029, "learning_rate": 8.699249050712874e-06, "loss": 0.4396, "step": 3328 }, { "epoch": 0.9345873104997192, "grad_norm": 0.7604356408119202, "learning_rate": 8.698149978550463e-06, "loss": 0.5038, "step": 3329 }, { "epoch": 0.9348680516563729, "grad_norm": 0.6994295120239258, "learning_rate": 8.697050511744484e-06, "loss": 0.4616, "step": 3330 }, { "epoch": 0.9351487928130264, "grad_norm": 0.8779392838478088, "learning_rate": 8.695950650412264e-06, "loss": 0.5152, "step": 3331 }, { "epoch": 0.93542953396968, "grad_norm": 0.7325848340988159, "learning_rate": 8.694850394671175e-06, "loss": 0.4782, "step": 3332 }, { "epoch": 0.9357102751263335, "grad_norm": 0.8201211094856262, "learning_rate": 8.693749744638626e-06, "loss": 0.4464, "step": 3333 }, { "epoch": 0.9359910162829871, "grad_norm": 0.8083143830299377, "learning_rate": 8.692648700432078e-06, "loss": 0.4772, "step": 3334 }, { "epoch": 0.9362717574396406, "grad_norm": 0.7418912649154663, "learning_rate": 8.691547262169021e-06, "loss": 0.4426, "step": 3335 }, { "epoch": 0.9365524985962942, "grad_norm": 0.6536455750465393, "learning_rate": 8.690445429966998e-06, "loss": 0.4068, "step": 3336 }, { "epoch": 0.9368332397529477, "grad_norm": 0.7285296320915222, "learning_rate": 8.689343203943588e-06, "loss": 0.4507, "step": 3337 }, { "epoch": 0.9371139809096013, "grad_norm": 0.8919716477394104, "learning_rate": 8.688240584216412e-06, "loss": 0.4526, "step": 3338 }, { "epoch": 0.937394722066255, "grad_norm": 0.6783115863800049, "learning_rate": 8.687137570903139e-06, "loss": 0.4229, "step": 3339 }, { "epoch": 0.9376754632229085, "grad_norm": 0.7820303440093994, "learning_rate": 8.68603416412147e-06, "loss": 0.4332, "step": 3340 }, { "epoch": 0.9379562043795621, "grad_norm": 0.7348349690437317, "learning_rate": 8.684930363989159e-06, "loss": 0.4271, "step": 3341 }, { "epoch": 0.9382369455362156, "grad_norm": 0.6245999336242676, "learning_rate": 8.683826170623995e-06, "loss": 0.4261, "step": 3342 }, { "epoch": 0.9385176866928692, "grad_norm": 0.7995222210884094, "learning_rate": 8.682721584143809e-06, "loss": 0.4252, "step": 3343 }, { "epoch": 0.9387984278495227, "grad_norm": 0.6974126100540161, "learning_rate": 8.681616604666479e-06, "loss": 0.4402, "step": 3344 }, { "epoch": 0.9390791690061763, "grad_norm": 0.8546789288520813, "learning_rate": 8.680511232309917e-06, "loss": 0.4912, "step": 3345 }, { "epoch": 0.9393599101628298, "grad_norm": 0.688572883605957, "learning_rate": 8.679405467192085e-06, "loss": 0.4905, "step": 3346 }, { "epoch": 0.9396406513194835, "grad_norm": 0.7426177859306335, "learning_rate": 8.678299309430982e-06, "loss": 0.4602, "step": 3347 }, { "epoch": 0.939921392476137, "grad_norm": 0.8280917406082153, "learning_rate": 8.67719275914465e-06, "loss": 0.4562, "step": 3348 }, { "epoch": 0.9402021336327906, "grad_norm": 0.8141161203384399, "learning_rate": 8.676085816451176e-06, "loss": 0.4544, "step": 3349 }, { "epoch": 0.9404828747894441, "grad_norm": 0.7345113754272461, "learning_rate": 8.674978481468681e-06, "loss": 0.4848, "step": 3350 }, { "epoch": 0.9407636159460977, "grad_norm": 0.702564537525177, "learning_rate": 8.673870754315336e-06, "loss": 0.4833, "step": 3351 }, { "epoch": 0.9410443571027512, "grad_norm": 0.8589380383491516, "learning_rate": 8.672762635109351e-06, "loss": 0.4173, "step": 3352 }, { "epoch": 0.9413250982594048, "grad_norm": 0.8307361602783203, "learning_rate": 8.671654123968977e-06, "loss": 0.4069, "step": 3353 }, { "epoch": 0.9416058394160584, "grad_norm": 0.8155995011329651, "learning_rate": 8.67054522101251e-06, "loss": 0.4382, "step": 3354 }, { "epoch": 0.9418865805727119, "grad_norm": 0.9018946290016174, "learning_rate": 8.669435926358278e-06, "loss": 0.4632, "step": 3355 }, { "epoch": 0.9421673217293656, "grad_norm": 0.7798970341682434, "learning_rate": 8.668326240124666e-06, "loss": 0.4419, "step": 3356 }, { "epoch": 0.9424480628860191, "grad_norm": 0.6875411868095398, "learning_rate": 8.667216162430088e-06, "loss": 0.4231, "step": 3357 }, { "epoch": 0.9427288040426727, "grad_norm": 0.6356247067451477, "learning_rate": 8.666105693393007e-06, "loss": 0.4412, "step": 3358 }, { "epoch": 0.9430095451993262, "grad_norm": 0.7255062460899353, "learning_rate": 8.664994833131923e-06, "loss": 0.4225, "step": 3359 }, { "epoch": 0.9432902863559798, "grad_norm": 0.7604843974113464, "learning_rate": 8.663883581765381e-06, "loss": 0.4879, "step": 3360 }, { "epoch": 0.9435710275126333, "grad_norm": 0.6840789318084717, "learning_rate": 8.66277193941197e-06, "loss": 0.4289, "step": 3361 }, { "epoch": 0.9438517686692869, "grad_norm": 0.7160725593566895, "learning_rate": 8.661659906190314e-06, "loss": 0.4596, "step": 3362 }, { "epoch": 0.9441325098259404, "grad_norm": 0.832273006439209, "learning_rate": 8.660547482219082e-06, "loss": 0.4473, "step": 3363 }, { "epoch": 0.9444132509825941, "grad_norm": 0.744295060634613, "learning_rate": 8.659434667616987e-06, "loss": 0.4439, "step": 3364 }, { "epoch": 0.9446939921392477, "grad_norm": 0.7506633400917053, "learning_rate": 8.658321462502782e-06, "loss": 0.4898, "step": 3365 }, { "epoch": 0.9449747332959012, "grad_norm": 0.6658370494842529, "learning_rate": 8.657207866995257e-06, "loss": 0.4086, "step": 3366 }, { "epoch": 0.9452554744525548, "grad_norm": 0.7962737679481506, "learning_rate": 8.656093881213253e-06, "loss": 0.4416, "step": 3367 }, { "epoch": 0.9455362156092083, "grad_norm": 0.76934814453125, "learning_rate": 8.654979505275646e-06, "loss": 0.4574, "step": 3368 }, { "epoch": 0.9458169567658619, "grad_norm": 0.7706449627876282, "learning_rate": 8.653864739301354e-06, "loss": 0.4906, "step": 3369 }, { "epoch": 0.9460976979225154, "grad_norm": 0.7856554388999939, "learning_rate": 8.65274958340934e-06, "loss": 0.4279, "step": 3370 }, { "epoch": 0.946378439079169, "grad_norm": 0.7563718557357788, "learning_rate": 8.651634037718604e-06, "loss": 0.4619, "step": 3371 }, { "epoch": 0.9466591802358225, "grad_norm": 0.7916227579116821, "learning_rate": 8.650518102348193e-06, "loss": 0.4059, "step": 3372 }, { "epoch": 0.9469399213924762, "grad_norm": 0.7160757780075073, "learning_rate": 8.64940177741719e-06, "loss": 0.4192, "step": 3373 }, { "epoch": 0.9472206625491297, "grad_norm": 0.7741641998291016, "learning_rate": 8.648285063044724e-06, "loss": 0.464, "step": 3374 }, { "epoch": 0.9475014037057833, "grad_norm": 0.6588005423545837, "learning_rate": 8.647167959349964e-06, "loss": 0.4651, "step": 3375 }, { "epoch": 0.9477821448624368, "grad_norm": 0.7646698355674744, "learning_rate": 8.646050466452118e-06, "loss": 0.4457, "step": 3376 }, { "epoch": 0.9480628860190904, "grad_norm": 0.9929888844490051, "learning_rate": 8.644932584470442e-06, "loss": 0.4691, "step": 3377 }, { "epoch": 0.9483436271757439, "grad_norm": 0.8656958937644958, "learning_rate": 8.643814313524224e-06, "loss": 0.465, "step": 3378 }, { "epoch": 0.9486243683323975, "grad_norm": 0.7341300845146179, "learning_rate": 8.642695653732804e-06, "loss": 0.4715, "step": 3379 }, { "epoch": 0.948905109489051, "grad_norm": 0.6429067850112915, "learning_rate": 8.641576605215556e-06, "loss": 0.4124, "step": 3380 }, { "epoch": 0.9491858506457047, "grad_norm": 0.9043833613395691, "learning_rate": 8.640457168091898e-06, "loss": 0.4586, "step": 3381 }, { "epoch": 0.9494665918023583, "grad_norm": 0.7792132496833801, "learning_rate": 8.639337342481289e-06, "loss": 0.4596, "step": 3382 }, { "epoch": 0.9497473329590118, "grad_norm": 0.7375581860542297, "learning_rate": 8.63821712850323e-06, "loss": 0.4429, "step": 3383 }, { "epoch": 0.9500280741156654, "grad_norm": 0.6678343415260315, "learning_rate": 8.637096526277264e-06, "loss": 0.4431, "step": 3384 }, { "epoch": 0.9503088152723189, "grad_norm": 0.7538302540779114, "learning_rate": 8.635975535922974e-06, "loss": 0.4527, "step": 3385 }, { "epoch": 0.9505895564289725, "grad_norm": 0.9098194241523743, "learning_rate": 8.634854157559987e-06, "loss": 0.496, "step": 3386 }, { "epoch": 0.950870297585626, "grad_norm": 0.6951583027839661, "learning_rate": 8.633732391307967e-06, "loss": 0.4006, "step": 3387 }, { "epoch": 0.9511510387422796, "grad_norm": 0.708798348903656, "learning_rate": 8.632610237286622e-06, "loss": 0.4065, "step": 3388 }, { "epoch": 0.9514317798989332, "grad_norm": 0.6822143793106079, "learning_rate": 8.631487695615704e-06, "loss": 0.3906, "step": 3389 }, { "epoch": 0.9517125210555868, "grad_norm": 0.8026174306869507, "learning_rate": 8.630364766415e-06, "loss": 0.451, "step": 3390 }, { "epoch": 0.9519932622122403, "grad_norm": 0.7409147620201111, "learning_rate": 8.629241449804344e-06, "loss": 0.4672, "step": 3391 }, { "epoch": 0.9522740033688939, "grad_norm": 0.6837946772575378, "learning_rate": 8.62811774590361e-06, "loss": 0.4578, "step": 3392 }, { "epoch": 0.9525547445255474, "grad_norm": 0.6938409805297852, "learning_rate": 8.626993654832711e-06, "loss": 0.4442, "step": 3393 }, { "epoch": 0.952835485682201, "grad_norm": 0.8176584839820862, "learning_rate": 8.625869176711605e-06, "loss": 0.4755, "step": 3394 }, { "epoch": 0.9531162268388546, "grad_norm": 0.8156872987747192, "learning_rate": 8.624744311660289e-06, "loss": 0.4752, "step": 3395 }, { "epoch": 0.9533969679955081, "grad_norm": 0.7634759545326233, "learning_rate": 8.6236190597988e-06, "loss": 0.4407, "step": 3396 }, { "epoch": 0.9536777091521617, "grad_norm": 0.7569522857666016, "learning_rate": 8.622493421247218e-06, "loss": 0.3901, "step": 3397 }, { "epoch": 0.9539584503088153, "grad_norm": 0.7346652746200562, "learning_rate": 8.621367396125666e-06, "loss": 0.4364, "step": 3398 }, { "epoch": 0.9542391914654689, "grad_norm": 0.7377001047134399, "learning_rate": 8.620240984554305e-06, "loss": 0.4102, "step": 3399 }, { "epoch": 0.9545199326221224, "grad_norm": 0.7047996520996094, "learning_rate": 8.61911418665334e-06, "loss": 0.433, "step": 3400 }, { "epoch": 0.954800673778776, "grad_norm": 0.6999577879905701, "learning_rate": 8.617987002543012e-06, "loss": 0.4404, "step": 3401 }, { "epoch": 0.9550814149354295, "grad_norm": 0.6502342820167542, "learning_rate": 8.616859432343612e-06, "loss": 0.4072, "step": 3402 }, { "epoch": 0.9553621560920831, "grad_norm": 0.9056606888771057, "learning_rate": 8.615731476175464e-06, "loss": 0.4228, "step": 3403 }, { "epoch": 0.9556428972487366, "grad_norm": 0.7005042433738708, "learning_rate": 8.614603134158938e-06, "loss": 0.4122, "step": 3404 }, { "epoch": 0.9559236384053902, "grad_norm": 0.6874243021011353, "learning_rate": 8.613474406414443e-06, "loss": 0.4559, "step": 3405 }, { "epoch": 0.9562043795620438, "grad_norm": 0.7888487577438354, "learning_rate": 8.612345293062433e-06, "loss": 0.4843, "step": 3406 }, { "epoch": 0.9564851207186974, "grad_norm": 0.9484554529190063, "learning_rate": 8.611215794223393e-06, "loss": 0.4455, "step": 3407 }, { "epoch": 0.956765861875351, "grad_norm": 0.7570213675498962, "learning_rate": 8.610085910017861e-06, "loss": 0.4466, "step": 3408 }, { "epoch": 0.9570466030320045, "grad_norm": 0.7415023446083069, "learning_rate": 8.60895564056641e-06, "loss": 0.4598, "step": 3409 }, { "epoch": 0.9573273441886581, "grad_norm": 0.7095988392829895, "learning_rate": 8.607824985989658e-06, "loss": 0.4183, "step": 3410 }, { "epoch": 0.9576080853453116, "grad_norm": 0.7952398657798767, "learning_rate": 8.606693946408258e-06, "loss": 0.498, "step": 3411 }, { "epoch": 0.9578888265019652, "grad_norm": 0.718460202217102, "learning_rate": 8.605562521942907e-06, "loss": 0.4356, "step": 3412 }, { "epoch": 0.9581695676586187, "grad_norm": 0.8813295364379883, "learning_rate": 8.604430712714348e-06, "loss": 0.4674, "step": 3413 }, { "epoch": 0.9584503088152723, "grad_norm": 0.6620175242424011, "learning_rate": 8.603298518843354e-06, "loss": 0.435, "step": 3414 }, { "epoch": 0.9587310499719259, "grad_norm": 0.7960380911827087, "learning_rate": 8.602165940450754e-06, "loss": 0.4917, "step": 3415 }, { "epoch": 0.9590117911285795, "grad_norm": 0.8146663308143616, "learning_rate": 8.601032977657402e-06, "loss": 0.3953, "step": 3416 }, { "epoch": 0.959292532285233, "grad_norm": 0.8786693215370178, "learning_rate": 8.599899630584206e-06, "loss": 0.4852, "step": 3417 }, { "epoch": 0.9595732734418866, "grad_norm": 0.7795003056526184, "learning_rate": 8.598765899352106e-06, "loss": 0.4919, "step": 3418 }, { "epoch": 0.9598540145985401, "grad_norm": 0.7619941234588623, "learning_rate": 8.597631784082089e-06, "loss": 0.4788, "step": 3419 }, { "epoch": 0.9601347557551937, "grad_norm": 0.90803462266922, "learning_rate": 8.59649728489518e-06, "loss": 0.4726, "step": 3420 }, { "epoch": 0.9604154969118472, "grad_norm": 0.6915274262428284, "learning_rate": 8.595362401912446e-06, "loss": 0.3921, "step": 3421 }, { "epoch": 0.9606962380685008, "grad_norm": 0.6664067506790161, "learning_rate": 8.594227135254996e-06, "loss": 0.3992, "step": 3422 }, { "epoch": 0.9609769792251545, "grad_norm": 0.8315762877464294, "learning_rate": 8.593091485043976e-06, "loss": 0.4604, "step": 3423 }, { "epoch": 0.961257720381808, "grad_norm": 0.8344126343727112, "learning_rate": 8.591955451400575e-06, "loss": 0.4371, "step": 3424 }, { "epoch": 0.9615384615384616, "grad_norm": 0.8421126008033752, "learning_rate": 8.590819034446027e-06, "loss": 0.4177, "step": 3425 }, { "epoch": 0.9618192026951151, "grad_norm": 0.7112802863121033, "learning_rate": 8.589682234301601e-06, "loss": 0.433, "step": 3426 }, { "epoch": 0.9620999438517687, "grad_norm": 0.7226215600967407, "learning_rate": 8.58854505108861e-06, "loss": 0.4044, "step": 3427 }, { "epoch": 0.9623806850084222, "grad_norm": 0.8644697666168213, "learning_rate": 8.587407484928408e-06, "loss": 0.4163, "step": 3428 }, { "epoch": 0.9626614261650758, "grad_norm": 0.7910282611846924, "learning_rate": 8.586269535942386e-06, "loss": 0.4567, "step": 3429 }, { "epoch": 0.9629421673217293, "grad_norm": 0.8188333511352539, "learning_rate": 8.585131204251982e-06, "loss": 0.4933, "step": 3430 }, { "epoch": 0.9632229084783829, "grad_norm": 0.6796020865440369, "learning_rate": 8.583992489978669e-06, "loss": 0.4275, "step": 3431 }, { "epoch": 0.9635036496350365, "grad_norm": 0.8388454914093018, "learning_rate": 8.582853393243965e-06, "loss": 0.444, "step": 3432 }, { "epoch": 0.9637843907916901, "grad_norm": 0.6707009077072144, "learning_rate": 8.581713914169428e-06, "loss": 0.4368, "step": 3433 }, { "epoch": 0.9640651319483436, "grad_norm": 0.8923273682594299, "learning_rate": 8.580574052876653e-06, "loss": 0.4547, "step": 3434 }, { "epoch": 0.9643458731049972, "grad_norm": 0.6617672443389893, "learning_rate": 8.579433809487285e-06, "loss": 0.4365, "step": 3435 }, { "epoch": 0.9646266142616507, "grad_norm": 0.677503764629364, "learning_rate": 8.578293184122997e-06, "loss": 0.4347, "step": 3436 }, { "epoch": 0.9649073554183043, "grad_norm": 0.6488812565803528, "learning_rate": 8.577152176905515e-06, "loss": 0.3938, "step": 3437 }, { "epoch": 0.9651880965749579, "grad_norm": 0.8241518139839172, "learning_rate": 8.576010787956595e-06, "loss": 0.4447, "step": 3438 }, { "epoch": 0.9654688377316114, "grad_norm": 0.7706436514854431, "learning_rate": 8.574869017398042e-06, "loss": 0.421, "step": 3439 }, { "epoch": 0.9657495788882651, "grad_norm": 0.6720361709594727, "learning_rate": 8.573726865351698e-06, "loss": 0.4072, "step": 3440 }, { "epoch": 0.9660303200449186, "grad_norm": 0.7769226431846619, "learning_rate": 8.572584331939447e-06, "loss": 0.4641, "step": 3441 }, { "epoch": 0.9663110612015722, "grad_norm": 0.6810237169265747, "learning_rate": 8.571441417283214e-06, "loss": 0.4512, "step": 3442 }, { "epoch": 0.9665918023582257, "grad_norm": 0.7702351808547974, "learning_rate": 8.570298121504958e-06, "loss": 0.4223, "step": 3443 }, { "epoch": 0.9668725435148793, "grad_norm": 0.7678765058517456, "learning_rate": 8.569154444726692e-06, "loss": 0.446, "step": 3444 }, { "epoch": 0.9671532846715328, "grad_norm": 0.8248004913330078, "learning_rate": 8.568010387070458e-06, "loss": 0.512, "step": 3445 }, { "epoch": 0.9674340258281864, "grad_norm": 0.633748471736908, "learning_rate": 8.566865948658344e-06, "loss": 0.4211, "step": 3446 }, { "epoch": 0.9677147669848399, "grad_norm": 0.6683886647224426, "learning_rate": 8.565721129612476e-06, "loss": 0.4307, "step": 3447 }, { "epoch": 0.9679955081414935, "grad_norm": 0.6899271607398987, "learning_rate": 8.564575930055023e-06, "loss": 0.4438, "step": 3448 }, { "epoch": 0.9682762492981472, "grad_norm": 0.7076660394668579, "learning_rate": 8.563430350108194e-06, "loss": 0.4136, "step": 3449 }, { "epoch": 0.9685569904548007, "grad_norm": 0.8247249722480774, "learning_rate": 8.562284389894238e-06, "loss": 0.4718, "step": 3450 }, { "epoch": 0.9688377316114543, "grad_norm": 0.8279039263725281, "learning_rate": 8.561138049535443e-06, "loss": 0.454, "step": 3451 }, { "epoch": 0.9691184727681078, "grad_norm": 0.730094313621521, "learning_rate": 8.55999132915414e-06, "loss": 0.4339, "step": 3452 }, { "epoch": 0.9693992139247614, "grad_norm": 0.7048096656799316, "learning_rate": 8.558844228872702e-06, "loss": 0.4793, "step": 3453 }, { "epoch": 0.9696799550814149, "grad_norm": 0.6670785546302795, "learning_rate": 8.55769674881354e-06, "loss": 0.3888, "step": 3454 }, { "epoch": 0.9699606962380685, "grad_norm": 0.6821185350418091, "learning_rate": 8.556548889099102e-06, "loss": 0.4079, "step": 3455 }, { "epoch": 0.970241437394722, "grad_norm": 0.7230536937713623, "learning_rate": 8.555400649851884e-06, "loss": 0.4325, "step": 3456 }, { "epoch": 0.9705221785513757, "grad_norm": 0.7355298399925232, "learning_rate": 8.554252031194418e-06, "loss": 0.4413, "step": 3457 }, { "epoch": 0.9708029197080292, "grad_norm": 0.7354752421379089, "learning_rate": 8.55310303324928e-06, "loss": 0.4112, "step": 3458 }, { "epoch": 0.9710836608646828, "grad_norm": 0.6842572093009949, "learning_rate": 8.551953656139079e-06, "loss": 0.4457, "step": 3459 }, { "epoch": 0.9713644020213363, "grad_norm": 0.6387802362442017, "learning_rate": 8.550803899986473e-06, "loss": 0.4223, "step": 3460 }, { "epoch": 0.9716451431779899, "grad_norm": 0.6626890301704407, "learning_rate": 8.549653764914157e-06, "loss": 0.4146, "step": 3461 }, { "epoch": 0.9719258843346434, "grad_norm": 0.8330475687980652, "learning_rate": 8.548503251044863e-06, "loss": 0.4799, "step": 3462 }, { "epoch": 0.972206625491297, "grad_norm": 0.7225596308708191, "learning_rate": 8.54735235850137e-06, "loss": 0.4397, "step": 3463 }, { "epoch": 0.9724873666479505, "grad_norm": 0.6990557909011841, "learning_rate": 8.546201087406491e-06, "loss": 0.3915, "step": 3464 }, { "epoch": 0.9727681078046042, "grad_norm": 0.8358392715454102, "learning_rate": 8.545049437883087e-06, "loss": 0.4082, "step": 3465 }, { "epoch": 0.9730488489612578, "grad_norm": 0.810123860836029, "learning_rate": 8.54389741005405e-06, "loss": 0.4455, "step": 3466 }, { "epoch": 0.9733295901179113, "grad_norm": 0.8371599912643433, "learning_rate": 8.542745004042321e-06, "loss": 0.4897, "step": 3467 }, { "epoch": 0.9736103312745649, "grad_norm": 0.8034262657165527, "learning_rate": 8.541592219970876e-06, "loss": 0.4369, "step": 3468 }, { "epoch": 0.9738910724312184, "grad_norm": 0.8848324418067932, "learning_rate": 8.540439057962731e-06, "loss": 0.4607, "step": 3469 }, { "epoch": 0.974171813587872, "grad_norm": 0.691663920879364, "learning_rate": 8.539285518140947e-06, "loss": 0.4495, "step": 3470 }, { "epoch": 0.9744525547445255, "grad_norm": 0.6771031022071838, "learning_rate": 8.538131600628624e-06, "loss": 0.4768, "step": 3471 }, { "epoch": 0.9747332959011791, "grad_norm": 0.819758415222168, "learning_rate": 8.536977305548898e-06, "loss": 0.4754, "step": 3472 }, { "epoch": 0.9750140370578326, "grad_norm": 0.7135564684867859, "learning_rate": 8.535822633024946e-06, "loss": 0.4282, "step": 3473 }, { "epoch": 0.9752947782144863, "grad_norm": 0.7830508351325989, "learning_rate": 8.534667583179993e-06, "loss": 0.4637, "step": 3474 }, { "epoch": 0.9755755193711398, "grad_norm": 0.6742008328437805, "learning_rate": 8.533512156137297e-06, "loss": 0.4641, "step": 3475 }, { "epoch": 0.9758562605277934, "grad_norm": 0.8535793423652649, "learning_rate": 8.532356352020155e-06, "loss": 0.468, "step": 3476 }, { "epoch": 0.976137001684447, "grad_norm": 0.7796273231506348, "learning_rate": 8.53120017095191e-06, "loss": 0.4336, "step": 3477 }, { "epoch": 0.9764177428411005, "grad_norm": 0.8205191493034363, "learning_rate": 8.530043613055942e-06, "loss": 0.4699, "step": 3478 }, { "epoch": 0.976698483997754, "grad_norm": 0.7456270456314087, "learning_rate": 8.528886678455671e-06, "loss": 0.4745, "step": 3479 }, { "epoch": 0.9769792251544076, "grad_norm": 0.8388075232505798, "learning_rate": 8.527729367274559e-06, "loss": 0.4143, "step": 3480 }, { "epoch": 0.9772599663110612, "grad_norm": 0.6366397142410278, "learning_rate": 8.526571679636107e-06, "loss": 0.4288, "step": 3481 }, { "epoch": 0.9775407074677148, "grad_norm": 0.6700262427330017, "learning_rate": 8.525413615663855e-06, "loss": 0.4778, "step": 3482 }, { "epoch": 0.9778214486243684, "grad_norm": 0.9022010564804077, "learning_rate": 8.524255175481387e-06, "loss": 0.4453, "step": 3483 }, { "epoch": 0.9781021897810219, "grad_norm": 0.7886511087417603, "learning_rate": 8.52309635921232e-06, "loss": 0.4261, "step": 3484 }, { "epoch": 0.9783829309376755, "grad_norm": 0.9262488484382629, "learning_rate": 8.521937166980318e-06, "loss": 0.4301, "step": 3485 }, { "epoch": 0.978663672094329, "grad_norm": 0.9617725014686584, "learning_rate": 8.520777598909084e-06, "loss": 0.4843, "step": 3486 }, { "epoch": 0.9789444132509826, "grad_norm": 0.7633271813392639, "learning_rate": 8.51961765512236e-06, "loss": 0.4659, "step": 3487 }, { "epoch": 0.9792251544076361, "grad_norm": 0.7121231555938721, "learning_rate": 8.518457335743927e-06, "loss": 0.4424, "step": 3488 }, { "epoch": 0.9795058955642897, "grad_norm": 0.7436328530311584, "learning_rate": 8.517296640897606e-06, "loss": 0.5038, "step": 3489 }, { "epoch": 0.9797866367209432, "grad_norm": 0.7411491274833679, "learning_rate": 8.516135570707258e-06, "loss": 0.4454, "step": 3490 }, { "epoch": 0.9800673778775969, "grad_norm": 0.783284068107605, "learning_rate": 8.51497412529679e-06, "loss": 0.433, "step": 3491 }, { "epoch": 0.9803481190342505, "grad_norm": 0.6556893587112427, "learning_rate": 8.513812304790141e-06, "loss": 0.4297, "step": 3492 }, { "epoch": 0.980628860190904, "grad_norm": 0.706886887550354, "learning_rate": 8.512650109311293e-06, "loss": 0.4145, "step": 3493 }, { "epoch": 0.9809096013475576, "grad_norm": 0.7100974917411804, "learning_rate": 8.511487538984268e-06, "loss": 0.4335, "step": 3494 }, { "epoch": 0.9811903425042111, "grad_norm": 0.791081964969635, "learning_rate": 8.510324593933132e-06, "loss": 0.458, "step": 3495 }, { "epoch": 0.9814710836608647, "grad_norm": 0.7544689178466797, "learning_rate": 8.509161274281984e-06, "loss": 0.4499, "step": 3496 }, { "epoch": 0.9817518248175182, "grad_norm": 0.6856833100318909, "learning_rate": 8.507997580154967e-06, "loss": 0.4043, "step": 3497 }, { "epoch": 0.9820325659741718, "grad_norm": 0.7366673946380615, "learning_rate": 8.506833511676262e-06, "loss": 0.47, "step": 3498 }, { "epoch": 0.9823133071308254, "grad_norm": 0.7325769066810608, "learning_rate": 8.505669068970092e-06, "loss": 0.4502, "step": 3499 }, { "epoch": 0.982594048287479, "grad_norm": 0.7868366241455078, "learning_rate": 8.50450425216072e-06, "loss": 0.4358, "step": 3500 }, { "epoch": 0.9828747894441325, "grad_norm": 0.7951871156692505, "learning_rate": 8.503339061372449e-06, "loss": 0.4496, "step": 3501 }, { "epoch": 0.9831555306007861, "grad_norm": 0.7538761496543884, "learning_rate": 8.502173496729615e-06, "loss": 0.4516, "step": 3502 }, { "epoch": 0.9834362717574396, "grad_norm": 0.6773754358291626, "learning_rate": 8.501007558356607e-06, "loss": 0.397, "step": 3503 }, { "epoch": 0.9837170129140932, "grad_norm": 0.8450381755828857, "learning_rate": 8.499841246377844e-06, "loss": 0.4612, "step": 3504 }, { "epoch": 0.9839977540707467, "grad_norm": 0.8215748071670532, "learning_rate": 8.498674560917785e-06, "loss": 0.4717, "step": 3505 }, { "epoch": 0.9842784952274003, "grad_norm": 0.7071011662483215, "learning_rate": 8.497507502100935e-06, "loss": 0.3929, "step": 3506 }, { "epoch": 0.9845592363840538, "grad_norm": 0.7242488861083984, "learning_rate": 8.496340070051834e-06, "loss": 0.4434, "step": 3507 }, { "epoch": 0.9848399775407075, "grad_norm": 0.8573377728462219, "learning_rate": 8.495172264895065e-06, "loss": 0.434, "step": 3508 }, { "epoch": 0.9851207186973611, "grad_norm": 0.8474485278129578, "learning_rate": 8.494004086755243e-06, "loss": 0.4508, "step": 3509 }, { "epoch": 0.9854014598540146, "grad_norm": 0.9176875948905945, "learning_rate": 8.492835535757037e-06, "loss": 0.463, "step": 3510 }, { "epoch": 0.9856822010106682, "grad_norm": 0.8488715291023254, "learning_rate": 8.491666612025139e-06, "loss": 0.4434, "step": 3511 }, { "epoch": 0.9859629421673217, "grad_norm": 0.9118342995643616, "learning_rate": 8.490497315684295e-06, "loss": 0.4793, "step": 3512 }, { "epoch": 0.9862436833239753, "grad_norm": 0.7651602625846863, "learning_rate": 8.489327646859284e-06, "loss": 0.4614, "step": 3513 }, { "epoch": 0.9865244244806288, "grad_norm": 0.8609891533851624, "learning_rate": 8.488157605674924e-06, "loss": 0.4728, "step": 3514 }, { "epoch": 0.9868051656372824, "grad_norm": 0.8994102478027344, "learning_rate": 8.486987192256077e-06, "loss": 0.4449, "step": 3515 }, { "epoch": 0.987085906793936, "grad_norm": 0.8351258635520935, "learning_rate": 8.48581640672764e-06, "loss": 0.4221, "step": 3516 }, { "epoch": 0.9873666479505896, "grad_norm": 0.7111465334892273, "learning_rate": 8.484645249214554e-06, "loss": 0.4358, "step": 3517 }, { "epoch": 0.9876473891072431, "grad_norm": 0.7977031469345093, "learning_rate": 8.483473719841794e-06, "loss": 0.4559, "step": 3518 }, { "epoch": 0.9879281302638967, "grad_norm": 0.8247277140617371, "learning_rate": 8.482301818734384e-06, "loss": 0.4703, "step": 3519 }, { "epoch": 0.9882088714205502, "grad_norm": 0.7320131063461304, "learning_rate": 8.481129546017379e-06, "loss": 0.4569, "step": 3520 }, { "epoch": 0.9884896125772038, "grad_norm": 0.6837695837020874, "learning_rate": 8.479956901815875e-06, "loss": 0.4665, "step": 3521 }, { "epoch": 0.9887703537338574, "grad_norm": 0.7357917428016663, "learning_rate": 8.47878388625501e-06, "loss": 0.4456, "step": 3522 }, { "epoch": 0.9890510948905109, "grad_norm": 0.8145801424980164, "learning_rate": 8.477610499459964e-06, "loss": 0.4541, "step": 3523 }, { "epoch": 0.9893318360471645, "grad_norm": 0.6550049185752869, "learning_rate": 8.476436741555952e-06, "loss": 0.4096, "step": 3524 }, { "epoch": 0.9896125772038181, "grad_norm": 0.718496561050415, "learning_rate": 8.475262612668227e-06, "loss": 0.4405, "step": 3525 }, { "epoch": 0.9898933183604717, "grad_norm": 0.8126087784767151, "learning_rate": 8.474088112922087e-06, "loss": 0.4461, "step": 3526 }, { "epoch": 0.9901740595171252, "grad_norm": 0.7250719666481018, "learning_rate": 8.47291324244287e-06, "loss": 0.4229, "step": 3527 }, { "epoch": 0.9904548006737788, "grad_norm": 0.8736149668693542, "learning_rate": 8.471738001355947e-06, "loss": 0.4433, "step": 3528 }, { "epoch": 0.9907355418304323, "grad_norm": 0.6776615381240845, "learning_rate": 8.470562389786733e-06, "loss": 0.4244, "step": 3529 }, { "epoch": 0.9910162829870859, "grad_norm": 0.7001596689224243, "learning_rate": 8.469386407860683e-06, "loss": 0.4742, "step": 3530 }, { "epoch": 0.9912970241437394, "grad_norm": 0.697855532169342, "learning_rate": 8.468210055703291e-06, "loss": 0.4188, "step": 3531 }, { "epoch": 0.991577765300393, "grad_norm": 0.7609755992889404, "learning_rate": 8.467033333440089e-06, "loss": 0.4595, "step": 3532 }, { "epoch": 0.9918585064570467, "grad_norm": 0.7395150065422058, "learning_rate": 8.46585624119665e-06, "loss": 0.4547, "step": 3533 }, { "epoch": 0.9921392476137002, "grad_norm": 0.7125252485275269, "learning_rate": 8.464678779098586e-06, "loss": 0.433, "step": 3534 }, { "epoch": 0.9924199887703538, "grad_norm": 0.7173366546630859, "learning_rate": 8.463500947271547e-06, "loss": 0.4771, "step": 3535 }, { "epoch": 0.9927007299270073, "grad_norm": 0.6565790176391602, "learning_rate": 8.462322745841225e-06, "loss": 0.4403, "step": 3536 }, { "epoch": 0.9929814710836609, "grad_norm": 0.7130043506622314, "learning_rate": 8.46114417493335e-06, "loss": 0.4347, "step": 3537 }, { "epoch": 0.9932622122403144, "grad_norm": 0.7157816886901855, "learning_rate": 8.459965234673695e-06, "loss": 0.4277, "step": 3538 }, { "epoch": 0.993542953396968, "grad_norm": 0.8052875399589539, "learning_rate": 8.458785925188064e-06, "loss": 0.4847, "step": 3539 }, { "epoch": 0.9938236945536215, "grad_norm": 0.6643232703208923, "learning_rate": 8.457606246602307e-06, "loss": 0.4288, "step": 3540 }, { "epoch": 0.9941044357102752, "grad_norm": 0.6808356046676636, "learning_rate": 8.456426199042314e-06, "loss": 0.4316, "step": 3541 }, { "epoch": 0.9943851768669287, "grad_norm": 0.6699117422103882, "learning_rate": 8.455245782634011e-06, "loss": 0.4125, "step": 3542 }, { "epoch": 0.9946659180235823, "grad_norm": 0.7270050048828125, "learning_rate": 8.454064997503365e-06, "loss": 0.4592, "step": 3543 }, { "epoch": 0.9949466591802358, "grad_norm": 0.8155306577682495, "learning_rate": 8.45288384377638e-06, "loss": 0.4148, "step": 3544 }, { "epoch": 0.9952274003368894, "grad_norm": 0.6916264295578003, "learning_rate": 8.451702321579106e-06, "loss": 0.4164, "step": 3545 }, { "epoch": 0.9955081414935429, "grad_norm": 0.682720422744751, "learning_rate": 8.450520431037624e-06, "loss": 0.4084, "step": 3546 }, { "epoch": 0.9957888826501965, "grad_norm": 0.777711033821106, "learning_rate": 8.44933817227806e-06, "loss": 0.4258, "step": 3547 }, { "epoch": 0.99606962380685, "grad_norm": 0.6997545957565308, "learning_rate": 8.448155545426573e-06, "loss": 0.4614, "step": 3548 }, { "epoch": 0.9963503649635036, "grad_norm": 0.7430494427680969, "learning_rate": 8.446972550609372e-06, "loss": 0.4869, "step": 3549 }, { "epoch": 0.9966311061201573, "grad_norm": 0.7802561521530151, "learning_rate": 8.445789187952696e-06, "loss": 0.4555, "step": 3550 }, { "epoch": 0.9969118472768108, "grad_norm": 0.7442938089370728, "learning_rate": 8.444605457582823e-06, "loss": 0.4606, "step": 3551 }, { "epoch": 0.9971925884334644, "grad_norm": 0.6580045223236084, "learning_rate": 8.443421359626078e-06, "loss": 0.4634, "step": 3552 }, { "epoch": 0.9974733295901179, "grad_norm": 0.6235171556472778, "learning_rate": 8.442236894208819e-06, "loss": 0.4203, "step": 3553 }, { "epoch": 0.9977540707467715, "grad_norm": 0.6908484697341919, "learning_rate": 8.441052061457444e-06, "loss": 0.4687, "step": 3554 }, { "epoch": 0.998034811903425, "grad_norm": 0.7158430218696594, "learning_rate": 8.439866861498392e-06, "loss": 0.4334, "step": 3555 }, { "epoch": 0.9983155530600786, "grad_norm": 0.6933430433273315, "learning_rate": 8.438681294458137e-06, "loss": 0.4424, "step": 3556 }, { "epoch": 0.9985962942167321, "grad_norm": 0.8389679193496704, "learning_rate": 8.4374953604632e-06, "loss": 0.4369, "step": 3557 }, { "epoch": 0.9988770353733858, "grad_norm": 0.6856235861778259, "learning_rate": 8.436309059640136e-06, "loss": 0.4675, "step": 3558 }, { "epoch": 0.9991577765300393, "grad_norm": 0.7605506181716919, "learning_rate": 8.435122392115536e-06, "loss": 0.4366, "step": 3559 }, { "epoch": 0.9994385176866929, "grad_norm": 0.6978572607040405, "learning_rate": 8.433935358016037e-06, "loss": 0.4646, "step": 3560 }, { "epoch": 0.9997192588433464, "grad_norm": 0.6760448217391968, "learning_rate": 8.43274795746831e-06, "loss": 0.4125, "step": 3561 }, { "epoch": 1.0, "grad_norm": 0.6113824248313904, "learning_rate": 8.431560190599069e-06, "loss": 0.3944, "step": 3562 }, { "epoch": 1.0002807411566537, "grad_norm": 0.8328418135643005, "learning_rate": 8.430372057535063e-06, "loss": 0.4023, "step": 3563 }, { "epoch": 1.000561482313307, "grad_norm": 0.7610487341880798, "learning_rate": 8.429183558403083e-06, "loss": 0.4073, "step": 3564 }, { "epoch": 1.0008422234699608, "grad_norm": 0.8734410405158997, "learning_rate": 8.427994693329959e-06, "loss": 0.4443, "step": 3565 }, { "epoch": 1.0011229646266142, "grad_norm": 0.7603929042816162, "learning_rate": 8.426805462442558e-06, "loss": 0.3791, "step": 3566 }, { "epoch": 1.0014037057832679, "grad_norm": 0.7288072109222412, "learning_rate": 8.42561586586779e-06, "loss": 0.3872, "step": 3567 }, { "epoch": 1.0016844469399213, "grad_norm": 0.7560978531837463, "learning_rate": 8.424425903732596e-06, "loss": 0.3625, "step": 3568 }, { "epoch": 1.001965188096575, "grad_norm": 0.6720370054244995, "learning_rate": 8.423235576163966e-06, "loss": 0.4031, "step": 3569 }, { "epoch": 1.0022459292532284, "grad_norm": 0.6554220914840698, "learning_rate": 8.422044883288922e-06, "loss": 0.3758, "step": 3570 }, { "epoch": 1.002526670409882, "grad_norm": 0.7182668447494507, "learning_rate": 8.42085382523453e-06, "loss": 0.3685, "step": 3571 }, { "epoch": 1.0028074115665357, "grad_norm": 0.6917582154273987, "learning_rate": 8.41966240212789e-06, "loss": 0.3874, "step": 3572 }, { "epoch": 1.0030881527231892, "grad_norm": 0.6168217062950134, "learning_rate": 8.418470614096144e-06, "loss": 0.419, "step": 3573 }, { "epoch": 1.0033688938798428, "grad_norm": 0.6358725428581238, "learning_rate": 8.417278461266472e-06, "loss": 0.3979, "step": 3574 }, { "epoch": 1.0036496350364963, "grad_norm": 0.6770613789558411, "learning_rate": 8.416085943766095e-06, "loss": 0.4123, "step": 3575 }, { "epoch": 1.00393037619315, "grad_norm": 0.5870041847229004, "learning_rate": 8.414893061722267e-06, "loss": 0.4045, "step": 3576 }, { "epoch": 1.0042111173498034, "grad_norm": 0.6959606409072876, "learning_rate": 8.413699815262289e-06, "loss": 0.3818, "step": 3577 }, { "epoch": 1.004491858506457, "grad_norm": 0.669116199016571, "learning_rate": 8.412506204513494e-06, "loss": 0.3662, "step": 3578 }, { "epoch": 1.0047725996631107, "grad_norm": 0.5783640742301941, "learning_rate": 8.411312229603257e-06, "loss": 0.3563, "step": 3579 }, { "epoch": 1.0050533408197642, "grad_norm": 0.7830600142478943, "learning_rate": 8.410117890658994e-06, "loss": 0.427, "step": 3580 }, { "epoch": 1.0053340819764178, "grad_norm": 0.7403177618980408, "learning_rate": 8.408923187808156e-06, "loss": 0.4321, "step": 3581 }, { "epoch": 1.0056148231330713, "grad_norm": 0.8039252161979675, "learning_rate": 8.407728121178232e-06, "loss": 0.3812, "step": 3582 }, { "epoch": 1.005895564289725, "grad_norm": 0.6774498224258423, "learning_rate": 8.406532690896756e-06, "loss": 0.4349, "step": 3583 }, { "epoch": 1.0061763054463784, "grad_norm": 0.7238128185272217, "learning_rate": 8.405336897091294e-06, "loss": 0.3924, "step": 3584 }, { "epoch": 1.006457046603032, "grad_norm": 0.7300281524658203, "learning_rate": 8.404140739889455e-06, "loss": 0.3747, "step": 3585 }, { "epoch": 1.0067377877596855, "grad_norm": 0.6838752627372742, "learning_rate": 8.402944219418887e-06, "loss": 0.3846, "step": 3586 }, { "epoch": 1.0070185289163391, "grad_norm": 0.6838813424110413, "learning_rate": 8.40174733580727e-06, "loss": 0.3811, "step": 3587 }, { "epoch": 1.0072992700729928, "grad_norm": 0.7132014632225037, "learning_rate": 8.400550089182334e-06, "loss": 0.402, "step": 3588 }, { "epoch": 1.0075800112296462, "grad_norm": 0.594083845615387, "learning_rate": 8.399352479671839e-06, "loss": 0.3855, "step": 3589 }, { "epoch": 1.0078607523863, "grad_norm": 0.638372540473938, "learning_rate": 8.398154507403587e-06, "loss": 0.3787, "step": 3590 }, { "epoch": 1.0081414935429533, "grad_norm": 0.626323401927948, "learning_rate": 8.396956172505414e-06, "loss": 0.3891, "step": 3591 }, { "epoch": 1.008422234699607, "grad_norm": 0.7636907696723938, "learning_rate": 8.395757475105206e-06, "loss": 0.404, "step": 3592 }, { "epoch": 1.0087029758562605, "grad_norm": 0.6529296636581421, "learning_rate": 8.394558415330879e-06, "loss": 0.3461, "step": 3593 }, { "epoch": 1.0089837170129141, "grad_norm": 0.692482054233551, "learning_rate": 8.393358993310384e-06, "loss": 0.3659, "step": 3594 }, { "epoch": 1.0092644581695676, "grad_norm": 0.6872764229774475, "learning_rate": 8.392159209171717e-06, "loss": 0.3768, "step": 3595 }, { "epoch": 1.0095451993262212, "grad_norm": 0.7262110710144043, "learning_rate": 8.390959063042917e-06, "loss": 0.3879, "step": 3596 }, { "epoch": 1.0098259404828749, "grad_norm": 0.6634448766708374, "learning_rate": 8.389758555052053e-06, "loss": 0.3694, "step": 3597 }, { "epoch": 1.0101066816395283, "grad_norm": 0.6361666321754456, "learning_rate": 8.388557685327234e-06, "loss": 0.3851, "step": 3598 }, { "epoch": 1.010387422796182, "grad_norm": 0.6331276297569275, "learning_rate": 8.387356453996612e-06, "loss": 0.3729, "step": 3599 }, { "epoch": 1.0106681639528354, "grad_norm": 0.66754150390625, "learning_rate": 8.386154861188374e-06, "loss": 0.3871, "step": 3600 }, { "epoch": 1.010948905109489, "grad_norm": 0.822075605392456, "learning_rate": 8.384952907030744e-06, "loss": 0.4391, "step": 3601 }, { "epoch": 1.0112296462661425, "grad_norm": 0.8297271132469177, "learning_rate": 8.383750591651991e-06, "loss": 0.4098, "step": 3602 }, { "epoch": 1.0115103874227962, "grad_norm": 0.6594547629356384, "learning_rate": 8.382547915180417e-06, "loss": 0.4068, "step": 3603 }, { "epoch": 1.0117911285794496, "grad_norm": 0.7082101702690125, "learning_rate": 8.381344877744366e-06, "loss": 0.3876, "step": 3604 }, { "epoch": 1.0120718697361033, "grad_norm": 0.7290804386138916, "learning_rate": 8.380141479472214e-06, "loss": 0.4073, "step": 3605 }, { "epoch": 1.012352610892757, "grad_norm": 0.6972177028656006, "learning_rate": 8.378937720492384e-06, "loss": 0.3706, "step": 3606 }, { "epoch": 1.0126333520494104, "grad_norm": 0.6854816675186157, "learning_rate": 8.377733600933333e-06, "loss": 0.4191, "step": 3607 }, { "epoch": 1.012914093206064, "grad_norm": 0.6364105939865112, "learning_rate": 8.376529120923556e-06, "loss": 0.4325, "step": 3608 }, { "epoch": 1.0131948343627175, "grad_norm": 0.7041125893592834, "learning_rate": 8.37532428059159e-06, "loss": 0.3666, "step": 3609 }, { "epoch": 1.0134755755193712, "grad_norm": 0.6840842962265015, "learning_rate": 8.374119080066005e-06, "loss": 0.392, "step": 3610 }, { "epoch": 1.0137563166760246, "grad_norm": 0.7031016945838928, "learning_rate": 8.372913519475415e-06, "loss": 0.3791, "step": 3611 }, { "epoch": 1.0140370578326783, "grad_norm": 0.7323938012123108, "learning_rate": 8.371707598948468e-06, "loss": 0.3947, "step": 3612 }, { "epoch": 1.014317798989332, "grad_norm": 0.6816534996032715, "learning_rate": 8.370501318613855e-06, "loss": 0.3445, "step": 3613 }, { "epoch": 1.0145985401459854, "grad_norm": 0.7405982613563538, "learning_rate": 8.3692946786003e-06, "loss": 0.4005, "step": 3614 }, { "epoch": 1.014879281302639, "grad_norm": 0.5886854529380798, "learning_rate": 8.36808767903657e-06, "loss": 0.3746, "step": 3615 }, { "epoch": 1.0151600224592925, "grad_norm": 0.7090999484062195, "learning_rate": 8.366880320051465e-06, "loss": 0.3456, "step": 3616 }, { "epoch": 1.0154407636159462, "grad_norm": 0.6102776527404785, "learning_rate": 8.365672601773833e-06, "loss": 0.3948, "step": 3617 }, { "epoch": 1.0157215047725996, "grad_norm": 0.7303576469421387, "learning_rate": 8.364464524332547e-06, "loss": 0.3747, "step": 3618 }, { "epoch": 1.0160022459292533, "grad_norm": 0.6708539724349976, "learning_rate": 8.363256087856532e-06, "loss": 0.3588, "step": 3619 }, { "epoch": 1.0162829870859067, "grad_norm": 0.6503663659095764, "learning_rate": 8.362047292474741e-06, "loss": 0.3458, "step": 3620 }, { "epoch": 1.0165637282425604, "grad_norm": 0.6906838417053223, "learning_rate": 8.36083813831617e-06, "loss": 0.3953, "step": 3621 }, { "epoch": 1.016844469399214, "grad_norm": 0.6733444333076477, "learning_rate": 8.359628625509852e-06, "loss": 0.3914, "step": 3622 }, { "epoch": 1.0171252105558675, "grad_norm": 0.7370371222496033, "learning_rate": 8.35841875418486e-06, "loss": 0.4315, "step": 3623 }, { "epoch": 1.0174059517125211, "grad_norm": 0.7200049757957458, "learning_rate": 8.357208524470304e-06, "loss": 0.3529, "step": 3624 }, { "epoch": 1.0176866928691746, "grad_norm": 0.6907103657722473, "learning_rate": 8.355997936495332e-06, "loss": 0.3867, "step": 3625 }, { "epoch": 1.0179674340258282, "grad_norm": 0.6707966327667236, "learning_rate": 8.354786990389128e-06, "loss": 0.3675, "step": 3626 }, { "epoch": 1.0182481751824817, "grad_norm": 0.710354208946228, "learning_rate": 8.35357568628092e-06, "loss": 0.3761, "step": 3627 }, { "epoch": 1.0185289163391353, "grad_norm": 0.629179060459137, "learning_rate": 8.352364024299966e-06, "loss": 0.3714, "step": 3628 }, { "epoch": 1.0188096574957888, "grad_norm": 0.677626371383667, "learning_rate": 8.351152004575573e-06, "loss": 0.3879, "step": 3629 }, { "epoch": 1.0190903986524424, "grad_norm": 0.7375317811965942, "learning_rate": 8.349939627237079e-06, "loss": 0.3909, "step": 3630 }, { "epoch": 1.019371139809096, "grad_norm": 0.6857993006706238, "learning_rate": 8.348726892413857e-06, "loss": 0.3787, "step": 3631 }, { "epoch": 1.0196518809657495, "grad_norm": 0.6406326293945312, "learning_rate": 8.347513800235325e-06, "loss": 0.4423, "step": 3632 }, { "epoch": 1.0199326221224032, "grad_norm": 0.6543883085250854, "learning_rate": 8.346300350830938e-06, "loss": 0.3737, "step": 3633 }, { "epoch": 1.0202133632790567, "grad_norm": 0.6704428195953369, "learning_rate": 8.345086544330188e-06, "loss": 0.3783, "step": 3634 }, { "epoch": 1.0204941044357103, "grad_norm": 0.7376469969749451, "learning_rate": 8.343872380862601e-06, "loss": 0.4056, "step": 3635 }, { "epoch": 1.0207748455923638, "grad_norm": 0.6677947044372559, "learning_rate": 8.34265786055775e-06, "loss": 0.3739, "step": 3636 }, { "epoch": 1.0210555867490174, "grad_norm": 0.7404154539108276, "learning_rate": 8.341442983545239e-06, "loss": 0.383, "step": 3637 }, { "epoch": 1.0213363279056709, "grad_norm": 0.7266256213188171, "learning_rate": 8.340227749954712e-06, "loss": 0.4031, "step": 3638 }, { "epoch": 1.0216170690623245, "grad_norm": 0.6936408281326294, "learning_rate": 8.339012159915848e-06, "loss": 0.3589, "step": 3639 }, { "epoch": 1.0218978102189782, "grad_norm": 0.7971001267433167, "learning_rate": 8.337796213558374e-06, "loss": 0.412, "step": 3640 }, { "epoch": 1.0221785513756316, "grad_norm": 0.7692808508872986, "learning_rate": 8.336579911012043e-06, "loss": 0.3846, "step": 3641 }, { "epoch": 1.0224592925322853, "grad_norm": 0.6137217283248901, "learning_rate": 8.335363252406652e-06, "loss": 0.3909, "step": 3642 }, { "epoch": 1.0227400336889387, "grad_norm": 0.8090940117835999, "learning_rate": 8.334146237872037e-06, "loss": 0.442, "step": 3643 }, { "epoch": 1.0230207748455924, "grad_norm": 0.716333270072937, "learning_rate": 8.332928867538068e-06, "loss": 0.4048, "step": 3644 }, { "epoch": 1.0233015160022458, "grad_norm": 0.6243662238121033, "learning_rate": 8.331711141534657e-06, "loss": 0.3809, "step": 3645 }, { "epoch": 1.0235822571588995, "grad_norm": 0.6206414699554443, "learning_rate": 8.33049305999175e-06, "loss": 0.3563, "step": 3646 }, { "epoch": 1.0238629983155532, "grad_norm": 0.6339302062988281, "learning_rate": 8.329274623039339e-06, "loss": 0.3873, "step": 3647 }, { "epoch": 1.0241437394722066, "grad_norm": 0.6579312086105347, "learning_rate": 8.32805583080744e-06, "loss": 0.4038, "step": 3648 }, { "epoch": 1.0244244806288603, "grad_norm": 0.687745213508606, "learning_rate": 8.326836683426118e-06, "loss": 0.4212, "step": 3649 }, { "epoch": 1.0247052217855137, "grad_norm": 0.642025351524353, "learning_rate": 8.325617181025476e-06, "loss": 0.4016, "step": 3650 }, { "epoch": 1.0249859629421674, "grad_norm": 0.5885717272758484, "learning_rate": 8.324397323735646e-06, "loss": 0.3893, "step": 3651 }, { "epoch": 1.0252667040988208, "grad_norm": 0.6964549422264099, "learning_rate": 8.32317711168681e-06, "loss": 0.3858, "step": 3652 }, { "epoch": 1.0255474452554745, "grad_norm": 0.757373034954071, "learning_rate": 8.321956545009176e-06, "loss": 0.3824, "step": 3653 }, { "epoch": 1.025828186412128, "grad_norm": 0.656338632106781, "learning_rate": 8.320735623832998e-06, "loss": 0.3795, "step": 3654 }, { "epoch": 1.0261089275687816, "grad_norm": 0.6454774737358093, "learning_rate": 8.319514348288566e-06, "loss": 0.388, "step": 3655 }, { "epoch": 1.0263896687254352, "grad_norm": 0.8298314213752747, "learning_rate": 8.318292718506204e-06, "loss": 0.4123, "step": 3656 }, { "epoch": 1.0266704098820887, "grad_norm": 0.7114923000335693, "learning_rate": 8.317070734616278e-06, "loss": 0.3873, "step": 3657 }, { "epoch": 1.0269511510387423, "grad_norm": 0.7187616229057312, "learning_rate": 8.31584839674919e-06, "loss": 0.3787, "step": 3658 }, { "epoch": 1.0272318921953958, "grad_norm": 0.680168628692627, "learning_rate": 8.314625705035382e-06, "loss": 0.4077, "step": 3659 }, { "epoch": 1.0275126333520495, "grad_norm": 0.6636437773704529, "learning_rate": 8.313402659605332e-06, "loss": 0.3907, "step": 3660 }, { "epoch": 1.027793374508703, "grad_norm": 0.719252347946167, "learning_rate": 8.312179260589553e-06, "loss": 0.3776, "step": 3661 }, { "epoch": 1.0280741156653566, "grad_norm": 0.9567844271659851, "learning_rate": 8.310955508118601e-06, "loss": 0.3921, "step": 3662 }, { "epoch": 1.02835485682201, "grad_norm": 0.7299419045448303, "learning_rate": 8.309731402323066e-06, "loss": 0.4071, "step": 3663 }, { "epoch": 1.0286355979786637, "grad_norm": 0.7468762397766113, "learning_rate": 8.308506943333578e-06, "loss": 0.3831, "step": 3664 }, { "epoch": 1.0289163391353173, "grad_norm": 0.8965431451797485, "learning_rate": 8.307282131280805e-06, "loss": 0.3694, "step": 3665 }, { "epoch": 1.0291970802919708, "grad_norm": 0.8244647979736328, "learning_rate": 8.306056966295448e-06, "loss": 0.3921, "step": 3666 }, { "epoch": 1.0294778214486244, "grad_norm": 0.6560270190238953, "learning_rate": 8.30483144850825e-06, "loss": 0.4107, "step": 3667 }, { "epoch": 1.0297585626052779, "grad_norm": 0.8128765225410461, "learning_rate": 8.303605578049993e-06, "loss": 0.4124, "step": 3668 }, { "epoch": 1.0300393037619315, "grad_norm": 0.7475265264511108, "learning_rate": 8.302379355051491e-06, "loss": 0.3652, "step": 3669 }, { "epoch": 1.030320044918585, "grad_norm": 0.713414192199707, "learning_rate": 8.301152779643602e-06, "loss": 0.4044, "step": 3670 }, { "epoch": 1.0306007860752386, "grad_norm": 0.6739659905433655, "learning_rate": 8.299925851957216e-06, "loss": 0.3963, "step": 3671 }, { "epoch": 1.0308815272318923, "grad_norm": 0.729546844959259, "learning_rate": 8.298698572123263e-06, "loss": 0.3907, "step": 3672 }, { "epoch": 1.0311622683885457, "grad_norm": 0.7554054260253906, "learning_rate": 8.297470940272712e-06, "loss": 0.366, "step": 3673 }, { "epoch": 1.0314430095451994, "grad_norm": 0.6990939378738403, "learning_rate": 8.296242956536569e-06, "loss": 0.4435, "step": 3674 }, { "epoch": 1.0317237507018528, "grad_norm": 0.6825934648513794, "learning_rate": 8.295014621045874e-06, "loss": 0.3902, "step": 3675 }, { "epoch": 1.0320044918585065, "grad_norm": 0.7339544296264648, "learning_rate": 8.29378593393171e-06, "loss": 0.4016, "step": 3676 }, { "epoch": 1.03228523301516, "grad_norm": 0.5781506299972534, "learning_rate": 8.292556895325195e-06, "loss": 0.4108, "step": 3677 }, { "epoch": 1.0325659741718136, "grad_norm": 0.6548964381217957, "learning_rate": 8.29132750535748e-06, "loss": 0.3509, "step": 3678 }, { "epoch": 1.032846715328467, "grad_norm": 0.6997511386871338, "learning_rate": 8.290097764159764e-06, "loss": 0.392, "step": 3679 }, { "epoch": 1.0331274564851207, "grad_norm": 0.776130199432373, "learning_rate": 8.288867671863274e-06, "loss": 0.3587, "step": 3680 }, { "epoch": 1.0334081976417744, "grad_norm": 0.6548985242843628, "learning_rate": 8.287637228599278e-06, "loss": 0.4048, "step": 3681 }, { "epoch": 1.0336889387984278, "grad_norm": 0.6806820034980774, "learning_rate": 8.28640643449908e-06, "loss": 0.4257, "step": 3682 }, { "epoch": 1.0339696799550815, "grad_norm": 0.7635098099708557, "learning_rate": 8.285175289694024e-06, "loss": 0.3998, "step": 3683 }, { "epoch": 1.034250421111735, "grad_norm": 0.7298235297203064, "learning_rate": 8.283943794315492e-06, "loss": 0.3871, "step": 3684 }, { "epoch": 1.0345311622683886, "grad_norm": 0.6716488003730774, "learning_rate": 8.282711948494899e-06, "loss": 0.3432, "step": 3685 }, { "epoch": 1.034811903425042, "grad_norm": 0.6999754905700684, "learning_rate": 8.2814797523637e-06, "loss": 0.4539, "step": 3686 }, { "epoch": 1.0350926445816957, "grad_norm": 0.7473323345184326, "learning_rate": 8.28024720605339e-06, "loss": 0.3962, "step": 3687 }, { "epoch": 1.0353733857383491, "grad_norm": 0.6757204532623291, "learning_rate": 8.279014309695494e-06, "loss": 0.3905, "step": 3688 }, { "epoch": 1.0356541268950028, "grad_norm": 0.804010808467865, "learning_rate": 8.277781063421584e-06, "loss": 0.4076, "step": 3689 }, { "epoch": 1.0359348680516565, "grad_norm": 0.686629593372345, "learning_rate": 8.276547467363263e-06, "loss": 0.3904, "step": 3690 }, { "epoch": 1.03621560920831, "grad_norm": 0.6530104875564575, "learning_rate": 8.275313521652168e-06, "loss": 0.3445, "step": 3691 }, { "epoch": 1.0364963503649636, "grad_norm": 0.6870188117027283, "learning_rate": 8.274079226419984e-06, "loss": 0.4052, "step": 3692 }, { "epoch": 1.036777091521617, "grad_norm": 0.8461865782737732, "learning_rate": 8.272844581798426e-06, "loss": 0.3839, "step": 3693 }, { "epoch": 1.0370578326782707, "grad_norm": 0.783974826335907, "learning_rate": 8.271609587919245e-06, "loss": 0.3916, "step": 3694 }, { "epoch": 1.0373385738349241, "grad_norm": 0.627433180809021, "learning_rate": 8.270374244914234e-06, "loss": 0.3824, "step": 3695 }, { "epoch": 1.0376193149915778, "grad_norm": 0.6883553266525269, "learning_rate": 8.269138552915221e-06, "loss": 0.3889, "step": 3696 }, { "epoch": 1.0379000561482314, "grad_norm": 0.7421615123748779, "learning_rate": 8.267902512054071e-06, "loss": 0.4252, "step": 3697 }, { "epoch": 1.0381807973048849, "grad_norm": 0.801162600517273, "learning_rate": 8.266666122462687e-06, "loss": 0.4435, "step": 3698 }, { "epoch": 1.0384615384615385, "grad_norm": 0.7508783936500549, "learning_rate": 8.265429384273007e-06, "loss": 0.4253, "step": 3699 }, { "epoch": 1.038742279618192, "grad_norm": 0.6713810563087463, "learning_rate": 8.264192297617011e-06, "loss": 0.3849, "step": 3700 }, { "epoch": 1.0390230207748457, "grad_norm": 0.7695631384849548, "learning_rate": 8.262954862626711e-06, "loss": 0.3659, "step": 3701 }, { "epoch": 1.039303761931499, "grad_norm": 0.7910667657852173, "learning_rate": 8.26171707943416e-06, "loss": 0.3943, "step": 3702 }, { "epoch": 1.0395845030881528, "grad_norm": 0.860735297203064, "learning_rate": 8.260478948171444e-06, "loss": 0.4034, "step": 3703 }, { "epoch": 1.0398652442448062, "grad_norm": 0.7097742557525635, "learning_rate": 8.259240468970692e-06, "loss": 0.3796, "step": 3704 }, { "epoch": 1.0401459854014599, "grad_norm": 0.6722464561462402, "learning_rate": 8.258001641964064e-06, "loss": 0.4049, "step": 3705 }, { "epoch": 1.0404267265581135, "grad_norm": 0.6745389699935913, "learning_rate": 8.256762467283762e-06, "loss": 0.3896, "step": 3706 }, { "epoch": 1.040707467714767, "grad_norm": 0.7041593194007874, "learning_rate": 8.25552294506202e-06, "loss": 0.388, "step": 3707 }, { "epoch": 1.0409882088714206, "grad_norm": 0.7543497085571289, "learning_rate": 8.254283075431115e-06, "loss": 0.3784, "step": 3708 }, { "epoch": 1.041268950028074, "grad_norm": 0.6669436097145081, "learning_rate": 8.253042858523356e-06, "loss": 0.4165, "step": 3709 }, { "epoch": 1.0415496911847277, "grad_norm": 0.664459764957428, "learning_rate": 8.251802294471094e-06, "loss": 0.3455, "step": 3710 }, { "epoch": 1.0418304323413812, "grad_norm": 0.7049517035484314, "learning_rate": 8.250561383406713e-06, "loss": 0.3815, "step": 3711 }, { "epoch": 1.0421111734980348, "grad_norm": 0.6471162438392639, "learning_rate": 8.249320125462636e-06, "loss": 0.4003, "step": 3712 }, { "epoch": 1.0423919146546883, "grad_norm": 0.6745674014091492, "learning_rate": 8.248078520771319e-06, "loss": 0.3649, "step": 3713 }, { "epoch": 1.042672655811342, "grad_norm": 0.799483060836792, "learning_rate": 8.246836569465262e-06, "loss": 0.3882, "step": 3714 }, { "epoch": 1.0429533969679956, "grad_norm": 0.6792709231376648, "learning_rate": 8.245594271676998e-06, "loss": 0.4062, "step": 3715 }, { "epoch": 1.043234138124649, "grad_norm": 0.633111298084259, "learning_rate": 8.244351627539093e-06, "loss": 0.4272, "step": 3716 }, { "epoch": 1.0435148792813027, "grad_norm": 0.7625438570976257, "learning_rate": 8.24310863718416e-06, "loss": 0.4234, "step": 3717 }, { "epoch": 1.0437956204379562, "grad_norm": 0.6400211453437805, "learning_rate": 8.24186530074484e-06, "loss": 0.3414, "step": 3718 }, { "epoch": 1.0440763615946098, "grad_norm": 0.6569346189498901, "learning_rate": 8.240621618353817e-06, "loss": 0.3917, "step": 3719 }, { "epoch": 1.0443571027512633, "grad_norm": 0.7927652597427368, "learning_rate": 8.239377590143804e-06, "loss": 0.389, "step": 3720 }, { "epoch": 1.044637843907917, "grad_norm": 0.6913268566131592, "learning_rate": 8.23813321624756e-06, "loss": 0.414, "step": 3721 }, { "epoch": 1.0449185850645704, "grad_norm": 0.6259279847145081, "learning_rate": 8.236888496797878e-06, "loss": 0.3848, "step": 3722 }, { "epoch": 1.045199326221224, "grad_norm": 0.6240760684013367, "learning_rate": 8.235643431927582e-06, "loss": 0.3473, "step": 3723 }, { "epoch": 1.0454800673778777, "grad_norm": 0.6741257309913635, "learning_rate": 8.234398021769541e-06, "loss": 0.3839, "step": 3724 }, { "epoch": 1.0457608085345311, "grad_norm": 0.634797215461731, "learning_rate": 8.233152266456656e-06, "loss": 0.4012, "step": 3725 }, { "epoch": 1.0460415496911848, "grad_norm": 0.7278827428817749, "learning_rate": 8.231906166121868e-06, "loss": 0.3915, "step": 3726 }, { "epoch": 1.0463222908478382, "grad_norm": 0.6704578399658203, "learning_rate": 8.23065972089815e-06, "loss": 0.3334, "step": 3727 }, { "epoch": 1.046603032004492, "grad_norm": 0.678528904914856, "learning_rate": 8.229412930918517e-06, "loss": 0.3794, "step": 3728 }, { "epoch": 1.0468837731611453, "grad_norm": 0.7016115188598633, "learning_rate": 8.22816579631602e-06, "loss": 0.3424, "step": 3729 }, { "epoch": 1.047164514317799, "grad_norm": 0.7756428122520447, "learning_rate": 8.226918317223744e-06, "loss": 0.4266, "step": 3730 }, { "epoch": 1.0474452554744524, "grad_norm": 0.8074459433555603, "learning_rate": 8.22567049377481e-06, "loss": 0.3867, "step": 3731 }, { "epoch": 1.047725996631106, "grad_norm": 0.7199363708496094, "learning_rate": 8.224422326102381e-06, "loss": 0.4191, "step": 3732 }, { "epoch": 1.0480067377877598, "grad_norm": 0.7219660878181458, "learning_rate": 8.223173814339653e-06, "loss": 0.4009, "step": 3733 }, { "epoch": 1.0482874789444132, "grad_norm": 0.838511049747467, "learning_rate": 8.221924958619857e-06, "loss": 0.3618, "step": 3734 }, { "epoch": 1.0485682201010669, "grad_norm": 0.8532513380050659, "learning_rate": 8.220675759076269e-06, "loss": 0.4255, "step": 3735 }, { "epoch": 1.0488489612577203, "grad_norm": 0.7771586775779724, "learning_rate": 8.21942621584219e-06, "loss": 0.3698, "step": 3736 }, { "epoch": 1.049129702414374, "grad_norm": 0.7285014390945435, "learning_rate": 8.218176329050965e-06, "loss": 0.3944, "step": 3737 }, { "epoch": 1.0494104435710274, "grad_norm": 0.6804565787315369, "learning_rate": 8.216926098835975e-06, "loss": 0.412, "step": 3738 }, { "epoch": 1.049691184727681, "grad_norm": 0.7419346570968628, "learning_rate": 8.215675525330637e-06, "loss": 0.4103, "step": 3739 }, { "epoch": 1.0499719258843347, "grad_norm": 0.744171142578125, "learning_rate": 8.214424608668404e-06, "loss": 0.3855, "step": 3740 }, { "epoch": 1.0502526670409882, "grad_norm": 0.7131140232086182, "learning_rate": 8.213173348982766e-06, "loss": 0.4163, "step": 3741 }, { "epoch": 1.0505334081976418, "grad_norm": 0.643436849117279, "learning_rate": 8.21192174640725e-06, "loss": 0.4241, "step": 3742 }, { "epoch": 1.0508141493542953, "grad_norm": 0.6585485935211182, "learning_rate": 8.210669801075417e-06, "loss": 0.4082, "step": 3743 }, { "epoch": 1.051094890510949, "grad_norm": 0.7669865489006042, "learning_rate": 8.20941751312087e-06, "loss": 0.4145, "step": 3744 }, { "epoch": 1.0513756316676024, "grad_norm": 0.6367470622062683, "learning_rate": 8.208164882677244e-06, "loss": 0.3739, "step": 3745 }, { "epoch": 1.051656372824256, "grad_norm": 0.6548689007759094, "learning_rate": 8.206911909878212e-06, "loss": 0.3783, "step": 3746 }, { "epoch": 1.0519371139809095, "grad_norm": 0.6977734565734863, "learning_rate": 8.205658594857483e-06, "loss": 0.4005, "step": 3747 }, { "epoch": 1.0522178551375632, "grad_norm": 0.6368660926818848, "learning_rate": 8.204404937748805e-06, "loss": 0.3603, "step": 3748 }, { "epoch": 1.0524985962942168, "grad_norm": 0.7640892267227173, "learning_rate": 8.203150938685957e-06, "loss": 0.3853, "step": 3749 }, { "epoch": 1.0527793374508703, "grad_norm": 0.6732112169265747, "learning_rate": 8.20189659780276e-06, "loss": 0.4105, "step": 3750 }, { "epoch": 1.053060078607524, "grad_norm": 0.6741898655891418, "learning_rate": 8.20064191523307e-06, "loss": 0.395, "step": 3751 }, { "epoch": 1.0533408197641774, "grad_norm": 0.7830673456192017, "learning_rate": 8.199386891110778e-06, "loss": 0.3988, "step": 3752 }, { "epoch": 1.053621560920831, "grad_norm": 0.6740986704826355, "learning_rate": 8.198131525569812e-06, "loss": 0.3725, "step": 3753 }, { "epoch": 1.0539023020774845, "grad_norm": 0.7528538703918457, "learning_rate": 8.196875818744138e-06, "loss": 0.4328, "step": 3754 }, { "epoch": 1.0541830432341381, "grad_norm": 0.6501534581184387, "learning_rate": 8.195619770767758e-06, "loss": 0.3695, "step": 3755 }, { "epoch": 1.0544637843907916, "grad_norm": 0.6537755131721497, "learning_rate": 8.194363381774708e-06, "loss": 0.3796, "step": 3756 }, { "epoch": 1.0547445255474452, "grad_norm": 0.6635921001434326, "learning_rate": 8.193106651899061e-06, "loss": 0.3731, "step": 3757 }, { "epoch": 1.055025266704099, "grad_norm": 0.6463021636009216, "learning_rate": 8.19184958127493e-06, "loss": 0.3818, "step": 3758 }, { "epoch": 1.0553060078607523, "grad_norm": 0.7013039588928223, "learning_rate": 8.19059217003646e-06, "loss": 0.3829, "step": 3759 }, { "epoch": 1.055586749017406, "grad_norm": 0.690478503704071, "learning_rate": 8.189334418317834e-06, "loss": 0.3758, "step": 3760 }, { "epoch": 1.0558674901740595, "grad_norm": 0.726870596408844, "learning_rate": 8.188076326253272e-06, "loss": 0.3802, "step": 3761 }, { "epoch": 1.0561482313307131, "grad_norm": 0.7228400111198425, "learning_rate": 8.18681789397703e-06, "loss": 0.4015, "step": 3762 }, { "epoch": 1.0564289724873666, "grad_norm": 0.6611549258232117, "learning_rate": 8.1855591216234e-06, "loss": 0.3876, "step": 3763 }, { "epoch": 1.0567097136440202, "grad_norm": 0.6233832836151123, "learning_rate": 8.18430000932671e-06, "loss": 0.3835, "step": 3764 }, { "epoch": 1.0569904548006739, "grad_norm": 0.7990942597389221, "learning_rate": 8.183040557221326e-06, "loss": 0.4109, "step": 3765 }, { "epoch": 1.0572711959573273, "grad_norm": 0.6729419827461243, "learning_rate": 8.181780765441647e-06, "loss": 0.3981, "step": 3766 }, { "epoch": 1.057551937113981, "grad_norm": 0.7742902636528015, "learning_rate": 8.18052063412211e-06, "loss": 0.4007, "step": 3767 }, { "epoch": 1.0578326782706344, "grad_norm": 0.8096804618835449, "learning_rate": 8.179260163397191e-06, "loss": 0.4007, "step": 3768 }, { "epoch": 1.058113419427288, "grad_norm": 0.652288556098938, "learning_rate": 8.177999353401398e-06, "loss": 0.3933, "step": 3769 }, { "epoch": 1.0583941605839415, "grad_norm": 0.8095072507858276, "learning_rate": 8.176738204269276e-06, "loss": 0.4036, "step": 3770 }, { "epoch": 1.0586749017405952, "grad_norm": 0.7715938687324524, "learning_rate": 8.175476716135407e-06, "loss": 0.3928, "step": 3771 }, { "epoch": 1.0589556428972486, "grad_norm": 0.6101293563842773, "learning_rate": 8.17421488913441e-06, "loss": 0.4006, "step": 3772 }, { "epoch": 1.0592363840539023, "grad_norm": 0.6438723206520081, "learning_rate": 8.172952723400938e-06, "loss": 0.4201, "step": 3773 }, { "epoch": 1.059517125210556, "grad_norm": 0.7053725123405457, "learning_rate": 8.171690219069686e-06, "loss": 0.3934, "step": 3774 }, { "epoch": 1.0597978663672094, "grad_norm": 0.7070673704147339, "learning_rate": 8.170427376275374e-06, "loss": 0.3921, "step": 3775 }, { "epoch": 1.060078607523863, "grad_norm": 0.771934449672699, "learning_rate": 8.16916419515277e-06, "loss": 0.4117, "step": 3776 }, { "epoch": 1.0603593486805165, "grad_norm": 0.6540143489837646, "learning_rate": 8.167900675836669e-06, "loss": 0.4106, "step": 3777 }, { "epoch": 1.0606400898371702, "grad_norm": 0.6747614741325378, "learning_rate": 8.16663681846191e-06, "loss": 0.3754, "step": 3778 }, { "epoch": 1.0609208309938236, "grad_norm": 0.6334322094917297, "learning_rate": 8.16537262316336e-06, "loss": 0.3775, "step": 3779 }, { "epoch": 1.0612015721504773, "grad_norm": 0.7444552779197693, "learning_rate": 8.164108090075929e-06, "loss": 0.3851, "step": 3780 }, { "epoch": 1.0614823133071307, "grad_norm": 0.7173585295677185, "learning_rate": 8.162843219334559e-06, "loss": 0.3771, "step": 3781 }, { "epoch": 1.0617630544637844, "grad_norm": 0.6801967024803162, "learning_rate": 8.161578011074229e-06, "loss": 0.4322, "step": 3782 }, { "epoch": 1.062043795620438, "grad_norm": 0.748656153678894, "learning_rate": 8.160312465429952e-06, "loss": 0.37, "step": 3783 }, { "epoch": 1.0623245367770915, "grad_norm": 0.8064485788345337, "learning_rate": 8.159046582536784e-06, "loss": 0.3818, "step": 3784 }, { "epoch": 1.0626052779337452, "grad_norm": 0.7356282472610474, "learning_rate": 8.157780362529809e-06, "loss": 0.3927, "step": 3785 }, { "epoch": 1.0628860190903986, "grad_norm": 0.6002188920974731, "learning_rate": 8.156513805544148e-06, "loss": 0.3642, "step": 3786 }, { "epoch": 1.0631667602470523, "grad_norm": 0.691355288028717, "learning_rate": 8.155246911714965e-06, "loss": 0.3859, "step": 3787 }, { "epoch": 1.0634475014037057, "grad_norm": 0.7035458087921143, "learning_rate": 8.153979681177453e-06, "loss": 0.397, "step": 3788 }, { "epoch": 1.0637282425603594, "grad_norm": 0.6652584671974182, "learning_rate": 8.152712114066842e-06, "loss": 0.4021, "step": 3789 }, { "epoch": 1.064008983717013, "grad_norm": 0.5629920959472656, "learning_rate": 8.1514442105184e-06, "loss": 0.3324, "step": 3790 }, { "epoch": 1.0642897248736665, "grad_norm": 0.6746225357055664, "learning_rate": 8.150175970667432e-06, "loss": 0.4047, "step": 3791 }, { "epoch": 1.0645704660303201, "grad_norm": 0.6971006989479065, "learning_rate": 8.14890739464927e-06, "loss": 0.3861, "step": 3792 }, { "epoch": 1.0648512071869736, "grad_norm": 0.6421023011207581, "learning_rate": 8.147638482599294e-06, "loss": 0.366, "step": 3793 }, { "epoch": 1.0651319483436272, "grad_norm": 0.5770668983459473, "learning_rate": 8.146369234652913e-06, "loss": 0.3866, "step": 3794 }, { "epoch": 1.0654126895002807, "grad_norm": 0.6772415637969971, "learning_rate": 8.145099650945574e-06, "loss": 0.3968, "step": 3795 }, { "epoch": 1.0656934306569343, "grad_norm": 0.6348497271537781, "learning_rate": 8.143829731612757e-06, "loss": 0.3709, "step": 3796 }, { "epoch": 1.0659741718135878, "grad_norm": 0.7543554306030273, "learning_rate": 8.142559476789982e-06, "loss": 0.3851, "step": 3797 }, { "epoch": 1.0662549129702414, "grad_norm": 0.7075092792510986, "learning_rate": 8.141288886612801e-06, "loss": 0.394, "step": 3798 }, { "epoch": 1.066535654126895, "grad_norm": 0.6890093088150024, "learning_rate": 8.140017961216807e-06, "loss": 0.3853, "step": 3799 }, { "epoch": 1.0668163952835485, "grad_norm": 0.6252447366714478, "learning_rate": 8.13874670073762e-06, "loss": 0.4105, "step": 3800 }, { "epoch": 1.0670971364402022, "grad_norm": 0.6732771396636963, "learning_rate": 8.137475105310903e-06, "loss": 0.3713, "step": 3801 }, { "epoch": 1.0673778775968557, "grad_norm": 0.7609824538230896, "learning_rate": 8.136203175072357e-06, "loss": 0.4072, "step": 3802 }, { "epoch": 1.0676586187535093, "grad_norm": 0.6842123866081238, "learning_rate": 8.134930910157708e-06, "loss": 0.3409, "step": 3803 }, { "epoch": 1.0679393599101628, "grad_norm": 0.7474604249000549, "learning_rate": 8.133658310702729e-06, "loss": 0.4253, "step": 3804 }, { "epoch": 1.0682201010668164, "grad_norm": 0.7342727184295654, "learning_rate": 8.132385376843221e-06, "loss": 0.4085, "step": 3805 }, { "epoch": 1.0685008422234699, "grad_norm": 0.6349467039108276, "learning_rate": 8.131112108715024e-06, "loss": 0.3756, "step": 3806 }, { "epoch": 1.0687815833801235, "grad_norm": 0.7352294921875, "learning_rate": 8.129838506454015e-06, "loss": 0.4099, "step": 3807 }, { "epoch": 1.0690623245367772, "grad_norm": 0.6199530959129333, "learning_rate": 8.128564570196102e-06, "loss": 0.3777, "step": 3808 }, { "epoch": 1.0693430656934306, "grad_norm": 0.7040279507637024, "learning_rate": 8.127290300077236e-06, "loss": 0.4022, "step": 3809 }, { "epoch": 1.0696238068500843, "grad_norm": 0.6669299602508545, "learning_rate": 8.126015696233396e-06, "loss": 0.3905, "step": 3810 }, { "epoch": 1.0699045480067377, "grad_norm": 0.7671824097633362, "learning_rate": 8.124740758800598e-06, "loss": 0.3816, "step": 3811 }, { "epoch": 1.0701852891633914, "grad_norm": 0.6888076663017273, "learning_rate": 8.123465487914898e-06, "loss": 0.3785, "step": 3812 }, { "epoch": 1.0704660303200448, "grad_norm": 0.6825580596923828, "learning_rate": 8.122189883712387e-06, "loss": 0.3698, "step": 3813 }, { "epoch": 1.0707467714766985, "grad_norm": 0.721526563167572, "learning_rate": 8.120913946329186e-06, "loss": 0.3713, "step": 3814 }, { "epoch": 1.0710275126333522, "grad_norm": 0.7052328586578369, "learning_rate": 8.119637675901457e-06, "loss": 0.3825, "step": 3815 }, { "epoch": 1.0713082537900056, "grad_norm": 0.8516050577163696, "learning_rate": 8.118361072565394e-06, "loss": 0.4036, "step": 3816 }, { "epoch": 1.0715889949466593, "grad_norm": 0.7296687960624695, "learning_rate": 8.11708413645723e-06, "loss": 0.3922, "step": 3817 }, { "epoch": 1.0718697361033127, "grad_norm": 0.6892791986465454, "learning_rate": 8.115806867713229e-06, "loss": 0.41, "step": 3818 }, { "epoch": 1.0721504772599664, "grad_norm": 0.8589861989021301, "learning_rate": 8.114529266469697e-06, "loss": 0.4311, "step": 3819 }, { "epoch": 1.0724312184166198, "grad_norm": 0.7154029607772827, "learning_rate": 8.113251332862969e-06, "loss": 0.3736, "step": 3820 }, { "epoch": 1.0727119595732735, "grad_norm": 0.7286500930786133, "learning_rate": 8.11197306702942e-06, "loss": 0.4383, "step": 3821 }, { "epoch": 1.072992700729927, "grad_norm": 0.6642131805419922, "learning_rate": 8.110694469105456e-06, "loss": 0.3609, "step": 3822 }, { "epoch": 1.0732734418865806, "grad_norm": 0.6660007834434509, "learning_rate": 8.109415539227522e-06, "loss": 0.4267, "step": 3823 }, { "epoch": 1.073554183043234, "grad_norm": 0.735863208770752, "learning_rate": 8.108136277532096e-06, "loss": 0.3674, "step": 3824 }, { "epoch": 1.0738349241998877, "grad_norm": 0.7106322646141052, "learning_rate": 8.1068566841557e-06, "loss": 0.36, "step": 3825 }, { "epoch": 1.0741156653565413, "grad_norm": 0.723576545715332, "learning_rate": 8.105576759234874e-06, "loss": 0.3763, "step": 3826 }, { "epoch": 1.0743964065131948, "grad_norm": 0.6807546019554138, "learning_rate": 8.10429650290621e-06, "loss": 0.429, "step": 3827 }, { "epoch": 1.0746771476698485, "grad_norm": 0.7537394165992737, "learning_rate": 8.10301591530633e-06, "loss": 0.3705, "step": 3828 }, { "epoch": 1.074957888826502, "grad_norm": 0.7843241095542908, "learning_rate": 8.101734996571886e-06, "loss": 0.3479, "step": 3829 }, { "epoch": 1.0752386299831556, "grad_norm": 0.7866134643554688, "learning_rate": 8.100453746839571e-06, "loss": 0.44, "step": 3830 }, { "epoch": 1.075519371139809, "grad_norm": 0.7115223407745361, "learning_rate": 8.099172166246113e-06, "loss": 0.3798, "step": 3831 }, { "epoch": 1.0758001122964627, "grad_norm": 0.7375951409339905, "learning_rate": 8.097890254928274e-06, "loss": 0.4256, "step": 3832 }, { "epoch": 1.0760808534531163, "grad_norm": 0.7635318040847778, "learning_rate": 8.096608013022854e-06, "loss": 0.3746, "step": 3833 }, { "epoch": 1.0763615946097698, "grad_norm": 0.7682521343231201, "learning_rate": 8.095325440666679e-06, "loss": 0.3834, "step": 3834 }, { "epoch": 1.0766423357664234, "grad_norm": 0.7489264011383057, "learning_rate": 8.094042537996624e-06, "loss": 0.3951, "step": 3835 }, { "epoch": 1.0769230769230769, "grad_norm": 0.682855486869812, "learning_rate": 8.092759305149588e-06, "loss": 0.4004, "step": 3836 }, { "epoch": 1.0772038180797305, "grad_norm": 0.7808113098144531, "learning_rate": 8.091475742262514e-06, "loss": 0.3939, "step": 3837 }, { "epoch": 1.077484559236384, "grad_norm": 0.7783780694007874, "learning_rate": 8.090191849472371e-06, "loss": 0.3593, "step": 3838 }, { "epoch": 1.0777653003930376, "grad_norm": 0.7571975588798523, "learning_rate": 8.088907626916173e-06, "loss": 0.3672, "step": 3839 }, { "epoch": 1.078046041549691, "grad_norm": 0.7357426285743713, "learning_rate": 8.08762307473096e-06, "loss": 0.4213, "step": 3840 }, { "epoch": 1.0783267827063447, "grad_norm": 0.8001245260238647, "learning_rate": 8.086338193053814e-06, "loss": 0.382, "step": 3841 }, { "epoch": 1.0786075238629984, "grad_norm": 0.6524637341499329, "learning_rate": 8.085052982021849e-06, "loss": 0.384, "step": 3842 }, { "epoch": 1.0788882650196518, "grad_norm": 0.7771375179290771, "learning_rate": 8.083767441772212e-06, "loss": 0.3788, "step": 3843 }, { "epoch": 1.0791690061763055, "grad_norm": 0.7094160914421082, "learning_rate": 8.082481572442093e-06, "loss": 0.3765, "step": 3844 }, { "epoch": 1.079449747332959, "grad_norm": 0.7189434170722961, "learning_rate": 8.081195374168708e-06, "loss": 0.4138, "step": 3845 }, { "epoch": 1.0797304884896126, "grad_norm": 0.6085180640220642, "learning_rate": 8.079908847089314e-06, "loss": 0.378, "step": 3846 }, { "epoch": 1.080011229646266, "grad_norm": 0.5959956645965576, "learning_rate": 8.078621991341202e-06, "loss": 0.3599, "step": 3847 }, { "epoch": 1.0802919708029197, "grad_norm": 0.7881020307540894, "learning_rate": 8.077334807061692e-06, "loss": 0.4257, "step": 3848 }, { "epoch": 1.0805727119595732, "grad_norm": 0.6509604454040527, "learning_rate": 8.076047294388151e-06, "loss": 0.381, "step": 3849 }, { "epoch": 1.0808534531162268, "grad_norm": 0.6959467530250549, "learning_rate": 8.074759453457969e-06, "loss": 0.4064, "step": 3850 }, { "epoch": 1.0811341942728805, "grad_norm": 0.5917371511459351, "learning_rate": 8.07347128440858e-06, "loss": 0.3978, "step": 3851 }, { "epoch": 1.081414935429534, "grad_norm": 0.632373571395874, "learning_rate": 8.07218278737745e-06, "loss": 0.3909, "step": 3852 }, { "epoch": 1.0816956765861876, "grad_norm": 0.6964919567108154, "learning_rate": 8.070893962502076e-06, "loss": 0.4364, "step": 3853 }, { "epoch": 1.081976417742841, "grad_norm": 0.7179551720619202, "learning_rate": 8.069604809919996e-06, "loss": 0.3781, "step": 3854 }, { "epoch": 1.0822571588994947, "grad_norm": 0.6049095988273621, "learning_rate": 8.068315329768778e-06, "loss": 0.3661, "step": 3855 }, { "epoch": 1.0825379000561481, "grad_norm": 0.6889928579330444, "learning_rate": 8.067025522186028e-06, "loss": 0.3932, "step": 3856 }, { "epoch": 1.0828186412128018, "grad_norm": 0.6682705283164978, "learning_rate": 8.065735387309388e-06, "loss": 0.3719, "step": 3857 }, { "epoch": 1.0830993823694555, "grad_norm": 0.7578855156898499, "learning_rate": 8.064444925276532e-06, "loss": 0.3824, "step": 3858 }, { "epoch": 1.083380123526109, "grad_norm": 0.7042250633239746, "learning_rate": 8.063154136225172e-06, "loss": 0.371, "step": 3859 }, { "epoch": 1.0836608646827626, "grad_norm": 0.714329719543457, "learning_rate": 8.061863020293046e-06, "loss": 0.4127, "step": 3860 }, { "epoch": 1.083941605839416, "grad_norm": 0.7102716565132141, "learning_rate": 8.060571577617944e-06, "loss": 0.4013, "step": 3861 }, { "epoch": 1.0842223469960697, "grad_norm": 0.8229711651802063, "learning_rate": 8.059279808337672e-06, "loss": 0.4461, "step": 3862 }, { "epoch": 1.0845030881527231, "grad_norm": 0.6872700452804565, "learning_rate": 8.057987712590085e-06, "loss": 0.3826, "step": 3863 }, { "epoch": 1.0847838293093768, "grad_norm": 0.7488064169883728, "learning_rate": 8.056695290513066e-06, "loss": 0.4195, "step": 3864 }, { "epoch": 1.0850645704660302, "grad_norm": 0.6129146218299866, "learning_rate": 8.055402542244533e-06, "loss": 0.3511, "step": 3865 }, { "epoch": 1.0853453116226839, "grad_norm": 0.6663370728492737, "learning_rate": 8.054109467922441e-06, "loss": 0.3954, "step": 3866 }, { "epoch": 1.0856260527793375, "grad_norm": 0.7446199655532837, "learning_rate": 8.05281606768478e-06, "loss": 0.4107, "step": 3867 }, { "epoch": 1.085906793935991, "grad_norm": 0.7511297464370728, "learning_rate": 8.051522341669571e-06, "loss": 0.4026, "step": 3868 }, { "epoch": 1.0861875350926447, "grad_norm": 0.6736665964126587, "learning_rate": 8.050228290014875e-06, "loss": 0.3767, "step": 3869 }, { "epoch": 1.086468276249298, "grad_norm": 0.6423181295394897, "learning_rate": 8.048933912858783e-06, "loss": 0.4045, "step": 3870 }, { "epoch": 1.0867490174059518, "grad_norm": 0.7433770895004272, "learning_rate": 8.047639210339425e-06, "loss": 0.4022, "step": 3871 }, { "epoch": 1.0870297585626052, "grad_norm": 0.7664890289306641, "learning_rate": 8.046344182594963e-06, "loss": 0.4393, "step": 3872 }, { "epoch": 1.0873104997192589, "grad_norm": 0.7127290964126587, "learning_rate": 8.045048829763593e-06, "loss": 0.3717, "step": 3873 }, { "epoch": 1.0875912408759123, "grad_norm": 0.6565137505531311, "learning_rate": 8.043753151983546e-06, "loss": 0.3741, "step": 3874 }, { "epoch": 1.087871982032566, "grad_norm": 0.6661744713783264, "learning_rate": 8.042457149393092e-06, "loss": 0.4163, "step": 3875 }, { "epoch": 1.0881527231892196, "grad_norm": 0.6414688229560852, "learning_rate": 8.041160822130532e-06, "loss": 0.3815, "step": 3876 }, { "epoch": 1.088433464345873, "grad_norm": 0.770722508430481, "learning_rate": 8.0398641703342e-06, "loss": 0.4112, "step": 3877 }, { "epoch": 1.0887142055025267, "grad_norm": 0.6567904949188232, "learning_rate": 8.038567194142466e-06, "loss": 0.3748, "step": 3878 }, { "epoch": 1.0889949466591802, "grad_norm": 0.738982081413269, "learning_rate": 8.037269893693738e-06, "loss": 0.3973, "step": 3879 }, { "epoch": 1.0892756878158338, "grad_norm": 0.7579796314239502, "learning_rate": 8.035972269126456e-06, "loss": 0.3833, "step": 3880 }, { "epoch": 1.0895564289724873, "grad_norm": 0.6975324749946594, "learning_rate": 8.03467432057909e-06, "loss": 0.3701, "step": 3881 }, { "epoch": 1.089837170129141, "grad_norm": 0.6883982419967651, "learning_rate": 8.033376048190152e-06, "loss": 0.3601, "step": 3882 }, { "epoch": 1.0901179112857946, "grad_norm": 0.79848712682724, "learning_rate": 8.032077452098187e-06, "loss": 0.3952, "step": 3883 }, { "epoch": 1.090398652442448, "grad_norm": 0.7479804158210754, "learning_rate": 8.03077853244177e-06, "loss": 0.3815, "step": 3884 }, { "epoch": 1.0906793935991017, "grad_norm": 0.6892729997634888, "learning_rate": 8.029479289359517e-06, "loss": 0.3907, "step": 3885 }, { "epoch": 1.0909601347557552, "grad_norm": 0.7771220803260803, "learning_rate": 8.028179722990073e-06, "loss": 0.3693, "step": 3886 }, { "epoch": 1.0912408759124088, "grad_norm": 0.686834990978241, "learning_rate": 8.02687983347212e-06, "loss": 0.3778, "step": 3887 }, { "epoch": 1.0915216170690623, "grad_norm": 0.6629605293273926, "learning_rate": 8.025579620944372e-06, "loss": 0.3989, "step": 3888 }, { "epoch": 1.091802358225716, "grad_norm": 0.6506296396255493, "learning_rate": 8.024279085545584e-06, "loss": 0.3804, "step": 3889 }, { "epoch": 1.0920830993823694, "grad_norm": 0.6472775936126709, "learning_rate": 8.022978227414537e-06, "loss": 0.4016, "step": 3890 }, { "epoch": 1.092363840539023, "grad_norm": 0.8023062944412231, "learning_rate": 8.021677046690055e-06, "loss": 0.3864, "step": 3891 }, { "epoch": 1.0926445816956767, "grad_norm": 0.8447490930557251, "learning_rate": 8.020375543510986e-06, "loss": 0.3828, "step": 3892 }, { "epoch": 1.0929253228523301, "grad_norm": 0.7679696083068848, "learning_rate": 8.019073718016223e-06, "loss": 0.4142, "step": 3893 }, { "epoch": 1.0932060640089838, "grad_norm": 0.7488394975662231, "learning_rate": 8.017771570344687e-06, "loss": 0.453, "step": 3894 }, { "epoch": 1.0934868051656372, "grad_norm": 0.7049294114112854, "learning_rate": 8.016469100635336e-06, "loss": 0.341, "step": 3895 }, { "epoch": 1.093767546322291, "grad_norm": 0.7560601830482483, "learning_rate": 8.01516630902716e-06, "loss": 0.416, "step": 3896 }, { "epoch": 1.0940482874789443, "grad_norm": 0.7238718271255493, "learning_rate": 8.013863195659187e-06, "loss": 0.3637, "step": 3897 }, { "epoch": 1.094329028635598, "grad_norm": 0.7373055219650269, "learning_rate": 8.012559760670472e-06, "loss": 0.3995, "step": 3898 }, { "epoch": 1.0946097697922514, "grad_norm": 0.66459721326828, "learning_rate": 8.011256004200118e-06, "loss": 0.4071, "step": 3899 }, { "epoch": 1.094890510948905, "grad_norm": 0.6978521943092346, "learning_rate": 8.009951926387245e-06, "loss": 0.3953, "step": 3900 }, { "epoch": 1.0951712521055588, "grad_norm": 0.6564455032348633, "learning_rate": 8.008647527371022e-06, "loss": 0.4034, "step": 3901 }, { "epoch": 1.0954519932622122, "grad_norm": 0.6997036933898926, "learning_rate": 8.007342807290644e-06, "loss": 0.3742, "step": 3902 }, { "epoch": 1.0957327344188659, "grad_norm": 0.7327374219894409, "learning_rate": 8.006037766285344e-06, "loss": 0.3838, "step": 3903 }, { "epoch": 1.0960134755755193, "grad_norm": 0.6831018924713135, "learning_rate": 8.004732404494386e-06, "loss": 0.3632, "step": 3904 }, { "epoch": 1.096294216732173, "grad_norm": 0.7136168479919434, "learning_rate": 8.003426722057071e-06, "loss": 0.3916, "step": 3905 }, { "epoch": 1.0965749578888264, "grad_norm": 0.6591780185699463, "learning_rate": 8.002120719112734e-06, "loss": 0.3646, "step": 3906 }, { "epoch": 1.09685569904548, "grad_norm": 0.6764297485351562, "learning_rate": 8.000814395800742e-06, "loss": 0.4084, "step": 3907 }, { "epoch": 1.0971364402021337, "grad_norm": 0.7040059566497803, "learning_rate": 7.999507752260499e-06, "loss": 0.404, "step": 3908 }, { "epoch": 1.0974171813587872, "grad_norm": 0.6634208559989929, "learning_rate": 7.998200788631441e-06, "loss": 0.3485, "step": 3909 }, { "epoch": 1.0976979225154408, "grad_norm": 0.729281485080719, "learning_rate": 7.99689350505304e-06, "loss": 0.3859, "step": 3910 }, { "epoch": 1.0979786636720943, "grad_norm": 0.7145592570304871, "learning_rate": 7.9955859016648e-06, "loss": 0.3836, "step": 3911 }, { "epoch": 1.098259404828748, "grad_norm": 0.644367516040802, "learning_rate": 7.994277978606259e-06, "loss": 0.3872, "step": 3912 }, { "epoch": 1.0985401459854014, "grad_norm": 0.5941146612167358, "learning_rate": 7.992969736016996e-06, "loss": 0.4076, "step": 3913 }, { "epoch": 1.098820887142055, "grad_norm": 0.6180407404899597, "learning_rate": 7.99166117403661e-06, "loss": 0.3769, "step": 3914 }, { "epoch": 1.0991016282987085, "grad_norm": 0.7049715518951416, "learning_rate": 7.990352292804752e-06, "loss": 0.3769, "step": 3915 }, { "epoch": 1.0993823694553622, "grad_norm": 0.7062937617301941, "learning_rate": 7.989043092461094e-06, "loss": 0.3958, "step": 3916 }, { "epoch": 1.0996631106120156, "grad_norm": 0.6752821803092957, "learning_rate": 7.98773357314534e-06, "loss": 0.3994, "step": 3917 }, { "epoch": 1.0999438517686693, "grad_norm": 0.6410258412361145, "learning_rate": 7.986423734997243e-06, "loss": 0.4132, "step": 3918 }, { "epoch": 1.100224592925323, "grad_norm": 0.7151635885238647, "learning_rate": 7.985113578156573e-06, "loss": 0.3976, "step": 3919 }, { "epoch": 1.1005053340819764, "grad_norm": 0.6709374189376831, "learning_rate": 7.983803102763146e-06, "loss": 0.4026, "step": 3920 }, { "epoch": 1.10078607523863, "grad_norm": 0.720008909702301, "learning_rate": 7.982492308956809e-06, "loss": 0.3594, "step": 3921 }, { "epoch": 1.1010668163952835, "grad_norm": 0.6780417561531067, "learning_rate": 7.981181196877437e-06, "loss": 0.3823, "step": 3922 }, { "epoch": 1.1013475575519371, "grad_norm": 0.74256432056427, "learning_rate": 7.979869766664947e-06, "loss": 0.4017, "step": 3923 }, { "epoch": 1.1016282987085906, "grad_norm": 0.6556293964385986, "learning_rate": 7.978558018459288e-06, "loss": 0.3896, "step": 3924 }, { "epoch": 1.1019090398652442, "grad_norm": 0.6396933197975159, "learning_rate": 7.977245952400436e-06, "loss": 0.414, "step": 3925 }, { "epoch": 1.102189781021898, "grad_norm": 0.702535092830658, "learning_rate": 7.975933568628413e-06, "loss": 0.4357, "step": 3926 }, { "epoch": 1.1024705221785513, "grad_norm": 0.6938668489456177, "learning_rate": 7.974620867283267e-06, "loss": 0.4172, "step": 3927 }, { "epoch": 1.102751263335205, "grad_norm": 0.701370358467102, "learning_rate": 7.973307848505076e-06, "loss": 0.4292, "step": 3928 }, { "epoch": 1.1030320044918585, "grad_norm": 0.665600597858429, "learning_rate": 7.971994512433965e-06, "loss": 0.3658, "step": 3929 }, { "epoch": 1.1033127456485121, "grad_norm": 0.6574090719223022, "learning_rate": 7.97068085921008e-06, "loss": 0.3844, "step": 3930 }, { "epoch": 1.1035934868051656, "grad_norm": 0.7160803079605103, "learning_rate": 7.969366888973606e-06, "loss": 0.4122, "step": 3931 }, { "epoch": 1.1038742279618192, "grad_norm": 0.7250133752822876, "learning_rate": 7.968052601864764e-06, "loss": 0.3684, "step": 3932 }, { "epoch": 1.1041549691184729, "grad_norm": 0.6573944687843323, "learning_rate": 7.966737998023804e-06, "loss": 0.3786, "step": 3933 }, { "epoch": 1.1044357102751263, "grad_norm": 0.7087840437889099, "learning_rate": 7.965423077591016e-06, "loss": 0.3926, "step": 3934 }, { "epoch": 1.10471645143178, "grad_norm": 0.6879131197929382, "learning_rate": 7.964107840706717e-06, "loss": 0.4313, "step": 3935 }, { "epoch": 1.1049971925884334, "grad_norm": 0.7839788198471069, "learning_rate": 7.96279228751126e-06, "loss": 0.3994, "step": 3936 }, { "epoch": 1.105277933745087, "grad_norm": 0.7168227434158325, "learning_rate": 7.961476418145037e-06, "loss": 0.4207, "step": 3937 }, { "epoch": 1.1055586749017405, "grad_norm": 0.885214626789093, "learning_rate": 7.960160232748466e-06, "loss": 0.3973, "step": 3938 }, { "epoch": 1.1058394160583942, "grad_norm": 0.7359375357627869, "learning_rate": 7.958843731462003e-06, "loss": 0.4176, "step": 3939 }, { "epoch": 1.1061201572150476, "grad_norm": 0.7108869552612305, "learning_rate": 7.957526914426137e-06, "loss": 0.4332, "step": 3940 }, { "epoch": 1.1064008983717013, "grad_norm": 0.8117178678512573, "learning_rate": 7.95620978178139e-06, "loss": 0.3826, "step": 3941 }, { "epoch": 1.1066816395283547, "grad_norm": 0.7513662576675415, "learning_rate": 7.954892333668318e-06, "loss": 0.4262, "step": 3942 }, { "epoch": 1.1069623806850084, "grad_norm": 0.6816915273666382, "learning_rate": 7.953574570227512e-06, "loss": 0.3902, "step": 3943 }, { "epoch": 1.107243121841662, "grad_norm": 0.7227581739425659, "learning_rate": 7.952256491599594e-06, "loss": 0.3654, "step": 3944 }, { "epoch": 1.1075238629983155, "grad_norm": 0.6222494840621948, "learning_rate": 7.950938097925224e-06, "loss": 0.341, "step": 3945 }, { "epoch": 1.1078046041549692, "grad_norm": 0.742754340171814, "learning_rate": 7.94961938934509e-06, "loss": 0.3895, "step": 3946 }, { "epoch": 1.1080853453116226, "grad_norm": 0.7121419906616211, "learning_rate": 7.948300365999917e-06, "loss": 0.3845, "step": 3947 }, { "epoch": 1.1083660864682763, "grad_norm": 0.5612559914588928, "learning_rate": 7.946981028030463e-06, "loss": 0.3646, "step": 3948 }, { "epoch": 1.1086468276249297, "grad_norm": 0.6668398976325989, "learning_rate": 7.94566137557752e-06, "loss": 0.3666, "step": 3949 }, { "epoch": 1.1089275687815834, "grad_norm": 0.6178943514823914, "learning_rate": 7.944341408781914e-06, "loss": 0.3649, "step": 3950 }, { "epoch": 1.109208309938237, "grad_norm": 0.7678118944168091, "learning_rate": 7.9430211277845e-06, "loss": 0.3747, "step": 3951 }, { "epoch": 1.1094890510948905, "grad_norm": 0.6977105736732483, "learning_rate": 7.941700532726174e-06, "loss": 0.3772, "step": 3952 }, { "epoch": 1.1097697922515442, "grad_norm": 0.5625650882720947, "learning_rate": 7.940379623747861e-06, "loss": 0.3677, "step": 3953 }, { "epoch": 1.1100505334081976, "grad_norm": 0.7186122536659241, "learning_rate": 7.939058400990518e-06, "loss": 0.393, "step": 3954 }, { "epoch": 1.1103312745648513, "grad_norm": 0.6502189040184021, "learning_rate": 7.93773686459514e-06, "loss": 0.3892, "step": 3955 }, { "epoch": 1.1106120157215047, "grad_norm": 0.7585682272911072, "learning_rate": 7.936415014702754e-06, "loss": 0.3548, "step": 3956 }, { "epoch": 1.1108927568781584, "grad_norm": 0.7043623924255371, "learning_rate": 7.935092851454416e-06, "loss": 0.4114, "step": 3957 }, { "epoch": 1.1111734980348118, "grad_norm": 0.6830571889877319, "learning_rate": 7.933770374991223e-06, "loss": 0.3904, "step": 3958 }, { "epoch": 1.1114542391914655, "grad_norm": 0.6246893405914307, "learning_rate": 7.932447585454298e-06, "loss": 0.3611, "step": 3959 }, { "epoch": 1.1117349803481191, "grad_norm": 0.6417291164398193, "learning_rate": 7.931124482984802e-06, "loss": 0.3709, "step": 3960 }, { "epoch": 1.1120157215047726, "grad_norm": 0.7732900381088257, "learning_rate": 7.92980106772393e-06, "loss": 0.4169, "step": 3961 }, { "epoch": 1.1122964626614262, "grad_norm": 0.6309844851493835, "learning_rate": 7.928477339812906e-06, "loss": 0.3871, "step": 3962 }, { "epoch": 1.1125772038180797, "grad_norm": 0.7527498602867126, "learning_rate": 7.927153299392993e-06, "loss": 0.3913, "step": 3963 }, { "epoch": 1.1128579449747333, "grad_norm": 0.7211992740631104, "learning_rate": 7.925828946605481e-06, "loss": 0.405, "step": 3964 }, { "epoch": 1.1131386861313868, "grad_norm": 0.6866170167922974, "learning_rate": 7.924504281591698e-06, "loss": 0.3781, "step": 3965 }, { "epoch": 1.1134194272880404, "grad_norm": 0.6364414095878601, "learning_rate": 7.923179304493005e-06, "loss": 0.3695, "step": 3966 }, { "epoch": 1.1137001684446939, "grad_norm": 0.6579988598823547, "learning_rate": 7.921854015450794e-06, "loss": 0.4142, "step": 3967 }, { "epoch": 1.1139809096013475, "grad_norm": 0.6545124053955078, "learning_rate": 7.920528414606495e-06, "loss": 0.3819, "step": 3968 }, { "epoch": 1.1142616507580012, "grad_norm": 0.7061275243759155, "learning_rate": 7.919202502101562e-06, "loss": 0.4118, "step": 3969 }, { "epoch": 1.1145423919146547, "grad_norm": 0.7414146065711975, "learning_rate": 7.917876278077493e-06, "loss": 0.4475, "step": 3970 }, { "epoch": 1.1148231330713083, "grad_norm": 0.5749108195304871, "learning_rate": 7.916549742675812e-06, "loss": 0.3836, "step": 3971 }, { "epoch": 1.1151038742279618, "grad_norm": 0.6774276494979858, "learning_rate": 7.91522289603808e-06, "loss": 0.4011, "step": 3972 }, { "epoch": 1.1153846153846154, "grad_norm": 0.7002820372581482, "learning_rate": 7.91389573830589e-06, "loss": 0.4116, "step": 3973 }, { "epoch": 1.1156653565412689, "grad_norm": 0.6322146058082581, "learning_rate": 7.912568269620864e-06, "loss": 0.3826, "step": 3974 }, { "epoch": 1.1159460976979225, "grad_norm": 0.6247429251670837, "learning_rate": 7.911240490124667e-06, "loss": 0.3888, "step": 3975 }, { "epoch": 1.1162268388545762, "grad_norm": 0.6144675016403198, "learning_rate": 7.909912399958986e-06, "loss": 0.4358, "step": 3976 }, { "epoch": 1.1165075800112296, "grad_norm": 0.6474701166152954, "learning_rate": 7.908583999265552e-06, "loss": 0.4135, "step": 3977 }, { "epoch": 1.1167883211678833, "grad_norm": 0.6898013353347778, "learning_rate": 7.90725528818612e-06, "loss": 0.3373, "step": 3978 }, { "epoch": 1.1170690623245367, "grad_norm": 0.6261772513389587, "learning_rate": 7.905926266862483e-06, "loss": 0.3731, "step": 3979 }, { "epoch": 1.1173498034811904, "grad_norm": 0.669147253036499, "learning_rate": 7.904596935436464e-06, "loss": 0.3998, "step": 3980 }, { "epoch": 1.1176305446378438, "grad_norm": 0.6951127052307129, "learning_rate": 7.903267294049926e-06, "loss": 0.4025, "step": 3981 }, { "epoch": 1.1179112857944975, "grad_norm": 0.6515661478042603, "learning_rate": 7.901937342844753e-06, "loss": 0.4261, "step": 3982 }, { "epoch": 1.118192026951151, "grad_norm": 0.644037663936615, "learning_rate": 7.900607081962875e-06, "loss": 0.3985, "step": 3983 }, { "epoch": 1.1184727681078046, "grad_norm": 0.734929084777832, "learning_rate": 7.899276511546245e-06, "loss": 0.3896, "step": 3984 }, { "epoch": 1.1187535092644583, "grad_norm": 0.7229781150817871, "learning_rate": 7.897945631736856e-06, "loss": 0.3814, "step": 3985 }, { "epoch": 1.1190342504211117, "grad_norm": 0.5938378572463989, "learning_rate": 7.896614442676731e-06, "loss": 0.4119, "step": 3986 }, { "epoch": 1.1193149915777654, "grad_norm": 0.6958502531051636, "learning_rate": 7.895282944507925e-06, "loss": 0.4011, "step": 3987 }, { "epoch": 1.1195957327344188, "grad_norm": 0.7006041407585144, "learning_rate": 7.893951137372527e-06, "loss": 0.3505, "step": 3988 }, { "epoch": 1.1198764738910725, "grad_norm": 0.674226701259613, "learning_rate": 7.892619021412659e-06, "loss": 0.3948, "step": 3989 }, { "epoch": 1.120157215047726, "grad_norm": 0.7255149483680725, "learning_rate": 7.89128659677048e-06, "loss": 0.3947, "step": 3990 }, { "epoch": 1.1204379562043796, "grad_norm": 0.7130322456359863, "learning_rate": 7.889953863588173e-06, "loss": 0.3889, "step": 3991 }, { "epoch": 1.120718697361033, "grad_norm": 0.6574380993843079, "learning_rate": 7.888620822007963e-06, "loss": 0.4091, "step": 3992 }, { "epoch": 1.1209994385176867, "grad_norm": 0.695537269115448, "learning_rate": 7.8872874721721e-06, "loss": 0.3811, "step": 3993 }, { "epoch": 1.1212801796743403, "grad_norm": 0.6779100894927979, "learning_rate": 7.885953814222874e-06, "loss": 0.3854, "step": 3994 }, { "epoch": 1.1215609208309938, "grad_norm": 0.7288315892219543, "learning_rate": 7.884619848302603e-06, "loss": 0.3784, "step": 3995 }, { "epoch": 1.1218416619876475, "grad_norm": 0.6675150990486145, "learning_rate": 7.883285574553641e-06, "loss": 0.3877, "step": 3996 }, { "epoch": 1.122122403144301, "grad_norm": 0.6860742568969727, "learning_rate": 7.881950993118372e-06, "loss": 0.4033, "step": 3997 }, { "epoch": 1.1224031443009546, "grad_norm": 0.6912956833839417, "learning_rate": 7.880616104139214e-06, "loss": 0.4343, "step": 3998 }, { "epoch": 1.122683885457608, "grad_norm": 0.6946637034416199, "learning_rate": 7.87928090775862e-06, "loss": 0.4107, "step": 3999 }, { "epoch": 1.1229646266142617, "grad_norm": 0.6523827910423279, "learning_rate": 7.877945404119071e-06, "loss": 0.4002, "step": 4000 }, { "epoch": 1.1232453677709153, "grad_norm": 0.6998991370201111, "learning_rate": 7.876609593363086e-06, "loss": 0.431, "step": 4001 }, { "epoch": 1.1235261089275688, "grad_norm": 0.6889566779136658, "learning_rate": 7.875273475633212e-06, "loss": 0.3927, "step": 4002 }, { "epoch": 1.1238068500842224, "grad_norm": 0.6717015504837036, "learning_rate": 7.873937051072037e-06, "loss": 0.3751, "step": 4003 }, { "epoch": 1.1240875912408759, "grad_norm": 0.6775625348091125, "learning_rate": 7.872600319822168e-06, "loss": 0.3783, "step": 4004 }, { "epoch": 1.1243683323975295, "grad_norm": 0.7042632102966309, "learning_rate": 7.871263282026256e-06, "loss": 0.3949, "step": 4005 }, { "epoch": 1.124649073554183, "grad_norm": 0.8770792484283447, "learning_rate": 7.869925937826984e-06, "loss": 0.3743, "step": 4006 }, { "epoch": 1.1249298147108366, "grad_norm": 0.7265608310699463, "learning_rate": 7.868588287367062e-06, "loss": 0.4012, "step": 4007 }, { "epoch": 1.12521055586749, "grad_norm": 0.8121118545532227, "learning_rate": 7.867250330789237e-06, "loss": 0.4143, "step": 4008 }, { "epoch": 1.1254912970241437, "grad_norm": 0.6335089206695557, "learning_rate": 7.865912068236286e-06, "loss": 0.3642, "step": 4009 }, { "epoch": 1.1257720381807972, "grad_norm": 0.8560290336608887, "learning_rate": 7.864573499851022e-06, "loss": 0.37, "step": 4010 }, { "epoch": 1.1260527793374508, "grad_norm": 0.6950612664222717, "learning_rate": 7.863234625776289e-06, "loss": 0.4, "step": 4011 }, { "epoch": 1.1263335204941045, "grad_norm": 0.6369311213493347, "learning_rate": 7.861895446154959e-06, "loss": 0.4145, "step": 4012 }, { "epoch": 1.126614261650758, "grad_norm": 0.6790870428085327, "learning_rate": 7.860555961129945e-06, "loss": 0.3876, "step": 4013 }, { "epoch": 1.1268950028074116, "grad_norm": 0.795529842376709, "learning_rate": 7.859216170844187e-06, "loss": 0.4086, "step": 4014 }, { "epoch": 1.127175743964065, "grad_norm": 0.7263389229774475, "learning_rate": 7.85787607544066e-06, "loss": 0.3635, "step": 4015 }, { "epoch": 1.1274564851207187, "grad_norm": 0.7352930903434753, "learning_rate": 7.856535675062371e-06, "loss": 0.4146, "step": 4016 }, { "epoch": 1.1277372262773722, "grad_norm": 0.7245302200317383, "learning_rate": 7.855194969852358e-06, "loss": 0.3749, "step": 4017 }, { "epoch": 1.1280179674340258, "grad_norm": 0.666125476360321, "learning_rate": 7.853853959953692e-06, "loss": 0.3911, "step": 4018 }, { "epoch": 1.1282987085906795, "grad_norm": 0.6330158114433289, "learning_rate": 7.85251264550948e-06, "loss": 0.4036, "step": 4019 }, { "epoch": 1.128579449747333, "grad_norm": 0.7882659435272217, "learning_rate": 7.851171026662857e-06, "loss": 0.4063, "step": 4020 }, { "epoch": 1.1288601909039866, "grad_norm": 0.7144256234169006, "learning_rate": 7.849829103556991e-06, "loss": 0.4033, "step": 4021 }, { "epoch": 1.12914093206064, "grad_norm": 0.7497358918190002, "learning_rate": 7.848486876335086e-06, "loss": 0.4031, "step": 4022 }, { "epoch": 1.1294216732172937, "grad_norm": 0.7111562490463257, "learning_rate": 7.847144345140375e-06, "loss": 0.3828, "step": 4023 }, { "epoch": 1.1297024143739471, "grad_norm": 0.7431685328483582, "learning_rate": 7.845801510116124e-06, "loss": 0.4383, "step": 4024 }, { "epoch": 1.1299831555306008, "grad_norm": 0.7841300368309021, "learning_rate": 7.844458371405634e-06, "loss": 0.3627, "step": 4025 }, { "epoch": 1.1302638966872545, "grad_norm": 0.7278240919113159, "learning_rate": 7.843114929152234e-06, "loss": 0.3705, "step": 4026 }, { "epoch": 1.130544637843908, "grad_norm": 0.7185124754905701, "learning_rate": 7.841771183499289e-06, "loss": 0.4071, "step": 4027 }, { "epoch": 1.1308253790005616, "grad_norm": 0.6828190684318542, "learning_rate": 7.840427134590196e-06, "loss": 0.3812, "step": 4028 }, { "epoch": 1.131106120157215, "grad_norm": 0.7162874937057495, "learning_rate": 7.839082782568382e-06, "loss": 0.3628, "step": 4029 }, { "epoch": 1.1313868613138687, "grad_norm": 0.7726592421531677, "learning_rate": 7.837738127577307e-06, "loss": 0.3761, "step": 4030 }, { "epoch": 1.1316676024705221, "grad_norm": 0.7891879081726074, "learning_rate": 7.836393169760467e-06, "loss": 0.3373, "step": 4031 }, { "epoch": 1.1319483436271758, "grad_norm": 0.6850951910018921, "learning_rate": 7.835047909261387e-06, "loss": 0.3865, "step": 4032 }, { "epoch": 1.1322290847838292, "grad_norm": 0.7357378005981445, "learning_rate": 7.833702346223624e-06, "loss": 0.3946, "step": 4033 }, { "epoch": 1.1325098259404829, "grad_norm": 0.761630654335022, "learning_rate": 7.832356480790767e-06, "loss": 0.4249, "step": 4034 }, { "epoch": 1.1327905670971363, "grad_norm": 0.816126823425293, "learning_rate": 7.831010313106441e-06, "loss": 0.4132, "step": 4035 }, { "epoch": 1.13307130825379, "grad_norm": 0.7078574895858765, "learning_rate": 7.829663843314301e-06, "loss": 0.4246, "step": 4036 }, { "epoch": 1.1333520494104437, "grad_norm": 0.6342118978500366, "learning_rate": 7.828317071558029e-06, "loss": 0.3885, "step": 4037 }, { "epoch": 1.133632790567097, "grad_norm": 0.7254751920700073, "learning_rate": 7.826969997981349e-06, "loss": 0.4345, "step": 4038 }, { "epoch": 1.1339135317237508, "grad_norm": 0.7696471810340881, "learning_rate": 7.825622622728008e-06, "loss": 0.3946, "step": 4039 }, { "epoch": 1.1341942728804042, "grad_norm": 0.7007644772529602, "learning_rate": 7.824274945941794e-06, "loss": 0.414, "step": 4040 }, { "epoch": 1.1344750140370579, "grad_norm": 0.6638544797897339, "learning_rate": 7.82292696776652e-06, "loss": 0.4122, "step": 4041 }, { "epoch": 1.1347557551937113, "grad_norm": 0.7450946569442749, "learning_rate": 7.821578688346037e-06, "loss": 0.3923, "step": 4042 }, { "epoch": 1.135036496350365, "grad_norm": 0.7676528096199036, "learning_rate": 7.82023010782422e-06, "loss": 0.3575, "step": 4043 }, { "epoch": 1.1353172375070186, "grad_norm": 0.6937967538833618, "learning_rate": 7.818881226344985e-06, "loss": 0.4193, "step": 4044 }, { "epoch": 1.135597978663672, "grad_norm": 0.7304214835166931, "learning_rate": 7.817532044052275e-06, "loss": 0.4366, "step": 4045 }, { "epoch": 1.1358787198203257, "grad_norm": 0.591670036315918, "learning_rate": 7.816182561090066e-06, "loss": 0.3783, "step": 4046 }, { "epoch": 1.1361594609769792, "grad_norm": 0.796985924243927, "learning_rate": 7.814832777602367e-06, "loss": 0.3986, "step": 4047 }, { "epoch": 1.1364402021336328, "grad_norm": 0.7883581519126892, "learning_rate": 7.81348269373322e-06, "loss": 0.4015, "step": 4048 }, { "epoch": 1.1367209432902863, "grad_norm": 0.7149470448493958, "learning_rate": 7.812132309626692e-06, "loss": 0.4174, "step": 4049 }, { "epoch": 1.13700168444694, "grad_norm": 0.8815571665763855, "learning_rate": 7.810781625426893e-06, "loss": 0.4509, "step": 4050 }, { "epoch": 1.1372824256035936, "grad_norm": 0.6563960313796997, "learning_rate": 7.809430641277959e-06, "loss": 0.3817, "step": 4051 }, { "epoch": 1.137563166760247, "grad_norm": 0.8129245042800903, "learning_rate": 7.808079357324057e-06, "loss": 0.4071, "step": 4052 }, { "epoch": 1.1378439079169007, "grad_norm": 0.6671165823936462, "learning_rate": 7.806727773709388e-06, "loss": 0.403, "step": 4053 }, { "epoch": 1.1381246490735542, "grad_norm": 0.5800145864486694, "learning_rate": 7.805375890578184e-06, "loss": 0.3579, "step": 4054 }, { "epoch": 1.1384053902302078, "grad_norm": 0.7509213089942932, "learning_rate": 7.804023708074714e-06, "loss": 0.3987, "step": 4055 }, { "epoch": 1.1386861313868613, "grad_norm": 0.6746266484260559, "learning_rate": 7.802671226343266e-06, "loss": 0.4248, "step": 4056 }, { "epoch": 1.138966872543515, "grad_norm": 0.7355132102966309, "learning_rate": 7.801318445528177e-06, "loss": 0.43, "step": 4057 }, { "epoch": 1.1392476137001684, "grad_norm": 0.6688141822814941, "learning_rate": 7.799965365773803e-06, "loss": 0.3977, "step": 4058 }, { "epoch": 1.139528354856822, "grad_norm": 0.7025774717330933, "learning_rate": 7.798611987224535e-06, "loss": 0.4038, "step": 4059 }, { "epoch": 1.1398090960134755, "grad_norm": 0.6520720720291138, "learning_rate": 7.797258310024802e-06, "loss": 0.3624, "step": 4060 }, { "epoch": 1.1400898371701291, "grad_norm": 0.6350058913230896, "learning_rate": 7.795904334319056e-06, "loss": 0.3818, "step": 4061 }, { "epoch": 1.1403705783267828, "grad_norm": 0.6513971090316772, "learning_rate": 7.794550060251786e-06, "loss": 0.4089, "step": 4062 }, { "epoch": 1.1406513194834362, "grad_norm": 0.7588690519332886, "learning_rate": 7.79319548796751e-06, "loss": 0.3827, "step": 4063 }, { "epoch": 1.14093206064009, "grad_norm": 0.763512134552002, "learning_rate": 7.791840617610784e-06, "loss": 0.4002, "step": 4064 }, { "epoch": 1.1412128017967433, "grad_norm": 0.7148080468177795, "learning_rate": 7.790485449326188e-06, "loss": 0.3573, "step": 4065 }, { "epoch": 1.141493542953397, "grad_norm": 0.7314262986183167, "learning_rate": 7.789129983258336e-06, "loss": 0.3869, "step": 4066 }, { "epoch": 1.1417742841100504, "grad_norm": 0.6132856011390686, "learning_rate": 7.787774219551878e-06, "loss": 0.3484, "step": 4067 }, { "epoch": 1.142055025266704, "grad_norm": 0.7298488616943359, "learning_rate": 7.786418158351491e-06, "loss": 0.3768, "step": 4068 }, { "epoch": 1.1423357664233578, "grad_norm": 0.6746385097503662, "learning_rate": 7.785061799801888e-06, "loss": 0.3538, "step": 4069 }, { "epoch": 1.1426165075800112, "grad_norm": 0.6759898066520691, "learning_rate": 7.783705144047805e-06, "loss": 0.3556, "step": 4070 }, { "epoch": 1.1428972487366649, "grad_norm": 0.6515828371047974, "learning_rate": 7.782348191234022e-06, "loss": 0.4138, "step": 4071 }, { "epoch": 1.1431779898933183, "grad_norm": 0.6762593984603882, "learning_rate": 7.780990941505342e-06, "loss": 0.3785, "step": 4072 }, { "epoch": 1.143458731049972, "grad_norm": 0.6707088351249695, "learning_rate": 7.779633395006603e-06, "loss": 0.4018, "step": 4073 }, { "epoch": 1.1437394722066254, "grad_norm": 0.7524548172950745, "learning_rate": 7.778275551882673e-06, "loss": 0.3949, "step": 4074 }, { "epoch": 1.144020213363279, "grad_norm": 0.7122347950935364, "learning_rate": 7.776917412278454e-06, "loss": 0.3791, "step": 4075 }, { "epoch": 1.1443009545199327, "grad_norm": 0.762192964553833, "learning_rate": 7.775558976338875e-06, "loss": 0.4066, "step": 4076 }, { "epoch": 1.1445816956765862, "grad_norm": 0.6764295101165771, "learning_rate": 7.774200244208903e-06, "loss": 0.3711, "step": 4077 }, { "epoch": 1.1448624368332398, "grad_norm": 0.7432030439376831, "learning_rate": 7.772841216033534e-06, "loss": 0.3548, "step": 4078 }, { "epoch": 1.1451431779898933, "grad_norm": 0.6911014318466187, "learning_rate": 7.771481891957792e-06, "loss": 0.4129, "step": 4079 }, { "epoch": 1.145423919146547, "grad_norm": 0.74835205078125, "learning_rate": 7.770122272126738e-06, "loss": 0.3853, "step": 4080 }, { "epoch": 1.1457046603032004, "grad_norm": 0.7849177122116089, "learning_rate": 7.76876235668546e-06, "loss": 0.3847, "step": 4081 }, { "epoch": 1.145985401459854, "grad_norm": 0.7110073566436768, "learning_rate": 7.767402145779083e-06, "loss": 0.3946, "step": 4082 }, { "epoch": 1.1462661426165075, "grad_norm": 0.7056792378425598, "learning_rate": 7.766041639552757e-06, "loss": 0.4053, "step": 4083 }, { "epoch": 1.1465468837731612, "grad_norm": 0.7177689075469971, "learning_rate": 7.764680838151669e-06, "loss": 0.4309, "step": 4084 }, { "epoch": 1.1468276249298146, "grad_norm": 0.6438093185424805, "learning_rate": 7.763319741721034e-06, "loss": 0.4055, "step": 4085 }, { "epoch": 1.1471083660864683, "grad_norm": 0.731724739074707, "learning_rate": 7.7619583504061e-06, "loss": 0.4371, "step": 4086 }, { "epoch": 1.147389107243122, "grad_norm": 0.6364150047302246, "learning_rate": 7.760596664352148e-06, "loss": 0.403, "step": 4087 }, { "epoch": 1.1476698483997754, "grad_norm": 0.6307037472724915, "learning_rate": 7.759234683704485e-06, "loss": 0.3921, "step": 4088 }, { "epoch": 1.147950589556429, "grad_norm": 0.7023909091949463, "learning_rate": 7.757872408608456e-06, "loss": 0.3825, "step": 4089 }, { "epoch": 1.1482313307130825, "grad_norm": 0.6668989658355713, "learning_rate": 7.756509839209431e-06, "loss": 0.3466, "step": 4090 }, { "epoch": 1.1485120718697361, "grad_norm": 0.610808789730072, "learning_rate": 7.75514697565282e-06, "loss": 0.3692, "step": 4091 }, { "epoch": 1.1487928130263896, "grad_norm": 0.64811772108078, "learning_rate": 7.753783818084057e-06, "loss": 0.3854, "step": 4092 }, { "epoch": 1.1490735541830432, "grad_norm": 0.6047309041023254, "learning_rate": 7.75242036664861e-06, "loss": 0.387, "step": 4093 }, { "epoch": 1.149354295339697, "grad_norm": 0.6245483756065369, "learning_rate": 7.751056621491977e-06, "loss": 0.3935, "step": 4094 }, { "epoch": 1.1496350364963503, "grad_norm": 0.6940885782241821, "learning_rate": 7.749692582759689e-06, "loss": 0.3924, "step": 4095 }, { "epoch": 1.149915777653004, "grad_norm": 0.7416813969612122, "learning_rate": 7.748328250597308e-06, "loss": 0.4011, "step": 4096 }, { "epoch": 1.1501965188096575, "grad_norm": 0.7112900018692017, "learning_rate": 7.746963625150425e-06, "loss": 0.3809, "step": 4097 }, { "epoch": 1.1504772599663111, "grad_norm": 0.6424203515052795, "learning_rate": 7.745598706564668e-06, "loss": 0.3746, "step": 4098 }, { "epoch": 1.1507580011229646, "grad_norm": 0.7422674894332886, "learning_rate": 7.744233494985691e-06, "loss": 0.4076, "step": 4099 }, { "epoch": 1.1510387422796182, "grad_norm": 0.6848623156547546, "learning_rate": 7.74286799055918e-06, "loss": 0.4129, "step": 4100 }, { "epoch": 1.1513194834362717, "grad_norm": 0.7590399980545044, "learning_rate": 7.741502193430854e-06, "loss": 0.4041, "step": 4101 }, { "epoch": 1.1516002245929253, "grad_norm": 0.6886131763458252, "learning_rate": 7.740136103746463e-06, "loss": 0.417, "step": 4102 }, { "epoch": 1.1518809657495788, "grad_norm": 0.7533678412437439, "learning_rate": 7.738769721651784e-06, "loss": 0.4097, "step": 4103 }, { "epoch": 1.1521617069062324, "grad_norm": 0.7325572967529297, "learning_rate": 7.737403047292634e-06, "loss": 0.4018, "step": 4104 }, { "epoch": 1.152442448062886, "grad_norm": 0.665576696395874, "learning_rate": 7.736036080814853e-06, "loss": 0.371, "step": 4105 }, { "epoch": 1.1527231892195395, "grad_norm": 0.6930864453315735, "learning_rate": 7.734668822364315e-06, "loss": 0.3908, "step": 4106 }, { "epoch": 1.1530039303761932, "grad_norm": 0.6323667764663696, "learning_rate": 7.733301272086929e-06, "loss": 0.3888, "step": 4107 }, { "epoch": 1.1532846715328466, "grad_norm": 0.6767651438713074, "learning_rate": 7.731933430128624e-06, "loss": 0.3917, "step": 4108 }, { "epoch": 1.1535654126895003, "grad_norm": 0.8144638538360596, "learning_rate": 7.730565296635376e-06, "loss": 0.3984, "step": 4109 }, { "epoch": 1.1538461538461537, "grad_norm": 0.6930384635925293, "learning_rate": 7.729196871753178e-06, "loss": 0.3868, "step": 4110 }, { "epoch": 1.1541268950028074, "grad_norm": 0.6677954792976379, "learning_rate": 7.727828155628063e-06, "loss": 0.3692, "step": 4111 }, { "epoch": 1.154407636159461, "grad_norm": 0.6747219562530518, "learning_rate": 7.726459148406089e-06, "loss": 0.4066, "step": 4112 }, { "epoch": 1.1546883773161145, "grad_norm": 0.7844739556312561, "learning_rate": 7.72508985023335e-06, "loss": 0.3898, "step": 4113 }, { "epoch": 1.1549691184727682, "grad_norm": 0.7765569686889648, "learning_rate": 7.723720261255967e-06, "loss": 0.4141, "step": 4114 }, { "epoch": 1.1552498596294216, "grad_norm": 0.6895026564598083, "learning_rate": 7.722350381620099e-06, "loss": 0.4056, "step": 4115 }, { "epoch": 1.1555306007860753, "grad_norm": 0.7399360537528992, "learning_rate": 7.720980211471922e-06, "loss": 0.4075, "step": 4116 }, { "epoch": 1.1558113419427287, "grad_norm": 0.7794618010520935, "learning_rate": 7.719609750957662e-06, "loss": 0.4062, "step": 4117 }, { "epoch": 1.1560920830993824, "grad_norm": 0.7110264301300049, "learning_rate": 7.71823900022356e-06, "loss": 0.3875, "step": 4118 }, { "epoch": 1.156372824256036, "grad_norm": 0.7261342406272888, "learning_rate": 7.716867959415895e-06, "loss": 0.4133, "step": 4119 }, { "epoch": 1.1566535654126895, "grad_norm": 0.711584210395813, "learning_rate": 7.715496628680977e-06, "loss": 0.3735, "step": 4120 }, { "epoch": 1.1569343065693432, "grad_norm": 0.6330934166908264, "learning_rate": 7.714125008165146e-06, "loss": 0.3581, "step": 4121 }, { "epoch": 1.1572150477259966, "grad_norm": 0.6390913724899292, "learning_rate": 7.712753098014771e-06, "loss": 0.377, "step": 4122 }, { "epoch": 1.1574957888826503, "grad_norm": 0.6860611438751221, "learning_rate": 7.711380898376257e-06, "loss": 0.3909, "step": 4123 }, { "epoch": 1.1577765300393037, "grad_norm": 0.7579001784324646, "learning_rate": 7.710008409396032e-06, "loss": 0.4041, "step": 4124 }, { "epoch": 1.1580572711959574, "grad_norm": 0.6396341919898987, "learning_rate": 7.708635631220564e-06, "loss": 0.3445, "step": 4125 }, { "epoch": 1.1583380123526108, "grad_norm": 0.6361292600631714, "learning_rate": 7.707262563996343e-06, "loss": 0.3805, "step": 4126 }, { "epoch": 1.1586187535092645, "grad_norm": 0.821531355381012, "learning_rate": 7.705889207869898e-06, "loss": 0.4316, "step": 4127 }, { "epoch": 1.158899494665918, "grad_norm": 0.685020923614502, "learning_rate": 7.704515562987784e-06, "loss": 0.4029, "step": 4128 }, { "epoch": 1.1591802358225716, "grad_norm": 0.601153552532196, "learning_rate": 7.703141629496587e-06, "loss": 0.3705, "step": 4129 }, { "epoch": 1.1594609769792252, "grad_norm": 0.8576675653457642, "learning_rate": 7.701767407542924e-06, "loss": 0.441, "step": 4130 }, { "epoch": 1.1597417181358787, "grad_norm": 0.7202056050300598, "learning_rate": 7.700392897273446e-06, "loss": 0.3824, "step": 4131 }, { "epoch": 1.1600224592925323, "grad_norm": 0.8050165176391602, "learning_rate": 7.699018098834828e-06, "loss": 0.3553, "step": 4132 }, { "epoch": 1.1603032004491858, "grad_norm": 0.6949837803840637, "learning_rate": 7.697643012373786e-06, "loss": 0.4272, "step": 4133 }, { "epoch": 1.1605839416058394, "grad_norm": 0.6781432628631592, "learning_rate": 7.696267638037055e-06, "loss": 0.4283, "step": 4134 }, { "epoch": 1.1608646827624929, "grad_norm": 0.8500373363494873, "learning_rate": 7.69489197597141e-06, "loss": 0.4156, "step": 4135 }, { "epoch": 1.1611454239191465, "grad_norm": 0.7929197549819946, "learning_rate": 7.69351602632365e-06, "loss": 0.4049, "step": 4136 }, { "epoch": 1.1614261650758002, "grad_norm": 0.6842719912528992, "learning_rate": 7.692139789240611e-06, "loss": 0.4341, "step": 4137 }, { "epoch": 1.1617069062324537, "grad_norm": 0.8185324668884277, "learning_rate": 7.690763264869154e-06, "loss": 0.403, "step": 4138 }, { "epoch": 1.1619876473891073, "grad_norm": 0.6700324416160583, "learning_rate": 7.689386453356175e-06, "loss": 0.3571, "step": 4139 }, { "epoch": 1.1622683885457608, "grad_norm": 0.6507755517959595, "learning_rate": 7.6880093548486e-06, "loss": 0.3547, "step": 4140 }, { "epoch": 1.1625491297024144, "grad_norm": 0.8207817673683167, "learning_rate": 7.68663196949338e-06, "loss": 0.3834, "step": 4141 }, { "epoch": 1.1628298708590679, "grad_norm": 0.7874570488929749, "learning_rate": 7.685254297437501e-06, "loss": 0.4405, "step": 4142 }, { "epoch": 1.1631106120157215, "grad_norm": 0.7565908432006836, "learning_rate": 7.683876338827984e-06, "loss": 0.3889, "step": 4143 }, { "epoch": 1.1633913531723752, "grad_norm": 0.7770771384239197, "learning_rate": 7.682498093811875e-06, "loss": 0.4114, "step": 4144 }, { "epoch": 1.1636720943290286, "grad_norm": 0.743602991104126, "learning_rate": 7.68111956253625e-06, "loss": 0.4182, "step": 4145 }, { "epoch": 1.1639528354856823, "grad_norm": 0.7257493734359741, "learning_rate": 7.679740745148216e-06, "loss": 0.3871, "step": 4146 }, { "epoch": 1.1642335766423357, "grad_norm": 0.7658679485321045, "learning_rate": 7.678361641794917e-06, "loss": 0.4128, "step": 4147 }, { "epoch": 1.1645143177989894, "grad_norm": 0.7146362662315369, "learning_rate": 7.676982252623518e-06, "loss": 0.3783, "step": 4148 }, { "epoch": 1.1647950589556428, "grad_norm": 0.7754902243614197, "learning_rate": 7.675602577781221e-06, "loss": 0.4098, "step": 4149 }, { "epoch": 1.1650758001122965, "grad_norm": 0.6947715878486633, "learning_rate": 7.674222617415255e-06, "loss": 0.3951, "step": 4150 }, { "epoch": 1.16535654126895, "grad_norm": 0.6923962235450745, "learning_rate": 7.672842371672879e-06, "loss": 0.3917, "step": 4151 }, { "epoch": 1.1656372824256036, "grad_norm": 0.724932074546814, "learning_rate": 7.671461840701389e-06, "loss": 0.411, "step": 4152 }, { "epoch": 1.165918023582257, "grad_norm": 0.7778397798538208, "learning_rate": 7.670081024648102e-06, "loss": 0.3858, "step": 4153 }, { "epoch": 1.1661987647389107, "grad_norm": 0.6858928799629211, "learning_rate": 7.668699923660373e-06, "loss": 0.3671, "step": 4154 }, { "epoch": 1.1664795058955644, "grad_norm": 0.730816662311554, "learning_rate": 7.667318537885586e-06, "loss": 0.4002, "step": 4155 }, { "epoch": 1.1667602470522178, "grad_norm": 0.6910610795021057, "learning_rate": 7.665936867471148e-06, "loss": 0.3881, "step": 4156 }, { "epoch": 1.1670409882088715, "grad_norm": 0.6639518737792969, "learning_rate": 7.664554912564509e-06, "loss": 0.3988, "step": 4157 }, { "epoch": 1.167321729365525, "grad_norm": 0.7089428305625916, "learning_rate": 7.663172673313137e-06, "loss": 0.4119, "step": 4158 }, { "epoch": 1.1676024705221786, "grad_norm": 0.6947149634361267, "learning_rate": 7.66179014986454e-06, "loss": 0.4202, "step": 4159 }, { "epoch": 1.167883211678832, "grad_norm": 0.7748801112174988, "learning_rate": 7.66040734236625e-06, "loss": 0.3732, "step": 4160 }, { "epoch": 1.1681639528354857, "grad_norm": 0.7255113124847412, "learning_rate": 7.659024250965833e-06, "loss": 0.3776, "step": 4161 }, { "epoch": 1.1684446939921393, "grad_norm": 0.5578462481498718, "learning_rate": 7.657640875810884e-06, "loss": 0.362, "step": 4162 }, { "epoch": 1.1687254351487928, "grad_norm": 0.7239642143249512, "learning_rate": 7.656257217049025e-06, "loss": 0.3805, "step": 4163 }, { "epoch": 1.1690061763054465, "grad_norm": 0.6771806478500366, "learning_rate": 7.654873274827915e-06, "loss": 0.393, "step": 4164 }, { "epoch": 1.1692869174621, "grad_norm": 0.7037496566772461, "learning_rate": 7.65348904929524e-06, "loss": 0.3499, "step": 4165 }, { "epoch": 1.1695676586187536, "grad_norm": 0.6214662790298462, "learning_rate": 7.652104540598712e-06, "loss": 0.3838, "step": 4166 }, { "epoch": 1.169848399775407, "grad_norm": 0.7172297239303589, "learning_rate": 7.650719748886082e-06, "loss": 0.3517, "step": 4167 }, { "epoch": 1.1701291409320607, "grad_norm": 0.6152862906455994, "learning_rate": 7.649334674305124e-06, "loss": 0.3734, "step": 4168 }, { "epoch": 1.1704098820887143, "grad_norm": 0.7730669379234314, "learning_rate": 7.647949317003645e-06, "loss": 0.3872, "step": 4169 }, { "epoch": 1.1706906232453678, "grad_norm": 0.6491602659225464, "learning_rate": 7.64656367712948e-06, "loss": 0.3701, "step": 4170 }, { "epoch": 1.1709713644020214, "grad_norm": 0.7138637900352478, "learning_rate": 7.645177754830497e-06, "loss": 0.3791, "step": 4171 }, { "epoch": 1.1712521055586749, "grad_norm": 0.701231062412262, "learning_rate": 7.643791550254595e-06, "loss": 0.3742, "step": 4172 }, { "epoch": 1.1715328467153285, "grad_norm": 0.714582622051239, "learning_rate": 7.6424050635497e-06, "loss": 0.409, "step": 4173 }, { "epoch": 1.171813587871982, "grad_norm": 0.746593177318573, "learning_rate": 7.641018294863768e-06, "loss": 0.4367, "step": 4174 }, { "epoch": 1.1720943290286356, "grad_norm": 0.7003906965255737, "learning_rate": 7.639631244344786e-06, "loss": 0.3661, "step": 4175 }, { "epoch": 1.172375070185289, "grad_norm": 0.7029473185539246, "learning_rate": 7.638243912140772e-06, "loss": 0.3724, "step": 4176 }, { "epoch": 1.1726558113419427, "grad_norm": 0.6909000873565674, "learning_rate": 7.636856298399774e-06, "loss": 0.3918, "step": 4177 }, { "epoch": 1.1729365524985962, "grad_norm": 0.6882992386817932, "learning_rate": 7.635468403269871e-06, "loss": 0.381, "step": 4178 }, { "epoch": 1.1732172936552498, "grad_norm": 0.7175175547599792, "learning_rate": 7.634080226899168e-06, "loss": 0.3519, "step": 4179 }, { "epoch": 1.1734980348119035, "grad_norm": 0.6818076372146606, "learning_rate": 7.632691769435803e-06, "loss": 0.4044, "step": 4180 }, { "epoch": 1.173778775968557, "grad_norm": 0.6545034050941467, "learning_rate": 7.631303031027944e-06, "loss": 0.4161, "step": 4181 }, { "epoch": 1.1740595171252106, "grad_norm": 0.6610690951347351, "learning_rate": 7.629914011823788e-06, "loss": 0.4019, "step": 4182 }, { "epoch": 1.174340258281864, "grad_norm": 0.6658121347427368, "learning_rate": 7.628524711971566e-06, "loss": 0.3804, "step": 4183 }, { "epoch": 1.1746209994385177, "grad_norm": 0.7791524529457092, "learning_rate": 7.62713513161953e-06, "loss": 0.4162, "step": 4184 }, { "epoch": 1.1749017405951712, "grad_norm": 1.1453559398651123, "learning_rate": 7.625745270915969e-06, "loss": 0.4044, "step": 4185 }, { "epoch": 1.1751824817518248, "grad_norm": 0.6825242638587952, "learning_rate": 7.624355130009202e-06, "loss": 0.4257, "step": 4186 }, { "epoch": 1.1754632229084785, "grad_norm": 0.7512766122817993, "learning_rate": 7.622964709047576e-06, "loss": 0.3708, "step": 4187 }, { "epoch": 1.175743964065132, "grad_norm": 0.7023360729217529, "learning_rate": 7.6215740081794665e-06, "loss": 0.4249, "step": 4188 }, { "epoch": 1.1760247052217856, "grad_norm": 0.753903865814209, "learning_rate": 7.620183027553283e-06, "loss": 0.3943, "step": 4189 }, { "epoch": 1.176305446378439, "grad_norm": 0.5684053301811218, "learning_rate": 7.61879176731746e-06, "loss": 0.4126, "step": 4190 }, { "epoch": 1.1765861875350927, "grad_norm": 0.7779684662818909, "learning_rate": 7.617400227620463e-06, "loss": 0.4077, "step": 4191 }, { "epoch": 1.1768669286917461, "grad_norm": 0.6655716896057129, "learning_rate": 7.616008408610791e-06, "loss": 0.3712, "step": 4192 }, { "epoch": 1.1771476698483998, "grad_norm": 0.7407196164131165, "learning_rate": 7.614616310436971e-06, "loss": 0.4351, "step": 4193 }, { "epoch": 1.1774284110050532, "grad_norm": 0.7647140026092529, "learning_rate": 7.613223933247555e-06, "loss": 0.4277, "step": 4194 }, { "epoch": 1.177709152161707, "grad_norm": 0.6379993557929993, "learning_rate": 7.6118312771911325e-06, "loss": 0.4016, "step": 4195 }, { "epoch": 1.1779898933183603, "grad_norm": 0.6983136534690857, "learning_rate": 7.61043834241632e-06, "loss": 0.3982, "step": 4196 }, { "epoch": 1.178270634475014, "grad_norm": 0.7108660936355591, "learning_rate": 7.609045129071759e-06, "loss": 0.3951, "step": 4197 }, { "epoch": 1.1785513756316677, "grad_norm": 0.6539588570594788, "learning_rate": 7.607651637306126e-06, "loss": 0.3821, "step": 4198 }, { "epoch": 1.1788321167883211, "grad_norm": 0.7670096755027771, "learning_rate": 7.6062578672681275e-06, "loss": 0.3963, "step": 4199 }, { "epoch": 1.1791128579449748, "grad_norm": 0.666587233543396, "learning_rate": 7.604863819106496e-06, "loss": 0.4053, "step": 4200 }, { "epoch": 1.1793935991016282, "grad_norm": 0.7293751239776611, "learning_rate": 7.603469492969997e-06, "loss": 0.442, "step": 4201 }, { "epoch": 1.1796743402582819, "grad_norm": 0.8012384176254272, "learning_rate": 7.602074889007423e-06, "loss": 0.4257, "step": 4202 }, { "epoch": 1.1799550814149353, "grad_norm": 0.6272772550582886, "learning_rate": 7.600680007367598e-06, "loss": 0.3746, "step": 4203 }, { "epoch": 1.180235822571589, "grad_norm": 0.6824804544448853, "learning_rate": 7.599284848199375e-06, "loss": 0.374, "step": 4204 }, { "epoch": 1.1805165637282427, "grad_norm": 0.6905078291893005, "learning_rate": 7.597889411651636e-06, "loss": 0.3836, "step": 4205 }, { "epoch": 1.180797304884896, "grad_norm": 0.6570492386817932, "learning_rate": 7.596493697873295e-06, "loss": 0.3559, "step": 4206 }, { "epoch": 1.1810780460415498, "grad_norm": 0.783523440361023, "learning_rate": 7.595097707013295e-06, "loss": 0.447, "step": 4207 }, { "epoch": 1.1813587871982032, "grad_norm": 0.6097093820571899, "learning_rate": 7.593701439220602e-06, "loss": 0.3731, "step": 4208 }, { "epoch": 1.1816395283548569, "grad_norm": 0.7884918451309204, "learning_rate": 7.592304894644223e-06, "loss": 0.4275, "step": 4209 }, { "epoch": 1.1819202695115103, "grad_norm": 0.6653420329093933, "learning_rate": 7.5909080734331875e-06, "loss": 0.4094, "step": 4210 }, { "epoch": 1.182201010668164, "grad_norm": 0.698269248008728, "learning_rate": 7.5895109757365515e-06, "loss": 0.4093, "step": 4211 }, { "epoch": 1.1824817518248176, "grad_norm": 0.6784840822219849, "learning_rate": 7.588113601703408e-06, "loss": 0.4136, "step": 4212 }, { "epoch": 1.182762492981471, "grad_norm": 0.7869148254394531, "learning_rate": 7.5867159514828745e-06, "loss": 0.397, "step": 4213 }, { "epoch": 1.1830432341381247, "grad_norm": 0.7723643183708191, "learning_rate": 7.585318025224102e-06, "loss": 0.3898, "step": 4214 }, { "epoch": 1.1833239752947782, "grad_norm": 0.6724240183830261, "learning_rate": 7.583919823076267e-06, "loss": 0.4115, "step": 4215 }, { "epoch": 1.1836047164514318, "grad_norm": 0.5880551338195801, "learning_rate": 7.582521345188576e-06, "loss": 0.3874, "step": 4216 }, { "epoch": 1.1838854576080853, "grad_norm": 0.6574677228927612, "learning_rate": 7.581122591710266e-06, "loss": 0.4159, "step": 4217 }, { "epoch": 1.184166198764739, "grad_norm": 0.6764013171195984, "learning_rate": 7.579723562790604e-06, "loss": 0.4091, "step": 4218 }, { "epoch": 1.1844469399213924, "grad_norm": 0.7931365966796875, "learning_rate": 7.5783242585788865e-06, "loss": 0.3928, "step": 4219 }, { "epoch": 1.184727681078046, "grad_norm": 0.6253363490104675, "learning_rate": 7.576924679224438e-06, "loss": 0.3876, "step": 4220 }, { "epoch": 1.1850084222346995, "grad_norm": 0.6436002254486084, "learning_rate": 7.575524824876612e-06, "loss": 0.3787, "step": 4221 }, { "epoch": 1.1852891633913532, "grad_norm": 0.6307581067085266, "learning_rate": 7.574124695684793e-06, "loss": 0.3809, "step": 4222 }, { "epoch": 1.1855699045480068, "grad_norm": 0.6719784140586853, "learning_rate": 7.572724291798394e-06, "loss": 0.3722, "step": 4223 }, { "epoch": 1.1858506457046603, "grad_norm": 0.6689779758453369, "learning_rate": 7.5713236133668566e-06, "loss": 0.3644, "step": 4224 }, { "epoch": 1.186131386861314, "grad_norm": 0.6395378708839417, "learning_rate": 7.569922660539654e-06, "loss": 0.3984, "step": 4225 }, { "epoch": 1.1864121280179674, "grad_norm": 0.6845116019248962, "learning_rate": 7.568521433466285e-06, "loss": 0.3787, "step": 4226 }, { "epoch": 1.186692869174621, "grad_norm": 0.8106564283370972, "learning_rate": 7.567119932296283e-06, "loss": 0.426, "step": 4227 }, { "epoch": 1.1869736103312745, "grad_norm": 0.6468884944915771, "learning_rate": 7.565718157179205e-06, "loss": 0.4067, "step": 4228 }, { "epoch": 1.1872543514879281, "grad_norm": 0.6525301337242126, "learning_rate": 7.56431610826464e-06, "loss": 0.3647, "step": 4229 }, { "epoch": 1.1875350926445818, "grad_norm": 0.7215001583099365, "learning_rate": 7.562913785702208e-06, "loss": 0.4292, "step": 4230 }, { "epoch": 1.1878158338012352, "grad_norm": 0.7083243131637573, "learning_rate": 7.5615111896415506e-06, "loss": 0.3921, "step": 4231 }, { "epoch": 1.188096574957889, "grad_norm": 0.6091982126235962, "learning_rate": 7.5601083202323525e-06, "loss": 0.3805, "step": 4232 }, { "epoch": 1.1883773161145423, "grad_norm": 0.7021400928497314, "learning_rate": 7.558705177624312e-06, "loss": 0.4257, "step": 4233 }, { "epoch": 1.188658057271196, "grad_norm": 0.548466682434082, "learning_rate": 7.557301761967167e-06, "loss": 0.3744, "step": 4234 }, { "epoch": 1.1889387984278494, "grad_norm": 0.6605693697929382, "learning_rate": 7.5558980734106814e-06, "loss": 0.3982, "step": 4235 }, { "epoch": 1.189219539584503, "grad_norm": 0.7793446183204651, "learning_rate": 7.554494112104647e-06, "loss": 0.4009, "step": 4236 }, { "epoch": 1.1895002807411568, "grad_norm": 0.6424089074134827, "learning_rate": 7.553089878198887e-06, "loss": 0.3536, "step": 4237 }, { "epoch": 1.1897810218978102, "grad_norm": 0.7062333822250366, "learning_rate": 7.551685371843251e-06, "loss": 0.3962, "step": 4238 }, { "epoch": 1.1900617630544639, "grad_norm": 0.6652928590774536, "learning_rate": 7.550280593187621e-06, "loss": 0.4093, "step": 4239 }, { "epoch": 1.1903425042111173, "grad_norm": 0.6986287832260132, "learning_rate": 7.548875542381904e-06, "loss": 0.4097, "step": 4240 }, { "epoch": 1.190623245367771, "grad_norm": 0.6457189917564392, "learning_rate": 7.547470219576041e-06, "loss": 0.4157, "step": 4241 }, { "epoch": 1.1909039865244244, "grad_norm": 0.7715068459510803, "learning_rate": 7.5460646249199956e-06, "loss": 0.4076, "step": 4242 }, { "epoch": 1.191184727681078, "grad_norm": 0.6989235877990723, "learning_rate": 7.544658758563768e-06, "loss": 0.4136, "step": 4243 }, { "epoch": 1.1914654688377315, "grad_norm": 0.7448019981384277, "learning_rate": 7.543252620657382e-06, "loss": 0.3926, "step": 4244 }, { "epoch": 1.1917462099943852, "grad_norm": 0.7202630639076233, "learning_rate": 7.5418462113508906e-06, "loss": 0.3287, "step": 4245 }, { "epoch": 1.1920269511510386, "grad_norm": 0.7235342264175415, "learning_rate": 7.540439530794379e-06, "loss": 0.4007, "step": 4246 }, { "epoch": 1.1923076923076923, "grad_norm": 0.6588276624679565, "learning_rate": 7.539032579137958e-06, "loss": 0.3819, "step": 4247 }, { "epoch": 1.192588433464346, "grad_norm": 0.7202818393707275, "learning_rate": 7.53762535653177e-06, "loss": 0.4136, "step": 4248 }, { "epoch": 1.1928691746209994, "grad_norm": 0.8266968727111816, "learning_rate": 7.536217863125985e-06, "loss": 0.4443, "step": 4249 }, { "epoch": 1.193149915777653, "grad_norm": 0.7003817558288574, "learning_rate": 7.534810099070801e-06, "loss": 0.4214, "step": 4250 }, { "epoch": 1.1934306569343065, "grad_norm": 0.7337723970413208, "learning_rate": 7.533402064516445e-06, "loss": 0.3992, "step": 4251 }, { "epoch": 1.1937113980909602, "grad_norm": 0.6616443395614624, "learning_rate": 7.5319937596131764e-06, "loss": 0.357, "step": 4252 }, { "epoch": 1.1939921392476136, "grad_norm": 0.6619863510131836, "learning_rate": 7.530585184511278e-06, "loss": 0.3843, "step": 4253 }, { "epoch": 1.1942728804042673, "grad_norm": 0.6348530054092407, "learning_rate": 7.529176339361066e-06, "loss": 0.3468, "step": 4254 }, { "epoch": 1.194553621560921, "grad_norm": 0.6510490775108337, "learning_rate": 7.527767224312883e-06, "loss": 0.4198, "step": 4255 }, { "epoch": 1.1948343627175744, "grad_norm": 0.7726225256919861, "learning_rate": 7.5263578395171e-06, "loss": 0.3689, "step": 4256 }, { "epoch": 1.195115103874228, "grad_norm": 0.7218582630157471, "learning_rate": 7.5249481851241195e-06, "loss": 0.3984, "step": 4257 }, { "epoch": 1.1953958450308815, "grad_norm": 0.6921402812004089, "learning_rate": 7.523538261284371e-06, "loss": 0.4278, "step": 4258 }, { "epoch": 1.1956765861875351, "grad_norm": 0.7084171175956726, "learning_rate": 7.522128068148311e-06, "loss": 0.4064, "step": 4259 }, { "epoch": 1.1959573273441886, "grad_norm": 0.6785997152328491, "learning_rate": 7.520717605866429e-06, "loss": 0.4098, "step": 4260 }, { "epoch": 1.1962380685008422, "grad_norm": 0.6730110049247742, "learning_rate": 7.519306874589238e-06, "loss": 0.3765, "step": 4261 }, { "epoch": 1.196518809657496, "grad_norm": 0.6426097750663757, "learning_rate": 7.517895874467285e-06, "loss": 0.3874, "step": 4262 }, { "epoch": 1.1967995508141493, "grad_norm": 0.6854656338691711, "learning_rate": 7.516484605651141e-06, "loss": 0.4128, "step": 4263 }, { "epoch": 1.197080291970803, "grad_norm": 0.7365685701370239, "learning_rate": 7.5150730682914085e-06, "loss": 0.3806, "step": 4264 }, { "epoch": 1.1973610331274565, "grad_norm": 0.6268132328987122, "learning_rate": 7.513661262538721e-06, "loss": 0.3833, "step": 4265 }, { "epoch": 1.1976417742841101, "grad_norm": 0.7252869606018066, "learning_rate": 7.5122491885437324e-06, "loss": 0.4033, "step": 4266 }, { "epoch": 1.1979225154407636, "grad_norm": 0.7028063535690308, "learning_rate": 7.510836846457134e-06, "loss": 0.3861, "step": 4267 }, { "epoch": 1.1982032565974172, "grad_norm": 0.6498178839683533, "learning_rate": 7.509424236429641e-06, "loss": 0.3918, "step": 4268 }, { "epoch": 1.1984839977540707, "grad_norm": 0.7453802824020386, "learning_rate": 7.508011358611997e-06, "loss": 0.3619, "step": 4269 }, { "epoch": 1.1987647389107243, "grad_norm": 0.7189194560050964, "learning_rate": 7.5065982131549795e-06, "loss": 0.3859, "step": 4270 }, { "epoch": 1.1990454800673778, "grad_norm": 0.6720621585845947, "learning_rate": 7.505184800209387e-06, "loss": 0.3784, "step": 4271 }, { "epoch": 1.1993262212240314, "grad_norm": 0.6525327563285828, "learning_rate": 7.503771119926052e-06, "loss": 0.3805, "step": 4272 }, { "epoch": 1.199606962380685, "grad_norm": 0.6999576687812805, "learning_rate": 7.502357172455832e-06, "loss": 0.3985, "step": 4273 }, { "epoch": 1.1998877035373385, "grad_norm": 0.7010440826416016, "learning_rate": 7.5009429579496174e-06, "loss": 0.4044, "step": 4274 }, { "epoch": 1.2001684446939922, "grad_norm": 0.707637369632721, "learning_rate": 7.499528476558321e-06, "loss": 0.3679, "step": 4275 }, { "epoch": 1.2004491858506456, "grad_norm": 0.6848056316375732, "learning_rate": 7.498113728432891e-06, "loss": 0.3782, "step": 4276 }, { "epoch": 1.2007299270072993, "grad_norm": 0.6553831696510315, "learning_rate": 7.4966987137242975e-06, "loss": 0.3867, "step": 4277 }, { "epoch": 1.2010106681639527, "grad_norm": 0.7628927826881409, "learning_rate": 7.495283432583542e-06, "loss": 0.3743, "step": 4278 }, { "epoch": 1.2012914093206064, "grad_norm": 0.791347086429596, "learning_rate": 7.493867885161658e-06, "loss": 0.4186, "step": 4279 }, { "epoch": 1.20157215047726, "grad_norm": 0.6676722168922424, "learning_rate": 7.4924520716096995e-06, "loss": 0.4047, "step": 4280 }, { "epoch": 1.2018528916339135, "grad_norm": 0.6947941184043884, "learning_rate": 7.491035992078757e-06, "loss": 0.3969, "step": 4281 }, { "epoch": 1.2021336327905672, "grad_norm": 0.8484598994255066, "learning_rate": 7.489619646719943e-06, "loss": 0.434, "step": 4282 }, { "epoch": 1.2024143739472206, "grad_norm": 0.6089879274368286, "learning_rate": 7.4882030356844025e-06, "loss": 0.3917, "step": 4283 }, { "epoch": 1.2026951151038743, "grad_norm": 0.7126756906509399, "learning_rate": 7.486786159123307e-06, "loss": 0.4022, "step": 4284 }, { "epoch": 1.2029758562605277, "grad_norm": 0.8028550744056702, "learning_rate": 7.485369017187858e-06, "loss": 0.4079, "step": 4285 }, { "epoch": 1.2032565974171814, "grad_norm": 0.6776315569877625, "learning_rate": 7.483951610029282e-06, "loss": 0.3326, "step": 4286 }, { "epoch": 1.203537338573835, "grad_norm": 0.7978324890136719, "learning_rate": 7.482533937798838e-06, "loss": 0.3583, "step": 4287 }, { "epoch": 1.2038180797304885, "grad_norm": 0.7162542939186096, "learning_rate": 7.48111600064781e-06, "loss": 0.385, "step": 4288 }, { "epoch": 1.2040988208871422, "grad_norm": 0.6677225232124329, "learning_rate": 7.47969779872751e-06, "loss": 0.3427, "step": 4289 }, { "epoch": 1.2043795620437956, "grad_norm": 0.7364921569824219, "learning_rate": 7.478279332189282e-06, "loss": 0.3826, "step": 4290 }, { "epoch": 1.2046603032004493, "grad_norm": 0.6630323529243469, "learning_rate": 7.476860601184495e-06, "loss": 0.4213, "step": 4291 }, { "epoch": 1.2049410443571027, "grad_norm": 0.6920050382614136, "learning_rate": 7.475441605864546e-06, "loss": 0.3748, "step": 4292 }, { "epoch": 1.2052217855137564, "grad_norm": 0.6308068037033081, "learning_rate": 7.4740223463808644e-06, "loss": 0.4059, "step": 4293 }, { "epoch": 1.2055025266704098, "grad_norm": 0.7217302918434143, "learning_rate": 7.472602822884903e-06, "loss": 0.4223, "step": 4294 }, { "epoch": 1.2057832678270635, "grad_norm": 0.6837269067764282, "learning_rate": 7.471183035528142e-06, "loss": 0.3681, "step": 4295 }, { "epoch": 1.206064008983717, "grad_norm": 0.5798900723457336, "learning_rate": 7.469762984462097e-06, "loss": 0.4099, "step": 4296 }, { "epoch": 1.2063447501403706, "grad_norm": 0.7235340476036072, "learning_rate": 7.468342669838304e-06, "loss": 0.4243, "step": 4297 }, { "epoch": 1.2066254912970242, "grad_norm": 0.6759546399116516, "learning_rate": 7.466922091808332e-06, "loss": 0.3993, "step": 4298 }, { "epoch": 1.2069062324536777, "grad_norm": 0.7389718890190125, "learning_rate": 7.465501250523773e-06, "loss": 0.4034, "step": 4299 }, { "epoch": 1.2071869736103313, "grad_norm": 0.5856978893280029, "learning_rate": 7.464080146136255e-06, "loss": 0.3981, "step": 4300 }, { "epoch": 1.2074677147669848, "grad_norm": 0.6909798979759216, "learning_rate": 7.462658778797425e-06, "loss": 0.3528, "step": 4301 }, { "epoch": 1.2077484559236384, "grad_norm": 0.6939312219619751, "learning_rate": 7.461237148658964e-06, "loss": 0.4294, "step": 4302 }, { "epoch": 1.2080291970802919, "grad_norm": 0.6418517827987671, "learning_rate": 7.459815255872581e-06, "loss": 0.3889, "step": 4303 }, { "epoch": 1.2083099382369455, "grad_norm": 0.6352454423904419, "learning_rate": 7.458393100590011e-06, "loss": 0.4098, "step": 4304 }, { "epoch": 1.2085906793935992, "grad_norm": 0.7874202132225037, "learning_rate": 7.456970682963016e-06, "loss": 0.4049, "step": 4305 }, { "epoch": 1.2088714205502527, "grad_norm": 0.6554611921310425, "learning_rate": 7.455548003143389e-06, "loss": 0.3827, "step": 4306 }, { "epoch": 1.2091521617069063, "grad_norm": 0.6753202080726624, "learning_rate": 7.4541250612829485e-06, "loss": 0.3924, "step": 4307 }, { "epoch": 1.2094329028635598, "grad_norm": 0.6789506077766418, "learning_rate": 7.452701857533543e-06, "loss": 0.3799, "step": 4308 }, { "epoch": 1.2097136440202134, "grad_norm": 0.7028040289878845, "learning_rate": 7.451278392047049e-06, "loss": 0.3756, "step": 4309 }, { "epoch": 1.2099943851768669, "grad_norm": 0.7353365421295166, "learning_rate": 7.449854664975366e-06, "loss": 0.4344, "step": 4310 }, { "epoch": 1.2102751263335205, "grad_norm": 0.7207481265068054, "learning_rate": 7.448430676470431e-06, "loss": 0.3834, "step": 4311 }, { "epoch": 1.210555867490174, "grad_norm": 0.6377664804458618, "learning_rate": 7.447006426684198e-06, "loss": 0.3741, "step": 4312 }, { "epoch": 1.2108366086468276, "grad_norm": 0.7059157490730286, "learning_rate": 7.445581915768656e-06, "loss": 0.3996, "step": 4313 }, { "epoch": 1.211117349803481, "grad_norm": 0.6970432996749878, "learning_rate": 7.44415714387582e-06, "loss": 0.3722, "step": 4314 }, { "epoch": 1.2113980909601347, "grad_norm": 0.7008577585220337, "learning_rate": 7.442732111157734e-06, "loss": 0.403, "step": 4315 }, { "epoch": 1.2116788321167884, "grad_norm": 0.7184580564498901, "learning_rate": 7.4413068177664664e-06, "loss": 0.3773, "step": 4316 }, { "epoch": 1.2119595732734418, "grad_norm": 0.7665238976478577, "learning_rate": 7.439881263854116e-06, "loss": 0.3613, "step": 4317 }, { "epoch": 1.2122403144300955, "grad_norm": 0.6726934909820557, "learning_rate": 7.438455449572811e-06, "loss": 0.4, "step": 4318 }, { "epoch": 1.212521055586749, "grad_norm": 0.7335178852081299, "learning_rate": 7.437029375074704e-06, "loss": 0.4192, "step": 4319 }, { "epoch": 1.2128017967434026, "grad_norm": 0.6724697351455688, "learning_rate": 7.435603040511976e-06, "loss": 0.3784, "step": 4320 }, { "epoch": 1.213082537900056, "grad_norm": 0.7500842809677124, "learning_rate": 7.4341764460368385e-06, "loss": 0.4021, "step": 4321 }, { "epoch": 1.2133632790567097, "grad_norm": 0.6556463837623596, "learning_rate": 7.432749591801527e-06, "loss": 0.3962, "step": 4322 }, { "epoch": 1.2136440202133634, "grad_norm": 0.6699510812759399, "learning_rate": 7.431322477958308e-06, "loss": 0.3723, "step": 4323 }, { "epoch": 1.2139247613700168, "grad_norm": 0.7302048206329346, "learning_rate": 7.429895104659473e-06, "loss": 0.3964, "step": 4324 }, { "epoch": 1.2142055025266705, "grad_norm": 0.7734420299530029, "learning_rate": 7.428467472057345e-06, "loss": 0.3965, "step": 4325 }, { "epoch": 1.214486243683324, "grad_norm": 0.5939376950263977, "learning_rate": 7.427039580304268e-06, "loss": 0.3571, "step": 4326 }, { "epoch": 1.2147669848399776, "grad_norm": 0.6674864292144775, "learning_rate": 7.425611429552621e-06, "loss": 0.3839, "step": 4327 }, { "epoch": 1.215047725996631, "grad_norm": 0.6552734375, "learning_rate": 7.424183019954805e-06, "loss": 0.3999, "step": 4328 }, { "epoch": 1.2153284671532847, "grad_norm": 0.7163453102111816, "learning_rate": 7.422754351663252e-06, "loss": 0.3809, "step": 4329 }, { "epoch": 1.2156092083099383, "grad_norm": 0.6272565722465515, "learning_rate": 7.421325424830421e-06, "loss": 0.3996, "step": 4330 }, { "epoch": 1.2158899494665918, "grad_norm": 0.7307857871055603, "learning_rate": 7.419896239608799e-06, "loss": 0.3932, "step": 4331 }, { "epoch": 1.2161706906232455, "grad_norm": 0.6637787222862244, "learning_rate": 7.418466796150896e-06, "loss": 0.368, "step": 4332 }, { "epoch": 1.216451431779899, "grad_norm": 0.7471299171447754, "learning_rate": 7.417037094609258e-06, "loss": 0.4043, "step": 4333 }, { "epoch": 1.2167321729365526, "grad_norm": 0.7452800869941711, "learning_rate": 7.415607135136451e-06, "loss": 0.42, "step": 4334 }, { "epoch": 1.217012914093206, "grad_norm": 0.6763039231300354, "learning_rate": 7.414176917885072e-06, "loss": 0.3994, "step": 4335 }, { "epoch": 1.2172936552498597, "grad_norm": 0.6500682830810547, "learning_rate": 7.412746443007748e-06, "loss": 0.4056, "step": 4336 }, { "epoch": 1.217574396406513, "grad_norm": 0.7071866393089294, "learning_rate": 7.411315710657124e-06, "loss": 0.3858, "step": 4337 }, { "epoch": 1.2178551375631668, "grad_norm": 0.7375584244728088, "learning_rate": 7.409884720985884e-06, "loss": 0.41, "step": 4338 }, { "epoch": 1.2181358787198202, "grad_norm": 0.6340219378471375, "learning_rate": 7.408453474146731e-06, "loss": 0.3664, "step": 4339 }, { "epoch": 1.2184166198764739, "grad_norm": 0.6191103458404541, "learning_rate": 7.407021970292403e-06, "loss": 0.399, "step": 4340 }, { "epoch": 1.2186973610331275, "grad_norm": 0.5875264406204224, "learning_rate": 7.405590209575657e-06, "loss": 0.357, "step": 4341 }, { "epoch": 1.218978102189781, "grad_norm": 0.6360987424850464, "learning_rate": 7.404158192149285e-06, "loss": 0.3812, "step": 4342 }, { "epoch": 1.2192588433464346, "grad_norm": 0.7508474588394165, "learning_rate": 7.402725918166099e-06, "loss": 0.3645, "step": 4343 }, { "epoch": 1.219539584503088, "grad_norm": 0.796063244342804, "learning_rate": 7.401293387778945e-06, "loss": 0.4028, "step": 4344 }, { "epoch": 1.2198203256597417, "grad_norm": 0.6457430720329285, "learning_rate": 7.399860601140693e-06, "loss": 0.3877, "step": 4345 }, { "epoch": 1.2201010668163952, "grad_norm": 0.6755557060241699, "learning_rate": 7.398427558404241e-06, "loss": 0.3582, "step": 4346 }, { "epoch": 1.2203818079730488, "grad_norm": 0.752692699432373, "learning_rate": 7.396994259722517e-06, "loss": 0.4437, "step": 4347 }, { "epoch": 1.2206625491297025, "grad_norm": 0.7466068863868713, "learning_rate": 7.39556070524847e-06, "loss": 0.4005, "step": 4348 }, { "epoch": 1.220943290286356, "grad_norm": 0.7716429233551025, "learning_rate": 7.394126895135082e-06, "loss": 0.4037, "step": 4349 }, { "epoch": 1.2212240314430096, "grad_norm": 0.6424754858016968, "learning_rate": 7.392692829535359e-06, "loss": 0.3833, "step": 4350 }, { "epoch": 1.221504772599663, "grad_norm": 0.6294004917144775, "learning_rate": 7.391258508602337e-06, "loss": 0.3644, "step": 4351 }, { "epoch": 1.2217855137563167, "grad_norm": 0.6806685924530029, "learning_rate": 7.389823932489078e-06, "loss": 0.377, "step": 4352 }, { "epoch": 1.2220662549129702, "grad_norm": 0.7771987318992615, "learning_rate": 7.38838910134867e-06, "loss": 0.4393, "step": 4353 }, { "epoch": 1.2223469960696238, "grad_norm": 0.6604443788528442, "learning_rate": 7.386954015334229e-06, "loss": 0.3772, "step": 4354 }, { "epoch": 1.2226277372262775, "grad_norm": 0.7321616411209106, "learning_rate": 7.385518674598899e-06, "loss": 0.3993, "step": 4355 }, { "epoch": 1.222908478382931, "grad_norm": 0.661529004573822, "learning_rate": 7.38408307929585e-06, "loss": 0.381, "step": 4356 }, { "epoch": 1.2231892195395846, "grad_norm": 0.6855313777923584, "learning_rate": 7.382647229578282e-06, "loss": 0.3318, "step": 4357 }, { "epoch": 1.223469960696238, "grad_norm": 0.7962076663970947, "learning_rate": 7.381211125599417e-06, "loss": 0.4035, "step": 4358 }, { "epoch": 1.2237507018528917, "grad_norm": 0.6684330701828003, "learning_rate": 7.379774767512509e-06, "loss": 0.3802, "step": 4359 }, { "epoch": 1.2240314430095451, "grad_norm": 0.7023851275444031, "learning_rate": 7.3783381554708366e-06, "loss": 0.3929, "step": 4360 }, { "epoch": 1.2243121841661988, "grad_norm": 0.6956204175949097, "learning_rate": 7.376901289627706e-06, "loss": 0.3774, "step": 4361 }, { "epoch": 1.2245929253228522, "grad_norm": 0.8025999069213867, "learning_rate": 7.3754641701364504e-06, "loss": 0.4143, "step": 4362 }, { "epoch": 1.224873666479506, "grad_norm": 0.6624419689178467, "learning_rate": 7.374026797150431e-06, "loss": 0.3838, "step": 4363 }, { "epoch": 1.2251544076361593, "grad_norm": 0.6223154067993164, "learning_rate": 7.3725891708230355e-06, "loss": 0.3765, "step": 4364 }, { "epoch": 1.225435148792813, "grad_norm": 0.6757875084877014, "learning_rate": 7.371151291307677e-06, "loss": 0.376, "step": 4365 }, { "epoch": 1.2257158899494667, "grad_norm": 0.7055069804191589, "learning_rate": 7.3697131587577985e-06, "loss": 0.4029, "step": 4366 }, { "epoch": 1.2259966311061201, "grad_norm": 0.7640204429626465, "learning_rate": 7.368274773326868e-06, "loss": 0.3598, "step": 4367 }, { "epoch": 1.2262773722627738, "grad_norm": 0.6684814095497131, "learning_rate": 7.366836135168381e-06, "loss": 0.3686, "step": 4368 }, { "epoch": 1.2265581134194272, "grad_norm": 0.7763293981552124, "learning_rate": 7.365397244435859e-06, "loss": 0.4126, "step": 4369 }, { "epoch": 1.2268388545760809, "grad_norm": 0.6652472615242004, "learning_rate": 7.3639581012828545e-06, "loss": 0.3997, "step": 4370 }, { "epoch": 1.2271195957327343, "grad_norm": 0.7757605910301208, "learning_rate": 7.362518705862939e-06, "loss": 0.3872, "step": 4371 }, { "epoch": 1.227400336889388, "grad_norm": 0.8113671541213989, "learning_rate": 7.361079058329721e-06, "loss": 0.3981, "step": 4372 }, { "epoch": 1.2276810780460417, "grad_norm": 0.6892746090888977, "learning_rate": 7.359639158836828e-06, "loss": 0.408, "step": 4373 }, { "epoch": 1.227961819202695, "grad_norm": 0.6588771343231201, "learning_rate": 7.358199007537916e-06, "loss": 0.4066, "step": 4374 }, { "epoch": 1.2282425603593488, "grad_norm": 0.7860007286071777, "learning_rate": 7.3567586045866734e-06, "loss": 0.3845, "step": 4375 }, { "epoch": 1.2285233015160022, "grad_norm": 0.6605427265167236, "learning_rate": 7.355317950136807e-06, "loss": 0.3813, "step": 4376 }, { "epoch": 1.2288040426726559, "grad_norm": 0.7304157614707947, "learning_rate": 7.353877044342056e-06, "loss": 0.4147, "step": 4377 }, { "epoch": 1.2290847838293093, "grad_norm": 0.6476666331291199, "learning_rate": 7.352435887356184e-06, "loss": 0.3857, "step": 4378 }, { "epoch": 1.229365524985963, "grad_norm": 0.7970354557037354, "learning_rate": 7.350994479332983e-06, "loss": 0.3632, "step": 4379 }, { "epoch": 1.2296462661426166, "grad_norm": 0.8846747875213623, "learning_rate": 7.349552820426271e-06, "loss": 0.358, "step": 4380 }, { "epoch": 1.22992700729927, "grad_norm": 0.7176412343978882, "learning_rate": 7.348110910789894e-06, "loss": 0.4284, "step": 4381 }, { "epoch": 1.2302077484559237, "grad_norm": 0.7293211221694946, "learning_rate": 7.346668750577721e-06, "loss": 0.4196, "step": 4382 }, { "epoch": 1.2304884896125772, "grad_norm": 0.8100600838661194, "learning_rate": 7.345226339943652e-06, "loss": 0.4024, "step": 4383 }, { "epoch": 1.2307692307692308, "grad_norm": 0.7535078525543213, "learning_rate": 7.343783679041613e-06, "loss": 0.4172, "step": 4384 }, { "epoch": 1.2310499719258843, "grad_norm": 0.6468392610549927, "learning_rate": 7.342340768025555e-06, "loss": 0.4109, "step": 4385 }, { "epoch": 1.231330713082538, "grad_norm": 0.7342042326927185, "learning_rate": 7.3408976070494555e-06, "loss": 0.3921, "step": 4386 }, { "epoch": 1.2316114542391914, "grad_norm": 0.7490071058273315, "learning_rate": 7.33945419626732e-06, "loss": 0.3891, "step": 4387 }, { "epoch": 1.231892195395845, "grad_norm": 0.7169302701950073, "learning_rate": 7.338010535833182e-06, "loss": 0.3515, "step": 4388 }, { "epoch": 1.2321729365524985, "grad_norm": 0.7124811410903931, "learning_rate": 7.336566625901098e-06, "loss": 0.372, "step": 4389 }, { "epoch": 1.2324536777091522, "grad_norm": 0.7964193820953369, "learning_rate": 7.335122466625153e-06, "loss": 0.4042, "step": 4390 }, { "epoch": 1.2327344188658058, "grad_norm": 0.6440420150756836, "learning_rate": 7.333678058159461e-06, "loss": 0.3808, "step": 4391 }, { "epoch": 1.2330151600224593, "grad_norm": 0.590033233165741, "learning_rate": 7.33223340065816e-06, "loss": 0.3866, "step": 4392 }, { "epoch": 1.233295901179113, "grad_norm": 0.730883777141571, "learning_rate": 7.330788494275411e-06, "loss": 0.3758, "step": 4393 }, { "epoch": 1.2335766423357664, "grad_norm": 0.7454805970191956, "learning_rate": 7.329343339165409e-06, "loss": 0.3747, "step": 4394 }, { "epoch": 1.23385738349242, "grad_norm": 0.6938319802284241, "learning_rate": 7.327897935482371e-06, "loss": 0.4059, "step": 4395 }, { "epoch": 1.2341381246490735, "grad_norm": 0.6656152009963989, "learning_rate": 7.326452283380542e-06, "loss": 0.3665, "step": 4396 }, { "epoch": 1.2344188658057271, "grad_norm": 0.5903286337852478, "learning_rate": 7.325006383014193e-06, "loss": 0.4085, "step": 4397 }, { "epoch": 1.2346996069623808, "grad_norm": 0.6602579951286316, "learning_rate": 7.323560234537619e-06, "loss": 0.3764, "step": 4398 }, { "epoch": 1.2349803481190342, "grad_norm": 0.6968615651130676, "learning_rate": 7.3221138381051475e-06, "loss": 0.4454, "step": 4399 }, { "epoch": 1.235261089275688, "grad_norm": 0.6112107038497925, "learning_rate": 7.320667193871127e-06, "loss": 0.3626, "step": 4400 }, { "epoch": 1.2355418304323413, "grad_norm": 0.7879973649978638, "learning_rate": 7.319220301989936e-06, "loss": 0.4109, "step": 4401 }, { "epoch": 1.235822571588995, "grad_norm": 0.7149852514266968, "learning_rate": 7.317773162615976e-06, "loss": 0.4146, "step": 4402 }, { "epoch": 1.2361033127456484, "grad_norm": 0.699227511882782, "learning_rate": 7.316325775903678e-06, "loss": 0.4224, "step": 4403 }, { "epoch": 1.236384053902302, "grad_norm": 0.6559727787971497, "learning_rate": 7.314878142007497e-06, "loss": 0.3852, "step": 4404 }, { "epoch": 1.2366647950589555, "grad_norm": 0.6238139867782593, "learning_rate": 7.313430261081915e-06, "loss": 0.3946, "step": 4405 }, { "epoch": 1.2369455362156092, "grad_norm": 0.6538670659065247, "learning_rate": 7.311982133281442e-06, "loss": 0.4028, "step": 4406 }, { "epoch": 1.2372262773722627, "grad_norm": 0.632474422454834, "learning_rate": 7.310533758760614e-06, "loss": 0.42, "step": 4407 }, { "epoch": 1.2375070185289163, "grad_norm": 0.6869728565216064, "learning_rate": 7.3090851376739915e-06, "loss": 0.4116, "step": 4408 }, { "epoch": 1.23778775968557, "grad_norm": 0.6850327849388123, "learning_rate": 7.3076362701761615e-06, "loss": 0.3722, "step": 4409 }, { "epoch": 1.2380685008422234, "grad_norm": 0.7792261838912964, "learning_rate": 7.306187156421739e-06, "loss": 0.4284, "step": 4410 }, { "epoch": 1.238349241998877, "grad_norm": 0.6640065312385559, "learning_rate": 7.304737796565364e-06, "loss": 0.3776, "step": 4411 }, { "epoch": 1.2386299831555305, "grad_norm": 0.6064174771308899, "learning_rate": 7.303288190761705e-06, "loss": 0.3913, "step": 4412 }, { "epoch": 1.2389107243121842, "grad_norm": 0.7190853357315063, "learning_rate": 7.301838339165454e-06, "loss": 0.3657, "step": 4413 }, { "epoch": 1.2391914654688376, "grad_norm": 0.743168294429779, "learning_rate": 7.300388241931328e-06, "loss": 0.4124, "step": 4414 }, { "epoch": 1.2394722066254913, "grad_norm": 0.5954951643943787, "learning_rate": 7.298937899214073e-06, "loss": 0.385, "step": 4415 }, { "epoch": 1.239752947782145, "grad_norm": 0.6010693311691284, "learning_rate": 7.297487311168464e-06, "loss": 0.3631, "step": 4416 }, { "epoch": 1.2400336889387984, "grad_norm": 0.6950507164001465, "learning_rate": 7.296036477949295e-06, "loss": 0.3785, "step": 4417 }, { "epoch": 1.240314430095452, "grad_norm": 0.6173332333564758, "learning_rate": 7.294585399711391e-06, "loss": 0.409, "step": 4418 }, { "epoch": 1.2405951712521055, "grad_norm": 0.6691762804985046, "learning_rate": 7.293134076609605e-06, "loss": 0.3891, "step": 4419 }, { "epoch": 1.2408759124087592, "grad_norm": 0.6664168238639832, "learning_rate": 7.291682508798808e-06, "loss": 0.3618, "step": 4420 }, { "epoch": 1.2411566535654126, "grad_norm": 0.7321335077285767, "learning_rate": 7.290230696433903e-06, "loss": 0.3792, "step": 4421 }, { "epoch": 1.2414373947220663, "grad_norm": 0.6839069128036499, "learning_rate": 7.288778639669822e-06, "loss": 0.4251, "step": 4422 }, { "epoch": 1.24171813587872, "grad_norm": 0.5564275979995728, "learning_rate": 7.287326338661518e-06, "loss": 0.3888, "step": 4423 }, { "epoch": 1.2419988770353734, "grad_norm": 0.6266826391220093, "learning_rate": 7.285873793563972e-06, "loss": 0.379, "step": 4424 }, { "epoch": 1.242279618192027, "grad_norm": 0.661496102809906, "learning_rate": 7.284421004532187e-06, "loss": 0.3889, "step": 4425 }, { "epoch": 1.2425603593486805, "grad_norm": 0.5980477333068848, "learning_rate": 7.282967971721199e-06, "loss": 0.3778, "step": 4426 }, { "epoch": 1.2428411005053341, "grad_norm": 0.7250111699104309, "learning_rate": 7.281514695286066e-06, "loss": 0.3935, "step": 4427 }, { "epoch": 1.2431218416619876, "grad_norm": 0.6160321235656738, "learning_rate": 7.280061175381873e-06, "loss": 0.3681, "step": 4428 }, { "epoch": 1.2434025828186412, "grad_norm": 0.7048326134681702, "learning_rate": 7.278607412163729e-06, "loss": 0.3821, "step": 4429 }, { "epoch": 1.2436833239752947, "grad_norm": 0.7044711709022522, "learning_rate": 7.277153405786774e-06, "loss": 0.4152, "step": 4430 }, { "epoch": 1.2439640651319483, "grad_norm": 0.6765097975730896, "learning_rate": 7.275699156406167e-06, "loss": 0.3917, "step": 4431 }, { "epoch": 1.2442448062886018, "grad_norm": 0.6712751984596252, "learning_rate": 7.2742446641770985e-06, "loss": 0.395, "step": 4432 }, { "epoch": 1.2445255474452555, "grad_norm": 0.647374153137207, "learning_rate": 7.27278992925478e-06, "loss": 0.339, "step": 4433 }, { "epoch": 1.2448062886019091, "grad_norm": 0.5925500988960266, "learning_rate": 7.271334951794455e-06, "loss": 0.3714, "step": 4434 }, { "epoch": 1.2450870297585626, "grad_norm": 0.7028294801712036, "learning_rate": 7.269879731951388e-06, "loss": 0.4011, "step": 4435 }, { "epoch": 1.2453677709152162, "grad_norm": 0.6963602304458618, "learning_rate": 7.268424269880872e-06, "loss": 0.3703, "step": 4436 }, { "epoch": 1.2456485120718697, "grad_norm": 0.6790087223052979, "learning_rate": 7.266968565738224e-06, "loss": 0.3891, "step": 4437 }, { "epoch": 1.2459292532285233, "grad_norm": 0.6815446615219116, "learning_rate": 7.265512619678789e-06, "loss": 0.4022, "step": 4438 }, { "epoch": 1.2462099943851768, "grad_norm": 0.6858093738555908, "learning_rate": 7.264056431857934e-06, "loss": 0.3755, "step": 4439 }, { "epoch": 1.2464907355418304, "grad_norm": 0.7268288135528564, "learning_rate": 7.262600002431056e-06, "loss": 0.425, "step": 4440 }, { "epoch": 1.246771476698484, "grad_norm": 0.758263349533081, "learning_rate": 7.261143331553577e-06, "loss": 0.4372, "step": 4441 }, { "epoch": 1.2470522178551375, "grad_norm": 0.6037503480911255, "learning_rate": 7.259686419380942e-06, "loss": 0.4161, "step": 4442 }, { "epoch": 1.2473329590117912, "grad_norm": 0.643770694732666, "learning_rate": 7.258229266068625e-06, "loss": 0.4194, "step": 4443 }, { "epoch": 1.2476137001684446, "grad_norm": 0.8054676651954651, "learning_rate": 7.256771871772124e-06, "loss": 0.3104, "step": 4444 }, { "epoch": 1.2478944413250983, "grad_norm": 0.6536027789115906, "learning_rate": 7.255314236646962e-06, "loss": 0.3777, "step": 4445 }, { "epoch": 1.2481751824817517, "grad_norm": 0.7296000123023987, "learning_rate": 7.253856360848689e-06, "loss": 0.4218, "step": 4446 }, { "epoch": 1.2484559236384054, "grad_norm": 0.6687675714492798, "learning_rate": 7.252398244532881e-06, "loss": 0.3678, "step": 4447 }, { "epoch": 1.248736664795059, "grad_norm": 0.757503092288971, "learning_rate": 7.25093988785514e-06, "loss": 0.4333, "step": 4448 }, { "epoch": 1.2490174059517125, "grad_norm": 0.6565677523612976, "learning_rate": 7.249481290971092e-06, "loss": 0.3587, "step": 4449 }, { "epoch": 1.2492981471083662, "grad_norm": 0.7528462409973145, "learning_rate": 7.248022454036389e-06, "loss": 0.3811, "step": 4450 }, { "epoch": 1.2495788882650196, "grad_norm": 0.7380896210670471, "learning_rate": 7.246563377206709e-06, "loss": 0.3768, "step": 4451 }, { "epoch": 1.2498596294216733, "grad_norm": 0.6426535844802856, "learning_rate": 7.245104060637755e-06, "loss": 0.3918, "step": 4452 }, { "epoch": 1.2501403705783267, "grad_norm": 0.7681300640106201, "learning_rate": 7.2436445044852585e-06, "loss": 0.3986, "step": 4453 }, { "epoch": 1.2504211117349804, "grad_norm": 0.6747211813926697, "learning_rate": 7.2421847089049724e-06, "loss": 0.3821, "step": 4454 }, { "epoch": 1.250701852891634, "grad_norm": 0.6966944336891174, "learning_rate": 7.240724674052677e-06, "loss": 0.4028, "step": 4455 }, { "epoch": 1.2509825940482875, "grad_norm": 0.6423379778862, "learning_rate": 7.239264400084178e-06, "loss": 0.4268, "step": 4456 }, { "epoch": 1.251263335204941, "grad_norm": 0.7469407916069031, "learning_rate": 7.237803887155308e-06, "loss": 0.3656, "step": 4457 }, { "epoch": 1.2515440763615946, "grad_norm": 0.7970960140228271, "learning_rate": 7.236343135421924e-06, "loss": 0.4237, "step": 4458 }, { "epoch": 1.2518248175182483, "grad_norm": 0.6215502619743347, "learning_rate": 7.234882145039906e-06, "loss": 0.405, "step": 4459 }, { "epoch": 1.2521055586749017, "grad_norm": 0.725236713886261, "learning_rate": 7.233420916165164e-06, "loss": 0.3527, "step": 4460 }, { "epoch": 1.2523862998315554, "grad_norm": 0.7854107618331909, "learning_rate": 7.231959448953629e-06, "loss": 0.3903, "step": 4461 }, { "epoch": 1.2526670409882088, "grad_norm": 0.6946197152137756, "learning_rate": 7.230497743561265e-06, "loss": 0.3815, "step": 4462 }, { "epoch": 1.2529477821448625, "grad_norm": 0.641368567943573, "learning_rate": 7.22903580014405e-06, "loss": 0.3651, "step": 4463 }, { "epoch": 1.253228523301516, "grad_norm": 0.6621046662330627, "learning_rate": 7.227573618857995e-06, "loss": 0.3845, "step": 4464 }, { "epoch": 1.2535092644581696, "grad_norm": 0.5420482754707336, "learning_rate": 7.226111199859137e-06, "loss": 0.3902, "step": 4465 }, { "epoch": 1.2537900056148232, "grad_norm": 0.6948087811470032, "learning_rate": 7.224648543303534e-06, "loss": 0.4198, "step": 4466 }, { "epoch": 1.2540707467714767, "grad_norm": 0.8189418315887451, "learning_rate": 7.223185649347274e-06, "loss": 0.437, "step": 4467 }, { "epoch": 1.2543514879281303, "grad_norm": 0.7353101372718811, "learning_rate": 7.221722518146467e-06, "loss": 0.3968, "step": 4468 }, { "epoch": 1.2546322290847838, "grad_norm": 0.6004009246826172, "learning_rate": 7.220259149857247e-06, "loss": 0.367, "step": 4469 }, { "epoch": 1.2549129702414374, "grad_norm": 0.6492067575454712, "learning_rate": 7.218795544635778e-06, "loss": 0.3861, "step": 4470 }, { "epoch": 1.2551937113980909, "grad_norm": 0.7004754543304443, "learning_rate": 7.2173317026382465e-06, "loss": 0.3975, "step": 4471 }, { "epoch": 1.2554744525547445, "grad_norm": 0.6812534928321838, "learning_rate": 7.215867624020863e-06, "loss": 0.3993, "step": 4472 }, { "epoch": 1.2557551937113982, "grad_norm": 0.6776750683784485, "learning_rate": 7.214403308939869e-06, "loss": 0.4008, "step": 4473 }, { "epoch": 1.2560359348680517, "grad_norm": 0.5960357785224915, "learning_rate": 7.212938757551522e-06, "loss": 0.3607, "step": 4474 }, { "epoch": 1.256316676024705, "grad_norm": 0.5871161222457886, "learning_rate": 7.211473970012113e-06, "loss": 0.3623, "step": 4475 }, { "epoch": 1.2565974171813588, "grad_norm": 0.605216920375824, "learning_rate": 7.210008946477954e-06, "loss": 0.4004, "step": 4476 }, { "epoch": 1.2568781583380124, "grad_norm": 0.5579923391342163, "learning_rate": 7.208543687105384e-06, "loss": 0.3792, "step": 4477 }, { "epoch": 1.2571588994946659, "grad_norm": 0.6565082669258118, "learning_rate": 7.207078192050765e-06, "loss": 0.3888, "step": 4478 }, { "epoch": 1.2574396406513195, "grad_norm": 0.6623365879058838, "learning_rate": 7.205612461470488e-06, "loss": 0.3972, "step": 4479 }, { "epoch": 1.2577203818079732, "grad_norm": 0.7521390318870544, "learning_rate": 7.2041464955209625e-06, "loss": 0.3865, "step": 4480 }, { "epoch": 1.2580011229646266, "grad_norm": 0.6704253554344177, "learning_rate": 7.202680294358631e-06, "loss": 0.4221, "step": 4481 }, { "epoch": 1.25828186412128, "grad_norm": 0.562984049320221, "learning_rate": 7.201213858139956e-06, "loss": 0.3913, "step": 4482 }, { "epoch": 1.2585626052779337, "grad_norm": 0.6283015608787537, "learning_rate": 7.199747187021427e-06, "loss": 0.37, "step": 4483 }, { "epoch": 1.2588433464345874, "grad_norm": 0.5919360518455505, "learning_rate": 7.198280281159556e-06, "loss": 0.3826, "step": 4484 }, { "epoch": 1.2591240875912408, "grad_norm": 0.6767114400863647, "learning_rate": 7.1968131407108835e-06, "loss": 0.3658, "step": 4485 }, { "epoch": 1.2594048287478945, "grad_norm": 0.6459712982177734, "learning_rate": 7.195345765831975e-06, "loss": 0.4175, "step": 4486 }, { "epoch": 1.259685569904548, "grad_norm": 0.6185809969902039, "learning_rate": 7.193878156679417e-06, "loss": 0.3764, "step": 4487 }, { "epoch": 1.2599663110612016, "grad_norm": 0.6733853816986084, "learning_rate": 7.1924103134098254e-06, "loss": 0.3657, "step": 4488 }, { "epoch": 1.260247052217855, "grad_norm": 0.7166831493377686, "learning_rate": 7.19094223617984e-06, "loss": 0.3635, "step": 4489 }, { "epoch": 1.2605277933745087, "grad_norm": 0.6229596734046936, "learning_rate": 7.189473925146124e-06, "loss": 0.4057, "step": 4490 }, { "epoch": 1.2608085345311624, "grad_norm": 0.6371515989303589, "learning_rate": 7.188005380465365e-06, "loss": 0.4026, "step": 4491 }, { "epoch": 1.2610892756878158, "grad_norm": 0.7031722068786621, "learning_rate": 7.186536602294278e-06, "loss": 0.3936, "step": 4492 }, { "epoch": 1.2613700168444695, "grad_norm": 0.6174464821815491, "learning_rate": 7.185067590789602e-06, "loss": 0.3815, "step": 4493 }, { "epoch": 1.261650758001123, "grad_norm": 0.7459012866020203, "learning_rate": 7.183598346108101e-06, "loss": 0.4227, "step": 4494 }, { "epoch": 1.2619314991577766, "grad_norm": 0.632767379283905, "learning_rate": 7.1821288684065635e-06, "loss": 0.3733, "step": 4495 }, { "epoch": 1.26221224031443, "grad_norm": 0.7163012027740479, "learning_rate": 7.180659157841803e-06, "loss": 0.3764, "step": 4496 }, { "epoch": 1.2624929814710837, "grad_norm": 0.7456682324409485, "learning_rate": 7.179189214570658e-06, "loss": 0.4029, "step": 4497 }, { "epoch": 1.2627737226277373, "grad_norm": 0.737336277961731, "learning_rate": 7.17771903874999e-06, "loss": 0.4172, "step": 4498 }, { "epoch": 1.2630544637843908, "grad_norm": 0.6734129786491394, "learning_rate": 7.17624863053669e-06, "loss": 0.4146, "step": 4499 }, { "epoch": 1.2633352049410442, "grad_norm": 0.653513491153717, "learning_rate": 7.174777990087668e-06, "loss": 0.3847, "step": 4500 }, { "epoch": 1.263615946097698, "grad_norm": 0.6573703289031982, "learning_rate": 7.173307117559865e-06, "loss": 0.3884, "step": 4501 }, { "epoch": 1.2638966872543516, "grad_norm": 0.7086719274520874, "learning_rate": 7.17183601311024e-06, "loss": 0.3638, "step": 4502 }, { "epoch": 1.264177428411005, "grad_norm": 0.6080599427223206, "learning_rate": 7.170364676895782e-06, "loss": 0.3999, "step": 4503 }, { "epoch": 1.2644581695676587, "grad_norm": 0.6222747564315796, "learning_rate": 7.168893109073502e-06, "loss": 0.359, "step": 4504 }, { "epoch": 1.264738910724312, "grad_norm": 0.6191179752349854, "learning_rate": 7.167421309800436e-06, "loss": 0.3997, "step": 4505 }, { "epoch": 1.2650196518809658, "grad_norm": 0.7251825332641602, "learning_rate": 7.165949279233647e-06, "loss": 0.3808, "step": 4506 }, { "epoch": 1.2653003930376192, "grad_norm": 0.6500434279441833, "learning_rate": 7.164477017530221e-06, "loss": 0.4165, "step": 4507 }, { "epoch": 1.2655811341942729, "grad_norm": 0.6976383328437805, "learning_rate": 7.1630045248472665e-06, "loss": 0.436, "step": 4508 }, { "epoch": 1.2658618753509265, "grad_norm": 0.6470715403556824, "learning_rate": 7.16153180134192e-06, "loss": 0.4134, "step": 4509 }, { "epoch": 1.26614261650758, "grad_norm": 0.6182039976119995, "learning_rate": 7.160058847171342e-06, "loss": 0.3937, "step": 4510 }, { "epoch": 1.2664233576642336, "grad_norm": 0.6108161807060242, "learning_rate": 7.158585662492715e-06, "loss": 0.3997, "step": 4511 }, { "epoch": 1.266704098820887, "grad_norm": 0.6060779094696045, "learning_rate": 7.1571122474632505e-06, "loss": 0.3347, "step": 4512 }, { "epoch": 1.2669848399775407, "grad_norm": 0.6959679126739502, "learning_rate": 7.15563860224018e-06, "loss": 0.4157, "step": 4513 }, { "epoch": 1.2672655811341942, "grad_norm": 0.7535659074783325, "learning_rate": 7.154164726980764e-06, "loss": 0.4114, "step": 4514 }, { "epoch": 1.2675463222908478, "grad_norm": 0.7022605538368225, "learning_rate": 7.152690621842284e-06, "loss": 0.3758, "step": 4515 }, { "epoch": 1.2678270634475015, "grad_norm": 0.7148244380950928, "learning_rate": 7.151216286982048e-06, "loss": 0.3723, "step": 4516 }, { "epoch": 1.268107804604155, "grad_norm": 0.6735973954200745, "learning_rate": 7.1497417225573865e-06, "loss": 0.3861, "step": 4517 }, { "epoch": 1.2683885457608086, "grad_norm": 0.641895055770874, "learning_rate": 7.1482669287256575e-06, "loss": 0.387, "step": 4518 }, { "epoch": 1.268669286917462, "grad_norm": 0.7048580646514893, "learning_rate": 7.146791905644241e-06, "loss": 0.427, "step": 4519 }, { "epoch": 1.2689500280741157, "grad_norm": 0.6548430919647217, "learning_rate": 7.145316653470542e-06, "loss": 0.352, "step": 4520 }, { "epoch": 1.2692307692307692, "grad_norm": 0.6917059421539307, "learning_rate": 7.143841172361991e-06, "loss": 0.3406, "step": 4521 }, { "epoch": 1.2695115103874228, "grad_norm": 0.6860353946685791, "learning_rate": 7.142365462476042e-06, "loss": 0.4143, "step": 4522 }, { "epoch": 1.2697922515440765, "grad_norm": 0.6748304963111877, "learning_rate": 7.140889523970173e-06, "loss": 0.3962, "step": 4523 }, { "epoch": 1.27007299270073, "grad_norm": 0.6810723543167114, "learning_rate": 7.139413357001886e-06, "loss": 0.3934, "step": 4524 }, { "epoch": 1.2703537338573834, "grad_norm": 0.6174544095993042, "learning_rate": 7.1379369617287105e-06, "loss": 0.4198, "step": 4525 }, { "epoch": 1.270634475014037, "grad_norm": 0.680924117565155, "learning_rate": 7.136460338308197e-06, "loss": 0.4235, "step": 4526 }, { "epoch": 1.2709152161706907, "grad_norm": 0.6375547647476196, "learning_rate": 7.134983486897924e-06, "loss": 0.3882, "step": 4527 }, { "epoch": 1.2711959573273441, "grad_norm": 0.6849893927574158, "learning_rate": 7.133506407655488e-06, "loss": 0.3744, "step": 4528 }, { "epoch": 1.2714766984839978, "grad_norm": 0.6663550138473511, "learning_rate": 7.132029100738517e-06, "loss": 0.3773, "step": 4529 }, { "epoch": 1.2717574396406512, "grad_norm": 0.6391407251358032, "learning_rate": 7.130551566304657e-06, "loss": 0.4126, "step": 4530 }, { "epoch": 1.272038180797305, "grad_norm": 0.6585769057273865, "learning_rate": 7.129073804511584e-06, "loss": 0.4077, "step": 4531 }, { "epoch": 1.2723189219539583, "grad_norm": 0.7295487523078918, "learning_rate": 7.127595815516993e-06, "loss": 0.3754, "step": 4532 }, { "epoch": 1.272599663110612, "grad_norm": 0.631105363368988, "learning_rate": 7.126117599478608e-06, "loss": 0.3674, "step": 4533 }, { "epoch": 1.2728804042672657, "grad_norm": 0.7147228121757507, "learning_rate": 7.124639156554176e-06, "loss": 0.4603, "step": 4534 }, { "epoch": 1.2731611454239191, "grad_norm": 0.6172426342964172, "learning_rate": 7.123160486901464e-06, "loss": 0.3626, "step": 4535 }, { "epoch": 1.2734418865805728, "grad_norm": 0.7407633066177368, "learning_rate": 7.121681590678267e-06, "loss": 0.3988, "step": 4536 }, { "epoch": 1.2737226277372262, "grad_norm": 0.6925725936889648, "learning_rate": 7.120202468042404e-06, "loss": 0.3724, "step": 4537 }, { "epoch": 1.2740033688938799, "grad_norm": 0.6817641854286194, "learning_rate": 7.11872311915172e-06, "loss": 0.4323, "step": 4538 }, { "epoch": 1.2742841100505333, "grad_norm": 0.7241194844245911, "learning_rate": 7.117243544164081e-06, "loss": 0.3942, "step": 4539 }, { "epoch": 1.274564851207187, "grad_norm": 0.6990113258361816, "learning_rate": 7.115763743237375e-06, "loss": 0.4256, "step": 4540 }, { "epoch": 1.2748455923638407, "grad_norm": 0.763110876083374, "learning_rate": 7.1142837165295206e-06, "loss": 0.4015, "step": 4541 }, { "epoch": 1.275126333520494, "grad_norm": 0.6815094351768494, "learning_rate": 7.112803464198455e-06, "loss": 0.3997, "step": 4542 }, { "epoch": 1.2754070746771475, "grad_norm": 0.7032138705253601, "learning_rate": 7.111322986402143e-06, "loss": 0.4123, "step": 4543 }, { "epoch": 1.2756878158338012, "grad_norm": 0.7433674931526184, "learning_rate": 7.109842283298572e-06, "loss": 0.3829, "step": 4544 }, { "epoch": 1.2759685569904549, "grad_norm": 0.6728748083114624, "learning_rate": 7.108361355045752e-06, "loss": 0.3915, "step": 4545 }, { "epoch": 1.2762492981471083, "grad_norm": 0.7070676684379578, "learning_rate": 7.10688020180172e-06, "loss": 0.386, "step": 4546 }, { "epoch": 1.276530039303762, "grad_norm": 0.6893311142921448, "learning_rate": 7.1053988237245345e-06, "loss": 0.3893, "step": 4547 }, { "epoch": 1.2768107804604156, "grad_norm": 0.6288242340087891, "learning_rate": 7.103917220972277e-06, "loss": 0.3819, "step": 4548 }, { "epoch": 1.277091521617069, "grad_norm": 0.7185351252555847, "learning_rate": 7.102435393703058e-06, "loss": 0.3633, "step": 4549 }, { "epoch": 1.2773722627737225, "grad_norm": 0.6069153547286987, "learning_rate": 7.10095334207501e-06, "loss": 0.3942, "step": 4550 }, { "epoch": 1.2776530039303762, "grad_norm": 0.6017578840255737, "learning_rate": 7.099471066246284e-06, "loss": 0.4023, "step": 4551 }, { "epoch": 1.2779337450870298, "grad_norm": 0.6145133376121521, "learning_rate": 7.097988566375063e-06, "loss": 0.3857, "step": 4552 }, { "epoch": 1.2782144862436833, "grad_norm": 0.6975274682044983, "learning_rate": 7.096505842619547e-06, "loss": 0.3487, "step": 4553 }, { "epoch": 1.278495227400337, "grad_norm": 0.7050109505653381, "learning_rate": 7.095022895137968e-06, "loss": 0.3856, "step": 4554 }, { "epoch": 1.2787759685569904, "grad_norm": 0.6894435286521912, "learning_rate": 7.0935397240885705e-06, "loss": 0.3619, "step": 4555 }, { "epoch": 1.279056709713644, "grad_norm": 0.7413249015808105, "learning_rate": 7.092056329629635e-06, "loss": 0.3906, "step": 4556 }, { "epoch": 1.2793374508702975, "grad_norm": 0.6645633578300476, "learning_rate": 7.090572711919457e-06, "loss": 0.4181, "step": 4557 }, { "epoch": 1.2796181920269512, "grad_norm": 0.659733772277832, "learning_rate": 7.089088871116358e-06, "loss": 0.4172, "step": 4558 }, { "epoch": 1.2798989331836048, "grad_norm": 0.695060133934021, "learning_rate": 7.087604807378687e-06, "loss": 0.3894, "step": 4559 }, { "epoch": 1.2801796743402583, "grad_norm": 0.6080955862998962, "learning_rate": 7.086120520864812e-06, "loss": 0.3982, "step": 4560 }, { "epoch": 1.280460415496912, "grad_norm": 0.7476343512535095, "learning_rate": 7.084636011733129e-06, "loss": 0.3969, "step": 4561 }, { "epoch": 1.2807411566535654, "grad_norm": 0.6847297549247742, "learning_rate": 7.083151280142053e-06, "loss": 0.3908, "step": 4562 }, { "epoch": 1.281021897810219, "grad_norm": 0.6581273078918457, "learning_rate": 7.0816663262500275e-06, "loss": 0.3605, "step": 4563 }, { "epoch": 1.2813026389668725, "grad_norm": 0.6434887051582336, "learning_rate": 7.080181150215517e-06, "loss": 0.4163, "step": 4564 }, { "epoch": 1.2815833801235261, "grad_norm": 0.786142110824585, "learning_rate": 7.078695752197009e-06, "loss": 0.4492, "step": 4565 }, { "epoch": 1.2818641212801798, "grad_norm": 0.7068789005279541, "learning_rate": 7.0772101323530184e-06, "loss": 0.3972, "step": 4566 }, { "epoch": 1.2821448624368332, "grad_norm": 0.6115596890449524, "learning_rate": 7.07572429084208e-06, "loss": 0.4151, "step": 4567 }, { "epoch": 1.2824256035934867, "grad_norm": 0.7637412548065186, "learning_rate": 7.074238227822752e-06, "loss": 0.3977, "step": 4568 }, { "epoch": 1.2827063447501403, "grad_norm": 0.6415371298789978, "learning_rate": 7.0727519434536185e-06, "loss": 0.3745, "step": 4569 }, { "epoch": 1.282987085906794, "grad_norm": 0.6550493836402893, "learning_rate": 7.071265437893289e-06, "loss": 0.4112, "step": 4570 }, { "epoch": 1.2832678270634474, "grad_norm": 0.7450150847434998, "learning_rate": 7.06977871130039e-06, "loss": 0.4056, "step": 4571 }, { "epoch": 1.283548568220101, "grad_norm": 0.6011745929718018, "learning_rate": 7.06829176383358e-06, "loss": 0.3866, "step": 4572 }, { "epoch": 1.2838293093767548, "grad_norm": 0.6650264263153076, "learning_rate": 7.066804595651535e-06, "loss": 0.3731, "step": 4573 }, { "epoch": 1.2841100505334082, "grad_norm": 0.6137609481811523, "learning_rate": 7.065317206912954e-06, "loss": 0.3993, "step": 4574 }, { "epoch": 1.2843907916900617, "grad_norm": 0.668127179145813, "learning_rate": 7.0638295977765654e-06, "loss": 0.3718, "step": 4575 }, { "epoch": 1.2846715328467153, "grad_norm": 0.7284173965454102, "learning_rate": 7.062341768401117e-06, "loss": 0.4086, "step": 4576 }, { "epoch": 1.284952274003369, "grad_norm": 0.6427273750305176, "learning_rate": 7.060853718945378e-06, "loss": 0.3908, "step": 4577 }, { "epoch": 1.2852330151600224, "grad_norm": 0.668682336807251, "learning_rate": 7.059365449568148e-06, "loss": 0.4184, "step": 4578 }, { "epoch": 1.285513756316676, "grad_norm": 0.6647806763648987, "learning_rate": 7.057876960428243e-06, "loss": 0.4035, "step": 4579 }, { "epoch": 1.2857944974733295, "grad_norm": 0.63369220495224, "learning_rate": 7.056388251684505e-06, "loss": 0.4016, "step": 4580 }, { "epoch": 1.2860752386299832, "grad_norm": 0.6674875020980835, "learning_rate": 7.054899323495801e-06, "loss": 0.3932, "step": 4581 }, { "epoch": 1.2863559797866366, "grad_norm": 0.6439608335494995, "learning_rate": 7.0534101760210206e-06, "loss": 0.3866, "step": 4582 }, { "epoch": 1.2866367209432903, "grad_norm": 0.7269125580787659, "learning_rate": 7.0519208094190735e-06, "loss": 0.3778, "step": 4583 }, { "epoch": 1.286917462099944, "grad_norm": 0.6372610926628113, "learning_rate": 7.0504312238489e-06, "loss": 0.3753, "step": 4584 }, { "epoch": 1.2871982032565974, "grad_norm": 0.6168985366821289, "learning_rate": 7.048941419469456e-06, "loss": 0.3966, "step": 4585 }, { "epoch": 1.287478944413251, "grad_norm": 0.6626611948013306, "learning_rate": 7.0474513964397255e-06, "loss": 0.4384, "step": 4586 }, { "epoch": 1.2877596855699045, "grad_norm": 0.6824188828468323, "learning_rate": 7.0459611549187126e-06, "loss": 0.3753, "step": 4587 }, { "epoch": 1.2880404267265582, "grad_norm": 0.6214696168899536, "learning_rate": 7.04447069506545e-06, "loss": 0.3798, "step": 4588 }, { "epoch": 1.2883211678832116, "grad_norm": 0.6406710743904114, "learning_rate": 7.042980017038988e-06, "loss": 0.3652, "step": 4589 }, { "epoch": 1.2886019090398653, "grad_norm": 0.6062746644020081, "learning_rate": 7.041489120998403e-06, "loss": 0.3859, "step": 4590 }, { "epoch": 1.288882650196519, "grad_norm": 0.757871150970459, "learning_rate": 7.0399980071027955e-06, "loss": 0.3947, "step": 4591 }, { "epoch": 1.2891633913531724, "grad_norm": 0.5964840054512024, "learning_rate": 7.038506675511285e-06, "loss": 0.3819, "step": 4592 }, { "epoch": 1.2894441325098258, "grad_norm": 0.6043334603309631, "learning_rate": 7.037015126383019e-06, "loss": 0.3766, "step": 4593 }, { "epoch": 1.2897248736664795, "grad_norm": 0.747077465057373, "learning_rate": 7.035523359877167e-06, "loss": 0.4247, "step": 4594 }, { "epoch": 1.2900056148231331, "grad_norm": 0.7753455638885498, "learning_rate": 7.0340313761529185e-06, "loss": 0.3674, "step": 4595 }, { "epoch": 1.2902863559797866, "grad_norm": 0.8529857993125916, "learning_rate": 7.032539175369491e-06, "loss": 0.4015, "step": 4596 }, { "epoch": 1.2905670971364402, "grad_norm": 0.8258609771728516, "learning_rate": 7.031046757686123e-06, "loss": 0.4435, "step": 4597 }, { "epoch": 1.2908478382930937, "grad_norm": 0.7256587743759155, "learning_rate": 7.029554123262075e-06, "loss": 0.3806, "step": 4598 }, { "epoch": 1.2911285794497473, "grad_norm": 0.7312936186790466, "learning_rate": 7.028061272256631e-06, "loss": 0.3874, "step": 4599 }, { "epoch": 1.2914093206064008, "grad_norm": 0.7907974123954773, "learning_rate": 7.0265682048291005e-06, "loss": 0.384, "step": 4600 }, { "epoch": 1.2916900617630545, "grad_norm": 0.8494774103164673, "learning_rate": 7.025074921138813e-06, "loss": 0.4253, "step": 4601 }, { "epoch": 1.2919708029197081, "grad_norm": 0.6940559148788452, "learning_rate": 7.023581421345124e-06, "loss": 0.3402, "step": 4602 }, { "epoch": 1.2922515440763616, "grad_norm": 0.6202247142791748, "learning_rate": 7.022087705607409e-06, "loss": 0.4177, "step": 4603 }, { "epoch": 1.2925322852330152, "grad_norm": 0.6664928793907166, "learning_rate": 7.020593774085068e-06, "loss": 0.3901, "step": 4604 }, { "epoch": 1.2928130263896687, "grad_norm": 0.6981474757194519, "learning_rate": 7.019099626937527e-06, "loss": 0.379, "step": 4605 }, { "epoch": 1.2930937675463223, "grad_norm": 0.8299378752708435, "learning_rate": 7.017605264324227e-06, "loss": 0.4382, "step": 4606 }, { "epoch": 1.2933745087029758, "grad_norm": 0.6710399389266968, "learning_rate": 7.016110686404642e-06, "loss": 0.3913, "step": 4607 }, { "epoch": 1.2936552498596294, "grad_norm": 0.6304298043251038, "learning_rate": 7.014615893338259e-06, "loss": 0.3665, "step": 4608 }, { "epoch": 1.293935991016283, "grad_norm": 0.6952391266822815, "learning_rate": 7.013120885284599e-06, "loss": 0.3765, "step": 4609 }, { "epoch": 1.2942167321729365, "grad_norm": 0.7461031079292297, "learning_rate": 7.0116256624031945e-06, "loss": 0.3934, "step": 4610 }, { "epoch": 1.2944974733295902, "grad_norm": 0.7327235341072083, "learning_rate": 7.0101302248536105e-06, "loss": 0.3925, "step": 4611 }, { "epoch": 1.2947782144862436, "grad_norm": 0.6534166932106018, "learning_rate": 7.008634572795427e-06, "loss": 0.3925, "step": 4612 }, { "epoch": 1.2950589556428973, "grad_norm": 0.6636688113212585, "learning_rate": 7.007138706388254e-06, "loss": 0.4072, "step": 4613 }, { "epoch": 1.2953396967995507, "grad_norm": 0.629555881023407, "learning_rate": 7.005642625791721e-06, "loss": 0.3744, "step": 4614 }, { "epoch": 1.2956204379562044, "grad_norm": 0.7262469530105591, "learning_rate": 7.004146331165478e-06, "loss": 0.365, "step": 4615 }, { "epoch": 1.295901179112858, "grad_norm": 0.67493736743927, "learning_rate": 7.002649822669203e-06, "loss": 0.364, "step": 4616 }, { "epoch": 1.2961819202695115, "grad_norm": 0.729296863079071, "learning_rate": 7.001153100462591e-06, "loss": 0.384, "step": 4617 }, { "epoch": 1.296462661426165, "grad_norm": 0.7144120931625366, "learning_rate": 6.999656164705365e-06, "loss": 0.3775, "step": 4618 }, { "epoch": 1.2967434025828186, "grad_norm": 0.6951630711555481, "learning_rate": 6.9981590155572675e-06, "loss": 0.3757, "step": 4619 }, { "epoch": 1.2970241437394723, "grad_norm": 0.7642596960067749, "learning_rate": 6.996661653178067e-06, "loss": 0.3543, "step": 4620 }, { "epoch": 1.2973048848961257, "grad_norm": 0.7871041297912598, "learning_rate": 6.99516407772755e-06, "loss": 0.4006, "step": 4621 }, { "epoch": 1.2975856260527794, "grad_norm": 0.693824291229248, "learning_rate": 6.993666289365531e-06, "loss": 0.4093, "step": 4622 }, { "epoch": 1.2978663672094328, "grad_norm": 0.742197573184967, "learning_rate": 6.9921682882518414e-06, "loss": 0.4218, "step": 4623 }, { "epoch": 1.2981471083660865, "grad_norm": 0.8252508044242859, "learning_rate": 6.990670074546342e-06, "loss": 0.3815, "step": 4624 }, { "epoch": 1.29842784952274, "grad_norm": 0.7689230442047119, "learning_rate": 6.98917164840891e-06, "loss": 0.404, "step": 4625 }, { "epoch": 1.2987085906793936, "grad_norm": 0.616387665271759, "learning_rate": 6.9876730099994504e-06, "loss": 0.3916, "step": 4626 }, { "epoch": 1.2989893318360473, "grad_norm": 0.5897514820098877, "learning_rate": 6.9861741594778885e-06, "loss": 0.3425, "step": 4627 }, { "epoch": 1.2992700729927007, "grad_norm": 0.7335630059242249, "learning_rate": 6.984675097004171e-06, "loss": 0.3576, "step": 4628 }, { "epoch": 1.2995508141493544, "grad_norm": 0.7389088869094849, "learning_rate": 6.983175822738268e-06, "loss": 0.4253, "step": 4629 }, { "epoch": 1.2998315553060078, "grad_norm": 0.7147209048271179, "learning_rate": 6.9816763368401755e-06, "loss": 0.3943, "step": 4630 }, { "epoch": 1.3001122964626615, "grad_norm": 0.7022382020950317, "learning_rate": 6.980176639469907e-06, "loss": 0.3814, "step": 4631 }, { "epoch": 1.300393037619315, "grad_norm": 0.8424223065376282, "learning_rate": 6.978676730787502e-06, "loss": 0.4243, "step": 4632 }, { "epoch": 1.3006737787759686, "grad_norm": 0.7687547206878662, "learning_rate": 6.97717661095302e-06, "loss": 0.3854, "step": 4633 }, { "epoch": 1.3009545199326222, "grad_norm": 0.7166351675987244, "learning_rate": 6.975676280126545e-06, "loss": 0.3763, "step": 4634 }, { "epoch": 1.3012352610892757, "grad_norm": 0.8682746291160583, "learning_rate": 6.974175738468183e-06, "loss": 0.3786, "step": 4635 }, { "epoch": 1.3015160022459291, "grad_norm": 0.6913161277770996, "learning_rate": 6.972674986138064e-06, "loss": 0.3932, "step": 4636 }, { "epoch": 1.3017967434025828, "grad_norm": 0.7797034978866577, "learning_rate": 6.971174023296337e-06, "loss": 0.409, "step": 4637 }, { "epoch": 1.3020774845592364, "grad_norm": 0.8386295437812805, "learning_rate": 6.969672850103176e-06, "loss": 0.3876, "step": 4638 }, { "epoch": 1.3023582257158899, "grad_norm": 0.6327503323554993, "learning_rate": 6.968171466718777e-06, "loss": 0.3772, "step": 4639 }, { "epoch": 1.3026389668725435, "grad_norm": 0.671227216720581, "learning_rate": 6.966669873303359e-06, "loss": 0.3812, "step": 4640 }, { "epoch": 1.3029197080291972, "grad_norm": 0.8056958913803101, "learning_rate": 6.965168070017162e-06, "loss": 0.4121, "step": 4641 }, { "epoch": 1.3032004491858507, "grad_norm": 0.7812299132347107, "learning_rate": 6.963666057020448e-06, "loss": 0.462, "step": 4642 }, { "epoch": 1.303481190342504, "grad_norm": 0.6905235052108765, "learning_rate": 6.962163834473506e-06, "loss": 0.3778, "step": 4643 }, { "epoch": 1.3037619314991578, "grad_norm": 0.8202456831932068, "learning_rate": 6.960661402536639e-06, "loss": 0.4063, "step": 4644 }, { "epoch": 1.3040426726558114, "grad_norm": 0.6483975648880005, "learning_rate": 6.959158761370181e-06, "loss": 0.3859, "step": 4645 }, { "epoch": 1.3043234138124649, "grad_norm": 0.6956228017807007, "learning_rate": 6.957655911134484e-06, "loss": 0.3739, "step": 4646 }, { "epoch": 1.3046041549691185, "grad_norm": 0.779199481010437, "learning_rate": 6.956152851989919e-06, "loss": 0.444, "step": 4647 }, { "epoch": 1.304884896125772, "grad_norm": 0.8077702522277832, "learning_rate": 6.9546495840968885e-06, "loss": 0.3742, "step": 4648 }, { "epoch": 1.3051656372824256, "grad_norm": 0.6182279586791992, "learning_rate": 6.953146107615809e-06, "loss": 0.381, "step": 4649 }, { "epoch": 1.305446378439079, "grad_norm": 0.6720951795578003, "learning_rate": 6.951642422707123e-06, "loss": 0.3864, "step": 4650 }, { "epoch": 1.3057271195957327, "grad_norm": 0.8674094676971436, "learning_rate": 6.950138529531294e-06, "loss": 0.427, "step": 4651 }, { "epoch": 1.3060078607523864, "grad_norm": 0.8232053518295288, "learning_rate": 6.948634428248807e-06, "loss": 0.3995, "step": 4652 }, { "epoch": 1.3062886019090398, "grad_norm": 0.7324113845825195, "learning_rate": 6.947130119020173e-06, "loss": 0.3881, "step": 4653 }, { "epoch": 1.3065693430656935, "grad_norm": 0.6415976881980896, "learning_rate": 6.945625602005922e-06, "loss": 0.3767, "step": 4654 }, { "epoch": 1.306850084222347, "grad_norm": 0.6434155106544495, "learning_rate": 6.944120877366605e-06, "loss": 0.3903, "step": 4655 }, { "epoch": 1.3071308253790006, "grad_norm": 0.7344347834587097, "learning_rate": 6.942615945262796e-06, "loss": 0.3712, "step": 4656 }, { "epoch": 1.307411566535654, "grad_norm": 0.6861408948898315, "learning_rate": 6.9411108058550955e-06, "loss": 0.3868, "step": 4657 }, { "epoch": 1.3076923076923077, "grad_norm": 0.7437962293624878, "learning_rate": 6.9396054593041196e-06, "loss": 0.3829, "step": 4658 }, { "epoch": 1.3079730488489614, "grad_norm": 0.6208816766738892, "learning_rate": 6.938099905770511e-06, "loss": 0.3673, "step": 4659 }, { "epoch": 1.3082537900056148, "grad_norm": 0.7573358416557312, "learning_rate": 6.936594145414934e-06, "loss": 0.3972, "step": 4660 }, { "epoch": 1.3085345311622683, "grad_norm": 0.7199413180351257, "learning_rate": 6.935088178398072e-06, "loss": 0.4038, "step": 4661 }, { "epoch": 1.308815272318922, "grad_norm": 0.6689475178718567, "learning_rate": 6.9335820048806326e-06, "loss": 0.3737, "step": 4662 }, { "epoch": 1.3090960134755756, "grad_norm": 0.6980438232421875, "learning_rate": 6.932075625023344e-06, "loss": 0.3853, "step": 4663 }, { "epoch": 1.309376754632229, "grad_norm": 0.747400164604187, "learning_rate": 6.930569038986962e-06, "loss": 0.3957, "step": 4664 }, { "epoch": 1.3096574957888827, "grad_norm": 0.8000480532646179, "learning_rate": 6.929062246932258e-06, "loss": 0.4203, "step": 4665 }, { "epoch": 1.3099382369455363, "grad_norm": 0.7655662894248962, "learning_rate": 6.927555249020026e-06, "loss": 0.4033, "step": 4666 }, { "epoch": 1.3102189781021898, "grad_norm": 0.7041757702827454, "learning_rate": 6.9260480454110845e-06, "loss": 0.3708, "step": 4667 }, { "epoch": 1.3104997192588432, "grad_norm": 0.6548405289649963, "learning_rate": 6.924540636266272e-06, "loss": 0.3569, "step": 4668 }, { "epoch": 1.310780460415497, "grad_norm": 0.6934897303581238, "learning_rate": 6.923033021746453e-06, "loss": 0.4432, "step": 4669 }, { "epoch": 1.3110612015721506, "grad_norm": 0.7211781144142151, "learning_rate": 6.921525202012507e-06, "loss": 0.4008, "step": 4670 }, { "epoch": 1.311341942728804, "grad_norm": 0.6320825815200806, "learning_rate": 6.920017177225341e-06, "loss": 0.3678, "step": 4671 }, { "epoch": 1.3116226838854577, "grad_norm": 0.6099188327789307, "learning_rate": 6.918508947545881e-06, "loss": 0.3341, "step": 4672 }, { "epoch": 1.311903425042111, "grad_norm": 0.6788693070411682, "learning_rate": 6.917000513135077e-06, "loss": 0.3569, "step": 4673 }, { "epoch": 1.3121841661987648, "grad_norm": 0.7344499230384827, "learning_rate": 6.915491874153899e-06, "loss": 0.4154, "step": 4674 }, { "epoch": 1.3124649073554182, "grad_norm": 0.6813397407531738, "learning_rate": 6.91398303076334e-06, "loss": 0.3867, "step": 4675 }, { "epoch": 1.3127456485120719, "grad_norm": 0.7256249785423279, "learning_rate": 6.912473983124414e-06, "loss": 0.3861, "step": 4676 }, { "epoch": 1.3130263896687255, "grad_norm": 0.6895406246185303, "learning_rate": 6.910964731398158e-06, "loss": 0.3696, "step": 4677 }, { "epoch": 1.313307130825379, "grad_norm": 0.6773607134819031, "learning_rate": 6.909455275745629e-06, "loss": 0.3931, "step": 4678 }, { "epoch": 1.3135878719820326, "grad_norm": 0.6725152730941772, "learning_rate": 6.907945616327907e-06, "loss": 0.4195, "step": 4679 }, { "epoch": 1.313868613138686, "grad_norm": 0.6737792491912842, "learning_rate": 6.906435753306094e-06, "loss": 0.3797, "step": 4680 }, { "epoch": 1.3141493542953397, "grad_norm": 0.6173524856567383, "learning_rate": 6.904925686841313e-06, "loss": 0.4058, "step": 4681 }, { "epoch": 1.3144300954519932, "grad_norm": 0.7905880808830261, "learning_rate": 6.903415417094709e-06, "loss": 0.3481, "step": 4682 }, { "epoch": 1.3147108366086468, "grad_norm": 0.7190317511558533, "learning_rate": 6.901904944227446e-06, "loss": 0.3783, "step": 4683 }, { "epoch": 1.3149915777653005, "grad_norm": 0.649093747138977, "learning_rate": 6.900394268400716e-06, "loss": 0.3654, "step": 4684 }, { "epoch": 1.315272318921954, "grad_norm": 0.6908594965934753, "learning_rate": 6.898883389775728e-06, "loss": 0.4037, "step": 4685 }, { "epoch": 1.3155530600786074, "grad_norm": 0.7254620790481567, "learning_rate": 6.897372308513712e-06, "loss": 0.4032, "step": 4686 }, { "epoch": 1.315833801235261, "grad_norm": 0.678902804851532, "learning_rate": 6.895861024775922e-06, "loss": 0.4111, "step": 4687 }, { "epoch": 1.3161145423919147, "grad_norm": 0.7710278034210205, "learning_rate": 6.894349538723635e-06, "loss": 0.3768, "step": 4688 }, { "epoch": 1.3163952835485682, "grad_norm": 0.7409265637397766, "learning_rate": 6.892837850518144e-06, "loss": 0.3601, "step": 4689 }, { "epoch": 1.3166760247052218, "grad_norm": 0.7537786960601807, "learning_rate": 6.891325960320767e-06, "loss": 0.3888, "step": 4690 }, { "epoch": 1.3169567658618755, "grad_norm": 0.7065321207046509, "learning_rate": 6.889813868292846e-06, "loss": 0.3763, "step": 4691 }, { "epoch": 1.317237507018529, "grad_norm": 0.750227153301239, "learning_rate": 6.888301574595742e-06, "loss": 0.3961, "step": 4692 }, { "epoch": 1.3175182481751824, "grad_norm": 0.604944109916687, "learning_rate": 6.886789079390837e-06, "loss": 0.4111, "step": 4693 }, { "epoch": 1.317798989331836, "grad_norm": 0.8548795580863953, "learning_rate": 6.885276382839533e-06, "loss": 0.3593, "step": 4694 }, { "epoch": 1.3180797304884897, "grad_norm": 0.7772150635719299, "learning_rate": 6.883763485103257e-06, "loss": 0.4223, "step": 4695 }, { "epoch": 1.3183604716451431, "grad_norm": 0.7162172794342041, "learning_rate": 6.882250386343456e-06, "loss": 0.4122, "step": 4696 }, { "epoch": 1.3186412128017968, "grad_norm": 0.6120654940605164, "learning_rate": 6.8807370867216e-06, "loss": 0.3827, "step": 4697 }, { "epoch": 1.3189219539584502, "grad_norm": 0.7186130881309509, "learning_rate": 6.879223586399178e-06, "loss": 0.3805, "step": 4698 }, { "epoch": 1.319202695115104, "grad_norm": 0.6152911186218262, "learning_rate": 6.8777098855377e-06, "loss": 0.3824, "step": 4699 }, { "epoch": 1.3194834362717573, "grad_norm": 0.7523589730262756, "learning_rate": 6.8761959842987e-06, "loss": 0.3878, "step": 4700 }, { "epoch": 1.319764177428411, "grad_norm": 0.6040613651275635, "learning_rate": 6.87468188284373e-06, "loss": 0.3592, "step": 4701 }, { "epoch": 1.3200449185850647, "grad_norm": 0.6443442106246948, "learning_rate": 6.87316758133437e-06, "loss": 0.3611, "step": 4702 }, { "epoch": 1.3203256597417181, "grad_norm": 0.7300065755844116, "learning_rate": 6.871653079932213e-06, "loss": 0.3827, "step": 4703 }, { "epoch": 1.3206064008983718, "grad_norm": 0.7388545274734497, "learning_rate": 6.87013837879888e-06, "loss": 0.3998, "step": 4704 }, { "epoch": 1.3208871420550252, "grad_norm": 0.7331262826919556, "learning_rate": 6.868623478096006e-06, "loss": 0.3922, "step": 4705 }, { "epoch": 1.3211678832116789, "grad_norm": 0.6619069576263428, "learning_rate": 6.867108377985257e-06, "loss": 0.4034, "step": 4706 }, { "epoch": 1.3214486243683323, "grad_norm": 0.6988587975502014, "learning_rate": 6.865593078628311e-06, "loss": 0.4308, "step": 4707 }, { "epoch": 1.321729365524986, "grad_norm": 0.7060587406158447, "learning_rate": 6.8640775801868746e-06, "loss": 0.4065, "step": 4708 }, { "epoch": 1.3220101066816397, "grad_norm": 0.6800733804702759, "learning_rate": 6.8625618828226695e-06, "loss": 0.47, "step": 4709 }, { "epoch": 1.322290847838293, "grad_norm": 0.7192081212997437, "learning_rate": 6.861045986697443e-06, "loss": 0.4265, "step": 4710 }, { "epoch": 1.3225715889949465, "grad_norm": 0.703007698059082, "learning_rate": 6.8595298919729624e-06, "loss": 0.3654, "step": 4711 }, { "epoch": 1.3228523301516002, "grad_norm": 0.7066115736961365, "learning_rate": 6.858013598811015e-06, "loss": 0.3571, "step": 4712 }, { "epoch": 1.3231330713082539, "grad_norm": 0.6508426666259766, "learning_rate": 6.8564971073734095e-06, "loss": 0.3763, "step": 4713 }, { "epoch": 1.3234138124649073, "grad_norm": 0.6828879714012146, "learning_rate": 6.8549804178219794e-06, "loss": 0.3662, "step": 4714 }, { "epoch": 1.323694553621561, "grad_norm": 0.6254562139511108, "learning_rate": 6.8534635303185735e-06, "loss": 0.3912, "step": 4715 }, { "epoch": 1.3239752947782144, "grad_norm": 0.7081087231636047, "learning_rate": 6.8519464450250665e-06, "loss": 0.422, "step": 4716 }, { "epoch": 1.324256035934868, "grad_norm": 0.6534614562988281, "learning_rate": 6.850429162103349e-06, "loss": 0.3728, "step": 4717 }, { "epoch": 1.3245367770915215, "grad_norm": 0.7958827614784241, "learning_rate": 6.84891168171534e-06, "loss": 0.4647, "step": 4718 }, { "epoch": 1.3248175182481752, "grad_norm": 0.7704675197601318, "learning_rate": 6.847394004022975e-06, "loss": 0.4027, "step": 4719 }, { "epoch": 1.3250982594048288, "grad_norm": 0.5801774263381958, "learning_rate": 6.845876129188209e-06, "loss": 0.3593, "step": 4720 }, { "epoch": 1.3253790005614823, "grad_norm": 0.6821433305740356, "learning_rate": 6.844358057373021e-06, "loss": 0.3843, "step": 4721 }, { "epoch": 1.325659741718136, "grad_norm": 0.7506273984909058, "learning_rate": 6.8428397887394105e-06, "loss": 0.4549, "step": 4722 }, { "epoch": 1.3259404828747894, "grad_norm": 0.7236044406890869, "learning_rate": 6.841321323449398e-06, "loss": 0.3661, "step": 4723 }, { "epoch": 1.326221224031443, "grad_norm": 0.6838476061820984, "learning_rate": 6.8398026616650246e-06, "loss": 0.3912, "step": 4724 }, { "epoch": 1.3265019651880965, "grad_norm": 0.7017772793769836, "learning_rate": 6.838283803548351e-06, "loss": 0.4085, "step": 4725 }, { "epoch": 1.3267827063447502, "grad_norm": 0.7446981072425842, "learning_rate": 6.836764749261463e-06, "loss": 0.4056, "step": 4726 }, { "epoch": 1.3270634475014038, "grad_norm": 0.7386606335639954, "learning_rate": 6.835245498966461e-06, "loss": 0.4392, "step": 4727 }, { "epoch": 1.3273441886580573, "grad_norm": 0.6297206878662109, "learning_rate": 6.833726052825474e-06, "loss": 0.3706, "step": 4728 }, { "epoch": 1.327624929814711, "grad_norm": 0.6871871948242188, "learning_rate": 6.832206411000644e-06, "loss": 0.4382, "step": 4729 }, { "epoch": 1.3279056709713644, "grad_norm": 0.6700953841209412, "learning_rate": 6.83068657365414e-06, "loss": 0.3578, "step": 4730 }, { "epoch": 1.328186412128018, "grad_norm": 0.6798109412193298, "learning_rate": 6.829166540948151e-06, "loss": 0.3947, "step": 4731 }, { "epoch": 1.3284671532846715, "grad_norm": 0.7043603658676147, "learning_rate": 6.827646313044882e-06, "loss": 0.3808, "step": 4732 }, { "epoch": 1.3287478944413251, "grad_norm": 0.7176543474197388, "learning_rate": 6.826125890106563e-06, "loss": 0.3915, "step": 4733 }, { "epoch": 1.3290286355979788, "grad_norm": 0.5913230776786804, "learning_rate": 6.824605272295446e-06, "loss": 0.3551, "step": 4734 }, { "epoch": 1.3293093767546322, "grad_norm": 0.5929445028305054, "learning_rate": 6.8230844597738014e-06, "loss": 0.4258, "step": 4735 }, { "epoch": 1.3295901179112857, "grad_norm": 0.6223021745681763, "learning_rate": 6.821563452703919e-06, "loss": 0.4027, "step": 4736 }, { "epoch": 1.3298708590679393, "grad_norm": 0.7025001049041748, "learning_rate": 6.8200422512481146e-06, "loss": 0.3603, "step": 4737 }, { "epoch": 1.330151600224593, "grad_norm": 0.73182213306427, "learning_rate": 6.818520855568717e-06, "loss": 0.4313, "step": 4738 }, { "epoch": 1.3304323413812464, "grad_norm": 0.6106387972831726, "learning_rate": 6.816999265828083e-06, "loss": 0.3771, "step": 4739 }, { "epoch": 1.3307130825379, "grad_norm": 0.7461013793945312, "learning_rate": 6.8154774821885864e-06, "loss": 0.4075, "step": 4740 }, { "epoch": 1.3309938236945535, "grad_norm": 0.6924073696136475, "learning_rate": 6.813955504812621e-06, "loss": 0.3849, "step": 4741 }, { "epoch": 1.3312745648512072, "grad_norm": 0.6440805792808533, "learning_rate": 6.812433333862608e-06, "loss": 0.3861, "step": 4742 }, { "epoch": 1.3315553060078607, "grad_norm": 0.716837465763092, "learning_rate": 6.810910969500979e-06, "loss": 0.391, "step": 4743 }, { "epoch": 1.3318360471645143, "grad_norm": 0.6487555503845215, "learning_rate": 6.809388411890192e-06, "loss": 0.352, "step": 4744 }, { "epoch": 1.332116788321168, "grad_norm": 0.6747896671295166, "learning_rate": 6.807865661192725e-06, "loss": 0.3625, "step": 4745 }, { "epoch": 1.3323975294778214, "grad_norm": 0.7779172658920288, "learning_rate": 6.806342717571078e-06, "loss": 0.3995, "step": 4746 }, { "epoch": 1.332678270634475, "grad_norm": 0.671245813369751, "learning_rate": 6.80481958118777e-06, "loss": 0.4116, "step": 4747 }, { "epoch": 1.3329590117911285, "grad_norm": 0.5985339879989624, "learning_rate": 6.803296252205338e-06, "loss": 0.3933, "step": 4748 }, { "epoch": 1.3332397529477822, "grad_norm": 0.6273095607757568, "learning_rate": 6.8017727307863445e-06, "loss": 0.3972, "step": 4749 }, { "epoch": 1.3335204941044356, "grad_norm": 0.6496458649635315, "learning_rate": 6.80024901709337e-06, "loss": 0.4196, "step": 4750 }, { "epoch": 1.3338012352610893, "grad_norm": 0.5970061421394348, "learning_rate": 6.798725111289015e-06, "loss": 0.3995, "step": 4751 }, { "epoch": 1.334081976417743, "grad_norm": 0.638443112373352, "learning_rate": 6.797201013535901e-06, "loss": 0.4375, "step": 4752 }, { "epoch": 1.3343627175743964, "grad_norm": 0.6787781119346619, "learning_rate": 6.795676723996672e-06, "loss": 0.3667, "step": 4753 }, { "epoch": 1.3346434587310498, "grad_norm": 0.6766923666000366, "learning_rate": 6.794152242833989e-06, "loss": 0.3838, "step": 4754 }, { "epoch": 1.3349241998877035, "grad_norm": 0.6247906684875488, "learning_rate": 6.792627570210536e-06, "loss": 0.3787, "step": 4755 }, { "epoch": 1.3352049410443572, "grad_norm": 0.6960628032684326, "learning_rate": 6.7911027062890165e-06, "loss": 0.3338, "step": 4756 }, { "epoch": 1.3354856822010106, "grad_norm": 0.6159615516662598, "learning_rate": 6.789577651232153e-06, "loss": 0.3983, "step": 4757 }, { "epoch": 1.3357664233576643, "grad_norm": 0.572173535823822, "learning_rate": 6.788052405202693e-06, "loss": 0.4016, "step": 4758 }, { "epoch": 1.336047164514318, "grad_norm": 0.6744593381881714, "learning_rate": 6.7865269683634e-06, "loss": 0.3672, "step": 4759 }, { "epoch": 1.3363279056709714, "grad_norm": 0.6162299513816833, "learning_rate": 6.7850013408770565e-06, "loss": 0.3888, "step": 4760 }, { "epoch": 1.3366086468276248, "grad_norm": 0.6695587635040283, "learning_rate": 6.783475522906471e-06, "loss": 0.4109, "step": 4761 }, { "epoch": 1.3368893879842785, "grad_norm": 0.6276293396949768, "learning_rate": 6.781949514614469e-06, "loss": 0.3826, "step": 4762 }, { "epoch": 1.3371701291409321, "grad_norm": 0.7240124344825745, "learning_rate": 6.780423316163896e-06, "loss": 0.393, "step": 4763 }, { "epoch": 1.3374508702975856, "grad_norm": 0.6815361380577087, "learning_rate": 6.7788969277176165e-06, "loss": 0.3703, "step": 4764 }, { "epoch": 1.3377316114542392, "grad_norm": 0.6647552251815796, "learning_rate": 6.77737034943852e-06, "loss": 0.3896, "step": 4765 }, { "epoch": 1.3380123526108927, "grad_norm": 0.6063252687454224, "learning_rate": 6.775843581489513e-06, "loss": 0.3891, "step": 4766 }, { "epoch": 1.3382930937675463, "grad_norm": 0.6022602319717407, "learning_rate": 6.774316624033522e-06, "loss": 0.3921, "step": 4767 }, { "epoch": 1.3385738349241998, "grad_norm": 0.6594793796539307, "learning_rate": 6.772789477233494e-06, "loss": 0.4026, "step": 4768 }, { "epoch": 1.3388545760808535, "grad_norm": 0.7128239274024963, "learning_rate": 6.771262141252399e-06, "loss": 0.3933, "step": 4769 }, { "epoch": 1.3391353172375071, "grad_norm": 0.5674261450767517, "learning_rate": 6.769734616253223e-06, "loss": 0.3872, "step": 4770 }, { "epoch": 1.3394160583941606, "grad_norm": 0.7471799850463867, "learning_rate": 6.7682069023989725e-06, "loss": 0.379, "step": 4771 }, { "epoch": 1.3396967995508142, "grad_norm": 0.7530657649040222, "learning_rate": 6.766678999852678e-06, "loss": 0.36, "step": 4772 }, { "epoch": 1.3399775407074677, "grad_norm": 0.6597678065299988, "learning_rate": 6.765150908777387e-06, "loss": 0.3906, "step": 4773 }, { "epoch": 1.3402582818641213, "grad_norm": 0.6415244936943054, "learning_rate": 6.763622629336168e-06, "loss": 0.3793, "step": 4774 }, { "epoch": 1.3405390230207748, "grad_norm": 0.6748092174530029, "learning_rate": 6.7620941616921076e-06, "loss": 0.3995, "step": 4775 }, { "epoch": 1.3408197641774284, "grad_norm": 0.7586249709129333, "learning_rate": 6.760565506008319e-06, "loss": 0.3841, "step": 4776 }, { "epoch": 1.341100505334082, "grad_norm": 0.6067720055580139, "learning_rate": 6.759036662447924e-06, "loss": 0.4036, "step": 4777 }, { "epoch": 1.3413812464907355, "grad_norm": 0.6660099029541016, "learning_rate": 6.757507631174076e-06, "loss": 0.3918, "step": 4778 }, { "epoch": 1.341661987647389, "grad_norm": 0.6044845581054688, "learning_rate": 6.755978412349944e-06, "loss": 0.3747, "step": 4779 }, { "epoch": 1.3419427288040426, "grad_norm": 0.6778552532196045, "learning_rate": 6.754449006138717e-06, "loss": 0.3929, "step": 4780 }, { "epoch": 1.3422234699606963, "grad_norm": 0.7300291061401367, "learning_rate": 6.7529194127036005e-06, "loss": 0.3853, "step": 4781 }, { "epoch": 1.3425042111173497, "grad_norm": 0.7000691294670105, "learning_rate": 6.7513896322078246e-06, "loss": 0.3733, "step": 4782 }, { "epoch": 1.3427849522740034, "grad_norm": 0.7240469455718994, "learning_rate": 6.749859664814639e-06, "loss": 0.4096, "step": 4783 }, { "epoch": 1.343065693430657, "grad_norm": 0.6703082919120789, "learning_rate": 6.7483295106873104e-06, "loss": 0.3749, "step": 4784 }, { "epoch": 1.3433464345873105, "grad_norm": 0.6959947943687439, "learning_rate": 6.74679916998913e-06, "loss": 0.4297, "step": 4785 }, { "epoch": 1.343627175743964, "grad_norm": 0.6914154887199402, "learning_rate": 6.7452686428834045e-06, "loss": 0.3624, "step": 4786 }, { "epoch": 1.3439079169006176, "grad_norm": 0.8058205842971802, "learning_rate": 6.743737929533462e-06, "loss": 0.3956, "step": 4787 }, { "epoch": 1.3441886580572713, "grad_norm": 0.6181507706642151, "learning_rate": 6.742207030102652e-06, "loss": 0.4118, "step": 4788 }, { "epoch": 1.3444693992139247, "grad_norm": 0.7075345516204834, "learning_rate": 6.740675944754343e-06, "loss": 0.3525, "step": 4789 }, { "epoch": 1.3447501403705784, "grad_norm": 0.7400590181350708, "learning_rate": 6.739144673651918e-06, "loss": 0.3979, "step": 4790 }, { "epoch": 1.3450308815272318, "grad_norm": 0.9430344700813293, "learning_rate": 6.7376132169587915e-06, "loss": 0.4275, "step": 4791 }, { "epoch": 1.3453116226838855, "grad_norm": 0.6225433945655823, "learning_rate": 6.7360815748383865e-06, "loss": 0.4083, "step": 4792 }, { "epoch": 1.345592363840539, "grad_norm": 0.6359659433364868, "learning_rate": 6.7345497474541534e-06, "loss": 0.3592, "step": 4793 }, { "epoch": 1.3458731049971926, "grad_norm": 0.619032084941864, "learning_rate": 6.733017734969557e-06, "loss": 0.3815, "step": 4794 }, { "epoch": 1.3461538461538463, "grad_norm": 0.6978216767311096, "learning_rate": 6.731485537548084e-06, "loss": 0.3659, "step": 4795 }, { "epoch": 1.3464345873104997, "grad_norm": 0.7958037257194519, "learning_rate": 6.729953155353243e-06, "loss": 0.3671, "step": 4796 }, { "epoch": 1.3467153284671534, "grad_norm": 0.8144029378890991, "learning_rate": 6.728420588548558e-06, "loss": 0.3808, "step": 4797 }, { "epoch": 1.3469960696238068, "grad_norm": 0.7494853734970093, "learning_rate": 6.726887837297578e-06, "loss": 0.4052, "step": 4798 }, { "epoch": 1.3472768107804605, "grad_norm": 0.678192675113678, "learning_rate": 6.725354901763865e-06, "loss": 0.3847, "step": 4799 }, { "epoch": 1.347557551937114, "grad_norm": 0.7762433290481567, "learning_rate": 6.7238217821110066e-06, "loss": 0.3866, "step": 4800 }, { "epoch": 1.3478382930937676, "grad_norm": 0.7528349757194519, "learning_rate": 6.722288478502608e-06, "loss": 0.3616, "step": 4801 }, { "epoch": 1.3481190342504212, "grad_norm": 0.6515021920204163, "learning_rate": 6.720754991102292e-06, "loss": 0.379, "step": 4802 }, { "epoch": 1.3483997754070747, "grad_norm": 0.6461201906204224, "learning_rate": 6.719221320073705e-06, "loss": 0.4429, "step": 4803 }, { "epoch": 1.3486805165637281, "grad_norm": 0.8549379706382751, "learning_rate": 6.717687465580509e-06, "loss": 0.3687, "step": 4804 }, { "epoch": 1.3489612577203818, "grad_norm": 0.6953334212303162, "learning_rate": 6.716153427786388e-06, "loss": 0.3656, "step": 4805 }, { "epoch": 1.3492419988770354, "grad_norm": 0.7070066928863525, "learning_rate": 6.714619206855046e-06, "loss": 0.3889, "step": 4806 }, { "epoch": 1.3495227400336889, "grad_norm": 0.7246028184890747, "learning_rate": 6.713084802950205e-06, "loss": 0.3919, "step": 4807 }, { "epoch": 1.3498034811903425, "grad_norm": 0.7758237719535828, "learning_rate": 6.711550216235607e-06, "loss": 0.4151, "step": 4808 }, { "epoch": 1.350084222346996, "grad_norm": 0.7004920840263367, "learning_rate": 6.7100154468750135e-06, "loss": 0.3666, "step": 4809 }, { "epoch": 1.3503649635036497, "grad_norm": 0.7784600853919983, "learning_rate": 6.7084804950322045e-06, "loss": 0.4238, "step": 4810 }, { "epoch": 1.350645704660303, "grad_norm": 0.6851533055305481, "learning_rate": 6.706945360870982e-06, "loss": 0.4086, "step": 4811 }, { "epoch": 1.3509264458169568, "grad_norm": 0.7173120379447937, "learning_rate": 6.705410044555165e-06, "loss": 0.4195, "step": 4812 }, { "epoch": 1.3512071869736104, "grad_norm": 0.846400260925293, "learning_rate": 6.703874546248593e-06, "loss": 0.4133, "step": 4813 }, { "epoch": 1.3514879281302639, "grad_norm": 0.7642650008201599, "learning_rate": 6.7023388661151265e-06, "loss": 0.4062, "step": 4814 }, { "epoch": 1.3517686692869175, "grad_norm": 0.6799836754798889, "learning_rate": 6.700803004318641e-06, "loss": 0.4051, "step": 4815 }, { "epoch": 1.352049410443571, "grad_norm": 0.7603617906570435, "learning_rate": 6.6992669610230345e-06, "loss": 0.4453, "step": 4816 }, { "epoch": 1.3523301516002246, "grad_norm": 0.6981773376464844, "learning_rate": 6.697730736392226e-06, "loss": 0.3966, "step": 4817 }, { "epoch": 1.352610892756878, "grad_norm": 0.6745737195014954, "learning_rate": 6.6961943305901515e-06, "loss": 0.3902, "step": 4818 }, { "epoch": 1.3528916339135317, "grad_norm": 0.7249606251716614, "learning_rate": 6.694657743780767e-06, "loss": 0.3549, "step": 4819 }, { "epoch": 1.3531723750701854, "grad_norm": 0.6501696109771729, "learning_rate": 6.6931209761280445e-06, "loss": 0.3608, "step": 4820 }, { "epoch": 1.3534531162268388, "grad_norm": 0.663993775844574, "learning_rate": 6.691584027795981e-06, "loss": 0.3795, "step": 4821 }, { "epoch": 1.3537338573834925, "grad_norm": 0.6785128712654114, "learning_rate": 6.690046898948589e-06, "loss": 0.3785, "step": 4822 }, { "epoch": 1.354014598540146, "grad_norm": 0.700816810131073, "learning_rate": 6.688509589749901e-06, "loss": 0.404, "step": 4823 }, { "epoch": 1.3542953396967996, "grad_norm": 0.6838839650154114, "learning_rate": 6.686972100363971e-06, "loss": 0.3666, "step": 4824 }, { "epoch": 1.354576080853453, "grad_norm": 0.6519018411636353, "learning_rate": 6.685434430954869e-06, "loss": 0.4002, "step": 4825 }, { "epoch": 1.3548568220101067, "grad_norm": 0.6772266626358032, "learning_rate": 6.683896581686685e-06, "loss": 0.3679, "step": 4826 }, { "epoch": 1.3551375631667604, "grad_norm": 0.7054872512817383, "learning_rate": 6.682358552723529e-06, "loss": 0.3874, "step": 4827 }, { "epoch": 1.3554183043234138, "grad_norm": 0.6136159300804138, "learning_rate": 6.68082034422953e-06, "loss": 0.3956, "step": 4828 }, { "epoch": 1.3556990454800673, "grad_norm": 0.6421452164649963, "learning_rate": 6.679281956368836e-06, "loss": 0.355, "step": 4829 }, { "epoch": 1.355979786636721, "grad_norm": 0.6787823438644409, "learning_rate": 6.6777433893056165e-06, "loss": 0.3823, "step": 4830 }, { "epoch": 1.3562605277933746, "grad_norm": 0.8672777414321899, "learning_rate": 6.676204643204054e-06, "loss": 0.3905, "step": 4831 }, { "epoch": 1.356541268950028, "grad_norm": 0.7196842432022095, "learning_rate": 6.674665718228356e-06, "loss": 0.3964, "step": 4832 }, { "epoch": 1.3568220101066817, "grad_norm": 0.7156896591186523, "learning_rate": 6.673126614542746e-06, "loss": 0.3911, "step": 4833 }, { "epoch": 1.3571027512633351, "grad_norm": 0.672362744808197, "learning_rate": 6.671587332311468e-06, "loss": 0.4078, "step": 4834 }, { "epoch": 1.3573834924199888, "grad_norm": 0.718880295753479, "learning_rate": 6.670047871698786e-06, "loss": 0.3649, "step": 4835 }, { "epoch": 1.3576642335766422, "grad_norm": 0.643750786781311, "learning_rate": 6.668508232868981e-06, "loss": 0.3827, "step": 4836 }, { "epoch": 1.357944974733296, "grad_norm": 0.6865444183349609, "learning_rate": 6.666968415986352e-06, "loss": 0.433, "step": 4837 }, { "epoch": 1.3582257158899496, "grad_norm": 0.6739438772201538, "learning_rate": 6.6654284212152195e-06, "loss": 0.3741, "step": 4838 }, { "epoch": 1.358506457046603, "grad_norm": 0.597885012626648, "learning_rate": 6.663888248719923e-06, "loss": 0.3479, "step": 4839 }, { "epoch": 1.3587871982032567, "grad_norm": 0.6266430616378784, "learning_rate": 6.6623478986648205e-06, "loss": 0.3961, "step": 4840 }, { "epoch": 1.35906793935991, "grad_norm": 0.5871906876564026, "learning_rate": 6.660807371214286e-06, "loss": 0.3815, "step": 4841 }, { "epoch": 1.3593486805165638, "grad_norm": 0.6124931573867798, "learning_rate": 6.6592666665327176e-06, "loss": 0.3567, "step": 4842 }, { "epoch": 1.3596294216732172, "grad_norm": 0.6495899558067322, "learning_rate": 6.657725784784529e-06, "loss": 0.3442, "step": 4843 }, { "epoch": 1.3599101628298709, "grad_norm": 0.5793033838272095, "learning_rate": 6.656184726134153e-06, "loss": 0.3927, "step": 4844 }, { "epoch": 1.3601909039865245, "grad_norm": 0.637464165687561, "learning_rate": 6.654643490746042e-06, "loss": 0.3471, "step": 4845 }, { "epoch": 1.360471645143178, "grad_norm": 0.6115562915802002, "learning_rate": 6.653102078784667e-06, "loss": 0.3981, "step": 4846 }, { "epoch": 1.3607523862998314, "grad_norm": 0.6376574039459229, "learning_rate": 6.651560490414519e-06, "loss": 0.3637, "step": 4847 }, { "epoch": 1.361033127456485, "grad_norm": 0.6842833757400513, "learning_rate": 6.6500187258001055e-06, "loss": 0.3926, "step": 4848 }, { "epoch": 1.3613138686131387, "grad_norm": 0.7011694312095642, "learning_rate": 6.648476785105955e-06, "loss": 0.3943, "step": 4849 }, { "epoch": 1.3615946097697922, "grad_norm": 0.6348097324371338, "learning_rate": 6.646934668496612e-06, "loss": 0.4112, "step": 4850 }, { "epoch": 1.3618753509264458, "grad_norm": 0.6216190457344055, "learning_rate": 6.645392376136644e-06, "loss": 0.3563, "step": 4851 }, { "epoch": 1.3621560920830995, "grad_norm": 0.6453918814659119, "learning_rate": 6.643849908190632e-06, "loss": 0.3932, "step": 4852 }, { "epoch": 1.362436833239753, "grad_norm": 0.6672836542129517, "learning_rate": 6.642307264823182e-06, "loss": 0.4234, "step": 4853 }, { "epoch": 1.3627175743964064, "grad_norm": 0.6598169207572937, "learning_rate": 6.640764446198912e-06, "loss": 0.3894, "step": 4854 }, { "epoch": 1.36299831555306, "grad_norm": 0.7138548493385315, "learning_rate": 6.639221452482464e-06, "loss": 0.3739, "step": 4855 }, { "epoch": 1.3632790567097137, "grad_norm": 0.6872824430465698, "learning_rate": 6.637678283838497e-06, "loss": 0.4325, "step": 4856 }, { "epoch": 1.3635597978663672, "grad_norm": 0.6734063625335693, "learning_rate": 6.636134940431688e-06, "loss": 0.3829, "step": 4857 }, { "epoch": 1.3638405390230208, "grad_norm": 0.7197776436805725, "learning_rate": 6.634591422426731e-06, "loss": 0.397, "step": 4858 }, { "epoch": 1.3641212801796743, "grad_norm": 0.6476694941520691, "learning_rate": 6.633047729988343e-06, "loss": 0.4005, "step": 4859 }, { "epoch": 1.364402021336328, "grad_norm": 0.6788256764411926, "learning_rate": 6.6315038632812565e-06, "loss": 0.4071, "step": 4860 }, { "epoch": 1.3646827624929814, "grad_norm": 0.6949490308761597, "learning_rate": 6.629959822470223e-06, "loss": 0.3624, "step": 4861 }, { "epoch": 1.364963503649635, "grad_norm": 0.5946491956710815, "learning_rate": 6.628415607720013e-06, "loss": 0.3583, "step": 4862 }, { "epoch": 1.3652442448062887, "grad_norm": 0.7144595980644226, "learning_rate": 6.626871219195418e-06, "loss": 0.3718, "step": 4863 }, { "epoch": 1.3655249859629421, "grad_norm": 0.6326375007629395, "learning_rate": 6.625326657061242e-06, "loss": 0.4067, "step": 4864 }, { "epoch": 1.3658057271195958, "grad_norm": 0.7528015375137329, "learning_rate": 6.6237819214823115e-06, "loss": 0.3922, "step": 4865 }, { "epoch": 1.3660864682762492, "grad_norm": 0.7716882228851318, "learning_rate": 6.622237012623473e-06, "loss": 0.3999, "step": 4866 }, { "epoch": 1.366367209432903, "grad_norm": 0.6801896095275879, "learning_rate": 6.620691930649586e-06, "loss": 0.3482, "step": 4867 }, { "epoch": 1.3666479505895563, "grad_norm": 0.6368662118911743, "learning_rate": 6.619146675725539e-06, "loss": 0.3849, "step": 4868 }, { "epoch": 1.36692869174621, "grad_norm": 0.6772675514221191, "learning_rate": 6.617601248016224e-06, "loss": 0.3671, "step": 4869 }, { "epoch": 1.3672094329028637, "grad_norm": 0.6529778242111206, "learning_rate": 6.616055647686565e-06, "loss": 0.3821, "step": 4870 }, { "epoch": 1.3674901740595171, "grad_norm": 0.6284794807434082, "learning_rate": 6.6145098749014954e-06, "loss": 0.3529, "step": 4871 }, { "epoch": 1.3677709152161706, "grad_norm": 0.6432361006736755, "learning_rate": 6.612963929825973e-06, "loss": 0.3974, "step": 4872 }, { "epoch": 1.3680516563728242, "grad_norm": 0.6178905367851257, "learning_rate": 6.6114178126249694e-06, "loss": 0.3484, "step": 4873 }, { "epoch": 1.3683323975294779, "grad_norm": 0.6260415315628052, "learning_rate": 6.6098715234634805e-06, "loss": 0.3705, "step": 4874 }, { "epoch": 1.3686131386861313, "grad_norm": 0.7609507441520691, "learning_rate": 6.608325062506511e-06, "loss": 0.4133, "step": 4875 }, { "epoch": 1.368893879842785, "grad_norm": 0.6837617754936218, "learning_rate": 6.606778429919093e-06, "loss": 0.3862, "step": 4876 }, { "epoch": 1.3691746209994387, "grad_norm": 0.6646842360496521, "learning_rate": 6.605231625866272e-06, "loss": 0.3645, "step": 4877 }, { "epoch": 1.369455362156092, "grad_norm": 0.6701329946517944, "learning_rate": 6.603684650513115e-06, "loss": 0.4086, "step": 4878 }, { "epoch": 1.3697361033127455, "grad_norm": 0.6591203212738037, "learning_rate": 6.602137504024705e-06, "loss": 0.3404, "step": 4879 }, { "epoch": 1.3700168444693992, "grad_norm": 0.6211579442024231, "learning_rate": 6.600590186566143e-06, "loss": 0.3951, "step": 4880 }, { "epoch": 1.3702975856260529, "grad_norm": 0.6577760577201843, "learning_rate": 6.599042698302549e-06, "loss": 0.3723, "step": 4881 }, { "epoch": 1.3705783267827063, "grad_norm": 0.6943831443786621, "learning_rate": 6.597495039399064e-06, "loss": 0.3806, "step": 4882 }, { "epoch": 1.37085906793936, "grad_norm": 0.6222783923149109, "learning_rate": 6.595947210020841e-06, "loss": 0.3814, "step": 4883 }, { "epoch": 1.3711398090960134, "grad_norm": 0.6409193277359009, "learning_rate": 6.594399210333057e-06, "loss": 0.3773, "step": 4884 }, { "epoch": 1.371420550252667, "grad_norm": 0.6976940631866455, "learning_rate": 6.592851040500905e-06, "loss": 0.4153, "step": 4885 }, { "epoch": 1.3717012914093205, "grad_norm": 0.5962629318237305, "learning_rate": 6.591302700689593e-06, "loss": 0.3999, "step": 4886 }, { "epoch": 1.3719820325659742, "grad_norm": 0.6599648594856262, "learning_rate": 6.5897541910643545e-06, "loss": 0.3937, "step": 4887 }, { "epoch": 1.3722627737226278, "grad_norm": 0.6184437870979309, "learning_rate": 6.5882055117904334e-06, "loss": 0.3736, "step": 4888 }, { "epoch": 1.3725435148792813, "grad_norm": 0.5633988380432129, "learning_rate": 6.586656663033098e-06, "loss": 0.3721, "step": 4889 }, { "epoch": 1.372824256035935, "grad_norm": 0.6845596432685852, "learning_rate": 6.5851076449576276e-06, "loss": 0.3708, "step": 4890 }, { "epoch": 1.3731049971925884, "grad_norm": 0.6809899210929871, "learning_rate": 6.58355845772933e-06, "loss": 0.3743, "step": 4891 }, { "epoch": 1.373385738349242, "grad_norm": 0.7047613263130188, "learning_rate": 6.582009101513518e-06, "loss": 0.3859, "step": 4892 }, { "epoch": 1.3736664795058955, "grad_norm": 0.6444752216339111, "learning_rate": 6.580459576475534e-06, "loss": 0.3774, "step": 4893 }, { "epoch": 1.3739472206625492, "grad_norm": 0.6714679002761841, "learning_rate": 6.578909882780732e-06, "loss": 0.3867, "step": 4894 }, { "epoch": 1.3742279618192028, "grad_norm": 0.7270873785018921, "learning_rate": 6.577360020594487e-06, "loss": 0.3859, "step": 4895 }, { "epoch": 1.3745087029758563, "grad_norm": 0.6965042948722839, "learning_rate": 6.575809990082189e-06, "loss": 0.3975, "step": 4896 }, { "epoch": 1.3747894441325097, "grad_norm": 0.6982359290122986, "learning_rate": 6.574259791409248e-06, "loss": 0.4052, "step": 4897 }, { "epoch": 1.3750701852891634, "grad_norm": 0.667641282081604, "learning_rate": 6.57270942474109e-06, "loss": 0.3276, "step": 4898 }, { "epoch": 1.375350926445817, "grad_norm": 0.7631139159202576, "learning_rate": 6.571158890243166e-06, "loss": 0.4356, "step": 4899 }, { "epoch": 1.3756316676024705, "grad_norm": 0.6603594422340393, "learning_rate": 6.5696081880809325e-06, "loss": 0.3702, "step": 4900 }, { "epoch": 1.3759124087591241, "grad_norm": 0.5941827893257141, "learning_rate": 6.5680573184198745e-06, "loss": 0.3826, "step": 4901 }, { "epoch": 1.3761931499157778, "grad_norm": 0.6959962248802185, "learning_rate": 6.566506281425492e-06, "loss": 0.3845, "step": 4902 }, { "epoch": 1.3764738910724312, "grad_norm": 0.7347832322120667, "learning_rate": 6.5649550772633e-06, "loss": 0.3814, "step": 4903 }, { "epoch": 1.3767546322290847, "grad_norm": 0.6277152895927429, "learning_rate": 6.563403706098833e-06, "loss": 0.3303, "step": 4904 }, { "epoch": 1.3770353733857383, "grad_norm": 0.677270770072937, "learning_rate": 6.561852168097644e-06, "loss": 0.3823, "step": 4905 }, { "epoch": 1.377316114542392, "grad_norm": 0.7346622943878174, "learning_rate": 6.560300463425306e-06, "loss": 0.39, "step": 4906 }, { "epoch": 1.3775968556990454, "grad_norm": 0.6248444318771362, "learning_rate": 6.558748592247404e-06, "loss": 0.3838, "step": 4907 }, { "epoch": 1.377877596855699, "grad_norm": 0.6768785119056702, "learning_rate": 6.557196554729547e-06, "loss": 0.3925, "step": 4908 }, { "epoch": 1.3781583380123525, "grad_norm": 0.703629195690155, "learning_rate": 6.555644351037356e-06, "loss": 0.3763, "step": 4909 }, { "epoch": 1.3784390791690062, "grad_norm": 0.7016772031784058, "learning_rate": 6.554091981336475e-06, "loss": 0.4042, "step": 4910 }, { "epoch": 1.3787198203256597, "grad_norm": 0.6389033794403076, "learning_rate": 6.5525394457925605e-06, "loss": 0.3759, "step": 4911 }, { "epoch": 1.3790005614823133, "grad_norm": 0.6705660223960876, "learning_rate": 6.550986744571291e-06, "loss": 0.414, "step": 4912 }, { "epoch": 1.379281302638967, "grad_norm": 0.6910821795463562, "learning_rate": 6.549433877838362e-06, "loss": 0.3772, "step": 4913 }, { "epoch": 1.3795620437956204, "grad_norm": 0.6090564727783203, "learning_rate": 6.547880845759486e-06, "loss": 0.3552, "step": 4914 }, { "epoch": 1.379842784952274, "grad_norm": 0.5981763601303101, "learning_rate": 6.5463276485003905e-06, "loss": 0.3699, "step": 4915 }, { "epoch": 1.3801235261089275, "grad_norm": 0.6429130434989929, "learning_rate": 6.544774286226824e-06, "loss": 0.3839, "step": 4916 }, { "epoch": 1.3804042672655812, "grad_norm": 0.7170768976211548, "learning_rate": 6.543220759104552e-06, "loss": 0.3879, "step": 4917 }, { "epoch": 1.3806850084222346, "grad_norm": 0.6578571796417236, "learning_rate": 6.541667067299358e-06, "loss": 0.3756, "step": 4918 }, { "epoch": 1.3809657495788883, "grad_norm": 0.6086967587471008, "learning_rate": 6.5401132109770395e-06, "loss": 0.4147, "step": 4919 }, { "epoch": 1.381246490735542, "grad_norm": 0.6940132975578308, "learning_rate": 6.538559190303418e-06, "loss": 0.4086, "step": 4920 }, { "epoch": 1.3815272318921954, "grad_norm": 0.5924859642982483, "learning_rate": 6.537005005444328e-06, "loss": 0.3927, "step": 4921 }, { "epoch": 1.3818079730488488, "grad_norm": 0.6510424613952637, "learning_rate": 6.535450656565621e-06, "loss": 0.4249, "step": 4922 }, { "epoch": 1.3820887142055025, "grad_norm": 0.647047221660614, "learning_rate": 6.533896143833169e-06, "loss": 0.3646, "step": 4923 }, { "epoch": 1.3823694553621562, "grad_norm": 0.6695600152015686, "learning_rate": 6.532341467412858e-06, "loss": 0.3888, "step": 4924 }, { "epoch": 1.3826501965188096, "grad_norm": 0.6573482751846313, "learning_rate": 6.5307866274705955e-06, "loss": 0.4147, "step": 4925 }, { "epoch": 1.3829309376754633, "grad_norm": 0.6649966239929199, "learning_rate": 6.529231624172303e-06, "loss": 0.3755, "step": 4926 }, { "epoch": 1.3832116788321167, "grad_norm": 0.6034950017929077, "learning_rate": 6.527676457683921e-06, "loss": 0.3802, "step": 4927 }, { "epoch": 1.3834924199887704, "grad_norm": 0.6298536658287048, "learning_rate": 6.526121128171408e-06, "loss": 0.3943, "step": 4928 }, { "epoch": 1.3837731611454238, "grad_norm": 0.6359330415725708, "learning_rate": 6.524565635800739e-06, "loss": 0.3633, "step": 4929 }, { "epoch": 1.3840539023020775, "grad_norm": 0.6504920721054077, "learning_rate": 6.523009980737904e-06, "loss": 0.3355, "step": 4930 }, { "epoch": 1.3843346434587311, "grad_norm": 0.6498956680297852, "learning_rate": 6.521454163148917e-06, "loss": 0.3937, "step": 4931 }, { "epoch": 1.3846153846153846, "grad_norm": 0.7074066996574402, "learning_rate": 6.519898183199802e-06, "loss": 0.4384, "step": 4932 }, { "epoch": 1.3848961257720382, "grad_norm": 0.6834805011749268, "learning_rate": 6.518342041056604e-06, "loss": 0.3631, "step": 4933 }, { "epoch": 1.3851768669286917, "grad_norm": 0.6755250096321106, "learning_rate": 6.516785736885387e-06, "loss": 0.4312, "step": 4934 }, { "epoch": 1.3854576080853453, "grad_norm": 0.7766903042793274, "learning_rate": 6.515229270852228e-06, "loss": 0.4174, "step": 4935 }, { "epoch": 1.3857383492419988, "grad_norm": 0.7174411416053772, "learning_rate": 6.513672643123223e-06, "loss": 0.4022, "step": 4936 }, { "epoch": 1.3860190903986525, "grad_norm": 0.8040292263031006, "learning_rate": 6.512115853864487e-06, "loss": 0.4064, "step": 4937 }, { "epoch": 1.3862998315553061, "grad_norm": 0.6910301446914673, "learning_rate": 6.51055890324215e-06, "loss": 0.4323, "step": 4938 }, { "epoch": 1.3865805727119596, "grad_norm": 0.6850510835647583, "learning_rate": 6.50900179142236e-06, "loss": 0.3709, "step": 4939 }, { "epoch": 1.3868613138686132, "grad_norm": 0.7027866840362549, "learning_rate": 6.507444518571284e-06, "loss": 0.3695, "step": 4940 }, { "epoch": 1.3871420550252667, "grad_norm": 0.6455894708633423, "learning_rate": 6.505887084855103e-06, "loss": 0.3557, "step": 4941 }, { "epoch": 1.3874227961819203, "grad_norm": 0.6405916213989258, "learning_rate": 6.504329490440016e-06, "loss": 0.4098, "step": 4942 }, { "epoch": 1.3877035373385738, "grad_norm": 0.6554495096206665, "learning_rate": 6.502771735492238e-06, "loss": 0.3433, "step": 4943 }, { "epoch": 1.3879842784952274, "grad_norm": 0.6610128283500671, "learning_rate": 6.5012138201780085e-06, "loss": 0.4254, "step": 4944 }, { "epoch": 1.388265019651881, "grad_norm": 0.6976993083953857, "learning_rate": 6.499655744663577e-06, "loss": 0.371, "step": 4945 }, { "epoch": 1.3885457608085345, "grad_norm": 0.7344253063201904, "learning_rate": 6.498097509115207e-06, "loss": 0.3852, "step": 4946 }, { "epoch": 1.388826501965188, "grad_norm": 0.6160487532615662, "learning_rate": 6.496539113699189e-06, "loss": 0.3353, "step": 4947 }, { "epoch": 1.3891072431218416, "grad_norm": 0.6966599225997925, "learning_rate": 6.4949805585818215e-06, "loss": 0.4081, "step": 4948 }, { "epoch": 1.3893879842784953, "grad_norm": 0.7261844873428345, "learning_rate": 6.4934218439294265e-06, "loss": 0.3848, "step": 4949 }, { "epoch": 1.3896687254351487, "grad_norm": 0.6698335409164429, "learning_rate": 6.491862969908339e-06, "loss": 0.3474, "step": 4950 }, { "epoch": 1.3899494665918024, "grad_norm": 0.6629172563552856, "learning_rate": 6.4903039366849145e-06, "loss": 0.3298, "step": 4951 }, { "epoch": 1.3902302077484558, "grad_norm": 0.568679928779602, "learning_rate": 6.488744744425519e-06, "loss": 0.3688, "step": 4952 }, { "epoch": 1.3905109489051095, "grad_norm": 0.7038002014160156, "learning_rate": 6.487185393296542e-06, "loss": 0.4113, "step": 4953 }, { "epoch": 1.390791690061763, "grad_norm": 0.6793254017829895, "learning_rate": 6.48562588346439e-06, "loss": 0.3948, "step": 4954 }, { "epoch": 1.3910724312184166, "grad_norm": 0.6914476752281189, "learning_rate": 6.484066215095481e-06, "loss": 0.3825, "step": 4955 }, { "epoch": 1.3913531723750703, "grad_norm": 0.6814020872116089, "learning_rate": 6.4825063883562545e-06, "loss": 0.4026, "step": 4956 }, { "epoch": 1.3916339135317237, "grad_norm": 0.7289256453514099, "learning_rate": 6.480946403413166e-06, "loss": 0.3818, "step": 4957 }, { "epoch": 1.3919146546883774, "grad_norm": 0.7625842690467834, "learning_rate": 6.4793862604326855e-06, "loss": 0.3881, "step": 4958 }, { "epoch": 1.3921953958450308, "grad_norm": 0.6781641244888306, "learning_rate": 6.477825959581303e-06, "loss": 0.4003, "step": 4959 }, { "epoch": 1.3924761370016845, "grad_norm": 0.7230656147003174, "learning_rate": 6.476265501025525e-06, "loss": 0.378, "step": 4960 }, { "epoch": 1.392756878158338, "grad_norm": 0.6661189198493958, "learning_rate": 6.474704884931873e-06, "loss": 0.3761, "step": 4961 }, { "epoch": 1.3930376193149916, "grad_norm": 0.6882614493370056, "learning_rate": 6.473144111466887e-06, "loss": 0.3803, "step": 4962 }, { "epoch": 1.3933183604716453, "grad_norm": 0.6907784938812256, "learning_rate": 6.471583180797121e-06, "loss": 0.3543, "step": 4963 }, { "epoch": 1.3935991016282987, "grad_norm": 0.7187560796737671, "learning_rate": 6.470022093089149e-06, "loss": 0.388, "step": 4964 }, { "epoch": 1.3938798427849521, "grad_norm": 0.6923490166664124, "learning_rate": 6.468460848509561e-06, "loss": 0.3994, "step": 4965 }, { "epoch": 1.3941605839416058, "grad_norm": 0.7261494398117065, "learning_rate": 6.466899447224963e-06, "loss": 0.4299, "step": 4966 }, { "epoch": 1.3944413250982595, "grad_norm": 0.6663200855255127, "learning_rate": 6.465337889401978e-06, "loss": 0.3598, "step": 4967 }, { "epoch": 1.394722066254913, "grad_norm": 0.8251053690910339, "learning_rate": 6.463776175207246e-06, "loss": 0.382, "step": 4968 }, { "epoch": 1.3950028074115666, "grad_norm": 0.730740487575531, "learning_rate": 6.462214304807422e-06, "loss": 0.3935, "step": 4969 }, { "epoch": 1.3952835485682202, "grad_norm": 0.7686986327171326, "learning_rate": 6.4606522783691816e-06, "loss": 0.4, "step": 4970 }, { "epoch": 1.3955642897248737, "grad_norm": 0.6586964130401611, "learning_rate": 6.459090096059213e-06, "loss": 0.4015, "step": 4971 }, { "epoch": 1.3958450308815271, "grad_norm": 0.7674559354782104, "learning_rate": 6.457527758044223e-06, "loss": 0.4047, "step": 4972 }, { "epoch": 1.3961257720381808, "grad_norm": 0.6154000759124756, "learning_rate": 6.455965264490935e-06, "loss": 0.3486, "step": 4973 }, { "epoch": 1.3964065131948344, "grad_norm": 0.7266693711280823, "learning_rate": 6.454402615566088e-06, "loss": 0.3267, "step": 4974 }, { "epoch": 1.3966872543514879, "grad_norm": 0.7308728098869324, "learning_rate": 6.452839811436436e-06, "loss": 0.4139, "step": 4975 }, { "epoch": 1.3969679955081415, "grad_norm": 0.6754895448684692, "learning_rate": 6.451276852268757e-06, "loss": 0.3611, "step": 4976 }, { "epoch": 1.397248736664795, "grad_norm": 0.5917999744415283, "learning_rate": 6.449713738229835e-06, "loss": 0.355, "step": 4977 }, { "epoch": 1.3975294778214487, "grad_norm": 0.7222596406936646, "learning_rate": 6.448150469486478e-06, "loss": 0.4194, "step": 4978 }, { "epoch": 1.397810218978102, "grad_norm": 0.6465803384780884, "learning_rate": 6.446587046205511e-06, "loss": 0.383, "step": 4979 }, { "epoch": 1.3980909601347558, "grad_norm": 0.7141954302787781, "learning_rate": 6.445023468553768e-06, "loss": 0.3849, "step": 4980 }, { "epoch": 1.3983717012914094, "grad_norm": 0.7008252739906311, "learning_rate": 6.443459736698106e-06, "loss": 0.4069, "step": 4981 }, { "epoch": 1.3986524424480629, "grad_norm": 0.6069467067718506, "learning_rate": 6.441895850805397e-06, "loss": 0.4174, "step": 4982 }, { "epoch": 1.3989331836047165, "grad_norm": 0.6798213720321655, "learning_rate": 6.440331811042531e-06, "loss": 0.3845, "step": 4983 }, { "epoch": 1.39921392476137, "grad_norm": 0.6617259383201599, "learning_rate": 6.43876761757641e-06, "loss": 0.3566, "step": 4984 }, { "epoch": 1.3994946659180236, "grad_norm": 0.6779190301895142, "learning_rate": 6.4372032705739565e-06, "loss": 0.3751, "step": 4985 }, { "epoch": 1.399775407074677, "grad_norm": 0.6222606897354126, "learning_rate": 6.435638770202106e-06, "loss": 0.3957, "step": 4986 }, { "epoch": 1.4000561482313307, "grad_norm": 0.710183322429657, "learning_rate": 6.434074116627815e-06, "loss": 0.4077, "step": 4987 }, { "epoch": 1.4003368893879844, "grad_norm": 0.6390467286109924, "learning_rate": 6.432509310018051e-06, "loss": 0.368, "step": 4988 }, { "epoch": 1.4006176305446378, "grad_norm": 0.6966363191604614, "learning_rate": 6.430944350539804e-06, "loss": 0.3659, "step": 4989 }, { "epoch": 1.4008983717012913, "grad_norm": 0.6830536723136902, "learning_rate": 6.429379238360073e-06, "loss": 0.4024, "step": 4990 }, { "epoch": 1.401179112857945, "grad_norm": 0.6816349029541016, "learning_rate": 6.427813973645878e-06, "loss": 0.3578, "step": 4991 }, { "epoch": 1.4014598540145986, "grad_norm": 0.6343541145324707, "learning_rate": 6.426248556564254e-06, "loss": 0.3661, "step": 4992 }, { "epoch": 1.401740595171252, "grad_norm": 0.7885885834693909, "learning_rate": 6.424682987282255e-06, "loss": 0.4378, "step": 4993 }, { "epoch": 1.4020213363279057, "grad_norm": 0.6680176258087158, "learning_rate": 6.423117265966946e-06, "loss": 0.3843, "step": 4994 }, { "epoch": 1.4023020774845594, "grad_norm": 0.6686276793479919, "learning_rate": 6.4215513927854125e-06, "loss": 0.3693, "step": 4995 }, { "epoch": 1.4025828186412128, "grad_norm": 0.6055652499198914, "learning_rate": 6.419985367904754e-06, "loss": 0.3945, "step": 4996 }, { "epoch": 1.4028635597978663, "grad_norm": 0.7359544038772583, "learning_rate": 6.418419191492088e-06, "loss": 0.4042, "step": 4997 }, { "epoch": 1.40314430095452, "grad_norm": 0.7134687900543213, "learning_rate": 6.416852863714545e-06, "loss": 0.4205, "step": 4998 }, { "epoch": 1.4034250421111736, "grad_norm": 0.6834651231765747, "learning_rate": 6.415286384739277e-06, "loss": 0.3803, "step": 4999 }, { "epoch": 1.403705783267827, "grad_norm": 0.6677467823028564, "learning_rate": 6.413719754733447e-06, "loss": 0.4085, "step": 5000 }, { "epoch": 1.4039865244244807, "grad_norm": 0.5563065409660339, "learning_rate": 6.412152973864236e-06, "loss": 0.3835, "step": 5001 }, { "epoch": 1.4042672655811341, "grad_norm": 0.6281640529632568, "learning_rate": 6.410586042298841e-06, "loss": 0.3813, "step": 5002 }, { "epoch": 1.4045480067377878, "grad_norm": 0.6774550676345825, "learning_rate": 6.409018960204475e-06, "loss": 0.3584, "step": 5003 }, { "epoch": 1.4048287478944412, "grad_norm": 0.6842412948608398, "learning_rate": 6.407451727748367e-06, "loss": 0.3957, "step": 5004 }, { "epoch": 1.405109489051095, "grad_norm": 0.7258167266845703, "learning_rate": 6.405884345097764e-06, "loss": 0.3367, "step": 5005 }, { "epoch": 1.4053902302077486, "grad_norm": 0.710361897945404, "learning_rate": 6.404316812419927e-06, "loss": 0.4234, "step": 5006 }, { "epoch": 1.405670971364402, "grad_norm": 0.6374581456184387, "learning_rate": 6.402749129882131e-06, "loss": 0.3832, "step": 5007 }, { "epoch": 1.4059517125210557, "grad_norm": 0.6684624552726746, "learning_rate": 6.401181297651672e-06, "loss": 0.3808, "step": 5008 }, { "epoch": 1.406232453677709, "grad_norm": 0.6398100256919861, "learning_rate": 6.399613315895858e-06, "loss": 0.3983, "step": 5009 }, { "epoch": 1.4065131948343628, "grad_norm": 0.8064338564872742, "learning_rate": 6.398045184782015e-06, "loss": 0.3898, "step": 5010 }, { "epoch": 1.4067939359910162, "grad_norm": 0.6622109413146973, "learning_rate": 6.396476904477484e-06, "loss": 0.3617, "step": 5011 }, { "epoch": 1.4070746771476699, "grad_norm": 0.6772120594978333, "learning_rate": 6.3949084751496215e-06, "loss": 0.3579, "step": 5012 }, { "epoch": 1.4073554183043235, "grad_norm": 0.781470537185669, "learning_rate": 6.3933398969658e-06, "loss": 0.3879, "step": 5013 }, { "epoch": 1.407636159460977, "grad_norm": 0.6676806807518005, "learning_rate": 6.3917711700934106e-06, "loss": 0.4139, "step": 5014 }, { "epoch": 1.4079169006176304, "grad_norm": 0.5935172438621521, "learning_rate": 6.390202294699855e-06, "loss": 0.4123, "step": 5015 }, { "epoch": 1.408197641774284, "grad_norm": 0.6429613828659058, "learning_rate": 6.388633270952558e-06, "loss": 0.3915, "step": 5016 }, { "epoch": 1.4084783829309377, "grad_norm": 0.6627713441848755, "learning_rate": 6.387064099018953e-06, "loss": 0.374, "step": 5017 }, { "epoch": 1.4087591240875912, "grad_norm": 0.671201765537262, "learning_rate": 6.385494779066492e-06, "loss": 0.4084, "step": 5018 }, { "epoch": 1.4090398652442448, "grad_norm": 0.7396451830863953, "learning_rate": 6.383925311262643e-06, "loss": 0.4551, "step": 5019 }, { "epoch": 1.4093206064008983, "grad_norm": 0.7062227129936218, "learning_rate": 6.382355695774892e-06, "loss": 0.3951, "step": 5020 }, { "epoch": 1.409601347557552, "grad_norm": 0.6947963237762451, "learning_rate": 6.3807859327707375e-06, "loss": 0.3887, "step": 5021 }, { "epoch": 1.4098820887142054, "grad_norm": 0.6252344846725464, "learning_rate": 6.379216022417695e-06, "loss": 0.3988, "step": 5022 }, { "epoch": 1.410162829870859, "grad_norm": 0.6357190608978271, "learning_rate": 6.377645964883297e-06, "loss": 0.3774, "step": 5023 }, { "epoch": 1.4104435710275127, "grad_norm": 0.6681320667266846, "learning_rate": 6.376075760335086e-06, "loss": 0.403, "step": 5024 }, { "epoch": 1.4107243121841662, "grad_norm": 0.715579628944397, "learning_rate": 6.374505408940627e-06, "loss": 0.4181, "step": 5025 }, { "epoch": 1.4110050533408198, "grad_norm": 0.6407375931739807, "learning_rate": 6.372934910867501e-06, "loss": 0.3899, "step": 5026 }, { "epoch": 1.4112857944974733, "grad_norm": 0.6472136974334717, "learning_rate": 6.371364266283296e-06, "loss": 0.3909, "step": 5027 }, { "epoch": 1.411566535654127, "grad_norm": 0.681711733341217, "learning_rate": 6.369793475355628e-06, "loss": 0.4094, "step": 5028 }, { "epoch": 1.4118472768107804, "grad_norm": 0.6502349376678467, "learning_rate": 6.368222538252116e-06, "loss": 0.406, "step": 5029 }, { "epoch": 1.412128017967434, "grad_norm": 0.6270555257797241, "learning_rate": 6.366651455140403e-06, "loss": 0.3585, "step": 5030 }, { "epoch": 1.4124087591240877, "grad_norm": 0.7469307780265808, "learning_rate": 6.365080226188145e-06, "loss": 0.3481, "step": 5031 }, { "epoch": 1.4126895002807411, "grad_norm": 0.6458436250686646, "learning_rate": 6.363508851563014e-06, "loss": 0.4041, "step": 5032 }, { "epoch": 1.4129702414373948, "grad_norm": 0.6175792813301086, "learning_rate": 6.361937331432699e-06, "loss": 0.375, "step": 5033 }, { "epoch": 1.4132509825940482, "grad_norm": 0.7301008701324463, "learning_rate": 6.360365665964901e-06, "loss": 0.4322, "step": 5034 }, { "epoch": 1.413531723750702, "grad_norm": 0.6470361351966858, "learning_rate": 6.358793855327339e-06, "loss": 0.3916, "step": 5035 }, { "epoch": 1.4138124649073553, "grad_norm": 0.6229981780052185, "learning_rate": 6.357221899687746e-06, "loss": 0.3563, "step": 5036 }, { "epoch": 1.414093206064009, "grad_norm": 0.65587317943573, "learning_rate": 6.355649799213871e-06, "loss": 0.3632, "step": 5037 }, { "epoch": 1.4143739472206627, "grad_norm": 0.6841045022010803, "learning_rate": 6.354077554073481e-06, "loss": 0.3733, "step": 5038 }, { "epoch": 1.4146546883773161, "grad_norm": 0.6161452531814575, "learning_rate": 6.3525051644343545e-06, "loss": 0.3751, "step": 5039 }, { "epoch": 1.4149354295339696, "grad_norm": 0.70626300573349, "learning_rate": 6.350932630464288e-06, "loss": 0.3851, "step": 5040 }, { "epoch": 1.4152161706906232, "grad_norm": 0.7271189093589783, "learning_rate": 6.349359952331091e-06, "loss": 0.3759, "step": 5041 }, { "epoch": 1.4154969118472769, "grad_norm": 0.7051572203636169, "learning_rate": 6.347787130202592e-06, "loss": 0.3771, "step": 5042 }, { "epoch": 1.4157776530039303, "grad_norm": 0.7936788201332092, "learning_rate": 6.34621416424663e-06, "loss": 0.4111, "step": 5043 }, { "epoch": 1.416058394160584, "grad_norm": 0.6933422088623047, "learning_rate": 6.344641054631065e-06, "loss": 0.3619, "step": 5044 }, { "epoch": 1.4163391353172374, "grad_norm": 0.6461235284805298, "learning_rate": 6.343067801523769e-06, "loss": 0.3907, "step": 5045 }, { "epoch": 1.416619876473891, "grad_norm": 0.6267691254615784, "learning_rate": 6.341494405092628e-06, "loss": 0.3845, "step": 5046 }, { "epoch": 1.4169006176305445, "grad_norm": 0.7046382427215576, "learning_rate": 6.339920865505548e-06, "loss": 0.3845, "step": 5047 }, { "epoch": 1.4171813587871982, "grad_norm": 0.6728993058204651, "learning_rate": 6.338347182930445e-06, "loss": 0.3442, "step": 5048 }, { "epoch": 1.4174620999438519, "grad_norm": 0.6944672465324402, "learning_rate": 6.336773357535253e-06, "loss": 0.3654, "step": 5049 }, { "epoch": 1.4177428411005053, "grad_norm": 0.6600845456123352, "learning_rate": 6.335199389487922e-06, "loss": 0.385, "step": 5050 }, { "epoch": 1.418023582257159, "grad_norm": 0.6061417460441589, "learning_rate": 6.333625278956413e-06, "loss": 0.4183, "step": 5051 }, { "epoch": 1.4183043234138124, "grad_norm": 0.6814526915550232, "learning_rate": 6.33205102610871e-06, "loss": 0.4054, "step": 5052 }, { "epoch": 1.418585064570466, "grad_norm": 0.6553138494491577, "learning_rate": 6.330476631112803e-06, "loss": 0.3896, "step": 5053 }, { "epoch": 1.4188658057271195, "grad_norm": 0.6747038960456848, "learning_rate": 6.328902094136704e-06, "loss": 0.3696, "step": 5054 }, { "epoch": 1.4191465468837732, "grad_norm": 0.580277681350708, "learning_rate": 6.3273274153484384e-06, "loss": 0.3924, "step": 5055 }, { "epoch": 1.4194272880404268, "grad_norm": 0.6985778212547302, "learning_rate": 6.325752594916046e-06, "loss": 0.409, "step": 5056 }, { "epoch": 1.4197080291970803, "grad_norm": 0.696919858455658, "learning_rate": 6.324177633007578e-06, "loss": 0.3494, "step": 5057 }, { "epoch": 1.4199887703537337, "grad_norm": 0.6058250069618225, "learning_rate": 6.322602529791109e-06, "loss": 0.364, "step": 5058 }, { "epoch": 1.4202695115103874, "grad_norm": 0.7440758347511292, "learning_rate": 6.321027285434722e-06, "loss": 0.3736, "step": 5059 }, { "epoch": 1.420550252667041, "grad_norm": 0.6947153210639954, "learning_rate": 6.31945190010652e-06, "loss": 0.3567, "step": 5060 }, { "epoch": 1.4208309938236945, "grad_norm": 0.6216912865638733, "learning_rate": 6.317876373974616e-06, "loss": 0.3485, "step": 5061 }, { "epoch": 1.4211117349803482, "grad_norm": 0.7084161043167114, "learning_rate": 6.3163007072071395e-06, "loss": 0.4088, "step": 5062 }, { "epoch": 1.4213924761370018, "grad_norm": 0.6685324907302856, "learning_rate": 6.314724899972238e-06, "loss": 0.4537, "step": 5063 }, { "epoch": 1.4216732172936553, "grad_norm": 0.6328791975975037, "learning_rate": 6.31314895243807e-06, "loss": 0.3822, "step": 5064 }, { "epoch": 1.4219539584503087, "grad_norm": 0.7225044965744019, "learning_rate": 6.311572864772811e-06, "loss": 0.3776, "step": 5065 }, { "epoch": 1.4222346996069624, "grad_norm": 0.8034189343452454, "learning_rate": 6.3099966371446556e-06, "loss": 0.4106, "step": 5066 }, { "epoch": 1.422515440763616, "grad_norm": 0.6231784224510193, "learning_rate": 6.308420269721802e-06, "loss": 0.3653, "step": 5067 }, { "epoch": 1.4227961819202695, "grad_norm": 0.770412802696228, "learning_rate": 6.306843762672474e-06, "loss": 0.4098, "step": 5068 }, { "epoch": 1.4230769230769231, "grad_norm": 0.7057575583457947, "learning_rate": 6.305267116164908e-06, "loss": 0.389, "step": 5069 }, { "epoch": 1.4233576642335766, "grad_norm": 0.6264330744743347, "learning_rate": 6.3036903303673495e-06, "loss": 0.4156, "step": 5070 }, { "epoch": 1.4236384053902302, "grad_norm": 0.6959660053253174, "learning_rate": 6.302113405448069e-06, "loss": 0.4026, "step": 5071 }, { "epoch": 1.4239191465468837, "grad_norm": 0.6862159371376038, "learning_rate": 6.300536341575342e-06, "loss": 0.3975, "step": 5072 }, { "epoch": 1.4241998877035373, "grad_norm": 0.7660701274871826, "learning_rate": 6.2989591389174645e-06, "loss": 0.3946, "step": 5073 }, { "epoch": 1.424480628860191, "grad_norm": 0.6575465202331543, "learning_rate": 6.2973817976427455e-06, "loss": 0.3739, "step": 5074 }, { "epoch": 1.4247613700168444, "grad_norm": 0.6916999220848083, "learning_rate": 6.295804317919507e-06, "loss": 0.4288, "step": 5075 }, { "epoch": 1.425042111173498, "grad_norm": 0.6507255434989929, "learning_rate": 6.294226699916092e-06, "loss": 0.3685, "step": 5076 }, { "epoch": 1.4253228523301515, "grad_norm": 0.6341503858566284, "learning_rate": 6.292648943800852e-06, "loss": 0.3885, "step": 5077 }, { "epoch": 1.4256035934868052, "grad_norm": 0.623775064945221, "learning_rate": 6.291071049742154e-06, "loss": 0.4102, "step": 5078 }, { "epoch": 1.4258843346434587, "grad_norm": 0.6329973340034485, "learning_rate": 6.289493017908383e-06, "loss": 0.4072, "step": 5079 }, { "epoch": 1.4261650758001123, "grad_norm": 0.7985526323318481, "learning_rate": 6.287914848467935e-06, "loss": 0.4063, "step": 5080 }, { "epoch": 1.426445816956766, "grad_norm": 0.6926229596138, "learning_rate": 6.286336541589224e-06, "loss": 0.3551, "step": 5081 }, { "epoch": 1.4267265581134194, "grad_norm": 0.6243866682052612, "learning_rate": 6.284758097440676e-06, "loss": 0.3587, "step": 5082 }, { "epoch": 1.4270072992700729, "grad_norm": 0.7212977409362793, "learning_rate": 6.283179516190734e-06, "loss": 0.3762, "step": 5083 }, { "epoch": 1.4272880404267265, "grad_norm": 0.6903027892112732, "learning_rate": 6.281600798007853e-06, "loss": 0.4086, "step": 5084 }, { "epoch": 1.4275687815833802, "grad_norm": 0.7182713150978088, "learning_rate": 6.280021943060505e-06, "loss": 0.3884, "step": 5085 }, { "epoch": 1.4278495227400336, "grad_norm": 0.7404643297195435, "learning_rate": 6.278442951517174e-06, "loss": 0.3623, "step": 5086 }, { "epoch": 1.4281302638966873, "grad_norm": 0.6994548439979553, "learning_rate": 6.276863823546362e-06, "loss": 0.3857, "step": 5087 }, { "epoch": 1.428411005053341, "grad_norm": 0.7237161993980408, "learning_rate": 6.275284559316585e-06, "loss": 0.4047, "step": 5088 }, { "epoch": 1.4286917462099944, "grad_norm": 0.7051738500595093, "learning_rate": 6.273705158996368e-06, "loss": 0.4071, "step": 5089 }, { "epoch": 1.4289724873666478, "grad_norm": 0.7517426013946533, "learning_rate": 6.272125622754257e-06, "loss": 0.3897, "step": 5090 }, { "epoch": 1.4292532285233015, "grad_norm": 0.8073099851608276, "learning_rate": 6.2705459507588115e-06, "loss": 0.4211, "step": 5091 }, { "epoch": 1.4295339696799552, "grad_norm": 0.5920656323432922, "learning_rate": 6.268966143178603e-06, "loss": 0.3422, "step": 5092 }, { "epoch": 1.4298147108366086, "grad_norm": 0.6506666541099548, "learning_rate": 6.267386200182218e-06, "loss": 0.3767, "step": 5093 }, { "epoch": 1.4300954519932623, "grad_norm": 0.7356073260307312, "learning_rate": 6.265806121938261e-06, "loss": 0.4074, "step": 5094 }, { "epoch": 1.4303761931499157, "grad_norm": 0.7603099346160889, "learning_rate": 6.2642259086153445e-06, "loss": 0.3942, "step": 5095 }, { "epoch": 1.4306569343065694, "grad_norm": 0.6621203422546387, "learning_rate": 6.2626455603821e-06, "loss": 0.3211, "step": 5096 }, { "epoch": 1.4309376754632228, "grad_norm": 0.657948911190033, "learning_rate": 6.261065077407173e-06, "loss": 0.3523, "step": 5097 }, { "epoch": 1.4312184166198765, "grad_norm": 0.7687973380088806, "learning_rate": 6.2594844598592256e-06, "loss": 0.388, "step": 5098 }, { "epoch": 1.4314991577765301, "grad_norm": 0.6468183398246765, "learning_rate": 6.257903707906928e-06, "loss": 0.3633, "step": 5099 }, { "epoch": 1.4317798989331836, "grad_norm": 0.6889108419418335, "learning_rate": 6.2563228217189686e-06, "loss": 0.3722, "step": 5100 }, { "epoch": 1.4320606400898372, "grad_norm": 0.6281739473342896, "learning_rate": 6.25474180146405e-06, "loss": 0.4251, "step": 5101 }, { "epoch": 1.4323413812464907, "grad_norm": 0.6302910447120667, "learning_rate": 6.25316064731089e-06, "loss": 0.4036, "step": 5102 }, { "epoch": 1.4326221224031443, "grad_norm": 0.7975131273269653, "learning_rate": 6.251579359428217e-06, "loss": 0.4034, "step": 5103 }, { "epoch": 1.4329028635597978, "grad_norm": 0.6228559613227844, "learning_rate": 6.249997937984778e-06, "loss": 0.3894, "step": 5104 }, { "epoch": 1.4331836047164515, "grad_norm": 0.7324758172035217, "learning_rate": 6.248416383149335e-06, "loss": 0.4363, "step": 5105 }, { "epoch": 1.4334643458731051, "grad_norm": 0.729753315448761, "learning_rate": 6.2468346950906565e-06, "loss": 0.3883, "step": 5106 }, { "epoch": 1.4337450870297586, "grad_norm": 0.6479659080505371, "learning_rate": 6.245252873977533e-06, "loss": 0.3705, "step": 5107 }, { "epoch": 1.434025828186412, "grad_norm": 0.6021603941917419, "learning_rate": 6.243670919978766e-06, "loss": 0.4169, "step": 5108 }, { "epoch": 1.4343065693430657, "grad_norm": 0.7919953465461731, "learning_rate": 6.2420888332631735e-06, "loss": 0.3934, "step": 5109 }, { "epoch": 1.4345873104997193, "grad_norm": 0.7754859924316406, "learning_rate": 6.240506613999585e-06, "loss": 0.4505, "step": 5110 }, { "epoch": 1.4348680516563728, "grad_norm": 0.6871572136878967, "learning_rate": 6.238924262356845e-06, "loss": 0.42, "step": 5111 }, { "epoch": 1.4351487928130264, "grad_norm": 0.6975321173667908, "learning_rate": 6.23734177850381e-06, "loss": 0.3978, "step": 5112 }, { "epoch": 1.43542953396968, "grad_norm": 0.6754171252250671, "learning_rate": 6.235759162609356e-06, "loss": 0.3773, "step": 5113 }, { "epoch": 1.4357102751263335, "grad_norm": 0.6637935638427734, "learning_rate": 6.23417641484237e-06, "loss": 0.3749, "step": 5114 }, { "epoch": 1.435991016282987, "grad_norm": 0.7114065289497375, "learning_rate": 6.23259353537175e-06, "loss": 0.4158, "step": 5115 }, { "epoch": 1.4362717574396406, "grad_norm": 0.6718721389770508, "learning_rate": 6.231010524366415e-06, "loss": 0.4135, "step": 5116 }, { "epoch": 1.4365524985962943, "grad_norm": 0.6565790176391602, "learning_rate": 6.229427381995291e-06, "loss": 0.383, "step": 5117 }, { "epoch": 1.4368332397529477, "grad_norm": 0.6728942394256592, "learning_rate": 6.2278441084273224e-06, "loss": 0.3851, "step": 5118 }, { "epoch": 1.4371139809096014, "grad_norm": 0.6315937042236328, "learning_rate": 6.226260703831465e-06, "loss": 0.3628, "step": 5119 }, { "epoch": 1.4373947220662548, "grad_norm": 0.7243862748146057, "learning_rate": 6.224677168376692e-06, "loss": 0.4102, "step": 5120 }, { "epoch": 1.4376754632229085, "grad_norm": 0.6211796998977661, "learning_rate": 6.223093502231986e-06, "loss": 0.4051, "step": 5121 }, { "epoch": 1.437956204379562, "grad_norm": 0.7139093279838562, "learning_rate": 6.221509705566348e-06, "loss": 0.3756, "step": 5122 }, { "epoch": 1.4382369455362156, "grad_norm": 0.6886172890663147, "learning_rate": 6.21992577854879e-06, "loss": 0.3514, "step": 5123 }, { "epoch": 1.4385176866928693, "grad_norm": 0.6658490300178528, "learning_rate": 6.21834172134834e-06, "loss": 0.3719, "step": 5124 }, { "epoch": 1.4387984278495227, "grad_norm": 0.6449331641197205, "learning_rate": 6.216757534134037e-06, "loss": 0.3596, "step": 5125 }, { "epoch": 1.4390791690061764, "grad_norm": 0.689836323261261, "learning_rate": 6.215173217074938e-06, "loss": 0.417, "step": 5126 }, { "epoch": 1.4393599101628298, "grad_norm": 0.6353428959846497, "learning_rate": 6.213588770340109e-06, "loss": 0.4016, "step": 5127 }, { "epoch": 1.4396406513194835, "grad_norm": 0.6529582738876343, "learning_rate": 6.212004194098633e-06, "loss": 0.3354, "step": 5128 }, { "epoch": 1.439921392476137, "grad_norm": 0.72930508852005, "learning_rate": 6.210419488519606e-06, "loss": 0.3965, "step": 5129 }, { "epoch": 1.4402021336327906, "grad_norm": 0.6347339749336243, "learning_rate": 6.208834653772139e-06, "loss": 0.3941, "step": 5130 }, { "epoch": 1.4404828747894443, "grad_norm": 0.6254330277442932, "learning_rate": 6.207249690025354e-06, "loss": 0.3676, "step": 5131 }, { "epoch": 1.4407636159460977, "grad_norm": 0.6846107244491577, "learning_rate": 6.205664597448392e-06, "loss": 0.4145, "step": 5132 }, { "epoch": 1.4410443571027511, "grad_norm": 0.6662903428077698, "learning_rate": 6.204079376210399e-06, "loss": 0.3774, "step": 5133 }, { "epoch": 1.4413250982594048, "grad_norm": 0.7583791613578796, "learning_rate": 6.2024940264805434e-06, "loss": 0.387, "step": 5134 }, { "epoch": 1.4416058394160585, "grad_norm": 0.6241505146026611, "learning_rate": 6.200908548428003e-06, "loss": 0.3911, "step": 5135 }, { "epoch": 1.441886580572712, "grad_norm": 0.7127392888069153, "learning_rate": 6.199322942221971e-06, "loss": 0.3658, "step": 5136 }, { "epoch": 1.4421673217293656, "grad_norm": 0.6791383624076843, "learning_rate": 6.197737208031652e-06, "loss": 0.4141, "step": 5137 }, { "epoch": 1.442448062886019, "grad_norm": 0.7384278178215027, "learning_rate": 6.196151346026267e-06, "loss": 0.3746, "step": 5138 }, { "epoch": 1.4427288040426727, "grad_norm": 0.6535114645957947, "learning_rate": 6.1945653563750485e-06, "loss": 0.3674, "step": 5139 }, { "epoch": 1.4430095451993261, "grad_norm": 0.7013922333717346, "learning_rate": 6.192979239247243e-06, "loss": 0.4047, "step": 5140 }, { "epoch": 1.4432902863559798, "grad_norm": 0.6561954617500305, "learning_rate": 6.191392994812112e-06, "loss": 0.4055, "step": 5141 }, { "epoch": 1.4435710275126334, "grad_norm": 0.639331579208374, "learning_rate": 6.18980662323893e-06, "loss": 0.3724, "step": 5142 }, { "epoch": 1.4438517686692869, "grad_norm": 0.7580430507659912, "learning_rate": 6.1882201246969845e-06, "loss": 0.4205, "step": 5143 }, { "epoch": 1.4441325098259405, "grad_norm": 0.7018490433692932, "learning_rate": 6.186633499355576e-06, "loss": 0.398, "step": 5144 }, { "epoch": 1.444413250982594, "grad_norm": 0.6287434101104736, "learning_rate": 6.185046747384018e-06, "loss": 0.3624, "step": 5145 }, { "epoch": 1.4446939921392477, "grad_norm": 0.684122622013092, "learning_rate": 6.183459868951642e-06, "loss": 0.3724, "step": 5146 }, { "epoch": 1.444974733295901, "grad_norm": 0.6380981802940369, "learning_rate": 6.181872864227787e-06, "loss": 0.4284, "step": 5147 }, { "epoch": 1.4452554744525548, "grad_norm": 0.6782366633415222, "learning_rate": 6.180285733381811e-06, "loss": 0.3887, "step": 5148 }, { "epoch": 1.4455362156092084, "grad_norm": 0.6177129149436951, "learning_rate": 6.17869847658308e-06, "loss": 0.4087, "step": 5149 }, { "epoch": 1.4458169567658619, "grad_norm": 0.649387001991272, "learning_rate": 6.177111094000978e-06, "loss": 0.3467, "step": 5150 }, { "epoch": 1.4460976979225155, "grad_norm": 0.7056505680084229, "learning_rate": 6.175523585804901e-06, "loss": 0.3866, "step": 5151 }, { "epoch": 1.446378439079169, "grad_norm": 0.6559800505638123, "learning_rate": 6.173935952164256e-06, "loss": 0.3893, "step": 5152 }, { "epoch": 1.4466591802358226, "grad_norm": 0.7441030740737915, "learning_rate": 6.172348193248466e-06, "loss": 0.3607, "step": 5153 }, { "epoch": 1.446939921392476, "grad_norm": 0.6608142852783203, "learning_rate": 6.170760309226969e-06, "loss": 0.4059, "step": 5154 }, { "epoch": 1.4472206625491297, "grad_norm": 0.6568796038627625, "learning_rate": 6.169172300269211e-06, "loss": 0.3678, "step": 5155 }, { "epoch": 1.4475014037057834, "grad_norm": 0.676991879940033, "learning_rate": 6.167584166544655e-06, "loss": 0.3704, "step": 5156 }, { "epoch": 1.4477821448624368, "grad_norm": 0.7802074551582336, "learning_rate": 6.165995908222778e-06, "loss": 0.4331, "step": 5157 }, { "epoch": 1.4480628860190903, "grad_norm": 0.7982289791107178, "learning_rate": 6.164407525473069e-06, "loss": 0.4263, "step": 5158 }, { "epoch": 1.448343627175744, "grad_norm": 0.5990244150161743, "learning_rate": 6.162819018465029e-06, "loss": 0.3738, "step": 5159 }, { "epoch": 1.4486243683323976, "grad_norm": 0.6717208027839661, "learning_rate": 6.161230387368175e-06, "loss": 0.3954, "step": 5160 }, { "epoch": 1.448905109489051, "grad_norm": 0.8350700736045837, "learning_rate": 6.159641632352036e-06, "loss": 0.4125, "step": 5161 }, { "epoch": 1.4491858506457047, "grad_norm": 0.684823215007782, "learning_rate": 6.158052753586152e-06, "loss": 0.4194, "step": 5162 }, { "epoch": 1.4494665918023582, "grad_norm": 0.6564561724662781, "learning_rate": 6.15646375124008e-06, "loss": 0.4372, "step": 5163 }, { "epoch": 1.4497473329590118, "grad_norm": 0.6363829970359802, "learning_rate": 6.154874625483388e-06, "loss": 0.4184, "step": 5164 }, { "epoch": 1.4500280741156653, "grad_norm": 0.670998752117157, "learning_rate": 6.153285376485659e-06, "loss": 0.4047, "step": 5165 }, { "epoch": 1.450308815272319, "grad_norm": 0.706078052520752, "learning_rate": 6.1516960044164855e-06, "loss": 0.3762, "step": 5166 }, { "epoch": 1.4505895564289726, "grad_norm": 0.6266189813613892, "learning_rate": 6.150106509445476e-06, "loss": 0.4298, "step": 5167 }, { "epoch": 1.450870297585626, "grad_norm": 0.5693655014038086, "learning_rate": 6.148516891742251e-06, "loss": 0.4207, "step": 5168 }, { "epoch": 1.4511510387422797, "grad_norm": 0.7362409234046936, "learning_rate": 6.146927151476447e-06, "loss": 0.3714, "step": 5169 }, { "epoch": 1.4514317798989331, "grad_norm": 0.5743008255958557, "learning_rate": 6.145337288817709e-06, "loss": 0.358, "step": 5170 }, { "epoch": 1.4517125210555868, "grad_norm": 0.6924434900283813, "learning_rate": 6.143747303935699e-06, "loss": 0.4015, "step": 5171 }, { "epoch": 1.4519932622122402, "grad_norm": 0.6799778342247009, "learning_rate": 6.142157197000087e-06, "loss": 0.389, "step": 5172 }, { "epoch": 1.452274003368894, "grad_norm": 0.7883371710777283, "learning_rate": 6.1405669681805634e-06, "loss": 0.3762, "step": 5173 }, { "epoch": 1.4525547445255476, "grad_norm": 0.6141828894615173, "learning_rate": 6.138976617646824e-06, "loss": 0.38, "step": 5174 }, { "epoch": 1.452835485682201, "grad_norm": 0.6267702579498291, "learning_rate": 6.137386145568584e-06, "loss": 0.4254, "step": 5175 }, { "epoch": 1.4531162268388544, "grad_norm": 0.6863150596618652, "learning_rate": 6.135795552115569e-06, "loss": 0.4153, "step": 5176 }, { "epoch": 1.453396967995508, "grad_norm": 0.614777147769928, "learning_rate": 6.134204837457514e-06, "loss": 0.3838, "step": 5177 }, { "epoch": 1.4536777091521618, "grad_norm": 0.6711261868476868, "learning_rate": 6.132614001764171e-06, "loss": 0.3868, "step": 5178 }, { "epoch": 1.4539584503088152, "grad_norm": 0.6433554291725159, "learning_rate": 6.131023045205306e-06, "loss": 0.4148, "step": 5179 }, { "epoch": 1.4542391914654689, "grad_norm": 0.6728886365890503, "learning_rate": 6.129431967950695e-06, "loss": 0.3798, "step": 5180 }, { "epoch": 1.4545199326221225, "grad_norm": 0.6076109409332275, "learning_rate": 6.127840770170128e-06, "loss": 0.4035, "step": 5181 }, { "epoch": 1.454800673778776, "grad_norm": 0.6427233815193176, "learning_rate": 6.126249452033408e-06, "loss": 0.3793, "step": 5182 }, { "epoch": 1.4550814149354294, "grad_norm": 0.7428321838378906, "learning_rate": 6.12465801371035e-06, "loss": 0.3593, "step": 5183 }, { "epoch": 1.455362156092083, "grad_norm": 0.6285116076469421, "learning_rate": 6.123066455370782e-06, "loss": 0.4224, "step": 5184 }, { "epoch": 1.4556428972487367, "grad_norm": 0.652482271194458, "learning_rate": 6.121474777184544e-06, "loss": 0.3746, "step": 5185 }, { "epoch": 1.4559236384053902, "grad_norm": 0.6782539486885071, "learning_rate": 6.119882979321495e-06, "loss": 0.4115, "step": 5186 }, { "epoch": 1.4562043795620438, "grad_norm": 0.7005321383476257, "learning_rate": 6.1182910619514975e-06, "loss": 0.4285, "step": 5187 }, { "epoch": 1.4564851207186973, "grad_norm": 0.7454401850700378, "learning_rate": 6.116699025244431e-06, "loss": 0.3917, "step": 5188 }, { "epoch": 1.456765861875351, "grad_norm": 0.570483922958374, "learning_rate": 6.11510686937019e-06, "loss": 0.4052, "step": 5189 }, { "epoch": 1.4570466030320044, "grad_norm": 0.5567541122436523, "learning_rate": 6.113514594498677e-06, "loss": 0.3812, "step": 5190 }, { "epoch": 1.457327344188658, "grad_norm": 0.6308498382568359, "learning_rate": 6.1119222007998125e-06, "loss": 0.3748, "step": 5191 }, { "epoch": 1.4576080853453117, "grad_norm": 0.6073638200759888, "learning_rate": 6.110329688443526e-06, "loss": 0.3943, "step": 5192 }, { "epoch": 1.4578888265019652, "grad_norm": 0.6466220021247864, "learning_rate": 6.108737057599758e-06, "loss": 0.3835, "step": 5193 }, { "epoch": 1.4581695676586188, "grad_norm": 0.6801443696022034, "learning_rate": 6.107144308438466e-06, "loss": 0.3877, "step": 5194 }, { "epoch": 1.4584503088152723, "grad_norm": 0.6578395366668701, "learning_rate": 6.105551441129619e-06, "loss": 0.4343, "step": 5195 }, { "epoch": 1.458731049971926, "grad_norm": 0.6512399315834045, "learning_rate": 6.103958455843198e-06, "loss": 0.396, "step": 5196 }, { "epoch": 1.4590117911285794, "grad_norm": 0.6606807112693787, "learning_rate": 6.102365352749193e-06, "loss": 0.3757, "step": 5197 }, { "epoch": 1.459292532285233, "grad_norm": 0.6820982098579407, "learning_rate": 6.100772132017615e-06, "loss": 0.3817, "step": 5198 }, { "epoch": 1.4595732734418867, "grad_norm": 0.681573748588562, "learning_rate": 6.099178793818479e-06, "loss": 0.3855, "step": 5199 }, { "epoch": 1.4598540145985401, "grad_norm": 0.5980532169342041, "learning_rate": 6.097585338321819e-06, "loss": 0.3946, "step": 5200 }, { "epoch": 1.4601347557551936, "grad_norm": 0.6430398225784302, "learning_rate": 6.095991765697675e-06, "loss": 0.3408, "step": 5201 }, { "epoch": 1.4604154969118472, "grad_norm": 0.694545328617096, "learning_rate": 6.094398076116107e-06, "loss": 0.3828, "step": 5202 }, { "epoch": 1.460696238068501, "grad_norm": 0.6895719766616821, "learning_rate": 6.092804269747183e-06, "loss": 0.4021, "step": 5203 }, { "epoch": 1.4609769792251543, "grad_norm": 0.6689839959144592, "learning_rate": 6.091210346760981e-06, "loss": 0.3952, "step": 5204 }, { "epoch": 1.461257720381808, "grad_norm": 0.6083852648735046, "learning_rate": 6.089616307327597e-06, "loss": 0.4226, "step": 5205 }, { "epoch": 1.4615384615384617, "grad_norm": 0.6489900350570679, "learning_rate": 6.088022151617137e-06, "loss": 0.3543, "step": 5206 }, { "epoch": 1.4618192026951151, "grad_norm": 0.6742801070213318, "learning_rate": 6.0864278797997176e-06, "loss": 0.404, "step": 5207 }, { "epoch": 1.4620999438517686, "grad_norm": 0.655458390712738, "learning_rate": 6.084833492045472e-06, "loss": 0.364, "step": 5208 }, { "epoch": 1.4623806850084222, "grad_norm": 0.6500082612037659, "learning_rate": 6.083238988524542e-06, "loss": 0.3886, "step": 5209 }, { "epoch": 1.4626614261650759, "grad_norm": 0.6608209609985352, "learning_rate": 6.081644369407084e-06, "loss": 0.3694, "step": 5210 }, { "epoch": 1.4629421673217293, "grad_norm": 0.7076460719108582, "learning_rate": 6.080049634863264e-06, "loss": 0.4411, "step": 5211 }, { "epoch": 1.463222908478383, "grad_norm": 0.6633089184761047, "learning_rate": 6.078454785063263e-06, "loss": 0.4391, "step": 5212 }, { "epoch": 1.4635036496350364, "grad_norm": 0.696105420589447, "learning_rate": 6.076859820177275e-06, "loss": 0.3536, "step": 5213 }, { "epoch": 1.46378439079169, "grad_norm": 0.6126047372817993, "learning_rate": 6.075264740375505e-06, "loss": 0.4349, "step": 5214 }, { "epoch": 1.4640651319483435, "grad_norm": 0.6296899914741516, "learning_rate": 6.073669545828167e-06, "loss": 0.366, "step": 5215 }, { "epoch": 1.4643458731049972, "grad_norm": 0.5691863894462585, "learning_rate": 6.072074236705492e-06, "loss": 0.3717, "step": 5216 }, { "epoch": 1.4646266142616509, "grad_norm": 0.6138046383857727, "learning_rate": 6.07047881317772e-06, "loss": 0.3671, "step": 5217 }, { "epoch": 1.4649073554183043, "grad_norm": 0.5991971492767334, "learning_rate": 6.068883275415107e-06, "loss": 0.4068, "step": 5218 }, { "epoch": 1.465188096574958, "grad_norm": 0.6231683492660522, "learning_rate": 6.067287623587917e-06, "loss": 0.3644, "step": 5219 }, { "epoch": 1.4654688377316114, "grad_norm": 0.6731587052345276, "learning_rate": 6.0656918578664315e-06, "loss": 0.4324, "step": 5220 }, { "epoch": 1.465749578888265, "grad_norm": 0.6973851919174194, "learning_rate": 6.064095978420936e-06, "loss": 0.3871, "step": 5221 }, { "epoch": 1.4660303200449185, "grad_norm": 0.6489410996437073, "learning_rate": 6.0624999854217346e-06, "loss": 0.3636, "step": 5222 }, { "epoch": 1.4663110612015722, "grad_norm": 0.5894689559936523, "learning_rate": 6.0609038790391415e-06, "loss": 0.3416, "step": 5223 }, { "epoch": 1.4665918023582258, "grad_norm": 0.6763150095939636, "learning_rate": 6.059307659443484e-06, "loss": 0.3928, "step": 5224 }, { "epoch": 1.4668725435148793, "grad_norm": 0.7071971893310547, "learning_rate": 6.0577113268051025e-06, "loss": 0.397, "step": 5225 }, { "epoch": 1.4671532846715327, "grad_norm": 0.7107048630714417, "learning_rate": 6.0561148812943435e-06, "loss": 0.3993, "step": 5226 }, { "epoch": 1.4674340258281864, "grad_norm": 0.6285058856010437, "learning_rate": 6.0545183230815725e-06, "loss": 0.4107, "step": 5227 }, { "epoch": 1.46771476698484, "grad_norm": 0.7170884013175964, "learning_rate": 6.0529216523371635e-06, "loss": 0.4125, "step": 5228 }, { "epoch": 1.4679955081414935, "grad_norm": 0.5967025756835938, "learning_rate": 6.051324869231504e-06, "loss": 0.3696, "step": 5229 }, { "epoch": 1.4682762492981472, "grad_norm": 0.673880934715271, "learning_rate": 6.049727973934993e-06, "loss": 0.3435, "step": 5230 }, { "epoch": 1.4685569904548006, "grad_norm": 0.6483648419380188, "learning_rate": 6.04813096661804e-06, "loss": 0.388, "step": 5231 }, { "epoch": 1.4688377316114543, "grad_norm": 0.7351468205451965, "learning_rate": 6.046533847451067e-06, "loss": 0.3824, "step": 5232 }, { "epoch": 1.4691184727681077, "grad_norm": 0.7011606097221375, "learning_rate": 6.04493661660451e-06, "loss": 0.3654, "step": 5233 }, { "epoch": 1.4693992139247614, "grad_norm": 0.6505141854286194, "learning_rate": 6.043339274248816e-06, "loss": 0.361, "step": 5234 }, { "epoch": 1.469679955081415, "grad_norm": 0.6735453605651855, "learning_rate": 6.041741820554442e-06, "loss": 0.4113, "step": 5235 }, { "epoch": 1.4699606962380685, "grad_norm": 0.596533477306366, "learning_rate": 6.04014425569186e-06, "loss": 0.3739, "step": 5236 }, { "epoch": 1.4702414373947221, "grad_norm": 0.7310095429420471, "learning_rate": 6.03854657983155e-06, "loss": 0.379, "step": 5237 }, { "epoch": 1.4705221785513756, "grad_norm": 0.7709599733352661, "learning_rate": 6.036948793144008e-06, "loss": 0.3856, "step": 5238 }, { "epoch": 1.4708029197080292, "grad_norm": 0.7274706959724426, "learning_rate": 6.035350895799739e-06, "loss": 0.3892, "step": 5239 }, { "epoch": 1.4710836608646827, "grad_norm": 0.632496178150177, "learning_rate": 6.033752887969261e-06, "loss": 0.3933, "step": 5240 }, { "epoch": 1.4713644020213363, "grad_norm": 0.7112442255020142, "learning_rate": 6.032154769823103e-06, "loss": 0.4182, "step": 5241 }, { "epoch": 1.47164514317799, "grad_norm": 0.6606371402740479, "learning_rate": 6.030556541531808e-06, "loss": 0.3856, "step": 5242 }, { "epoch": 1.4719258843346434, "grad_norm": 0.6617858409881592, "learning_rate": 6.028958203265926e-06, "loss": 0.3883, "step": 5243 }, { "epoch": 1.472206625491297, "grad_norm": 0.6967059969902039, "learning_rate": 6.027359755196024e-06, "loss": 0.427, "step": 5244 }, { "epoch": 1.4724873666479505, "grad_norm": 0.5987151265144348, "learning_rate": 6.0257611974926764e-06, "loss": 0.4207, "step": 5245 }, { "epoch": 1.4727681078046042, "grad_norm": 0.6150304675102234, "learning_rate": 6.024162530326474e-06, "loss": 0.4101, "step": 5246 }, { "epoch": 1.4730488489612577, "grad_norm": 0.674147367477417, "learning_rate": 6.022563753868014e-06, "loss": 0.4238, "step": 5247 }, { "epoch": 1.4733295901179113, "grad_norm": 0.6694990992546082, "learning_rate": 6.0209648682879095e-06, "loss": 0.4326, "step": 5248 }, { "epoch": 1.473610331274565, "grad_norm": 0.664541482925415, "learning_rate": 6.019365873756784e-06, "loss": 0.4074, "step": 5249 }, { "epoch": 1.4738910724312184, "grad_norm": 0.6293095350265503, "learning_rate": 6.0177667704452706e-06, "loss": 0.4024, "step": 5250 }, { "epoch": 1.4741718135878719, "grad_norm": 0.6363199949264526, "learning_rate": 6.0161675585240165e-06, "loss": 0.3974, "step": 5251 }, { "epoch": 1.4744525547445255, "grad_norm": 0.6345648169517517, "learning_rate": 6.014568238163681e-06, "loss": 0.3867, "step": 5252 }, { "epoch": 1.4747332959011792, "grad_norm": 0.7065677046775818, "learning_rate": 6.0129688095349315e-06, "loss": 0.4126, "step": 5253 }, { "epoch": 1.4750140370578326, "grad_norm": 0.6076880097389221, "learning_rate": 6.011369272808449e-06, "loss": 0.4191, "step": 5254 }, { "epoch": 1.4752947782144863, "grad_norm": 0.6089729070663452, "learning_rate": 6.009769628154928e-06, "loss": 0.37, "step": 5255 }, { "epoch": 1.4755755193711397, "grad_norm": 0.6556140184402466, "learning_rate": 6.008169875745071e-06, "loss": 0.4119, "step": 5256 }, { "epoch": 1.4758562605277934, "grad_norm": 0.7148545980453491, "learning_rate": 6.006570015749594e-06, "loss": 0.3902, "step": 5257 }, { "epoch": 1.4761370016844468, "grad_norm": 0.6153672933578491, "learning_rate": 6.0049700483392256e-06, "loss": 0.3719, "step": 5258 }, { "epoch": 1.4764177428411005, "grad_norm": 0.5607266426086426, "learning_rate": 6.003369973684703e-06, "loss": 0.3671, "step": 5259 }, { "epoch": 1.4766984839977542, "grad_norm": 0.6392752528190613, "learning_rate": 6.0017697919567755e-06, "loss": 0.3762, "step": 5260 }, { "epoch": 1.4769792251544076, "grad_norm": 0.7464618682861328, "learning_rate": 6.000169503326204e-06, "loss": 0.3669, "step": 5261 }, { "epoch": 1.4772599663110613, "grad_norm": 0.6039334535598755, "learning_rate": 5.998569107963765e-06, "loss": 0.3605, "step": 5262 }, { "epoch": 1.4775407074677147, "grad_norm": 0.5802311897277832, "learning_rate": 5.996968606040241e-06, "loss": 0.3658, "step": 5263 }, { "epoch": 1.4778214486243684, "grad_norm": 0.5855432152748108, "learning_rate": 5.995367997726426e-06, "loss": 0.4361, "step": 5264 }, { "epoch": 1.4781021897810218, "grad_norm": 0.7310748100280762, "learning_rate": 5.993767283193128e-06, "loss": 0.4102, "step": 5265 }, { "epoch": 1.4783829309376755, "grad_norm": 0.7403544187545776, "learning_rate": 5.992166462611165e-06, "loss": 0.3685, "step": 5266 }, { "epoch": 1.4786636720943291, "grad_norm": 0.5496102571487427, "learning_rate": 5.990565536151367e-06, "loss": 0.4098, "step": 5267 }, { "epoch": 1.4789444132509826, "grad_norm": 0.6038678288459778, "learning_rate": 5.988964503984575e-06, "loss": 0.3511, "step": 5268 }, { "epoch": 1.479225154407636, "grad_norm": 0.6533830165863037, "learning_rate": 5.9873633662816435e-06, "loss": 0.3553, "step": 5269 }, { "epoch": 1.4795058955642897, "grad_norm": 0.6522130966186523, "learning_rate": 5.985762123213431e-06, "loss": 0.3863, "step": 5270 }, { "epoch": 1.4797866367209433, "grad_norm": 0.7871561050415039, "learning_rate": 5.984160774950816e-06, "loss": 0.4152, "step": 5271 }, { "epoch": 1.4800673778775968, "grad_norm": 0.6167128086090088, "learning_rate": 5.982559321664681e-06, "loss": 0.3992, "step": 5272 }, { "epoch": 1.4803481190342505, "grad_norm": 0.7990025281906128, "learning_rate": 5.980957763525927e-06, "loss": 0.3848, "step": 5273 }, { "epoch": 1.4806288601909041, "grad_norm": 0.6659459471702576, "learning_rate": 5.97935610070546e-06, "loss": 0.4005, "step": 5274 }, { "epoch": 1.4809096013475576, "grad_norm": 0.6580727696418762, "learning_rate": 5.977754333374201e-06, "loss": 0.3583, "step": 5275 }, { "epoch": 1.481190342504211, "grad_norm": 0.7282503843307495, "learning_rate": 5.9761524617030796e-06, "loss": 0.3631, "step": 5276 }, { "epoch": 1.4814710836608647, "grad_norm": 0.7209925055503845, "learning_rate": 5.974550485863038e-06, "loss": 0.3747, "step": 5277 }, { "epoch": 1.4817518248175183, "grad_norm": 0.7016963362693787, "learning_rate": 5.972948406025028e-06, "loss": 0.3659, "step": 5278 }, { "epoch": 1.4820325659741718, "grad_norm": 0.6650273203849792, "learning_rate": 5.971346222360015e-06, "loss": 0.3735, "step": 5279 }, { "epoch": 1.4823133071308254, "grad_norm": 0.620310366153717, "learning_rate": 5.969743935038974e-06, "loss": 0.3954, "step": 5280 }, { "epoch": 1.4825940482874789, "grad_norm": 0.7159186601638794, "learning_rate": 5.96814154423289e-06, "loss": 0.416, "step": 5281 }, { "epoch": 1.4828747894441325, "grad_norm": 0.729147732257843, "learning_rate": 5.966539050112761e-06, "loss": 0.3969, "step": 5282 }, { "epoch": 1.483155530600786, "grad_norm": 0.7633885145187378, "learning_rate": 5.964936452849594e-06, "loss": 0.4151, "step": 5283 }, { "epoch": 1.4834362717574396, "grad_norm": 0.6439753174781799, "learning_rate": 5.963333752614411e-06, "loss": 0.4113, "step": 5284 }, { "epoch": 1.4837170129140933, "grad_norm": 0.6582778692245483, "learning_rate": 5.961730949578239e-06, "loss": 0.3844, "step": 5285 }, { "epoch": 1.4839977540707467, "grad_norm": 0.7423039674758911, "learning_rate": 5.9601280439121224e-06, "loss": 0.4004, "step": 5286 }, { "epoch": 1.4842784952274004, "grad_norm": 0.6271695494651794, "learning_rate": 5.9585250357871105e-06, "loss": 0.3826, "step": 5287 }, { "epoch": 1.4845592363840538, "grad_norm": 0.6961673498153687, "learning_rate": 5.956921925374269e-06, "loss": 0.3965, "step": 5288 }, { "epoch": 1.4848399775407075, "grad_norm": 0.7871799468994141, "learning_rate": 5.955318712844668e-06, "loss": 0.4116, "step": 5289 }, { "epoch": 1.485120718697361, "grad_norm": 0.548133134841919, "learning_rate": 5.953715398369395e-06, "loss": 0.3959, "step": 5290 }, { "epoch": 1.4854014598540146, "grad_norm": 0.7311726212501526, "learning_rate": 5.9521119821195475e-06, "loss": 0.3948, "step": 5291 }, { "epoch": 1.4856822010106683, "grad_norm": 0.5908982157707214, "learning_rate": 5.9505084642662295e-06, "loss": 0.3693, "step": 5292 }, { "epoch": 1.4859629421673217, "grad_norm": 0.7001266479492188, "learning_rate": 5.948904844980558e-06, "loss": 0.4142, "step": 5293 }, { "epoch": 1.4862436833239752, "grad_norm": 0.7001531720161438, "learning_rate": 5.947301124433662e-06, "loss": 0.3822, "step": 5294 }, { "epoch": 1.4865244244806288, "grad_norm": 0.6736369132995605, "learning_rate": 5.945697302796681e-06, "loss": 0.3953, "step": 5295 }, { "epoch": 1.4868051656372825, "grad_norm": 0.6351548433303833, "learning_rate": 5.944093380240765e-06, "loss": 0.3622, "step": 5296 }, { "epoch": 1.487085906793936, "grad_norm": 0.664350152015686, "learning_rate": 5.942489356937075e-06, "loss": 0.4058, "step": 5297 }, { "epoch": 1.4873666479505896, "grad_norm": 0.7064056396484375, "learning_rate": 5.940885233056782e-06, "loss": 0.387, "step": 5298 }, { "epoch": 1.4876473891072433, "grad_norm": 0.6060472726821899, "learning_rate": 5.9392810087710666e-06, "loss": 0.348, "step": 5299 }, { "epoch": 1.4879281302638967, "grad_norm": 0.5615829825401306, "learning_rate": 5.937676684251124e-06, "loss": 0.402, "step": 5300 }, { "epoch": 1.4882088714205501, "grad_norm": 0.6724618673324585, "learning_rate": 5.936072259668155e-06, "loss": 0.3923, "step": 5301 }, { "epoch": 1.4884896125772038, "grad_norm": 0.8215413689613342, "learning_rate": 5.9344677351933785e-06, "loss": 0.3659, "step": 5302 }, { "epoch": 1.4887703537338575, "grad_norm": 0.6384742259979248, "learning_rate": 5.932863110998014e-06, "loss": 0.3858, "step": 5303 }, { "epoch": 1.489051094890511, "grad_norm": 0.6819884777069092, "learning_rate": 5.9312583872533e-06, "loss": 0.4005, "step": 5304 }, { "epoch": 1.4893318360471646, "grad_norm": 0.6694992184638977, "learning_rate": 5.929653564130482e-06, "loss": 0.4105, "step": 5305 }, { "epoch": 1.489612577203818, "grad_norm": 0.6424428224563599, "learning_rate": 5.928048641800817e-06, "loss": 0.3477, "step": 5306 }, { "epoch": 1.4898933183604717, "grad_norm": 0.651040256023407, "learning_rate": 5.926443620435572e-06, "loss": 0.389, "step": 5307 }, { "epoch": 1.4901740595171251, "grad_norm": 0.6359730958938599, "learning_rate": 5.924838500206026e-06, "loss": 0.3662, "step": 5308 }, { "epoch": 1.4904548006737788, "grad_norm": 0.707542896270752, "learning_rate": 5.923233281283465e-06, "loss": 0.439, "step": 5309 }, { "epoch": 1.4907355418304324, "grad_norm": 0.6115970611572266, "learning_rate": 5.92162796383919e-06, "loss": 0.328, "step": 5310 }, { "epoch": 1.4910162829870859, "grad_norm": 0.6650423407554626, "learning_rate": 5.920022548044509e-06, "loss": 0.3628, "step": 5311 }, { "epoch": 1.4912970241437395, "grad_norm": 0.6958677172660828, "learning_rate": 5.918417034070745e-06, "loss": 0.3635, "step": 5312 }, { "epoch": 1.491577765300393, "grad_norm": 0.678145170211792, "learning_rate": 5.916811422089224e-06, "loss": 0.3891, "step": 5313 }, { "epoch": 1.4918585064570467, "grad_norm": 0.7073054909706116, "learning_rate": 5.91520571227129e-06, "loss": 0.4188, "step": 5314 }, { "epoch": 1.4921392476137, "grad_norm": 0.6573716402053833, "learning_rate": 5.913599904788294e-06, "loss": 0.3435, "step": 5315 }, { "epoch": 1.4924199887703538, "grad_norm": 0.5908942222595215, "learning_rate": 5.9119939998115984e-06, "loss": 0.3456, "step": 5316 }, { "epoch": 1.4927007299270074, "grad_norm": 0.6898457407951355, "learning_rate": 5.910387997512573e-06, "loss": 0.4218, "step": 5317 }, { "epoch": 1.4929814710836609, "grad_norm": 0.7363622188568115, "learning_rate": 5.908781898062604e-06, "loss": 0.3607, "step": 5318 }, { "epoch": 1.4932622122403143, "grad_norm": 0.695950448513031, "learning_rate": 5.90717570163308e-06, "loss": 0.3921, "step": 5319 }, { "epoch": 1.493542953396968, "grad_norm": 0.7554194927215576, "learning_rate": 5.905569408395407e-06, "loss": 0.4059, "step": 5320 }, { "epoch": 1.4938236945536216, "grad_norm": 0.6438939571380615, "learning_rate": 5.903963018520997e-06, "loss": 0.3712, "step": 5321 }, { "epoch": 1.494104435710275, "grad_norm": 0.6880878806114197, "learning_rate": 5.902356532181277e-06, "loss": 0.3861, "step": 5322 }, { "epoch": 1.4943851768669287, "grad_norm": 0.684183657169342, "learning_rate": 5.900749949547679e-06, "loss": 0.4044, "step": 5323 }, { "epoch": 1.4946659180235822, "grad_norm": 0.658062756061554, "learning_rate": 5.899143270791648e-06, "loss": 0.34, "step": 5324 }, { "epoch": 1.4949466591802358, "grad_norm": 0.7454277276992798, "learning_rate": 5.897536496084636e-06, "loss": 0.4106, "step": 5325 }, { "epoch": 1.4952274003368893, "grad_norm": 0.6257035732269287, "learning_rate": 5.895929625598113e-06, "loss": 0.3756, "step": 5326 }, { "epoch": 1.495508141493543, "grad_norm": 0.6086931824684143, "learning_rate": 5.894322659503551e-06, "loss": 0.3817, "step": 5327 }, { "epoch": 1.4957888826501966, "grad_norm": 0.593729555606842, "learning_rate": 5.892715597972436e-06, "loss": 0.408, "step": 5328 }, { "epoch": 1.49606962380685, "grad_norm": 0.6202966570854187, "learning_rate": 5.891108441176266e-06, "loss": 0.3942, "step": 5329 }, { "epoch": 1.4963503649635037, "grad_norm": 0.6379451751708984, "learning_rate": 5.889501189286542e-06, "loss": 0.397, "step": 5330 }, { "epoch": 1.4966311061201572, "grad_norm": 0.5945736765861511, "learning_rate": 5.887893842474783e-06, "loss": 0.3994, "step": 5331 }, { "epoch": 1.4969118472768108, "grad_norm": 0.721962034702301, "learning_rate": 5.886286400912514e-06, "loss": 0.4003, "step": 5332 }, { "epoch": 1.4971925884334643, "grad_norm": 0.6053306460380554, "learning_rate": 5.884678864771273e-06, "loss": 0.3712, "step": 5333 }, { "epoch": 1.497473329590118, "grad_norm": 0.670535147190094, "learning_rate": 5.883071234222604e-06, "loss": 0.4128, "step": 5334 }, { "epoch": 1.4977540707467716, "grad_norm": 0.6948467493057251, "learning_rate": 5.881463509438066e-06, "loss": 0.3906, "step": 5335 }, { "epoch": 1.498034811903425, "grad_norm": 0.6200743913650513, "learning_rate": 5.879855690589223e-06, "loss": 0.4406, "step": 5336 }, { "epoch": 1.4983155530600787, "grad_norm": 0.6302000284194946, "learning_rate": 5.8782477778476495e-06, "loss": 0.3975, "step": 5337 }, { "epoch": 1.4985962942167321, "grad_norm": 0.532776951789856, "learning_rate": 5.876639771384938e-06, "loss": 0.3407, "step": 5338 }, { "epoch": 1.4988770353733858, "grad_norm": 0.607579231262207, "learning_rate": 5.87503167137268e-06, "loss": 0.4003, "step": 5339 }, { "epoch": 1.4991577765300392, "grad_norm": 0.6802986860275269, "learning_rate": 5.873423477982485e-06, "loss": 0.4227, "step": 5340 }, { "epoch": 1.499438517686693, "grad_norm": 0.6502795219421387, "learning_rate": 5.871815191385967e-06, "loss": 0.4116, "step": 5341 }, { "epoch": 1.4997192588433466, "grad_norm": 0.6263981461524963, "learning_rate": 5.8702068117547525e-06, "loss": 0.3711, "step": 5342 }, { "epoch": 1.5, "grad_norm": 0.6796420812606812, "learning_rate": 5.86859833926048e-06, "loss": 0.3804, "step": 5343 }, { "epoch": 1.5002807411566534, "grad_norm": 0.6035329103469849, "learning_rate": 5.8669897740747924e-06, "loss": 0.4039, "step": 5344 }, { "epoch": 1.500561482313307, "grad_norm": 0.7662019729614258, "learning_rate": 5.865381116369348e-06, "loss": 0.3979, "step": 5345 }, { "epoch": 1.5008422234699608, "grad_norm": 0.6464301943778992, "learning_rate": 5.863772366315814e-06, "loss": 0.3918, "step": 5346 }, { "epoch": 1.5011229646266142, "grad_norm": 0.6173228025436401, "learning_rate": 5.8621635240858635e-06, "loss": 0.4121, "step": 5347 }, { "epoch": 1.5014037057832679, "grad_norm": 0.6510396003723145, "learning_rate": 5.860554589851183e-06, "loss": 0.3676, "step": 5348 }, { "epoch": 1.5016844469399215, "grad_norm": 0.6980282068252563, "learning_rate": 5.858945563783468e-06, "loss": 0.3932, "step": 5349 }, { "epoch": 1.501965188096575, "grad_norm": 0.7186689972877502, "learning_rate": 5.857336446054423e-06, "loss": 0.4263, "step": 5350 }, { "epoch": 1.5022459292532284, "grad_norm": 0.7035834789276123, "learning_rate": 5.8557272368357655e-06, "loss": 0.3772, "step": 5351 }, { "epoch": 1.502526670409882, "grad_norm": 0.644368588924408, "learning_rate": 5.854117936299217e-06, "loss": 0.3852, "step": 5352 }, { "epoch": 1.5028074115665357, "grad_norm": 0.6306403279304504, "learning_rate": 5.852508544616515e-06, "loss": 0.4048, "step": 5353 }, { "epoch": 1.5030881527231892, "grad_norm": 0.6548639535903931, "learning_rate": 5.850899061959403e-06, "loss": 0.3425, "step": 5354 }, { "epoch": 1.5033688938798426, "grad_norm": 0.6650660037994385, "learning_rate": 5.8492894884996334e-06, "loss": 0.373, "step": 5355 }, { "epoch": 1.5036496350364965, "grad_norm": 0.6372920274734497, "learning_rate": 5.847679824408972e-06, "loss": 0.3832, "step": 5356 }, { "epoch": 1.50393037619315, "grad_norm": 0.6380687355995178, "learning_rate": 5.846070069859191e-06, "loss": 0.3824, "step": 5357 }, { "epoch": 1.5042111173498034, "grad_norm": 0.5915088057518005, "learning_rate": 5.8444602250220726e-06, "loss": 0.3712, "step": 5358 }, { "epoch": 1.504491858506457, "grad_norm": 0.661666989326477, "learning_rate": 5.84285029006941e-06, "loss": 0.3929, "step": 5359 }, { "epoch": 1.5047725996631107, "grad_norm": 0.6691256761550903, "learning_rate": 5.841240265173007e-06, "loss": 0.3963, "step": 5360 }, { "epoch": 1.5050533408197642, "grad_norm": 0.6810079216957092, "learning_rate": 5.8396301505046735e-06, "loss": 0.3857, "step": 5361 }, { "epoch": 1.5053340819764176, "grad_norm": 0.7217987775802612, "learning_rate": 5.8380199462362315e-06, "loss": 0.3524, "step": 5362 }, { "epoch": 1.5056148231330713, "grad_norm": 0.7055343985557556, "learning_rate": 5.836409652539513e-06, "loss": 0.4055, "step": 5363 }, { "epoch": 1.505895564289725, "grad_norm": 0.681816577911377, "learning_rate": 5.834799269586358e-06, "loss": 0.4012, "step": 5364 }, { "epoch": 1.5061763054463784, "grad_norm": 0.6335623264312744, "learning_rate": 5.833188797548614e-06, "loss": 0.3969, "step": 5365 }, { "epoch": 1.506457046603032, "grad_norm": 0.7278549671173096, "learning_rate": 5.831578236598145e-06, "loss": 0.4264, "step": 5366 }, { "epoch": 1.5067377877596857, "grad_norm": 0.8221374750137329, "learning_rate": 5.8299675869068166e-06, "loss": 0.3996, "step": 5367 }, { "epoch": 1.5070185289163391, "grad_norm": 0.7820380330085754, "learning_rate": 5.82835684864651e-06, "loss": 0.3678, "step": 5368 }, { "epoch": 1.5072992700729926, "grad_norm": 0.5716264843940735, "learning_rate": 5.8267460219891105e-06, "loss": 0.3813, "step": 5369 }, { "epoch": 1.5075800112296462, "grad_norm": 0.7560267448425293, "learning_rate": 5.825135107106517e-06, "loss": 0.4229, "step": 5370 }, { "epoch": 1.5078607523863, "grad_norm": 0.5916890501976013, "learning_rate": 5.823524104170636e-06, "loss": 0.3894, "step": 5371 }, { "epoch": 1.5081414935429533, "grad_norm": 0.6364879012107849, "learning_rate": 5.821913013353383e-06, "loss": 0.3902, "step": 5372 }, { "epoch": 1.508422234699607, "grad_norm": 0.7223371863365173, "learning_rate": 5.820301834826685e-06, "loss": 0.4066, "step": 5373 }, { "epoch": 1.5087029758562607, "grad_norm": 0.7059696912765503, "learning_rate": 5.818690568762477e-06, "loss": 0.3313, "step": 5374 }, { "epoch": 1.5089837170129141, "grad_norm": 0.7071421146392822, "learning_rate": 5.817079215332703e-06, "loss": 0.4008, "step": 5375 }, { "epoch": 1.5092644581695676, "grad_norm": 0.7023800015449524, "learning_rate": 5.815467774709314e-06, "loss": 0.4313, "step": 5376 }, { "epoch": 1.5095451993262212, "grad_norm": 0.5554521679878235, "learning_rate": 5.813856247064276e-06, "loss": 0.3913, "step": 5377 }, { "epoch": 1.5098259404828749, "grad_norm": 0.6641628742218018, "learning_rate": 5.812244632569561e-06, "loss": 0.4017, "step": 5378 }, { "epoch": 1.5101066816395283, "grad_norm": 0.6552072763442993, "learning_rate": 5.81063293139715e-06, "loss": 0.3742, "step": 5379 }, { "epoch": 1.5103874227961818, "grad_norm": 0.7391794919967651, "learning_rate": 5.8090211437190335e-06, "loss": 0.4016, "step": 5380 }, { "epoch": 1.5106681639528357, "grad_norm": 0.608917772769928, "learning_rate": 5.807409269707211e-06, "loss": 0.376, "step": 5381 }, { "epoch": 1.510948905109489, "grad_norm": 0.6967856884002686, "learning_rate": 5.805797309533692e-06, "loss": 0.3931, "step": 5382 }, { "epoch": 1.5112296462661425, "grad_norm": 0.6006859540939331, "learning_rate": 5.8041852633704955e-06, "loss": 0.3481, "step": 5383 }, { "epoch": 1.5115103874227962, "grad_norm": 0.6341225504875183, "learning_rate": 5.80257313138965e-06, "loss": 0.4189, "step": 5384 }, { "epoch": 1.5117911285794499, "grad_norm": 0.6314254999160767, "learning_rate": 5.8009609137631886e-06, "loss": 0.3615, "step": 5385 }, { "epoch": 1.5120718697361033, "grad_norm": 0.7232957482337952, "learning_rate": 5.7993486106631595e-06, "loss": 0.3636, "step": 5386 }, { "epoch": 1.5123526108927567, "grad_norm": 0.6637201309204102, "learning_rate": 5.797736222261617e-06, "loss": 0.3657, "step": 5387 }, { "epoch": 1.5126333520494104, "grad_norm": 0.6195018887519836, "learning_rate": 5.7961237487306265e-06, "loss": 0.3679, "step": 5388 }, { "epoch": 1.512914093206064, "grad_norm": 0.5997506976127625, "learning_rate": 5.794511190242261e-06, "loss": 0.4312, "step": 5389 }, { "epoch": 1.5131948343627175, "grad_norm": 0.6237205266952515, "learning_rate": 5.792898546968601e-06, "loss": 0.3597, "step": 5390 }, { "epoch": 1.5134755755193712, "grad_norm": 0.6758390665054321, "learning_rate": 5.79128581908174e-06, "loss": 0.414, "step": 5391 }, { "epoch": 1.5137563166760248, "grad_norm": 0.6171358823776245, "learning_rate": 5.789673006753776e-06, "loss": 0.3444, "step": 5392 }, { "epoch": 1.5140370578326783, "grad_norm": 0.7520563006401062, "learning_rate": 5.788060110156819e-06, "loss": 0.4251, "step": 5393 }, { "epoch": 1.5143177989893317, "grad_norm": 0.7279042601585388, "learning_rate": 5.786447129462989e-06, "loss": 0.4328, "step": 5394 }, { "epoch": 1.5145985401459854, "grad_norm": 0.5959756374359131, "learning_rate": 5.784834064844411e-06, "loss": 0.3886, "step": 5395 }, { "epoch": 1.514879281302639, "grad_norm": 0.7278431057929993, "learning_rate": 5.783220916473224e-06, "loss": 0.408, "step": 5396 }, { "epoch": 1.5151600224592925, "grad_norm": 0.7128639221191406, "learning_rate": 5.781607684521568e-06, "loss": 0.3918, "step": 5397 }, { "epoch": 1.5154407636159462, "grad_norm": 0.7180032730102539, "learning_rate": 5.779994369161602e-06, "loss": 0.3705, "step": 5398 }, { "epoch": 1.5157215047725998, "grad_norm": 0.6366936564445496, "learning_rate": 5.778380970565488e-06, "loss": 0.3333, "step": 5399 }, { "epoch": 1.5160022459292533, "grad_norm": 0.6799874305725098, "learning_rate": 5.776767488905397e-06, "loss": 0.3705, "step": 5400 }, { "epoch": 1.5162829870859067, "grad_norm": 0.6618354916572571, "learning_rate": 5.7751539243535096e-06, "loss": 0.3751, "step": 5401 }, { "epoch": 1.5165637282425604, "grad_norm": 0.6983927488327026, "learning_rate": 5.773540277082016e-06, "loss": 0.3722, "step": 5402 }, { "epoch": 1.516844469399214, "grad_norm": 0.7446848750114441, "learning_rate": 5.7719265472631134e-06, "loss": 0.3902, "step": 5403 }, { "epoch": 1.5171252105558675, "grad_norm": 0.6266611218452454, "learning_rate": 5.770312735069012e-06, "loss": 0.409, "step": 5404 }, { "epoch": 1.517405951712521, "grad_norm": 0.6330932974815369, "learning_rate": 5.768698840671924e-06, "loss": 0.3883, "step": 5405 }, { "epoch": 1.5176866928691746, "grad_norm": 0.6011622548103333, "learning_rate": 5.767084864244077e-06, "loss": 0.3445, "step": 5406 }, { "epoch": 1.5179674340258282, "grad_norm": 0.6243938207626343, "learning_rate": 5.765470805957704e-06, "loss": 0.3844, "step": 5407 }, { "epoch": 1.5182481751824817, "grad_norm": 0.6916384100914001, "learning_rate": 5.763856665985045e-06, "loss": 0.4055, "step": 5408 }, { "epoch": 1.5185289163391353, "grad_norm": 0.6826633810997009, "learning_rate": 5.762242444498353e-06, "loss": 0.4252, "step": 5409 }, { "epoch": 1.518809657495789, "grad_norm": 0.7094565629959106, "learning_rate": 5.7606281416698886e-06, "loss": 0.3954, "step": 5410 }, { "epoch": 1.5190903986524424, "grad_norm": 0.648792028427124, "learning_rate": 5.7590137576719174e-06, "loss": 0.3884, "step": 5411 }, { "epoch": 1.5193711398090959, "grad_norm": 0.5709918141365051, "learning_rate": 5.75739929267672e-06, "loss": 0.4086, "step": 5412 }, { "epoch": 1.5196518809657495, "grad_norm": 0.6704549789428711, "learning_rate": 5.7557847468565785e-06, "loss": 0.3918, "step": 5413 }, { "epoch": 1.5199326221224032, "grad_norm": 0.6673620939254761, "learning_rate": 5.754170120383789e-06, "loss": 0.3996, "step": 5414 }, { "epoch": 1.5202133632790567, "grad_norm": 0.6866858601570129, "learning_rate": 5.752555413430654e-06, "loss": 0.3987, "step": 5415 }, { "epoch": 1.5204941044357103, "grad_norm": 0.6254552602767944, "learning_rate": 5.7509406261694846e-06, "loss": 0.3571, "step": 5416 }, { "epoch": 1.520774845592364, "grad_norm": 0.6172268986701965, "learning_rate": 5.749325758772604e-06, "loss": 0.3999, "step": 5417 }, { "epoch": 1.5210555867490174, "grad_norm": 0.686348021030426, "learning_rate": 5.747710811412335e-06, "loss": 0.3975, "step": 5418 }, { "epoch": 1.5213363279056709, "grad_norm": 0.6760064959526062, "learning_rate": 5.74609578426102e-06, "loss": 0.386, "step": 5419 }, { "epoch": 1.5216170690623245, "grad_norm": 0.6784769296646118, "learning_rate": 5.744480677491001e-06, "loss": 0.4086, "step": 5420 }, { "epoch": 1.5218978102189782, "grad_norm": 0.5766478180885315, "learning_rate": 5.742865491274634e-06, "loss": 0.3686, "step": 5421 }, { "epoch": 1.5221785513756316, "grad_norm": 0.614371657371521, "learning_rate": 5.741250225784282e-06, "loss": 0.3734, "step": 5422 }, { "epoch": 1.522459292532285, "grad_norm": 0.6568720936775208, "learning_rate": 5.739634881192316e-06, "loss": 0.3863, "step": 5423 }, { "epoch": 1.522740033688939, "grad_norm": 0.7889798879623413, "learning_rate": 5.738019457671115e-06, "loss": 0.4398, "step": 5424 }, { "epoch": 1.5230207748455924, "grad_norm": 0.685434103012085, "learning_rate": 5.736403955393066e-06, "loss": 0.4039, "step": 5425 }, { "epoch": 1.5233015160022458, "grad_norm": 0.5949421525001526, "learning_rate": 5.734788374530565e-06, "loss": 0.3699, "step": 5426 }, { "epoch": 1.5235822571588995, "grad_norm": 0.6006492972373962, "learning_rate": 5.733172715256019e-06, "loss": 0.3843, "step": 5427 }, { "epoch": 1.5238629983155532, "grad_norm": 0.6904444098472595, "learning_rate": 5.731556977741841e-06, "loss": 0.3813, "step": 5428 }, { "epoch": 1.5241437394722066, "grad_norm": 0.7011377811431885, "learning_rate": 5.729941162160452e-06, "loss": 0.3952, "step": 5429 }, { "epoch": 1.52442448062886, "grad_norm": 0.6623652577400208, "learning_rate": 5.72832526868428e-06, "loss": 0.3673, "step": 5430 }, { "epoch": 1.5247052217855137, "grad_norm": 0.7013071775436401, "learning_rate": 5.726709297485765e-06, "loss": 0.3739, "step": 5431 }, { "epoch": 1.5249859629421674, "grad_norm": 0.6543093919754028, "learning_rate": 5.725093248737352e-06, "loss": 0.4227, "step": 5432 }, { "epoch": 1.5252667040988208, "grad_norm": 0.6951916217803955, "learning_rate": 5.723477122611499e-06, "loss": 0.3842, "step": 5433 }, { "epoch": 1.5255474452554745, "grad_norm": 0.6171422600746155, "learning_rate": 5.721860919280665e-06, "loss": 0.4137, "step": 5434 }, { "epoch": 1.5258281864121281, "grad_norm": 0.7122002243995667, "learning_rate": 5.7202446389173225e-06, "loss": 0.3993, "step": 5435 }, { "epoch": 1.5261089275687816, "grad_norm": 0.6579063534736633, "learning_rate": 5.7186282816939506e-06, "loss": 0.3922, "step": 5436 }, { "epoch": 1.526389668725435, "grad_norm": 0.6793777346611023, "learning_rate": 5.717011847783039e-06, "loss": 0.3699, "step": 5437 }, { "epoch": 1.5266704098820887, "grad_norm": 0.7135227918624878, "learning_rate": 5.715395337357079e-06, "loss": 0.3952, "step": 5438 }, { "epoch": 1.5269511510387423, "grad_norm": 0.7062365412712097, "learning_rate": 5.71377875058858e-06, "loss": 0.373, "step": 5439 }, { "epoch": 1.5272318921953958, "grad_norm": 0.6488957405090332, "learning_rate": 5.712162087650051e-06, "loss": 0.3951, "step": 5440 }, { "epoch": 1.5275126333520495, "grad_norm": 0.6518882513046265, "learning_rate": 5.71054534871401e-06, "loss": 0.3937, "step": 5441 }, { "epoch": 1.5277933745087031, "grad_norm": 0.6719104051589966, "learning_rate": 5.7089285339529906e-06, "loss": 0.3696, "step": 5442 }, { "epoch": 1.5280741156653566, "grad_norm": 0.6756449341773987, "learning_rate": 5.707311643539526e-06, "loss": 0.3965, "step": 5443 }, { "epoch": 1.52835485682201, "grad_norm": 0.6698692440986633, "learning_rate": 5.705694677646162e-06, "loss": 0.3698, "step": 5444 }, { "epoch": 1.5286355979786637, "grad_norm": 0.7081566452980042, "learning_rate": 5.704077636445451e-06, "loss": 0.362, "step": 5445 }, { "epoch": 1.5289163391353173, "grad_norm": 0.5820019245147705, "learning_rate": 5.702460520109952e-06, "loss": 0.4134, "step": 5446 }, { "epoch": 1.5291970802919708, "grad_norm": 0.7080488204956055, "learning_rate": 5.700843328812234e-06, "loss": 0.4127, "step": 5447 }, { "epoch": 1.5294778214486242, "grad_norm": 0.6721872687339783, "learning_rate": 5.699226062724874e-06, "loss": 0.3849, "step": 5448 }, { "epoch": 1.529758562605278, "grad_norm": 0.6288347840309143, "learning_rate": 5.697608722020457e-06, "loss": 0.3897, "step": 5449 }, { "epoch": 1.5300393037619315, "grad_norm": 0.6673933863639832, "learning_rate": 5.6959913068715755e-06, "loss": 0.4067, "step": 5450 }, { "epoch": 1.530320044918585, "grad_norm": 0.7181184887886047, "learning_rate": 5.694373817450831e-06, "loss": 0.4224, "step": 5451 }, { "epoch": 1.5306007860752386, "grad_norm": 0.6134187579154968, "learning_rate": 5.692756253930829e-06, "loss": 0.4048, "step": 5452 }, { "epoch": 1.5308815272318923, "grad_norm": 0.7308587431907654, "learning_rate": 5.691138616484188e-06, "loss": 0.4185, "step": 5453 }, { "epoch": 1.5311622683885457, "grad_norm": 0.6228800415992737, "learning_rate": 5.689520905283532e-06, "loss": 0.3899, "step": 5454 }, { "epoch": 1.5314430095451992, "grad_norm": 0.5505013465881348, "learning_rate": 5.687903120501493e-06, "loss": 0.3814, "step": 5455 }, { "epoch": 1.5317237507018528, "grad_norm": 0.5905176401138306, "learning_rate": 5.686285262310711e-06, "loss": 0.4157, "step": 5456 }, { "epoch": 1.5320044918585065, "grad_norm": 0.5852351188659668, "learning_rate": 5.684667330883833e-06, "loss": 0.3955, "step": 5457 }, { "epoch": 1.53228523301516, "grad_norm": 0.7054872512817383, "learning_rate": 5.683049326393515e-06, "loss": 0.4057, "step": 5458 }, { "epoch": 1.5325659741718136, "grad_norm": 0.6706883311271667, "learning_rate": 5.681431249012421e-06, "loss": 0.3792, "step": 5459 }, { "epoch": 1.5328467153284673, "grad_norm": 0.6272156238555908, "learning_rate": 5.679813098913222e-06, "loss": 0.4217, "step": 5460 }, { "epoch": 1.5331274564851207, "grad_norm": 0.6789434552192688, "learning_rate": 5.6781948762685964e-06, "loss": 0.3303, "step": 5461 }, { "epoch": 1.5334081976417742, "grad_norm": 0.5765001773834229, "learning_rate": 5.6765765812512305e-06, "loss": 0.4084, "step": 5462 }, { "epoch": 1.5336889387984278, "grad_norm": 0.6353691816329956, "learning_rate": 5.674958214033819e-06, "loss": 0.3779, "step": 5463 }, { "epoch": 1.5339696799550815, "grad_norm": 0.6867348551750183, "learning_rate": 5.6733397747890654e-06, "loss": 0.3928, "step": 5464 }, { "epoch": 1.534250421111735, "grad_norm": 0.6241796612739563, "learning_rate": 5.671721263689675e-06, "loss": 0.4177, "step": 5465 }, { "epoch": 1.5345311622683886, "grad_norm": 0.6127474308013916, "learning_rate": 5.670102680908372e-06, "loss": 0.3968, "step": 5466 }, { "epoch": 1.5348119034250423, "grad_norm": 0.7174510955810547, "learning_rate": 5.668484026617878e-06, "loss": 0.3708, "step": 5467 }, { "epoch": 1.5350926445816957, "grad_norm": 0.6681004166603088, "learning_rate": 5.666865300990923e-06, "loss": 0.4002, "step": 5468 }, { "epoch": 1.5353733857383491, "grad_norm": 0.6794464588165283, "learning_rate": 5.665246504200253e-06, "loss": 0.4073, "step": 5469 }, { "epoch": 1.5356541268950028, "grad_norm": 0.6528903841972351, "learning_rate": 5.663627636418611e-06, "loss": 0.3979, "step": 5470 }, { "epoch": 1.5359348680516565, "grad_norm": 0.5895650386810303, "learning_rate": 5.662008697818754e-06, "loss": 0.4001, "step": 5471 }, { "epoch": 1.53621560920831, "grad_norm": 0.6318337917327881, "learning_rate": 5.660389688573448e-06, "loss": 0.3612, "step": 5472 }, { "epoch": 1.5364963503649633, "grad_norm": 0.7096831798553467, "learning_rate": 5.658770608855459e-06, "loss": 0.3752, "step": 5473 }, { "epoch": 1.5367770915216172, "grad_norm": 0.5959429740905762, "learning_rate": 5.657151458837569e-06, "loss": 0.3754, "step": 5474 }, { "epoch": 1.5370578326782707, "grad_norm": 0.6204558610916138, "learning_rate": 5.65553223869256e-06, "loss": 0.3697, "step": 5475 }, { "epoch": 1.5373385738349241, "grad_norm": 0.6069909930229187, "learning_rate": 5.653912948593227e-06, "loss": 0.3898, "step": 5476 }, { "epoch": 1.5376193149915778, "grad_norm": 0.6009364128112793, "learning_rate": 5.652293588712372e-06, "loss": 0.3678, "step": 5477 }, { "epoch": 1.5379000561482314, "grad_norm": 0.6900011301040649, "learning_rate": 5.650674159222801e-06, "loss": 0.4194, "step": 5478 }, { "epoch": 1.5381807973048849, "grad_norm": 0.6191831231117249, "learning_rate": 5.64905466029733e-06, "loss": 0.361, "step": 5479 }, { "epoch": 1.5384615384615383, "grad_norm": 0.6132239699363708, "learning_rate": 5.64743509210878e-06, "loss": 0.3786, "step": 5480 }, { "epoch": 1.538742279618192, "grad_norm": 0.6818129420280457, "learning_rate": 5.645815454829986e-06, "loss": 0.3878, "step": 5481 }, { "epoch": 1.5390230207748457, "grad_norm": 0.6904420256614685, "learning_rate": 5.644195748633781e-06, "loss": 0.3872, "step": 5482 }, { "epoch": 1.539303761931499, "grad_norm": 0.5789188742637634, "learning_rate": 5.642575973693013e-06, "loss": 0.4358, "step": 5483 }, { "epoch": 1.5395845030881528, "grad_norm": 0.6155490875244141, "learning_rate": 5.640956130180533e-06, "loss": 0.3667, "step": 5484 }, { "epoch": 1.5398652442448064, "grad_norm": 0.7449417114257812, "learning_rate": 5.639336218269199e-06, "loss": 0.3935, "step": 5485 }, { "epoch": 1.5401459854014599, "grad_norm": 0.6869012713432312, "learning_rate": 5.6377162381318806e-06, "loss": 0.3808, "step": 5486 }, { "epoch": 1.5404267265581133, "grad_norm": 0.5921604037284851, "learning_rate": 5.6360961899414515e-06, "loss": 0.4041, "step": 5487 }, { "epoch": 1.540707467714767, "grad_norm": 0.7154459357261658, "learning_rate": 5.634476073870791e-06, "loss": 0.3615, "step": 5488 }, { "epoch": 1.5409882088714206, "grad_norm": 0.6011552214622498, "learning_rate": 5.632855890092791e-06, "loss": 0.4014, "step": 5489 }, { "epoch": 1.541268950028074, "grad_norm": 0.8709184527397156, "learning_rate": 5.631235638780345e-06, "loss": 0.3773, "step": 5490 }, { "epoch": 1.5415496911847277, "grad_norm": 0.710959255695343, "learning_rate": 5.629615320106356e-06, "loss": 0.3899, "step": 5491 }, { "epoch": 1.5418304323413814, "grad_norm": 0.6088156700134277, "learning_rate": 5.627994934243737e-06, "loss": 0.4004, "step": 5492 }, { "epoch": 1.5421111734980348, "grad_norm": 0.7583803534507751, "learning_rate": 5.626374481365404e-06, "loss": 0.4415, "step": 5493 }, { "epoch": 1.5423919146546883, "grad_norm": 0.7273849248886108, "learning_rate": 5.624753961644281e-06, "loss": 0.4285, "step": 5494 }, { "epoch": 1.542672655811342, "grad_norm": 0.7240543365478516, "learning_rate": 5.623133375253301e-06, "loss": 0.3664, "step": 5495 }, { "epoch": 1.5429533969679956, "grad_norm": 0.6836703419685364, "learning_rate": 5.621512722365401e-06, "loss": 0.3607, "step": 5496 }, { "epoch": 1.543234138124649, "grad_norm": 0.6441492438316345, "learning_rate": 5.619892003153529e-06, "loss": 0.4024, "step": 5497 }, { "epoch": 1.5435148792813025, "grad_norm": 0.6888523101806641, "learning_rate": 5.618271217790636e-06, "loss": 0.3961, "step": 5498 }, { "epoch": 1.5437956204379562, "grad_norm": 0.6339767575263977, "learning_rate": 5.616650366449685e-06, "loss": 0.3833, "step": 5499 }, { "epoch": 1.5440763615946098, "grad_norm": 0.6974245309829712, "learning_rate": 5.615029449303642e-06, "loss": 0.3928, "step": 5500 }, { "epoch": 1.5443571027512633, "grad_norm": 0.6889846920967102, "learning_rate": 5.613408466525479e-06, "loss": 0.3889, "step": 5501 }, { "epoch": 1.544637843907917, "grad_norm": 0.6527835726737976, "learning_rate": 5.61178741828818e-06, "loss": 0.4131, "step": 5502 }, { "epoch": 1.5449185850645706, "grad_norm": 0.6665393114089966, "learning_rate": 5.610166304764732e-06, "loss": 0.4026, "step": 5503 }, { "epoch": 1.545199326221224, "grad_norm": 0.638247013092041, "learning_rate": 5.60854512612813e-06, "loss": 0.3957, "step": 5504 }, { "epoch": 1.5454800673778775, "grad_norm": 0.7534692883491516, "learning_rate": 5.6069238825513774e-06, "loss": 0.3884, "step": 5505 }, { "epoch": 1.5457608085345311, "grad_norm": 0.6691770553588867, "learning_rate": 5.6053025742074805e-06, "loss": 0.3791, "step": 5506 }, { "epoch": 1.5460415496911848, "grad_norm": 0.5810383558273315, "learning_rate": 5.603681201269458e-06, "loss": 0.4009, "step": 5507 }, { "epoch": 1.5463222908478382, "grad_norm": 0.6450157165527344, "learning_rate": 5.6020597639103325e-06, "loss": 0.4004, "step": 5508 }, { "epoch": 1.546603032004492, "grad_norm": 0.7024112939834595, "learning_rate": 5.600438262303132e-06, "loss": 0.4178, "step": 5509 }, { "epoch": 1.5468837731611456, "grad_norm": 0.6183632612228394, "learning_rate": 5.598816696620895e-06, "loss": 0.4037, "step": 5510 }, { "epoch": 1.547164514317799, "grad_norm": 0.6242706775665283, "learning_rate": 5.597195067036663e-06, "loss": 0.3941, "step": 5511 }, { "epoch": 1.5474452554744524, "grad_norm": 0.9261346459388733, "learning_rate": 5.595573373723487e-06, "loss": 0.4497, "step": 5512 }, { "epoch": 1.547725996631106, "grad_norm": 0.7260050177574158, "learning_rate": 5.593951616854425e-06, "loss": 0.3641, "step": 5513 }, { "epoch": 1.5480067377877598, "grad_norm": 0.5826572179794312, "learning_rate": 5.59232979660254e-06, "loss": 0.4066, "step": 5514 }, { "epoch": 1.5482874789444132, "grad_norm": 0.7458318471908569, "learning_rate": 5.590707913140901e-06, "loss": 0.3966, "step": 5515 }, { "epoch": 1.5485682201010667, "grad_norm": 0.61967533826828, "learning_rate": 5.589085966642589e-06, "loss": 0.3988, "step": 5516 }, { "epoch": 1.5488489612577205, "grad_norm": 0.6171215772628784, "learning_rate": 5.587463957280685e-06, "loss": 0.3914, "step": 5517 }, { "epoch": 1.549129702414374, "grad_norm": 0.6467472910881042, "learning_rate": 5.585841885228281e-06, "loss": 0.3976, "step": 5518 }, { "epoch": 1.5494104435710274, "grad_norm": 0.7427176833152771, "learning_rate": 5.584219750658473e-06, "loss": 0.4056, "step": 5519 }, { "epoch": 1.549691184727681, "grad_norm": 0.7593867182731628, "learning_rate": 5.582597553744366e-06, "loss": 0.398, "step": 5520 }, { "epoch": 1.5499719258843347, "grad_norm": 0.6774862408638, "learning_rate": 5.580975294659074e-06, "loss": 0.355, "step": 5521 }, { "epoch": 1.5502526670409882, "grad_norm": 0.6058395504951477, "learning_rate": 5.579352973575709e-06, "loss": 0.377, "step": 5522 }, { "epoch": 1.5505334081976416, "grad_norm": 0.7323144674301147, "learning_rate": 5.577730590667397e-06, "loss": 0.3708, "step": 5523 }, { "epoch": 1.5508141493542953, "grad_norm": 0.729806661605835, "learning_rate": 5.5761081461072695e-06, "loss": 0.3996, "step": 5524 }, { "epoch": 1.551094890510949, "grad_norm": 0.6938438415527344, "learning_rate": 5.574485640068464e-06, "loss": 0.3463, "step": 5525 }, { "epoch": 1.5513756316676024, "grad_norm": 0.6121125221252441, "learning_rate": 5.572863072724123e-06, "loss": 0.3984, "step": 5526 }, { "epoch": 1.551656372824256, "grad_norm": 0.6394217610359192, "learning_rate": 5.571240444247399e-06, "loss": 0.3701, "step": 5527 }, { "epoch": 1.5519371139809097, "grad_norm": 0.7209957242012024, "learning_rate": 5.569617754811444e-06, "loss": 0.404, "step": 5528 }, { "epoch": 1.5522178551375632, "grad_norm": 0.6942537426948547, "learning_rate": 5.567995004589425e-06, "loss": 0.3906, "step": 5529 }, { "epoch": 1.5524985962942166, "grad_norm": 0.6493688225746155, "learning_rate": 5.566372193754512e-06, "loss": 0.4436, "step": 5530 }, { "epoch": 1.5527793374508703, "grad_norm": 0.5768901109695435, "learning_rate": 5.564749322479881e-06, "loss": 0.4155, "step": 5531 }, { "epoch": 1.553060078607524, "grad_norm": 0.675581157207489, "learning_rate": 5.5631263909387145e-06, "loss": 0.3932, "step": 5532 }, { "epoch": 1.5533408197641774, "grad_norm": 0.6773494482040405, "learning_rate": 5.561503399304201e-06, "loss": 0.4234, "step": 5533 }, { "epoch": 1.553621560920831, "grad_norm": 0.6229548454284668, "learning_rate": 5.559880347749536e-06, "loss": 0.3775, "step": 5534 }, { "epoch": 1.5539023020774847, "grad_norm": 0.6098642349243164, "learning_rate": 5.558257236447921e-06, "loss": 0.3462, "step": 5535 }, { "epoch": 1.5541830432341381, "grad_norm": 0.6481459736824036, "learning_rate": 5.556634065572567e-06, "loss": 0.3707, "step": 5536 }, { "epoch": 1.5544637843907916, "grad_norm": 0.6870434880256653, "learning_rate": 5.555010835296687e-06, "loss": 0.3573, "step": 5537 }, { "epoch": 1.5547445255474452, "grad_norm": 0.6114113330841064, "learning_rate": 5.553387545793503e-06, "loss": 0.3716, "step": 5538 }, { "epoch": 1.555025266704099, "grad_norm": 0.6437608599662781, "learning_rate": 5.551764197236239e-06, "loss": 0.4068, "step": 5539 }, { "epoch": 1.5553060078607523, "grad_norm": 0.6629282236099243, "learning_rate": 5.550140789798132e-06, "loss": 0.3855, "step": 5540 }, { "epoch": 1.5555867490174058, "grad_norm": 0.6390807032585144, "learning_rate": 5.54851732365242e-06, "loss": 0.3635, "step": 5541 }, { "epoch": 1.5558674901740597, "grad_norm": 0.6676781177520752, "learning_rate": 5.5468937989723506e-06, "loss": 0.4189, "step": 5542 }, { "epoch": 1.5561482313307131, "grad_norm": 0.6239802837371826, "learning_rate": 5.545270215931177e-06, "loss": 0.3526, "step": 5543 }, { "epoch": 1.5564289724873666, "grad_norm": 0.6265968680381775, "learning_rate": 5.543646574702158e-06, "loss": 0.4239, "step": 5544 }, { "epoch": 1.5567097136440202, "grad_norm": 0.6826972365379333, "learning_rate": 5.5420228754585545e-06, "loss": 0.4096, "step": 5545 }, { "epoch": 1.5569904548006739, "grad_norm": 0.6733637452125549, "learning_rate": 5.540399118373641e-06, "loss": 0.3595, "step": 5546 }, { "epoch": 1.5572711959573273, "grad_norm": 0.6451590061187744, "learning_rate": 5.538775303620695e-06, "loss": 0.3829, "step": 5547 }, { "epoch": 1.5575519371139808, "grad_norm": 0.675022304058075, "learning_rate": 5.5371514313729975e-06, "loss": 0.3709, "step": 5548 }, { "epoch": 1.5578326782706344, "grad_norm": 0.6459131836891174, "learning_rate": 5.535527501803842e-06, "loss": 0.4349, "step": 5549 }, { "epoch": 1.558113419427288, "grad_norm": 0.6688864231109619, "learning_rate": 5.533903515086521e-06, "loss": 0.3794, "step": 5550 }, { "epoch": 1.5583941605839415, "grad_norm": 0.6998948454856873, "learning_rate": 5.5322794713943355e-06, "loss": 0.3694, "step": 5551 }, { "epoch": 1.5586749017405952, "grad_norm": 0.632996678352356, "learning_rate": 5.530655370900596e-06, "loss": 0.3921, "step": 5552 }, { "epoch": 1.5589556428972489, "grad_norm": 0.6345375180244446, "learning_rate": 5.529031213778615e-06, "loss": 0.3684, "step": 5553 }, { "epoch": 1.5592363840539023, "grad_norm": 0.7264622449874878, "learning_rate": 5.527407000201712e-06, "loss": 0.3744, "step": 5554 }, { "epoch": 1.5595171252105557, "grad_norm": 0.6772736310958862, "learning_rate": 5.525782730343215e-06, "loss": 0.3893, "step": 5555 }, { "epoch": 1.5597978663672094, "grad_norm": 0.649009644985199, "learning_rate": 5.524158404376453e-06, "loss": 0.3654, "step": 5556 }, { "epoch": 1.560078607523863, "grad_norm": 0.741586446762085, "learning_rate": 5.522534022474766e-06, "loss": 0.409, "step": 5557 }, { "epoch": 1.5603593486805165, "grad_norm": 0.6765753030776978, "learning_rate": 5.520909584811498e-06, "loss": 0.4212, "step": 5558 }, { "epoch": 1.5606400898371702, "grad_norm": 0.6014144420623779, "learning_rate": 5.519285091559998e-06, "loss": 0.3859, "step": 5559 }, { "epoch": 1.5609208309938238, "grad_norm": 0.6442937850952148, "learning_rate": 5.517660542893625e-06, "loss": 0.3625, "step": 5560 }, { "epoch": 1.5612015721504773, "grad_norm": 0.6993957757949829, "learning_rate": 5.516035938985735e-06, "loss": 0.3748, "step": 5561 }, { "epoch": 1.5614823133071307, "grad_norm": 0.7138069272041321, "learning_rate": 5.5144112800097e-06, "loss": 0.4089, "step": 5562 }, { "epoch": 1.5617630544637844, "grad_norm": 0.6776037216186523, "learning_rate": 5.51278656613889e-06, "loss": 0.3774, "step": 5563 }, { "epoch": 1.562043795620438, "grad_norm": 0.5727848410606384, "learning_rate": 5.5111617975466895e-06, "loss": 0.3572, "step": 5564 }, { "epoch": 1.5623245367770915, "grad_norm": 0.6910765767097473, "learning_rate": 5.5095369744064776e-06, "loss": 0.3996, "step": 5565 }, { "epoch": 1.562605277933745, "grad_norm": 0.7952026128768921, "learning_rate": 5.50791209689165e-06, "loss": 0.3761, "step": 5566 }, { "epoch": 1.5628860190903988, "grad_norm": 0.6733259558677673, "learning_rate": 5.506287165175602e-06, "loss": 0.3828, "step": 5567 }, { "epoch": 1.5631667602470523, "grad_norm": 0.814070999622345, "learning_rate": 5.504662179431735e-06, "loss": 0.3774, "step": 5568 }, { "epoch": 1.5634475014037057, "grad_norm": 0.6128706336021423, "learning_rate": 5.503037139833461e-06, "loss": 0.3603, "step": 5569 }, { "epoch": 1.5637282425603594, "grad_norm": 0.6916417479515076, "learning_rate": 5.5014120465541885e-06, "loss": 0.3528, "step": 5570 }, { "epoch": 1.564008983717013, "grad_norm": 0.7357199788093567, "learning_rate": 5.4997868997673435e-06, "loss": 0.4089, "step": 5571 }, { "epoch": 1.5642897248736665, "grad_norm": 0.7291268110275269, "learning_rate": 5.498161699646347e-06, "loss": 0.3958, "step": 5572 }, { "epoch": 1.56457046603032, "grad_norm": 0.6182436347007751, "learning_rate": 5.496536446364632e-06, "loss": 0.3955, "step": 5573 }, { "epoch": 1.5648512071869736, "grad_norm": 0.6410479545593262, "learning_rate": 5.494911140095634e-06, "loss": 0.346, "step": 5574 }, { "epoch": 1.5651319483436272, "grad_norm": 0.6170321106910706, "learning_rate": 5.493285781012798e-06, "loss": 0.3817, "step": 5575 }, { "epoch": 1.5654126895002807, "grad_norm": 0.6629190444946289, "learning_rate": 5.491660369289571e-06, "loss": 0.4293, "step": 5576 }, { "epoch": 1.5656934306569343, "grad_norm": 0.7044650912284851, "learning_rate": 5.490034905099408e-06, "loss": 0.3708, "step": 5577 }, { "epoch": 1.565974171813588, "grad_norm": 0.6208305358886719, "learning_rate": 5.4884093886157654e-06, "loss": 0.4206, "step": 5578 }, { "epoch": 1.5662549129702414, "grad_norm": 0.8037087321281433, "learning_rate": 5.486783820012109e-06, "loss": 0.3823, "step": 5579 }, { "epoch": 1.5665356541268949, "grad_norm": 0.6731278896331787, "learning_rate": 5.485158199461912e-06, "loss": 0.4122, "step": 5580 }, { "epoch": 1.5668163952835485, "grad_norm": 0.704522967338562, "learning_rate": 5.48353252713865e-06, "loss": 0.4276, "step": 5581 }, { "epoch": 1.5670971364402022, "grad_norm": 0.6292304396629333, "learning_rate": 5.481906803215803e-06, "loss": 0.3593, "step": 5582 }, { "epoch": 1.5673778775968557, "grad_norm": 0.6704569458961487, "learning_rate": 5.4802810278668575e-06, "loss": 0.3968, "step": 5583 }, { "epoch": 1.5676586187535093, "grad_norm": 0.6457850933074951, "learning_rate": 5.478655201265308e-06, "loss": 0.3673, "step": 5584 }, { "epoch": 1.567939359910163, "grad_norm": 0.6567925810813904, "learning_rate": 5.477029323584652e-06, "loss": 0.4081, "step": 5585 }, { "epoch": 1.5682201010668164, "grad_norm": 0.6074472069740295, "learning_rate": 5.475403394998393e-06, "loss": 0.3884, "step": 5586 }, { "epoch": 1.5685008422234699, "grad_norm": 0.699834942817688, "learning_rate": 5.473777415680042e-06, "loss": 0.4117, "step": 5587 }, { "epoch": 1.5687815833801235, "grad_norm": 0.6256060004234314, "learning_rate": 5.472151385803108e-06, "loss": 0.3448, "step": 5588 }, { "epoch": 1.5690623245367772, "grad_norm": 0.6225060224533081, "learning_rate": 5.470525305541117e-06, "loss": 0.3771, "step": 5589 }, { "epoch": 1.5693430656934306, "grad_norm": 0.5951137542724609, "learning_rate": 5.46889917506759e-06, "loss": 0.3699, "step": 5590 }, { "epoch": 1.569623806850084, "grad_norm": 0.7257815003395081, "learning_rate": 5.467272994556059e-06, "loss": 0.3987, "step": 5591 }, { "epoch": 1.5699045480067377, "grad_norm": 0.5655238032341003, "learning_rate": 5.465646764180059e-06, "loss": 0.4205, "step": 5592 }, { "epoch": 1.5701852891633914, "grad_norm": 0.7138909697532654, "learning_rate": 5.464020484113134e-06, "loss": 0.4162, "step": 5593 }, { "epoch": 1.5704660303200448, "grad_norm": 0.6702297329902649, "learning_rate": 5.462394154528827e-06, "loss": 0.4072, "step": 5594 }, { "epoch": 1.5707467714766985, "grad_norm": 0.5846837162971497, "learning_rate": 5.460767775600691e-06, "loss": 0.3813, "step": 5595 }, { "epoch": 1.5710275126333522, "grad_norm": 0.6576070189476013, "learning_rate": 5.459141347502284e-06, "loss": 0.3778, "step": 5596 }, { "epoch": 1.5713082537900056, "grad_norm": 0.6405521035194397, "learning_rate": 5.457514870407168e-06, "loss": 0.3421, "step": 5597 }, { "epoch": 1.571588994946659, "grad_norm": 0.6876504421234131, "learning_rate": 5.4558883444889114e-06, "loss": 0.363, "step": 5598 }, { "epoch": 1.5718697361033127, "grad_norm": 0.7009930610656738, "learning_rate": 5.454261769921083e-06, "loss": 0.3508, "step": 5599 }, { "epoch": 1.5721504772599664, "grad_norm": 0.7065655589103699, "learning_rate": 5.452635146877264e-06, "loss": 0.4081, "step": 5600 }, { "epoch": 1.5724312184166198, "grad_norm": 0.6671035289764404, "learning_rate": 5.4510084755310375e-06, "loss": 0.4366, "step": 5601 }, { "epoch": 1.5727119595732735, "grad_norm": 0.6496288180351257, "learning_rate": 5.44938175605599e-06, "loss": 0.4247, "step": 5602 }, { "epoch": 1.5729927007299271, "grad_norm": 0.8295820951461792, "learning_rate": 5.447754988625717e-06, "loss": 0.4129, "step": 5603 }, { "epoch": 1.5732734418865806, "grad_norm": 0.6814701557159424, "learning_rate": 5.446128173413817e-06, "loss": 0.4212, "step": 5604 }, { "epoch": 1.573554183043234, "grad_norm": 0.6117609739303589, "learning_rate": 5.44450131059389e-06, "loss": 0.3632, "step": 5605 }, { "epoch": 1.5738349241998877, "grad_norm": 0.7425245046615601, "learning_rate": 5.4428744003395496e-06, "loss": 0.4001, "step": 5606 }, { "epoch": 1.5741156653565413, "grad_norm": 0.6963576078414917, "learning_rate": 5.441247442824407e-06, "loss": 0.3764, "step": 5607 }, { "epoch": 1.5743964065131948, "grad_norm": 0.7188160419464111, "learning_rate": 5.4396204382220795e-06, "loss": 0.3745, "step": 5608 }, { "epoch": 1.5746771476698485, "grad_norm": 0.6298017501831055, "learning_rate": 5.437993386706195e-06, "loss": 0.3872, "step": 5609 }, { "epoch": 1.5749578888265021, "grad_norm": 0.660544216632843, "learning_rate": 5.436366288450379e-06, "loss": 0.3676, "step": 5610 }, { "epoch": 1.5752386299831556, "grad_norm": 0.7274445295333862, "learning_rate": 5.4347391436282656e-06, "loss": 0.4052, "step": 5611 }, { "epoch": 1.575519371139809, "grad_norm": 0.6884285807609558, "learning_rate": 5.433111952413496e-06, "loss": 0.3688, "step": 5612 }, { "epoch": 1.5758001122964627, "grad_norm": 0.6660681366920471, "learning_rate": 5.43148471497971e-06, "loss": 0.4059, "step": 5613 }, { "epoch": 1.5760808534531163, "grad_norm": 0.6194701790809631, "learning_rate": 5.429857431500559e-06, "loss": 0.3832, "step": 5614 }, { "epoch": 1.5763615946097698, "grad_norm": 0.6025277972221375, "learning_rate": 5.428230102149697e-06, "loss": 0.3782, "step": 5615 }, { "epoch": 1.5766423357664232, "grad_norm": 0.5469680428504944, "learning_rate": 5.426602727100782e-06, "loss": 0.3582, "step": 5616 }, { "epoch": 1.5769230769230769, "grad_norm": 0.6515589952468872, "learning_rate": 5.424975306527474e-06, "loss": 0.4364, "step": 5617 }, { "epoch": 1.5772038180797305, "grad_norm": 0.6261181831359863, "learning_rate": 5.423347840603446e-06, "loss": 0.393, "step": 5618 }, { "epoch": 1.577484559236384, "grad_norm": 0.6409482359886169, "learning_rate": 5.421720329502369e-06, "loss": 0.3687, "step": 5619 }, { "epoch": 1.5777653003930376, "grad_norm": 0.6708099246025085, "learning_rate": 5.420092773397922e-06, "loss": 0.4339, "step": 5620 }, { "epoch": 1.5780460415496913, "grad_norm": 0.6219494938850403, "learning_rate": 5.418465172463785e-06, "loss": 0.3659, "step": 5621 }, { "epoch": 1.5783267827063447, "grad_norm": 0.7946568131446838, "learning_rate": 5.416837526873647e-06, "loss": 0.4062, "step": 5622 }, { "epoch": 1.5786075238629982, "grad_norm": 0.6432189345359802, "learning_rate": 5.415209836801201e-06, "loss": 0.3883, "step": 5623 }, { "epoch": 1.5788882650196518, "grad_norm": 0.6144047975540161, "learning_rate": 5.4135821024201425e-06, "loss": 0.3949, "step": 5624 }, { "epoch": 1.5791690061763055, "grad_norm": 0.6671247482299805, "learning_rate": 5.411954323904175e-06, "loss": 0.4313, "step": 5625 }, { "epoch": 1.579449747332959, "grad_norm": 0.5982580780982971, "learning_rate": 5.410326501427004e-06, "loss": 0.3392, "step": 5626 }, { "epoch": 1.5797304884896126, "grad_norm": 0.6136658787727356, "learning_rate": 5.40869863516234e-06, "loss": 0.3647, "step": 5627 }, { "epoch": 1.5800112296462663, "grad_norm": 0.6476240158081055, "learning_rate": 5.407070725283898e-06, "loss": 0.3885, "step": 5628 }, { "epoch": 1.5802919708029197, "grad_norm": 0.7178700566291809, "learning_rate": 5.4054427719654e-06, "loss": 0.3636, "step": 5629 }, { "epoch": 1.5805727119595732, "grad_norm": 0.6783363223075867, "learning_rate": 5.40381477538057e-06, "loss": 0.3823, "step": 5630 }, { "epoch": 1.5808534531162268, "grad_norm": 0.623234212398529, "learning_rate": 5.402186735703141e-06, "loss": 0.3826, "step": 5631 }, { "epoch": 1.5811341942728805, "grad_norm": 0.6377182006835938, "learning_rate": 5.4005586531068425e-06, "loss": 0.4293, "step": 5632 }, { "epoch": 1.581414935429534, "grad_norm": 0.6228262782096863, "learning_rate": 5.398930527765416e-06, "loss": 0.3748, "step": 5633 }, { "epoch": 1.5816956765861874, "grad_norm": 0.6211974024772644, "learning_rate": 5.3973023598526045e-06, "loss": 0.3807, "step": 5634 }, { "epoch": 1.5819764177428413, "grad_norm": 0.6278573274612427, "learning_rate": 5.395674149542155e-06, "loss": 0.3784, "step": 5635 }, { "epoch": 1.5822571588994947, "grad_norm": 0.6505194902420044, "learning_rate": 5.394045897007821e-06, "loss": 0.4335, "step": 5636 }, { "epoch": 1.5825379000561481, "grad_norm": 0.6234167814254761, "learning_rate": 5.392417602423361e-06, "loss": 0.4005, "step": 5637 }, { "epoch": 1.5828186412128018, "grad_norm": 0.6796559691429138, "learning_rate": 5.390789265962534e-06, "loss": 0.4065, "step": 5638 }, { "epoch": 1.5830993823694555, "grad_norm": 0.7159935235977173, "learning_rate": 5.389160887799105e-06, "loss": 0.3921, "step": 5639 }, { "epoch": 1.583380123526109, "grad_norm": 0.6569457054138184, "learning_rate": 5.387532468106848e-06, "loss": 0.3437, "step": 5640 }, { "epoch": 1.5836608646827623, "grad_norm": 0.6812229156494141, "learning_rate": 5.385904007059535e-06, "loss": 0.3895, "step": 5641 }, { "epoch": 1.583941605839416, "grad_norm": 0.571449875831604, "learning_rate": 5.384275504830946e-06, "loss": 0.3738, "step": 5642 }, { "epoch": 1.5842223469960697, "grad_norm": 0.5636094212532043, "learning_rate": 5.382646961594865e-06, "loss": 0.4131, "step": 5643 }, { "epoch": 1.5845030881527231, "grad_norm": 0.6847487092018127, "learning_rate": 5.38101837752508e-06, "loss": 0.3666, "step": 5644 }, { "epoch": 1.5847838293093768, "grad_norm": 0.6250169277191162, "learning_rate": 5.379389752795383e-06, "loss": 0.4011, "step": 5645 }, { "epoch": 1.5850645704660304, "grad_norm": 0.6602939367294312, "learning_rate": 5.377761087579571e-06, "loss": 0.3728, "step": 5646 }, { "epoch": 1.5853453116226839, "grad_norm": 0.6117948293685913, "learning_rate": 5.376132382051445e-06, "loss": 0.367, "step": 5647 }, { "epoch": 1.5856260527793373, "grad_norm": 0.5795977711677551, "learning_rate": 5.3745036363848105e-06, "loss": 0.3457, "step": 5648 }, { "epoch": 1.585906793935991, "grad_norm": 0.6143127679824829, "learning_rate": 5.3728748507534755e-06, "loss": 0.3461, "step": 5649 }, { "epoch": 1.5861875350926447, "grad_norm": 0.6950699090957642, "learning_rate": 5.371246025331256e-06, "loss": 0.3638, "step": 5650 }, { "epoch": 1.586468276249298, "grad_norm": 0.6955627202987671, "learning_rate": 5.36961716029197e-06, "loss": 0.4029, "step": 5651 }, { "epoch": 1.5867490174059518, "grad_norm": 0.6370987296104431, "learning_rate": 5.367988255809438e-06, "loss": 0.3897, "step": 5652 }, { "epoch": 1.5870297585626054, "grad_norm": 0.6839419007301331, "learning_rate": 5.366359312057489e-06, "loss": 0.4192, "step": 5653 }, { "epoch": 1.5873104997192589, "grad_norm": 0.5921178460121155, "learning_rate": 5.364730329209951e-06, "loss": 0.3852, "step": 5654 }, { "epoch": 1.5875912408759123, "grad_norm": 0.6688896417617798, "learning_rate": 5.3631013074406606e-06, "loss": 0.3945, "step": 5655 }, { "epoch": 1.587871982032566, "grad_norm": 0.637823224067688, "learning_rate": 5.361472246923457e-06, "loss": 0.3999, "step": 5656 }, { "epoch": 1.5881527231892196, "grad_norm": 0.5959929823875427, "learning_rate": 5.359843147832183e-06, "loss": 0.3443, "step": 5657 }, { "epoch": 1.588433464345873, "grad_norm": 0.6044209003448486, "learning_rate": 5.358214010340686e-06, "loss": 0.4039, "step": 5658 }, { "epoch": 1.5887142055025265, "grad_norm": 0.6545689105987549, "learning_rate": 5.356584834622818e-06, "loss": 0.4111, "step": 5659 }, { "epoch": 1.5889949466591804, "grad_norm": 0.612241804599762, "learning_rate": 5.3549556208524336e-06, "loss": 0.4077, "step": 5660 }, { "epoch": 1.5892756878158338, "grad_norm": 0.5983982086181641, "learning_rate": 5.353326369203392e-06, "loss": 0.4107, "step": 5661 }, { "epoch": 1.5895564289724873, "grad_norm": 0.6796982884407043, "learning_rate": 5.351697079849557e-06, "loss": 0.3761, "step": 5662 }, { "epoch": 1.589837170129141, "grad_norm": 0.61302250623703, "learning_rate": 5.350067752964798e-06, "loss": 0.3949, "step": 5663 }, { "epoch": 1.5901179112857946, "grad_norm": 0.6317418813705444, "learning_rate": 5.348438388722986e-06, "loss": 0.3863, "step": 5664 }, { "epoch": 1.590398652442448, "grad_norm": 0.5928323864936829, "learning_rate": 5.3468089872979945e-06, "loss": 0.36, "step": 5665 }, { "epoch": 1.5906793935991015, "grad_norm": 0.6907023191452026, "learning_rate": 5.345179548863705e-06, "loss": 0.3854, "step": 5666 }, { "epoch": 1.5909601347557552, "grad_norm": 0.6863617300987244, "learning_rate": 5.343550073594e-06, "loss": 0.3868, "step": 5667 }, { "epoch": 1.5912408759124088, "grad_norm": 0.6140517592430115, "learning_rate": 5.341920561662767e-06, "loss": 0.3997, "step": 5668 }, { "epoch": 1.5915216170690623, "grad_norm": 0.6900655031204224, "learning_rate": 5.3402910132439004e-06, "loss": 0.4194, "step": 5669 }, { "epoch": 1.591802358225716, "grad_norm": 0.6943616271018982, "learning_rate": 5.338661428511292e-06, "loss": 0.4013, "step": 5670 }, { "epoch": 1.5920830993823696, "grad_norm": 0.585360050201416, "learning_rate": 5.3370318076388405e-06, "loss": 0.3818, "step": 5671 }, { "epoch": 1.592363840539023, "grad_norm": 0.6311351656913757, "learning_rate": 5.335402150800451e-06, "loss": 0.3663, "step": 5672 }, { "epoch": 1.5926445816956765, "grad_norm": 0.6898561120033264, "learning_rate": 5.33377245817003e-06, "loss": 0.3546, "step": 5673 }, { "epoch": 1.5929253228523301, "grad_norm": 0.661670982837677, "learning_rate": 5.332142729921488e-06, "loss": 0.3921, "step": 5674 }, { "epoch": 1.5932060640089838, "grad_norm": 0.6570422053337097, "learning_rate": 5.33051296622874e-06, "loss": 0.4157, "step": 5675 }, { "epoch": 1.5934868051656372, "grad_norm": 0.5399648547172546, "learning_rate": 5.328883167265703e-06, "loss": 0.375, "step": 5676 }, { "epoch": 1.593767546322291, "grad_norm": 0.7241529822349548, "learning_rate": 5.327253333206299e-06, "loss": 0.3709, "step": 5677 }, { "epoch": 1.5940482874789446, "grad_norm": 0.6967384815216064, "learning_rate": 5.325623464224454e-06, "loss": 0.3932, "step": 5678 }, { "epoch": 1.594329028635598, "grad_norm": 0.7840999364852905, "learning_rate": 5.323993560494099e-06, "loss": 0.4236, "step": 5679 }, { "epoch": 1.5946097697922514, "grad_norm": 0.6832603812217712, "learning_rate": 5.322363622189165e-06, "loss": 0.3965, "step": 5680 }, { "epoch": 1.594890510948905, "grad_norm": 0.6805334687232971, "learning_rate": 5.320733649483591e-06, "loss": 0.3309, "step": 5681 }, { "epoch": 1.5951712521055588, "grad_norm": 0.6894299983978271, "learning_rate": 5.319103642551315e-06, "loss": 0.3613, "step": 5682 }, { "epoch": 1.5954519932622122, "grad_norm": 0.7123468518257141, "learning_rate": 5.3174736015662845e-06, "loss": 0.3768, "step": 5683 }, { "epoch": 1.5957327344188657, "grad_norm": 0.7026888728141785, "learning_rate": 5.315843526702443e-06, "loss": 0.3622, "step": 5684 }, { "epoch": 1.5960134755755195, "grad_norm": 0.6979182958602905, "learning_rate": 5.3142134181337466e-06, "loss": 0.4203, "step": 5685 }, { "epoch": 1.596294216732173, "grad_norm": 0.6742028594017029, "learning_rate": 5.312583276034148e-06, "loss": 0.3579, "step": 5686 }, { "epoch": 1.5965749578888264, "grad_norm": 0.7389938831329346, "learning_rate": 5.310953100577606e-06, "loss": 0.3686, "step": 5687 }, { "epoch": 1.59685569904548, "grad_norm": 0.7732906937599182, "learning_rate": 5.309322891938082e-06, "loss": 0.4171, "step": 5688 }, { "epoch": 1.5971364402021337, "grad_norm": 0.6303626894950867, "learning_rate": 5.307692650289542e-06, "loss": 0.3753, "step": 5689 }, { "epoch": 1.5974171813587872, "grad_norm": 0.7214590907096863, "learning_rate": 5.306062375805957e-06, "loss": 0.3934, "step": 5690 }, { "epoch": 1.5976979225154406, "grad_norm": 0.6761311888694763, "learning_rate": 5.304432068661298e-06, "loss": 0.3229, "step": 5691 }, { "epoch": 1.5979786636720943, "grad_norm": 0.736258864402771, "learning_rate": 5.302801729029543e-06, "loss": 0.3751, "step": 5692 }, { "epoch": 1.598259404828748, "grad_norm": 0.7438814043998718, "learning_rate": 5.301171357084669e-06, "loss": 0.3805, "step": 5693 }, { "epoch": 1.5985401459854014, "grad_norm": 0.695715606212616, "learning_rate": 5.29954095300066e-06, "loss": 0.3869, "step": 5694 }, { "epoch": 1.598820887142055, "grad_norm": 0.6188850998878479, "learning_rate": 5.2979105169515045e-06, "loss": 0.4087, "step": 5695 }, { "epoch": 1.5991016282987087, "grad_norm": 0.6104736924171448, "learning_rate": 5.2962800491111895e-06, "loss": 0.3956, "step": 5696 }, { "epoch": 1.5993823694553622, "grad_norm": 0.665642261505127, "learning_rate": 5.294649549653713e-06, "loss": 0.3867, "step": 5697 }, { "epoch": 1.5996631106120156, "grad_norm": 0.593701183795929, "learning_rate": 5.2930190187530675e-06, "loss": 0.3557, "step": 5698 }, { "epoch": 1.5999438517686693, "grad_norm": 0.6774048209190369, "learning_rate": 5.291388456583254e-06, "loss": 0.3939, "step": 5699 }, { "epoch": 1.600224592925323, "grad_norm": 0.5818973779678345, "learning_rate": 5.289757863318277e-06, "loss": 0.3877, "step": 5700 }, { "epoch": 1.6005053340819764, "grad_norm": 0.6529117822647095, "learning_rate": 5.288127239132143e-06, "loss": 0.3597, "step": 5701 }, { "epoch": 1.60078607523863, "grad_norm": 0.652026891708374, "learning_rate": 5.28649658419886e-06, "loss": 0.3814, "step": 5702 }, { "epoch": 1.6010668163952837, "grad_norm": 0.5884618163108826, "learning_rate": 5.284865898692446e-06, "loss": 0.3917, "step": 5703 }, { "epoch": 1.6013475575519371, "grad_norm": 0.6404399871826172, "learning_rate": 5.2832351827869135e-06, "loss": 0.3547, "step": 5704 }, { "epoch": 1.6016282987085906, "grad_norm": 0.7353585958480835, "learning_rate": 5.281604436656283e-06, "loss": 0.3876, "step": 5705 }, { "epoch": 1.6019090398652442, "grad_norm": 0.59206223487854, "learning_rate": 5.2799736604745765e-06, "loss": 0.3729, "step": 5706 }, { "epoch": 1.602189781021898, "grad_norm": 0.6682827472686768, "learning_rate": 5.278342854415825e-06, "loss": 0.394, "step": 5707 }, { "epoch": 1.6024705221785513, "grad_norm": 0.6857581734657288, "learning_rate": 5.276712018654054e-06, "loss": 0.3888, "step": 5708 }, { "epoch": 1.6027512633352048, "grad_norm": 0.6567980051040649, "learning_rate": 5.275081153363297e-06, "loss": 0.3857, "step": 5709 }, { "epoch": 1.6030320044918585, "grad_norm": 0.7068313360214233, "learning_rate": 5.27345025871759e-06, "loss": 0.4007, "step": 5710 }, { "epoch": 1.6033127456485121, "grad_norm": 0.6105488538742065, "learning_rate": 5.271819334890972e-06, "loss": 0.3754, "step": 5711 }, { "epoch": 1.6035934868051656, "grad_norm": 0.5581768155097961, "learning_rate": 5.270188382057485e-06, "loss": 0.3347, "step": 5712 }, { "epoch": 1.6038742279618192, "grad_norm": 0.6744940876960754, "learning_rate": 5.2685574003911745e-06, "loss": 0.4348, "step": 5713 }, { "epoch": 1.6041549691184729, "grad_norm": 0.7591729164123535, "learning_rate": 5.266926390066089e-06, "loss": 0.3978, "step": 5714 }, { "epoch": 1.6044357102751263, "grad_norm": 0.7207170724868774, "learning_rate": 5.265295351256277e-06, "loss": 0.3934, "step": 5715 }, { "epoch": 1.6047164514317798, "grad_norm": 0.6559902429580688, "learning_rate": 5.263664284135795e-06, "loss": 0.4285, "step": 5716 }, { "epoch": 1.6049971925884334, "grad_norm": 0.6645886301994324, "learning_rate": 5.2620331888787e-06, "loss": 0.4305, "step": 5717 }, { "epoch": 1.605277933745087, "grad_norm": 0.6138094663619995, "learning_rate": 5.260402065659054e-06, "loss": 0.3626, "step": 5718 }, { "epoch": 1.6055586749017405, "grad_norm": 0.604344367980957, "learning_rate": 5.258770914650918e-06, "loss": 0.3848, "step": 5719 }, { "epoch": 1.6058394160583942, "grad_norm": 0.5603964924812317, "learning_rate": 5.25713973602836e-06, "loss": 0.3518, "step": 5720 }, { "epoch": 1.6061201572150479, "grad_norm": 0.6279407143592834, "learning_rate": 5.255508529965447e-06, "loss": 0.3836, "step": 5721 }, { "epoch": 1.6064008983717013, "grad_norm": 0.627970278263092, "learning_rate": 5.253877296636254e-06, "loss": 0.3928, "step": 5722 }, { "epoch": 1.6066816395283547, "grad_norm": 0.6636152267456055, "learning_rate": 5.252246036214853e-06, "loss": 0.3824, "step": 5723 }, { "epoch": 1.6069623806850084, "grad_norm": 0.6894207000732422, "learning_rate": 5.250614748875327e-06, "loss": 0.3978, "step": 5724 }, { "epoch": 1.607243121841662, "grad_norm": 0.6381357312202454, "learning_rate": 5.24898343479175e-06, "loss": 0.3801, "step": 5725 }, { "epoch": 1.6075238629983155, "grad_norm": 0.6617277264595032, "learning_rate": 5.24735209413821e-06, "loss": 0.4457, "step": 5726 }, { "epoch": 1.607804604154969, "grad_norm": 0.6681424975395203, "learning_rate": 5.2457207270887935e-06, "loss": 0.3761, "step": 5727 }, { "epoch": 1.6080853453116228, "grad_norm": 0.6610637903213501, "learning_rate": 5.244089333817588e-06, "loss": 0.4367, "step": 5728 }, { "epoch": 1.6083660864682763, "grad_norm": 0.7265160083770752, "learning_rate": 5.242457914498688e-06, "loss": 0.4346, "step": 5729 }, { "epoch": 1.6086468276249297, "grad_norm": 0.6373753547668457, "learning_rate": 5.240826469306187e-06, "loss": 0.3769, "step": 5730 }, { "epoch": 1.6089275687815834, "grad_norm": 0.6125161051750183, "learning_rate": 5.239194998414182e-06, "loss": 0.3775, "step": 5731 }, { "epoch": 1.609208309938237, "grad_norm": 0.700334370136261, "learning_rate": 5.237563501996773e-06, "loss": 0.3834, "step": 5732 }, { "epoch": 1.6094890510948905, "grad_norm": 0.6575661301612854, "learning_rate": 5.235931980228066e-06, "loss": 0.3508, "step": 5733 }, { "epoch": 1.609769792251544, "grad_norm": 0.7324906587600708, "learning_rate": 5.234300433282165e-06, "loss": 0.4197, "step": 5734 }, { "epoch": 1.6100505334081976, "grad_norm": 0.7340754270553589, "learning_rate": 5.23266886133318e-06, "loss": 0.4245, "step": 5735 }, { "epoch": 1.6103312745648513, "grad_norm": 0.7993359565734863, "learning_rate": 5.23103726455522e-06, "loss": 0.3587, "step": 5736 }, { "epoch": 1.6106120157215047, "grad_norm": 0.627616286277771, "learning_rate": 5.229405643122399e-06, "loss": 0.3923, "step": 5737 }, { "epoch": 1.6108927568781584, "grad_norm": 0.7044618129730225, "learning_rate": 5.227773997208835e-06, "loss": 0.3955, "step": 5738 }, { "epoch": 1.611173498034812, "grad_norm": 0.7278639674186707, "learning_rate": 5.226142326988646e-06, "loss": 0.3911, "step": 5739 }, { "epoch": 1.6114542391914655, "grad_norm": 0.7551366090774536, "learning_rate": 5.224510632635955e-06, "loss": 0.4101, "step": 5740 }, { "epoch": 1.611734980348119, "grad_norm": 0.7136589884757996, "learning_rate": 5.222878914324886e-06, "loss": 0.389, "step": 5741 }, { "epoch": 1.6120157215047726, "grad_norm": 0.630165159702301, "learning_rate": 5.221247172229564e-06, "loss": 0.3553, "step": 5742 }, { "epoch": 1.6122964626614262, "grad_norm": 0.7382895946502686, "learning_rate": 5.2196154065241204e-06, "loss": 0.3893, "step": 5743 }, { "epoch": 1.6125772038180797, "grad_norm": 0.594893217086792, "learning_rate": 5.217983617382684e-06, "loss": 0.3962, "step": 5744 }, { "epoch": 1.6128579449747333, "grad_norm": 0.7302297949790955, "learning_rate": 5.2163518049793935e-06, "loss": 0.4039, "step": 5745 }, { "epoch": 1.613138686131387, "grad_norm": 0.6644755601882935, "learning_rate": 5.214719969488384e-06, "loss": 0.375, "step": 5746 }, { "epoch": 1.6134194272880404, "grad_norm": 0.6315532326698303, "learning_rate": 5.213088111083795e-06, "loss": 0.3851, "step": 5747 }, { "epoch": 1.6137001684446939, "grad_norm": 0.5566315650939941, "learning_rate": 5.2114562299397665e-06, "loss": 0.3664, "step": 5748 }, { "epoch": 1.6139809096013475, "grad_norm": 0.7148703336715698, "learning_rate": 5.209824326230445e-06, "loss": 0.401, "step": 5749 }, { "epoch": 1.6142616507580012, "grad_norm": 0.6244839429855347, "learning_rate": 5.2081924001299754e-06, "loss": 0.3599, "step": 5750 }, { "epoch": 1.6145423919146547, "grad_norm": 0.6982523798942566, "learning_rate": 5.206560451812508e-06, "loss": 0.3601, "step": 5751 }, { "epoch": 1.614823133071308, "grad_norm": 0.5379160046577454, "learning_rate": 5.204928481452195e-06, "loss": 0.356, "step": 5752 }, { "epoch": 1.615103874227962, "grad_norm": 0.6875414252281189, "learning_rate": 5.203296489223187e-06, "loss": 0.4464, "step": 5753 }, { "epoch": 1.6153846153846154, "grad_norm": 0.6543856263160706, "learning_rate": 5.201664475299643e-06, "loss": 0.3838, "step": 5754 }, { "epoch": 1.6156653565412689, "grad_norm": 0.5894539952278137, "learning_rate": 5.200032439855719e-06, "loss": 0.3794, "step": 5755 }, { "epoch": 1.6159460976979225, "grad_norm": 0.6440432667732239, "learning_rate": 5.198400383065577e-06, "loss": 0.372, "step": 5756 }, { "epoch": 1.6162268388545762, "grad_norm": 0.5637151002883911, "learning_rate": 5.196768305103381e-06, "loss": 0.3473, "step": 5757 }, { "epoch": 1.6165075800112296, "grad_norm": 0.6018733978271484, "learning_rate": 5.195136206143294e-06, "loss": 0.3749, "step": 5758 }, { "epoch": 1.616788321167883, "grad_norm": 0.6205705404281616, "learning_rate": 5.193504086359485e-06, "loss": 0.397, "step": 5759 }, { "epoch": 1.6170690623245367, "grad_norm": 0.6449079513549805, "learning_rate": 5.191871945926123e-06, "loss": 0.3903, "step": 5760 }, { "epoch": 1.6173498034811904, "grad_norm": 0.5725222229957581, "learning_rate": 5.190239785017382e-06, "loss": 0.3929, "step": 5761 }, { "epoch": 1.6176305446378438, "grad_norm": 0.7197320461273193, "learning_rate": 5.188607603807432e-06, "loss": 0.3867, "step": 5762 }, { "epoch": 1.6179112857944975, "grad_norm": 0.5944594144821167, "learning_rate": 5.186975402470453e-06, "loss": 0.378, "step": 5763 }, { "epoch": 1.6181920269511512, "grad_norm": 0.6972327828407288, "learning_rate": 5.185343181180621e-06, "loss": 0.4369, "step": 5764 }, { "epoch": 1.6184727681078046, "grad_norm": 0.6686606407165527, "learning_rate": 5.183710940112117e-06, "loss": 0.3793, "step": 5765 }, { "epoch": 1.618753509264458, "grad_norm": 0.6122421622276306, "learning_rate": 5.182078679439124e-06, "loss": 0.4051, "step": 5766 }, { "epoch": 1.6190342504211117, "grad_norm": 0.5697387456893921, "learning_rate": 5.180446399335826e-06, "loss": 0.3671, "step": 5767 }, { "epoch": 1.6193149915777654, "grad_norm": 0.6529817581176758, "learning_rate": 5.178814099976411e-06, "loss": 0.3993, "step": 5768 }, { "epoch": 1.6195957327344188, "grad_norm": 0.5563371777534485, "learning_rate": 5.177181781535069e-06, "loss": 0.38, "step": 5769 }, { "epoch": 1.6198764738910725, "grad_norm": 0.6792994141578674, "learning_rate": 5.175549444185986e-06, "loss": 0.3902, "step": 5770 }, { "epoch": 1.6201572150477261, "grad_norm": 0.6415961384773254, "learning_rate": 5.173917088103358e-06, "loss": 0.3641, "step": 5771 }, { "epoch": 1.6204379562043796, "grad_norm": 0.6560315489768982, "learning_rate": 5.172284713461382e-06, "loss": 0.3802, "step": 5772 }, { "epoch": 1.620718697361033, "grad_norm": 0.6932475566864014, "learning_rate": 5.170652320434252e-06, "loss": 0.3777, "step": 5773 }, { "epoch": 1.6209994385176867, "grad_norm": 0.6296159625053406, "learning_rate": 5.169019909196168e-06, "loss": 0.3853, "step": 5774 }, { "epoch": 1.6212801796743403, "grad_norm": 0.5964828133583069, "learning_rate": 5.1673874799213295e-06, "loss": 0.3803, "step": 5775 }, { "epoch": 1.6215609208309938, "grad_norm": 0.6716960668563843, "learning_rate": 5.165755032783941e-06, "loss": 0.3629, "step": 5776 }, { "epoch": 1.6218416619876472, "grad_norm": 0.5418384075164795, "learning_rate": 5.164122567958205e-06, "loss": 0.41, "step": 5777 }, { "epoch": 1.6221224031443011, "grad_norm": 0.7283366322517395, "learning_rate": 5.16249008561833e-06, "loss": 0.4203, "step": 5778 }, { "epoch": 1.6224031443009546, "grad_norm": 0.6761435866355896, "learning_rate": 5.160857585938523e-06, "loss": 0.3773, "step": 5779 }, { "epoch": 1.622683885457608, "grad_norm": 0.6270684599876404, "learning_rate": 5.159225069092996e-06, "loss": 0.3775, "step": 5780 }, { "epoch": 1.6229646266142617, "grad_norm": 0.7111120820045471, "learning_rate": 5.157592535255958e-06, "loss": 0.3792, "step": 5781 }, { "epoch": 1.6232453677709153, "grad_norm": 0.6534535884857178, "learning_rate": 5.155959984601626e-06, "loss": 0.3697, "step": 5782 }, { "epoch": 1.6235261089275688, "grad_norm": 0.6421679258346558, "learning_rate": 5.1543274173042125e-06, "loss": 0.369, "step": 5783 }, { "epoch": 1.6238068500842222, "grad_norm": 0.6642189621925354, "learning_rate": 5.152694833537939e-06, "loss": 0.3826, "step": 5784 }, { "epoch": 1.6240875912408759, "grad_norm": 0.7172495126724243, "learning_rate": 5.151062233477021e-06, "loss": 0.3565, "step": 5785 }, { "epoch": 1.6243683323975295, "grad_norm": 0.6423928737640381, "learning_rate": 5.149429617295682e-06, "loss": 0.3785, "step": 5786 }, { "epoch": 1.624649073554183, "grad_norm": 0.6350986361503601, "learning_rate": 5.147796985168142e-06, "loss": 0.3899, "step": 5787 }, { "epoch": 1.6249298147108366, "grad_norm": 0.6232904195785522, "learning_rate": 5.146164337268628e-06, "loss": 0.4013, "step": 5788 }, { "epoch": 1.6252105558674903, "grad_norm": 0.5838711857795715, "learning_rate": 5.144531673771364e-06, "loss": 0.3989, "step": 5789 }, { "epoch": 1.6254912970241437, "grad_norm": 0.625808835029602, "learning_rate": 5.14289899485058e-06, "loss": 0.4052, "step": 5790 }, { "epoch": 1.6257720381807972, "grad_norm": 0.6351568698883057, "learning_rate": 5.141266300680503e-06, "loss": 0.4101, "step": 5791 }, { "epoch": 1.6260527793374508, "grad_norm": 0.5987807512283325, "learning_rate": 5.139633591435364e-06, "loss": 0.3526, "step": 5792 }, { "epoch": 1.6263335204941045, "grad_norm": 0.6660354733467102, "learning_rate": 5.138000867289397e-06, "loss": 0.3572, "step": 5793 }, { "epoch": 1.626614261650758, "grad_norm": 0.7153281569480896, "learning_rate": 5.136368128416835e-06, "loss": 0.3883, "step": 5794 }, { "epoch": 1.6268950028074116, "grad_norm": 0.7444567680358887, "learning_rate": 5.134735374991916e-06, "loss": 0.3765, "step": 5795 }, { "epoch": 1.6271757439640653, "grad_norm": 0.6267321705818176, "learning_rate": 5.133102607188875e-06, "loss": 0.4184, "step": 5796 }, { "epoch": 1.6274564851207187, "grad_norm": 0.6239635944366455, "learning_rate": 5.13146982518195e-06, "loss": 0.442, "step": 5797 }, { "epoch": 1.6277372262773722, "grad_norm": 0.6209805011749268, "learning_rate": 5.129837029145385e-06, "loss": 0.3629, "step": 5798 }, { "epoch": 1.6280179674340258, "grad_norm": 0.6415537595748901, "learning_rate": 5.128204219253418e-06, "loss": 0.4024, "step": 5799 }, { "epoch": 1.6282987085906795, "grad_norm": 0.6636104583740234, "learning_rate": 5.126571395680294e-06, "loss": 0.3867, "step": 5800 }, { "epoch": 1.628579449747333, "grad_norm": 0.7155022621154785, "learning_rate": 5.124938558600259e-06, "loss": 0.4119, "step": 5801 }, { "epoch": 1.6288601909039864, "grad_norm": 0.6171215772628784, "learning_rate": 5.123305708187558e-06, "loss": 0.3423, "step": 5802 }, { "epoch": 1.62914093206064, "grad_norm": 0.6507341265678406, "learning_rate": 5.121672844616439e-06, "loss": 0.3409, "step": 5803 }, { "epoch": 1.6294216732172937, "grad_norm": 0.7989486455917358, "learning_rate": 5.120039968061149e-06, "loss": 0.4406, "step": 5804 }, { "epoch": 1.6297024143739471, "grad_norm": 0.6134814620018005, "learning_rate": 5.1184070786959405e-06, "loss": 0.3551, "step": 5805 }, { "epoch": 1.6299831555306008, "grad_norm": 0.5829088687896729, "learning_rate": 5.116774176695065e-06, "loss": 0.3649, "step": 5806 }, { "epoch": 1.6302638966872545, "grad_norm": 0.6511884927749634, "learning_rate": 5.115141262232777e-06, "loss": 0.3583, "step": 5807 }, { "epoch": 1.630544637843908, "grad_norm": 0.7171990871429443, "learning_rate": 5.113508335483327e-06, "loss": 0.3634, "step": 5808 }, { "epoch": 1.6308253790005613, "grad_norm": 0.5896447896957397, "learning_rate": 5.1118753966209745e-06, "loss": 0.3963, "step": 5809 }, { "epoch": 1.631106120157215, "grad_norm": 0.6296886801719666, "learning_rate": 5.110242445819975e-06, "loss": 0.3508, "step": 5810 }, { "epoch": 1.6313868613138687, "grad_norm": 0.6544142961502075, "learning_rate": 5.108609483254587e-06, "loss": 0.3571, "step": 5811 }, { "epoch": 1.6316676024705221, "grad_norm": 0.6321722269058228, "learning_rate": 5.106976509099072e-06, "loss": 0.3681, "step": 5812 }, { "epoch": 1.6319483436271758, "grad_norm": 0.6789606213569641, "learning_rate": 5.1053435235276885e-06, "loss": 0.3842, "step": 5813 }, { "epoch": 1.6322290847838294, "grad_norm": 0.580205500125885, "learning_rate": 5.103710526714698e-06, "loss": 0.3726, "step": 5814 }, { "epoch": 1.6325098259404829, "grad_norm": 0.6430047750473022, "learning_rate": 5.102077518834366e-06, "loss": 0.3868, "step": 5815 }, { "epoch": 1.6327905670971363, "grad_norm": 0.6262224316596985, "learning_rate": 5.100444500060956e-06, "loss": 0.3895, "step": 5816 }, { "epoch": 1.63307130825379, "grad_norm": 0.6608339548110962, "learning_rate": 5.098811470568733e-06, "loss": 0.4123, "step": 5817 }, { "epoch": 1.6333520494104437, "grad_norm": 0.5748572945594788, "learning_rate": 5.097178430531966e-06, "loss": 0.3776, "step": 5818 }, { "epoch": 1.633632790567097, "grad_norm": 0.6591218709945679, "learning_rate": 5.09554538012492e-06, "loss": 0.3291, "step": 5819 }, { "epoch": 1.6339135317237508, "grad_norm": 0.7473452687263489, "learning_rate": 5.093912319521865e-06, "loss": 0.4303, "step": 5820 }, { "epoch": 1.6341942728804044, "grad_norm": 0.608173668384552, "learning_rate": 5.092279248897071e-06, "loss": 0.3635, "step": 5821 }, { "epoch": 1.6344750140370579, "grad_norm": 0.6118491888046265, "learning_rate": 5.090646168424809e-06, "loss": 0.4119, "step": 5822 }, { "epoch": 1.6347557551937113, "grad_norm": 0.7723951935768127, "learning_rate": 5.089013078279353e-06, "loss": 0.3836, "step": 5823 }, { "epoch": 1.635036496350365, "grad_norm": 0.7044379115104675, "learning_rate": 5.0873799786349755e-06, "loss": 0.3862, "step": 5824 }, { "epoch": 1.6353172375070186, "grad_norm": 0.6145278811454773, "learning_rate": 5.085746869665948e-06, "loss": 0.3978, "step": 5825 }, { "epoch": 1.635597978663672, "grad_norm": 0.7669884562492371, "learning_rate": 5.084113751546549e-06, "loss": 0.3903, "step": 5826 }, { "epoch": 1.6358787198203255, "grad_norm": 0.6864380240440369, "learning_rate": 5.082480624451053e-06, "loss": 0.4236, "step": 5827 }, { "epoch": 1.6361594609769792, "grad_norm": 0.6729112863540649, "learning_rate": 5.080847488553738e-06, "loss": 0.3793, "step": 5828 }, { "epoch": 1.6364402021336328, "grad_norm": 0.6278758645057678, "learning_rate": 5.079214344028882e-06, "loss": 0.3701, "step": 5829 }, { "epoch": 1.6367209432902863, "grad_norm": 0.7034110426902771, "learning_rate": 5.077581191050763e-06, "loss": 0.3567, "step": 5830 }, { "epoch": 1.63700168444694, "grad_norm": 0.7015382647514343, "learning_rate": 5.075948029793663e-06, "loss": 0.372, "step": 5831 }, { "epoch": 1.6372824256035936, "grad_norm": 0.7056487798690796, "learning_rate": 5.074314860431859e-06, "loss": 0.3678, "step": 5832 }, { "epoch": 1.637563166760247, "grad_norm": 0.6272594332695007, "learning_rate": 5.072681683139636e-06, "loss": 0.3777, "step": 5833 }, { "epoch": 1.6378439079169005, "grad_norm": 0.6215318441390991, "learning_rate": 5.071048498091278e-06, "loss": 0.3847, "step": 5834 }, { "epoch": 1.6381246490735542, "grad_norm": 0.6224110126495361, "learning_rate": 5.0694153054610655e-06, "loss": 0.4487, "step": 5835 }, { "epoch": 1.6384053902302078, "grad_norm": 0.5982664823532104, "learning_rate": 5.067782105423281e-06, "loss": 0.3638, "step": 5836 }, { "epoch": 1.6386861313868613, "grad_norm": 0.6761977672576904, "learning_rate": 5.066148898152213e-06, "loss": 0.357, "step": 5837 }, { "epoch": 1.638966872543515, "grad_norm": 0.6112979650497437, "learning_rate": 5.064515683822147e-06, "loss": 0.4099, "step": 5838 }, { "epoch": 1.6392476137001686, "grad_norm": 0.6694360375404358, "learning_rate": 5.062882462607367e-06, "loss": 0.3891, "step": 5839 }, { "epoch": 1.639528354856822, "grad_norm": 0.6972580552101135, "learning_rate": 5.061249234682164e-06, "loss": 0.3683, "step": 5840 }, { "epoch": 1.6398090960134755, "grad_norm": 0.6658865213394165, "learning_rate": 5.059616000220822e-06, "loss": 0.3701, "step": 5841 }, { "epoch": 1.6400898371701291, "grad_norm": 0.645904541015625, "learning_rate": 5.057982759397631e-06, "loss": 0.3815, "step": 5842 }, { "epoch": 1.6403705783267828, "grad_norm": 0.665382981300354, "learning_rate": 5.056349512386879e-06, "loss": 0.3217, "step": 5843 }, { "epoch": 1.6406513194834362, "grad_norm": 0.5599098205566406, "learning_rate": 5.0547162593628595e-06, "loss": 0.3558, "step": 5844 }, { "epoch": 1.6409320606400897, "grad_norm": 0.6961774230003357, "learning_rate": 5.05308300049986e-06, "loss": 0.4203, "step": 5845 }, { "epoch": 1.6412128017967436, "grad_norm": 0.6967402696609497, "learning_rate": 5.051449735972174e-06, "loss": 0.3944, "step": 5846 }, { "epoch": 1.641493542953397, "grad_norm": 0.6849242448806763, "learning_rate": 5.0498164659540905e-06, "loss": 0.3852, "step": 5847 }, { "epoch": 1.6417742841100504, "grad_norm": 0.6335440874099731, "learning_rate": 5.048183190619904e-06, "loss": 0.3931, "step": 5848 }, { "epoch": 1.642055025266704, "grad_norm": 0.6914854645729065, "learning_rate": 5.046549910143907e-06, "loss": 0.3897, "step": 5849 }, { "epoch": 1.6423357664233578, "grad_norm": 0.62864750623703, "learning_rate": 5.044916624700395e-06, "loss": 0.4085, "step": 5850 }, { "epoch": 1.6426165075800112, "grad_norm": 0.6330033540725708, "learning_rate": 5.043283334463659e-06, "loss": 0.3884, "step": 5851 }, { "epoch": 1.6428972487366647, "grad_norm": 0.5764800906181335, "learning_rate": 5.0416500396079936e-06, "loss": 0.3579, "step": 5852 }, { "epoch": 1.6431779898933183, "grad_norm": 0.8093725442886353, "learning_rate": 5.040016740307696e-06, "loss": 0.4166, "step": 5853 }, { "epoch": 1.643458731049972, "grad_norm": 0.6535652279853821, "learning_rate": 5.03838343673706e-06, "loss": 0.3765, "step": 5854 }, { "epoch": 1.6437394722066254, "grad_norm": 0.6010372042655945, "learning_rate": 5.036750129070384e-06, "loss": 0.3969, "step": 5855 }, { "epoch": 1.644020213363279, "grad_norm": 0.6564108729362488, "learning_rate": 5.035116817481962e-06, "loss": 0.3771, "step": 5856 }, { "epoch": 1.6443009545199327, "grad_norm": 0.6179079413414001, "learning_rate": 5.03348350214609e-06, "loss": 0.3837, "step": 5857 }, { "epoch": 1.6445816956765862, "grad_norm": 0.6608940362930298, "learning_rate": 5.031850183237068e-06, "loss": 0.3372, "step": 5858 }, { "epoch": 1.6448624368332396, "grad_norm": 0.6407791376113892, "learning_rate": 5.030216860929192e-06, "loss": 0.3997, "step": 5859 }, { "epoch": 1.6451431779898933, "grad_norm": 0.7145275473594666, "learning_rate": 5.02858353539676e-06, "loss": 0.3847, "step": 5860 }, { "epoch": 1.645423919146547, "grad_norm": 0.6945395469665527, "learning_rate": 5.026950206814074e-06, "loss": 0.3604, "step": 5861 }, { "epoch": 1.6457046603032004, "grad_norm": 0.5824368596076965, "learning_rate": 5.025316875355427e-06, "loss": 0.3551, "step": 5862 }, { "epoch": 1.645985401459854, "grad_norm": 0.6859902739524841, "learning_rate": 5.023683541195121e-06, "loss": 0.3463, "step": 5863 }, { "epoch": 1.6462661426165077, "grad_norm": 0.6709204316139221, "learning_rate": 5.022050204507455e-06, "loss": 0.3613, "step": 5864 }, { "epoch": 1.6465468837731612, "grad_norm": 0.6162554621696472, "learning_rate": 5.020416865466728e-06, "loss": 0.3416, "step": 5865 }, { "epoch": 1.6468276249298146, "grad_norm": 0.6482139229774475, "learning_rate": 5.01878352424724e-06, "loss": 0.3716, "step": 5866 }, { "epoch": 1.6471083660864683, "grad_norm": 0.6603090763092041, "learning_rate": 5.017150181023291e-06, "loss": 0.3813, "step": 5867 }, { "epoch": 1.647389107243122, "grad_norm": 0.5874245166778564, "learning_rate": 5.015516835969182e-06, "loss": 0.412, "step": 5868 }, { "epoch": 1.6476698483997754, "grad_norm": 0.699480414390564, "learning_rate": 5.013883489259212e-06, "loss": 0.3763, "step": 5869 }, { "epoch": 1.6479505895564288, "grad_norm": 0.7026716470718384, "learning_rate": 5.012250141067683e-06, "loss": 0.3454, "step": 5870 }, { "epoch": 1.6482313307130827, "grad_norm": 0.8506357669830322, "learning_rate": 5.010616791568894e-06, "loss": 0.4086, "step": 5871 }, { "epoch": 1.6485120718697361, "grad_norm": 0.6266496777534485, "learning_rate": 5.008983440937147e-06, "loss": 0.3934, "step": 5872 }, { "epoch": 1.6487928130263896, "grad_norm": 0.6439083218574524, "learning_rate": 5.007350089346744e-06, "loss": 0.3812, "step": 5873 }, { "epoch": 1.6490735541830432, "grad_norm": 0.6669334173202515, "learning_rate": 5.005716736971985e-06, "loss": 0.3783, "step": 5874 }, { "epoch": 1.649354295339697, "grad_norm": 0.7153447866439819, "learning_rate": 5.004083383987172e-06, "loss": 0.4105, "step": 5875 }, { "epoch": 1.6496350364963503, "grad_norm": 0.6811407804489136, "learning_rate": 5.002450030566604e-06, "loss": 0.4035, "step": 5876 }, { "epoch": 1.6499157776530038, "grad_norm": 0.6322611570358276, "learning_rate": 5.000816676884586e-06, "loss": 0.4038, "step": 5877 }, { "epoch": 1.6501965188096575, "grad_norm": 0.7674782872200012, "learning_rate": 4.999183323115416e-06, "loss": 0.4112, "step": 5878 }, { "epoch": 1.6504772599663111, "grad_norm": 0.6602808833122253, "learning_rate": 4.9975499694333974e-06, "loss": 0.4213, "step": 5879 }, { "epoch": 1.6507580011229646, "grad_norm": 0.6431399583816528, "learning_rate": 4.99591661601283e-06, "loss": 0.3523, "step": 5880 }, { "epoch": 1.6510387422796182, "grad_norm": 0.7292598485946655, "learning_rate": 4.994283263028016e-06, "loss": 0.3859, "step": 5881 }, { "epoch": 1.6513194834362719, "grad_norm": 0.6640633940696716, "learning_rate": 4.9926499106532575e-06, "loss": 0.3742, "step": 5882 }, { "epoch": 1.6516002245929253, "grad_norm": 0.583101749420166, "learning_rate": 4.991016559062854e-06, "loss": 0.3665, "step": 5883 }, { "epoch": 1.6518809657495788, "grad_norm": 0.6108763217926025, "learning_rate": 4.9893832084311085e-06, "loss": 0.3857, "step": 5884 }, { "epoch": 1.6521617069062324, "grad_norm": 0.6275140047073364, "learning_rate": 4.987749858932321e-06, "loss": 0.3725, "step": 5885 }, { "epoch": 1.652442448062886, "grad_norm": 0.8515222668647766, "learning_rate": 4.986116510740791e-06, "loss": 0.4015, "step": 5886 }, { "epoch": 1.6527231892195395, "grad_norm": 0.6848206520080566, "learning_rate": 4.984483164030821e-06, "loss": 0.4262, "step": 5887 }, { "epoch": 1.6530039303761932, "grad_norm": 0.635560929775238, "learning_rate": 4.982849818976711e-06, "loss": 0.3727, "step": 5888 }, { "epoch": 1.6532846715328469, "grad_norm": 0.671344518661499, "learning_rate": 4.981216475752763e-06, "loss": 0.3595, "step": 5889 }, { "epoch": 1.6535654126895003, "grad_norm": 0.6194545030593872, "learning_rate": 4.979583134533275e-06, "loss": 0.334, "step": 5890 }, { "epoch": 1.6538461538461537, "grad_norm": 0.6954805254936218, "learning_rate": 4.977949795492546e-06, "loss": 0.3659, "step": 5891 }, { "epoch": 1.6541268950028074, "grad_norm": 0.6220340132713318, "learning_rate": 4.97631645880488e-06, "loss": 0.3656, "step": 5892 }, { "epoch": 1.654407636159461, "grad_norm": 0.6406189799308777, "learning_rate": 4.974683124644573e-06, "loss": 0.3449, "step": 5893 }, { "epoch": 1.6546883773161145, "grad_norm": 0.7023875713348389, "learning_rate": 4.973049793185928e-06, "loss": 0.3711, "step": 5894 }, { "epoch": 1.654969118472768, "grad_norm": 0.730359673500061, "learning_rate": 4.97141646460324e-06, "loss": 0.4041, "step": 5895 }, { "epoch": 1.6552498596294218, "grad_norm": 0.6749394536018372, "learning_rate": 4.969783139070809e-06, "loss": 0.4101, "step": 5896 }, { "epoch": 1.6555306007860753, "grad_norm": 0.5995941758155823, "learning_rate": 4.968149816762933e-06, "loss": 0.3984, "step": 5897 }, { "epoch": 1.6558113419427287, "grad_norm": 0.6909562945365906, "learning_rate": 4.966516497853911e-06, "loss": 0.3919, "step": 5898 }, { "epoch": 1.6560920830993824, "grad_norm": 0.7594336271286011, "learning_rate": 4.96488318251804e-06, "loss": 0.3773, "step": 5899 }, { "epoch": 1.656372824256036, "grad_norm": 0.7784666419029236, "learning_rate": 4.963249870929619e-06, "loss": 0.4054, "step": 5900 }, { "epoch": 1.6566535654126895, "grad_norm": 0.5823115110397339, "learning_rate": 4.961616563262941e-06, "loss": 0.3981, "step": 5901 }, { "epoch": 1.656934306569343, "grad_norm": 0.7006237506866455, "learning_rate": 4.959983259692305e-06, "loss": 0.3813, "step": 5902 }, { "epoch": 1.6572150477259966, "grad_norm": 0.6254827380180359, "learning_rate": 4.958349960392007e-06, "loss": 0.3993, "step": 5903 }, { "epoch": 1.6574957888826503, "grad_norm": 0.6537759900093079, "learning_rate": 4.956716665536342e-06, "loss": 0.3271, "step": 5904 }, { "epoch": 1.6577765300393037, "grad_norm": 0.6395942568778992, "learning_rate": 4.955083375299606e-06, "loss": 0.4178, "step": 5905 }, { "epoch": 1.6580572711959574, "grad_norm": 0.6403214931488037, "learning_rate": 4.953450089856094e-06, "loss": 0.4017, "step": 5906 }, { "epoch": 1.658338012352611, "grad_norm": 0.6025586724281311, "learning_rate": 4.951816809380098e-06, "loss": 0.3854, "step": 5907 }, { "epoch": 1.6586187535092645, "grad_norm": 0.6520612835884094, "learning_rate": 4.950183534045911e-06, "loss": 0.3765, "step": 5908 }, { "epoch": 1.658899494665918, "grad_norm": 0.6875594258308411, "learning_rate": 4.948550264027828e-06, "loss": 0.3818, "step": 5909 }, { "epoch": 1.6591802358225716, "grad_norm": 0.5716404914855957, "learning_rate": 4.946916999500141e-06, "loss": 0.4154, "step": 5910 }, { "epoch": 1.6594609769792252, "grad_norm": 0.6810140013694763, "learning_rate": 4.945283740637142e-06, "loss": 0.3967, "step": 5911 }, { "epoch": 1.6597417181358787, "grad_norm": 0.7087152600288391, "learning_rate": 4.943650487613123e-06, "loss": 0.3401, "step": 5912 }, { "epoch": 1.6600224592925323, "grad_norm": 0.5920955538749695, "learning_rate": 4.942017240602373e-06, "loss": 0.3554, "step": 5913 }, { "epoch": 1.660303200449186, "grad_norm": 0.7348477244377136, "learning_rate": 4.940383999779182e-06, "loss": 0.3658, "step": 5914 }, { "epoch": 1.6605839416058394, "grad_norm": 0.6357693076133728, "learning_rate": 4.938750765317839e-06, "loss": 0.3862, "step": 5915 }, { "epoch": 1.6608646827624929, "grad_norm": 0.6174179911613464, "learning_rate": 4.937117537392633e-06, "loss": 0.3638, "step": 5916 }, { "epoch": 1.6611454239191465, "grad_norm": 0.6077427268028259, "learning_rate": 4.935484316177854e-06, "loss": 0.3908, "step": 5917 }, { "epoch": 1.6614261650758002, "grad_norm": 0.7190478444099426, "learning_rate": 4.933851101847787e-06, "loss": 0.3929, "step": 5918 }, { "epoch": 1.6617069062324537, "grad_norm": 0.6962661743164062, "learning_rate": 4.932217894576718e-06, "loss": 0.3821, "step": 5919 }, { "epoch": 1.661987647389107, "grad_norm": 0.714051365852356, "learning_rate": 4.930584694538935e-06, "loss": 0.3751, "step": 5920 }, { "epoch": 1.6622683885457608, "grad_norm": 0.669011116027832, "learning_rate": 4.928951501908724e-06, "loss": 0.3792, "step": 5921 }, { "epoch": 1.6625491297024144, "grad_norm": 0.5968210101127625, "learning_rate": 4.927318316860364e-06, "loss": 0.3719, "step": 5922 }, { "epoch": 1.6628298708590679, "grad_norm": 0.6652485728263855, "learning_rate": 4.925685139568142e-06, "loss": 0.378, "step": 5923 }, { "epoch": 1.6631106120157215, "grad_norm": 0.6216427683830261, "learning_rate": 4.92405197020634e-06, "loss": 0.367, "step": 5924 }, { "epoch": 1.6633913531723752, "grad_norm": 0.6448199152946472, "learning_rate": 4.922418808949238e-06, "loss": 0.3701, "step": 5925 }, { "epoch": 1.6636720943290286, "grad_norm": 0.684476912021637, "learning_rate": 4.92078565597112e-06, "loss": 0.3452, "step": 5926 }, { "epoch": 1.663952835485682, "grad_norm": 0.5752292275428772, "learning_rate": 4.919152511446264e-06, "loss": 0.3826, "step": 5927 }, { "epoch": 1.6642335766423357, "grad_norm": 0.6469265222549438, "learning_rate": 4.917519375548949e-06, "loss": 0.401, "step": 5928 }, { "epoch": 1.6645143177989894, "grad_norm": 0.65733402967453, "learning_rate": 4.915886248453453e-06, "loss": 0.3856, "step": 5929 }, { "epoch": 1.6647950589556428, "grad_norm": 0.6798132658004761, "learning_rate": 4.914253130334053e-06, "loss": 0.4177, "step": 5930 }, { "epoch": 1.6650758001122965, "grad_norm": 0.7080673575401306, "learning_rate": 4.912620021365026e-06, "loss": 0.3782, "step": 5931 }, { "epoch": 1.6653565412689502, "grad_norm": 0.7040212750434875, "learning_rate": 4.9109869217206475e-06, "loss": 0.3815, "step": 5932 }, { "epoch": 1.6656372824256036, "grad_norm": 0.6813741326332092, "learning_rate": 4.909353831575192e-06, "loss": 0.3652, "step": 5933 }, { "epoch": 1.665918023582257, "grad_norm": 0.683027446269989, "learning_rate": 4.9077207511029315e-06, "loss": 0.3928, "step": 5934 }, { "epoch": 1.6661987647389107, "grad_norm": 0.6870583891868591, "learning_rate": 4.906087680478137e-06, "loss": 0.3485, "step": 5935 }, { "epoch": 1.6664795058955644, "grad_norm": 0.5466635227203369, "learning_rate": 4.9044546198750825e-06, "loss": 0.3595, "step": 5936 }, { "epoch": 1.6667602470522178, "grad_norm": 0.6759864687919617, "learning_rate": 4.902821569468036e-06, "loss": 0.3517, "step": 5937 }, { "epoch": 1.6670409882088713, "grad_norm": 0.6421211957931519, "learning_rate": 4.901188529431268e-06, "loss": 0.3987, "step": 5938 }, { "epoch": 1.6673217293655251, "grad_norm": 0.7061609029769897, "learning_rate": 4.899555499939046e-06, "loss": 0.389, "step": 5939 }, { "epoch": 1.6676024705221786, "grad_norm": 0.6985163688659668, "learning_rate": 4.897922481165636e-06, "loss": 0.4113, "step": 5940 }, { "epoch": 1.667883211678832, "grad_norm": 0.5786982178688049, "learning_rate": 4.896289473285304e-06, "loss": 0.3908, "step": 5941 }, { "epoch": 1.6681639528354857, "grad_norm": 0.6494597792625427, "learning_rate": 4.894656476472312e-06, "loss": 0.3873, "step": 5942 }, { "epoch": 1.6684446939921393, "grad_norm": 0.6812187433242798, "learning_rate": 4.893023490900929e-06, "loss": 0.3664, "step": 5943 }, { "epoch": 1.6687254351487928, "grad_norm": 0.7137150168418884, "learning_rate": 4.891390516745413e-06, "loss": 0.4004, "step": 5944 }, { "epoch": 1.6690061763054462, "grad_norm": 0.6889640688896179, "learning_rate": 4.889757554180026e-06, "loss": 0.3971, "step": 5945 }, { "epoch": 1.6692869174621, "grad_norm": 0.6488991379737854, "learning_rate": 4.888124603379026e-06, "loss": 0.3733, "step": 5946 }, { "epoch": 1.6695676586187536, "grad_norm": 0.6044003367424011, "learning_rate": 4.886491664516674e-06, "loss": 0.3866, "step": 5947 }, { "epoch": 1.669848399775407, "grad_norm": 0.6599321961402893, "learning_rate": 4.884858737767225e-06, "loss": 0.3477, "step": 5948 }, { "epoch": 1.6701291409320607, "grad_norm": 0.7093262672424316, "learning_rate": 4.883225823304936e-06, "loss": 0.3957, "step": 5949 }, { "epoch": 1.6704098820887143, "grad_norm": 0.7099141478538513, "learning_rate": 4.881592921304061e-06, "loss": 0.3659, "step": 5950 }, { "epoch": 1.6706906232453678, "grad_norm": 0.5497747659683228, "learning_rate": 4.879960031938852e-06, "loss": 0.4154, "step": 5951 }, { "epoch": 1.6709713644020212, "grad_norm": 0.6621913313865662, "learning_rate": 4.8783271553835635e-06, "loss": 0.4363, "step": 5952 }, { "epoch": 1.6712521055586749, "grad_norm": 0.5906604528427124, "learning_rate": 4.876694291812443e-06, "loss": 0.3754, "step": 5953 }, { "epoch": 1.6715328467153285, "grad_norm": 0.6203334331512451, "learning_rate": 4.8750614413997414e-06, "loss": 0.3653, "step": 5954 }, { "epoch": 1.671813587871982, "grad_norm": 0.5811553001403809, "learning_rate": 4.8734286043197064e-06, "loss": 0.3847, "step": 5955 }, { "epoch": 1.6720943290286356, "grad_norm": 0.6775395274162292, "learning_rate": 4.871795780746583e-06, "loss": 0.4004, "step": 5956 }, { "epoch": 1.6723750701852893, "grad_norm": 0.6454271674156189, "learning_rate": 4.870162970854617e-06, "loss": 0.3712, "step": 5957 }, { "epoch": 1.6726558113419427, "grad_norm": 0.6467093229293823, "learning_rate": 4.8685301748180505e-06, "loss": 0.4088, "step": 5958 }, { "epoch": 1.6729365524985962, "grad_norm": 0.6088839173316956, "learning_rate": 4.866897392811127e-06, "loss": 0.3681, "step": 5959 }, { "epoch": 1.6732172936552498, "grad_norm": 0.7110549211502075, "learning_rate": 4.8652646250080855e-06, "loss": 0.4544, "step": 5960 }, { "epoch": 1.6734980348119035, "grad_norm": 0.6865145564079285, "learning_rate": 4.8636318715831665e-06, "loss": 0.4096, "step": 5961 }, { "epoch": 1.673778775968557, "grad_norm": 0.6610509157180786, "learning_rate": 4.861999132710606e-06, "loss": 0.3895, "step": 5962 }, { "epoch": 1.6740595171252104, "grad_norm": 0.6774160861968994, "learning_rate": 4.860366408564638e-06, "loss": 0.4073, "step": 5963 }, { "epoch": 1.6743402582818643, "grad_norm": 0.6872767806053162, "learning_rate": 4.8587336993195e-06, "loss": 0.3659, "step": 5964 }, { "epoch": 1.6746209994385177, "grad_norm": 0.6550089120864868, "learning_rate": 4.857101005149422e-06, "loss": 0.3616, "step": 5965 }, { "epoch": 1.6749017405951712, "grad_norm": 0.6324899792671204, "learning_rate": 4.855468326228638e-06, "loss": 0.4, "step": 5966 }, { "epoch": 1.6751824817518248, "grad_norm": 0.6363287568092346, "learning_rate": 4.853835662731372e-06, "loss": 0.4195, "step": 5967 }, { "epoch": 1.6754632229084785, "grad_norm": 0.6213005185127258, "learning_rate": 4.852203014831858e-06, "loss": 0.3967, "step": 5968 }, { "epoch": 1.675743964065132, "grad_norm": 0.6869015097618103, "learning_rate": 4.850570382704319e-06, "loss": 0.3737, "step": 5969 }, { "epoch": 1.6760247052217854, "grad_norm": 0.5972256660461426, "learning_rate": 4.84893776652298e-06, "loss": 0.3792, "step": 5970 }, { "epoch": 1.676305446378439, "grad_norm": 0.6419148445129395, "learning_rate": 4.847305166462062e-06, "loss": 0.4108, "step": 5971 }, { "epoch": 1.6765861875350927, "grad_norm": 0.6109099388122559, "learning_rate": 4.8456725826957875e-06, "loss": 0.3746, "step": 5972 }, { "epoch": 1.6768669286917461, "grad_norm": 0.6137343049049377, "learning_rate": 4.844040015398375e-06, "loss": 0.3884, "step": 5973 }, { "epoch": 1.6771476698483998, "grad_norm": 0.6361182332038879, "learning_rate": 4.842407464744043e-06, "loss": 0.3591, "step": 5974 }, { "epoch": 1.6774284110050535, "grad_norm": 0.5942631363868713, "learning_rate": 4.840774930907005e-06, "loss": 0.3525, "step": 5975 }, { "epoch": 1.677709152161707, "grad_norm": 0.6512374877929688, "learning_rate": 4.839142414061478e-06, "loss": 0.3744, "step": 5976 }, { "epoch": 1.6779898933183603, "grad_norm": 0.6963050961494446, "learning_rate": 4.837509914381671e-06, "loss": 0.3882, "step": 5977 }, { "epoch": 1.678270634475014, "grad_norm": 0.6489622592926025, "learning_rate": 4.835877432041796e-06, "loss": 0.3513, "step": 5978 }, { "epoch": 1.6785513756316677, "grad_norm": 0.7303745746612549, "learning_rate": 4.834244967216061e-06, "loss": 0.353, "step": 5979 }, { "epoch": 1.6788321167883211, "grad_norm": 0.7013206481933594, "learning_rate": 4.832612520078671e-06, "loss": 0.3722, "step": 5980 }, { "epoch": 1.6791128579449748, "grad_norm": 0.738486647605896, "learning_rate": 4.830980090803834e-06, "loss": 0.4062, "step": 5981 }, { "epoch": 1.6793935991016284, "grad_norm": 0.755413830280304, "learning_rate": 4.82934767956575e-06, "loss": 0.3573, "step": 5982 }, { "epoch": 1.6796743402582819, "grad_norm": 0.6180960536003113, "learning_rate": 4.82771528653862e-06, "loss": 0.3952, "step": 5983 }, { "epoch": 1.6799550814149353, "grad_norm": 0.6720296740531921, "learning_rate": 4.826082911896643e-06, "loss": 0.3548, "step": 5984 }, { "epoch": 1.680235822571589, "grad_norm": 0.6452472805976868, "learning_rate": 4.824450555814016e-06, "loss": 0.3591, "step": 5985 }, { "epoch": 1.6805165637282427, "grad_norm": 0.6786510348320007, "learning_rate": 4.822818218464934e-06, "loss": 0.3983, "step": 5986 }, { "epoch": 1.680797304884896, "grad_norm": 0.725750207901001, "learning_rate": 4.8211859000235905e-06, "loss": 0.3933, "step": 5987 }, { "epoch": 1.6810780460415495, "grad_norm": 0.641161322593689, "learning_rate": 4.819553600664175e-06, "loss": 0.4031, "step": 5988 }, { "epoch": 1.6813587871982034, "grad_norm": 0.6939529180526733, "learning_rate": 4.8179213205608784e-06, "loss": 0.3522, "step": 5989 }, { "epoch": 1.6816395283548569, "grad_norm": 0.593222439289093, "learning_rate": 4.8162890598878855e-06, "loss": 0.3768, "step": 5990 }, { "epoch": 1.6819202695115103, "grad_norm": 0.6571763157844543, "learning_rate": 4.814656818819381e-06, "loss": 0.3648, "step": 5991 }, { "epoch": 1.682201010668164, "grad_norm": 0.6553687453269958, "learning_rate": 4.8130245975295486e-06, "loss": 0.377, "step": 5992 }, { "epoch": 1.6824817518248176, "grad_norm": 0.6839633584022522, "learning_rate": 4.811392396192569e-06, "loss": 0.3978, "step": 5993 }, { "epoch": 1.682762492981471, "grad_norm": 0.6256217360496521, "learning_rate": 4.809760214982619e-06, "loss": 0.3806, "step": 5994 }, { "epoch": 1.6830432341381245, "grad_norm": 0.6105203628540039, "learning_rate": 4.808128054073876e-06, "loss": 0.3426, "step": 5995 }, { "epoch": 1.6833239752947782, "grad_norm": 0.5967744588851929, "learning_rate": 4.806495913640515e-06, "loss": 0.3889, "step": 5996 }, { "epoch": 1.6836047164514318, "grad_norm": 0.6517393589019775, "learning_rate": 4.804863793856706e-06, "loss": 0.3642, "step": 5997 }, { "epoch": 1.6838854576080853, "grad_norm": 0.6016674041748047, "learning_rate": 4.803231694896621e-06, "loss": 0.3625, "step": 5998 }, { "epoch": 1.684166198764739, "grad_norm": 0.6955810189247131, "learning_rate": 4.801599616934424e-06, "loss": 0.4115, "step": 5999 }, { "epoch": 1.6844469399213926, "grad_norm": 0.6546865105628967, "learning_rate": 4.799967560144283e-06, "loss": 0.3695, "step": 6000 }, { "epoch": 1.684727681078046, "grad_norm": 0.6968745589256287, "learning_rate": 4.798335524700359e-06, "loss": 0.3643, "step": 6001 }, { "epoch": 1.6850084222346995, "grad_norm": 0.6732944250106812, "learning_rate": 4.796703510776814e-06, "loss": 0.3941, "step": 6002 }, { "epoch": 1.6852891633913532, "grad_norm": 0.5669814944267273, "learning_rate": 4.795071518547807e-06, "loss": 0.3801, "step": 6003 }, { "epoch": 1.6855699045480068, "grad_norm": 0.6808241605758667, "learning_rate": 4.793439548187494e-06, "loss": 0.3587, "step": 6004 }, { "epoch": 1.6858506457046603, "grad_norm": 0.7255261540412903, "learning_rate": 4.791807599870026e-06, "loss": 0.3839, "step": 6005 }, { "epoch": 1.686131386861314, "grad_norm": 0.607886016368866, "learning_rate": 4.790175673769557e-06, "loss": 0.396, "step": 6006 }, { "epoch": 1.6864121280179676, "grad_norm": 0.6321319937705994, "learning_rate": 4.788543770060235e-06, "loss": 0.3798, "step": 6007 }, { "epoch": 1.686692869174621, "grad_norm": 0.6118468046188354, "learning_rate": 4.786911888916207e-06, "loss": 0.3922, "step": 6008 }, { "epoch": 1.6869736103312745, "grad_norm": 0.6063696146011353, "learning_rate": 4.785280030511617e-06, "loss": 0.3926, "step": 6009 }, { "epoch": 1.6872543514879281, "grad_norm": 0.6252252459526062, "learning_rate": 4.783648195020608e-06, "loss": 0.3608, "step": 6010 }, { "epoch": 1.6875350926445818, "grad_norm": 0.6016137599945068, "learning_rate": 4.782016382617317e-06, "loss": 0.3927, "step": 6011 }, { "epoch": 1.6878158338012352, "grad_norm": 0.8158223628997803, "learning_rate": 4.780384593475882e-06, "loss": 0.4098, "step": 6012 }, { "epoch": 1.6880965749578887, "grad_norm": 0.7017393112182617, "learning_rate": 4.778752827770439e-06, "loss": 0.4322, "step": 6013 }, { "epoch": 1.6883773161145423, "grad_norm": 0.6651670932769775, "learning_rate": 4.777121085675116e-06, "loss": 0.3957, "step": 6014 }, { "epoch": 1.688658057271196, "grad_norm": 0.6182573437690735, "learning_rate": 4.775489367364047e-06, "loss": 0.3486, "step": 6015 }, { "epoch": 1.6889387984278494, "grad_norm": 0.7296952605247498, "learning_rate": 4.773857673011356e-06, "loss": 0.3876, "step": 6016 }, { "epoch": 1.689219539584503, "grad_norm": 0.6980814933776855, "learning_rate": 4.772226002791168e-06, "loss": 0.3805, "step": 6017 }, { "epoch": 1.6895002807411568, "grad_norm": 0.6811695098876953, "learning_rate": 4.7705943568776015e-06, "loss": 0.3997, "step": 6018 }, { "epoch": 1.6897810218978102, "grad_norm": 0.5936205387115479, "learning_rate": 4.768962735444781e-06, "loss": 0.4092, "step": 6019 }, { "epoch": 1.6900617630544637, "grad_norm": 0.6474246382713318, "learning_rate": 4.767331138666822e-06, "loss": 0.4018, "step": 6020 }, { "epoch": 1.6903425042111173, "grad_norm": 0.6197667717933655, "learning_rate": 4.765699566717835e-06, "loss": 0.3318, "step": 6021 }, { "epoch": 1.690623245367771, "grad_norm": 0.6729341745376587, "learning_rate": 4.764068019771934e-06, "loss": 0.3744, "step": 6022 }, { "epoch": 1.6909039865244244, "grad_norm": 0.685239851474762, "learning_rate": 4.762436498003227e-06, "loss": 0.3818, "step": 6023 }, { "epoch": 1.691184727681078, "grad_norm": 0.6004669070243835, "learning_rate": 4.760805001585819e-06, "loss": 0.3839, "step": 6024 }, { "epoch": 1.6914654688377317, "grad_norm": 0.528843104839325, "learning_rate": 4.7591735306938144e-06, "loss": 0.3775, "step": 6025 }, { "epoch": 1.6917462099943852, "grad_norm": 0.6972212791442871, "learning_rate": 4.757542085501314e-06, "loss": 0.3702, "step": 6026 }, { "epoch": 1.6920269511510386, "grad_norm": 0.6752337217330933, "learning_rate": 4.755910666182413e-06, "loss": 0.3602, "step": 6027 }, { "epoch": 1.6923076923076923, "grad_norm": 0.6402177810668945, "learning_rate": 4.754279272911208e-06, "loss": 0.3978, "step": 6028 }, { "epoch": 1.692588433464346, "grad_norm": 0.6916014552116394, "learning_rate": 4.7526479058617904e-06, "loss": 0.3876, "step": 6029 }, { "epoch": 1.6928691746209994, "grad_norm": 0.6594701409339905, "learning_rate": 4.751016565208251e-06, "loss": 0.411, "step": 6030 }, { "epoch": 1.693149915777653, "grad_norm": 0.678952693939209, "learning_rate": 4.749385251124675e-06, "loss": 0.3663, "step": 6031 }, { "epoch": 1.6934306569343067, "grad_norm": 0.6876115202903748, "learning_rate": 4.7477539637851475e-06, "loss": 0.4004, "step": 6032 }, { "epoch": 1.6937113980909602, "grad_norm": 0.7282420992851257, "learning_rate": 4.746122703363748e-06, "loss": 0.3702, "step": 6033 }, { "epoch": 1.6939921392476136, "grad_norm": 0.7316907644271851, "learning_rate": 4.744491470034554e-06, "loss": 0.4198, "step": 6034 }, { "epoch": 1.6942728804042673, "grad_norm": 0.7218764424324036, "learning_rate": 4.742860263971642e-06, "loss": 0.4007, "step": 6035 }, { "epoch": 1.694553621560921, "grad_norm": 0.6547330021858215, "learning_rate": 4.741229085349083e-06, "loss": 0.3817, "step": 6036 }, { "epoch": 1.6948343627175744, "grad_norm": 0.7427129149436951, "learning_rate": 4.7395979343409475e-06, "loss": 0.3925, "step": 6037 }, { "epoch": 1.6951151038742278, "grad_norm": 0.6655198931694031, "learning_rate": 4.737966811121302e-06, "loss": 0.3621, "step": 6038 }, { "epoch": 1.6953958450308815, "grad_norm": 0.6993547081947327, "learning_rate": 4.736335715864207e-06, "loss": 0.4127, "step": 6039 }, { "epoch": 1.6956765861875351, "grad_norm": 0.7520627975463867, "learning_rate": 4.734704648743726e-06, "loss": 0.4052, "step": 6040 }, { "epoch": 1.6959573273441886, "grad_norm": 0.6116990447044373, "learning_rate": 4.733073609933915e-06, "loss": 0.3693, "step": 6041 }, { "epoch": 1.6962380685008422, "grad_norm": 0.7016486525535583, "learning_rate": 4.731442599608828e-06, "loss": 0.3685, "step": 6042 }, { "epoch": 1.696518809657496, "grad_norm": 0.7572161555290222, "learning_rate": 4.729811617942515e-06, "loss": 0.4026, "step": 6043 }, { "epoch": 1.6967995508141493, "grad_norm": 0.7121775150299072, "learning_rate": 4.728180665109028e-06, "loss": 0.3877, "step": 6044 }, { "epoch": 1.6970802919708028, "grad_norm": 0.6513147354125977, "learning_rate": 4.7265497412824096e-06, "loss": 0.4139, "step": 6045 }, { "epoch": 1.6973610331274565, "grad_norm": 0.6890811920166016, "learning_rate": 4.724918846636703e-06, "loss": 0.3418, "step": 6046 }, { "epoch": 1.6976417742841101, "grad_norm": 0.5832764506340027, "learning_rate": 4.723287981345947e-06, "loss": 0.3561, "step": 6047 }, { "epoch": 1.6979225154407636, "grad_norm": 0.658863365650177, "learning_rate": 4.721657145584176e-06, "loss": 0.4022, "step": 6048 }, { "epoch": 1.6982032565974172, "grad_norm": 0.7016426920890808, "learning_rate": 4.7200263395254235e-06, "loss": 0.3924, "step": 6049 }, { "epoch": 1.6984839977540709, "grad_norm": 0.5901565551757812, "learning_rate": 4.718395563343718e-06, "loss": 0.3872, "step": 6050 }, { "epoch": 1.6987647389107243, "grad_norm": 0.6345401406288147, "learning_rate": 4.716764817213088e-06, "loss": 0.3997, "step": 6051 }, { "epoch": 1.6990454800673778, "grad_norm": 0.6664313077926636, "learning_rate": 4.715134101307555e-06, "loss": 0.3864, "step": 6052 }, { "epoch": 1.6993262212240314, "grad_norm": 0.6883613467216492, "learning_rate": 4.713503415801141e-06, "loss": 0.3684, "step": 6053 }, { "epoch": 1.699606962380685, "grad_norm": 0.65483558177948, "learning_rate": 4.711872760867859e-06, "loss": 0.3698, "step": 6054 }, { "epoch": 1.6998877035373385, "grad_norm": 0.6401641964912415, "learning_rate": 4.710242136681725e-06, "loss": 0.3714, "step": 6055 }, { "epoch": 1.700168444693992, "grad_norm": 0.7197986841201782, "learning_rate": 4.708611543416747e-06, "loss": 0.4473, "step": 6056 }, { "epoch": 1.7004491858506459, "grad_norm": 0.7501349449157715, "learning_rate": 4.706980981246934e-06, "loss": 0.3615, "step": 6057 }, { "epoch": 1.7007299270072993, "grad_norm": 0.7164209485054016, "learning_rate": 4.705350450346289e-06, "loss": 0.3721, "step": 6058 }, { "epoch": 1.7010106681639527, "grad_norm": 0.62754887342453, "learning_rate": 4.703719950888811e-06, "loss": 0.4012, "step": 6059 }, { "epoch": 1.7012914093206064, "grad_norm": 0.6176878809928894, "learning_rate": 4.702089483048497e-06, "loss": 0.3734, "step": 6060 }, { "epoch": 1.70157215047726, "grad_norm": 0.6346988677978516, "learning_rate": 4.700459046999341e-06, "loss": 0.3895, "step": 6061 }, { "epoch": 1.7018528916339135, "grad_norm": 0.6312441229820251, "learning_rate": 4.698828642915334e-06, "loss": 0.403, "step": 6062 }, { "epoch": 1.702133632790567, "grad_norm": 0.648977518081665, "learning_rate": 4.69719827097046e-06, "loss": 0.3532, "step": 6063 }, { "epoch": 1.7024143739472206, "grad_norm": 0.6212430596351624, "learning_rate": 4.695567931338703e-06, "loss": 0.3686, "step": 6064 }, { "epoch": 1.7026951151038743, "grad_norm": 0.6183661222457886, "learning_rate": 4.693937624194045e-06, "loss": 0.3468, "step": 6065 }, { "epoch": 1.7029758562605277, "grad_norm": 0.5506702065467834, "learning_rate": 4.69230734971046e-06, "loss": 0.3297, "step": 6066 }, { "epoch": 1.7032565974171814, "grad_norm": 0.6448323130607605, "learning_rate": 4.690677108061921e-06, "loss": 0.397, "step": 6067 }, { "epoch": 1.703537338573835, "grad_norm": 0.6663951277732849, "learning_rate": 4.689046899422397e-06, "loss": 0.4069, "step": 6068 }, { "epoch": 1.7038180797304885, "grad_norm": 0.7060367465019226, "learning_rate": 4.687416723965853e-06, "loss": 0.3985, "step": 6069 }, { "epoch": 1.704098820887142, "grad_norm": 0.7022783756256104, "learning_rate": 4.685786581866254e-06, "loss": 0.4073, "step": 6070 }, { "epoch": 1.7043795620437956, "grad_norm": 0.620241641998291, "learning_rate": 4.684156473297557e-06, "loss": 0.4315, "step": 6071 }, { "epoch": 1.7046603032004493, "grad_norm": 0.6034104228019714, "learning_rate": 4.682526398433716e-06, "loss": 0.3792, "step": 6072 }, { "epoch": 1.7049410443571027, "grad_norm": 0.6438170075416565, "learning_rate": 4.680896357448685e-06, "loss": 0.3946, "step": 6073 }, { "epoch": 1.7052217855137564, "grad_norm": 0.6571474075317383, "learning_rate": 4.67926635051641e-06, "loss": 0.3572, "step": 6074 }, { "epoch": 1.70550252667041, "grad_norm": 0.6134425401687622, "learning_rate": 4.677636377810836e-06, "loss": 0.3888, "step": 6075 }, { "epoch": 1.7057832678270635, "grad_norm": 0.6544412970542908, "learning_rate": 4.676006439505902e-06, "loss": 0.3851, "step": 6076 }, { "epoch": 1.706064008983717, "grad_norm": 0.5887376666069031, "learning_rate": 4.6743765357755465e-06, "loss": 0.3585, "step": 6077 }, { "epoch": 1.7063447501403706, "grad_norm": 0.5665528178215027, "learning_rate": 4.672746666793703e-06, "loss": 0.3877, "step": 6078 }, { "epoch": 1.7066254912970242, "grad_norm": 0.6077118515968323, "learning_rate": 4.671116832734299e-06, "loss": 0.4167, "step": 6079 }, { "epoch": 1.7069062324536777, "grad_norm": 0.649574339389801, "learning_rate": 4.669487033771261e-06, "loss": 0.3892, "step": 6080 }, { "epoch": 1.7071869736103311, "grad_norm": 0.6175322532653809, "learning_rate": 4.667857270078513e-06, "loss": 0.3688, "step": 6081 }, { "epoch": 1.707467714766985, "grad_norm": 0.6835350394248962, "learning_rate": 4.666227541829971e-06, "loss": 0.3492, "step": 6082 }, { "epoch": 1.7077484559236384, "grad_norm": 0.6302521228790283, "learning_rate": 4.66459784919955e-06, "loss": 0.3685, "step": 6083 }, { "epoch": 1.7080291970802919, "grad_norm": 0.5762865543365479, "learning_rate": 4.662968192361161e-06, "loss": 0.3705, "step": 6084 }, { "epoch": 1.7083099382369455, "grad_norm": 0.6340917944908142, "learning_rate": 4.661338571488711e-06, "loss": 0.3417, "step": 6085 }, { "epoch": 1.7085906793935992, "grad_norm": 0.6836395263671875, "learning_rate": 4.659708986756101e-06, "loss": 0.3718, "step": 6086 }, { "epoch": 1.7088714205502527, "grad_norm": 0.7231102585792542, "learning_rate": 4.658079438337234e-06, "loss": 0.376, "step": 6087 }, { "epoch": 1.709152161706906, "grad_norm": 0.7105762958526611, "learning_rate": 4.6564499264060024e-06, "loss": 0.3924, "step": 6088 }, { "epoch": 1.7094329028635598, "grad_norm": 0.7268754243850708, "learning_rate": 4.654820451136297e-06, "loss": 0.4305, "step": 6089 }, { "epoch": 1.7097136440202134, "grad_norm": 0.5852659940719604, "learning_rate": 4.653191012702008e-06, "loss": 0.3614, "step": 6090 }, { "epoch": 1.7099943851768669, "grad_norm": 0.644648551940918, "learning_rate": 4.6515616112770165e-06, "loss": 0.4235, "step": 6091 }, { "epoch": 1.7102751263335205, "grad_norm": 0.6437558531761169, "learning_rate": 4.6499322470352035e-06, "loss": 0.3926, "step": 6092 }, { "epoch": 1.7105558674901742, "grad_norm": 0.6182278990745544, "learning_rate": 4.6483029201504445e-06, "loss": 0.366, "step": 6093 }, { "epoch": 1.7108366086468276, "grad_norm": 0.6242963075637817, "learning_rate": 4.646673630796608e-06, "loss": 0.3518, "step": 6094 }, { "epoch": 1.711117349803481, "grad_norm": 0.6547690629959106, "learning_rate": 4.645044379147567e-06, "loss": 0.4349, "step": 6095 }, { "epoch": 1.7113980909601347, "grad_norm": 0.6208428740501404, "learning_rate": 4.643415165377184e-06, "loss": 0.3873, "step": 6096 }, { "epoch": 1.7116788321167884, "grad_norm": 0.6276224851608276, "learning_rate": 4.641785989659314e-06, "loss": 0.3593, "step": 6097 }, { "epoch": 1.7119595732734418, "grad_norm": 0.654462993144989, "learning_rate": 4.640156852167818e-06, "loss": 0.386, "step": 6098 }, { "epoch": 1.7122403144300955, "grad_norm": 0.6660346388816833, "learning_rate": 4.638527753076544e-06, "loss": 0.3912, "step": 6099 }, { "epoch": 1.7125210555867492, "grad_norm": 0.7017571330070496, "learning_rate": 4.63689869255934e-06, "loss": 0.4131, "step": 6100 }, { "epoch": 1.7128017967434026, "grad_norm": 0.6872237324714661, "learning_rate": 4.63526967079005e-06, "loss": 0.4045, "step": 6101 }, { "epoch": 1.713082537900056, "grad_norm": 0.6601234078407288, "learning_rate": 4.633640687942512e-06, "loss": 0.3775, "step": 6102 }, { "epoch": 1.7133632790567097, "grad_norm": 0.6169291138648987, "learning_rate": 4.632011744190563e-06, "loss": 0.4002, "step": 6103 }, { "epoch": 1.7136440202133634, "grad_norm": 0.6733725666999817, "learning_rate": 4.630382839708032e-06, "loss": 0.4231, "step": 6104 }, { "epoch": 1.7139247613700168, "grad_norm": 0.6713374257087708, "learning_rate": 4.628753974668745e-06, "loss": 0.3771, "step": 6105 }, { "epoch": 1.7142055025266703, "grad_norm": 0.6887063384056091, "learning_rate": 4.627125149246525e-06, "loss": 0.3817, "step": 6106 }, { "epoch": 1.7144862436833241, "grad_norm": 0.5848514437675476, "learning_rate": 4.625496363615191e-06, "loss": 0.3665, "step": 6107 }, { "epoch": 1.7147669848399776, "grad_norm": 0.6316863894462585, "learning_rate": 4.623867617948556e-06, "loss": 0.3588, "step": 6108 }, { "epoch": 1.715047725996631, "grad_norm": 0.5803934335708618, "learning_rate": 4.62223891242043e-06, "loss": 0.3709, "step": 6109 }, { "epoch": 1.7153284671532847, "grad_norm": 0.6939713358879089, "learning_rate": 4.6206102472046185e-06, "loss": 0.3842, "step": 6110 }, { "epoch": 1.7156092083099383, "grad_norm": 0.607570469379425, "learning_rate": 4.618981622474921e-06, "loss": 0.3622, "step": 6111 }, { "epoch": 1.7158899494665918, "grad_norm": 0.5985682606697083, "learning_rate": 4.617353038405136e-06, "loss": 0.3695, "step": 6112 }, { "epoch": 1.7161706906232452, "grad_norm": 0.5189514756202698, "learning_rate": 4.615724495169055e-06, "loss": 0.364, "step": 6113 }, { "epoch": 1.716451431779899, "grad_norm": 0.6978327035903931, "learning_rate": 4.614095992940466e-06, "loss": 0.4608, "step": 6114 }, { "epoch": 1.7167321729365526, "grad_norm": 0.6184882521629333, "learning_rate": 4.612467531893154e-06, "loss": 0.3752, "step": 6115 }, { "epoch": 1.717012914093206, "grad_norm": 0.7372332811355591, "learning_rate": 4.610839112200896e-06, "loss": 0.3983, "step": 6116 }, { "epoch": 1.7172936552498597, "grad_norm": 0.7011968493461609, "learning_rate": 4.6092107340374685e-06, "loss": 0.3694, "step": 6117 }, { "epoch": 1.7175743964065133, "grad_norm": 0.6919954419136047, "learning_rate": 4.607582397576641e-06, "loss": 0.4044, "step": 6118 }, { "epoch": 1.7178551375631668, "grad_norm": 0.6805636882781982, "learning_rate": 4.60595410299218e-06, "loss": 0.3698, "step": 6119 }, { "epoch": 1.7181358787198202, "grad_norm": 0.5570012331008911, "learning_rate": 4.604325850457845e-06, "loss": 0.3824, "step": 6120 }, { "epoch": 1.7184166198764739, "grad_norm": 0.6912919282913208, "learning_rate": 4.602697640147396e-06, "loss": 0.42, "step": 6121 }, { "epoch": 1.7186973610331275, "grad_norm": 0.6265649795532227, "learning_rate": 4.601069472234584e-06, "loss": 0.4127, "step": 6122 }, { "epoch": 1.718978102189781, "grad_norm": 0.6834555864334106, "learning_rate": 4.5994413468931575e-06, "loss": 0.4102, "step": 6123 }, { "epoch": 1.7192588433464346, "grad_norm": 0.6186806559562683, "learning_rate": 4.597813264296861e-06, "loss": 0.428, "step": 6124 }, { "epoch": 1.7195395845030883, "grad_norm": 0.5749118328094482, "learning_rate": 4.59618522461943e-06, "loss": 0.4105, "step": 6125 }, { "epoch": 1.7198203256597417, "grad_norm": 0.6539474725723267, "learning_rate": 4.594557228034602e-06, "loss": 0.3942, "step": 6126 }, { "epoch": 1.7201010668163952, "grad_norm": 0.5971326231956482, "learning_rate": 4.5929292747161035e-06, "loss": 0.3799, "step": 6127 }, { "epoch": 1.7203818079730488, "grad_norm": 0.6099491715431213, "learning_rate": 4.591301364837662e-06, "loss": 0.4484, "step": 6128 }, { "epoch": 1.7206625491297025, "grad_norm": 0.6051498651504517, "learning_rate": 4.589673498572998e-06, "loss": 0.3661, "step": 6129 }, { "epoch": 1.720943290286356, "grad_norm": 0.7305816411972046, "learning_rate": 4.5880456760958266e-06, "loss": 0.3665, "step": 6130 }, { "epoch": 1.7212240314430094, "grad_norm": 0.6616935729980469, "learning_rate": 4.586417897579859e-06, "loss": 0.4187, "step": 6131 }, { "epoch": 1.721504772599663, "grad_norm": 0.70457923412323, "learning_rate": 4.584790163198801e-06, "loss": 0.4152, "step": 6132 }, { "epoch": 1.7217855137563167, "grad_norm": 0.6524569392204285, "learning_rate": 4.583162473126354e-06, "loss": 0.3795, "step": 6133 }, { "epoch": 1.7220662549129702, "grad_norm": 0.645473301410675, "learning_rate": 4.581534827536216e-06, "loss": 0.396, "step": 6134 }, { "epoch": 1.7223469960696238, "grad_norm": 0.6396334171295166, "learning_rate": 4.57990722660208e-06, "loss": 0.3689, "step": 6135 }, { "epoch": 1.7226277372262775, "grad_norm": 0.6571674942970276, "learning_rate": 4.578279670497633e-06, "loss": 0.3966, "step": 6136 }, { "epoch": 1.722908478382931, "grad_norm": 0.6665908098220825, "learning_rate": 4.576652159396556e-06, "loss": 0.3754, "step": 6137 }, { "epoch": 1.7231892195395844, "grad_norm": 0.6403006315231323, "learning_rate": 4.575024693472527e-06, "loss": 0.4184, "step": 6138 }, { "epoch": 1.723469960696238, "grad_norm": 0.6201865077018738, "learning_rate": 4.573397272899221e-06, "loss": 0.3925, "step": 6139 }, { "epoch": 1.7237507018528917, "grad_norm": 0.5629845261573792, "learning_rate": 4.571769897850305e-06, "loss": 0.4269, "step": 6140 }, { "epoch": 1.7240314430095451, "grad_norm": 0.5768033862113953, "learning_rate": 4.570142568499442e-06, "loss": 0.379, "step": 6141 }, { "epoch": 1.7243121841661988, "grad_norm": 0.6511726975440979, "learning_rate": 4.568515285020292e-06, "loss": 0.383, "step": 6142 }, { "epoch": 1.7245929253228525, "grad_norm": 0.7166250348091125, "learning_rate": 4.5668880475865074e-06, "loss": 0.3599, "step": 6143 }, { "epoch": 1.724873666479506, "grad_norm": 0.6251681447029114, "learning_rate": 4.565260856371737e-06, "loss": 0.3556, "step": 6144 }, { "epoch": 1.7251544076361593, "grad_norm": 0.6248157024383545, "learning_rate": 4.563633711549621e-06, "loss": 0.3787, "step": 6145 }, { "epoch": 1.725435148792813, "grad_norm": 0.6260890960693359, "learning_rate": 4.562006613293806e-06, "loss": 0.4035, "step": 6146 }, { "epoch": 1.7257158899494667, "grad_norm": 0.6268472075462341, "learning_rate": 4.5603795617779204e-06, "loss": 0.3866, "step": 6147 }, { "epoch": 1.7259966311061201, "grad_norm": 0.7070551514625549, "learning_rate": 4.558752557175594e-06, "loss": 0.3616, "step": 6148 }, { "epoch": 1.7262773722627736, "grad_norm": 0.6147276163101196, "learning_rate": 4.55712559966045e-06, "loss": 0.37, "step": 6149 }, { "epoch": 1.7265581134194274, "grad_norm": 0.611661970615387, "learning_rate": 4.55549868940611e-06, "loss": 0.3809, "step": 6150 }, { "epoch": 1.7268388545760809, "grad_norm": 0.6389936208724976, "learning_rate": 4.553871826586184e-06, "loss": 0.3691, "step": 6151 }, { "epoch": 1.7271195957327343, "grad_norm": 0.6252506971359253, "learning_rate": 4.552245011374284e-06, "loss": 0.4309, "step": 6152 }, { "epoch": 1.727400336889388, "grad_norm": 0.5754144787788391, "learning_rate": 4.550618243944011e-06, "loss": 0.3742, "step": 6153 }, { "epoch": 1.7276810780460417, "grad_norm": 0.62964928150177, "learning_rate": 4.548991524468964e-06, "loss": 0.3729, "step": 6154 }, { "epoch": 1.727961819202695, "grad_norm": 0.6977580785751343, "learning_rate": 4.547364853122737e-06, "loss": 0.3624, "step": 6155 }, { "epoch": 1.7282425603593485, "grad_norm": 0.6553834080696106, "learning_rate": 4.545738230078918e-06, "loss": 0.3484, "step": 6156 }, { "epoch": 1.7285233015160022, "grad_norm": 0.7282357811927795, "learning_rate": 4.544111655511091e-06, "loss": 0.3407, "step": 6157 }, { "epoch": 1.7288040426726559, "grad_norm": 0.5911629796028137, "learning_rate": 4.542485129592833e-06, "loss": 0.4099, "step": 6158 }, { "epoch": 1.7290847838293093, "grad_norm": 0.6260474324226379, "learning_rate": 4.540858652497717e-06, "loss": 0.3887, "step": 6159 }, { "epoch": 1.729365524985963, "grad_norm": 0.5876392126083374, "learning_rate": 4.5392322243993105e-06, "loss": 0.3965, "step": 6160 }, { "epoch": 1.7296462661426166, "grad_norm": 0.6654799580574036, "learning_rate": 4.537605845471174e-06, "loss": 0.399, "step": 6161 }, { "epoch": 1.72992700729927, "grad_norm": 0.5956904888153076, "learning_rate": 4.535979515886868e-06, "loss": 0.3853, "step": 6162 }, { "epoch": 1.7302077484559235, "grad_norm": 0.7051137089729309, "learning_rate": 4.5343532358199415e-06, "loss": 0.3822, "step": 6163 }, { "epoch": 1.7304884896125772, "grad_norm": 0.6873592138290405, "learning_rate": 4.532727005443944e-06, "loss": 0.4121, "step": 6164 }, { "epoch": 1.7307692307692308, "grad_norm": 0.6353142261505127, "learning_rate": 4.531100824932413e-06, "loss": 0.3624, "step": 6165 }, { "epoch": 1.7310499719258843, "grad_norm": 0.652218222618103, "learning_rate": 4.5294746944588855e-06, "loss": 0.409, "step": 6166 }, { "epoch": 1.731330713082538, "grad_norm": 0.7085467576980591, "learning_rate": 4.527848614196893e-06, "loss": 0.3851, "step": 6167 }, { "epoch": 1.7316114542391916, "grad_norm": 0.580599844455719, "learning_rate": 4.526222584319961e-06, "loss": 0.3905, "step": 6168 }, { "epoch": 1.731892195395845, "grad_norm": 0.6966289281845093, "learning_rate": 4.524596605001609e-06, "loss": 0.3993, "step": 6169 }, { "epoch": 1.7321729365524985, "grad_norm": 0.6031883358955383, "learning_rate": 4.52297067641535e-06, "loss": 0.3994, "step": 6170 }, { "epoch": 1.7324536777091522, "grad_norm": 0.5960671901702881, "learning_rate": 4.521344798734692e-06, "loss": 0.4145, "step": 6171 }, { "epoch": 1.7327344188658058, "grad_norm": 0.7738549709320068, "learning_rate": 4.5197189721331425e-06, "loss": 0.3737, "step": 6172 }, { "epoch": 1.7330151600224593, "grad_norm": 0.6027511358261108, "learning_rate": 4.518093196784199e-06, "loss": 0.382, "step": 6173 }, { "epoch": 1.7332959011791127, "grad_norm": 0.6172485947608948, "learning_rate": 4.516467472861351e-06, "loss": 0.3741, "step": 6174 }, { "epoch": 1.7335766423357666, "grad_norm": 0.6590242981910706, "learning_rate": 4.514841800538088e-06, "loss": 0.3876, "step": 6175 }, { "epoch": 1.73385738349242, "grad_norm": 0.6483175158500671, "learning_rate": 4.513216179987891e-06, "loss": 0.4139, "step": 6176 }, { "epoch": 1.7341381246490735, "grad_norm": 0.6050055623054504, "learning_rate": 4.511590611384236e-06, "loss": 0.4409, "step": 6177 }, { "epoch": 1.7344188658057271, "grad_norm": 0.6095812320709229, "learning_rate": 4.509965094900593e-06, "loss": 0.3975, "step": 6178 }, { "epoch": 1.7346996069623808, "grad_norm": 0.6329188346862793, "learning_rate": 4.508339630710431e-06, "loss": 0.4147, "step": 6179 }, { "epoch": 1.7349803481190342, "grad_norm": 0.6765221357345581, "learning_rate": 4.5067142189872034e-06, "loss": 0.3702, "step": 6180 }, { "epoch": 1.7352610892756877, "grad_norm": 0.7201967239379883, "learning_rate": 4.505088859904367e-06, "loss": 0.4053, "step": 6181 }, { "epoch": 1.7355418304323413, "grad_norm": 0.6717000603675842, "learning_rate": 4.50346355363537e-06, "loss": 0.399, "step": 6182 }, { "epoch": 1.735822571588995, "grad_norm": 0.622174859046936, "learning_rate": 4.501838300353654e-06, "loss": 0.4101, "step": 6183 }, { "epoch": 1.7361033127456484, "grad_norm": 0.6078985929489136, "learning_rate": 4.500213100232657e-06, "loss": 0.3702, "step": 6184 }, { "epoch": 1.736384053902302, "grad_norm": 0.622093141078949, "learning_rate": 4.498587953445812e-06, "loss": 0.3629, "step": 6185 }, { "epoch": 1.7366647950589558, "grad_norm": 0.6237002015113831, "learning_rate": 4.496962860166542e-06, "loss": 0.3836, "step": 6186 }, { "epoch": 1.7369455362156092, "grad_norm": 0.6947446465492249, "learning_rate": 4.4953378205682655e-06, "loss": 0.3795, "step": 6187 }, { "epoch": 1.7372262773722627, "grad_norm": 0.7730202674865723, "learning_rate": 4.4937128348244e-06, "loss": 0.37, "step": 6188 }, { "epoch": 1.7375070185289163, "grad_norm": 0.6687898635864258, "learning_rate": 4.492087903108351e-06, "loss": 0.3759, "step": 6189 }, { "epoch": 1.73778775968557, "grad_norm": 0.7137091755867004, "learning_rate": 4.490463025593523e-06, "loss": 0.3424, "step": 6190 }, { "epoch": 1.7380685008422234, "grad_norm": 0.6977535486221313, "learning_rate": 4.488838202453314e-06, "loss": 0.3984, "step": 6191 }, { "epoch": 1.738349241998877, "grad_norm": 0.6577920317649841, "learning_rate": 4.487213433861111e-06, "loss": 0.3529, "step": 6192 }, { "epoch": 1.7386299831555307, "grad_norm": 0.6382343173027039, "learning_rate": 4.485588719990303e-06, "loss": 0.3587, "step": 6193 }, { "epoch": 1.7389107243121842, "grad_norm": 0.6272525787353516, "learning_rate": 4.483964061014268e-06, "loss": 0.4062, "step": 6194 }, { "epoch": 1.7391914654688376, "grad_norm": 0.7789995074272156, "learning_rate": 4.482339457106378e-06, "loss": 0.3862, "step": 6195 }, { "epoch": 1.7394722066254913, "grad_norm": 0.729128897190094, "learning_rate": 4.480714908440002e-06, "loss": 0.3939, "step": 6196 }, { "epoch": 1.739752947782145, "grad_norm": 0.6368979215621948, "learning_rate": 4.479090415188502e-06, "loss": 0.3974, "step": 6197 }, { "epoch": 1.7400336889387984, "grad_norm": 0.6093249320983887, "learning_rate": 4.477465977525234e-06, "loss": 0.3604, "step": 6198 }, { "epoch": 1.7403144300954518, "grad_norm": 0.7181936502456665, "learning_rate": 4.475841595623547e-06, "loss": 0.3768, "step": 6199 }, { "epoch": 1.7405951712521057, "grad_norm": 0.7454440593719482, "learning_rate": 4.474217269656786e-06, "loss": 0.3705, "step": 6200 }, { "epoch": 1.7408759124087592, "grad_norm": 0.6315555572509766, "learning_rate": 4.4725929997982895e-06, "loss": 0.3806, "step": 6201 }, { "epoch": 1.7411566535654126, "grad_norm": 0.6051346659660339, "learning_rate": 4.4709687862213866e-06, "loss": 0.3965, "step": 6202 }, { "epoch": 1.7414373947220663, "grad_norm": 0.6589784026145935, "learning_rate": 4.469344629099406e-06, "loss": 0.3929, "step": 6203 }, { "epoch": 1.74171813587872, "grad_norm": 0.692035973072052, "learning_rate": 4.467720528605665e-06, "loss": 0.392, "step": 6204 }, { "epoch": 1.7419988770353734, "grad_norm": 0.6870393753051758, "learning_rate": 4.466096484913481e-06, "loss": 0.4543, "step": 6205 }, { "epoch": 1.7422796181920268, "grad_norm": 0.6039896607398987, "learning_rate": 4.464472498196159e-06, "loss": 0.3611, "step": 6206 }, { "epoch": 1.7425603593486805, "grad_norm": 0.6538866758346558, "learning_rate": 4.462848568627003e-06, "loss": 0.3562, "step": 6207 }, { "epoch": 1.7428411005053341, "grad_norm": 0.703467845916748, "learning_rate": 4.461224696379307e-06, "loss": 0.3841, "step": 6208 }, { "epoch": 1.7431218416619876, "grad_norm": 0.6572993993759155, "learning_rate": 4.45960088162636e-06, "loss": 0.3609, "step": 6209 }, { "epoch": 1.7434025828186412, "grad_norm": 0.5625954270362854, "learning_rate": 4.457977124541447e-06, "loss": 0.417, "step": 6210 }, { "epoch": 1.743683323975295, "grad_norm": 0.6067602038383484, "learning_rate": 4.456353425297845e-06, "loss": 0.3588, "step": 6211 }, { "epoch": 1.7439640651319483, "grad_norm": 0.7094693779945374, "learning_rate": 4.4547297840688235e-06, "loss": 0.3708, "step": 6212 }, { "epoch": 1.7442448062886018, "grad_norm": 0.6259050965309143, "learning_rate": 4.45310620102765e-06, "loss": 0.3712, "step": 6213 }, { "epoch": 1.7445255474452555, "grad_norm": 0.5816469788551331, "learning_rate": 4.4514826763475816e-06, "loss": 0.3658, "step": 6214 }, { "epoch": 1.7448062886019091, "grad_norm": 0.573767364025116, "learning_rate": 4.44985921020187e-06, "loss": 0.4047, "step": 6215 }, { "epoch": 1.7450870297585626, "grad_norm": 0.5543454885482788, "learning_rate": 4.448235802763764e-06, "loss": 0.3805, "step": 6216 }, { "epoch": 1.7453677709152162, "grad_norm": 0.5985108017921448, "learning_rate": 4.4466124542065e-06, "loss": 0.4046, "step": 6217 }, { "epoch": 1.7456485120718699, "grad_norm": 0.6127582788467407, "learning_rate": 4.444989164703315e-06, "loss": 0.4083, "step": 6218 }, { "epoch": 1.7459292532285233, "grad_norm": 0.6588864922523499, "learning_rate": 4.443365934427435e-06, "loss": 0.3801, "step": 6219 }, { "epoch": 1.7462099943851768, "grad_norm": 0.5574650168418884, "learning_rate": 4.44174276355208e-06, "loss": 0.3787, "step": 6220 }, { "epoch": 1.7464907355418304, "grad_norm": 0.661906898021698, "learning_rate": 4.440119652250465e-06, "loss": 0.4387, "step": 6221 }, { "epoch": 1.746771476698484, "grad_norm": 0.6496044993400574, "learning_rate": 4.4384966006958e-06, "loss": 0.3752, "step": 6222 }, { "epoch": 1.7470522178551375, "grad_norm": 0.7547746896743774, "learning_rate": 4.436873609061287e-06, "loss": 0.4123, "step": 6223 }, { "epoch": 1.747332959011791, "grad_norm": 0.6067736744880676, "learning_rate": 4.43525067752012e-06, "loss": 0.4282, "step": 6224 }, { "epoch": 1.7476137001684446, "grad_norm": 0.6351024508476257, "learning_rate": 4.433627806245488e-06, "loss": 0.3896, "step": 6225 }, { "epoch": 1.7478944413250983, "grad_norm": 0.7073475122451782, "learning_rate": 4.432004995410575e-06, "loss": 0.3881, "step": 6226 }, { "epoch": 1.7481751824817517, "grad_norm": 0.4947356879711151, "learning_rate": 4.430382245188557e-06, "loss": 0.3488, "step": 6227 }, { "epoch": 1.7484559236384054, "grad_norm": 0.719068706035614, "learning_rate": 4.428759555752603e-06, "loss": 0.4192, "step": 6228 }, { "epoch": 1.748736664795059, "grad_norm": 0.6581749320030212, "learning_rate": 4.427136927275879e-06, "loss": 0.3948, "step": 6229 }, { "epoch": 1.7490174059517125, "grad_norm": 0.5638178586959839, "learning_rate": 4.4255143599315375e-06, "loss": 0.3817, "step": 6230 }, { "epoch": 1.749298147108366, "grad_norm": 0.6202285289764404, "learning_rate": 4.423891853892731e-06, "loss": 0.36, "step": 6231 }, { "epoch": 1.7495788882650196, "grad_norm": 0.5796914100646973, "learning_rate": 4.422269409332604e-06, "loss": 0.368, "step": 6232 }, { "epoch": 1.7498596294216733, "grad_norm": 0.609451413154602, "learning_rate": 4.420647026424293e-06, "loss": 0.3887, "step": 6233 }, { "epoch": 1.7501403705783267, "grad_norm": 0.5809699296951294, "learning_rate": 4.419024705340928e-06, "loss": 0.3584, "step": 6234 }, { "epoch": 1.7504211117349804, "grad_norm": 0.6108555793762207, "learning_rate": 4.4174024462556346e-06, "loss": 0.3753, "step": 6235 }, { "epoch": 1.750701852891634, "grad_norm": 0.640383243560791, "learning_rate": 4.415780249341529e-06, "loss": 0.4057, "step": 6236 }, { "epoch": 1.7509825940482875, "grad_norm": 0.6090337038040161, "learning_rate": 4.414158114771722e-06, "loss": 0.3845, "step": 6237 }, { "epoch": 1.751263335204941, "grad_norm": 0.7175537347793579, "learning_rate": 4.412536042719317e-06, "loss": 0.3618, "step": 6238 }, { "epoch": 1.7515440763615946, "grad_norm": 0.6068211793899536, "learning_rate": 4.410914033357413e-06, "loss": 0.3628, "step": 6239 }, { "epoch": 1.7518248175182483, "grad_norm": 0.65510493516922, "learning_rate": 4.4092920868590995e-06, "loss": 0.3327, "step": 6240 }, { "epoch": 1.7521055586749017, "grad_norm": 0.612531840801239, "learning_rate": 4.407670203397462e-06, "loss": 0.3947, "step": 6241 }, { "epoch": 1.7523862998315554, "grad_norm": 0.6644596457481384, "learning_rate": 4.4060483831455775e-06, "loss": 0.3759, "step": 6242 }, { "epoch": 1.752667040988209, "grad_norm": 0.642458975315094, "learning_rate": 4.404426626276514e-06, "loss": 0.4211, "step": 6243 }, { "epoch": 1.7529477821448625, "grad_norm": 0.772847592830658, "learning_rate": 4.402804932963339e-06, "loss": 0.4511, "step": 6244 }, { "epoch": 1.753228523301516, "grad_norm": 0.6655374765396118, "learning_rate": 4.401183303379107e-06, "loss": 0.3554, "step": 6245 }, { "epoch": 1.7535092644581696, "grad_norm": 0.6555061936378479, "learning_rate": 4.399561737696869e-06, "loss": 0.3631, "step": 6246 }, { "epoch": 1.7537900056148232, "grad_norm": 0.6439672112464905, "learning_rate": 4.3979402360896675e-06, "loss": 0.378, "step": 6247 }, { "epoch": 1.7540707467714767, "grad_norm": 0.5472911596298218, "learning_rate": 4.396318798730542e-06, "loss": 0.378, "step": 6248 }, { "epoch": 1.7543514879281301, "grad_norm": 0.6403830051422119, "learning_rate": 4.394697425792519e-06, "loss": 0.366, "step": 6249 }, { "epoch": 1.7546322290847838, "grad_norm": 0.6984131336212158, "learning_rate": 4.393076117448624e-06, "loss": 0.4171, "step": 6250 }, { "epoch": 1.7549129702414374, "grad_norm": 0.7900029420852661, "learning_rate": 4.391454873871871e-06, "loss": 0.4234, "step": 6251 }, { "epoch": 1.7551937113980909, "grad_norm": 0.6361263394355774, "learning_rate": 4.389833695235269e-06, "loss": 0.4082, "step": 6252 }, { "epoch": 1.7554744525547445, "grad_norm": 0.5593291521072388, "learning_rate": 4.388212581711821e-06, "loss": 0.3987, "step": 6253 }, { "epoch": 1.7557551937113982, "grad_norm": 0.6575533151626587, "learning_rate": 4.3865915334745216e-06, "loss": 0.4187, "step": 6254 }, { "epoch": 1.7560359348680517, "grad_norm": 0.6631331443786621, "learning_rate": 4.384970550696359e-06, "loss": 0.3923, "step": 6255 }, { "epoch": 1.756316676024705, "grad_norm": 0.6196184158325195, "learning_rate": 4.3833496335503164e-06, "loss": 0.415, "step": 6256 }, { "epoch": 1.7565974171813588, "grad_norm": 0.6458535194396973, "learning_rate": 4.381728782209365e-06, "loss": 0.414, "step": 6257 }, { "epoch": 1.7568781583380124, "grad_norm": 0.7704488635063171, "learning_rate": 4.380107996846473e-06, "loss": 0.4066, "step": 6258 }, { "epoch": 1.7571588994946659, "grad_norm": 0.6514649391174316, "learning_rate": 4.3784872776346e-06, "loss": 0.3607, "step": 6259 }, { "epoch": 1.7574396406513195, "grad_norm": 0.6268259286880493, "learning_rate": 4.376866624746701e-06, "loss": 0.4031, "step": 6260 }, { "epoch": 1.7577203818079732, "grad_norm": 0.6264336109161377, "learning_rate": 4.3752460383557195e-06, "loss": 0.3885, "step": 6261 }, { "epoch": 1.7580011229646266, "grad_norm": 0.5954325795173645, "learning_rate": 4.3736255186345975e-06, "loss": 0.3617, "step": 6262 }, { "epoch": 1.75828186412128, "grad_norm": 0.6842427253723145, "learning_rate": 4.372005065756264e-06, "loss": 0.3937, "step": 6263 }, { "epoch": 1.7585626052779337, "grad_norm": 0.6825528740882874, "learning_rate": 4.370384679893645e-06, "loss": 0.3974, "step": 6264 }, { "epoch": 1.7588433464345874, "grad_norm": 0.5167978405952454, "learning_rate": 4.368764361219657e-06, "loss": 0.369, "step": 6265 }, { "epoch": 1.7591240875912408, "grad_norm": 0.6316250562667847, "learning_rate": 4.367144109907211e-06, "loss": 0.3385, "step": 6266 }, { "epoch": 1.7594048287478943, "grad_norm": 0.6032918691635132, "learning_rate": 4.36552392612921e-06, "loss": 0.3801, "step": 6267 }, { "epoch": 1.7596855699045482, "grad_norm": 0.6955772042274475, "learning_rate": 4.363903810058552e-06, "loss": 0.3715, "step": 6268 }, { "epoch": 1.7599663110612016, "grad_norm": 0.666412889957428, "learning_rate": 4.362283761868122e-06, "loss": 0.3742, "step": 6269 }, { "epoch": 1.760247052217855, "grad_norm": 0.6349466443061829, "learning_rate": 4.360663781730803e-06, "loss": 0.3439, "step": 6270 }, { "epoch": 1.7605277933745087, "grad_norm": 0.6407955288887024, "learning_rate": 4.3590438698194695e-06, "loss": 0.4182, "step": 6271 }, { "epoch": 1.7608085345311624, "grad_norm": 0.7088667750358582, "learning_rate": 4.357424026306988e-06, "loss": 0.3657, "step": 6272 }, { "epoch": 1.7610892756878158, "grad_norm": 0.6948704123497009, "learning_rate": 4.355804251366219e-06, "loss": 0.4408, "step": 6273 }, { "epoch": 1.7613700168444693, "grad_norm": 0.6439430117607117, "learning_rate": 4.354184545170015e-06, "loss": 0.3945, "step": 6274 }, { "epoch": 1.761650758001123, "grad_norm": 0.7262217402458191, "learning_rate": 4.352564907891219e-06, "loss": 0.4043, "step": 6275 }, { "epoch": 1.7619314991577766, "grad_norm": 0.7246805429458618, "learning_rate": 4.350945339702671e-06, "loss": 0.4272, "step": 6276 }, { "epoch": 1.76221224031443, "grad_norm": 0.5724625587463379, "learning_rate": 4.3493258407772e-06, "loss": 0.3539, "step": 6277 }, { "epoch": 1.7624929814710837, "grad_norm": 0.7266539931297302, "learning_rate": 4.34770641128763e-06, "loss": 0.4067, "step": 6278 }, { "epoch": 1.7627737226277373, "grad_norm": 0.5940781235694885, "learning_rate": 4.3460870514067735e-06, "loss": 0.3424, "step": 6279 }, { "epoch": 1.7630544637843908, "grad_norm": 0.6305220723152161, "learning_rate": 4.3444677613074415e-06, "loss": 0.3624, "step": 6280 }, { "epoch": 1.7633352049410442, "grad_norm": 0.565099835395813, "learning_rate": 4.342848541162433e-06, "loss": 0.3855, "step": 6281 }, { "epoch": 1.763615946097698, "grad_norm": 0.6301993727684021, "learning_rate": 4.3412293911445416e-06, "loss": 0.3894, "step": 6282 }, { "epoch": 1.7638966872543516, "grad_norm": 0.6357986927032471, "learning_rate": 4.339610311426554e-06, "loss": 0.3925, "step": 6283 }, { "epoch": 1.764177428411005, "grad_norm": 0.6513936519622803, "learning_rate": 4.337991302181247e-06, "loss": 0.3836, "step": 6284 }, { "epoch": 1.7644581695676587, "grad_norm": 0.6839202642440796, "learning_rate": 4.336372363581391e-06, "loss": 0.4126, "step": 6285 }, { "epoch": 1.7647389107243123, "grad_norm": 0.663027286529541, "learning_rate": 4.33475349579975e-06, "loss": 0.4074, "step": 6286 }, { "epoch": 1.7650196518809658, "grad_norm": 0.7076448202133179, "learning_rate": 4.333134699009078e-06, "loss": 0.4145, "step": 6287 }, { "epoch": 1.7653003930376192, "grad_norm": 0.5735254883766174, "learning_rate": 4.331515973382125e-06, "loss": 0.3597, "step": 6288 }, { "epoch": 1.7655811341942729, "grad_norm": 0.6507977843284607, "learning_rate": 4.3298973190916294e-06, "loss": 0.4041, "step": 6289 }, { "epoch": 1.7658618753509265, "grad_norm": 0.6680722236633301, "learning_rate": 4.328278736310326e-06, "loss": 0.387, "step": 6290 }, { "epoch": 1.76614261650758, "grad_norm": 0.6661301851272583, "learning_rate": 4.326660225210938e-06, "loss": 0.3795, "step": 6291 }, { "epoch": 1.7664233576642334, "grad_norm": 0.6460405588150024, "learning_rate": 4.325041785966183e-06, "loss": 0.4159, "step": 6292 }, { "epoch": 1.7667040988208873, "grad_norm": 0.6425129175186157, "learning_rate": 4.323423418748772e-06, "loss": 0.3691, "step": 6293 }, { "epoch": 1.7669848399775407, "grad_norm": 0.6333646178245544, "learning_rate": 4.321805123731406e-06, "loss": 0.4006, "step": 6294 }, { "epoch": 1.7672655811341942, "grad_norm": 0.6111431121826172, "learning_rate": 4.320186901086781e-06, "loss": 0.3578, "step": 6295 }, { "epoch": 1.7675463222908478, "grad_norm": 0.5983756184577942, "learning_rate": 4.318568750987582e-06, "loss": 0.3586, "step": 6296 }, { "epoch": 1.7678270634475015, "grad_norm": 0.6899738907814026, "learning_rate": 4.316950673606487e-06, "loss": 0.3956, "step": 6297 }, { "epoch": 1.768107804604155, "grad_norm": 0.6273054480552673, "learning_rate": 4.315332669116167e-06, "loss": 0.3558, "step": 6298 }, { "epoch": 1.7683885457608084, "grad_norm": 0.6247169971466064, "learning_rate": 4.31371473768929e-06, "loss": 0.3611, "step": 6299 }, { "epoch": 1.768669286917462, "grad_norm": 0.6382704973220825, "learning_rate": 4.312096879498508e-06, "loss": 0.3633, "step": 6300 }, { "epoch": 1.7689500280741157, "grad_norm": 0.6193411946296692, "learning_rate": 4.310479094716469e-06, "loss": 0.3428, "step": 6301 }, { "epoch": 1.7692307692307692, "grad_norm": 0.6488775610923767, "learning_rate": 4.308861383515813e-06, "loss": 0.3553, "step": 6302 }, { "epoch": 1.7695115103874228, "grad_norm": 0.5911656022071838, "learning_rate": 4.307243746069172e-06, "loss": 0.3721, "step": 6303 }, { "epoch": 1.7697922515440765, "grad_norm": 0.7117661237716675, "learning_rate": 4.30562618254917e-06, "loss": 0.3546, "step": 6304 }, { "epoch": 1.77007299270073, "grad_norm": 0.7182649374008179, "learning_rate": 4.304008693128426e-06, "loss": 0.3865, "step": 6305 }, { "epoch": 1.7703537338573834, "grad_norm": 0.6753956079483032, "learning_rate": 4.302391277979545e-06, "loss": 0.399, "step": 6306 }, { "epoch": 1.770634475014037, "grad_norm": 0.7102121710777283, "learning_rate": 4.3007739372751275e-06, "loss": 0.3477, "step": 6307 }, { "epoch": 1.7709152161706907, "grad_norm": 0.6047407984733582, "learning_rate": 4.299156671187768e-06, "loss": 0.3956, "step": 6308 }, { "epoch": 1.7711959573273441, "grad_norm": 0.6499601602554321, "learning_rate": 4.297539479890051e-06, "loss": 0.3764, "step": 6309 }, { "epoch": 1.7714766984839978, "grad_norm": 0.658145010471344, "learning_rate": 4.295922363554551e-06, "loss": 0.3467, "step": 6310 }, { "epoch": 1.7717574396406515, "grad_norm": 0.6435934901237488, "learning_rate": 4.29430532235384e-06, "loss": 0.3763, "step": 6311 }, { "epoch": 1.772038180797305, "grad_norm": 0.7077054977416992, "learning_rate": 4.292688356460475e-06, "loss": 0.3486, "step": 6312 }, { "epoch": 1.7723189219539583, "grad_norm": 0.6282106041908264, "learning_rate": 4.29107146604701e-06, "loss": 0.362, "step": 6313 }, { "epoch": 1.772599663110612, "grad_norm": 0.6815602779388428, "learning_rate": 4.289454651285991e-06, "loss": 0.3688, "step": 6314 }, { "epoch": 1.7728804042672657, "grad_norm": 0.5672601461410522, "learning_rate": 4.287837912349952e-06, "loss": 0.3805, "step": 6315 }, { "epoch": 1.7731611454239191, "grad_norm": 0.6486336588859558, "learning_rate": 4.286221249411422e-06, "loss": 0.3973, "step": 6316 }, { "epoch": 1.7734418865805726, "grad_norm": 0.5920996069908142, "learning_rate": 4.2846046626429215e-06, "loss": 0.3811, "step": 6317 }, { "epoch": 1.7737226277372264, "grad_norm": 0.6090525388717651, "learning_rate": 4.282988152216964e-06, "loss": 0.3893, "step": 6318 }, { "epoch": 1.7740033688938799, "grad_norm": 0.7044205665588379, "learning_rate": 4.281371718306052e-06, "loss": 0.3792, "step": 6319 }, { "epoch": 1.7742841100505333, "grad_norm": 0.6818379759788513, "learning_rate": 4.27975536108268e-06, "loss": 0.3813, "step": 6320 }, { "epoch": 1.774564851207187, "grad_norm": 0.6091445088386536, "learning_rate": 4.278139080719338e-06, "loss": 0.3495, "step": 6321 }, { "epoch": 1.7748455923638407, "grad_norm": 0.6549208760261536, "learning_rate": 4.276522877388503e-06, "loss": 0.3786, "step": 6322 }, { "epoch": 1.775126333520494, "grad_norm": 0.6359061598777771, "learning_rate": 4.274906751262647e-06, "loss": 0.4105, "step": 6323 }, { "epoch": 1.7754070746771475, "grad_norm": 0.5990251898765564, "learning_rate": 4.273290702514236e-06, "loss": 0.401, "step": 6324 }, { "epoch": 1.7756878158338012, "grad_norm": 0.6826092004776001, "learning_rate": 4.2716747313157206e-06, "loss": 0.3592, "step": 6325 }, { "epoch": 1.7759685569904549, "grad_norm": 0.6266413927078247, "learning_rate": 4.270058837839548e-06, "loss": 0.3585, "step": 6326 }, { "epoch": 1.7762492981471083, "grad_norm": 0.600288450717926, "learning_rate": 4.26844302225816e-06, "loss": 0.3811, "step": 6327 }, { "epoch": 1.776530039303762, "grad_norm": 0.6904387474060059, "learning_rate": 4.266827284743981e-06, "loss": 0.3741, "step": 6328 }, { "epoch": 1.7768107804604156, "grad_norm": 0.7091556191444397, "learning_rate": 4.265211625469435e-06, "loss": 0.3433, "step": 6329 }, { "epoch": 1.777091521617069, "grad_norm": 0.66339510679245, "learning_rate": 4.263596044606936e-06, "loss": 0.4235, "step": 6330 }, { "epoch": 1.7773722627737225, "grad_norm": 0.626258134841919, "learning_rate": 4.261980542328887e-06, "loss": 0.3755, "step": 6331 }, { "epoch": 1.7776530039303762, "grad_norm": 0.6411709785461426, "learning_rate": 4.260365118807685e-06, "loss": 0.3838, "step": 6332 }, { "epoch": 1.7779337450870298, "grad_norm": 0.6367449760437012, "learning_rate": 4.258749774215719e-06, "loss": 0.3755, "step": 6333 }, { "epoch": 1.7782144862436833, "grad_norm": 0.619971513748169, "learning_rate": 4.2571345087253665e-06, "loss": 0.3917, "step": 6334 }, { "epoch": 1.778495227400337, "grad_norm": 0.6844624280929565, "learning_rate": 4.2555193225090005e-06, "loss": 0.3674, "step": 6335 }, { "epoch": 1.7787759685569906, "grad_norm": 0.6690794229507446, "learning_rate": 4.253904215738982e-06, "loss": 0.3742, "step": 6336 }, { "epoch": 1.779056709713644, "grad_norm": 0.6709421873092651, "learning_rate": 4.252289188587666e-06, "loss": 0.38, "step": 6337 }, { "epoch": 1.7793374508702975, "grad_norm": 0.668391227722168, "learning_rate": 4.2506742412273986e-06, "loss": 0.335, "step": 6338 }, { "epoch": 1.7796181920269512, "grad_norm": 0.5918411612510681, "learning_rate": 4.249059373830517e-06, "loss": 0.4, "step": 6339 }, { "epoch": 1.7798989331836048, "grad_norm": 0.6350464820861816, "learning_rate": 4.247444586569348e-06, "loss": 0.3877, "step": 6340 }, { "epoch": 1.7801796743402583, "grad_norm": 0.5575278401374817, "learning_rate": 4.245829879616214e-06, "loss": 0.3834, "step": 6341 }, { "epoch": 1.7804604154969117, "grad_norm": 0.5742670893669128, "learning_rate": 4.244215253143423e-06, "loss": 0.3852, "step": 6342 }, { "epoch": 1.7807411566535654, "grad_norm": 0.6611730456352234, "learning_rate": 4.242600707323282e-06, "loss": 0.3849, "step": 6343 }, { "epoch": 1.781021897810219, "grad_norm": 0.5678191184997559, "learning_rate": 4.240986242328083e-06, "loss": 0.3535, "step": 6344 }, { "epoch": 1.7813026389668725, "grad_norm": 0.582705020904541, "learning_rate": 4.239371858330115e-06, "loss": 0.3634, "step": 6345 }, { "epoch": 1.7815833801235261, "grad_norm": 0.6255151033401489, "learning_rate": 4.237757555501649e-06, "loss": 0.3499, "step": 6346 }, { "epoch": 1.7818641212801798, "grad_norm": 0.6268560886383057, "learning_rate": 4.236143334014958e-06, "loss": 0.3739, "step": 6347 }, { "epoch": 1.7821448624368332, "grad_norm": 0.5669793486595154, "learning_rate": 4.2345291940423e-06, "loss": 0.4106, "step": 6348 }, { "epoch": 1.7824256035934867, "grad_norm": 0.5445272326469421, "learning_rate": 4.232915135755924e-06, "loss": 0.3622, "step": 6349 }, { "epoch": 1.7827063447501403, "grad_norm": 0.7186790108680725, "learning_rate": 4.231301159328076e-06, "loss": 0.4353, "step": 6350 }, { "epoch": 1.782987085906794, "grad_norm": 0.5925012826919556, "learning_rate": 4.229687264930989e-06, "loss": 0.3828, "step": 6351 }, { "epoch": 1.7832678270634474, "grad_norm": 0.6374977827072144, "learning_rate": 4.2280734527368865e-06, "loss": 0.3847, "step": 6352 }, { "epoch": 1.783548568220101, "grad_norm": 0.5620571970939636, "learning_rate": 4.226459722917985e-06, "loss": 0.3703, "step": 6353 }, { "epoch": 1.7838293093767548, "grad_norm": 0.5990384221076965, "learning_rate": 4.224846075646491e-06, "loss": 0.3905, "step": 6354 }, { "epoch": 1.7841100505334082, "grad_norm": 0.6036943793296814, "learning_rate": 4.223232511094605e-06, "loss": 0.3564, "step": 6355 }, { "epoch": 1.7843907916900617, "grad_norm": 0.7412349581718445, "learning_rate": 4.221619029434513e-06, "loss": 0.4274, "step": 6356 }, { "epoch": 1.7846715328467153, "grad_norm": 0.577129602432251, "learning_rate": 4.220005630838399e-06, "loss": 0.376, "step": 6357 }, { "epoch": 1.784952274003369, "grad_norm": 0.5761657357215881, "learning_rate": 4.2183923154784325e-06, "loss": 0.3975, "step": 6358 }, { "epoch": 1.7852330151600224, "grad_norm": 0.5842590928077698, "learning_rate": 4.216779083526779e-06, "loss": 0.3785, "step": 6359 }, { "epoch": 1.7855137563166759, "grad_norm": 0.6966604590415955, "learning_rate": 4.2151659351555895e-06, "loss": 0.3885, "step": 6360 }, { "epoch": 1.7857944974733297, "grad_norm": 0.551936149597168, "learning_rate": 4.213552870537013e-06, "loss": 0.4025, "step": 6361 }, { "epoch": 1.7860752386299832, "grad_norm": 0.6577399969100952, "learning_rate": 4.211939889843182e-06, "loss": 0.3438, "step": 6362 }, { "epoch": 1.7863559797866366, "grad_norm": 0.6121562719345093, "learning_rate": 4.210326993246225e-06, "loss": 0.3654, "step": 6363 }, { "epoch": 1.7866367209432903, "grad_norm": 0.6832751035690308, "learning_rate": 4.208714180918262e-06, "loss": 0.3733, "step": 6364 }, { "epoch": 1.786917462099944, "grad_norm": 0.6492119431495667, "learning_rate": 4.2071014530314e-06, "loss": 0.423, "step": 6365 }, { "epoch": 1.7871982032565974, "grad_norm": 0.6280763745307922, "learning_rate": 4.205488809757741e-06, "loss": 0.3487, "step": 6366 }, { "epoch": 1.7874789444132508, "grad_norm": 0.5772631764411926, "learning_rate": 4.203876251269375e-06, "loss": 0.398, "step": 6367 }, { "epoch": 1.7877596855699045, "grad_norm": 0.6471928954124451, "learning_rate": 4.202263777738385e-06, "loss": 0.409, "step": 6368 }, { "epoch": 1.7880404267265582, "grad_norm": 0.7307161688804626, "learning_rate": 4.200651389336843e-06, "loss": 0.3565, "step": 6369 }, { "epoch": 1.7883211678832116, "grad_norm": 0.7405007481575012, "learning_rate": 4.199039086236815e-06, "loss": 0.3534, "step": 6370 }, { "epoch": 1.7886019090398653, "grad_norm": 0.6418318152427673, "learning_rate": 4.197426868610354e-06, "loss": 0.3561, "step": 6371 }, { "epoch": 1.788882650196519, "grad_norm": 0.6842749714851379, "learning_rate": 4.195814736629506e-06, "loss": 0.4021, "step": 6372 }, { "epoch": 1.7891633913531724, "grad_norm": 0.5714514851570129, "learning_rate": 4.194202690466311e-06, "loss": 0.3819, "step": 6373 }, { "epoch": 1.7894441325098258, "grad_norm": 0.7125022411346436, "learning_rate": 4.19259073029279e-06, "loss": 0.4236, "step": 6374 }, { "epoch": 1.7897248736664795, "grad_norm": 0.6383434534072876, "learning_rate": 4.190978856280967e-06, "loss": 0.4211, "step": 6375 }, { "epoch": 1.7900056148231331, "grad_norm": 0.6919798254966736, "learning_rate": 4.189367068602852e-06, "loss": 0.3707, "step": 6376 }, { "epoch": 1.7902863559797866, "grad_norm": 0.6926742196083069, "learning_rate": 4.18775536743044e-06, "loss": 0.3941, "step": 6377 }, { "epoch": 1.7905670971364402, "grad_norm": 0.6804854273796082, "learning_rate": 4.186143752935725e-06, "loss": 0.405, "step": 6378 }, { "epoch": 1.790847838293094, "grad_norm": 0.6851863861083984, "learning_rate": 4.184532225290687e-06, "loss": 0.3632, "step": 6379 }, { "epoch": 1.7911285794497473, "grad_norm": 0.6039943695068359, "learning_rate": 4.182920784667299e-06, "loss": 0.3582, "step": 6380 }, { "epoch": 1.7914093206064008, "grad_norm": 0.8127399682998657, "learning_rate": 4.181309431237523e-06, "loss": 0.3652, "step": 6381 }, { "epoch": 1.7916900617630545, "grad_norm": 0.7083007097244263, "learning_rate": 4.179698165173316e-06, "loss": 0.3763, "step": 6382 }, { "epoch": 1.7919708029197081, "grad_norm": 0.5860080718994141, "learning_rate": 4.178086986646618e-06, "loss": 0.3549, "step": 6383 }, { "epoch": 1.7922515440763616, "grad_norm": 0.6356669664382935, "learning_rate": 4.1764758958293665e-06, "loss": 0.3987, "step": 6384 }, { "epoch": 1.792532285233015, "grad_norm": 0.6829647421836853, "learning_rate": 4.174864892893485e-06, "loss": 0.3733, "step": 6385 }, { "epoch": 1.7928130263896689, "grad_norm": 0.6945798993110657, "learning_rate": 4.173253978010891e-06, "loss": 0.3611, "step": 6386 }, { "epoch": 1.7930937675463223, "grad_norm": 0.7382370829582214, "learning_rate": 4.171643151353492e-06, "loss": 0.4309, "step": 6387 }, { "epoch": 1.7933745087029758, "grad_norm": 0.6458000540733337, "learning_rate": 4.170032413093185e-06, "loss": 0.3478, "step": 6388 }, { "epoch": 1.7936552498596294, "grad_norm": 0.7223645448684692, "learning_rate": 4.168421763401857e-06, "loss": 0.3803, "step": 6389 }, { "epoch": 1.793935991016283, "grad_norm": 0.6951597332954407, "learning_rate": 4.1668112024513875e-06, "loss": 0.4198, "step": 6390 }, { "epoch": 1.7942167321729365, "grad_norm": 0.6678778529167175, "learning_rate": 4.1652007304136446e-06, "loss": 0.3857, "step": 6391 }, { "epoch": 1.79449747332959, "grad_norm": 0.745650053024292, "learning_rate": 4.163590347460489e-06, "loss": 0.3989, "step": 6392 }, { "epoch": 1.7947782144862436, "grad_norm": 0.6009384989738464, "learning_rate": 4.161980053763769e-06, "loss": 0.3711, "step": 6393 }, { "epoch": 1.7950589556428973, "grad_norm": 0.6509097814559937, "learning_rate": 4.160369849495329e-06, "loss": 0.4101, "step": 6394 }, { "epoch": 1.7953396967995507, "grad_norm": 0.6226939558982849, "learning_rate": 4.158759734826995e-06, "loss": 0.3934, "step": 6395 }, { "epoch": 1.7956204379562044, "grad_norm": 0.6726289391517639, "learning_rate": 4.157149709930592e-06, "loss": 0.3966, "step": 6396 }, { "epoch": 1.795901179112858, "grad_norm": 0.641761302947998, "learning_rate": 4.15553977497793e-06, "loss": 0.3934, "step": 6397 }, { "epoch": 1.7961819202695115, "grad_norm": 0.5958701372146606, "learning_rate": 4.153929930140812e-06, "loss": 0.3615, "step": 6398 }, { "epoch": 1.796462661426165, "grad_norm": 0.681663990020752, "learning_rate": 4.15232017559103e-06, "loss": 0.357, "step": 6399 }, { "epoch": 1.7967434025828186, "grad_norm": 0.6625244617462158, "learning_rate": 4.1507105115003665e-06, "loss": 0.3852, "step": 6400 }, { "epoch": 1.7970241437394723, "grad_norm": 0.6397461295127869, "learning_rate": 4.149100938040598e-06, "loss": 0.3958, "step": 6401 }, { "epoch": 1.7973048848961257, "grad_norm": 0.6216028332710266, "learning_rate": 4.1474914553834846e-06, "loss": 0.368, "step": 6402 }, { "epoch": 1.7975856260527794, "grad_norm": 0.5505136251449585, "learning_rate": 4.145882063700783e-06, "loss": 0.385, "step": 6403 }, { "epoch": 1.797866367209433, "grad_norm": 0.6373128890991211, "learning_rate": 4.144272763164236e-06, "loss": 0.441, "step": 6404 }, { "epoch": 1.7981471083660865, "grad_norm": 0.6197081208229065, "learning_rate": 4.142663553945578e-06, "loss": 0.3666, "step": 6405 }, { "epoch": 1.79842784952274, "grad_norm": 0.5677542090415955, "learning_rate": 4.141054436216533e-06, "loss": 0.3614, "step": 6406 }, { "epoch": 1.7987085906793936, "grad_norm": 0.6772831678390503, "learning_rate": 4.1394454101488185e-06, "loss": 0.42, "step": 6407 }, { "epoch": 1.7989893318360473, "grad_norm": 0.590928316116333, "learning_rate": 4.137836475914137e-06, "loss": 0.3672, "step": 6408 }, { "epoch": 1.7992700729927007, "grad_norm": 0.6583422422409058, "learning_rate": 4.136227633684187e-06, "loss": 0.4089, "step": 6409 }, { "epoch": 1.7995508141493541, "grad_norm": 0.6012908816337585, "learning_rate": 4.134618883630653e-06, "loss": 0.3712, "step": 6410 }, { "epoch": 1.799831555306008, "grad_norm": 0.6087749600410461, "learning_rate": 4.133010225925208e-06, "loss": 0.3761, "step": 6411 }, { "epoch": 1.8001122964626615, "grad_norm": 0.6169080138206482, "learning_rate": 4.131401660739522e-06, "loss": 0.3768, "step": 6412 }, { "epoch": 1.800393037619315, "grad_norm": 0.5806935429573059, "learning_rate": 4.129793188245248e-06, "loss": 0.3887, "step": 6413 }, { "epoch": 1.8006737787759686, "grad_norm": 0.6660763621330261, "learning_rate": 4.128184808614035e-06, "loss": 0.4043, "step": 6414 }, { "epoch": 1.8009545199326222, "grad_norm": 0.6095353960990906, "learning_rate": 4.126576522017516e-06, "loss": 0.3823, "step": 6415 }, { "epoch": 1.8012352610892757, "grad_norm": 0.6177636981010437, "learning_rate": 4.124968328627321e-06, "loss": 0.4277, "step": 6416 }, { "epoch": 1.8015160022459291, "grad_norm": 0.6765494346618652, "learning_rate": 4.123360228615064e-06, "loss": 0.3682, "step": 6417 }, { "epoch": 1.8017967434025828, "grad_norm": 0.5754491686820984, "learning_rate": 4.121752222152351e-06, "loss": 0.3381, "step": 6418 }, { "epoch": 1.8020774845592364, "grad_norm": 0.6277416944503784, "learning_rate": 4.12014430941078e-06, "loss": 0.4018, "step": 6419 }, { "epoch": 1.8023582257158899, "grad_norm": 0.6139590740203857, "learning_rate": 4.1185364905619365e-06, "loss": 0.3685, "step": 6420 }, { "epoch": 1.8026389668725435, "grad_norm": 0.6387202739715576, "learning_rate": 4.116928765777397e-06, "loss": 0.3638, "step": 6421 }, { "epoch": 1.8029197080291972, "grad_norm": 0.5808118581771851, "learning_rate": 4.11532113522873e-06, "loss": 0.4067, "step": 6422 }, { "epoch": 1.8032004491858507, "grad_norm": 0.6156762838363647, "learning_rate": 4.113713599087488e-06, "loss": 0.3581, "step": 6423 }, { "epoch": 1.803481190342504, "grad_norm": 0.6466835141181946, "learning_rate": 4.11210615752522e-06, "loss": 0.4458, "step": 6424 }, { "epoch": 1.8037619314991578, "grad_norm": 0.5484521389007568, "learning_rate": 4.1104988107134605e-06, "loss": 0.408, "step": 6425 }, { "epoch": 1.8040426726558114, "grad_norm": 0.656670868396759, "learning_rate": 4.108891558823737e-06, "loss": 0.3845, "step": 6426 }, { "epoch": 1.8043234138124649, "grad_norm": 0.6025376319885254, "learning_rate": 4.1072844020275646e-06, "loss": 0.3551, "step": 6427 }, { "epoch": 1.8046041549691185, "grad_norm": 0.6003783345222473, "learning_rate": 4.10567734049645e-06, "loss": 0.3834, "step": 6428 }, { "epoch": 1.8048848961257722, "grad_norm": 0.5879949927330017, "learning_rate": 4.104070374401888e-06, "loss": 0.3454, "step": 6429 }, { "epoch": 1.8051656372824256, "grad_norm": 0.6855700016021729, "learning_rate": 4.102463503915364e-06, "loss": 0.3708, "step": 6430 }, { "epoch": 1.805446378439079, "grad_norm": 0.6719605326652527, "learning_rate": 4.100856729208354e-06, "loss": 0.413, "step": 6431 }, { "epoch": 1.8057271195957327, "grad_norm": 0.621315062046051, "learning_rate": 4.099250050452323e-06, "loss": 0.3742, "step": 6432 }, { "epoch": 1.8060078607523864, "grad_norm": 0.6734421849250793, "learning_rate": 4.097643467818724e-06, "loss": 0.4077, "step": 6433 }, { "epoch": 1.8062886019090398, "grad_norm": 0.6684174537658691, "learning_rate": 4.0960369814790035e-06, "loss": 0.3671, "step": 6434 }, { "epoch": 1.8065693430656933, "grad_norm": 0.6640192270278931, "learning_rate": 4.094430591604594e-06, "loss": 0.4058, "step": 6435 }, { "epoch": 1.806850084222347, "grad_norm": 0.6472187638282776, "learning_rate": 4.092824298366922e-06, "loss": 0.3693, "step": 6436 }, { "epoch": 1.8071308253790006, "grad_norm": 0.6261100172996521, "learning_rate": 4.091218101937398e-06, "loss": 0.3705, "step": 6437 }, { "epoch": 1.807411566535654, "grad_norm": 0.6097570061683655, "learning_rate": 4.089612002487428e-06, "loss": 0.3594, "step": 6438 }, { "epoch": 1.8076923076923077, "grad_norm": 0.6226575374603271, "learning_rate": 4.088006000188403e-06, "loss": 0.38, "step": 6439 }, { "epoch": 1.8079730488489614, "grad_norm": 0.6200900673866272, "learning_rate": 4.086400095211707e-06, "loss": 0.3751, "step": 6440 }, { "epoch": 1.8082537900056148, "grad_norm": 0.6498468518257141, "learning_rate": 4.0847942877287105e-06, "loss": 0.4159, "step": 6441 }, { "epoch": 1.8085345311622683, "grad_norm": 0.6783927083015442, "learning_rate": 4.083188577910777e-06, "loss": 0.3936, "step": 6442 }, { "epoch": 1.808815272318922, "grad_norm": 0.6968936324119568, "learning_rate": 4.081582965929257e-06, "loss": 0.3893, "step": 6443 }, { "epoch": 1.8090960134755756, "grad_norm": 0.6645230650901794, "learning_rate": 4.079977451955493e-06, "loss": 0.3772, "step": 6444 }, { "epoch": 1.809376754632229, "grad_norm": 0.5472118854522705, "learning_rate": 4.078372036160812e-06, "loss": 0.3699, "step": 6445 }, { "epoch": 1.8096574957888827, "grad_norm": 0.6444216370582581, "learning_rate": 4.076766718716537e-06, "loss": 0.3696, "step": 6446 }, { "epoch": 1.8099382369455363, "grad_norm": 0.5640847682952881, "learning_rate": 4.075161499793976e-06, "loss": 0.3738, "step": 6447 }, { "epoch": 1.8102189781021898, "grad_norm": 0.6268128752708435, "learning_rate": 4.073556379564429e-06, "loss": 0.375, "step": 6448 }, { "epoch": 1.8104997192588432, "grad_norm": 0.6185505390167236, "learning_rate": 4.071951358199184e-06, "loss": 0.3408, "step": 6449 }, { "epoch": 1.810780460415497, "grad_norm": 0.7345503568649292, "learning_rate": 4.070346435869518e-06, "loss": 0.3982, "step": 6450 }, { "epoch": 1.8110612015721506, "grad_norm": 0.6497979164123535, "learning_rate": 4.0687416127467e-06, "loss": 0.405, "step": 6451 }, { "epoch": 1.811341942728804, "grad_norm": 0.6260815262794495, "learning_rate": 4.067136889001986e-06, "loss": 0.4078, "step": 6452 }, { "epoch": 1.8116226838854577, "grad_norm": 0.6811968684196472, "learning_rate": 4.065532264806623e-06, "loss": 0.4309, "step": 6453 }, { "epoch": 1.8119034250421113, "grad_norm": 0.6197689771652222, "learning_rate": 4.063927740331845e-06, "loss": 0.3978, "step": 6454 }, { "epoch": 1.8121841661987648, "grad_norm": 0.7707491517066956, "learning_rate": 4.062323315748877e-06, "loss": 0.423, "step": 6455 }, { "epoch": 1.8124649073554182, "grad_norm": 0.7443587183952332, "learning_rate": 4.060718991228934e-06, "loss": 0.4071, "step": 6456 }, { "epoch": 1.8127456485120719, "grad_norm": 0.7394616603851318, "learning_rate": 4.059114766943219e-06, "loss": 0.3712, "step": 6457 }, { "epoch": 1.8130263896687255, "grad_norm": 0.7203782200813293, "learning_rate": 4.0575106430629255e-06, "loss": 0.3824, "step": 6458 }, { "epoch": 1.813307130825379, "grad_norm": 0.6342647075653076, "learning_rate": 4.055906619759236e-06, "loss": 0.4031, "step": 6459 }, { "epoch": 1.8135878719820324, "grad_norm": 0.5927562713623047, "learning_rate": 4.05430269720332e-06, "loss": 0.3899, "step": 6460 }, { "epoch": 1.813868613138686, "grad_norm": 0.61030113697052, "learning_rate": 4.052698875566339e-06, "loss": 0.4144, "step": 6461 }, { "epoch": 1.8141493542953397, "grad_norm": 0.6469379663467407, "learning_rate": 4.051095155019444e-06, "loss": 0.3706, "step": 6462 }, { "epoch": 1.8144300954519932, "grad_norm": 0.77492755651474, "learning_rate": 4.049491535733773e-06, "loss": 0.3409, "step": 6463 }, { "epoch": 1.8147108366086468, "grad_norm": 0.6135831475257874, "learning_rate": 4.047888017880453e-06, "loss": 0.369, "step": 6464 }, { "epoch": 1.8149915777653005, "grad_norm": 0.772193968296051, "learning_rate": 4.046284601630606e-06, "loss": 0.4013, "step": 6465 }, { "epoch": 1.815272318921954, "grad_norm": 0.6679842472076416, "learning_rate": 4.044681287155334e-06, "loss": 0.3721, "step": 6466 }, { "epoch": 1.8155530600786074, "grad_norm": 0.7335694432258606, "learning_rate": 4.043078074625734e-06, "loss": 0.3764, "step": 6467 }, { "epoch": 1.815833801235261, "grad_norm": 0.6807160973548889, "learning_rate": 4.041474964212891e-06, "loss": 0.3692, "step": 6468 }, { "epoch": 1.8161145423919147, "grad_norm": 0.6571961641311646, "learning_rate": 4.039871956087879e-06, "loss": 0.3923, "step": 6469 }, { "epoch": 1.8163952835485682, "grad_norm": 0.6566359400749207, "learning_rate": 4.0382690504217615e-06, "loss": 0.3867, "step": 6470 }, { "epoch": 1.8166760247052218, "grad_norm": 0.6149438619613647, "learning_rate": 4.036666247385591e-06, "loss": 0.3503, "step": 6471 }, { "epoch": 1.8169567658618755, "grad_norm": 0.6013513207435608, "learning_rate": 4.035063547150408e-06, "loss": 0.3967, "step": 6472 }, { "epoch": 1.817237507018529, "grad_norm": 0.5649804472923279, "learning_rate": 4.033460949887242e-06, "loss": 0.3718, "step": 6473 }, { "epoch": 1.8175182481751824, "grad_norm": 0.6157485246658325, "learning_rate": 4.031858455767113e-06, "loss": 0.3748, "step": 6474 }, { "epoch": 1.817798989331836, "grad_norm": 0.6589345335960388, "learning_rate": 4.030256064961029e-06, "loss": 0.3966, "step": 6475 }, { "epoch": 1.8180797304884897, "grad_norm": 0.6747671961784363, "learning_rate": 4.0286537776399855e-06, "loss": 0.3776, "step": 6476 }, { "epoch": 1.8183604716451431, "grad_norm": 0.7009803056716919, "learning_rate": 4.027051593974973e-06, "loss": 0.4255, "step": 6477 }, { "epoch": 1.8186412128017966, "grad_norm": 0.6249062418937683, "learning_rate": 4.025449514136963e-06, "loss": 0.3657, "step": 6478 }, { "epoch": 1.8189219539584505, "grad_norm": 0.6347492337226868, "learning_rate": 4.023847538296921e-06, "loss": 0.4021, "step": 6479 }, { "epoch": 1.819202695115104, "grad_norm": 0.6176151633262634, "learning_rate": 4.0222456666257994e-06, "loss": 0.4387, "step": 6480 }, { "epoch": 1.8194834362717573, "grad_norm": 0.5777949094772339, "learning_rate": 4.020643899294541e-06, "loss": 0.3304, "step": 6481 }, { "epoch": 1.819764177428411, "grad_norm": 0.6171284914016724, "learning_rate": 4.0190422364740745e-06, "loss": 0.3835, "step": 6482 }, { "epoch": 1.8200449185850647, "grad_norm": 0.6122297048568726, "learning_rate": 4.017440678335319e-06, "loss": 0.3948, "step": 6483 }, { "epoch": 1.8203256597417181, "grad_norm": 0.5959627628326416, "learning_rate": 4.015839225049186e-06, "loss": 0.3404, "step": 6484 }, { "epoch": 1.8206064008983716, "grad_norm": 0.5845166444778442, "learning_rate": 4.01423787678657e-06, "loss": 0.3491, "step": 6485 }, { "epoch": 1.8208871420550252, "grad_norm": 0.6463956832885742, "learning_rate": 4.012636633718359e-06, "loss": 0.3933, "step": 6486 }, { "epoch": 1.8211678832116789, "grad_norm": 0.6332153081893921, "learning_rate": 4.0110354960154256e-06, "loss": 0.3838, "step": 6487 }, { "epoch": 1.8214486243683323, "grad_norm": 0.6473224759101868, "learning_rate": 4.009434463848634e-06, "loss": 0.3393, "step": 6488 }, { "epoch": 1.821729365524986, "grad_norm": 0.6339811682701111, "learning_rate": 4.007833537388836e-06, "loss": 0.3964, "step": 6489 }, { "epoch": 1.8220101066816397, "grad_norm": 0.6294807195663452, "learning_rate": 4.006232716806874e-06, "loss": 0.3986, "step": 6490 }, { "epoch": 1.822290847838293, "grad_norm": 0.6058825850486755, "learning_rate": 4.004632002273576e-06, "loss": 0.3617, "step": 6491 }, { "epoch": 1.8225715889949465, "grad_norm": 0.6486465930938721, "learning_rate": 4.003031393959761e-06, "loss": 0.3812, "step": 6492 }, { "epoch": 1.8228523301516002, "grad_norm": 0.6626247763633728, "learning_rate": 4.001430892036236e-06, "loss": 0.4084, "step": 6493 }, { "epoch": 1.8231330713082539, "grad_norm": 0.7068142294883728, "learning_rate": 3.999830496673797e-06, "loss": 0.4151, "step": 6494 }, { "epoch": 1.8234138124649073, "grad_norm": 0.6361899971961975, "learning_rate": 3.998230208043227e-06, "loss": 0.3832, "step": 6495 }, { "epoch": 1.823694553621561, "grad_norm": 0.6086771488189697, "learning_rate": 3.9966300263153e-06, "loss": 0.3535, "step": 6496 }, { "epoch": 1.8239752947782146, "grad_norm": 0.6495249271392822, "learning_rate": 3.995029951660777e-06, "loss": 0.3953, "step": 6497 }, { "epoch": 1.824256035934868, "grad_norm": 0.6098946332931519, "learning_rate": 3.993429984250408e-06, "loss": 0.3616, "step": 6498 }, { "epoch": 1.8245367770915215, "grad_norm": 0.680414617061615, "learning_rate": 3.9918301242549316e-06, "loss": 0.3586, "step": 6499 }, { "epoch": 1.8248175182481752, "grad_norm": 0.6312323808670044, "learning_rate": 3.990230371845075e-06, "loss": 0.3427, "step": 6500 }, { "epoch": 1.8250982594048288, "grad_norm": 0.6725577116012573, "learning_rate": 3.988630727191552e-06, "loss": 0.3775, "step": 6501 }, { "epoch": 1.8253790005614823, "grad_norm": 0.6414360404014587, "learning_rate": 3.98703119046507e-06, "loss": 0.3586, "step": 6502 }, { "epoch": 1.8256597417181357, "grad_norm": 0.6267603635787964, "learning_rate": 3.985431761836321e-06, "loss": 0.3808, "step": 6503 }, { "epoch": 1.8259404828747896, "grad_norm": 0.6660203337669373, "learning_rate": 3.983832441475984e-06, "loss": 0.3718, "step": 6504 }, { "epoch": 1.826221224031443, "grad_norm": 0.5696361064910889, "learning_rate": 3.98223322955473e-06, "loss": 0.3814, "step": 6505 }, { "epoch": 1.8265019651880965, "grad_norm": 0.6294177174568176, "learning_rate": 3.980634126243217e-06, "loss": 0.3941, "step": 6506 }, { "epoch": 1.8267827063447502, "grad_norm": 0.6637787222862244, "learning_rate": 3.9790351317120904e-06, "loss": 0.3888, "step": 6507 }, { "epoch": 1.8270634475014038, "grad_norm": 0.5955951809883118, "learning_rate": 3.977436246131987e-06, "loss": 0.385, "step": 6508 }, { "epoch": 1.8273441886580573, "grad_norm": 0.6251006126403809, "learning_rate": 3.975837469673528e-06, "loss": 0.4361, "step": 6509 }, { "epoch": 1.8276249298147107, "grad_norm": 0.6367130875587463, "learning_rate": 3.974238802507324e-06, "loss": 0.376, "step": 6510 }, { "epoch": 1.8279056709713644, "grad_norm": 0.6377788782119751, "learning_rate": 3.972640244803978e-06, "loss": 0.3701, "step": 6511 }, { "epoch": 1.828186412128018, "grad_norm": 0.6418980956077576, "learning_rate": 3.971041796734075e-06, "loss": 0.3804, "step": 6512 }, { "epoch": 1.8284671532846715, "grad_norm": 0.6587963104248047, "learning_rate": 3.969443458468194e-06, "loss": 0.3728, "step": 6513 }, { "epoch": 1.8287478944413251, "grad_norm": 0.6418294906616211, "learning_rate": 3.967845230176898e-06, "loss": 0.3442, "step": 6514 }, { "epoch": 1.8290286355979788, "grad_norm": 0.600695788860321, "learning_rate": 3.9662471120307406e-06, "loss": 0.3981, "step": 6515 }, { "epoch": 1.8293093767546322, "grad_norm": 0.7527453899383545, "learning_rate": 3.964649104200262e-06, "loss": 0.3924, "step": 6516 }, { "epoch": 1.8295901179112857, "grad_norm": 0.6880242824554443, "learning_rate": 3.963051206855993e-06, "loss": 0.3854, "step": 6517 }, { "epoch": 1.8298708590679393, "grad_norm": 0.5922836065292358, "learning_rate": 3.961453420168451e-06, "loss": 0.4188, "step": 6518 }, { "epoch": 1.830151600224593, "grad_norm": 0.6433671116828918, "learning_rate": 3.959855744308142e-06, "loss": 0.3837, "step": 6519 }, { "epoch": 1.8304323413812464, "grad_norm": 0.6398699879646301, "learning_rate": 3.95825817944556e-06, "loss": 0.377, "step": 6520 }, { "epoch": 1.8307130825379, "grad_norm": 0.6227861046791077, "learning_rate": 3.956660725751187e-06, "loss": 0.3595, "step": 6521 }, { "epoch": 1.8309938236945538, "grad_norm": 0.6193345189094543, "learning_rate": 3.955063383395492e-06, "loss": 0.3496, "step": 6522 }, { "epoch": 1.8312745648512072, "grad_norm": 0.6425995230674744, "learning_rate": 3.953466152548935e-06, "loss": 0.3517, "step": 6523 }, { "epoch": 1.8315553060078607, "grad_norm": 0.733466386795044, "learning_rate": 3.951869033381963e-06, "loss": 0.3263, "step": 6524 }, { "epoch": 1.8318360471645143, "grad_norm": 0.6365712285041809, "learning_rate": 3.95027202606501e-06, "loss": 0.3844, "step": 6525 }, { "epoch": 1.832116788321168, "grad_norm": 0.6588440537452698, "learning_rate": 3.948675130768497e-06, "loss": 0.3684, "step": 6526 }, { "epoch": 1.8323975294778214, "grad_norm": 0.6574601531028748, "learning_rate": 3.947078347662836e-06, "loss": 0.3709, "step": 6527 }, { "epoch": 1.8326782706344749, "grad_norm": 0.6721467971801758, "learning_rate": 3.945481676918428e-06, "loss": 0.3809, "step": 6528 }, { "epoch": 1.8329590117911287, "grad_norm": 0.6558837890625, "learning_rate": 3.9438851187056564e-06, "loss": 0.3893, "step": 6529 }, { "epoch": 1.8332397529477822, "grad_norm": 0.5771139860153198, "learning_rate": 3.942288673194899e-06, "loss": 0.3655, "step": 6530 }, { "epoch": 1.8335204941044356, "grad_norm": 0.6688631176948547, "learning_rate": 3.940692340556516e-06, "loss": 0.379, "step": 6531 }, { "epoch": 1.8338012352610893, "grad_norm": 0.7210232615470886, "learning_rate": 3.939096120960859e-06, "loss": 0.3791, "step": 6532 }, { "epoch": 1.834081976417743, "grad_norm": 0.7092217206954956, "learning_rate": 3.937500014578267e-06, "loss": 0.3901, "step": 6533 }, { "epoch": 1.8343627175743964, "grad_norm": 0.7015949487686157, "learning_rate": 3.9359040215790656e-06, "loss": 0.383, "step": 6534 }, { "epoch": 1.8346434587310498, "grad_norm": 0.6470916867256165, "learning_rate": 3.93430814213357e-06, "loss": 0.3674, "step": 6535 }, { "epoch": 1.8349241998877035, "grad_norm": 0.6438083648681641, "learning_rate": 3.932712376412084e-06, "loss": 0.3823, "step": 6536 }, { "epoch": 1.8352049410443572, "grad_norm": 0.6496115326881409, "learning_rate": 3.9311167245848945e-06, "loss": 0.3173, "step": 6537 }, { "epoch": 1.8354856822010106, "grad_norm": 0.7356016039848328, "learning_rate": 3.929521186822281e-06, "loss": 0.4127, "step": 6538 }, { "epoch": 1.8357664233576643, "grad_norm": 0.6692792773246765, "learning_rate": 3.92792576329451e-06, "loss": 0.4103, "step": 6539 }, { "epoch": 1.836047164514318, "grad_norm": 0.6190904378890991, "learning_rate": 3.926330454171835e-06, "loss": 0.371, "step": 6540 }, { "epoch": 1.8363279056709714, "grad_norm": 0.6613806486129761, "learning_rate": 3.924735259624496e-06, "loss": 0.3771, "step": 6541 }, { "epoch": 1.8366086468276248, "grad_norm": 0.5960564613342285, "learning_rate": 3.9231401798227256e-06, "loss": 0.4025, "step": 6542 }, { "epoch": 1.8368893879842785, "grad_norm": 0.6149198412895203, "learning_rate": 3.9215452149367375e-06, "loss": 0.3946, "step": 6543 }, { "epoch": 1.8371701291409321, "grad_norm": 0.6016678214073181, "learning_rate": 3.919950365136737e-06, "loss": 0.3798, "step": 6544 }, { "epoch": 1.8374508702975856, "grad_norm": 0.6572104692459106, "learning_rate": 3.918355630592919e-06, "loss": 0.3869, "step": 6545 }, { "epoch": 1.8377316114542392, "grad_norm": 0.5891427993774414, "learning_rate": 3.9167610114754595e-06, "loss": 0.3549, "step": 6546 }, { "epoch": 1.838012352610893, "grad_norm": 0.6644848585128784, "learning_rate": 3.91516650795453e-06, "loss": 0.3458, "step": 6547 }, { "epoch": 1.8382930937675463, "grad_norm": 0.6368195414543152, "learning_rate": 3.913572120200285e-06, "loss": 0.4116, "step": 6548 }, { "epoch": 1.8385738349241998, "grad_norm": 0.6424891352653503, "learning_rate": 3.911977848382867e-06, "loss": 0.3442, "step": 6549 }, { "epoch": 1.8388545760808535, "grad_norm": 0.6905144453048706, "learning_rate": 3.910383692672406e-06, "loss": 0.3817, "step": 6550 }, { "epoch": 1.8391353172375071, "grad_norm": 0.6814104318618774, "learning_rate": 3.908789653239022e-06, "loss": 0.4331, "step": 6551 }, { "epoch": 1.8394160583941606, "grad_norm": 0.5546183586120605, "learning_rate": 3.907195730252819e-06, "loss": 0.4039, "step": 6552 }, { "epoch": 1.839696799550814, "grad_norm": 0.6172484159469604, "learning_rate": 3.905601923883894e-06, "loss": 0.4318, "step": 6553 }, { "epoch": 1.8399775407074677, "grad_norm": 0.609162449836731, "learning_rate": 3.904008234302325e-06, "loss": 0.3627, "step": 6554 }, { "epoch": 1.8402582818641213, "grad_norm": 0.5798518061637878, "learning_rate": 3.902414661678182e-06, "loss": 0.3811, "step": 6555 }, { "epoch": 1.8405390230207748, "grad_norm": 0.6442756652832031, "learning_rate": 3.900821206181521e-06, "loss": 0.3718, "step": 6556 }, { "epoch": 1.8408197641774284, "grad_norm": 0.6947007775306702, "learning_rate": 3.899227867982386e-06, "loss": 0.4414, "step": 6557 }, { "epoch": 1.841100505334082, "grad_norm": 0.6912604570388794, "learning_rate": 3.897634647250808e-06, "loss": 0.4063, "step": 6558 }, { "epoch": 1.8413812464907355, "grad_norm": 0.6262841820716858, "learning_rate": 3.896041544156805e-06, "loss": 0.3996, "step": 6559 }, { "epoch": 1.841661987647389, "grad_norm": 0.573444128036499, "learning_rate": 3.894448558870382e-06, "loss": 0.3816, "step": 6560 }, { "epoch": 1.8419427288040426, "grad_norm": 0.5473210215568542, "learning_rate": 3.892855691561535e-06, "loss": 0.3919, "step": 6561 }, { "epoch": 1.8422234699606963, "grad_norm": 0.6592857837677002, "learning_rate": 3.891262942400243e-06, "loss": 0.4205, "step": 6562 }, { "epoch": 1.8425042111173497, "grad_norm": 0.6161134243011475, "learning_rate": 3.889670311556476e-06, "loss": 0.3769, "step": 6563 }, { "epoch": 1.8427849522740034, "grad_norm": 0.6447630524635315, "learning_rate": 3.888077799200189e-06, "loss": 0.3758, "step": 6564 }, { "epoch": 1.843065693430657, "grad_norm": 0.6067754030227661, "learning_rate": 3.8864854055013235e-06, "loss": 0.3791, "step": 6565 }, { "epoch": 1.8433464345873105, "grad_norm": 0.7233385443687439, "learning_rate": 3.8848931306298115e-06, "loss": 0.3878, "step": 6566 }, { "epoch": 1.843627175743964, "grad_norm": 0.6731755137443542, "learning_rate": 3.88330097475557e-06, "loss": 0.3664, "step": 6567 }, { "epoch": 1.8439079169006176, "grad_norm": 0.6081092357635498, "learning_rate": 3.881708938048504e-06, "loss": 0.4029, "step": 6568 }, { "epoch": 1.8441886580572713, "grad_norm": 0.6320384740829468, "learning_rate": 3.880117020678506e-06, "loss": 0.3976, "step": 6569 }, { "epoch": 1.8444693992139247, "grad_norm": 0.7215650081634521, "learning_rate": 3.878525222815457e-06, "loss": 0.3794, "step": 6570 }, { "epoch": 1.8447501403705782, "grad_norm": 0.6690097451210022, "learning_rate": 3.876933544629221e-06, "loss": 0.3505, "step": 6571 }, { "epoch": 1.845030881527232, "grad_norm": 0.6703141331672668, "learning_rate": 3.875341986289653e-06, "loss": 0.3982, "step": 6572 }, { "epoch": 1.8453116226838855, "grad_norm": 0.6017905473709106, "learning_rate": 3.8737505479665946e-06, "loss": 0.3662, "step": 6573 }, { "epoch": 1.845592363840539, "grad_norm": 0.575630247592926, "learning_rate": 3.872159229829873e-06, "loss": 0.3891, "step": 6574 }, { "epoch": 1.8458731049971926, "grad_norm": 0.6483700275421143, "learning_rate": 3.870568032049306e-06, "loss": 0.4058, "step": 6575 }, { "epoch": 1.8461538461538463, "grad_norm": 0.6435593366622925, "learning_rate": 3.868976954794696e-06, "loss": 0.4025, "step": 6576 }, { "epoch": 1.8464345873104997, "grad_norm": 0.671330988407135, "learning_rate": 3.867385998235831e-06, "loss": 0.4031, "step": 6577 }, { "epoch": 1.8467153284671531, "grad_norm": 0.6058936715126038, "learning_rate": 3.865795162542487e-06, "loss": 0.3666, "step": 6578 }, { "epoch": 1.8469960696238068, "grad_norm": 0.5750553011894226, "learning_rate": 3.864204447884433e-06, "loss": 0.339, "step": 6579 }, { "epoch": 1.8472768107804605, "grad_norm": 0.6059145331382751, "learning_rate": 3.8626138544314165e-06, "loss": 0.3304, "step": 6580 }, { "epoch": 1.847557551937114, "grad_norm": 0.6579108238220215, "learning_rate": 3.861023382353176e-06, "loss": 0.3919, "step": 6581 }, { "epoch": 1.8478382930937676, "grad_norm": 0.6709635853767395, "learning_rate": 3.859433031819437e-06, "loss": 0.392, "step": 6582 }, { "epoch": 1.8481190342504212, "grad_norm": 0.6408994197845459, "learning_rate": 3.857842802999913e-06, "loss": 0.3889, "step": 6583 }, { "epoch": 1.8483997754070747, "grad_norm": 0.6497156023979187, "learning_rate": 3.856252696064302e-06, "loss": 0.406, "step": 6584 }, { "epoch": 1.8486805165637281, "grad_norm": 0.5648993849754333, "learning_rate": 3.854662711182292e-06, "loss": 0.3931, "step": 6585 }, { "epoch": 1.8489612577203818, "grad_norm": 0.5973063111305237, "learning_rate": 3.853072848523555e-06, "loss": 0.3692, "step": 6586 }, { "epoch": 1.8492419988770354, "grad_norm": 0.6557505130767822, "learning_rate": 3.85148310825775e-06, "loss": 0.3887, "step": 6587 }, { "epoch": 1.8495227400336889, "grad_norm": 0.5865505933761597, "learning_rate": 3.8498934905545254e-06, "loss": 0.387, "step": 6588 }, { "epoch": 1.8498034811903425, "grad_norm": 0.6405991911888123, "learning_rate": 3.848303995583516e-06, "loss": 0.3851, "step": 6589 }, { "epoch": 1.8500842223469962, "grad_norm": 0.6908926963806152, "learning_rate": 3.846714623514342e-06, "loss": 0.3862, "step": 6590 }, { "epoch": 1.8503649635036497, "grad_norm": 0.6192941665649414, "learning_rate": 3.845125374516614e-06, "loss": 0.3625, "step": 6591 }, { "epoch": 1.850645704660303, "grad_norm": 0.6998831629753113, "learning_rate": 3.8435362487599214e-06, "loss": 0.3871, "step": 6592 }, { "epoch": 1.8509264458169568, "grad_norm": 0.5711450576782227, "learning_rate": 3.841947246413849e-06, "loss": 0.3667, "step": 6593 }, { "epoch": 1.8512071869736104, "grad_norm": 0.602340579032898, "learning_rate": 3.840358367647966e-06, "loss": 0.3868, "step": 6594 }, { "epoch": 1.8514879281302639, "grad_norm": 0.6366960406303406, "learning_rate": 3.838769612631826e-06, "loss": 0.3544, "step": 6595 }, { "epoch": 1.8517686692869173, "grad_norm": 0.6178829073905945, "learning_rate": 3.837180981534972e-06, "loss": 0.3735, "step": 6596 }, { "epoch": 1.8520494104435712, "grad_norm": 0.6999847292900085, "learning_rate": 3.835592474526934e-06, "loss": 0.351, "step": 6597 }, { "epoch": 1.8523301516002246, "grad_norm": 0.6134530305862427, "learning_rate": 3.8340040917772245e-06, "loss": 0.3829, "step": 6598 }, { "epoch": 1.852610892756878, "grad_norm": 0.5505448579788208, "learning_rate": 3.832415833455347e-06, "loss": 0.3568, "step": 6599 }, { "epoch": 1.8528916339135317, "grad_norm": 0.6162842512130737, "learning_rate": 3.830827699730792e-06, "loss": 0.367, "step": 6600 }, { "epoch": 1.8531723750701854, "grad_norm": 0.6293978691101074, "learning_rate": 3.829239690773033e-06, "loss": 0.3829, "step": 6601 }, { "epoch": 1.8534531162268388, "grad_norm": 0.545942485332489, "learning_rate": 3.827651806751535e-06, "loss": 0.3475, "step": 6602 }, { "epoch": 1.8537338573834923, "grad_norm": 0.5861099362373352, "learning_rate": 3.826064047835745e-06, "loss": 0.4153, "step": 6603 }, { "epoch": 1.854014598540146, "grad_norm": 0.585117518901825, "learning_rate": 3.824476414195099e-06, "loss": 0.388, "step": 6604 }, { "epoch": 1.8542953396967996, "grad_norm": 0.7277740240097046, "learning_rate": 3.822888905999021e-06, "loss": 0.4142, "step": 6605 }, { "epoch": 1.854576080853453, "grad_norm": 0.7299200296401978, "learning_rate": 3.82130152341692e-06, "loss": 0.3728, "step": 6606 }, { "epoch": 1.8548568220101067, "grad_norm": 0.5398320555686951, "learning_rate": 3.81971426661819e-06, "loss": 0.3618, "step": 6607 }, { "epoch": 1.8551375631667604, "grad_norm": 0.6225571632385254, "learning_rate": 3.8181271357722135e-06, "loss": 0.4139, "step": 6608 }, { "epoch": 1.8554183043234138, "grad_norm": 0.5465289354324341, "learning_rate": 3.8165401310483594e-06, "loss": 0.3345, "step": 6609 }, { "epoch": 1.8556990454800673, "grad_norm": 0.48412251472473145, "learning_rate": 3.814953252615983e-06, "loss": 0.3724, "step": 6610 }, { "epoch": 1.855979786636721, "grad_norm": 0.6475601196289062, "learning_rate": 3.813366500644426e-06, "loss": 0.3863, "step": 6611 }, { "epoch": 1.8562605277933746, "grad_norm": 0.5891608595848083, "learning_rate": 3.8117798753030167e-06, "loss": 0.3448, "step": 6612 }, { "epoch": 1.856541268950028, "grad_norm": 0.6342847347259521, "learning_rate": 3.8101933767610715e-06, "loss": 0.4103, "step": 6613 }, { "epoch": 1.8568220101066817, "grad_norm": 0.6834299564361572, "learning_rate": 3.8086070051878894e-06, "loss": 0.3786, "step": 6614 }, { "epoch": 1.8571027512633353, "grad_norm": 0.6929450035095215, "learning_rate": 3.8070207607527587e-06, "loss": 0.3871, "step": 6615 }, { "epoch": 1.8573834924199888, "grad_norm": 0.6023022532463074, "learning_rate": 3.805434643624953e-06, "loss": 0.3518, "step": 6616 }, { "epoch": 1.8576642335766422, "grad_norm": 0.6579223871231079, "learning_rate": 3.8038486539737348e-06, "loss": 0.4233, "step": 6617 }, { "epoch": 1.857944974733296, "grad_norm": 0.747478723526001, "learning_rate": 3.802262791968349e-06, "loss": 0.3782, "step": 6618 }, { "epoch": 1.8582257158899496, "grad_norm": 0.6331729292869568, "learning_rate": 3.8006770577780315e-06, "loss": 0.3791, "step": 6619 }, { "epoch": 1.858506457046603, "grad_norm": 0.720064640045166, "learning_rate": 3.799091451571999e-06, "loss": 0.3602, "step": 6620 }, { "epoch": 1.8587871982032564, "grad_norm": 0.6993894577026367, "learning_rate": 3.7975059735194587e-06, "loss": 0.3247, "step": 6621 }, { "epoch": 1.8590679393599103, "grad_norm": 0.6288126111030579, "learning_rate": 3.7959206237896027e-06, "loss": 0.4001, "step": 6622 }, { "epoch": 1.8593486805165638, "grad_norm": 0.6296070218086243, "learning_rate": 3.794335402551611e-06, "loss": 0.4012, "step": 6623 }, { "epoch": 1.8596294216732172, "grad_norm": 0.5715782642364502, "learning_rate": 3.792750309974647e-06, "loss": 0.3634, "step": 6624 }, { "epoch": 1.8599101628298709, "grad_norm": 0.5731468796730042, "learning_rate": 3.7911653462278634e-06, "loss": 0.39, "step": 6625 }, { "epoch": 1.8601909039865245, "grad_norm": 0.7277904748916626, "learning_rate": 3.7895805114803962e-06, "loss": 0.353, "step": 6626 }, { "epoch": 1.860471645143178, "grad_norm": 0.6582594513893127, "learning_rate": 3.78799580590137e-06, "loss": 0.4148, "step": 6627 }, { "epoch": 1.8607523862998314, "grad_norm": 0.6079365015029907, "learning_rate": 3.7864112296598936e-06, "loss": 0.4209, "step": 6628 }, { "epoch": 1.861033127456485, "grad_norm": 0.6307871341705322, "learning_rate": 3.7848267829250634e-06, "loss": 0.3617, "step": 6629 }, { "epoch": 1.8613138686131387, "grad_norm": 0.7056549787521362, "learning_rate": 3.7832424658659635e-06, "loss": 0.4091, "step": 6630 }, { "epoch": 1.8615946097697922, "grad_norm": 0.5833138823509216, "learning_rate": 3.7816582786516603e-06, "loss": 0.3934, "step": 6631 }, { "epoch": 1.8618753509264458, "grad_norm": 0.553715705871582, "learning_rate": 3.78007422145121e-06, "loss": 0.396, "step": 6632 }, { "epoch": 1.8621560920830995, "grad_norm": 0.7350575923919678, "learning_rate": 3.778490294433652e-06, "loss": 0.3464, "step": 6633 }, { "epoch": 1.862436833239753, "grad_norm": 0.673321008682251, "learning_rate": 3.7769064977680153e-06, "loss": 0.4206, "step": 6634 }, { "epoch": 1.8627175743964064, "grad_norm": 0.6504115462303162, "learning_rate": 3.7753228316233104e-06, "loss": 0.3649, "step": 6635 }, { "epoch": 1.86299831555306, "grad_norm": 0.7077226042747498, "learning_rate": 3.7737392961685365e-06, "loss": 0.3924, "step": 6636 }, { "epoch": 1.8632790567097137, "grad_norm": 0.5814265608787537, "learning_rate": 3.7721558915726797e-06, "loss": 0.3773, "step": 6637 }, { "epoch": 1.8635597978663672, "grad_norm": 0.6288542747497559, "learning_rate": 3.7705726180047103e-06, "loss": 0.3887, "step": 6638 }, { "epoch": 1.8638405390230208, "grad_norm": 0.6334558129310608, "learning_rate": 3.7689894756335864e-06, "loss": 0.372, "step": 6639 }, { "epoch": 1.8641212801796745, "grad_norm": 0.6678167581558228, "learning_rate": 3.767406464628251e-06, "loss": 0.3395, "step": 6640 }, { "epoch": 1.864402021336328, "grad_norm": 0.6088495850563049, "learning_rate": 3.765823585157632e-06, "loss": 0.3579, "step": 6641 }, { "epoch": 1.8646827624929814, "grad_norm": 0.6270818114280701, "learning_rate": 3.7642408373906445e-06, "loss": 0.37, "step": 6642 }, { "epoch": 1.864963503649635, "grad_norm": 0.6393176317214966, "learning_rate": 3.762658221496191e-06, "loss": 0.3967, "step": 6643 }, { "epoch": 1.8652442448062887, "grad_norm": 0.6403645277023315, "learning_rate": 3.7610757376431575e-06, "loss": 0.3932, "step": 6644 }, { "epoch": 1.8655249859629421, "grad_norm": 0.6309423446655273, "learning_rate": 3.759493386000417e-06, "loss": 0.4042, "step": 6645 }, { "epoch": 1.8658057271195956, "grad_norm": 0.6744917631149292, "learning_rate": 3.7579111667368273e-06, "loss": 0.4014, "step": 6646 }, { "epoch": 1.8660864682762492, "grad_norm": 0.6249033808708191, "learning_rate": 3.7563290800212355e-06, "loss": 0.4366, "step": 6647 }, { "epoch": 1.866367209432903, "grad_norm": 0.6633877754211426, "learning_rate": 3.7547471260224695e-06, "loss": 0.3755, "step": 6648 }, { "epoch": 1.8666479505895563, "grad_norm": 0.6555549502372742, "learning_rate": 3.753165304909346e-06, "loss": 0.3902, "step": 6649 }, { "epoch": 1.86692869174621, "grad_norm": 0.6909569501876831, "learning_rate": 3.751583616850668e-06, "loss": 0.381, "step": 6650 }, { "epoch": 1.8672094329028637, "grad_norm": 0.5843143463134766, "learning_rate": 3.7500020620152226e-06, "loss": 0.3694, "step": 6651 }, { "epoch": 1.8674901740595171, "grad_norm": 0.5654796957969666, "learning_rate": 3.7484206405717844e-06, "loss": 0.3714, "step": 6652 }, { "epoch": 1.8677709152161706, "grad_norm": 0.6510779857635498, "learning_rate": 3.7468393526891133e-06, "loss": 0.4177, "step": 6653 }, { "epoch": 1.8680516563728242, "grad_norm": 0.6091763973236084, "learning_rate": 3.7452581985359505e-06, "loss": 0.4191, "step": 6654 }, { "epoch": 1.8683323975294779, "grad_norm": 0.6130009889602661, "learning_rate": 3.7436771782810314e-06, "loss": 0.3491, "step": 6655 }, { "epoch": 1.8686131386861313, "grad_norm": 0.6302722692489624, "learning_rate": 3.742096292093073e-06, "loss": 0.36, "step": 6656 }, { "epoch": 1.868893879842785, "grad_norm": 0.692513644695282, "learning_rate": 3.740515540140775e-06, "loss": 0.3739, "step": 6657 }, { "epoch": 1.8691746209994387, "grad_norm": 0.6807888746261597, "learning_rate": 3.7389349225928262e-06, "loss": 0.4135, "step": 6658 }, { "epoch": 1.869455362156092, "grad_norm": 0.6932981610298157, "learning_rate": 3.737354439617901e-06, "loss": 0.3831, "step": 6659 }, { "epoch": 1.8697361033127455, "grad_norm": 0.6126266717910767, "learning_rate": 3.7357740913846567e-06, "loss": 0.376, "step": 6660 }, { "epoch": 1.8700168444693992, "grad_norm": 0.6369067430496216, "learning_rate": 3.7341938780617404e-06, "loss": 0.4159, "step": 6661 }, { "epoch": 1.8702975856260529, "grad_norm": 0.587691605091095, "learning_rate": 3.732613799817783e-06, "loss": 0.3612, "step": 6662 }, { "epoch": 1.8705783267827063, "grad_norm": 0.6611619591712952, "learning_rate": 3.7310338568213987e-06, "loss": 0.382, "step": 6663 }, { "epoch": 1.87085906793936, "grad_norm": 0.6849161982536316, "learning_rate": 3.7294540492411898e-06, "loss": 0.3478, "step": 6664 }, { "epoch": 1.8711398090960136, "grad_norm": 0.6290566921234131, "learning_rate": 3.7278743772457438e-06, "loss": 0.4332, "step": 6665 }, { "epoch": 1.871420550252667, "grad_norm": 0.6598408222198486, "learning_rate": 3.726294841003633e-06, "loss": 0.4205, "step": 6666 }, { "epoch": 1.8717012914093205, "grad_norm": 0.6161982417106628, "learning_rate": 3.724715440683417e-06, "loss": 0.4043, "step": 6667 }, { "epoch": 1.8719820325659742, "grad_norm": 0.6266809701919556, "learning_rate": 3.723136176453639e-06, "loss": 0.4025, "step": 6668 }, { "epoch": 1.8722627737226278, "grad_norm": 0.6120468974113464, "learning_rate": 3.721557048482827e-06, "loss": 0.3845, "step": 6669 }, { "epoch": 1.8725435148792813, "grad_norm": 0.6067700982093811, "learning_rate": 3.719978056939497e-06, "loss": 0.3945, "step": 6670 }, { "epoch": 1.8728242560359347, "grad_norm": 0.6045942902565002, "learning_rate": 3.718399201992149e-06, "loss": 0.3828, "step": 6671 }, { "epoch": 1.8731049971925884, "grad_norm": 0.6053402423858643, "learning_rate": 3.716820483809268e-06, "loss": 0.3514, "step": 6672 }, { "epoch": 1.873385738349242, "grad_norm": 0.5942814350128174, "learning_rate": 3.7152419025593257e-06, "loss": 0.4029, "step": 6673 }, { "epoch": 1.8736664795058955, "grad_norm": 0.5933666229248047, "learning_rate": 3.7136634584107787e-06, "loss": 0.3755, "step": 6674 }, { "epoch": 1.8739472206625492, "grad_norm": 0.6381656527519226, "learning_rate": 3.7120851515320676e-06, "loss": 0.3525, "step": 6675 }, { "epoch": 1.8742279618192028, "grad_norm": 0.7048853635787964, "learning_rate": 3.7105069820916193e-06, "loss": 0.3558, "step": 6676 }, { "epoch": 1.8745087029758563, "grad_norm": 0.7625410556793213, "learning_rate": 3.7089289502578486e-06, "loss": 0.3953, "step": 6677 }, { "epoch": 1.8747894441325097, "grad_norm": 0.686286985874176, "learning_rate": 3.7073510561991503e-06, "loss": 0.3895, "step": 6678 }, { "epoch": 1.8750701852891634, "grad_norm": 0.6566884517669678, "learning_rate": 3.7057733000839086e-06, "loss": 0.4032, "step": 6679 }, { "epoch": 1.875350926445817, "grad_norm": 0.5950343012809753, "learning_rate": 3.7041956820804925e-06, "loss": 0.3832, "step": 6680 }, { "epoch": 1.8756316676024705, "grad_norm": 0.49978721141815186, "learning_rate": 3.7026182023572553e-06, "loss": 0.3465, "step": 6681 }, { "epoch": 1.8759124087591241, "grad_norm": 0.6579840183258057, "learning_rate": 3.701040861082536e-06, "loss": 0.3377, "step": 6682 }, { "epoch": 1.8761931499157778, "grad_norm": 0.5922228097915649, "learning_rate": 3.699463658424658e-06, "loss": 0.415, "step": 6683 }, { "epoch": 1.8764738910724312, "grad_norm": 0.6216049194335938, "learning_rate": 3.6978865945519327e-06, "loss": 0.3986, "step": 6684 }, { "epoch": 1.8767546322290847, "grad_norm": 0.6333596110343933, "learning_rate": 3.6963096696326505e-06, "loss": 0.3729, "step": 6685 }, { "epoch": 1.8770353733857383, "grad_norm": 0.7083112597465515, "learning_rate": 3.694732883835094e-06, "loss": 0.3858, "step": 6686 }, { "epoch": 1.877316114542392, "grad_norm": 0.6354517936706543, "learning_rate": 3.6931562373275265e-06, "loss": 0.3647, "step": 6687 }, { "epoch": 1.8775968556990454, "grad_norm": 0.6213771104812622, "learning_rate": 3.6915797302782e-06, "loss": 0.3857, "step": 6688 }, { "epoch": 1.8778775968556989, "grad_norm": 0.6547612547874451, "learning_rate": 3.6900033628553465e-06, "loss": 0.3593, "step": 6689 }, { "epoch": 1.8781583380123528, "grad_norm": 0.5587164163589478, "learning_rate": 3.6884271352271895e-06, "loss": 0.3583, "step": 6690 }, { "epoch": 1.8784390791690062, "grad_norm": 0.6224437952041626, "learning_rate": 3.6868510475619316e-06, "loss": 0.3996, "step": 6691 }, { "epoch": 1.8787198203256597, "grad_norm": 0.6226052045822144, "learning_rate": 3.685275100027764e-06, "loss": 0.3561, "step": 6692 }, { "epoch": 1.8790005614823133, "grad_norm": 0.5926626920700073, "learning_rate": 3.6836992927928618e-06, "loss": 0.3934, "step": 6693 }, { "epoch": 1.879281302638967, "grad_norm": 0.7276115417480469, "learning_rate": 3.682123626025386e-06, "loss": 0.3674, "step": 6694 }, { "epoch": 1.8795620437956204, "grad_norm": 0.5634215474128723, "learning_rate": 3.6805480998934807e-06, "loss": 0.4247, "step": 6695 }, { "epoch": 1.8798427849522739, "grad_norm": 0.5653231143951416, "learning_rate": 3.6789727145652786e-06, "loss": 0.3882, "step": 6696 }, { "epoch": 1.8801235261089275, "grad_norm": 0.5527107119560242, "learning_rate": 3.677397470208892e-06, "loss": 0.3763, "step": 6697 }, { "epoch": 1.8804042672655812, "grad_norm": 0.5930175185203552, "learning_rate": 3.6758223669924232e-06, "loss": 0.3605, "step": 6698 }, { "epoch": 1.8806850084222346, "grad_norm": 0.6014449596405029, "learning_rate": 3.6742474050839573e-06, "loss": 0.3915, "step": 6699 }, { "epoch": 1.8809657495788883, "grad_norm": 0.642204999923706, "learning_rate": 3.6726725846515632e-06, "loss": 0.4136, "step": 6700 }, { "epoch": 1.881246490735542, "grad_norm": 0.5835151672363281, "learning_rate": 3.6710979058632966e-06, "loss": 0.3806, "step": 6701 }, { "epoch": 1.8815272318921954, "grad_norm": 0.6496613025665283, "learning_rate": 3.669523368887199e-06, "loss": 0.3884, "step": 6702 }, { "epoch": 1.8818079730488488, "grad_norm": 0.6161884665489197, "learning_rate": 3.667948973891293e-06, "loss": 0.3931, "step": 6703 }, { "epoch": 1.8820887142055025, "grad_norm": 0.5649263858795166, "learning_rate": 3.6663747210435886e-06, "loss": 0.3605, "step": 6704 }, { "epoch": 1.8823694553621562, "grad_norm": 0.6075252890586853, "learning_rate": 3.6648006105120796e-06, "loss": 0.4164, "step": 6705 }, { "epoch": 1.8826501965188096, "grad_norm": 0.6512606143951416, "learning_rate": 3.6632266424647477e-06, "loss": 0.3717, "step": 6706 }, { "epoch": 1.8829309376754633, "grad_norm": 0.6432591080665588, "learning_rate": 3.661652817069556e-06, "loss": 0.3738, "step": 6707 }, { "epoch": 1.883211678832117, "grad_norm": 0.5726814270019531, "learning_rate": 3.6600791344944523e-06, "loss": 0.3637, "step": 6708 }, { "epoch": 1.8834924199887704, "grad_norm": 0.6158642172813416, "learning_rate": 3.6585055949073717e-06, "loss": 0.3511, "step": 6709 }, { "epoch": 1.8837731611454238, "grad_norm": 0.6513944864273071, "learning_rate": 3.6569321984762314e-06, "loss": 0.3551, "step": 6710 }, { "epoch": 1.8840539023020775, "grad_norm": 0.64405757188797, "learning_rate": 3.655358945368936e-06, "loss": 0.3637, "step": 6711 }, { "epoch": 1.8843346434587311, "grad_norm": 0.6633872389793396, "learning_rate": 3.6537858357533706e-06, "loss": 0.3761, "step": 6712 }, { "epoch": 1.8846153846153846, "grad_norm": 0.6905936002731323, "learning_rate": 3.6522128697974103e-06, "loss": 0.3708, "step": 6713 }, { "epoch": 1.884896125772038, "grad_norm": 0.6328822374343872, "learning_rate": 3.6506400476689107e-06, "loss": 0.3908, "step": 6714 }, { "epoch": 1.885176866928692, "grad_norm": 0.654472291469574, "learning_rate": 3.6490673695357136e-06, "loss": 0.374, "step": 6715 }, { "epoch": 1.8854576080853453, "grad_norm": 0.6299577355384827, "learning_rate": 3.6474948355656463e-06, "loss": 0.3689, "step": 6716 }, { "epoch": 1.8857383492419988, "grad_norm": 0.7236530780792236, "learning_rate": 3.6459224459265207e-06, "loss": 0.3584, "step": 6717 }, { "epoch": 1.8860190903986525, "grad_norm": 0.6885652542114258, "learning_rate": 3.64435020078613e-06, "loss": 0.4165, "step": 6718 }, { "epoch": 1.8862998315553061, "grad_norm": 0.5869594812393188, "learning_rate": 3.642778100312256e-06, "loss": 0.3821, "step": 6719 }, { "epoch": 1.8865805727119596, "grad_norm": 0.6333834528923035, "learning_rate": 3.6412061446726626e-06, "loss": 0.4168, "step": 6720 }, { "epoch": 1.886861313868613, "grad_norm": 0.6479399800300598, "learning_rate": 3.6396343340351003e-06, "loss": 0.3861, "step": 6721 }, { "epoch": 1.8871420550252667, "grad_norm": 0.7683178782463074, "learning_rate": 3.6380626685673016e-06, "loss": 0.4155, "step": 6722 }, { "epoch": 1.8874227961819203, "grad_norm": 0.6431917548179626, "learning_rate": 3.6364911484369867e-06, "loss": 0.3798, "step": 6723 }, { "epoch": 1.8877035373385738, "grad_norm": 0.6315298080444336, "learning_rate": 3.6349197738118567e-06, "loss": 0.3678, "step": 6724 }, { "epoch": 1.8879842784952274, "grad_norm": 0.6385400891304016, "learning_rate": 3.6333485448595994e-06, "loss": 0.3522, "step": 6725 }, { "epoch": 1.888265019651881, "grad_norm": 0.6229247450828552, "learning_rate": 3.631777461747887e-06, "loss": 0.3636, "step": 6726 }, { "epoch": 1.8885457608085345, "grad_norm": 0.615041196346283, "learning_rate": 3.630206524644375e-06, "loss": 0.3971, "step": 6727 }, { "epoch": 1.888826501965188, "grad_norm": 0.6098858714103699, "learning_rate": 3.6286357337167044e-06, "loss": 0.3832, "step": 6728 }, { "epoch": 1.8891072431218416, "grad_norm": 0.6216406226158142, "learning_rate": 3.627065089132502e-06, "loss": 0.3711, "step": 6729 }, { "epoch": 1.8893879842784953, "grad_norm": 0.5764333009719849, "learning_rate": 3.625494591059372e-06, "loss": 0.3994, "step": 6730 }, { "epoch": 1.8896687254351487, "grad_norm": 0.6698734164237976, "learning_rate": 3.623924239664914e-06, "loss": 0.379, "step": 6731 }, { "epoch": 1.8899494665918024, "grad_norm": 0.6468973755836487, "learning_rate": 3.6223540351167043e-06, "loss": 0.3849, "step": 6732 }, { "epoch": 1.890230207748456, "grad_norm": 0.578467071056366, "learning_rate": 3.620783977582305e-06, "loss": 0.3756, "step": 6733 }, { "epoch": 1.8905109489051095, "grad_norm": 0.6839062571525574, "learning_rate": 3.6192140672292625e-06, "loss": 0.4014, "step": 6734 }, { "epoch": 1.890791690061763, "grad_norm": 0.664242148399353, "learning_rate": 3.6176443042251084e-06, "loss": 0.4039, "step": 6735 }, { "epoch": 1.8910724312184166, "grad_norm": 0.5890271663665771, "learning_rate": 3.6160746887373575e-06, "loss": 0.366, "step": 6736 }, { "epoch": 1.8913531723750703, "grad_norm": 0.6562938094139099, "learning_rate": 3.6145052209335097e-06, "loss": 0.3471, "step": 6737 }, { "epoch": 1.8916339135317237, "grad_norm": 0.5657323598861694, "learning_rate": 3.6129359009810488e-06, "loss": 0.3807, "step": 6738 }, { "epoch": 1.8919146546883772, "grad_norm": 0.706623375415802, "learning_rate": 3.611366729047444e-06, "loss": 0.4152, "step": 6739 }, { "epoch": 1.892195395845031, "grad_norm": 0.6669846773147583, "learning_rate": 3.609797705300146e-06, "loss": 0.375, "step": 6740 }, { "epoch": 1.8924761370016845, "grad_norm": 0.6773914694786072, "learning_rate": 3.6082288299065915e-06, "loss": 0.3538, "step": 6741 }, { "epoch": 1.892756878158338, "grad_norm": 0.6867550015449524, "learning_rate": 3.6066601030342014e-06, "loss": 0.405, "step": 6742 }, { "epoch": 1.8930376193149916, "grad_norm": 0.5890536904335022, "learning_rate": 3.60509152485038e-06, "loss": 0.3521, "step": 6743 }, { "epoch": 1.8933183604716453, "grad_norm": 0.6439083814620972, "learning_rate": 3.6035230955225176e-06, "loss": 0.4042, "step": 6744 }, { "epoch": 1.8935991016282987, "grad_norm": 0.649586021900177, "learning_rate": 3.6019548152179874e-06, "loss": 0.4085, "step": 6745 }, { "epoch": 1.8938798427849521, "grad_norm": 0.7091128826141357, "learning_rate": 3.6003866841041434e-06, "loss": 0.384, "step": 6746 }, { "epoch": 1.8941605839416058, "grad_norm": 0.6923140287399292, "learning_rate": 3.5988187023483296e-06, "loss": 0.3471, "step": 6747 }, { "epoch": 1.8944413250982595, "grad_norm": 0.6134341359138489, "learning_rate": 3.59725087011787e-06, "loss": 0.3534, "step": 6748 }, { "epoch": 1.894722066254913, "grad_norm": 0.6707556247711182, "learning_rate": 3.5956831875800747e-06, "loss": 0.4049, "step": 6749 }, { "epoch": 1.8950028074115666, "grad_norm": 0.5964996218681335, "learning_rate": 3.5941156549022373e-06, "loss": 0.4019, "step": 6750 }, { "epoch": 1.8952835485682202, "grad_norm": 0.6699771881103516, "learning_rate": 3.5925482722516345e-06, "loss": 0.3981, "step": 6751 }, { "epoch": 1.8955642897248737, "grad_norm": 0.5670035481452942, "learning_rate": 3.590981039795528e-06, "loss": 0.3561, "step": 6752 }, { "epoch": 1.8958450308815271, "grad_norm": 0.6419764161109924, "learning_rate": 3.589413957701162e-06, "loss": 0.402, "step": 6753 }, { "epoch": 1.8961257720381808, "grad_norm": 0.6489019393920898, "learning_rate": 3.5878470261357666e-06, "loss": 0.3832, "step": 6754 }, { "epoch": 1.8964065131948344, "grad_norm": 0.7206984162330627, "learning_rate": 3.586280245266555e-06, "loss": 0.3905, "step": 6755 }, { "epoch": 1.8966872543514879, "grad_norm": 0.5839571356773376, "learning_rate": 3.584713615260723e-06, "loss": 0.3853, "step": 6756 }, { "epoch": 1.8969679955081415, "grad_norm": 0.6096206903457642, "learning_rate": 3.5831471362854547e-06, "loss": 0.3766, "step": 6757 }, { "epoch": 1.8972487366647952, "grad_norm": 0.6558371186256409, "learning_rate": 3.5815808085079127e-06, "loss": 0.4206, "step": 6758 }, { "epoch": 1.8975294778214487, "grad_norm": 0.5894262790679932, "learning_rate": 3.5800146320952465e-06, "loss": 0.3434, "step": 6759 }, { "epoch": 1.897810218978102, "grad_norm": 0.5367259383201599, "learning_rate": 3.578448607214588e-06, "loss": 0.388, "step": 6760 }, { "epoch": 1.8980909601347558, "grad_norm": 0.6186708807945251, "learning_rate": 3.5768827340330557e-06, "loss": 0.3534, "step": 6761 }, { "epoch": 1.8983717012914094, "grad_norm": 0.5753026604652405, "learning_rate": 3.5753170127177467e-06, "loss": 0.3969, "step": 6762 }, { "epoch": 1.8986524424480629, "grad_norm": 0.6549876928329468, "learning_rate": 3.573751443435747e-06, "loss": 0.4152, "step": 6763 }, { "epoch": 1.8989331836047163, "grad_norm": 0.5275825262069702, "learning_rate": 3.5721860263541235e-06, "loss": 0.4088, "step": 6764 }, { "epoch": 1.89921392476137, "grad_norm": 0.5949759483337402, "learning_rate": 3.5706207616399287e-06, "loss": 0.3474, "step": 6765 }, { "epoch": 1.8994946659180236, "grad_norm": 0.5990729928016663, "learning_rate": 3.569055649460197e-06, "loss": 0.3525, "step": 6766 }, { "epoch": 1.899775407074677, "grad_norm": 0.6396803855895996, "learning_rate": 3.5674906899819494e-06, "loss": 0.4261, "step": 6767 }, { "epoch": 1.9000561482313307, "grad_norm": 0.5973544120788574, "learning_rate": 3.5659258833721867e-06, "loss": 0.3901, "step": 6768 }, { "epoch": 1.9003368893879844, "grad_norm": 0.6160799264907837, "learning_rate": 3.564361229797895e-06, "loss": 0.3694, "step": 6769 }, { "epoch": 1.9006176305446378, "grad_norm": 0.6115102171897888, "learning_rate": 3.562796729426045e-06, "loss": 0.3848, "step": 6770 }, { "epoch": 1.9008983717012913, "grad_norm": 0.6193190813064575, "learning_rate": 3.5612323824235913e-06, "loss": 0.3426, "step": 6771 }, { "epoch": 1.901179112857945, "grad_norm": 0.6612417697906494, "learning_rate": 3.55966818895747e-06, "loss": 0.3894, "step": 6772 }, { "epoch": 1.9014598540145986, "grad_norm": 0.6099318861961365, "learning_rate": 3.5581041491946045e-06, "loss": 0.3654, "step": 6773 }, { "epoch": 1.901740595171252, "grad_norm": 0.6059949994087219, "learning_rate": 3.5565402633018963e-06, "loss": 0.3885, "step": 6774 }, { "epoch": 1.9020213363279057, "grad_norm": 0.6448893547058105, "learning_rate": 3.5549765314462347e-06, "loss": 0.3997, "step": 6775 }, { "epoch": 1.9023020774845594, "grad_norm": 0.5469254851341248, "learning_rate": 3.5534129537944915e-06, "loss": 0.3648, "step": 6776 }, { "epoch": 1.9025828186412128, "grad_norm": 0.6060861349105835, "learning_rate": 3.5518495305135225e-06, "loss": 0.3806, "step": 6777 }, { "epoch": 1.9028635597978663, "grad_norm": 0.5298658013343811, "learning_rate": 3.550286261770166e-06, "loss": 0.3592, "step": 6778 }, { "epoch": 1.90314430095452, "grad_norm": 0.5830897092819214, "learning_rate": 3.5487231477312463e-06, "loss": 0.3884, "step": 6779 }, { "epoch": 1.9034250421111736, "grad_norm": 0.591823160648346, "learning_rate": 3.5471601885635654e-06, "loss": 0.3819, "step": 6780 }, { "epoch": 1.903705783267827, "grad_norm": 0.6853220462799072, "learning_rate": 3.545597384433913e-06, "loss": 0.368, "step": 6781 }, { "epoch": 1.9039865244244805, "grad_norm": 0.6024075746536255, "learning_rate": 3.5440347355090666e-06, "loss": 0.3904, "step": 6782 }, { "epoch": 1.9042672655811343, "grad_norm": 0.6517391204833984, "learning_rate": 3.542472241955778e-06, "loss": 0.3402, "step": 6783 }, { "epoch": 1.9045480067377878, "grad_norm": 0.6168352365493774, "learning_rate": 3.5409099039407867e-06, "loss": 0.3997, "step": 6784 }, { "epoch": 1.9048287478944412, "grad_norm": 0.6700246930122375, "learning_rate": 3.539347721630818e-06, "loss": 0.3822, "step": 6785 }, { "epoch": 1.905109489051095, "grad_norm": 0.6758705973625183, "learning_rate": 3.537785695192578e-06, "loss": 0.4051, "step": 6786 }, { "epoch": 1.9053902302077486, "grad_norm": 0.5712435245513916, "learning_rate": 3.536223824792755e-06, "loss": 0.405, "step": 6787 }, { "epoch": 1.905670971364402, "grad_norm": 0.6648273468017578, "learning_rate": 3.5346621105980237e-06, "loss": 0.3793, "step": 6788 }, { "epoch": 1.9059517125210554, "grad_norm": 0.6850698590278625, "learning_rate": 3.5331005527750385e-06, "loss": 0.3817, "step": 6789 }, { "epoch": 1.906232453677709, "grad_norm": 0.6969839930534363, "learning_rate": 3.5315391514904408e-06, "loss": 0.3413, "step": 6790 }, { "epoch": 1.9065131948343628, "grad_norm": 0.601905107498169, "learning_rate": 3.5299779069108524e-06, "loss": 0.3445, "step": 6791 }, { "epoch": 1.9067939359910162, "grad_norm": 0.6207190155982971, "learning_rate": 3.528416819202881e-06, "loss": 0.3744, "step": 6792 }, { "epoch": 1.9070746771476699, "grad_norm": 0.6887391805648804, "learning_rate": 3.526855888533115e-06, "loss": 0.3885, "step": 6793 }, { "epoch": 1.9073554183043235, "grad_norm": 0.7935128808021545, "learning_rate": 3.525295115068129e-06, "loss": 0.3879, "step": 6794 }, { "epoch": 1.907636159460977, "grad_norm": 0.5594736933708191, "learning_rate": 3.5237344989744765e-06, "loss": 0.3921, "step": 6795 }, { "epoch": 1.9079169006176304, "grad_norm": 0.5839893817901611, "learning_rate": 3.5221740404186983e-06, "loss": 0.3995, "step": 6796 }, { "epoch": 1.908197641774284, "grad_norm": 0.6778796315193176, "learning_rate": 3.520613739567316e-06, "loss": 0.366, "step": 6797 }, { "epoch": 1.9084783829309377, "grad_norm": 0.673738420009613, "learning_rate": 3.519053596586836e-06, "loss": 0.4061, "step": 6798 }, { "epoch": 1.9087591240875912, "grad_norm": 0.6413092613220215, "learning_rate": 3.5174936116437467e-06, "loss": 0.3779, "step": 6799 }, { "epoch": 1.9090398652442448, "grad_norm": 0.6005680561065674, "learning_rate": 3.5159337849045217e-06, "loss": 0.3613, "step": 6800 }, { "epoch": 1.9093206064008985, "grad_norm": 0.6098957061767578, "learning_rate": 3.5143741165356127e-06, "loss": 0.3758, "step": 6801 }, { "epoch": 1.909601347557552, "grad_norm": 0.7080655097961426, "learning_rate": 3.5128146067034595e-06, "loss": 0.3731, "step": 6802 }, { "epoch": 1.9098820887142054, "grad_norm": 0.5995592474937439, "learning_rate": 3.5112552555744837e-06, "loss": 0.3948, "step": 6803 }, { "epoch": 1.910162829870859, "grad_norm": 0.5612696409225464, "learning_rate": 3.509696063315089e-06, "loss": 0.3722, "step": 6804 }, { "epoch": 1.9104435710275127, "grad_norm": 0.6568280458450317, "learning_rate": 3.5081370300916623e-06, "loss": 0.3355, "step": 6805 }, { "epoch": 1.9107243121841662, "grad_norm": 0.6519230008125305, "learning_rate": 3.506578156070576e-06, "loss": 0.3889, "step": 6806 }, { "epoch": 1.9110050533408196, "grad_norm": 0.6857989430427551, "learning_rate": 3.505019441418178e-06, "loss": 0.4229, "step": 6807 }, { "epoch": 1.9112857944974735, "grad_norm": 0.646470844745636, "learning_rate": 3.5034608863008117e-06, "loss": 0.3649, "step": 6808 }, { "epoch": 1.911566535654127, "grad_norm": 0.570564329624176, "learning_rate": 3.501902490884793e-06, "loss": 0.352, "step": 6809 }, { "epoch": 1.9118472768107804, "grad_norm": 0.5701771378517151, "learning_rate": 3.5003442553364253e-06, "loss": 0.3734, "step": 6810 }, { "epoch": 1.912128017967434, "grad_norm": 0.6223947405815125, "learning_rate": 3.498786179821992e-06, "loss": 0.4247, "step": 6811 }, { "epoch": 1.9124087591240877, "grad_norm": 0.6876004338264465, "learning_rate": 3.4972282645077617e-06, "loss": 0.3813, "step": 6812 }, { "epoch": 1.9126895002807411, "grad_norm": 0.6655701398849487, "learning_rate": 3.4956705095599865e-06, "loss": 0.369, "step": 6813 }, { "epoch": 1.9129702414373946, "grad_norm": 0.6083945035934448, "learning_rate": 3.4941129151448995e-06, "loss": 0.3866, "step": 6814 }, { "epoch": 1.9132509825940482, "grad_norm": 0.6104804277420044, "learning_rate": 3.4925554814287177e-06, "loss": 0.3397, "step": 6815 }, { "epoch": 1.913531723750702, "grad_norm": 0.698229968547821, "learning_rate": 3.4909982085776417e-06, "loss": 0.365, "step": 6816 }, { "epoch": 1.9138124649073553, "grad_norm": 0.6584321856498718, "learning_rate": 3.489441096757852e-06, "loss": 0.3846, "step": 6817 }, { "epoch": 1.914093206064009, "grad_norm": 0.6015378832817078, "learning_rate": 3.4878841461355147e-06, "loss": 0.3417, "step": 6818 }, { "epoch": 1.9143739472206627, "grad_norm": 0.6599941253662109, "learning_rate": 3.4863273568767787e-06, "loss": 0.3802, "step": 6819 }, { "epoch": 1.9146546883773161, "grad_norm": 0.688912034034729, "learning_rate": 3.4847707291477735e-06, "loss": 0.3931, "step": 6820 }, { "epoch": 1.9149354295339696, "grad_norm": 0.6507009863853455, "learning_rate": 3.483214263114614e-06, "loss": 0.3508, "step": 6821 }, { "epoch": 1.9152161706906232, "grad_norm": 0.5559006929397583, "learning_rate": 3.4816579589433967e-06, "loss": 0.4068, "step": 6822 }, { "epoch": 1.9154969118472769, "grad_norm": 0.6691533327102661, "learning_rate": 3.4801018168001994e-06, "loss": 0.4489, "step": 6823 }, { "epoch": 1.9157776530039303, "grad_norm": 0.6477255821228027, "learning_rate": 3.4785458368510844e-06, "loss": 0.3773, "step": 6824 }, { "epoch": 1.916058394160584, "grad_norm": 0.6950054168701172, "learning_rate": 3.4769900192620964e-06, "loss": 0.3832, "step": 6825 }, { "epoch": 1.9163391353172377, "grad_norm": 0.7205079197883606, "learning_rate": 3.4754343641992627e-06, "loss": 0.3661, "step": 6826 }, { "epoch": 1.916619876473891, "grad_norm": 0.6157258152961731, "learning_rate": 3.473878871828593e-06, "loss": 0.3646, "step": 6827 }, { "epoch": 1.9169006176305445, "grad_norm": 0.586892306804657, "learning_rate": 3.4723235423160808e-06, "loss": 0.3707, "step": 6828 }, { "epoch": 1.9171813587871982, "grad_norm": 0.7581172585487366, "learning_rate": 3.470768375827699e-06, "loss": 0.4054, "step": 6829 }, { "epoch": 1.9174620999438519, "grad_norm": 0.61969393491745, "learning_rate": 3.4692133725294066e-06, "loss": 0.4026, "step": 6830 }, { "epoch": 1.9177428411005053, "grad_norm": 0.7075392007827759, "learning_rate": 3.4676585325871435e-06, "loss": 0.4167, "step": 6831 }, { "epoch": 1.9180235822571587, "grad_norm": 0.655890703201294, "learning_rate": 3.4661038561668326e-06, "loss": 0.3419, "step": 6832 }, { "epoch": 1.9183043234138126, "grad_norm": 0.6394795775413513, "learning_rate": 3.4645493434343797e-06, "loss": 0.3659, "step": 6833 }, { "epoch": 1.918585064570466, "grad_norm": 0.663291871547699, "learning_rate": 3.462994994555673e-06, "loss": 0.3793, "step": 6834 }, { "epoch": 1.9188658057271195, "grad_norm": 0.736150324344635, "learning_rate": 3.4614408096965822e-06, "loss": 0.3919, "step": 6835 }, { "epoch": 1.9191465468837732, "grad_norm": 0.6177143454551697, "learning_rate": 3.4598867890229605e-06, "loss": 0.3707, "step": 6836 }, { "epoch": 1.9194272880404268, "grad_norm": 0.5929267406463623, "learning_rate": 3.4583329327006445e-06, "loss": 0.4282, "step": 6837 }, { "epoch": 1.9197080291970803, "grad_norm": 0.6003937721252441, "learning_rate": 3.45677924089545e-06, "loss": 0.3754, "step": 6838 }, { "epoch": 1.9199887703537337, "grad_norm": 0.6654490232467651, "learning_rate": 3.455225713773178e-06, "loss": 0.377, "step": 6839 }, { "epoch": 1.9202695115103874, "grad_norm": 0.7084203958511353, "learning_rate": 3.453672351499611e-06, "loss": 0.4031, "step": 6840 }, { "epoch": 1.920550252667041, "grad_norm": 0.6988268494606018, "learning_rate": 3.452119154240515e-06, "loss": 0.374, "step": 6841 }, { "epoch": 1.9208309938236945, "grad_norm": 0.6115239262580872, "learning_rate": 3.4505661221616382e-06, "loss": 0.3694, "step": 6842 }, { "epoch": 1.9211117349803482, "grad_norm": 0.6088030934333801, "learning_rate": 3.44901325542871e-06, "loss": 0.3848, "step": 6843 }, { "epoch": 1.9213924761370018, "grad_norm": 0.7091503739356995, "learning_rate": 3.447460554207441e-06, "loss": 0.3365, "step": 6844 }, { "epoch": 1.9216732172936553, "grad_norm": 0.6756137609481812, "learning_rate": 3.4459080186635275e-06, "loss": 0.4009, "step": 6845 }, { "epoch": 1.9219539584503087, "grad_norm": 0.6201615333557129, "learning_rate": 3.444355648962645e-06, "loss": 0.3667, "step": 6846 }, { "epoch": 1.9222346996069624, "grad_norm": 0.6235066056251526, "learning_rate": 3.4428034452704546e-06, "loss": 0.4193, "step": 6847 }, { "epoch": 1.922515440763616, "grad_norm": 0.6128908395767212, "learning_rate": 3.4412514077525964e-06, "loss": 0.397, "step": 6848 }, { "epoch": 1.9227961819202695, "grad_norm": 0.6845629811286926, "learning_rate": 3.4396995365746965e-06, "loss": 0.3652, "step": 6849 }, { "epoch": 1.9230769230769231, "grad_norm": 0.5951507091522217, "learning_rate": 3.4381478319023575e-06, "loss": 0.3886, "step": 6850 }, { "epoch": 1.9233576642335768, "grad_norm": 0.7217908501625061, "learning_rate": 3.43659629390117e-06, "loss": 0.4387, "step": 6851 }, { "epoch": 1.9236384053902302, "grad_norm": 0.662869393825531, "learning_rate": 3.4350449227367034e-06, "loss": 0.3805, "step": 6852 }, { "epoch": 1.9239191465468837, "grad_norm": 0.6102414727210999, "learning_rate": 3.4334937185745104e-06, "loss": 0.3737, "step": 6853 }, { "epoch": 1.9241998877035373, "grad_norm": 0.6550286412239075, "learning_rate": 3.431942681580127e-06, "loss": 0.3728, "step": 6854 }, { "epoch": 1.924480628860191, "grad_norm": 0.5773801803588867, "learning_rate": 3.430391811919069e-06, "loss": 0.3717, "step": 6855 }, { "epoch": 1.9247613700168444, "grad_norm": 0.5805873870849609, "learning_rate": 3.4288411097568375e-06, "loss": 0.3662, "step": 6856 }, { "epoch": 1.9250421111734979, "grad_norm": 0.7163267731666565, "learning_rate": 3.4272905752589113e-06, "loss": 0.3705, "step": 6857 }, { "epoch": 1.9253228523301515, "grad_norm": 0.6486401557922363, "learning_rate": 3.425740208590753e-06, "loss": 0.3736, "step": 6858 }, { "epoch": 1.9256035934868052, "grad_norm": 0.6509899497032166, "learning_rate": 3.4241900099178125e-06, "loss": 0.3933, "step": 6859 }, { "epoch": 1.9258843346434587, "grad_norm": 0.6873028874397278, "learning_rate": 3.4226399794055144e-06, "loss": 0.3539, "step": 6860 }, { "epoch": 1.9261650758001123, "grad_norm": 0.6148975491523743, "learning_rate": 3.421090117219268e-06, "loss": 0.3756, "step": 6861 }, { "epoch": 1.926445816956766, "grad_norm": 0.600109338760376, "learning_rate": 3.4195404235244665e-06, "loss": 0.4183, "step": 6862 }, { "epoch": 1.9267265581134194, "grad_norm": 0.567651629447937, "learning_rate": 3.4179908984864823e-06, "loss": 0.3429, "step": 6863 }, { "epoch": 1.9270072992700729, "grad_norm": 0.6116517186164856, "learning_rate": 3.4164415422706716e-06, "loss": 0.4281, "step": 6864 }, { "epoch": 1.9272880404267265, "grad_norm": 0.594724178314209, "learning_rate": 3.414892355042373e-06, "loss": 0.391, "step": 6865 }, { "epoch": 1.9275687815833802, "grad_norm": 0.6017888188362122, "learning_rate": 3.4133433369669043e-06, "loss": 0.4176, "step": 6866 }, { "epoch": 1.9278495227400336, "grad_norm": 0.5930371284484863, "learning_rate": 3.411794488209568e-06, "loss": 0.4013, "step": 6867 }, { "epoch": 1.9281302638966873, "grad_norm": 0.634795606136322, "learning_rate": 3.410245808935647e-06, "loss": 0.3922, "step": 6868 }, { "epoch": 1.928411005053341, "grad_norm": 0.5888826251029968, "learning_rate": 3.4086972993104076e-06, "loss": 0.3541, "step": 6869 }, { "epoch": 1.9286917462099944, "grad_norm": 0.592771053314209, "learning_rate": 3.407148959499097e-06, "loss": 0.373, "step": 6870 }, { "epoch": 1.9289724873666478, "grad_norm": 0.6941747069358826, "learning_rate": 3.405600789666945e-06, "loss": 0.41, "step": 6871 }, { "epoch": 1.9292532285233015, "grad_norm": 0.5978795886039734, "learning_rate": 3.404052789979161e-06, "loss": 0.3842, "step": 6872 }, { "epoch": 1.9295339696799552, "grad_norm": 0.5973474383354187, "learning_rate": 3.402504960600938e-06, "loss": 0.3843, "step": 6873 }, { "epoch": 1.9298147108366086, "grad_norm": 0.5809810161590576, "learning_rate": 3.4009573016974517e-06, "loss": 0.3621, "step": 6874 }, { "epoch": 1.9300954519932623, "grad_norm": 0.6391447186470032, "learning_rate": 3.3994098134338587e-06, "loss": 0.3933, "step": 6875 }, { "epoch": 1.930376193149916, "grad_norm": 0.5237780213356018, "learning_rate": 3.397862495975297e-06, "loss": 0.3833, "step": 6876 }, { "epoch": 1.9306569343065694, "grad_norm": 0.6448827981948853, "learning_rate": 3.3963153494868873e-06, "loss": 0.3456, "step": 6877 }, { "epoch": 1.9309376754632228, "grad_norm": 0.6682508587837219, "learning_rate": 3.39476837413373e-06, "loss": 0.4004, "step": 6878 }, { "epoch": 1.9312184166198765, "grad_norm": 0.6115602254867554, "learning_rate": 3.39322157008091e-06, "loss": 0.3822, "step": 6879 }, { "epoch": 1.9314991577765301, "grad_norm": 0.6135326027870178, "learning_rate": 3.3916749374934917e-06, "loss": 0.3374, "step": 6880 }, { "epoch": 1.9317798989331836, "grad_norm": 0.6781626343727112, "learning_rate": 3.390128476536523e-06, "loss": 0.3421, "step": 6881 }, { "epoch": 1.932060640089837, "grad_norm": 0.6635868549346924, "learning_rate": 3.3885821873750314e-06, "loss": 0.3802, "step": 6882 }, { "epoch": 1.9323413812464907, "grad_norm": 0.5993078947067261, "learning_rate": 3.387036070174027e-06, "loss": 0.3896, "step": 6883 }, { "epoch": 1.9326221224031443, "grad_norm": 0.6735298037528992, "learning_rate": 3.3854901250985045e-06, "loss": 0.4156, "step": 6884 }, { "epoch": 1.9329028635597978, "grad_norm": 0.5911180973052979, "learning_rate": 3.383944352313435e-06, "loss": 0.397, "step": 6885 }, { "epoch": 1.9331836047164515, "grad_norm": 0.6654474139213562, "learning_rate": 3.3823987519837752e-06, "loss": 0.3541, "step": 6886 }, { "epoch": 1.9334643458731051, "grad_norm": 0.6673989295959473, "learning_rate": 3.380853324274463e-06, "loss": 0.3677, "step": 6887 }, { "epoch": 1.9337450870297586, "grad_norm": 0.7239603400230408, "learning_rate": 3.3793080693504132e-06, "loss": 0.3958, "step": 6888 }, { "epoch": 1.934025828186412, "grad_norm": 0.6146863102912903, "learning_rate": 3.3777629873765283e-06, "loss": 0.3899, "step": 6889 }, { "epoch": 1.9343065693430657, "grad_norm": 0.573029637336731, "learning_rate": 3.3762180785176897e-06, "loss": 0.3721, "step": 6890 }, { "epoch": 1.9345873104997193, "grad_norm": 0.6924031376838684, "learning_rate": 3.3746733429387596e-06, "loss": 0.4149, "step": 6891 }, { "epoch": 1.9348680516563728, "grad_norm": 0.6505733132362366, "learning_rate": 3.373128780804583e-06, "loss": 0.3501, "step": 6892 }, { "epoch": 1.9351487928130264, "grad_norm": 0.6565077304840088, "learning_rate": 3.3715843922799873e-06, "loss": 0.4297, "step": 6893 }, { "epoch": 1.93542953396968, "grad_norm": 0.681267261505127, "learning_rate": 3.3700401775297787e-06, "loss": 0.3784, "step": 6894 }, { "epoch": 1.9357102751263335, "grad_norm": 0.7059513926506042, "learning_rate": 3.368496136718745e-06, "loss": 0.3856, "step": 6895 }, { "epoch": 1.935991016282987, "grad_norm": 0.6833927035331726, "learning_rate": 3.3669522700116585e-06, "loss": 0.3679, "step": 6896 }, { "epoch": 1.9362717574396406, "grad_norm": 0.6739688515663147, "learning_rate": 3.3654085775732703e-06, "loss": 0.3588, "step": 6897 }, { "epoch": 1.9365524985962943, "grad_norm": 0.6699977517127991, "learning_rate": 3.3638650595683135e-06, "loss": 0.413, "step": 6898 }, { "epoch": 1.9368332397529477, "grad_norm": 0.6002979278564453, "learning_rate": 3.362321716161505e-06, "loss": 0.3458, "step": 6899 }, { "epoch": 1.9371139809096012, "grad_norm": 0.6465573906898499, "learning_rate": 3.360778547517537e-06, "loss": 0.373, "step": 6900 }, { "epoch": 1.937394722066255, "grad_norm": 0.6303117871284485, "learning_rate": 3.359235553801089e-06, "loss": 0.3694, "step": 6901 }, { "epoch": 1.9376754632229085, "grad_norm": 0.655850350856781, "learning_rate": 3.3576927351768195e-06, "loss": 0.3831, "step": 6902 }, { "epoch": 1.937956204379562, "grad_norm": 0.5944316983222961, "learning_rate": 3.3561500918093693e-06, "loss": 0.3822, "step": 6903 }, { "epoch": 1.9382369455362156, "grad_norm": 0.6386849880218506, "learning_rate": 3.354607623863358e-06, "loss": 0.4114, "step": 6904 }, { "epoch": 1.9385176866928693, "grad_norm": 0.6671199202537537, "learning_rate": 3.3530653315033902e-06, "loss": 0.399, "step": 6905 }, { "epoch": 1.9387984278495227, "grad_norm": 0.6202429533004761, "learning_rate": 3.351523214894048e-06, "loss": 0.3604, "step": 6906 }, { "epoch": 1.9390791690061762, "grad_norm": 0.5951457023620605, "learning_rate": 3.349981274199896e-06, "loss": 0.3769, "step": 6907 }, { "epoch": 1.9393599101628298, "grad_norm": 0.6183099746704102, "learning_rate": 3.3484395095854815e-06, "loss": 0.388, "step": 6908 }, { "epoch": 1.9396406513194835, "grad_norm": 0.703260064125061, "learning_rate": 3.3468979212153328e-06, "loss": 0.3831, "step": 6909 }, { "epoch": 1.939921392476137, "grad_norm": 0.6947627663612366, "learning_rate": 3.3453565092539586e-06, "loss": 0.4032, "step": 6910 }, { "epoch": 1.9402021336327906, "grad_norm": 0.6286624670028687, "learning_rate": 3.343815273865848e-06, "loss": 0.3652, "step": 6911 }, { "epoch": 1.9404828747894443, "grad_norm": 0.5934192538261414, "learning_rate": 3.342274215215472e-06, "loss": 0.3909, "step": 6912 }, { "epoch": 1.9407636159460977, "grad_norm": 0.7036631107330322, "learning_rate": 3.3407333334672832e-06, "loss": 0.4325, "step": 6913 }, { "epoch": 1.9410443571027511, "grad_norm": 0.5805580019950867, "learning_rate": 3.339192628785716e-06, "loss": 0.4018, "step": 6914 }, { "epoch": 1.9413250982594048, "grad_norm": 0.6935589909553528, "learning_rate": 3.3376521013351816e-06, "loss": 0.3545, "step": 6915 }, { "epoch": 1.9416058394160585, "grad_norm": 0.5639068484306335, "learning_rate": 3.336111751280078e-06, "loss": 0.3604, "step": 6916 }, { "epoch": 1.941886580572712, "grad_norm": 0.6157374382019043, "learning_rate": 3.3345715787847814e-06, "loss": 0.3801, "step": 6917 }, { "epoch": 1.9421673217293656, "grad_norm": 0.5812360048294067, "learning_rate": 3.3330315840136494e-06, "loss": 0.3986, "step": 6918 }, { "epoch": 1.9424480628860192, "grad_norm": 0.6319735646247864, "learning_rate": 3.3314917671310204e-06, "loss": 0.4064, "step": 6919 }, { "epoch": 1.9427288040426727, "grad_norm": 0.5736815929412842, "learning_rate": 3.329952128301215e-06, "loss": 0.3741, "step": 6920 }, { "epoch": 1.9430095451993261, "grad_norm": 0.577810525894165, "learning_rate": 3.3284126676885324e-06, "loss": 0.3515, "step": 6921 }, { "epoch": 1.9432902863559798, "grad_norm": 0.6701428294181824, "learning_rate": 3.3268733854572553e-06, "loss": 0.4244, "step": 6922 }, { "epoch": 1.9435710275126334, "grad_norm": 0.6287543773651123, "learning_rate": 3.3253342817716456e-06, "loss": 0.3776, "step": 6923 }, { "epoch": 1.9438517686692869, "grad_norm": 0.618564784526825, "learning_rate": 3.323795356795947e-06, "loss": 0.3389, "step": 6924 }, { "epoch": 1.9441325098259403, "grad_norm": 0.6012948751449585, "learning_rate": 3.3222566106943848e-06, "loss": 0.369, "step": 6925 }, { "epoch": 1.9444132509825942, "grad_norm": 0.6347872614860535, "learning_rate": 3.3207180436311646e-06, "loss": 0.3737, "step": 6926 }, { "epoch": 1.9446939921392477, "grad_norm": 0.5722830295562744, "learning_rate": 3.3191796557704712e-06, "loss": 0.3621, "step": 6927 }, { "epoch": 1.944974733295901, "grad_norm": 0.6756933927536011, "learning_rate": 3.3176414472764727e-06, "loss": 0.3688, "step": 6928 }, { "epoch": 1.9452554744525548, "grad_norm": 0.627498209476471, "learning_rate": 3.3161034183133173e-06, "loss": 0.3649, "step": 6929 }, { "epoch": 1.9455362156092084, "grad_norm": 0.6117562055587769, "learning_rate": 3.314565569045133e-06, "loss": 0.3488, "step": 6930 }, { "epoch": 1.9458169567658619, "grad_norm": 0.5619035363197327, "learning_rate": 3.313027899636031e-06, "loss": 0.3735, "step": 6931 }, { "epoch": 1.9460976979225153, "grad_norm": 0.704190731048584, "learning_rate": 3.311490410250101e-06, "loss": 0.3853, "step": 6932 }, { "epoch": 1.946378439079169, "grad_norm": 0.6487050652503967, "learning_rate": 3.309953101051414e-06, "loss": 0.3795, "step": 6933 }, { "epoch": 1.9466591802358226, "grad_norm": 0.6865701675415039, "learning_rate": 3.30841597220402e-06, "loss": 0.3703, "step": 6934 }, { "epoch": 1.946939921392476, "grad_norm": 0.5946937799453735, "learning_rate": 3.3068790238719563e-06, "loss": 0.3701, "step": 6935 }, { "epoch": 1.9472206625491297, "grad_norm": 0.6469739675521851, "learning_rate": 3.305342256219235e-06, "loss": 0.4324, "step": 6936 }, { "epoch": 1.9475014037057834, "grad_norm": 0.7273101806640625, "learning_rate": 3.3038056694098485e-06, "loss": 0.4094, "step": 6937 }, { "epoch": 1.9477821448624368, "grad_norm": 0.6214479804039001, "learning_rate": 3.3022692636077734e-06, "loss": 0.3569, "step": 6938 }, { "epoch": 1.9480628860190903, "grad_norm": 0.6211503148078918, "learning_rate": 3.3007330389769655e-06, "loss": 0.3713, "step": 6939 }, { "epoch": 1.948343627175744, "grad_norm": 0.6319563388824463, "learning_rate": 3.2991969956813604e-06, "loss": 0.4089, "step": 6940 }, { "epoch": 1.9486243683323976, "grad_norm": 0.6137008666992188, "learning_rate": 3.297661133884875e-06, "loss": 0.379, "step": 6941 }, { "epoch": 1.948905109489051, "grad_norm": 0.7523574829101562, "learning_rate": 3.296125453751409e-06, "loss": 0.4119, "step": 6942 }, { "epoch": 1.9491858506457047, "grad_norm": 0.6027659773826599, "learning_rate": 3.294589955444837e-06, "loss": 0.3915, "step": 6943 }, { "epoch": 1.9494665918023584, "grad_norm": 0.515748143196106, "learning_rate": 3.29305463912902e-06, "loss": 0.4017, "step": 6944 }, { "epoch": 1.9497473329590118, "grad_norm": 0.6369849443435669, "learning_rate": 3.291519504967797e-06, "loss": 0.3587, "step": 6945 }, { "epoch": 1.9500280741156653, "grad_norm": 0.6632637977600098, "learning_rate": 3.2899845531249878e-06, "loss": 0.3521, "step": 6946 }, { "epoch": 1.950308815272319, "grad_norm": 0.6441149711608887, "learning_rate": 3.2884497837643934e-06, "loss": 0.3886, "step": 6947 }, { "epoch": 1.9505895564289726, "grad_norm": 0.6686705350875854, "learning_rate": 3.2869151970497964e-06, "loss": 0.3789, "step": 6948 }, { "epoch": 1.950870297585626, "grad_norm": 0.6360732913017273, "learning_rate": 3.285380793144955e-06, "loss": 0.3802, "step": 6949 }, { "epoch": 1.9511510387422795, "grad_norm": 0.6022663712501526, "learning_rate": 3.2838465722136126e-06, "loss": 0.3563, "step": 6950 }, { "epoch": 1.9514317798989333, "grad_norm": 0.54377681016922, "learning_rate": 3.282312534419493e-06, "loss": 0.4103, "step": 6951 }, { "epoch": 1.9517125210555868, "grad_norm": 0.5532845854759216, "learning_rate": 3.280778679926297e-06, "loss": 0.3644, "step": 6952 }, { "epoch": 1.9519932622122402, "grad_norm": 0.6353257894515991, "learning_rate": 3.2792450088977097e-06, "loss": 0.3898, "step": 6953 }, { "epoch": 1.952274003368894, "grad_norm": 0.6258017420768738, "learning_rate": 3.2777115214973953e-06, "loss": 0.371, "step": 6954 }, { "epoch": 1.9525547445255476, "grad_norm": 0.6752704381942749, "learning_rate": 3.2761782178889955e-06, "loss": 0.3796, "step": 6955 }, { "epoch": 1.952835485682201, "grad_norm": 0.6499730944633484, "learning_rate": 3.274645098236138e-06, "loss": 0.3976, "step": 6956 }, { "epoch": 1.9531162268388544, "grad_norm": 0.7651991248130798, "learning_rate": 3.273112162702425e-06, "loss": 0.3648, "step": 6957 }, { "epoch": 1.953396967995508, "grad_norm": 0.6649768948554993, "learning_rate": 3.2715794114514433e-06, "loss": 0.3806, "step": 6958 }, { "epoch": 1.9536777091521618, "grad_norm": 0.6103923320770264, "learning_rate": 3.270046844646758e-06, "loss": 0.368, "step": 6959 }, { "epoch": 1.9539584503088152, "grad_norm": 0.7249850630760193, "learning_rate": 3.268514462451916e-06, "loss": 0.3778, "step": 6960 }, { "epoch": 1.9542391914654689, "grad_norm": 0.6887420415878296, "learning_rate": 3.266982265030444e-06, "loss": 0.3693, "step": 6961 }, { "epoch": 1.9545199326221225, "grad_norm": 0.593892514705658, "learning_rate": 3.265450252545847e-06, "loss": 0.4007, "step": 6962 }, { "epoch": 1.954800673778776, "grad_norm": 0.6767063736915588, "learning_rate": 3.263918425161614e-06, "loss": 0.3579, "step": 6963 }, { "epoch": 1.9550814149354294, "grad_norm": 0.6518818736076355, "learning_rate": 3.2623867830412093e-06, "loss": 0.3879, "step": 6964 }, { "epoch": 1.955362156092083, "grad_norm": 0.6803855299949646, "learning_rate": 3.2608553263480826e-06, "loss": 0.3831, "step": 6965 }, { "epoch": 1.9556428972487367, "grad_norm": 0.5994442701339722, "learning_rate": 3.2593240552456594e-06, "loss": 0.3853, "step": 6966 }, { "epoch": 1.9559236384053902, "grad_norm": 0.6772409081459045, "learning_rate": 3.2577929698973486e-06, "loss": 0.3637, "step": 6967 }, { "epoch": 1.9562043795620438, "grad_norm": 0.6775597333908081, "learning_rate": 3.256262070466538e-06, "loss": 0.3719, "step": 6968 }, { "epoch": 1.9564851207186975, "grad_norm": 0.6733676195144653, "learning_rate": 3.254731357116597e-06, "loss": 0.4176, "step": 6969 }, { "epoch": 1.956765861875351, "grad_norm": 0.6897537112236023, "learning_rate": 3.2532008300108715e-06, "loss": 0.3605, "step": 6970 }, { "epoch": 1.9570466030320044, "grad_norm": 0.5646283030509949, "learning_rate": 3.2516704893126904e-06, "loss": 0.3731, "step": 6971 }, { "epoch": 1.957327344188658, "grad_norm": 0.5377715229988098, "learning_rate": 3.250140335185363e-06, "loss": 0.3694, "step": 6972 }, { "epoch": 1.9576080853453117, "grad_norm": 0.6581278443336487, "learning_rate": 3.2486103677921767e-06, "loss": 0.3723, "step": 6973 }, { "epoch": 1.9578888265019652, "grad_norm": 0.6968143582344055, "learning_rate": 3.2470805872964016e-06, "loss": 0.4159, "step": 6974 }, { "epoch": 1.9581695676586186, "grad_norm": 0.6114397048950195, "learning_rate": 3.245550993861285e-06, "loss": 0.3793, "step": 6975 }, { "epoch": 1.9584503088152723, "grad_norm": 0.6772613525390625, "learning_rate": 3.244021587650057e-06, "loss": 0.4359, "step": 6976 }, { "epoch": 1.958731049971926, "grad_norm": 0.6400285959243774, "learning_rate": 3.2424923688259246e-06, "loss": 0.3723, "step": 6977 }, { "epoch": 1.9590117911285794, "grad_norm": 0.6172521114349365, "learning_rate": 3.2409633375520777e-06, "loss": 0.3647, "step": 6978 }, { "epoch": 1.959292532285233, "grad_norm": 0.5661091208457947, "learning_rate": 3.2394344939916845e-06, "loss": 0.3964, "step": 6979 }, { "epoch": 1.9595732734418867, "grad_norm": 0.5954875349998474, "learning_rate": 3.2379058383078937e-06, "loss": 0.3698, "step": 6980 }, { "epoch": 1.9598540145985401, "grad_norm": 0.5770564675331116, "learning_rate": 3.2363773706638345e-06, "loss": 0.4045, "step": 6981 }, { "epoch": 1.9601347557551936, "grad_norm": 0.7111275792121887, "learning_rate": 3.234849091222616e-06, "loss": 0.3546, "step": 6982 }, { "epoch": 1.9604154969118472, "grad_norm": 0.5821360945701599, "learning_rate": 3.233321000147324e-06, "loss": 0.3822, "step": 6983 }, { "epoch": 1.960696238068501, "grad_norm": 0.6313339471817017, "learning_rate": 3.231793097601029e-06, "loss": 0.3621, "step": 6984 }, { "epoch": 1.9609769792251543, "grad_norm": 0.5764349102973938, "learning_rate": 3.230265383746778e-06, "loss": 0.3608, "step": 6985 }, { "epoch": 1.961257720381808, "grad_norm": 0.6047051548957825, "learning_rate": 3.2287378587476014e-06, "loss": 0.3884, "step": 6986 }, { "epoch": 1.9615384615384617, "grad_norm": 0.6179006099700928, "learning_rate": 3.227210522766505e-06, "loss": 0.3826, "step": 6987 }, { "epoch": 1.9618192026951151, "grad_norm": 0.6662248969078064, "learning_rate": 3.225683375966478e-06, "loss": 0.3407, "step": 6988 }, { "epoch": 1.9620999438517686, "grad_norm": 0.6197724342346191, "learning_rate": 3.224156418510487e-06, "loss": 0.38, "step": 6989 }, { "epoch": 1.9623806850084222, "grad_norm": 0.6409143805503845, "learning_rate": 3.2226296505614796e-06, "loss": 0.4148, "step": 6990 }, { "epoch": 1.9626614261650759, "grad_norm": 0.6775321364402771, "learning_rate": 3.2211030722823844e-06, "loss": 0.3673, "step": 6991 }, { "epoch": 1.9629421673217293, "grad_norm": 0.5687212944030762, "learning_rate": 3.2195766838361065e-06, "loss": 0.3936, "step": 6992 }, { "epoch": 1.9632229084783828, "grad_norm": 0.5648772120475769, "learning_rate": 3.2180504853855334e-06, "loss": 0.3863, "step": 6993 }, { "epoch": 1.9635036496350367, "grad_norm": 0.5899771451950073, "learning_rate": 3.21652447709353e-06, "loss": 0.3845, "step": 6994 }, { "epoch": 1.96378439079169, "grad_norm": 0.602634072303772, "learning_rate": 3.2149986591229443e-06, "loss": 0.4047, "step": 6995 }, { "epoch": 1.9640651319483435, "grad_norm": 0.6572073101997375, "learning_rate": 3.2134730316366015e-06, "loss": 0.4277, "step": 6996 }, { "epoch": 1.9643458731049972, "grad_norm": 0.526705265045166, "learning_rate": 3.211947594797309e-06, "loss": 0.3753, "step": 6997 }, { "epoch": 1.9646266142616509, "grad_norm": 0.6437777876853943, "learning_rate": 3.2104223487678476e-06, "loss": 0.3445, "step": 6998 }, { "epoch": 1.9649073554183043, "grad_norm": 0.6343139410018921, "learning_rate": 3.208897293710985e-06, "loss": 0.4043, "step": 6999 }, { "epoch": 1.9651880965749577, "grad_norm": 0.621269941329956, "learning_rate": 3.2073724297894654e-06, "loss": 0.3572, "step": 7000 }, { "epoch": 1.9654688377316114, "grad_norm": 0.6208169460296631, "learning_rate": 3.205847757166012e-06, "loss": 0.3759, "step": 7001 }, { "epoch": 1.965749578888265, "grad_norm": 0.7012404799461365, "learning_rate": 3.2043232760033294e-06, "loss": 0.3962, "step": 7002 }, { "epoch": 1.9660303200449185, "grad_norm": 0.6112543940544128, "learning_rate": 3.2027989864641008e-06, "loss": 0.356, "step": 7003 }, { "epoch": 1.9663110612015722, "grad_norm": 0.603643536567688, "learning_rate": 3.2012748887109873e-06, "loss": 0.3771, "step": 7004 }, { "epoch": 1.9665918023582258, "grad_norm": 0.7005816102027893, "learning_rate": 3.1997509829066324e-06, "loss": 0.4341, "step": 7005 }, { "epoch": 1.9668725435148793, "grad_norm": 0.6568206548690796, "learning_rate": 3.198227269213657e-06, "loss": 0.3561, "step": 7006 }, { "epoch": 1.9671532846715327, "grad_norm": 0.6353272795677185, "learning_rate": 3.196703747794664e-06, "loss": 0.439, "step": 7007 }, { "epoch": 1.9674340258281864, "grad_norm": 0.6421124339103699, "learning_rate": 3.1951804188122324e-06, "loss": 0.3894, "step": 7008 }, { "epoch": 1.96771476698484, "grad_norm": 0.6794632077217102, "learning_rate": 3.193657282428924e-06, "loss": 0.3903, "step": 7009 }, { "epoch": 1.9679955081414935, "grad_norm": 0.6266258358955383, "learning_rate": 3.1921343388072746e-06, "loss": 0.3201, "step": 7010 }, { "epoch": 1.9682762492981472, "grad_norm": 0.6301803588867188, "learning_rate": 3.1906115881098086e-06, "loss": 0.3973, "step": 7011 }, { "epoch": 1.9685569904548008, "grad_norm": 0.6951670050621033, "learning_rate": 3.1890890304990217e-06, "loss": 0.3904, "step": 7012 }, { "epoch": 1.9688377316114543, "grad_norm": 0.6230060458183289, "learning_rate": 3.1875666661373927e-06, "loss": 0.373, "step": 7013 }, { "epoch": 1.9691184727681077, "grad_norm": 0.6953141093254089, "learning_rate": 3.1860444951873783e-06, "loss": 0.3742, "step": 7014 }, { "epoch": 1.9693992139247614, "grad_norm": 0.6503199338912964, "learning_rate": 3.184522517811415e-06, "loss": 0.3592, "step": 7015 }, { "epoch": 1.969679955081415, "grad_norm": 0.6654722094535828, "learning_rate": 3.1830007341719182e-06, "loss": 0.3723, "step": 7016 }, { "epoch": 1.9699606962380685, "grad_norm": 0.6210854053497314, "learning_rate": 3.1814791444312843e-06, "loss": 0.3341, "step": 7017 }, { "epoch": 1.970241437394722, "grad_norm": 0.6457767486572266, "learning_rate": 3.1799577487518875e-06, "loss": 0.3821, "step": 7018 }, { "epoch": 1.9705221785513758, "grad_norm": 0.6870471239089966, "learning_rate": 3.178436547296082e-06, "loss": 0.3652, "step": 7019 }, { "epoch": 1.9708029197080292, "grad_norm": 0.6089987754821777, "learning_rate": 3.1769155402262002e-06, "loss": 0.3559, "step": 7020 }, { "epoch": 1.9710836608646827, "grad_norm": 0.6179919242858887, "learning_rate": 3.175394727704555e-06, "loss": 0.3526, "step": 7021 }, { "epoch": 1.9713644020213363, "grad_norm": 0.5888646841049194, "learning_rate": 3.173874109893438e-06, "loss": 0.3808, "step": 7022 }, { "epoch": 1.97164514317799, "grad_norm": 0.5951924920082092, "learning_rate": 3.1723536869551197e-06, "loss": 0.3519, "step": 7023 }, { "epoch": 1.9719258843346434, "grad_norm": 0.6952611207962036, "learning_rate": 3.170833459051851e-06, "loss": 0.3557, "step": 7024 }, { "epoch": 1.9722066254912969, "grad_norm": 0.6679577231407166, "learning_rate": 3.1693134263458614e-06, "loss": 0.3927, "step": 7025 }, { "epoch": 1.9724873666479505, "grad_norm": 0.5919626951217651, "learning_rate": 3.167793588999358e-06, "loss": 0.402, "step": 7026 }, { "epoch": 1.9727681078046042, "grad_norm": 0.5656420588493347, "learning_rate": 3.1662739471745287e-06, "loss": 0.3738, "step": 7027 }, { "epoch": 1.9730488489612577, "grad_norm": 0.6829491853713989, "learning_rate": 3.16475450103354e-06, "loss": 0.4032, "step": 7028 }, { "epoch": 1.9733295901179113, "grad_norm": 0.6164791584014893, "learning_rate": 3.1632352507385393e-06, "loss": 0.3654, "step": 7029 }, { "epoch": 1.973610331274565, "grad_norm": 0.7377853989601135, "learning_rate": 3.1617161964516503e-06, "loss": 0.375, "step": 7030 }, { "epoch": 1.9738910724312184, "grad_norm": 0.635339081287384, "learning_rate": 3.1601973383349784e-06, "loss": 0.4071, "step": 7031 }, { "epoch": 1.9741718135878719, "grad_norm": 0.6464971899986267, "learning_rate": 3.158678676550604e-06, "loss": 0.3729, "step": 7032 }, { "epoch": 1.9744525547445255, "grad_norm": 0.59506756067276, "learning_rate": 3.1571602112605916e-06, "loss": 0.3461, "step": 7033 }, { "epoch": 1.9747332959011792, "grad_norm": 0.7854287624359131, "learning_rate": 3.1556419426269808e-06, "loss": 0.3778, "step": 7034 }, { "epoch": 1.9750140370578326, "grad_norm": 0.6562832593917847, "learning_rate": 3.1541238708117926e-06, "loss": 0.3554, "step": 7035 }, { "epoch": 1.9752947782144863, "grad_norm": 0.6035671234130859, "learning_rate": 3.152605995977026e-06, "loss": 0.3804, "step": 7036 }, { "epoch": 1.97557551937114, "grad_norm": 0.5490044355392456, "learning_rate": 3.15108831828466e-06, "loss": 0.3672, "step": 7037 }, { "epoch": 1.9758562605277934, "grad_norm": 0.6840856075286865, "learning_rate": 3.1495708378966507e-06, "loss": 0.4226, "step": 7038 }, { "epoch": 1.9761370016844468, "grad_norm": 0.6101973056793213, "learning_rate": 3.1480535549749348e-06, "loss": 0.3825, "step": 7039 }, { "epoch": 1.9764177428411005, "grad_norm": 0.6528775095939636, "learning_rate": 3.1465364696814277e-06, "loss": 0.3835, "step": 7040 }, { "epoch": 1.9766984839977542, "grad_norm": 0.6299150586128235, "learning_rate": 3.145019582178022e-06, "loss": 0.3847, "step": 7041 }, { "epoch": 1.9769792251544076, "grad_norm": 0.5839090943336487, "learning_rate": 3.143502892626591e-06, "loss": 0.3966, "step": 7042 }, { "epoch": 1.977259966311061, "grad_norm": 0.6374778747558594, "learning_rate": 3.141986401188987e-06, "loss": 0.3617, "step": 7043 }, { "epoch": 1.977540707467715, "grad_norm": 0.5736613869667053, "learning_rate": 3.140470108027039e-06, "loss": 0.3905, "step": 7044 }, { "epoch": 1.9778214486243684, "grad_norm": 0.6634061336517334, "learning_rate": 3.1389540133025575e-06, "loss": 0.4334, "step": 7045 }, { "epoch": 1.9781021897810218, "grad_norm": 0.7333037257194519, "learning_rate": 3.1374381171773326e-06, "loss": 0.3997, "step": 7046 }, { "epoch": 1.9783829309376755, "grad_norm": 0.7173023819923401, "learning_rate": 3.135922419813128e-06, "loss": 0.4126, "step": 7047 }, { "epoch": 1.9786636720943291, "grad_norm": 0.6173637509346008, "learning_rate": 3.13440692137169e-06, "loss": 0.36, "step": 7048 }, { "epoch": 1.9789444132509826, "grad_norm": 0.618675172328949, "learning_rate": 3.1328916220147447e-06, "loss": 0.3511, "step": 7049 }, { "epoch": 1.979225154407636, "grad_norm": 0.6620262861251831, "learning_rate": 3.1313765219039947e-06, "loss": 0.3853, "step": 7050 }, { "epoch": 1.9795058955642897, "grad_norm": 0.7026667594909668, "learning_rate": 3.1298616212011224e-06, "loss": 0.3585, "step": 7051 }, { "epoch": 1.9797866367209433, "grad_norm": 0.6468698978424072, "learning_rate": 3.1283469200677886e-06, "loss": 0.3959, "step": 7052 }, { "epoch": 1.9800673778775968, "grad_norm": 0.6284026503562927, "learning_rate": 3.126832418665632e-06, "loss": 0.3833, "step": 7053 }, { "epoch": 1.9803481190342505, "grad_norm": 0.7425382733345032, "learning_rate": 3.1253181171562707e-06, "loss": 0.4048, "step": 7054 }, { "epoch": 1.9806288601909041, "grad_norm": 0.6566505432128906, "learning_rate": 3.123804015701302e-06, "loss": 0.3514, "step": 7055 }, { "epoch": 1.9809096013475576, "grad_norm": 0.5765173435211182, "learning_rate": 3.1222901144623018e-06, "loss": 0.3483, "step": 7056 }, { "epoch": 1.981190342504211, "grad_norm": 0.6245824098587036, "learning_rate": 3.120776413600824e-06, "loss": 0.4012, "step": 7057 }, { "epoch": 1.9814710836608647, "grad_norm": 0.533491849899292, "learning_rate": 3.1192629132784023e-06, "loss": 0.3971, "step": 7058 }, { "epoch": 1.9817518248175183, "grad_norm": 0.7285841703414917, "learning_rate": 3.1177496136565455e-06, "loss": 0.3703, "step": 7059 }, { "epoch": 1.9820325659741718, "grad_norm": 0.6480452418327332, "learning_rate": 3.1162365148967453e-06, "loss": 0.3495, "step": 7060 }, { "epoch": 1.9823133071308254, "grad_norm": 0.6141634583473206, "learning_rate": 3.114723617160468e-06, "loss": 0.3925, "step": 7061 }, { "epoch": 1.982594048287479, "grad_norm": 0.6793792247772217, "learning_rate": 3.113210920609165e-06, "loss": 0.3921, "step": 7062 }, { "epoch": 1.9828747894441325, "grad_norm": 0.6222654581069946, "learning_rate": 3.1116984254042587e-06, "loss": 0.4115, "step": 7063 }, { "epoch": 1.983155530600786, "grad_norm": 0.6494109034538269, "learning_rate": 3.1101861317071536e-06, "loss": 0.4008, "step": 7064 }, { "epoch": 1.9834362717574396, "grad_norm": 0.6529889106750488, "learning_rate": 3.108674039679233e-06, "loss": 0.3543, "step": 7065 }, { "epoch": 1.9837170129140933, "grad_norm": 0.6445561051368713, "learning_rate": 3.107162149481857e-06, "loss": 0.389, "step": 7066 }, { "epoch": 1.9839977540707467, "grad_norm": 0.6440285444259644, "learning_rate": 3.105650461276366e-06, "loss": 0.4036, "step": 7067 }, { "epoch": 1.9842784952274002, "grad_norm": 0.5650212168693542, "learning_rate": 3.1041389752240793e-06, "loss": 0.382, "step": 7068 }, { "epoch": 1.9845592363840538, "grad_norm": 0.7039580345153809, "learning_rate": 3.1026276914862896e-06, "loss": 0.3736, "step": 7069 }, { "epoch": 1.9848399775407075, "grad_norm": 0.7007707953453064, "learning_rate": 3.1011166102242733e-06, "loss": 0.3706, "step": 7070 }, { "epoch": 1.985120718697361, "grad_norm": 0.5565095543861389, "learning_rate": 3.0996057315992844e-06, "loss": 0.3656, "step": 7071 }, { "epoch": 1.9854014598540146, "grad_norm": 0.6265332698822021, "learning_rate": 3.0980950557725546e-06, "loss": 0.3984, "step": 7072 }, { "epoch": 1.9856822010106683, "grad_norm": 0.6178296804428101, "learning_rate": 3.096584582905293e-06, "loss": 0.3621, "step": 7073 }, { "epoch": 1.9859629421673217, "grad_norm": 0.7236082553863525, "learning_rate": 3.095074313158689e-06, "loss": 0.3549, "step": 7074 }, { "epoch": 1.9862436833239752, "grad_norm": 0.6384782195091248, "learning_rate": 3.0935642466939076e-06, "loss": 0.4202, "step": 7075 }, { "epoch": 1.9865244244806288, "grad_norm": 0.7140527963638306, "learning_rate": 3.092054383672094e-06, "loss": 0.386, "step": 7076 }, { "epoch": 1.9868051656372825, "grad_norm": 0.5895060896873474, "learning_rate": 3.090544724254372e-06, "loss": 0.3422, "step": 7077 }, { "epoch": 1.987085906793936, "grad_norm": 0.544243574142456, "learning_rate": 3.089035268601843e-06, "loss": 0.4101, "step": 7078 }, { "epoch": 1.9873666479505896, "grad_norm": 0.6739355325698853, "learning_rate": 3.0875260168755873e-06, "loss": 0.3777, "step": 7079 }, { "epoch": 1.9876473891072433, "grad_norm": 0.5470808744430542, "learning_rate": 3.086016969236662e-06, "loss": 0.3704, "step": 7080 }, { "epoch": 1.9879281302638967, "grad_norm": 0.629051148891449, "learning_rate": 3.084508125846103e-06, "loss": 0.3662, "step": 7081 }, { "epoch": 1.9882088714205501, "grad_norm": 0.6512294411659241, "learning_rate": 3.082999486864925e-06, "loss": 0.354, "step": 7082 }, { "epoch": 1.9884896125772038, "grad_norm": 0.6564597487449646, "learning_rate": 3.081491052454121e-06, "loss": 0.3861, "step": 7083 }, { "epoch": 1.9887703537338575, "grad_norm": 0.5574722290039062, "learning_rate": 3.0799828227746615e-06, "loss": 0.3711, "step": 7084 }, { "epoch": 1.989051094890511, "grad_norm": 0.6071930527687073, "learning_rate": 3.0784747979874954e-06, "loss": 0.3486, "step": 7085 }, { "epoch": 1.9893318360471643, "grad_norm": 0.6464855074882507, "learning_rate": 3.076966978253548e-06, "loss": 0.3358, "step": 7086 }, { "epoch": 1.9896125772038182, "grad_norm": 0.5567142367362976, "learning_rate": 3.0754593637337276e-06, "loss": 0.419, "step": 7087 }, { "epoch": 1.9898933183604717, "grad_norm": 0.6773258447647095, "learning_rate": 3.0739519545889163e-06, "loss": 0.4196, "step": 7088 }, { "epoch": 1.9901740595171251, "grad_norm": 0.7013642191886902, "learning_rate": 3.0724447509799747e-06, "loss": 0.4267, "step": 7089 }, { "epoch": 1.9904548006737788, "grad_norm": 0.6178555488586426, "learning_rate": 3.0709377530677433e-06, "loss": 0.398, "step": 7090 }, { "epoch": 1.9907355418304324, "grad_norm": 0.6290339231491089, "learning_rate": 3.0694309610130386e-06, "loss": 0.3896, "step": 7091 }, { "epoch": 1.9910162829870859, "grad_norm": 0.681408166885376, "learning_rate": 3.0679243749766557e-06, "loss": 0.3943, "step": 7092 }, { "epoch": 1.9912970241437393, "grad_norm": 0.6687710285186768, "learning_rate": 3.066417995119369e-06, "loss": 0.3724, "step": 7093 }, { "epoch": 1.991577765300393, "grad_norm": 0.6002901792526245, "learning_rate": 3.0649118216019296e-06, "loss": 0.3951, "step": 7094 }, { "epoch": 1.9918585064570467, "grad_norm": 0.6790820360183716, "learning_rate": 3.0634058545850677e-06, "loss": 0.403, "step": 7095 }, { "epoch": 1.9921392476137, "grad_norm": 0.7577724456787109, "learning_rate": 3.06190009422949e-06, "loss": 0.3775, "step": 7096 }, { "epoch": 1.9924199887703538, "grad_norm": 0.6288694739341736, "learning_rate": 3.0603945406958812e-06, "loss": 0.4084, "step": 7097 }, { "epoch": 1.9927007299270074, "grad_norm": 0.6337240934371948, "learning_rate": 3.058889194144906e-06, "loss": 0.3466, "step": 7098 }, { "epoch": 1.9929814710836609, "grad_norm": 0.58216392993927, "learning_rate": 3.0573840547372047e-06, "loss": 0.3607, "step": 7099 }, { "epoch": 1.9932622122403143, "grad_norm": 0.6355688571929932, "learning_rate": 3.0558791226333974e-06, "loss": 0.3573, "step": 7100 }, { "epoch": 1.993542953396968, "grad_norm": 0.7551351189613342, "learning_rate": 3.0543743979940797e-06, "loss": 0.3727, "step": 7101 }, { "epoch": 1.9938236945536216, "grad_norm": 0.6733753681182861, "learning_rate": 3.0528698809798287e-06, "loss": 0.3987, "step": 7102 }, { "epoch": 1.994104435710275, "grad_norm": 0.6546279788017273, "learning_rate": 3.0513655717511936e-06, "loss": 0.3818, "step": 7103 }, { "epoch": 1.9943851768669287, "grad_norm": 0.7093046307563782, "learning_rate": 3.049861470468708e-06, "loss": 0.3783, "step": 7104 }, { "epoch": 1.9946659180235824, "grad_norm": 0.5993322134017944, "learning_rate": 3.0483575772928786e-06, "loss": 0.4103, "step": 7105 }, { "epoch": 1.9949466591802358, "grad_norm": 0.6651349067687988, "learning_rate": 3.046853892384192e-06, "loss": 0.3419, "step": 7106 }, { "epoch": 1.9952274003368893, "grad_norm": 0.5755440592765808, "learning_rate": 3.0453504159031128e-06, "loss": 0.3556, "step": 7107 }, { "epoch": 1.995508141493543, "grad_norm": 0.6396806836128235, "learning_rate": 3.043847148010083e-06, "loss": 0.3942, "step": 7108 }, { "epoch": 1.9957888826501966, "grad_norm": 0.6420109868049622, "learning_rate": 3.0423440888655198e-06, "loss": 0.4171, "step": 7109 }, { "epoch": 1.99606962380685, "grad_norm": 0.6826302409172058, "learning_rate": 3.0408412386298216e-06, "loss": 0.3735, "step": 7110 }, { "epoch": 1.9963503649635035, "grad_norm": 0.6247993111610413, "learning_rate": 3.0393385974633626e-06, "loss": 0.3673, "step": 7111 }, { "epoch": 1.9966311061201574, "grad_norm": 0.7043548822402954, "learning_rate": 3.0378361655264955e-06, "loss": 0.4281, "step": 7112 }, { "epoch": 1.9969118472768108, "grad_norm": 0.5801706910133362, "learning_rate": 3.036333942979552e-06, "loss": 0.3961, "step": 7113 }, { "epoch": 1.9971925884334643, "grad_norm": 0.5859178304672241, "learning_rate": 3.034831929982839e-06, "loss": 0.3809, "step": 7114 }, { "epoch": 1.997473329590118, "grad_norm": 0.7079702615737915, "learning_rate": 3.0333301266966415e-06, "loss": 0.3398, "step": 7115 }, { "epoch": 1.9977540707467716, "grad_norm": 0.5475205779075623, "learning_rate": 3.0318285332812225e-06, "loss": 0.3939, "step": 7116 }, { "epoch": 1.998034811903425, "grad_norm": 0.720130443572998, "learning_rate": 3.030327149896825e-06, "loss": 0.3752, "step": 7117 }, { "epoch": 1.9983155530600785, "grad_norm": 0.6578987836837769, "learning_rate": 3.0288259767036645e-06, "loss": 0.3815, "step": 7118 }, { "epoch": 1.9985962942167321, "grad_norm": 0.6133025884628296, "learning_rate": 3.0273250138619376e-06, "loss": 0.3697, "step": 7119 }, { "epoch": 1.9988770353733858, "grad_norm": 0.6277243494987488, "learning_rate": 3.025824261531818e-06, "loss": 0.3579, "step": 7120 }, { "epoch": 1.9991577765300392, "grad_norm": 0.6791841387748718, "learning_rate": 3.0243237198734567e-06, "loss": 0.4439, "step": 7121 }, { "epoch": 1.999438517686693, "grad_norm": 0.6244490146636963, "learning_rate": 3.0228233890469817e-06, "loss": 0.3806, "step": 7122 }, { "epoch": 1.9997192588433466, "grad_norm": 0.647245466709137, "learning_rate": 3.0213232692125005e-06, "loss": 0.3806, "step": 7123 }, { "epoch": 2.0, "grad_norm": 0.683252215385437, "learning_rate": 3.0198233605300947e-06, "loss": 0.3444, "step": 7124 }, { "epoch": 2.0002807411566534, "grad_norm": 0.5534584522247314, "learning_rate": 3.0183236631598257e-06, "loss": 0.3508, "step": 7125 }, { "epoch": 2.0005614823133073, "grad_norm": 0.5867440700531006, "learning_rate": 3.0168241772617323e-06, "loss": 0.2919, "step": 7126 }, { "epoch": 2.0008422234699608, "grad_norm": 0.5523701906204224, "learning_rate": 3.0153249029958296e-06, "loss": 0.3218, "step": 7127 }, { "epoch": 2.001122964626614, "grad_norm": 0.5687662363052368, "learning_rate": 3.013825840522112e-06, "loss": 0.3426, "step": 7128 }, { "epoch": 2.0014037057832677, "grad_norm": 0.5287777781486511, "learning_rate": 3.012326990000551e-06, "loss": 0.3301, "step": 7129 }, { "epoch": 2.0016844469399215, "grad_norm": 0.5678732991218567, "learning_rate": 3.0108283515910914e-06, "loss": 0.3635, "step": 7130 }, { "epoch": 2.001965188096575, "grad_norm": 0.5132319331169128, "learning_rate": 3.00932992545366e-06, "loss": 0.3438, "step": 7131 }, { "epoch": 2.0022459292532284, "grad_norm": 0.5629129409790039, "learning_rate": 3.0078317117481602e-06, "loss": 0.3202, "step": 7132 }, { "epoch": 2.0025266704098823, "grad_norm": 0.584525465965271, "learning_rate": 3.0063337106344713e-06, "loss": 0.3132, "step": 7133 }, { "epoch": 2.0028074115665357, "grad_norm": 0.5917782783508301, "learning_rate": 3.004835922272452e-06, "loss": 0.313, "step": 7134 }, { "epoch": 2.003088152723189, "grad_norm": 0.5950379967689514, "learning_rate": 3.003338346821936e-06, "loss": 0.3386, "step": 7135 }, { "epoch": 2.0033688938798426, "grad_norm": 0.6072313189506531, "learning_rate": 3.001840984442734e-06, "loss": 0.3626, "step": 7136 }, { "epoch": 2.0036496350364965, "grad_norm": 0.5548645257949829, "learning_rate": 3.0003438352946355e-06, "loss": 0.3279, "step": 7137 }, { "epoch": 2.00393037619315, "grad_norm": 0.6289433836936951, "learning_rate": 2.9988468995374093e-06, "loss": 0.3405, "step": 7138 }, { "epoch": 2.0042111173498034, "grad_norm": 0.5315924286842346, "learning_rate": 2.9973501773307984e-06, "loss": 0.3065, "step": 7139 }, { "epoch": 2.004491858506457, "grad_norm": 0.5503185987472534, "learning_rate": 2.995853668834522e-06, "loss": 0.3563, "step": 7140 }, { "epoch": 2.0047725996631107, "grad_norm": 0.5823440551757812, "learning_rate": 2.9943573742082793e-06, "loss": 0.3198, "step": 7141 }, { "epoch": 2.005053340819764, "grad_norm": 0.5085039734840393, "learning_rate": 2.9928612936117453e-06, "loss": 0.2783, "step": 7142 }, { "epoch": 2.0053340819764176, "grad_norm": 0.607925534248352, "learning_rate": 2.9913654272045723e-06, "loss": 0.3569, "step": 7143 }, { "epoch": 2.0056148231330715, "grad_norm": 0.5201929211616516, "learning_rate": 2.9898697751463903e-06, "loss": 0.348, "step": 7144 }, { "epoch": 2.005895564289725, "grad_norm": 0.6306862831115723, "learning_rate": 2.9883743375968067e-06, "loss": 0.3693, "step": 7145 }, { "epoch": 2.0061763054463784, "grad_norm": 0.5337955951690674, "learning_rate": 2.986879114715403e-06, "loss": 0.31, "step": 7146 }, { "epoch": 2.006457046603032, "grad_norm": 0.571149468421936, "learning_rate": 2.985384106661742e-06, "loss": 0.353, "step": 7147 }, { "epoch": 2.0067377877596857, "grad_norm": 0.6062653064727783, "learning_rate": 2.9838893135953604e-06, "loss": 0.3339, "step": 7148 }, { "epoch": 2.007018528916339, "grad_norm": 0.563480794429779, "learning_rate": 2.9823947356757744e-06, "loss": 0.3189, "step": 7149 }, { "epoch": 2.0072992700729926, "grad_norm": 0.5251970291137695, "learning_rate": 2.980900373062475e-06, "loss": 0.3331, "step": 7150 }, { "epoch": 2.0075800112296465, "grad_norm": 0.6197962164878845, "learning_rate": 2.979406225914933e-06, "loss": 0.3201, "step": 7151 }, { "epoch": 2.0078607523863, "grad_norm": 0.6260367631912231, "learning_rate": 2.9779122943925924e-06, "loss": 0.3471, "step": 7152 }, { "epoch": 2.0081414935429533, "grad_norm": 0.5870199799537659, "learning_rate": 2.976418578654877e-06, "loss": 0.324, "step": 7153 }, { "epoch": 2.008422234699607, "grad_norm": 0.6012134552001953, "learning_rate": 2.9749250788611884e-06, "loss": 0.3401, "step": 7154 }, { "epoch": 2.0087029758562607, "grad_norm": 0.5963485836982727, "learning_rate": 2.9734317951709008e-06, "loss": 0.3312, "step": 7155 }, { "epoch": 2.008983717012914, "grad_norm": 0.5649819374084473, "learning_rate": 2.97193872774337e-06, "loss": 0.2976, "step": 7156 }, { "epoch": 2.0092644581695676, "grad_norm": 0.5973899960517883, "learning_rate": 2.9704458767379274e-06, "loss": 0.3381, "step": 7157 }, { "epoch": 2.0095451993262214, "grad_norm": 0.6624057292938232, "learning_rate": 2.968953242313879e-06, "loss": 0.3136, "step": 7158 }, { "epoch": 2.009825940482875, "grad_norm": 0.5636825561523438, "learning_rate": 2.9674608246305103e-06, "loss": 0.3478, "step": 7159 }, { "epoch": 2.0101066816395283, "grad_norm": 0.6221297383308411, "learning_rate": 2.965968623847083e-06, "loss": 0.3079, "step": 7160 }, { "epoch": 2.0103874227961818, "grad_norm": 0.6307937502861023, "learning_rate": 2.964476640122835e-06, "loss": 0.287, "step": 7161 }, { "epoch": 2.0106681639528357, "grad_norm": 0.5652309656143188, "learning_rate": 2.9629848736169825e-06, "loss": 0.3627, "step": 7162 }, { "epoch": 2.010948905109489, "grad_norm": 0.6897119283676147, "learning_rate": 2.9614933244887154e-06, "loss": 0.3268, "step": 7163 }, { "epoch": 2.0112296462661425, "grad_norm": 0.6074008345603943, "learning_rate": 2.9600019928972057e-06, "loss": 0.3863, "step": 7164 }, { "epoch": 2.011510387422796, "grad_norm": 0.6044426560401917, "learning_rate": 2.958510879001597e-06, "loss": 0.3171, "step": 7165 }, { "epoch": 2.01179112857945, "grad_norm": 0.5722121596336365, "learning_rate": 2.9570199829610123e-06, "loss": 0.3659, "step": 7166 }, { "epoch": 2.0120718697361033, "grad_norm": 0.6430336236953735, "learning_rate": 2.955529304934551e-06, "loss": 0.2792, "step": 7167 }, { "epoch": 2.0123526108927567, "grad_norm": 0.5799283385276794, "learning_rate": 2.9540388450812874e-06, "loss": 0.3151, "step": 7168 }, { "epoch": 2.0126333520494106, "grad_norm": 0.6603776812553406, "learning_rate": 2.9525486035602758e-06, "loss": 0.3062, "step": 7169 }, { "epoch": 2.012914093206064, "grad_norm": 0.6019997596740723, "learning_rate": 2.9510585805305447e-06, "loss": 0.3136, "step": 7170 }, { "epoch": 2.0131948343627175, "grad_norm": 0.6055475473403931, "learning_rate": 2.949568776151101e-06, "loss": 0.3324, "step": 7171 }, { "epoch": 2.013475575519371, "grad_norm": 0.5041202902793884, "learning_rate": 2.948079190580927e-06, "loss": 0.2982, "step": 7172 }, { "epoch": 2.013756316676025, "grad_norm": 0.5663584470748901, "learning_rate": 2.9465898239789815e-06, "loss": 0.365, "step": 7173 }, { "epoch": 2.0140370578326783, "grad_norm": 0.6115890145301819, "learning_rate": 2.9451006765042e-06, "loss": 0.3233, "step": 7174 }, { "epoch": 2.0143177989893317, "grad_norm": 0.6371398568153381, "learning_rate": 2.943611748315496e-06, "loss": 0.3415, "step": 7175 }, { "epoch": 2.0145985401459856, "grad_norm": 0.5430166721343994, "learning_rate": 2.9421230395717582e-06, "loss": 0.3388, "step": 7176 }, { "epoch": 2.014879281302639, "grad_norm": 0.5949511528015137, "learning_rate": 2.940634550431852e-06, "loss": 0.3041, "step": 7177 }, { "epoch": 2.0151600224592925, "grad_norm": 0.5818724632263184, "learning_rate": 2.939146281054622e-06, "loss": 0.3297, "step": 7178 }, { "epoch": 2.015440763615946, "grad_norm": 0.549331784248352, "learning_rate": 2.9376582315988845e-06, "loss": 0.3182, "step": 7179 }, { "epoch": 2.0157215047726, "grad_norm": 0.5347083210945129, "learning_rate": 2.9361704022234354e-06, "loss": 0.3378, "step": 7180 }, { "epoch": 2.0160022459292533, "grad_norm": 0.5704168081283569, "learning_rate": 2.934682793087047e-06, "loss": 0.3576, "step": 7181 }, { "epoch": 2.0162829870859067, "grad_norm": 0.5149984359741211, "learning_rate": 2.9331954043484672e-06, "loss": 0.319, "step": 7182 }, { "epoch": 2.01656372824256, "grad_norm": 0.568211019039154, "learning_rate": 2.9317082361664213e-06, "loss": 0.3445, "step": 7183 }, { "epoch": 2.016844469399214, "grad_norm": 0.5676230788230896, "learning_rate": 2.9302212886996107e-06, "loss": 0.3568, "step": 7184 }, { "epoch": 2.0171252105558675, "grad_norm": 0.5790365934371948, "learning_rate": 2.928734562106714e-06, "loss": 0.3476, "step": 7185 }, { "epoch": 2.017405951712521, "grad_norm": 0.6276818513870239, "learning_rate": 2.9272480565463836e-06, "loss": 0.3584, "step": 7186 }, { "epoch": 2.017686692869175, "grad_norm": 0.6061694622039795, "learning_rate": 2.9257617721772508e-06, "loss": 0.2938, "step": 7187 }, { "epoch": 2.0179674340258282, "grad_norm": 0.5798271298408508, "learning_rate": 2.924275709157922e-06, "loss": 0.3567, "step": 7188 }, { "epoch": 2.0182481751824817, "grad_norm": 0.5810397863388062, "learning_rate": 2.9227898676469824e-06, "loss": 0.3068, "step": 7189 }, { "epoch": 2.018528916339135, "grad_norm": 0.6163998246192932, "learning_rate": 2.9213042478029908e-06, "loss": 0.2707, "step": 7190 }, { "epoch": 2.018809657495789, "grad_norm": 0.5872731804847717, "learning_rate": 2.919818849784483e-06, "loss": 0.3263, "step": 7191 }, { "epoch": 2.0190903986524424, "grad_norm": 0.5344861149787903, "learning_rate": 2.9183336737499733e-06, "loss": 0.3386, "step": 7192 }, { "epoch": 2.019371139809096, "grad_norm": 0.6087139844894409, "learning_rate": 2.9168487198579465e-06, "loss": 0.321, "step": 7193 }, { "epoch": 2.0196518809657498, "grad_norm": 0.6097393035888672, "learning_rate": 2.915363988266873e-06, "loss": 0.3357, "step": 7194 }, { "epoch": 2.019932622122403, "grad_norm": 0.5947750210762024, "learning_rate": 2.9138794791351877e-06, "loss": 0.2989, "step": 7195 }, { "epoch": 2.0202133632790567, "grad_norm": 0.576885998249054, "learning_rate": 2.9123951926213145e-06, "loss": 0.3854, "step": 7196 }, { "epoch": 2.02049410443571, "grad_norm": 0.5861470699310303, "learning_rate": 2.9109111288836443e-06, "loss": 0.3753, "step": 7197 }, { "epoch": 2.020774845592364, "grad_norm": 0.5560078024864197, "learning_rate": 2.909427288080545e-06, "loss": 0.3268, "step": 7198 }, { "epoch": 2.0210555867490174, "grad_norm": 0.596595048904419, "learning_rate": 2.9079436703703676e-06, "loss": 0.3612, "step": 7199 }, { "epoch": 2.021336327905671, "grad_norm": 0.5585770606994629, "learning_rate": 2.9064602759114295e-06, "loss": 0.3158, "step": 7200 }, { "epoch": 2.0216170690623247, "grad_norm": 0.5293141007423401, "learning_rate": 2.9049771048620344e-06, "loss": 0.3535, "step": 7201 }, { "epoch": 2.021897810218978, "grad_norm": 0.6145786046981812, "learning_rate": 2.903494157380452e-06, "loss": 0.3221, "step": 7202 }, { "epoch": 2.0221785513756316, "grad_norm": 0.5820688605308533, "learning_rate": 2.902011433624938e-06, "loss": 0.3577, "step": 7203 }, { "epoch": 2.022459292532285, "grad_norm": 0.5819162130355835, "learning_rate": 2.900528933753718e-06, "loss": 0.2765, "step": 7204 }, { "epoch": 2.022740033688939, "grad_norm": 0.567573606967926, "learning_rate": 2.899046657924992e-06, "loss": 0.2921, "step": 7205 }, { "epoch": 2.0230207748455924, "grad_norm": 0.5665693283081055, "learning_rate": 2.8975646062969432e-06, "loss": 0.3204, "step": 7206 }, { "epoch": 2.023301516002246, "grad_norm": 0.5993481874465942, "learning_rate": 2.8960827790277234e-06, "loss": 0.3456, "step": 7207 }, { "epoch": 2.0235822571588993, "grad_norm": 0.5737941861152649, "learning_rate": 2.894601176275469e-06, "loss": 0.3182, "step": 7208 }, { "epoch": 2.023862998315553, "grad_norm": 0.531103253364563, "learning_rate": 2.893119798198284e-06, "loss": 0.3492, "step": 7209 }, { "epoch": 2.0241437394722066, "grad_norm": 0.5729021430015564, "learning_rate": 2.89163864495425e-06, "loss": 0.3027, "step": 7210 }, { "epoch": 2.02442448062886, "grad_norm": 0.5614590048789978, "learning_rate": 2.8901577167014303e-06, "loss": 0.3185, "step": 7211 }, { "epoch": 2.024705221785514, "grad_norm": 0.5724241137504578, "learning_rate": 2.8886770135978582e-06, "loss": 0.3081, "step": 7212 }, { "epoch": 2.0249859629421674, "grad_norm": 0.489589661359787, "learning_rate": 2.8871965358015467e-06, "loss": 0.3074, "step": 7213 }, { "epoch": 2.025266704098821, "grad_norm": 0.499676913022995, "learning_rate": 2.885716283470481e-06, "loss": 0.298, "step": 7214 }, { "epoch": 2.0255474452554743, "grad_norm": 0.6236593723297119, "learning_rate": 2.884236256762625e-06, "loss": 0.3566, "step": 7215 }, { "epoch": 2.025828186412128, "grad_norm": 0.57680743932724, "learning_rate": 2.882756455835921e-06, "loss": 0.3436, "step": 7216 }, { "epoch": 2.0261089275687816, "grad_norm": 0.5888566374778748, "learning_rate": 2.881276880848279e-06, "loss": 0.3102, "step": 7217 }, { "epoch": 2.026389668725435, "grad_norm": 0.6607045531272888, "learning_rate": 2.879797531957596e-06, "loss": 0.2672, "step": 7218 }, { "epoch": 2.026670409882089, "grad_norm": 0.5833709836006165, "learning_rate": 2.8783184093217355e-06, "loss": 0.3353, "step": 7219 }, { "epoch": 2.0269511510387423, "grad_norm": 0.5592872500419617, "learning_rate": 2.8768395130985377e-06, "loss": 0.3496, "step": 7220 }, { "epoch": 2.027231892195396, "grad_norm": 0.5389991402626038, "learning_rate": 2.875360843445827e-06, "loss": 0.3629, "step": 7221 }, { "epoch": 2.0275126333520492, "grad_norm": 0.5927066206932068, "learning_rate": 2.873882400521392e-06, "loss": 0.3587, "step": 7222 }, { "epoch": 2.027793374508703, "grad_norm": 0.5337790250778198, "learning_rate": 2.8724041844830076e-06, "loss": 0.3152, "step": 7223 }, { "epoch": 2.0280741156653566, "grad_norm": 0.5419126152992249, "learning_rate": 2.870926195488417e-06, "loss": 0.4049, "step": 7224 }, { "epoch": 2.02835485682201, "grad_norm": 0.5905657410621643, "learning_rate": 2.8694484336953444e-06, "loss": 0.3667, "step": 7225 }, { "epoch": 2.028635597978664, "grad_norm": 0.5812539458274841, "learning_rate": 2.8679708992614857e-06, "loss": 0.329, "step": 7226 }, { "epoch": 2.0289163391353173, "grad_norm": 0.558904767036438, "learning_rate": 2.8664935923445125e-06, "loss": 0.3185, "step": 7227 }, { "epoch": 2.0291970802919708, "grad_norm": 0.5703031420707703, "learning_rate": 2.865016513102078e-06, "loss": 0.3579, "step": 7228 }, { "epoch": 2.029477821448624, "grad_norm": 0.5999102592468262, "learning_rate": 2.8635396616918027e-06, "loss": 0.3208, "step": 7229 }, { "epoch": 2.029758562605278, "grad_norm": 0.6106962561607361, "learning_rate": 2.8620630382712903e-06, "loss": 0.3185, "step": 7230 }, { "epoch": 2.0300393037619315, "grad_norm": 0.6072873473167419, "learning_rate": 2.8605866429981167e-06, "loss": 0.3138, "step": 7231 }, { "epoch": 2.030320044918585, "grad_norm": 0.5712512731552124, "learning_rate": 2.8591104760298293e-06, "loss": 0.3497, "step": 7232 }, { "epoch": 2.0306007860752384, "grad_norm": 0.5625197887420654, "learning_rate": 2.8576345375239612e-06, "loss": 0.3324, "step": 7233 }, { "epoch": 2.0308815272318923, "grad_norm": 0.5673905611038208, "learning_rate": 2.8561588276380103e-06, "loss": 0.3135, "step": 7234 }, { "epoch": 2.0311622683885457, "grad_norm": 0.5926187038421631, "learning_rate": 2.85468334652946e-06, "loss": 0.3175, "step": 7235 }, { "epoch": 2.031443009545199, "grad_norm": 0.638444721698761, "learning_rate": 2.8532080943557593e-06, "loss": 0.3031, "step": 7236 }, { "epoch": 2.031723750701853, "grad_norm": 0.607799232006073, "learning_rate": 2.851733071274344e-06, "loss": 0.3747, "step": 7237 }, { "epoch": 2.0320044918585065, "grad_norm": 0.6229817867279053, "learning_rate": 2.8502582774426156e-06, "loss": 0.3466, "step": 7238 }, { "epoch": 2.03228523301516, "grad_norm": 0.5465052723884583, "learning_rate": 2.8487837130179514e-06, "loss": 0.3511, "step": 7239 }, { "epoch": 2.0325659741718134, "grad_norm": 0.519872784614563, "learning_rate": 2.8473093781577156e-06, "loss": 0.3977, "step": 7240 }, { "epoch": 2.0328467153284673, "grad_norm": 0.5261691212654114, "learning_rate": 2.845835273019237e-06, "loss": 0.3656, "step": 7241 }, { "epoch": 2.0331274564851207, "grad_norm": 0.5705521702766418, "learning_rate": 2.8443613977598193e-06, "loss": 0.3554, "step": 7242 }, { "epoch": 2.033408197641774, "grad_norm": 0.6061201095581055, "learning_rate": 2.842887752536751e-06, "loss": 0.2866, "step": 7243 }, { "epoch": 2.033688938798428, "grad_norm": 0.5863782167434692, "learning_rate": 2.841414337507285e-06, "loss": 0.3359, "step": 7244 }, { "epoch": 2.0339696799550815, "grad_norm": 0.5974136590957642, "learning_rate": 2.83994115282866e-06, "loss": 0.3359, "step": 7245 }, { "epoch": 2.034250421111735, "grad_norm": 0.6383056640625, "learning_rate": 2.838468198658082e-06, "loss": 0.3143, "step": 7246 }, { "epoch": 2.0345311622683884, "grad_norm": 0.6040796637535095, "learning_rate": 2.836995475152735e-06, "loss": 0.3502, "step": 7247 }, { "epoch": 2.0348119034250423, "grad_norm": 0.570811927318573, "learning_rate": 2.8355229824697818e-06, "loss": 0.3429, "step": 7248 }, { "epoch": 2.0350926445816957, "grad_norm": 0.5666947364807129, "learning_rate": 2.834050720766353e-06, "loss": 0.3187, "step": 7249 }, { "epoch": 2.035373385738349, "grad_norm": 0.5729508996009827, "learning_rate": 2.832578690199565e-06, "loss": 0.3523, "step": 7250 }, { "epoch": 2.035654126895003, "grad_norm": 0.5670958161354065, "learning_rate": 2.8311068909264987e-06, "loss": 0.321, "step": 7251 }, { "epoch": 2.0359348680516565, "grad_norm": 1.3062509298324585, "learning_rate": 2.8296353231042197e-06, "loss": 0.3171, "step": 7252 }, { "epoch": 2.03621560920831, "grad_norm": 0.6545298099517822, "learning_rate": 2.8281639868897627e-06, "loss": 0.3205, "step": 7253 }, { "epoch": 2.0364963503649633, "grad_norm": 0.6367155909538269, "learning_rate": 2.8266928824401363e-06, "loss": 0.358, "step": 7254 }, { "epoch": 2.0367770915216172, "grad_norm": 0.5915209054946899, "learning_rate": 2.825222009912333e-06, "loss": 0.3099, "step": 7255 }, { "epoch": 2.0370578326782707, "grad_norm": 0.6656043529510498, "learning_rate": 2.823751369463311e-06, "loss": 0.2923, "step": 7256 }, { "epoch": 2.037338573834924, "grad_norm": 0.5338889956474304, "learning_rate": 2.8222809612500114e-06, "loss": 0.3487, "step": 7257 }, { "epoch": 2.0376193149915776, "grad_norm": 0.5747395157814026, "learning_rate": 2.8208107854293455e-06, "loss": 0.3254, "step": 7258 }, { "epoch": 2.0379000561482314, "grad_norm": 0.556736409664154, "learning_rate": 2.819340842158199e-06, "loss": 0.3191, "step": 7259 }, { "epoch": 2.038180797304885, "grad_norm": 0.6049169301986694, "learning_rate": 2.8178711315934395e-06, "loss": 0.3305, "step": 7260 }, { "epoch": 2.0384615384615383, "grad_norm": 0.6023212671279907, "learning_rate": 2.8164016538919005e-06, "loss": 0.3385, "step": 7261 }, { "epoch": 2.038742279618192, "grad_norm": 0.6171841025352478, "learning_rate": 2.8149324092104e-06, "loss": 0.3173, "step": 7262 }, { "epoch": 2.0390230207748457, "grad_norm": 0.6021910905838013, "learning_rate": 2.8134633977057236e-06, "loss": 0.3476, "step": 7263 }, { "epoch": 2.039303761931499, "grad_norm": 0.5984469056129456, "learning_rate": 2.8119946195346375e-06, "loss": 0.3288, "step": 7264 }, { "epoch": 2.0395845030881525, "grad_norm": 0.525161862373352, "learning_rate": 2.8105260748538778e-06, "loss": 0.3625, "step": 7265 }, { "epoch": 2.0398652442448064, "grad_norm": 0.613210916519165, "learning_rate": 2.80905776382016e-06, "loss": 0.3574, "step": 7266 }, { "epoch": 2.04014598540146, "grad_norm": 0.5665947794914246, "learning_rate": 2.807589686590174e-06, "loss": 0.3543, "step": 7267 }, { "epoch": 2.0404267265581133, "grad_norm": 0.5441635847091675, "learning_rate": 2.806121843320584e-06, "loss": 0.3469, "step": 7268 }, { "epoch": 2.040707467714767, "grad_norm": 0.6160149574279785, "learning_rate": 2.804654234168026e-06, "loss": 0.3073, "step": 7269 }, { "epoch": 2.0409882088714206, "grad_norm": 0.5180505514144897, "learning_rate": 2.8031868592891177e-06, "loss": 0.3089, "step": 7270 }, { "epoch": 2.041268950028074, "grad_norm": 0.5670261979103088, "learning_rate": 2.801719718840445e-06, "loss": 0.3112, "step": 7271 }, { "epoch": 2.0415496911847275, "grad_norm": 0.5812737941741943, "learning_rate": 2.8002528129785755e-06, "loss": 0.3412, "step": 7272 }, { "epoch": 2.0418304323413814, "grad_norm": 0.6227341890335083, "learning_rate": 2.798786141860045e-06, "loss": 0.2957, "step": 7273 }, { "epoch": 2.042111173498035, "grad_norm": 0.6089826226234436, "learning_rate": 2.7973197056413705e-06, "loss": 0.3218, "step": 7274 }, { "epoch": 2.0423919146546883, "grad_norm": 0.5416876077651978, "learning_rate": 2.795853504479039e-06, "loss": 0.3312, "step": 7275 }, { "epoch": 2.0426726558113417, "grad_norm": 0.5602169036865234, "learning_rate": 2.794387538529514e-06, "loss": 0.311, "step": 7276 }, { "epoch": 2.0429533969679956, "grad_norm": 0.5964089632034302, "learning_rate": 2.792921807949236e-06, "loss": 0.2815, "step": 7277 }, { "epoch": 2.043234138124649, "grad_norm": 0.5287725925445557, "learning_rate": 2.7914563128946165e-06, "loss": 0.3178, "step": 7278 }, { "epoch": 2.0435148792813025, "grad_norm": 0.5968223214149475, "learning_rate": 2.7899910535220463e-06, "loss": 0.3444, "step": 7279 }, { "epoch": 2.0437956204379564, "grad_norm": 0.5191061496734619, "learning_rate": 2.788526029987889e-06, "loss": 0.3306, "step": 7280 }, { "epoch": 2.04407636159461, "grad_norm": 0.6092125773429871, "learning_rate": 2.7870612424484787e-06, "loss": 0.3279, "step": 7281 }, { "epoch": 2.0443571027512633, "grad_norm": 0.6415168642997742, "learning_rate": 2.785596691060134e-06, "loss": 0.312, "step": 7282 }, { "epoch": 2.0446378439079167, "grad_norm": 0.5579442381858826, "learning_rate": 2.784132375979137e-06, "loss": 0.3075, "step": 7283 }, { "epoch": 2.0449185850645706, "grad_norm": 0.5816577076911926, "learning_rate": 2.7826682973617556e-06, "loss": 0.3102, "step": 7284 }, { "epoch": 2.045199326221224, "grad_norm": 0.502070963382721, "learning_rate": 2.7812044553642232e-06, "loss": 0.3539, "step": 7285 }, { "epoch": 2.0454800673778775, "grad_norm": 0.5898591876029968, "learning_rate": 2.7797408501427547e-06, "loss": 0.3461, "step": 7286 }, { "epoch": 2.0457608085345313, "grad_norm": 0.600614070892334, "learning_rate": 2.778277481853537e-06, "loss": 0.3603, "step": 7287 }, { "epoch": 2.046041549691185, "grad_norm": 0.5811378955841064, "learning_rate": 2.776814350652728e-06, "loss": 0.3374, "step": 7288 }, { "epoch": 2.0463222908478382, "grad_norm": 0.5325272679328918, "learning_rate": 2.775351456696468e-06, "loss": 0.3436, "step": 7289 }, { "epoch": 2.0466030320044917, "grad_norm": 0.5729573369026184, "learning_rate": 2.773888800140865e-06, "loss": 0.3748, "step": 7290 }, { "epoch": 2.0468837731611456, "grad_norm": 0.5439331531524658, "learning_rate": 2.772426381142005e-06, "loss": 0.3477, "step": 7291 }, { "epoch": 2.047164514317799, "grad_norm": 0.5973528623580933, "learning_rate": 2.7709641998559523e-06, "loss": 0.3341, "step": 7292 }, { "epoch": 2.0474452554744524, "grad_norm": 0.5766603350639343, "learning_rate": 2.769502256438736e-06, "loss": 0.349, "step": 7293 }, { "epoch": 2.0477259966311063, "grad_norm": 0.5928993821144104, "learning_rate": 2.7680405510463702e-06, "loss": 0.3063, "step": 7294 }, { "epoch": 2.0480067377877598, "grad_norm": 0.5455955266952515, "learning_rate": 2.7665790838348377e-06, "loss": 0.3539, "step": 7295 }, { "epoch": 2.048287478944413, "grad_norm": 0.5615442991256714, "learning_rate": 2.7651178549600942e-06, "loss": 0.3393, "step": 7296 }, { "epoch": 2.0485682201010667, "grad_norm": 0.5350291132926941, "learning_rate": 2.763656864578078e-06, "loss": 0.2635, "step": 7297 }, { "epoch": 2.0488489612577205, "grad_norm": 0.6523261070251465, "learning_rate": 2.762196112844692e-06, "loss": 0.3197, "step": 7298 }, { "epoch": 2.049129702414374, "grad_norm": 0.612379252910614, "learning_rate": 2.760735599915823e-06, "loss": 0.356, "step": 7299 }, { "epoch": 2.0494104435710274, "grad_norm": 0.5997228622436523, "learning_rate": 2.759275325947324e-06, "loss": 0.3303, "step": 7300 }, { "epoch": 2.0496911847276813, "grad_norm": 0.5357117652893066, "learning_rate": 2.7578152910950297e-06, "loss": 0.3508, "step": 7301 }, { "epoch": 2.0499719258843347, "grad_norm": 0.5867998600006104, "learning_rate": 2.7563554955147436e-06, "loss": 0.3299, "step": 7302 }, { "epoch": 2.050252667040988, "grad_norm": 0.6556015610694885, "learning_rate": 2.7548959393622454e-06, "loss": 0.3312, "step": 7303 }, { "epoch": 2.0505334081976416, "grad_norm": 0.5935055613517761, "learning_rate": 2.7534366227932928e-06, "loss": 0.3327, "step": 7304 }, { "epoch": 2.0508141493542955, "grad_norm": 0.5554129481315613, "learning_rate": 2.751977545963612e-06, "loss": 0.3316, "step": 7305 }, { "epoch": 2.051094890510949, "grad_norm": 0.5492838621139526, "learning_rate": 2.75051870902891e-06, "loss": 0.3548, "step": 7306 }, { "epoch": 2.0513756316676024, "grad_norm": 0.58622807264328, "learning_rate": 2.7490601121448602e-06, "loss": 0.3336, "step": 7307 }, { "epoch": 2.051656372824256, "grad_norm": 0.5542654991149902, "learning_rate": 2.74760175546712e-06, "loss": 0.3161, "step": 7308 }, { "epoch": 2.0519371139809097, "grad_norm": 0.5774078369140625, "learning_rate": 2.746143639151313e-06, "loss": 0.3421, "step": 7309 }, { "epoch": 2.052217855137563, "grad_norm": 0.6042695045471191, "learning_rate": 2.74468576335304e-06, "loss": 0.3662, "step": 7310 }, { "epoch": 2.0524985962942166, "grad_norm": 0.5622040629386902, "learning_rate": 2.7432281282278788e-06, "loss": 0.3463, "step": 7311 }, { "epoch": 2.0527793374508705, "grad_norm": 0.5341228246688843, "learning_rate": 2.741770733931376e-06, "loss": 0.3207, "step": 7312 }, { "epoch": 2.053060078607524, "grad_norm": 0.5676590204238892, "learning_rate": 2.7403135806190595e-06, "loss": 0.3088, "step": 7313 }, { "epoch": 2.0533408197641774, "grad_norm": 0.4851858913898468, "learning_rate": 2.738856668446426e-06, "loss": 0.3173, "step": 7314 }, { "epoch": 2.053621560920831, "grad_norm": 0.5754046440124512, "learning_rate": 2.737399997568943e-06, "loss": 0.3401, "step": 7315 }, { "epoch": 2.0539023020774847, "grad_norm": 0.5998167991638184, "learning_rate": 2.7359435681420665e-06, "loss": 0.3409, "step": 7316 }, { "epoch": 2.054183043234138, "grad_norm": 0.6578349471092224, "learning_rate": 2.734487380321213e-06, "loss": 0.3205, "step": 7317 }, { "epoch": 2.0544637843907916, "grad_norm": 0.5938233137130737, "learning_rate": 2.7330314342617758e-06, "loss": 0.3401, "step": 7318 }, { "epoch": 2.0547445255474455, "grad_norm": 0.5376281142234802, "learning_rate": 2.7315757301191293e-06, "loss": 0.3411, "step": 7319 }, { "epoch": 2.055025266704099, "grad_norm": 0.5516668558120728, "learning_rate": 2.730120268048612e-06, "loss": 0.2793, "step": 7320 }, { "epoch": 2.0553060078607523, "grad_norm": 0.568037748336792, "learning_rate": 2.728665048205546e-06, "loss": 0.3125, "step": 7321 }, { "epoch": 2.055586749017406, "grad_norm": 0.592534601688385, "learning_rate": 2.72721007074522e-06, "loss": 0.3567, "step": 7322 }, { "epoch": 2.0558674901740597, "grad_norm": 0.5778653621673584, "learning_rate": 2.725755335822903e-06, "loss": 0.341, "step": 7323 }, { "epoch": 2.056148231330713, "grad_norm": 0.6206321716308594, "learning_rate": 2.7243008435938346e-06, "loss": 0.333, "step": 7324 }, { "epoch": 2.0564289724873666, "grad_norm": 0.584697425365448, "learning_rate": 2.7228465942132264e-06, "loss": 0.3366, "step": 7325 }, { "epoch": 2.05670971364402, "grad_norm": 0.527962327003479, "learning_rate": 2.721392587836271e-06, "loss": 0.3553, "step": 7326 }, { "epoch": 2.056990454800674, "grad_norm": 0.5727418065071106, "learning_rate": 2.7199388246181268e-06, "loss": 0.3608, "step": 7327 }, { "epoch": 2.0572711959573273, "grad_norm": 0.604831874370575, "learning_rate": 2.718485304713935e-06, "loss": 0.3145, "step": 7328 }, { "epoch": 2.0575519371139808, "grad_norm": 0.5907540917396545, "learning_rate": 2.717032028278803e-06, "loss": 0.351, "step": 7329 }, { "epoch": 2.0578326782706347, "grad_norm": 0.6221470236778259, "learning_rate": 2.7155789954678146e-06, "loss": 0.3245, "step": 7330 }, { "epoch": 2.058113419427288, "grad_norm": 0.5572207570075989, "learning_rate": 2.7141262064360318e-06, "loss": 0.3396, "step": 7331 }, { "epoch": 2.0583941605839415, "grad_norm": 0.6000232100486755, "learning_rate": 2.7126736613384828e-06, "loss": 0.3458, "step": 7332 }, { "epoch": 2.058674901740595, "grad_norm": 0.5176807641983032, "learning_rate": 2.7112213603301798e-06, "loss": 0.3384, "step": 7333 }, { "epoch": 2.058955642897249, "grad_norm": 0.5626146793365479, "learning_rate": 2.709769303566097e-06, "loss": 0.3004, "step": 7334 }, { "epoch": 2.0592363840539023, "grad_norm": 0.589306652545929, "learning_rate": 2.708317491201195e-06, "loss": 0.3194, "step": 7335 }, { "epoch": 2.0595171252105557, "grad_norm": 0.5636805295944214, "learning_rate": 2.706865923390399e-06, "loss": 0.3217, "step": 7336 }, { "epoch": 2.0597978663672096, "grad_norm": 0.6115208864212036, "learning_rate": 2.7054146002886095e-06, "loss": 0.3137, "step": 7337 }, { "epoch": 2.060078607523863, "grad_norm": 0.6049689650535583, "learning_rate": 2.7039635220507067e-06, "loss": 0.3365, "step": 7338 }, { "epoch": 2.0603593486805165, "grad_norm": 0.6705333590507507, "learning_rate": 2.702512688831537e-06, "loss": 0.3068, "step": 7339 }, { "epoch": 2.06064008983717, "grad_norm": 0.5652129650115967, "learning_rate": 2.7010621007859284e-06, "loss": 0.2963, "step": 7340 }, { "epoch": 2.060920830993824, "grad_norm": 0.5260769128799438, "learning_rate": 2.6996117580686732e-06, "loss": 0.3383, "step": 7341 }, { "epoch": 2.0612015721504773, "grad_norm": 0.6173593997955322, "learning_rate": 2.698161660834547e-06, "loss": 0.3166, "step": 7342 }, { "epoch": 2.0614823133071307, "grad_norm": 0.5808701515197754, "learning_rate": 2.696711809238296e-06, "loss": 0.3529, "step": 7343 }, { "epoch": 2.0617630544637846, "grad_norm": 0.6364161968231201, "learning_rate": 2.6952622034346347e-06, "loss": 0.3702, "step": 7344 }, { "epoch": 2.062043795620438, "grad_norm": 0.6393119692802429, "learning_rate": 2.693812843578262e-06, "loss": 0.3322, "step": 7345 }, { "epoch": 2.0623245367770915, "grad_norm": 0.5216532945632935, "learning_rate": 2.69236372982384e-06, "loss": 0.3337, "step": 7346 }, { "epoch": 2.062605277933745, "grad_norm": 0.6000442504882812, "learning_rate": 2.6909148623260097e-06, "loss": 0.3581, "step": 7347 }, { "epoch": 2.062886019090399, "grad_norm": 0.6018159985542297, "learning_rate": 2.689466241239388e-06, "loss": 0.3084, "step": 7348 }, { "epoch": 2.0631667602470523, "grad_norm": 0.6169343590736389, "learning_rate": 2.688017866718558e-06, "loss": 0.3617, "step": 7349 }, { "epoch": 2.0634475014037057, "grad_norm": 0.5640069842338562, "learning_rate": 2.6865697389180867e-06, "loss": 0.3288, "step": 7350 }, { "epoch": 2.063728242560359, "grad_norm": 0.553663432598114, "learning_rate": 2.6851218579925065e-06, "loss": 0.3262, "step": 7351 }, { "epoch": 2.064008983717013, "grad_norm": 0.5483970642089844, "learning_rate": 2.6836742240963237e-06, "loss": 0.334, "step": 7352 }, { "epoch": 2.0642897248736665, "grad_norm": 0.5885378122329712, "learning_rate": 2.682226837384026e-06, "loss": 0.3375, "step": 7353 }, { "epoch": 2.06457046603032, "grad_norm": 0.5467673540115356, "learning_rate": 2.680779698010065e-06, "loss": 0.3582, "step": 7354 }, { "epoch": 2.064851207186974, "grad_norm": 0.5636199712753296, "learning_rate": 2.6793328061288736e-06, "loss": 0.3143, "step": 7355 }, { "epoch": 2.0651319483436272, "grad_norm": 0.6640552282333374, "learning_rate": 2.6778861618948525e-06, "loss": 0.32, "step": 7356 }, { "epoch": 2.0654126895002807, "grad_norm": 0.5755638480186462, "learning_rate": 2.6764397654623818e-06, "loss": 0.2938, "step": 7357 }, { "epoch": 2.065693430656934, "grad_norm": 0.5817362070083618, "learning_rate": 2.6749936169858103e-06, "loss": 0.3094, "step": 7358 }, { "epoch": 2.065974171813588, "grad_norm": 0.5755941867828369, "learning_rate": 2.6735477166194595e-06, "loss": 0.3351, "step": 7359 }, { "epoch": 2.0662549129702414, "grad_norm": 0.5803687572479248, "learning_rate": 2.672102064517631e-06, "loss": 0.3208, "step": 7360 }, { "epoch": 2.066535654126895, "grad_norm": 0.5887718796730042, "learning_rate": 2.6706566608345917e-06, "loss": 0.3207, "step": 7361 }, { "epoch": 2.0668163952835488, "grad_norm": 0.5406284332275391, "learning_rate": 2.6692115057245917e-06, "loss": 0.3071, "step": 7362 }, { "epoch": 2.067097136440202, "grad_norm": 0.5603659152984619, "learning_rate": 2.6677665993418445e-06, "loss": 0.3554, "step": 7363 }, { "epoch": 2.0673778775968557, "grad_norm": 0.588245689868927, "learning_rate": 2.6663219418405405e-06, "loss": 0.3466, "step": 7364 }, { "epoch": 2.067658618753509, "grad_norm": 0.5675176978111267, "learning_rate": 2.6648775333748487e-06, "loss": 0.3281, "step": 7365 }, { "epoch": 2.067939359910163, "grad_norm": 0.5302689075469971, "learning_rate": 2.6634333740989037e-06, "loss": 0.3154, "step": 7366 }, { "epoch": 2.0682201010668164, "grad_norm": 0.6093424558639526, "learning_rate": 2.661989464166819e-06, "loss": 0.2794, "step": 7367 }, { "epoch": 2.06850084222347, "grad_norm": 0.5445008277893066, "learning_rate": 2.6605458037326814e-06, "loss": 0.3224, "step": 7368 }, { "epoch": 2.0687815833801233, "grad_norm": 0.62278151512146, "learning_rate": 2.6591023929505453e-06, "loss": 0.3364, "step": 7369 }, { "epoch": 2.069062324536777, "grad_norm": 0.5738529562950134, "learning_rate": 2.6576592319744466e-06, "loss": 0.3199, "step": 7370 }, { "epoch": 2.0693430656934306, "grad_norm": 0.518867015838623, "learning_rate": 2.656216320958387e-06, "loss": 0.2954, "step": 7371 }, { "epoch": 2.069623806850084, "grad_norm": 0.5508706569671631, "learning_rate": 2.6547736600563486e-06, "loss": 0.3511, "step": 7372 }, { "epoch": 2.069904548006738, "grad_norm": 0.5410498380661011, "learning_rate": 2.653331249422281e-06, "loss": 0.3068, "step": 7373 }, { "epoch": 2.0701852891633914, "grad_norm": 0.6112616658210754, "learning_rate": 2.6518890892101075e-06, "loss": 0.3378, "step": 7374 }, { "epoch": 2.070466030320045, "grad_norm": 0.5861853957176208, "learning_rate": 2.6504471795737308e-06, "loss": 0.2802, "step": 7375 }, { "epoch": 2.0707467714766983, "grad_norm": 0.5634087324142456, "learning_rate": 2.6490055206670174e-06, "loss": 0.3042, "step": 7376 }, { "epoch": 2.071027512633352, "grad_norm": 0.5576903223991394, "learning_rate": 2.647564112643818e-06, "loss": 0.3534, "step": 7377 }, { "epoch": 2.0713082537900056, "grad_norm": 0.6198608875274658, "learning_rate": 2.646122955657947e-06, "loss": 0.292, "step": 7378 }, { "epoch": 2.071588994946659, "grad_norm": 0.521613359451294, "learning_rate": 2.644682049863194e-06, "loss": 0.3613, "step": 7379 }, { "epoch": 2.071869736103313, "grad_norm": 0.5087357759475708, "learning_rate": 2.6432413954133287e-06, "loss": 0.3411, "step": 7380 }, { "epoch": 2.0721504772599664, "grad_norm": 0.5704274773597717, "learning_rate": 2.6418009924620836e-06, "loss": 0.3732, "step": 7381 }, { "epoch": 2.07243121841662, "grad_norm": 0.5092198848724365, "learning_rate": 2.6403608411631744e-06, "loss": 0.3532, "step": 7382 }, { "epoch": 2.0727119595732733, "grad_norm": 0.5866182446479797, "learning_rate": 2.63892094167028e-06, "loss": 0.315, "step": 7383 }, { "epoch": 2.072992700729927, "grad_norm": 0.518237829208374, "learning_rate": 2.637481294137062e-06, "loss": 0.309, "step": 7384 }, { "epoch": 2.0732734418865806, "grad_norm": 0.56381756067276, "learning_rate": 2.6360418987171493e-06, "loss": 0.3084, "step": 7385 }, { "epoch": 2.073554183043234, "grad_norm": 0.608199954032898, "learning_rate": 2.6346027555641422e-06, "loss": 0.321, "step": 7386 }, { "epoch": 2.073834924199888, "grad_norm": 0.5592595338821411, "learning_rate": 2.6331638648316223e-06, "loss": 0.3583, "step": 7387 }, { "epoch": 2.0741156653565413, "grad_norm": 0.6111178994178772, "learning_rate": 2.6317252266731337e-06, "loss": 0.3382, "step": 7388 }, { "epoch": 2.074396406513195, "grad_norm": 0.5903767347335815, "learning_rate": 2.630286841242203e-06, "loss": 0.3195, "step": 7389 }, { "epoch": 2.0746771476698482, "grad_norm": 0.5476042628288269, "learning_rate": 2.628848708692326e-06, "loss": 0.3326, "step": 7390 }, { "epoch": 2.074957888826502, "grad_norm": 0.6097612977027893, "learning_rate": 2.627410829176966e-06, "loss": 0.3061, "step": 7391 }, { "epoch": 2.0752386299831556, "grad_norm": 0.5774500370025635, "learning_rate": 2.6259732028495693e-06, "loss": 0.3361, "step": 7392 }, { "epoch": 2.075519371139809, "grad_norm": 0.5278791189193726, "learning_rate": 2.624535829863549e-06, "loss": 0.3266, "step": 7393 }, { "epoch": 2.075800112296463, "grad_norm": 0.6039384007453918, "learning_rate": 2.623098710372295e-06, "loss": 0.3182, "step": 7394 }, { "epoch": 2.0760808534531163, "grad_norm": 0.6215166449546814, "learning_rate": 2.621661844529165e-06, "loss": 0.2946, "step": 7395 }, { "epoch": 2.0763615946097698, "grad_norm": 0.5878775119781494, "learning_rate": 2.6202252324874916e-06, "loss": 0.3404, "step": 7396 }, { "epoch": 2.076642335766423, "grad_norm": 0.5666494369506836, "learning_rate": 2.6187888744005842e-06, "loss": 0.2673, "step": 7397 }, { "epoch": 2.076923076923077, "grad_norm": 0.5629453659057617, "learning_rate": 2.6173527704217188e-06, "loss": 0.2981, "step": 7398 }, { "epoch": 2.0772038180797305, "grad_norm": 0.5452430248260498, "learning_rate": 2.6159169207041505e-06, "loss": 0.3016, "step": 7399 }, { "epoch": 2.077484559236384, "grad_norm": 0.6120348572731018, "learning_rate": 2.6144813254011036e-06, "loss": 0.2976, "step": 7400 }, { "epoch": 2.0777653003930374, "grad_norm": 0.5789759159088135, "learning_rate": 2.6130459846657723e-06, "loss": 0.3126, "step": 7401 }, { "epoch": 2.0780460415496913, "grad_norm": 0.5597143769264221, "learning_rate": 2.6116108986513324e-06, "loss": 0.3379, "step": 7402 }, { "epoch": 2.0783267827063447, "grad_norm": 0.6099171042442322, "learning_rate": 2.6101760675109228e-06, "loss": 0.3187, "step": 7403 }, { "epoch": 2.078607523862998, "grad_norm": 0.6088306903839111, "learning_rate": 2.6087414913976637e-06, "loss": 0.297, "step": 7404 }, { "epoch": 2.078888265019652, "grad_norm": 0.5763127207756042, "learning_rate": 2.607307170464641e-06, "loss": 0.3083, "step": 7405 }, { "epoch": 2.0791690061763055, "grad_norm": 0.48565754294395447, "learning_rate": 2.60587310486492e-06, "loss": 0.3519, "step": 7406 }, { "epoch": 2.079449747332959, "grad_norm": 0.5613904595375061, "learning_rate": 2.6044392947515326e-06, "loss": 0.3454, "step": 7407 }, { "epoch": 2.0797304884896124, "grad_norm": 0.5467095971107483, "learning_rate": 2.6030057402774846e-06, "loss": 0.3246, "step": 7408 }, { "epoch": 2.0800112296462663, "grad_norm": 0.6917138695716858, "learning_rate": 2.60157244159576e-06, "loss": 0.3632, "step": 7409 }, { "epoch": 2.0802919708029197, "grad_norm": 0.5972304344177246, "learning_rate": 2.600139398859308e-06, "loss": 0.3007, "step": 7410 }, { "epoch": 2.080572711959573, "grad_norm": 0.591606080532074, "learning_rate": 2.5987066122210574e-06, "loss": 0.3418, "step": 7411 }, { "epoch": 2.080853453116227, "grad_norm": 0.5599426627159119, "learning_rate": 2.5972740818339048e-06, "loss": 0.3257, "step": 7412 }, { "epoch": 2.0811341942728805, "grad_norm": 0.51194167137146, "learning_rate": 2.5958418078507187e-06, "loss": 0.3386, "step": 7413 }, { "epoch": 2.081414935429534, "grad_norm": 0.5968789458274841, "learning_rate": 2.594409790424346e-06, "loss": 0.331, "step": 7414 }, { "epoch": 2.0816956765861874, "grad_norm": 0.5914863348007202, "learning_rate": 2.592978029707599e-06, "loss": 0.3506, "step": 7415 }, { "epoch": 2.0819764177428413, "grad_norm": 0.5246887803077698, "learning_rate": 2.5915465258532703e-06, "loss": 0.3365, "step": 7416 }, { "epoch": 2.0822571588994947, "grad_norm": 0.5770841836929321, "learning_rate": 2.5901152790141175e-06, "loss": 0.3165, "step": 7417 }, { "epoch": 2.082537900056148, "grad_norm": 0.5936367511749268, "learning_rate": 2.588684289342876e-06, "loss": 0.2772, "step": 7418 }, { "epoch": 2.0828186412128016, "grad_norm": 0.6054359674453735, "learning_rate": 2.587253556992254e-06, "loss": 0.3147, "step": 7419 }, { "epoch": 2.0830993823694555, "grad_norm": 0.6312696933746338, "learning_rate": 2.5858230821149267e-06, "loss": 0.3152, "step": 7420 }, { "epoch": 2.083380123526109, "grad_norm": 0.5774433612823486, "learning_rate": 2.58439286486355e-06, "loss": 0.3323, "step": 7421 }, { "epoch": 2.0836608646827623, "grad_norm": 0.5920672416687012, "learning_rate": 2.5829629053907436e-06, "loss": 0.3329, "step": 7422 }, { "epoch": 2.0839416058394162, "grad_norm": 0.7043784856796265, "learning_rate": 2.5815332038491044e-06, "loss": 0.3182, "step": 7423 }, { "epoch": 2.0842223469960697, "grad_norm": 0.6078377366065979, "learning_rate": 2.5801037603912036e-06, "loss": 0.3414, "step": 7424 }, { "epoch": 2.084503088152723, "grad_norm": 0.5726706385612488, "learning_rate": 2.5786745751695796e-06, "loss": 0.2863, "step": 7425 }, { "epoch": 2.0847838293093766, "grad_norm": 0.5997820496559143, "learning_rate": 2.57724564833675e-06, "loss": 0.298, "step": 7426 }, { "epoch": 2.0850645704660304, "grad_norm": 0.6254377365112305, "learning_rate": 2.575816980045196e-06, "loss": 0.3404, "step": 7427 }, { "epoch": 2.085345311622684, "grad_norm": 0.5725039839744568, "learning_rate": 2.5743885704473813e-06, "loss": 0.3222, "step": 7428 }, { "epoch": 2.0856260527793373, "grad_norm": 0.608169674873352, "learning_rate": 2.572960419695734e-06, "loss": 0.3372, "step": 7429 }, { "epoch": 2.085906793935991, "grad_norm": 0.6945512294769287, "learning_rate": 2.571532527942657e-06, "loss": 0.2935, "step": 7430 }, { "epoch": 2.0861875350926447, "grad_norm": 0.5469156503677368, "learning_rate": 2.570104895340528e-06, "loss": 0.3506, "step": 7431 }, { "epoch": 2.086468276249298, "grad_norm": 0.5566493272781372, "learning_rate": 2.5686775220416927e-06, "loss": 0.3173, "step": 7432 }, { "epoch": 2.0867490174059515, "grad_norm": 0.6051676273345947, "learning_rate": 2.567250408198474e-06, "loss": 0.3332, "step": 7433 }, { "epoch": 2.0870297585626054, "grad_norm": 0.5985397696495056, "learning_rate": 2.5658235539631636e-06, "loss": 0.3162, "step": 7434 }, { "epoch": 2.087310499719259, "grad_norm": 0.6635539531707764, "learning_rate": 2.5643969594880253e-06, "loss": 0.3055, "step": 7435 }, { "epoch": 2.0875912408759123, "grad_norm": 0.6240601539611816, "learning_rate": 2.5629706249252984e-06, "loss": 0.3411, "step": 7436 }, { "epoch": 2.087871982032566, "grad_norm": 0.599671483039856, "learning_rate": 2.561544550427191e-06, "loss": 0.3049, "step": 7437 }, { "epoch": 2.0881527231892196, "grad_norm": 0.7075299620628357, "learning_rate": 2.560118736145886e-06, "loss": 0.3189, "step": 7438 }, { "epoch": 2.088433464345873, "grad_norm": 0.5455586314201355, "learning_rate": 2.558693182233535e-06, "loss": 0.3386, "step": 7439 }, { "epoch": 2.0887142055025265, "grad_norm": 0.5406078696250916, "learning_rate": 2.5572678888422684e-06, "loss": 0.3512, "step": 7440 }, { "epoch": 2.0889949466591804, "grad_norm": 0.6455414295196533, "learning_rate": 2.555842856124182e-06, "loss": 0.2934, "step": 7441 }, { "epoch": 2.089275687815834, "grad_norm": 0.663253903388977, "learning_rate": 2.554418084231346e-06, "loss": 0.3041, "step": 7442 }, { "epoch": 2.0895564289724873, "grad_norm": 0.5740071535110474, "learning_rate": 2.552993573315803e-06, "loss": 0.3295, "step": 7443 }, { "epoch": 2.0898371701291407, "grad_norm": 0.6303340792655945, "learning_rate": 2.5515693235295714e-06, "loss": 0.31, "step": 7444 }, { "epoch": 2.0901179112857946, "grad_norm": 0.6651839017868042, "learning_rate": 2.550145335024633e-06, "loss": 0.33, "step": 7445 }, { "epoch": 2.090398652442448, "grad_norm": 0.5303608179092407, "learning_rate": 2.548721607952952e-06, "loss": 0.3673, "step": 7446 }, { "epoch": 2.0906793935991015, "grad_norm": 0.5375730991363525, "learning_rate": 2.547298142466456e-06, "loss": 0.3085, "step": 7447 }, { "epoch": 2.0909601347557554, "grad_norm": 0.5289609432220459, "learning_rate": 2.545874938717052e-06, "loss": 0.3284, "step": 7448 }, { "epoch": 2.091240875912409, "grad_norm": 0.5582297444343567, "learning_rate": 2.5444519968566128e-06, "loss": 0.3177, "step": 7449 }, { "epoch": 2.0915216170690623, "grad_norm": 0.5148752331733704, "learning_rate": 2.543029317036985e-06, "loss": 0.3124, "step": 7450 }, { "epoch": 2.0918023582257157, "grad_norm": 0.5447492003440857, "learning_rate": 2.5416068994099907e-06, "loss": 0.2981, "step": 7451 }, { "epoch": 2.0920830993823696, "grad_norm": 0.5968486070632935, "learning_rate": 2.540184744127419e-06, "loss": 0.3387, "step": 7452 }, { "epoch": 2.092363840539023, "grad_norm": 0.6052152514457703, "learning_rate": 2.538762851341037e-06, "loss": 0.3227, "step": 7453 }, { "epoch": 2.0926445816956765, "grad_norm": 0.6142473220825195, "learning_rate": 2.537341221202576e-06, "loss": 0.2883, "step": 7454 }, { "epoch": 2.0929253228523303, "grad_norm": 0.6422644853591919, "learning_rate": 2.5359198538637475e-06, "loss": 0.2955, "step": 7455 }, { "epoch": 2.093206064008984, "grad_norm": 0.6409845948219299, "learning_rate": 2.5344987494762287e-06, "loss": 0.3308, "step": 7456 }, { "epoch": 2.0934868051656372, "grad_norm": 0.5762452483177185, "learning_rate": 2.5330779081916703e-06, "loss": 0.3282, "step": 7457 }, { "epoch": 2.0937675463222907, "grad_norm": 0.5471944212913513, "learning_rate": 2.5316573301616976e-06, "loss": 0.308, "step": 7458 }, { "epoch": 2.0940482874789446, "grad_norm": 0.5242460370063782, "learning_rate": 2.5302370155379037e-06, "loss": 0.3485, "step": 7459 }, { "epoch": 2.094329028635598, "grad_norm": 0.5595307350158691, "learning_rate": 2.5288169644718587e-06, "loss": 0.3595, "step": 7460 }, { "epoch": 2.0946097697922514, "grad_norm": 0.5410830974578857, "learning_rate": 2.5273971771151007e-06, "loss": 0.3302, "step": 7461 }, { "epoch": 2.094890510948905, "grad_norm": 0.543195903301239, "learning_rate": 2.5259776536191372e-06, "loss": 0.3293, "step": 7462 }, { "epoch": 2.0951712521055588, "grad_norm": 0.5585414171218872, "learning_rate": 2.524558394135456e-06, "loss": 0.3021, "step": 7463 }, { "epoch": 2.095451993262212, "grad_norm": 0.5444145798683167, "learning_rate": 2.523139398815507e-06, "loss": 0.308, "step": 7464 }, { "epoch": 2.0957327344188657, "grad_norm": 0.5823849439620972, "learning_rate": 2.5217206678107207e-06, "loss": 0.3196, "step": 7465 }, { "epoch": 2.0960134755755195, "grad_norm": 0.5624425411224365, "learning_rate": 2.520302201272491e-06, "loss": 0.3146, "step": 7466 }, { "epoch": 2.096294216732173, "grad_norm": 0.5407665967941284, "learning_rate": 2.518883999352193e-06, "loss": 0.3056, "step": 7467 }, { "epoch": 2.0965749578888264, "grad_norm": 0.5789952874183655, "learning_rate": 2.5174660622011627e-06, "loss": 0.3187, "step": 7468 }, { "epoch": 2.09685569904548, "grad_norm": 0.5371475219726562, "learning_rate": 2.5160483899707173e-06, "loss": 0.3329, "step": 7469 }, { "epoch": 2.0971364402021337, "grad_norm": 0.5785125494003296, "learning_rate": 2.5146309828121424e-06, "loss": 0.3119, "step": 7470 }, { "epoch": 2.097417181358787, "grad_norm": 0.5566592216491699, "learning_rate": 2.5132138408766937e-06, "loss": 0.3496, "step": 7471 }, { "epoch": 2.0976979225154406, "grad_norm": 0.5942724943161011, "learning_rate": 2.5117969643155975e-06, "loss": 0.3738, "step": 7472 }, { "epoch": 2.0979786636720945, "grad_norm": 0.5612032413482666, "learning_rate": 2.5103803532800587e-06, "loss": 0.3987, "step": 7473 }, { "epoch": 2.098259404828748, "grad_norm": 0.5707175731658936, "learning_rate": 2.508964007921244e-06, "loss": 0.3084, "step": 7474 }, { "epoch": 2.0985401459854014, "grad_norm": 0.6076996922492981, "learning_rate": 2.5075479283903013e-06, "loss": 0.3216, "step": 7475 }, { "epoch": 2.098820887142055, "grad_norm": 0.5889235138893127, "learning_rate": 2.506132114838343e-06, "loss": 0.3122, "step": 7476 }, { "epoch": 2.0991016282987087, "grad_norm": 0.5599289536476135, "learning_rate": 2.5047165674164586e-06, "loss": 0.3516, "step": 7477 }, { "epoch": 2.099382369455362, "grad_norm": 0.5801505446434021, "learning_rate": 2.5033012862757054e-06, "loss": 0.3031, "step": 7478 }, { "epoch": 2.0996631106120156, "grad_norm": 0.5513166785240173, "learning_rate": 2.501886271567111e-06, "loss": 0.2811, "step": 7479 }, { "epoch": 2.0999438517686695, "grad_norm": 0.6112514138221741, "learning_rate": 2.5004715234416804e-06, "loss": 0.376, "step": 7480 }, { "epoch": 2.100224592925323, "grad_norm": 0.5071297287940979, "learning_rate": 2.4990570420503834e-06, "loss": 0.3055, "step": 7481 }, { "epoch": 2.1005053340819764, "grad_norm": 0.516471266746521, "learning_rate": 2.4976428275441687e-06, "loss": 0.3257, "step": 7482 }, { "epoch": 2.10078607523863, "grad_norm": 0.5417808294296265, "learning_rate": 2.4962288800739503e-06, "loss": 0.3404, "step": 7483 }, { "epoch": 2.1010668163952837, "grad_norm": 0.6079418659210205, "learning_rate": 2.4948151997906138e-06, "loss": 0.3456, "step": 7484 }, { "epoch": 2.101347557551937, "grad_norm": 0.5489481091499329, "learning_rate": 2.4934017868450226e-06, "loss": 0.3265, "step": 7485 }, { "epoch": 2.1016282987085906, "grad_norm": 0.5220032930374146, "learning_rate": 2.4919886413880036e-06, "loss": 0.3148, "step": 7486 }, { "epoch": 2.1019090398652445, "grad_norm": 0.6036257147789001, "learning_rate": 2.4905757635703613e-06, "loss": 0.3158, "step": 7487 }, { "epoch": 2.102189781021898, "grad_norm": 0.5709389448165894, "learning_rate": 2.489163153542868e-06, "loss": 0.2949, "step": 7488 }, { "epoch": 2.1024705221785513, "grad_norm": 0.5465784668922424, "learning_rate": 2.4877508114562697e-06, "loss": 0.3371, "step": 7489 }, { "epoch": 2.102751263335205, "grad_norm": 0.5625720024108887, "learning_rate": 2.4863387374612827e-06, "loss": 0.319, "step": 7490 }, { "epoch": 2.1030320044918587, "grad_norm": 0.5638197660446167, "learning_rate": 2.4849269317085927e-06, "loss": 0.3438, "step": 7491 }, { "epoch": 2.103312745648512, "grad_norm": 0.591765284538269, "learning_rate": 2.4835153943488617e-06, "loss": 0.3305, "step": 7492 }, { "epoch": 2.1035934868051656, "grad_norm": 0.5602503418922424, "learning_rate": 2.482104125532717e-06, "loss": 0.3237, "step": 7493 }, { "epoch": 2.103874227961819, "grad_norm": 0.6113702058792114, "learning_rate": 2.4806931254107625e-06, "loss": 0.3125, "step": 7494 }, { "epoch": 2.104154969118473, "grad_norm": 0.5282644033432007, "learning_rate": 2.4792823941335724e-06, "loss": 0.3647, "step": 7495 }, { "epoch": 2.1044357102751263, "grad_norm": 0.5988295674324036, "learning_rate": 2.4778719318516886e-06, "loss": 0.3629, "step": 7496 }, { "epoch": 2.1047164514317798, "grad_norm": 0.6805895566940308, "learning_rate": 2.4764617387156304e-06, "loss": 0.36, "step": 7497 }, { "epoch": 2.1049971925884337, "grad_norm": 0.6177749633789062, "learning_rate": 2.4750518148758818e-06, "loss": 0.3218, "step": 7498 }, { "epoch": 2.105277933745087, "grad_norm": 0.5861527323722839, "learning_rate": 2.4736421604829002e-06, "loss": 0.3399, "step": 7499 }, { "epoch": 2.1055586749017405, "grad_norm": 0.6087446808815002, "learning_rate": 2.472232775687119e-06, "loss": 0.3161, "step": 7500 }, { "epoch": 2.105839416058394, "grad_norm": 0.49675971269607544, "learning_rate": 2.4708236606389347e-06, "loss": 0.3214, "step": 7501 }, { "epoch": 2.106120157215048, "grad_norm": 0.6294077038764954, "learning_rate": 2.4694148154887233e-06, "loss": 0.3223, "step": 7502 }, { "epoch": 2.1064008983717013, "grad_norm": 0.5638206601142883, "learning_rate": 2.4680062403868244e-06, "loss": 0.3203, "step": 7503 }, { "epoch": 2.1066816395283547, "grad_norm": 0.5751687288284302, "learning_rate": 2.4665979354835563e-06, "loss": 0.325, "step": 7504 }, { "epoch": 2.1069623806850086, "grad_norm": 0.6632601022720337, "learning_rate": 2.465189900929202e-06, "loss": 0.3106, "step": 7505 }, { "epoch": 2.107243121841662, "grad_norm": 0.5973731279373169, "learning_rate": 2.463782136874016e-06, "loss": 0.3438, "step": 7506 }, { "epoch": 2.1075238629983155, "grad_norm": 0.600148618221283, "learning_rate": 2.4623746434682317e-06, "loss": 0.321, "step": 7507 }, { "epoch": 2.107804604154969, "grad_norm": 0.5248749852180481, "learning_rate": 2.460967420862042e-06, "loss": 0.3083, "step": 7508 }, { "epoch": 2.108085345311623, "grad_norm": 0.5900746583938599, "learning_rate": 2.4595604692056225e-06, "loss": 0.3359, "step": 7509 }, { "epoch": 2.1083660864682763, "grad_norm": 0.5891392827033997, "learning_rate": 2.458153788649112e-06, "loss": 0.2958, "step": 7510 }, { "epoch": 2.1086468276249297, "grad_norm": 0.5616078972816467, "learning_rate": 2.45674737934262e-06, "loss": 0.3548, "step": 7511 }, { "epoch": 2.108927568781583, "grad_norm": 0.5850181579589844, "learning_rate": 2.4553412414362343e-06, "loss": 0.3199, "step": 7512 }, { "epoch": 2.109208309938237, "grad_norm": 0.557204008102417, "learning_rate": 2.4539353750800052e-06, "loss": 0.3649, "step": 7513 }, { "epoch": 2.1094890510948905, "grad_norm": 0.6122573614120483, "learning_rate": 2.4525297804239623e-06, "loss": 0.3062, "step": 7514 }, { "epoch": 2.109769792251544, "grad_norm": 0.5749813318252563, "learning_rate": 2.451124457618097e-06, "loss": 0.3345, "step": 7515 }, { "epoch": 2.110050533408198, "grad_norm": 0.6210273504257202, "learning_rate": 2.4497194068123816e-06, "loss": 0.333, "step": 7516 }, { "epoch": 2.1103312745648513, "grad_norm": 0.5353874564170837, "learning_rate": 2.4483146281567515e-06, "loss": 0.3077, "step": 7517 }, { "epoch": 2.1106120157215047, "grad_norm": 0.6579467058181763, "learning_rate": 2.446910121801115e-06, "loss": 0.3334, "step": 7518 }, { "epoch": 2.110892756878158, "grad_norm": 0.5924035906791687, "learning_rate": 2.445505887895353e-06, "loss": 0.3439, "step": 7519 }, { "epoch": 2.111173498034812, "grad_norm": 0.5882341861724854, "learning_rate": 2.4441019265893202e-06, "loss": 0.326, "step": 7520 }, { "epoch": 2.1114542391914655, "grad_norm": 0.5669301152229309, "learning_rate": 2.4426982380328328e-06, "loss": 0.3481, "step": 7521 }, { "epoch": 2.111734980348119, "grad_norm": 0.5781010985374451, "learning_rate": 2.4412948223756886e-06, "loss": 0.3442, "step": 7522 }, { "epoch": 2.112015721504773, "grad_norm": 0.5489780306816101, "learning_rate": 2.439891679767648e-06, "loss": 0.3243, "step": 7523 }, { "epoch": 2.1122964626614262, "grad_norm": 0.6508286595344543, "learning_rate": 2.4384888103584494e-06, "loss": 0.3317, "step": 7524 }, { "epoch": 2.1125772038180797, "grad_norm": 0.5775461196899414, "learning_rate": 2.437086214297793e-06, "loss": 0.3416, "step": 7525 }, { "epoch": 2.112857944974733, "grad_norm": 0.5868874192237854, "learning_rate": 2.435683891735361e-06, "loss": 0.3343, "step": 7526 }, { "epoch": 2.113138686131387, "grad_norm": 0.5194208025932312, "learning_rate": 2.434281842820797e-06, "loss": 0.3611, "step": 7527 }, { "epoch": 2.1134194272880404, "grad_norm": 0.5542311072349548, "learning_rate": 2.4328800677037178e-06, "loss": 0.3476, "step": 7528 }, { "epoch": 2.113700168444694, "grad_norm": 0.6189546585083008, "learning_rate": 2.4314785665337158e-06, "loss": 0.3554, "step": 7529 }, { "epoch": 2.1139809096013478, "grad_norm": 0.5882139801979065, "learning_rate": 2.4300773394603466e-06, "loss": 0.2938, "step": 7530 }, { "epoch": 2.114261650758001, "grad_norm": 0.5537412166595459, "learning_rate": 2.4286763866331447e-06, "loss": 0.2909, "step": 7531 }, { "epoch": 2.1145423919146547, "grad_norm": 0.5822217464447021, "learning_rate": 2.4272757082016087e-06, "loss": 0.3164, "step": 7532 }, { "epoch": 2.114823133071308, "grad_norm": 0.5897016525268555, "learning_rate": 2.425875304315208e-06, "loss": 0.3467, "step": 7533 }, { "epoch": 2.115103874227962, "grad_norm": 0.6019304394721985, "learning_rate": 2.4244751751233895e-06, "loss": 0.3264, "step": 7534 }, { "epoch": 2.1153846153846154, "grad_norm": 0.5713221430778503, "learning_rate": 2.4230753207755633e-06, "loss": 0.3045, "step": 7535 }, { "epoch": 2.115665356541269, "grad_norm": 0.6040393114089966, "learning_rate": 2.4216757414211143e-06, "loss": 0.3213, "step": 7536 }, { "epoch": 2.1159460976979223, "grad_norm": 0.5339690446853638, "learning_rate": 2.420276437209396e-06, "loss": 0.3378, "step": 7537 }, { "epoch": 2.116226838854576, "grad_norm": 0.6134693622589111, "learning_rate": 2.4188774082897353e-06, "loss": 0.3856, "step": 7538 }, { "epoch": 2.1165075800112296, "grad_norm": 0.5634220838546753, "learning_rate": 2.417478654811427e-06, "loss": 0.2969, "step": 7539 }, { "epoch": 2.116788321167883, "grad_norm": 0.6140146851539612, "learning_rate": 2.4160801769237353e-06, "loss": 0.3411, "step": 7540 }, { "epoch": 2.117069062324537, "grad_norm": 0.5918681621551514, "learning_rate": 2.4146819747759004e-06, "loss": 0.3637, "step": 7541 }, { "epoch": 2.1173498034811904, "grad_norm": 0.6469187140464783, "learning_rate": 2.413284048517126e-06, "loss": 0.3388, "step": 7542 }, { "epoch": 2.117630544637844, "grad_norm": 0.5671555995941162, "learning_rate": 2.411886398296594e-06, "loss": 0.3216, "step": 7543 }, { "epoch": 2.1179112857944973, "grad_norm": 0.5650293827056885, "learning_rate": 2.4104890242634497e-06, "loss": 0.3427, "step": 7544 }, { "epoch": 2.118192026951151, "grad_norm": 0.607598602771759, "learning_rate": 2.4090919265668134e-06, "loss": 0.3587, "step": 7545 }, { "epoch": 2.1184727681078046, "grad_norm": 0.6334040760993958, "learning_rate": 2.4076951053557773e-06, "loss": 0.3126, "step": 7546 }, { "epoch": 2.118753509264458, "grad_norm": 0.5849760174751282, "learning_rate": 2.4062985607793965e-06, "loss": 0.3234, "step": 7547 }, { "epoch": 2.119034250421112, "grad_norm": 0.5422614812850952, "learning_rate": 2.404902292986706e-06, "loss": 0.3108, "step": 7548 }, { "epoch": 2.1193149915777654, "grad_norm": 0.5604290962219238, "learning_rate": 2.4035063021267057e-06, "loss": 0.3361, "step": 7549 }, { "epoch": 2.119595732734419, "grad_norm": 0.5354562401771545, "learning_rate": 2.4021105883483636e-06, "loss": 0.3279, "step": 7550 }, { "epoch": 2.1198764738910723, "grad_norm": 0.5493098497390747, "learning_rate": 2.4007151518006267e-06, "loss": 0.3355, "step": 7551 }, { "epoch": 2.120157215047726, "grad_norm": 0.5883382558822632, "learning_rate": 2.399319992632403e-06, "loss": 0.3451, "step": 7552 }, { "epoch": 2.1204379562043796, "grad_norm": 0.6600798964500427, "learning_rate": 2.397925110992579e-06, "loss": 0.327, "step": 7553 }, { "epoch": 2.120718697361033, "grad_norm": 0.5692732930183411, "learning_rate": 2.3965305070300054e-06, "loss": 0.3234, "step": 7554 }, { "epoch": 2.1209994385176865, "grad_norm": 0.5529320240020752, "learning_rate": 2.3951361808935047e-06, "loss": 0.2964, "step": 7555 }, { "epoch": 2.1212801796743403, "grad_norm": 0.5611093044281006, "learning_rate": 2.3937421327318746e-06, "loss": 0.3681, "step": 7556 }, { "epoch": 2.121560920830994, "grad_norm": 0.5585898160934448, "learning_rate": 2.3923483626938744e-06, "loss": 0.3147, "step": 7557 }, { "epoch": 2.1218416619876472, "grad_norm": 0.6208656430244446, "learning_rate": 2.3909548709282427e-06, "loss": 0.3159, "step": 7558 }, { "epoch": 2.122122403144301, "grad_norm": 0.6122855544090271, "learning_rate": 2.389561657583681e-06, "loss": 0.3376, "step": 7559 }, { "epoch": 2.1224031443009546, "grad_norm": 0.5505663752555847, "learning_rate": 2.388168722808868e-06, "loss": 0.3573, "step": 7560 }, { "epoch": 2.122683885457608, "grad_norm": 0.5612902045249939, "learning_rate": 2.3867760667524464e-06, "loss": 0.3412, "step": 7561 }, { "epoch": 2.1229646266142614, "grad_norm": 0.6577821373939514, "learning_rate": 2.385383689563031e-06, "loss": 0.3318, "step": 7562 }, { "epoch": 2.1232453677709153, "grad_norm": 0.6066629886627197, "learning_rate": 2.383991591389211e-06, "loss": 0.3409, "step": 7563 }, { "epoch": 2.1235261089275688, "grad_norm": 0.6030011773109436, "learning_rate": 2.3825997723795384e-06, "loss": 0.3185, "step": 7564 }, { "epoch": 2.123806850084222, "grad_norm": 0.6716628074645996, "learning_rate": 2.381208232682543e-06, "loss": 0.3383, "step": 7565 }, { "epoch": 2.124087591240876, "grad_norm": 0.5747437477111816, "learning_rate": 2.3798169724467207e-06, "loss": 0.3173, "step": 7566 }, { "epoch": 2.1243683323975295, "grad_norm": 0.5512399077415466, "learning_rate": 2.3784259918205347e-06, "loss": 0.321, "step": 7567 }, { "epoch": 2.124649073554183, "grad_norm": 0.6270153522491455, "learning_rate": 2.377035290952427e-06, "loss": 0.3497, "step": 7568 }, { "epoch": 2.1249298147108364, "grad_norm": 0.6106194257736206, "learning_rate": 2.375644869990799e-06, "loss": 0.3372, "step": 7569 }, { "epoch": 2.1252105558674903, "grad_norm": 0.5862271785736084, "learning_rate": 2.374254729084031e-06, "loss": 0.3359, "step": 7570 }, { "epoch": 2.1254912970241437, "grad_norm": 0.5978150367736816, "learning_rate": 2.372864868380472e-06, "loss": 0.3816, "step": 7571 }, { "epoch": 2.125772038180797, "grad_norm": 0.6333993077278137, "learning_rate": 2.371475288028435e-06, "loss": 0.3105, "step": 7572 }, { "epoch": 2.126052779337451, "grad_norm": 0.6072990894317627, "learning_rate": 2.3700859881762125e-06, "loss": 0.3615, "step": 7573 }, { "epoch": 2.1263335204941045, "grad_norm": 0.5752395987510681, "learning_rate": 2.368696968972056e-06, "loss": 0.3371, "step": 7574 }, { "epoch": 2.126614261650758, "grad_norm": 0.5667293071746826, "learning_rate": 2.3673082305641982e-06, "loss": 0.3204, "step": 7575 }, { "epoch": 2.1268950028074114, "grad_norm": 0.550583004951477, "learning_rate": 2.3659197731008343e-06, "loss": 0.2922, "step": 7576 }, { "epoch": 2.1271757439640653, "grad_norm": 0.6199833154678345, "learning_rate": 2.3645315967301304e-06, "loss": 0.2839, "step": 7577 }, { "epoch": 2.1274564851207187, "grad_norm": 0.5537998080253601, "learning_rate": 2.363143701600227e-06, "loss": 0.3375, "step": 7578 }, { "epoch": 2.127737226277372, "grad_norm": 0.5808775424957275, "learning_rate": 2.361756087859228e-06, "loss": 0.3396, "step": 7579 }, { "epoch": 2.128017967434026, "grad_norm": 0.5723810791969299, "learning_rate": 2.360368755655216e-06, "loss": 0.2775, "step": 7580 }, { "epoch": 2.1282987085906795, "grad_norm": 0.48975247144699097, "learning_rate": 2.3589817051362353e-06, "loss": 0.363, "step": 7581 }, { "epoch": 2.128579449747333, "grad_norm": 0.5985215902328491, "learning_rate": 2.3575949364503013e-06, "loss": 0.2941, "step": 7582 }, { "epoch": 2.1288601909039864, "grad_norm": 0.5500089526176453, "learning_rate": 2.3562084497454064e-06, "loss": 0.324, "step": 7583 }, { "epoch": 2.1291409320606403, "grad_norm": 0.5465249419212341, "learning_rate": 2.3548222451695023e-06, "loss": 0.3049, "step": 7584 }, { "epoch": 2.1294216732172937, "grad_norm": 0.5955963730812073, "learning_rate": 2.3534363228705216e-06, "loss": 0.3097, "step": 7585 }, { "epoch": 2.129702414373947, "grad_norm": 0.5353860259056091, "learning_rate": 2.352050682996356e-06, "loss": 0.326, "step": 7586 }, { "epoch": 2.1299831555306006, "grad_norm": 0.5219101905822754, "learning_rate": 2.350665325694877e-06, "loss": 0.3722, "step": 7587 }, { "epoch": 2.1302638966872545, "grad_norm": 0.5881059169769287, "learning_rate": 2.3492802511139197e-06, "loss": 0.3312, "step": 7588 }, { "epoch": 2.130544637843908, "grad_norm": 0.5381979942321777, "learning_rate": 2.3478954594012884e-06, "loss": 0.3322, "step": 7589 }, { "epoch": 2.1308253790005613, "grad_norm": 0.6292857527732849, "learning_rate": 2.3465109507047628e-06, "loss": 0.3815, "step": 7590 }, { "epoch": 2.1311061201572152, "grad_norm": 0.5772551894187927, "learning_rate": 2.3451267251720862e-06, "loss": 0.3351, "step": 7591 }, { "epoch": 2.1313868613138687, "grad_norm": 0.5882990956306458, "learning_rate": 2.3437427829509775e-06, "loss": 0.3334, "step": 7592 }, { "epoch": 2.131667602470522, "grad_norm": 0.6055522561073303, "learning_rate": 2.34235912418912e-06, "loss": 0.3051, "step": 7593 }, { "epoch": 2.1319483436271756, "grad_norm": 0.5196595191955566, "learning_rate": 2.3409757490341694e-06, "loss": 0.3534, "step": 7594 }, { "epoch": 2.1322290847838294, "grad_norm": 0.5962284207344055, "learning_rate": 2.3395926576337513e-06, "loss": 0.3111, "step": 7595 }, { "epoch": 2.132509825940483, "grad_norm": 0.5383732318878174, "learning_rate": 2.3382098501354603e-06, "loss": 0.3222, "step": 7596 }, { "epoch": 2.1327905670971363, "grad_norm": 0.5777949690818787, "learning_rate": 2.3368273266868637e-06, "loss": 0.3553, "step": 7597 }, { "epoch": 2.13307130825379, "grad_norm": 0.5984180569648743, "learning_rate": 2.3354450874354934e-06, "loss": 0.2832, "step": 7598 }, { "epoch": 2.1333520494104437, "grad_norm": 0.5609362125396729, "learning_rate": 2.334063132528852e-06, "loss": 0.3384, "step": 7599 }, { "epoch": 2.133632790567097, "grad_norm": 0.5911528468132019, "learning_rate": 2.332681462114416e-06, "loss": 0.3451, "step": 7600 }, { "epoch": 2.1339135317237505, "grad_norm": 0.6141667366027832, "learning_rate": 2.3313000763396266e-06, "loss": 0.3033, "step": 7601 }, { "epoch": 2.1341942728804044, "grad_norm": 0.6282621622085571, "learning_rate": 2.3299189753518984e-06, "loss": 0.2947, "step": 7602 }, { "epoch": 2.134475014037058, "grad_norm": 0.5885448455810547, "learning_rate": 2.3285381592986133e-06, "loss": 0.3634, "step": 7603 }, { "epoch": 2.1347557551937113, "grad_norm": 0.5369791388511658, "learning_rate": 2.3271576283271215e-06, "loss": 0.3067, "step": 7604 }, { "epoch": 2.1350364963503647, "grad_norm": 0.5751357078552246, "learning_rate": 2.325777382584748e-06, "loss": 0.3165, "step": 7605 }, { "epoch": 2.1353172375070186, "grad_norm": 0.5804493427276611, "learning_rate": 2.3243974222187805e-06, "loss": 0.3145, "step": 7606 }, { "epoch": 2.135597978663672, "grad_norm": 0.6409282088279724, "learning_rate": 2.323017747376484e-06, "loss": 0.322, "step": 7607 }, { "epoch": 2.1358787198203255, "grad_norm": 0.7023793458938599, "learning_rate": 2.321638358205083e-06, "loss": 0.3812, "step": 7608 }, { "epoch": 2.1361594609769794, "grad_norm": 0.5948903560638428, "learning_rate": 2.320259254851784e-06, "loss": 0.3465, "step": 7609 }, { "epoch": 2.136440202133633, "grad_norm": 0.5517528057098389, "learning_rate": 2.318880437463753e-06, "loss": 0.2651, "step": 7610 }, { "epoch": 2.1367209432902863, "grad_norm": 0.5835468769073486, "learning_rate": 2.3175019061881266e-06, "loss": 0.3452, "step": 7611 }, { "epoch": 2.1370016844469397, "grad_norm": 0.5559266805648804, "learning_rate": 2.316123661172018e-06, "loss": 0.3072, "step": 7612 }, { "epoch": 2.1372824256035936, "grad_norm": 0.621350109577179, "learning_rate": 2.3147457025624998e-06, "loss": 0.2976, "step": 7613 }, { "epoch": 2.137563166760247, "grad_norm": 0.5921510457992554, "learning_rate": 2.3133680305066236e-06, "loss": 0.3149, "step": 7614 }, { "epoch": 2.1378439079169005, "grad_norm": 0.5949103236198425, "learning_rate": 2.3119906451514045e-06, "loss": 0.3443, "step": 7615 }, { "epoch": 2.1381246490735544, "grad_norm": 0.5970814228057861, "learning_rate": 2.310613546643826e-06, "loss": 0.2916, "step": 7616 }, { "epoch": 2.138405390230208, "grad_norm": 0.611373245716095, "learning_rate": 2.3092367351308478e-06, "loss": 0.3379, "step": 7617 }, { "epoch": 2.1386861313868613, "grad_norm": 0.5446313619613647, "learning_rate": 2.30786021075939e-06, "loss": 0.3182, "step": 7618 }, { "epoch": 2.1389668725435147, "grad_norm": 0.568806529045105, "learning_rate": 2.3064839736763516e-06, "loss": 0.3217, "step": 7619 }, { "epoch": 2.1392476137001686, "grad_norm": 0.5940142869949341, "learning_rate": 2.3051080240285917e-06, "loss": 0.2976, "step": 7620 }, { "epoch": 2.139528354856822, "grad_norm": 0.5360431671142578, "learning_rate": 2.3037323619629448e-06, "loss": 0.3038, "step": 7621 }, { "epoch": 2.1398090960134755, "grad_norm": 0.5377373695373535, "learning_rate": 2.3023569876262154e-06, "loss": 0.3278, "step": 7622 }, { "epoch": 2.1400898371701293, "grad_norm": 0.5923365950584412, "learning_rate": 2.3009819011651705e-06, "loss": 0.3461, "step": 7623 }, { "epoch": 2.140370578326783, "grad_norm": 0.5814552307128906, "learning_rate": 2.2996071027265553e-06, "loss": 0.3381, "step": 7624 }, { "epoch": 2.1406513194834362, "grad_norm": 0.5893500447273254, "learning_rate": 2.2982325924570775e-06, "loss": 0.3203, "step": 7625 }, { "epoch": 2.1409320606400897, "grad_norm": 0.522693932056427, "learning_rate": 2.296858370503414e-06, "loss": 0.368, "step": 7626 }, { "epoch": 2.1412128017967436, "grad_norm": 0.5240692496299744, "learning_rate": 2.295484437012218e-06, "loss": 0.3346, "step": 7627 }, { "epoch": 2.141493542953397, "grad_norm": 0.526337742805481, "learning_rate": 2.294110792130102e-06, "loss": 0.3137, "step": 7628 }, { "epoch": 2.1417742841100504, "grad_norm": 0.5728418827056885, "learning_rate": 2.292737436003658e-06, "loss": 0.3152, "step": 7629 }, { "epoch": 2.1420550252667043, "grad_norm": 0.5894528031349182, "learning_rate": 2.291364368779437e-06, "loss": 0.3136, "step": 7630 }, { "epoch": 2.1423357664233578, "grad_norm": 0.5678023099899292, "learning_rate": 2.2899915906039687e-06, "loss": 0.3616, "step": 7631 }, { "epoch": 2.142616507580011, "grad_norm": 0.5598422884941101, "learning_rate": 2.288619101623746e-06, "loss": 0.3357, "step": 7632 }, { "epoch": 2.1428972487366647, "grad_norm": 0.5799552202224731, "learning_rate": 2.287246901985229e-06, "loss": 0.2989, "step": 7633 }, { "epoch": 2.1431779898933185, "grad_norm": 0.5640353560447693, "learning_rate": 2.2858749918348556e-06, "loss": 0.3367, "step": 7634 }, { "epoch": 2.143458731049972, "grad_norm": 0.5647066831588745, "learning_rate": 2.284503371319023e-06, "loss": 0.3342, "step": 7635 }, { "epoch": 2.1437394722066254, "grad_norm": 0.6022063493728638, "learning_rate": 2.283132040584106e-06, "loss": 0.327, "step": 7636 }, { "epoch": 2.144020213363279, "grad_norm": 0.5313782691955566, "learning_rate": 2.2817609997764425e-06, "loss": 0.2896, "step": 7637 }, { "epoch": 2.1443009545199327, "grad_norm": 0.546099066734314, "learning_rate": 2.2803902490423393e-06, "loss": 0.3727, "step": 7638 }, { "epoch": 2.144581695676586, "grad_norm": 0.5981913805007935, "learning_rate": 2.2790197885280784e-06, "loss": 0.3227, "step": 7639 }, { "epoch": 2.1448624368332396, "grad_norm": 0.6189888715744019, "learning_rate": 2.2776496183799034e-06, "loss": 0.3163, "step": 7640 }, { "epoch": 2.1451431779898935, "grad_norm": 0.6030513644218445, "learning_rate": 2.2762797387440343e-06, "loss": 0.306, "step": 7641 }, { "epoch": 2.145423919146547, "grad_norm": 0.6845206022262573, "learning_rate": 2.2749101497666515e-06, "loss": 0.3137, "step": 7642 }, { "epoch": 2.1457046603032004, "grad_norm": 0.6505918502807617, "learning_rate": 2.273540851593913e-06, "loss": 0.3337, "step": 7643 }, { "epoch": 2.145985401459854, "grad_norm": 0.6210272312164307, "learning_rate": 2.27217184437194e-06, "loss": 0.3893, "step": 7644 }, { "epoch": 2.1462661426165077, "grad_norm": 0.5908429026603699, "learning_rate": 2.2708031282468235e-06, "loss": 0.3355, "step": 7645 }, { "epoch": 2.146546883773161, "grad_norm": 0.6091362833976746, "learning_rate": 2.269434703364625e-06, "loss": 0.3513, "step": 7646 }, { "epoch": 2.1468276249298146, "grad_norm": 0.6162760257720947, "learning_rate": 2.268066569871376e-06, "loss": 0.3102, "step": 7647 }, { "epoch": 2.147108366086468, "grad_norm": 0.5612039566040039, "learning_rate": 2.2666987279130727e-06, "loss": 0.3015, "step": 7648 }, { "epoch": 2.147389107243122, "grad_norm": 0.5317087769508362, "learning_rate": 2.265331177635685e-06, "loss": 0.3512, "step": 7649 }, { "epoch": 2.1476698483997754, "grad_norm": 0.5793330669403076, "learning_rate": 2.263963919185147e-06, "loss": 0.3264, "step": 7650 }, { "epoch": 2.147950589556429, "grad_norm": 0.6438215970993042, "learning_rate": 2.2625969527073666e-06, "loss": 0.328, "step": 7651 }, { "epoch": 2.1482313307130827, "grad_norm": 0.6481834650039673, "learning_rate": 2.261230278348217e-06, "loss": 0.3165, "step": 7652 }, { "epoch": 2.148512071869736, "grad_norm": 0.6245670318603516, "learning_rate": 2.259863896253539e-06, "loss": 0.3273, "step": 7653 }, { "epoch": 2.1487928130263896, "grad_norm": 0.5959929823875427, "learning_rate": 2.258497806569148e-06, "loss": 0.3575, "step": 7654 }, { "epoch": 2.149073554183043, "grad_norm": 0.545421838760376, "learning_rate": 2.2571320094408213e-06, "loss": 0.2979, "step": 7655 }, { "epoch": 2.149354295339697, "grad_norm": 0.5832664370536804, "learning_rate": 2.2557665050143106e-06, "loss": 0.3247, "step": 7656 }, { "epoch": 2.1496350364963503, "grad_norm": 0.6086153984069824, "learning_rate": 2.254401293435332e-06, "loss": 0.2812, "step": 7657 }, { "epoch": 2.149915777653004, "grad_norm": 0.5361592173576355, "learning_rate": 2.253036374849576e-06, "loss": 0.327, "step": 7658 }, { "epoch": 2.1501965188096577, "grad_norm": 0.5319271087646484, "learning_rate": 2.251671749402695e-06, "loss": 0.3496, "step": 7659 }, { "epoch": 2.150477259966311, "grad_norm": 0.6418182849884033, "learning_rate": 2.250307417240313e-06, "loss": 0.2949, "step": 7660 }, { "epoch": 2.1507580011229646, "grad_norm": 0.539433479309082, "learning_rate": 2.2489433785080256e-06, "loss": 0.3208, "step": 7661 }, { "epoch": 2.151038742279618, "grad_norm": 0.6144167184829712, "learning_rate": 2.2475796333513916e-06, "loss": 0.3068, "step": 7662 }, { "epoch": 2.151319483436272, "grad_norm": 0.5509148836135864, "learning_rate": 2.2462161819159445e-06, "loss": 0.3553, "step": 7663 }, { "epoch": 2.1516002245929253, "grad_norm": 0.5421391129493713, "learning_rate": 2.2448530243471816e-06, "loss": 0.301, "step": 7664 }, { "epoch": 2.1518809657495788, "grad_norm": 0.616186797618866, "learning_rate": 2.2434901607905694e-06, "loss": 0.3476, "step": 7665 }, { "epoch": 2.1521617069062327, "grad_norm": 0.5403421521186829, "learning_rate": 2.2421275913915473e-06, "loss": 0.3487, "step": 7666 }, { "epoch": 2.152442448062886, "grad_norm": 0.5637606382369995, "learning_rate": 2.2407653162955173e-06, "loss": 0.3133, "step": 7667 }, { "epoch": 2.1527231892195395, "grad_norm": 0.576444685459137, "learning_rate": 2.2394033356478557e-06, "loss": 0.3567, "step": 7668 }, { "epoch": 2.153003930376193, "grad_norm": 0.6034927368164062, "learning_rate": 2.238041649593901e-06, "loss": 0.3318, "step": 7669 }, { "epoch": 2.153284671532847, "grad_norm": 0.543558657169342, "learning_rate": 2.2366802582789676e-06, "loss": 0.3411, "step": 7670 }, { "epoch": 2.1535654126895003, "grad_norm": 0.6106688976287842, "learning_rate": 2.235319161848334e-06, "loss": 0.3128, "step": 7671 }, { "epoch": 2.1538461538461537, "grad_norm": 0.5550286769866943, "learning_rate": 2.2339583604472426e-06, "loss": 0.3032, "step": 7672 }, { "epoch": 2.1541268950028076, "grad_norm": 0.5444990992546082, "learning_rate": 2.2325978542209177e-06, "loss": 0.3112, "step": 7673 }, { "epoch": 2.154407636159461, "grad_norm": 0.5721750259399414, "learning_rate": 2.2312376433145412e-06, "loss": 0.3417, "step": 7674 }, { "epoch": 2.1546883773161145, "grad_norm": 0.5697322487831116, "learning_rate": 2.2298777278732632e-06, "loss": 0.3478, "step": 7675 }, { "epoch": 2.154969118472768, "grad_norm": 0.5581787824630737, "learning_rate": 2.2285181080422097e-06, "loss": 0.3135, "step": 7676 }, { "epoch": 2.155249859629422, "grad_norm": 0.6136451959609985, "learning_rate": 2.2271587839664673e-06, "loss": 0.3338, "step": 7677 }, { "epoch": 2.1555306007860753, "grad_norm": 0.5717584490776062, "learning_rate": 2.2257997557910974e-06, "loss": 0.3557, "step": 7678 }, { "epoch": 2.1558113419427287, "grad_norm": 0.6362666487693787, "learning_rate": 2.2244410236611254e-06, "loss": 0.2819, "step": 7679 }, { "epoch": 2.156092083099382, "grad_norm": 0.4996248185634613, "learning_rate": 2.2230825877215484e-06, "loss": 0.3315, "step": 7680 }, { "epoch": 2.156372824256036, "grad_norm": 0.5268717408180237, "learning_rate": 2.221724448117329e-06, "loss": 0.3021, "step": 7681 }, { "epoch": 2.1566535654126895, "grad_norm": 0.5348608493804932, "learning_rate": 2.220366604993398e-06, "loss": 0.3008, "step": 7682 }, { "epoch": 2.156934306569343, "grad_norm": 0.6342946290969849, "learning_rate": 2.2190090584946595e-06, "loss": 0.3182, "step": 7683 }, { "epoch": 2.157215047725997, "grad_norm": 0.5667331218719482, "learning_rate": 2.2176518087659783e-06, "loss": 0.3808, "step": 7684 }, { "epoch": 2.1574957888826503, "grad_norm": 0.5849010944366455, "learning_rate": 2.216294855952196e-06, "loss": 0.3348, "step": 7685 }, { "epoch": 2.1577765300393037, "grad_norm": 0.6405203342437744, "learning_rate": 2.214938200198115e-06, "loss": 0.3108, "step": 7686 }, { "epoch": 2.158057271195957, "grad_norm": 0.5259215831756592, "learning_rate": 2.2135818416485096e-06, "loss": 0.3193, "step": 7687 }, { "epoch": 2.158338012352611, "grad_norm": 0.5738062858581543, "learning_rate": 2.2122257804481233e-06, "loss": 0.3287, "step": 7688 }, { "epoch": 2.1586187535092645, "grad_norm": 0.5805240869522095, "learning_rate": 2.210870016741664e-06, "loss": 0.3451, "step": 7689 }, { "epoch": 2.158899494665918, "grad_norm": 0.6308855414390564, "learning_rate": 2.209514550673814e-06, "loss": 0.2936, "step": 7690 }, { "epoch": 2.159180235822572, "grad_norm": 0.5688693523406982, "learning_rate": 2.208159382389217e-06, "loss": 0.3584, "step": 7691 }, { "epoch": 2.1594609769792252, "grad_norm": 0.5478384494781494, "learning_rate": 2.2068045120324905e-06, "loss": 0.3125, "step": 7692 }, { "epoch": 2.1597417181358787, "grad_norm": 0.5713398456573486, "learning_rate": 2.205449939748217e-06, "loss": 0.3161, "step": 7693 }, { "epoch": 2.160022459292532, "grad_norm": 0.5147822499275208, "learning_rate": 2.204095665680946e-06, "loss": 0.3512, "step": 7694 }, { "epoch": 2.160303200449186, "grad_norm": 0.5163027048110962, "learning_rate": 2.202741689975201e-06, "loss": 0.3718, "step": 7695 }, { "epoch": 2.1605839416058394, "grad_norm": 0.5625680088996887, "learning_rate": 2.2013880127754654e-06, "loss": 0.3129, "step": 7696 }, { "epoch": 2.160864682762493, "grad_norm": 0.5634370446205139, "learning_rate": 2.200034634226198e-06, "loss": 0.3085, "step": 7697 }, { "epoch": 2.1611454239191463, "grad_norm": 0.5660167932510376, "learning_rate": 2.198681554471825e-06, "loss": 0.3381, "step": 7698 }, { "epoch": 2.1614261650758, "grad_norm": 0.5348774194717407, "learning_rate": 2.1973287736567332e-06, "loss": 0.3531, "step": 7699 }, { "epoch": 2.1617069062324537, "grad_norm": 0.5696753263473511, "learning_rate": 2.1959762919252885e-06, "loss": 0.3314, "step": 7700 }, { "epoch": 2.161987647389107, "grad_norm": 0.5382516384124756, "learning_rate": 2.1946241094218176e-06, "loss": 0.3257, "step": 7701 }, { "epoch": 2.162268388545761, "grad_norm": 0.567698061466217, "learning_rate": 2.1932722262906133e-06, "loss": 0.3453, "step": 7702 }, { "epoch": 2.1625491297024144, "grad_norm": 0.5876367688179016, "learning_rate": 2.1919206426759453e-06, "loss": 0.2999, "step": 7703 }, { "epoch": 2.162829870859068, "grad_norm": 0.580852210521698, "learning_rate": 2.190569358722042e-06, "loss": 0.3096, "step": 7704 }, { "epoch": 2.1631106120157213, "grad_norm": 0.598149836063385, "learning_rate": 2.189218374573108e-06, "loss": 0.3172, "step": 7705 }, { "epoch": 2.163391353172375, "grad_norm": 0.6089771389961243, "learning_rate": 2.1878676903733088e-06, "loss": 0.3641, "step": 7706 }, { "epoch": 2.1636720943290286, "grad_norm": 0.6105449199676514, "learning_rate": 2.1865173062667832e-06, "loss": 0.3414, "step": 7707 }, { "epoch": 2.163952835485682, "grad_norm": 0.5699039697647095, "learning_rate": 2.1851672223976355e-06, "loss": 0.2988, "step": 7708 }, { "epoch": 2.164233576642336, "grad_norm": 0.5573256015777588, "learning_rate": 2.183817438909935e-06, "loss": 0.351, "step": 7709 }, { "epoch": 2.1645143177989894, "grad_norm": 0.6350598931312561, "learning_rate": 2.1824679559477267e-06, "loss": 0.3197, "step": 7710 }, { "epoch": 2.164795058955643, "grad_norm": 0.6099666953086853, "learning_rate": 2.1811187736550156e-06, "loss": 0.3115, "step": 7711 }, { "epoch": 2.1650758001122963, "grad_norm": 0.5796921849250793, "learning_rate": 2.179769892175781e-06, "loss": 0.3097, "step": 7712 }, { "epoch": 2.16535654126895, "grad_norm": 0.6009588241577148, "learning_rate": 2.178421311653966e-06, "loss": 0.273, "step": 7713 }, { "epoch": 2.1656372824256036, "grad_norm": 0.5405430793762207, "learning_rate": 2.17707303223348e-06, "loss": 0.3355, "step": 7714 }, { "epoch": 2.165918023582257, "grad_norm": 0.6128930449485779, "learning_rate": 2.1757250540582075e-06, "loss": 0.3118, "step": 7715 }, { "epoch": 2.166198764738911, "grad_norm": 0.5836557745933533, "learning_rate": 2.1743773772719927e-06, "loss": 0.3437, "step": 7716 }, { "epoch": 2.1664795058955644, "grad_norm": 0.6024447679519653, "learning_rate": 2.173030002018654e-06, "loss": 0.2688, "step": 7717 }, { "epoch": 2.166760247052218, "grad_norm": 0.6296533942222595, "learning_rate": 2.1716829284419727e-06, "loss": 0.3088, "step": 7718 }, { "epoch": 2.1670409882088713, "grad_norm": 0.6000844240188599, "learning_rate": 2.1703361566857025e-06, "loss": 0.3062, "step": 7719 }, { "epoch": 2.167321729365525, "grad_norm": 0.5442104339599609, "learning_rate": 2.1689896868935613e-06, "loss": 0.3364, "step": 7720 }, { "epoch": 2.1676024705221786, "grad_norm": 0.592156171798706, "learning_rate": 2.167643519209233e-06, "loss": 0.3387, "step": 7721 }, { "epoch": 2.167883211678832, "grad_norm": 0.6309889554977417, "learning_rate": 2.166297653776378e-06, "loss": 0.2709, "step": 7722 }, { "epoch": 2.168163952835486, "grad_norm": 0.599522590637207, "learning_rate": 2.164952090738614e-06, "loss": 0.3284, "step": 7723 }, { "epoch": 2.1684446939921393, "grad_norm": 0.5739444494247437, "learning_rate": 2.1636068302395324e-06, "loss": 0.3244, "step": 7724 }, { "epoch": 2.168725435148793, "grad_norm": 0.6328409910202026, "learning_rate": 2.1622618724226934e-06, "loss": 0.3425, "step": 7725 }, { "epoch": 2.1690061763054462, "grad_norm": 0.5877583026885986, "learning_rate": 2.1609172174316188e-06, "loss": 0.3225, "step": 7726 }, { "epoch": 2.1692869174621, "grad_norm": 0.5428234934806824, "learning_rate": 2.1595728654098057e-06, "loss": 0.3717, "step": 7727 }, { "epoch": 2.1695676586187536, "grad_norm": 0.5548840761184692, "learning_rate": 2.158228816500711e-06, "loss": 0.3677, "step": 7728 }, { "epoch": 2.169848399775407, "grad_norm": 0.5127007961273193, "learning_rate": 2.1568850708477672e-06, "loss": 0.3156, "step": 7729 }, { "epoch": 2.1701291409320604, "grad_norm": 0.5930838584899902, "learning_rate": 2.1555416285943683e-06, "loss": 0.3291, "step": 7730 }, { "epoch": 2.1704098820887143, "grad_norm": 0.5415441989898682, "learning_rate": 2.1541984898838763e-06, "loss": 0.3561, "step": 7731 }, { "epoch": 2.1706906232453678, "grad_norm": 0.5847387909889221, "learning_rate": 2.152855654859627e-06, "loss": 0.3848, "step": 7732 }, { "epoch": 2.170971364402021, "grad_norm": 0.5580772757530212, "learning_rate": 2.1515131236649144e-06, "loss": 0.3037, "step": 7733 }, { "epoch": 2.171252105558675, "grad_norm": 0.5752868056297302, "learning_rate": 2.15017089644301e-06, "loss": 0.3629, "step": 7734 }, { "epoch": 2.1715328467153285, "grad_norm": 0.6275718808174133, "learning_rate": 2.1488289733371454e-06, "loss": 0.3029, "step": 7735 }, { "epoch": 2.171813587871982, "grad_norm": 0.5496490597724915, "learning_rate": 2.1474873544905204e-06, "loss": 0.3288, "step": 7736 }, { "epoch": 2.1720943290286354, "grad_norm": 0.5766544342041016, "learning_rate": 2.1461460400463084e-06, "loss": 0.336, "step": 7737 }, { "epoch": 2.1723750701852893, "grad_norm": 0.596799373626709, "learning_rate": 2.1448050301476425e-06, "loss": 0.3326, "step": 7738 }, { "epoch": 2.1726558113419427, "grad_norm": 0.5938395857810974, "learning_rate": 2.1434643249376304e-06, "loss": 0.3374, "step": 7739 }, { "epoch": 2.172936552498596, "grad_norm": 0.5244588851928711, "learning_rate": 2.14212392455934e-06, "loss": 0.3107, "step": 7740 }, { "epoch": 2.1732172936552496, "grad_norm": 0.5750537514686584, "learning_rate": 2.140783829155814e-06, "loss": 0.3297, "step": 7741 }, { "epoch": 2.1734980348119035, "grad_norm": 0.5554062724113464, "learning_rate": 2.139444038870057e-06, "loss": 0.336, "step": 7742 }, { "epoch": 2.173778775968557, "grad_norm": 0.5660613775253296, "learning_rate": 2.138104553845043e-06, "loss": 0.323, "step": 7743 }, { "epoch": 2.1740595171252104, "grad_norm": 0.6504146456718445, "learning_rate": 2.136765374223715e-06, "loss": 0.3416, "step": 7744 }, { "epoch": 2.1743402582818643, "grad_norm": 0.532696008682251, "learning_rate": 2.1354265001489795e-06, "loss": 0.3495, "step": 7745 }, { "epoch": 2.1746209994385177, "grad_norm": 0.6625186800956726, "learning_rate": 2.1340879317637154e-06, "loss": 0.3098, "step": 7746 }, { "epoch": 2.174901740595171, "grad_norm": 0.620116651058197, "learning_rate": 2.132749669210766e-06, "loss": 0.3671, "step": 7747 }, { "epoch": 2.1751824817518246, "grad_norm": 0.6086158156394958, "learning_rate": 2.1314117126329375e-06, "loss": 0.3115, "step": 7748 }, { "epoch": 2.1754632229084785, "grad_norm": 0.6067386269569397, "learning_rate": 2.1300740621730165e-06, "loss": 0.3228, "step": 7749 }, { "epoch": 2.175743964065132, "grad_norm": 0.6013914346694946, "learning_rate": 2.128736717973743e-06, "loss": 0.3161, "step": 7750 }, { "epoch": 2.1760247052217854, "grad_norm": 0.5442312359809875, "learning_rate": 2.1273996801778336e-06, "loss": 0.3615, "step": 7751 }, { "epoch": 2.1763054463784393, "grad_norm": 0.6353532671928406, "learning_rate": 2.1260629489279662e-06, "loss": 0.3188, "step": 7752 }, { "epoch": 2.1765861875350927, "grad_norm": 0.6379748582839966, "learning_rate": 2.1247265243667875e-06, "loss": 0.3386, "step": 7753 }, { "epoch": 2.176866928691746, "grad_norm": 0.6232293844223022, "learning_rate": 2.123390406636916e-06, "loss": 0.2794, "step": 7754 }, { "epoch": 2.1771476698483996, "grad_norm": 0.557019829750061, "learning_rate": 2.12205459588093e-06, "loss": 0.3874, "step": 7755 }, { "epoch": 2.1774284110050535, "grad_norm": 0.583795428276062, "learning_rate": 2.1207190922413825e-06, "loss": 0.3471, "step": 7756 }, { "epoch": 2.177709152161707, "grad_norm": 0.5697951912879944, "learning_rate": 2.119383895860788e-06, "loss": 0.3513, "step": 7757 }, { "epoch": 2.1779898933183603, "grad_norm": 0.5671022534370422, "learning_rate": 2.1180490068816296e-06, "loss": 0.3582, "step": 7758 }, { "epoch": 2.1782706344750142, "grad_norm": 0.5471994280815125, "learning_rate": 2.116714425446361e-06, "loss": 0.3537, "step": 7759 }, { "epoch": 2.1785513756316677, "grad_norm": 0.5638909339904785, "learning_rate": 2.1153801516973976e-06, "loss": 0.3138, "step": 7760 }, { "epoch": 2.178832116788321, "grad_norm": 0.602271318435669, "learning_rate": 2.114046185777128e-06, "loss": 0.3198, "step": 7761 }, { "epoch": 2.1791128579449746, "grad_norm": 0.5836250185966492, "learning_rate": 2.1127125278279005e-06, "loss": 0.3459, "step": 7762 }, { "epoch": 2.1793935991016284, "grad_norm": 0.5795777440071106, "learning_rate": 2.111379177992039e-06, "loss": 0.3149, "step": 7763 }, { "epoch": 2.179674340258282, "grad_norm": 0.590065598487854, "learning_rate": 2.1100461364118285e-06, "loss": 0.3654, "step": 7764 }, { "epoch": 2.1799550814149353, "grad_norm": 0.6122351288795471, "learning_rate": 2.1087134032295208e-06, "loss": 0.3374, "step": 7765 }, { "epoch": 2.180235822571589, "grad_norm": 0.5787692666053772, "learning_rate": 2.1073809785873417e-06, "loss": 0.3714, "step": 7766 }, { "epoch": 2.1805165637282427, "grad_norm": 0.540298342704773, "learning_rate": 2.106048862627474e-06, "loss": 0.3275, "step": 7767 }, { "epoch": 2.180797304884896, "grad_norm": 0.5755876302719116, "learning_rate": 2.1047170554920775e-06, "loss": 0.3117, "step": 7768 }, { "epoch": 2.1810780460415495, "grad_norm": 0.5442964434623718, "learning_rate": 2.103385557323272e-06, "loss": 0.3087, "step": 7769 }, { "epoch": 2.1813587871982034, "grad_norm": 0.6338362097740173, "learning_rate": 2.1020543682631454e-06, "loss": 0.3017, "step": 7770 }, { "epoch": 2.181639528354857, "grad_norm": 0.6212226748466492, "learning_rate": 2.1007234884537574e-06, "loss": 0.3292, "step": 7771 }, { "epoch": 2.1819202695115103, "grad_norm": 0.5792097449302673, "learning_rate": 2.0993929180371277e-06, "loss": 0.3172, "step": 7772 }, { "epoch": 2.182201010668164, "grad_norm": 0.59674072265625, "learning_rate": 2.0980626571552474e-06, "loss": 0.331, "step": 7773 }, { "epoch": 2.1824817518248176, "grad_norm": 0.5870463252067566, "learning_rate": 2.0967327059500763e-06, "loss": 0.31, "step": 7774 }, { "epoch": 2.182762492981471, "grad_norm": 0.5281668305397034, "learning_rate": 2.0954030645635352e-06, "loss": 0.3677, "step": 7775 }, { "epoch": 2.1830432341381245, "grad_norm": 0.5943126082420349, "learning_rate": 2.0940737331375182e-06, "loss": 0.3244, "step": 7776 }, { "epoch": 2.1833239752947784, "grad_norm": 0.5884861946105957, "learning_rate": 2.09274471181388e-06, "loss": 0.2964, "step": 7777 }, { "epoch": 2.183604716451432, "grad_norm": 0.5693631768226624, "learning_rate": 2.0914160007344487e-06, "loss": 0.3297, "step": 7778 }, { "epoch": 2.1838854576080853, "grad_norm": 0.6288324594497681, "learning_rate": 2.090087600041014e-06, "loss": 0.3094, "step": 7779 }, { "epoch": 2.1841661987647387, "grad_norm": 0.5757323503494263, "learning_rate": 2.0887595098753342e-06, "loss": 0.3147, "step": 7780 }, { "epoch": 2.1844469399213926, "grad_norm": 0.5840219855308533, "learning_rate": 2.087431730379137e-06, "loss": 0.3291, "step": 7781 }, { "epoch": 2.184727681078046, "grad_norm": 0.5445331335067749, "learning_rate": 2.0861042616941117e-06, "loss": 0.3535, "step": 7782 }, { "epoch": 2.1850084222346995, "grad_norm": 0.5671808123588562, "learning_rate": 2.0847771039619215e-06, "loss": 0.3084, "step": 7783 }, { "epoch": 2.1852891633913534, "grad_norm": 0.6456224918365479, "learning_rate": 2.08345025732419e-06, "loss": 0.3262, "step": 7784 }, { "epoch": 2.185569904548007, "grad_norm": 0.6174296140670776, "learning_rate": 2.082123721922508e-06, "loss": 0.3243, "step": 7785 }, { "epoch": 2.1858506457046603, "grad_norm": 0.5586106777191162, "learning_rate": 2.080797497898439e-06, "loss": 0.32, "step": 7786 }, { "epoch": 2.1861313868613137, "grad_norm": 0.5374150276184082, "learning_rate": 2.0794715853935064e-06, "loss": 0.3055, "step": 7787 }, { "epoch": 2.1864121280179676, "grad_norm": 0.5498889684677124, "learning_rate": 2.0781459845492064e-06, "loss": 0.3391, "step": 7788 }, { "epoch": 2.186692869174621, "grad_norm": 0.5667555928230286, "learning_rate": 2.0768206955069953e-06, "loss": 0.3288, "step": 7789 }, { "epoch": 2.1869736103312745, "grad_norm": 0.5854453444480896, "learning_rate": 2.0754957184083036e-06, "loss": 0.308, "step": 7790 }, { "epoch": 2.187254351487928, "grad_norm": 0.5819854736328125, "learning_rate": 2.074171053394522e-06, "loss": 0.2941, "step": 7791 }, { "epoch": 2.187535092644582, "grad_norm": 0.6175342798233032, "learning_rate": 2.0728467006070095e-06, "loss": 0.3443, "step": 7792 }, { "epoch": 2.1878158338012352, "grad_norm": 0.5961562395095825, "learning_rate": 2.0715226601870956e-06, "loss": 0.3465, "step": 7793 }, { "epoch": 2.1880965749578887, "grad_norm": 0.5794124007225037, "learning_rate": 2.0701989322760714e-06, "loss": 0.2745, "step": 7794 }, { "epoch": 2.1883773161145426, "grad_norm": 0.6212928891181946, "learning_rate": 2.0688755170152e-06, "loss": 0.3152, "step": 7795 }, { "epoch": 2.188658057271196, "grad_norm": 0.6221616864204407, "learning_rate": 2.067552414545705e-06, "loss": 0.32, "step": 7796 }, { "epoch": 2.1889387984278494, "grad_norm": 0.5288687348365784, "learning_rate": 2.0662296250087795e-06, "loss": 0.3123, "step": 7797 }, { "epoch": 2.189219539584503, "grad_norm": 0.5770567059516907, "learning_rate": 2.064907148545586e-06, "loss": 0.326, "step": 7798 }, { "epoch": 2.1895002807411568, "grad_norm": 0.6000266075134277, "learning_rate": 2.0635849852972456e-06, "loss": 0.3185, "step": 7799 }, { "epoch": 2.18978102189781, "grad_norm": 0.5692927241325378, "learning_rate": 2.062263135404859e-06, "loss": 0.3621, "step": 7800 }, { "epoch": 2.1900617630544637, "grad_norm": 0.5376588702201843, "learning_rate": 2.0609415990094824e-06, "loss": 0.3309, "step": 7801 }, { "epoch": 2.1903425042111175, "grad_norm": 0.65265291929245, "learning_rate": 2.0596203762521392e-06, "loss": 0.3343, "step": 7802 }, { "epoch": 2.190623245367771, "grad_norm": 0.585486888885498, "learning_rate": 2.0582994672738264e-06, "loss": 0.294, "step": 7803 }, { "epoch": 2.1909039865244244, "grad_norm": 0.5621030926704407, "learning_rate": 2.0569788722155e-06, "loss": 0.3274, "step": 7804 }, { "epoch": 2.191184727681078, "grad_norm": 0.6017136573791504, "learning_rate": 2.0556585912180876e-06, "loss": 0.3217, "step": 7805 }, { "epoch": 2.1914654688377317, "grad_norm": 0.5663872957229614, "learning_rate": 2.0543386244224817e-06, "loss": 0.3111, "step": 7806 }, { "epoch": 2.191746209994385, "grad_norm": 0.5912230610847473, "learning_rate": 2.0530189719695376e-06, "loss": 0.3277, "step": 7807 }, { "epoch": 2.1920269511510386, "grad_norm": 0.5351399183273315, "learning_rate": 2.051699634000085e-06, "loss": 0.3316, "step": 7808 }, { "epoch": 2.1923076923076925, "grad_norm": 0.6377553939819336, "learning_rate": 2.0503806106549107e-06, "loss": 0.2905, "step": 7809 }, { "epoch": 2.192588433464346, "grad_norm": 0.6106664538383484, "learning_rate": 2.0490619020747774e-06, "loss": 0.362, "step": 7810 }, { "epoch": 2.1928691746209994, "grad_norm": 0.6014419794082642, "learning_rate": 2.0477435084004053e-06, "loss": 0.3071, "step": 7811 }, { "epoch": 2.193149915777653, "grad_norm": 0.5704319477081299, "learning_rate": 2.0464254297724896e-06, "loss": 0.3366, "step": 7812 }, { "epoch": 2.1934306569343067, "grad_norm": 0.568524956703186, "learning_rate": 2.0451076663316843e-06, "loss": 0.3377, "step": 7813 }, { "epoch": 2.19371139809096, "grad_norm": 0.6283813714981079, "learning_rate": 2.0437902182186113e-06, "loss": 0.3569, "step": 7814 }, { "epoch": 2.1939921392476136, "grad_norm": 0.5840946435928345, "learning_rate": 2.0424730855738657e-06, "loss": 0.3769, "step": 7815 }, { "epoch": 2.1942728804042675, "grad_norm": 0.5546283721923828, "learning_rate": 2.0411562685379983e-06, "loss": 0.3305, "step": 7816 }, { "epoch": 2.194553621560921, "grad_norm": 0.5784397721290588, "learning_rate": 2.0398397672515356e-06, "loss": 0.3101, "step": 7817 }, { "epoch": 2.1948343627175744, "grad_norm": 0.5585423111915588, "learning_rate": 2.0385235818549653e-06, "loss": 0.3996, "step": 7818 }, { "epoch": 2.195115103874228, "grad_norm": 0.5485456585884094, "learning_rate": 2.0372077124887407e-06, "loss": 0.3324, "step": 7819 }, { "epoch": 2.1953958450308817, "grad_norm": 0.5490700602531433, "learning_rate": 2.0358921592932855e-06, "loss": 0.3688, "step": 7820 }, { "epoch": 2.195676586187535, "grad_norm": 0.5394091010093689, "learning_rate": 2.0345769224089855e-06, "loss": 0.3234, "step": 7821 }, { "epoch": 2.1959573273441886, "grad_norm": 0.5372628569602966, "learning_rate": 2.033262001976197e-06, "loss": 0.3462, "step": 7822 }, { "epoch": 2.196238068500842, "grad_norm": 0.6373228430747986, "learning_rate": 2.031947398135237e-06, "loss": 0.3556, "step": 7823 }, { "epoch": 2.196518809657496, "grad_norm": 0.5175253748893738, "learning_rate": 2.030633111026394e-06, "loss": 0.3201, "step": 7824 }, { "epoch": 2.1967995508141493, "grad_norm": 0.6014914512634277, "learning_rate": 2.0293191407899215e-06, "loss": 0.327, "step": 7825 }, { "epoch": 2.197080291970803, "grad_norm": 0.546654462814331, "learning_rate": 2.0280054875660353e-06, "loss": 0.3284, "step": 7826 }, { "epoch": 2.1973610331274567, "grad_norm": 0.5931504368782043, "learning_rate": 2.0266921514949235e-06, "loss": 0.3323, "step": 7827 }, { "epoch": 2.19764177428411, "grad_norm": 0.6058072447776794, "learning_rate": 2.0253791327167354e-06, "loss": 0.3245, "step": 7828 }, { "epoch": 2.1979225154407636, "grad_norm": 0.6033126711845398, "learning_rate": 2.0240664313715863e-06, "loss": 0.3263, "step": 7829 }, { "epoch": 2.198203256597417, "grad_norm": 0.5586051344871521, "learning_rate": 2.022754047599564e-06, "loss": 0.2928, "step": 7830 }, { "epoch": 2.198483997754071, "grad_norm": 0.5582164525985718, "learning_rate": 2.021441981540713e-06, "loss": 0.3319, "step": 7831 }, { "epoch": 2.1987647389107243, "grad_norm": 0.5959237217903137, "learning_rate": 2.020130233335054e-06, "loss": 0.319, "step": 7832 }, { "epoch": 2.1990454800673778, "grad_norm": 0.5489107370376587, "learning_rate": 2.0188188031225646e-06, "loss": 0.3161, "step": 7833 }, { "epoch": 2.199326221224031, "grad_norm": 0.6317627429962158, "learning_rate": 2.017507691043193e-06, "loss": 0.3051, "step": 7834 }, { "epoch": 2.199606962380685, "grad_norm": 0.5487210154533386, "learning_rate": 2.016196897236855e-06, "loss": 0.3183, "step": 7835 }, { "epoch": 2.1998877035373385, "grad_norm": 0.5515868067741394, "learning_rate": 2.0148864218434277e-06, "loss": 0.3095, "step": 7836 }, { "epoch": 2.200168444693992, "grad_norm": 0.5675533413887024, "learning_rate": 2.0135762650027595e-06, "loss": 0.3662, "step": 7837 }, { "epoch": 2.200449185850646, "grad_norm": 0.5844115018844604, "learning_rate": 2.01226642685466e-06, "loss": 0.3302, "step": 7838 }, { "epoch": 2.2007299270072993, "grad_norm": 0.6632570624351501, "learning_rate": 2.010956907538909e-06, "loss": 0.3138, "step": 7839 }, { "epoch": 2.2010106681639527, "grad_norm": 0.6090406179428101, "learning_rate": 2.0096477071952493e-06, "loss": 0.3114, "step": 7840 }, { "epoch": 2.201291409320606, "grad_norm": 0.7017105221748352, "learning_rate": 2.008338825963389e-06, "loss": 0.342, "step": 7841 }, { "epoch": 2.20157215047726, "grad_norm": 0.6197972893714905, "learning_rate": 2.007030263983007e-06, "loss": 0.2941, "step": 7842 }, { "epoch": 2.2018528916339135, "grad_norm": 0.5869395732879639, "learning_rate": 2.005722021393741e-06, "loss": 0.3029, "step": 7843 }, { "epoch": 2.202133632790567, "grad_norm": 0.5609723925590515, "learning_rate": 2.004414098335203e-06, "loss": 0.3338, "step": 7844 }, { "epoch": 2.202414373947221, "grad_norm": 0.6342980861663818, "learning_rate": 2.003106494946962e-06, "loss": 0.3352, "step": 7845 }, { "epoch": 2.2026951151038743, "grad_norm": 0.6291910409927368, "learning_rate": 2.0017992113685612e-06, "loss": 0.3748, "step": 7846 }, { "epoch": 2.2029758562605277, "grad_norm": 0.5410831570625305, "learning_rate": 2.0004922477395043e-06, "loss": 0.321, "step": 7847 }, { "epoch": 2.203256597417181, "grad_norm": 0.5877004265785217, "learning_rate": 1.9991856041992604e-06, "loss": 0.3111, "step": 7848 }, { "epoch": 2.203537338573835, "grad_norm": 0.6012861132621765, "learning_rate": 1.997879280887269e-06, "loss": 0.2995, "step": 7849 }, { "epoch": 2.2038180797304885, "grad_norm": 0.5536375045776367, "learning_rate": 1.9965732779429302e-06, "loss": 0.3011, "step": 7850 }, { "epoch": 2.204098820887142, "grad_norm": 0.5814211964607239, "learning_rate": 1.9952675955056144e-06, "loss": 0.3227, "step": 7851 }, { "epoch": 2.204379562043796, "grad_norm": 0.5739724040031433, "learning_rate": 1.9939622337146574e-06, "loss": 0.324, "step": 7852 }, { "epoch": 2.2046603032004493, "grad_norm": 0.5835128426551819, "learning_rate": 1.992657192709356e-06, "loss": 0.3203, "step": 7853 }, { "epoch": 2.2049410443571027, "grad_norm": 0.5871363878250122, "learning_rate": 1.9913524726289784e-06, "loss": 0.3051, "step": 7854 }, { "epoch": 2.205221785513756, "grad_norm": 0.5754022598266602, "learning_rate": 1.9900480736127557e-06, "loss": 0.3118, "step": 7855 }, { "epoch": 2.20550252667041, "grad_norm": 0.5595225095748901, "learning_rate": 1.988743995799884e-06, "loss": 0.3405, "step": 7856 }, { "epoch": 2.2057832678270635, "grad_norm": 0.6285916566848755, "learning_rate": 1.987440239329528e-06, "loss": 0.3072, "step": 7857 }, { "epoch": 2.206064008983717, "grad_norm": 0.575323760509491, "learning_rate": 1.9861368043408147e-06, "loss": 0.3595, "step": 7858 }, { "epoch": 2.206344750140371, "grad_norm": 0.6051434874534607, "learning_rate": 1.984833690972841e-06, "loss": 0.2976, "step": 7859 }, { "epoch": 2.2066254912970242, "grad_norm": 0.6305602192878723, "learning_rate": 1.9835308993646647e-06, "loss": 0.3218, "step": 7860 }, { "epoch": 2.2069062324536777, "grad_norm": 0.5705970525741577, "learning_rate": 1.9822284296553133e-06, "loss": 0.3523, "step": 7861 }, { "epoch": 2.207186973610331, "grad_norm": 0.6255747079849243, "learning_rate": 1.9809262819837783e-06, "loss": 0.2904, "step": 7862 }, { "epoch": 2.207467714766985, "grad_norm": 0.5619363188743591, "learning_rate": 1.9796244564890144e-06, "loss": 0.3448, "step": 7863 }, { "epoch": 2.2077484559236384, "grad_norm": 0.5439637899398804, "learning_rate": 1.9783229533099475e-06, "loss": 0.3033, "step": 7864 }, { "epoch": 2.208029197080292, "grad_norm": 0.50906902551651, "learning_rate": 1.977021772585463e-06, "loss": 0.2966, "step": 7865 }, { "epoch": 2.2083099382369458, "grad_norm": 0.561398446559906, "learning_rate": 1.9757209144544178e-06, "loss": 0.3458, "step": 7866 }, { "epoch": 2.208590679393599, "grad_norm": 0.5879591107368469, "learning_rate": 1.9744203790556298e-06, "loss": 0.3372, "step": 7867 }, { "epoch": 2.2088714205502527, "grad_norm": 0.6072366833686829, "learning_rate": 1.9731201665278823e-06, "loss": 0.3513, "step": 7868 }, { "epoch": 2.209152161706906, "grad_norm": 0.5826284289360046, "learning_rate": 1.97182027700993e-06, "loss": 0.2988, "step": 7869 }, { "epoch": 2.20943290286356, "grad_norm": 0.5746475458145142, "learning_rate": 1.970520710640484e-06, "loss": 0.2696, "step": 7870 }, { "epoch": 2.2097136440202134, "grad_norm": 0.566821277141571, "learning_rate": 1.969221467558231e-06, "loss": 0.33, "step": 7871 }, { "epoch": 2.209994385176867, "grad_norm": 0.6455053091049194, "learning_rate": 1.9679225479018144e-06, "loss": 0.3265, "step": 7872 }, { "epoch": 2.2102751263335203, "grad_norm": 0.6207641959190369, "learning_rate": 1.9666239518098496e-06, "loss": 0.3492, "step": 7873 }, { "epoch": 2.210555867490174, "grad_norm": 0.5575459599494934, "learning_rate": 1.965325679420913e-06, "loss": 0.3419, "step": 7874 }, { "epoch": 2.2108366086468276, "grad_norm": 0.6213704943656921, "learning_rate": 1.9640277308735457e-06, "loss": 0.3427, "step": 7875 }, { "epoch": 2.211117349803481, "grad_norm": 0.6364071369171143, "learning_rate": 1.9627301063062626e-06, "loss": 0.2843, "step": 7876 }, { "epoch": 2.211398090960135, "grad_norm": 0.6064237356185913, "learning_rate": 1.9614328058575347e-06, "loss": 0.3346, "step": 7877 }, { "epoch": 2.2116788321167884, "grad_norm": 0.5435418486595154, "learning_rate": 1.9601358296658013e-06, "loss": 0.3399, "step": 7878 }, { "epoch": 2.211959573273442, "grad_norm": 0.6605810523033142, "learning_rate": 1.95883917786947e-06, "loss": 0.3099, "step": 7879 }, { "epoch": 2.2122403144300953, "grad_norm": 0.5825643539428711, "learning_rate": 1.9575428506069077e-06, "loss": 0.3189, "step": 7880 }, { "epoch": 2.212521055586749, "grad_norm": 0.611756443977356, "learning_rate": 1.956246848016454e-06, "loss": 0.3382, "step": 7881 }, { "epoch": 2.2128017967434026, "grad_norm": 0.600749135017395, "learning_rate": 1.954951170236408e-06, "loss": 0.2971, "step": 7882 }, { "epoch": 2.213082537900056, "grad_norm": 0.6075959801673889, "learning_rate": 1.9536558174050386e-06, "loss": 0.3113, "step": 7883 }, { "epoch": 2.2133632790567095, "grad_norm": 0.5713773369789124, "learning_rate": 1.952360789660576e-06, "loss": 0.3589, "step": 7884 }, { "epoch": 2.2136440202133634, "grad_norm": 0.542594850063324, "learning_rate": 1.9510660871412167e-06, "loss": 0.3796, "step": 7885 }, { "epoch": 2.213924761370017, "grad_norm": 0.5956571698188782, "learning_rate": 1.9497717099851266e-06, "loss": 0.3047, "step": 7886 }, { "epoch": 2.2142055025266703, "grad_norm": 0.5386093258857727, "learning_rate": 1.948477658330429e-06, "loss": 0.3322, "step": 7887 }, { "epoch": 2.214486243683324, "grad_norm": 0.5793008208274841, "learning_rate": 1.9471839323152215e-06, "loss": 0.3265, "step": 7888 }, { "epoch": 2.2147669848399776, "grad_norm": 0.5399921536445618, "learning_rate": 1.9458905320775605e-06, "loss": 0.3034, "step": 7889 }, { "epoch": 2.215047725996631, "grad_norm": 0.5770264863967896, "learning_rate": 1.944597457755468e-06, "loss": 0.2891, "step": 7890 }, { "epoch": 2.2153284671532845, "grad_norm": 0.6316906809806824, "learning_rate": 1.943304709486936e-06, "loss": 0.3453, "step": 7891 }, { "epoch": 2.2156092083099383, "grad_norm": 0.5945931673049927, "learning_rate": 1.9420122874099157e-06, "loss": 0.3582, "step": 7892 }, { "epoch": 2.215889949466592, "grad_norm": 0.5706280469894409, "learning_rate": 1.9407201916623293e-06, "loss": 0.3589, "step": 7893 }, { "epoch": 2.2161706906232452, "grad_norm": 0.6127965450286865, "learning_rate": 1.939428422382058e-06, "loss": 0.3373, "step": 7894 }, { "epoch": 2.216451431779899, "grad_norm": 0.5266435146331787, "learning_rate": 1.938136979706955e-06, "loss": 0.333, "step": 7895 }, { "epoch": 2.2167321729365526, "grad_norm": 0.5792006850242615, "learning_rate": 1.9368458637748326e-06, "loss": 0.3474, "step": 7896 }, { "epoch": 2.217012914093206, "grad_norm": 0.6186229586601257, "learning_rate": 1.935555074723469e-06, "loss": 0.318, "step": 7897 }, { "epoch": 2.2172936552498594, "grad_norm": 0.5567249059677124, "learning_rate": 1.934264612690614e-06, "loss": 0.3207, "step": 7898 }, { "epoch": 2.2175743964065133, "grad_norm": 0.5644236207008362, "learning_rate": 1.9329744778139724e-06, "loss": 0.3198, "step": 7899 }, { "epoch": 2.2178551375631668, "grad_norm": 0.5268715620040894, "learning_rate": 1.931684670231225e-06, "loss": 0.3278, "step": 7900 }, { "epoch": 2.21813587871982, "grad_norm": 0.5538960695266724, "learning_rate": 1.930395190080006e-06, "loss": 0.3042, "step": 7901 }, { "epoch": 2.218416619876474, "grad_norm": 0.6099576950073242, "learning_rate": 1.9291060374979237e-06, "loss": 0.3613, "step": 7902 }, { "epoch": 2.2186973610331275, "grad_norm": 0.5823920369148254, "learning_rate": 1.9278172126225508e-06, "loss": 0.3116, "step": 7903 }, { "epoch": 2.218978102189781, "grad_norm": 0.5380644202232361, "learning_rate": 1.92652871559142e-06, "loss": 0.3243, "step": 7904 }, { "epoch": 2.2192588433464344, "grad_norm": 0.61611008644104, "learning_rate": 1.9252405465420305e-06, "loss": 0.3205, "step": 7905 }, { "epoch": 2.2195395845030883, "grad_norm": 0.585796058177948, "learning_rate": 1.923952705611851e-06, "loss": 0.2994, "step": 7906 }, { "epoch": 2.2198203256597417, "grad_norm": 0.5764893889427185, "learning_rate": 1.9226651929383077e-06, "loss": 0.3397, "step": 7907 }, { "epoch": 2.220101066816395, "grad_norm": 0.5012286305427551, "learning_rate": 1.921378008658801e-06, "loss": 0.3412, "step": 7908 }, { "epoch": 2.220381807973049, "grad_norm": 0.6187290549278259, "learning_rate": 1.920091152910686e-06, "loss": 0.3058, "step": 7909 }, { "epoch": 2.2206625491297025, "grad_norm": 0.628390908241272, "learning_rate": 1.9188046258312924e-06, "loss": 0.3045, "step": 7910 }, { "epoch": 2.220943290286356, "grad_norm": 0.5092245936393738, "learning_rate": 1.9175184275579083e-06, "loss": 0.3768, "step": 7911 }, { "epoch": 2.2212240314430094, "grad_norm": 0.5756733417510986, "learning_rate": 1.916232558227788e-06, "loss": 0.3559, "step": 7912 }, { "epoch": 2.2215047725996633, "grad_norm": 0.5927130579948425, "learning_rate": 1.914947017978153e-06, "loss": 0.337, "step": 7913 }, { "epoch": 2.2217855137563167, "grad_norm": 0.5313396453857422, "learning_rate": 1.9136618069461863e-06, "loss": 0.3246, "step": 7914 }, { "epoch": 2.22206625491297, "grad_norm": 0.5105767250061035, "learning_rate": 1.912376925269041e-06, "loss": 0.3031, "step": 7915 }, { "epoch": 2.2223469960696236, "grad_norm": 0.5950306057929993, "learning_rate": 1.911092373083829e-06, "loss": 0.3169, "step": 7916 }, { "epoch": 2.2226277372262775, "grad_norm": 0.529450535774231, "learning_rate": 1.9098081505276296e-06, "loss": 0.3076, "step": 7917 }, { "epoch": 2.222908478382931, "grad_norm": 0.6398170590400696, "learning_rate": 1.9085242577374885e-06, "loss": 0.3157, "step": 7918 }, { "epoch": 2.2231892195395844, "grad_norm": 0.5717803239822388, "learning_rate": 1.9072406948504123e-06, "loss": 0.3126, "step": 7919 }, { "epoch": 2.2234699606962383, "grad_norm": 0.6152672171592712, "learning_rate": 1.905957462003379e-06, "loss": 0.2989, "step": 7920 }, { "epoch": 2.2237507018528917, "grad_norm": 0.5398199558258057, "learning_rate": 1.9046745593333222e-06, "loss": 0.3386, "step": 7921 }, { "epoch": 2.224031443009545, "grad_norm": 0.568665087223053, "learning_rate": 1.90339198697715e-06, "loss": 0.3224, "step": 7922 }, { "epoch": 2.2243121841661986, "grad_norm": 0.533877968788147, "learning_rate": 1.9021097450717285e-06, "loss": 0.3403, "step": 7923 }, { "epoch": 2.2245929253228525, "grad_norm": 0.5058253407478333, "learning_rate": 1.9008278337538883e-06, "loss": 0.3631, "step": 7924 }, { "epoch": 2.224873666479506, "grad_norm": 0.6110826730728149, "learning_rate": 1.8995462531604314e-06, "loss": 0.3373, "step": 7925 }, { "epoch": 2.2251544076361593, "grad_norm": 0.5676496028900146, "learning_rate": 1.8982650034281158e-06, "loss": 0.3354, "step": 7926 }, { "epoch": 2.2254351487928132, "grad_norm": 0.6552402377128601, "learning_rate": 1.896984084693671e-06, "loss": 0.2991, "step": 7927 }, { "epoch": 2.2257158899494667, "grad_norm": 0.5908652544021606, "learning_rate": 1.8957034970937898e-06, "loss": 0.3494, "step": 7928 }, { "epoch": 2.22599663110612, "grad_norm": 0.6053508520126343, "learning_rate": 1.8944232407651253e-06, "loss": 0.3342, "step": 7929 }, { "epoch": 2.2262773722627736, "grad_norm": 0.5698174238204956, "learning_rate": 1.8931433158443024e-06, "loss": 0.3078, "step": 7930 }, { "epoch": 2.2265581134194274, "grad_norm": 0.5074124336242676, "learning_rate": 1.891863722467902e-06, "loss": 0.3071, "step": 7931 }, { "epoch": 2.226838854576081, "grad_norm": 0.5592202544212341, "learning_rate": 1.8905844607724793e-06, "loss": 0.2941, "step": 7932 }, { "epoch": 2.2271195957327343, "grad_norm": 0.5499841570854187, "learning_rate": 1.8893055308945468e-06, "loss": 0.3218, "step": 7933 }, { "epoch": 2.2274003368893878, "grad_norm": 0.5575269460678101, "learning_rate": 1.8880269329705818e-06, "loss": 0.3066, "step": 7934 }, { "epoch": 2.2276810780460417, "grad_norm": 0.5841457843780518, "learning_rate": 1.8867486671370323e-06, "loss": 0.3384, "step": 7935 }, { "epoch": 2.227961819202695, "grad_norm": 0.5694056153297424, "learning_rate": 1.8854707335303034e-06, "loss": 0.3356, "step": 7936 }, { "epoch": 2.2282425603593485, "grad_norm": 0.5506075024604797, "learning_rate": 1.8841931322867719e-06, "loss": 0.3122, "step": 7937 }, { "epoch": 2.2285233015160024, "grad_norm": 0.6021720767021179, "learning_rate": 1.8829158635427724e-06, "loss": 0.3548, "step": 7938 }, { "epoch": 2.228804042672656, "grad_norm": 0.5166854858398438, "learning_rate": 1.8816389274346069e-06, "loss": 0.3509, "step": 7939 }, { "epoch": 2.2290847838293093, "grad_norm": 0.6093659996986389, "learning_rate": 1.8803623240985452e-06, "loss": 0.3003, "step": 7940 }, { "epoch": 2.2293655249859627, "grad_norm": 0.5474016070365906, "learning_rate": 1.8790860536708144e-06, "loss": 0.3351, "step": 7941 }, { "epoch": 2.2296462661426166, "grad_norm": 0.5782341957092285, "learning_rate": 1.8778101162876144e-06, "loss": 0.3449, "step": 7942 }, { "epoch": 2.22992700729927, "grad_norm": 0.5373322367668152, "learning_rate": 1.8765345120851014e-06, "loss": 0.3125, "step": 7943 }, { "epoch": 2.2302077484559235, "grad_norm": 0.5648261308670044, "learning_rate": 1.8752592411994031e-06, "loss": 0.3033, "step": 7944 }, { "epoch": 2.2304884896125774, "grad_norm": 0.5863624811172485, "learning_rate": 1.8739843037666073e-06, "loss": 0.3072, "step": 7945 }, { "epoch": 2.230769230769231, "grad_norm": 0.5565276145935059, "learning_rate": 1.8727096999227655e-06, "loss": 0.3114, "step": 7946 }, { "epoch": 2.2310499719258843, "grad_norm": 0.5573920011520386, "learning_rate": 1.871435429803899e-06, "loss": 0.3064, "step": 7947 }, { "epoch": 2.2313307130825377, "grad_norm": 0.591001570224762, "learning_rate": 1.8701614935459867e-06, "loss": 0.3437, "step": 7948 }, { "epoch": 2.2316114542391916, "grad_norm": 0.5355536341667175, "learning_rate": 1.868887891284978e-06, "loss": 0.3483, "step": 7949 }, { "epoch": 2.231892195395845, "grad_norm": 0.5544248223304749, "learning_rate": 1.8676146231567821e-06, "loss": 0.2743, "step": 7950 }, { "epoch": 2.2321729365524985, "grad_norm": 0.5608052611351013, "learning_rate": 1.866341689297273e-06, "loss": 0.3117, "step": 7951 }, { "epoch": 2.2324536777091524, "grad_norm": 0.5530779957771301, "learning_rate": 1.8650690898422925e-06, "loss": 0.3316, "step": 7952 }, { "epoch": 2.232734418865806, "grad_norm": 0.6023195385932922, "learning_rate": 1.863796824927645e-06, "loss": 0.3395, "step": 7953 }, { "epoch": 2.2330151600224593, "grad_norm": 0.6207793354988098, "learning_rate": 1.862524894689096e-06, "loss": 0.3158, "step": 7954 }, { "epoch": 2.2332959011791127, "grad_norm": 0.5584052801132202, "learning_rate": 1.8612532992623816e-06, "loss": 0.2987, "step": 7955 }, { "epoch": 2.2335766423357666, "grad_norm": 0.5351366400718689, "learning_rate": 1.8599820387831947e-06, "loss": 0.3403, "step": 7956 }, { "epoch": 2.23385738349242, "grad_norm": 0.5963825583457947, "learning_rate": 1.8587111133871994e-06, "loss": 0.3314, "step": 7957 }, { "epoch": 2.2341381246490735, "grad_norm": 0.6010246872901917, "learning_rate": 1.8574405232100184e-06, "loss": 0.3044, "step": 7958 }, { "epoch": 2.2344188658057273, "grad_norm": 0.4795440137386322, "learning_rate": 1.856170268387244e-06, "loss": 0.38, "step": 7959 }, { "epoch": 2.234699606962381, "grad_norm": 0.5915919542312622, "learning_rate": 1.8549003490544281e-06, "loss": 0.3262, "step": 7960 }, { "epoch": 2.2349803481190342, "grad_norm": 0.6033159494400024, "learning_rate": 1.8536307653470875e-06, "loss": 0.3678, "step": 7961 }, { "epoch": 2.2352610892756877, "grad_norm": 0.5163617730140686, "learning_rate": 1.8523615174007077e-06, "loss": 0.2971, "step": 7962 }, { "epoch": 2.2355418304323416, "grad_norm": 0.5907436609268188, "learning_rate": 1.8510926053507306e-06, "loss": 0.3579, "step": 7963 }, { "epoch": 2.235822571588995, "grad_norm": 0.5777222514152527, "learning_rate": 1.8498240293325708e-06, "loss": 0.342, "step": 7964 }, { "epoch": 2.2361033127456484, "grad_norm": 0.6026085615158081, "learning_rate": 1.8485557894815992e-06, "loss": 0.3419, "step": 7965 }, { "epoch": 2.236384053902302, "grad_norm": 0.6003116965293884, "learning_rate": 1.8472878859331583e-06, "loss": 0.2992, "step": 7966 }, { "epoch": 2.2366647950589558, "grad_norm": 0.632685661315918, "learning_rate": 1.8460203188225484e-06, "loss": 0.361, "step": 7967 }, { "epoch": 2.236945536215609, "grad_norm": 0.5606963038444519, "learning_rate": 1.844753088285035e-06, "loss": 0.3607, "step": 7968 }, { "epoch": 2.2372262773722627, "grad_norm": 0.5325944423675537, "learning_rate": 1.843486194455853e-06, "loss": 0.2764, "step": 7969 }, { "epoch": 2.2375070185289165, "grad_norm": 0.4976692795753479, "learning_rate": 1.8422196374701927e-06, "loss": 0.3496, "step": 7970 }, { "epoch": 2.23778775968557, "grad_norm": 0.5979899764060974, "learning_rate": 1.8409534174632181e-06, "loss": 0.3005, "step": 7971 }, { "epoch": 2.2380685008422234, "grad_norm": 0.5300469398498535, "learning_rate": 1.8396875345700498e-06, "loss": 0.3254, "step": 7972 }, { "epoch": 2.238349241998877, "grad_norm": 0.6309905052185059, "learning_rate": 1.8384219889257737e-06, "loss": 0.3307, "step": 7973 }, { "epoch": 2.2386299831555307, "grad_norm": 0.5937932133674622, "learning_rate": 1.837156780665444e-06, "loss": 0.3324, "step": 7974 }, { "epoch": 2.238910724312184, "grad_norm": 0.5859931707382202, "learning_rate": 1.8358919099240723e-06, "loss": 0.3034, "step": 7975 }, { "epoch": 2.2391914654688376, "grad_norm": 0.515424370765686, "learning_rate": 1.8346273768366417e-06, "loss": 0.3365, "step": 7976 }, { "epoch": 2.239472206625491, "grad_norm": 0.6204776167869568, "learning_rate": 1.8333631815380915e-06, "loss": 0.3587, "step": 7977 }, { "epoch": 2.239752947782145, "grad_norm": 0.621213972568512, "learning_rate": 1.83209932416333e-06, "loss": 0.3138, "step": 7978 }, { "epoch": 2.2400336889387984, "grad_norm": 0.6118841767311096, "learning_rate": 1.8308358048472313e-06, "loss": 0.3238, "step": 7979 }, { "epoch": 2.240314430095452, "grad_norm": 0.5114016532897949, "learning_rate": 1.8295726237246254e-06, "loss": 0.3297, "step": 7980 }, { "epoch": 2.2405951712521057, "grad_norm": 0.5543270707130432, "learning_rate": 1.8283097809303158e-06, "loss": 0.3276, "step": 7981 }, { "epoch": 2.240875912408759, "grad_norm": 0.5982322096824646, "learning_rate": 1.8270472765990622e-06, "loss": 0.3296, "step": 7982 }, { "epoch": 2.2411566535654126, "grad_norm": 0.5736710429191589, "learning_rate": 1.8257851108655905e-06, "loss": 0.3739, "step": 7983 }, { "epoch": 2.241437394722066, "grad_norm": 0.5847101211547852, "learning_rate": 1.8245232838645948e-06, "loss": 0.2932, "step": 7984 }, { "epoch": 2.24171813587872, "grad_norm": 0.6166174411773682, "learning_rate": 1.8232617957307254e-06, "loss": 0.366, "step": 7985 }, { "epoch": 2.2419988770353734, "grad_norm": 0.5478823184967041, "learning_rate": 1.822000646598604e-06, "loss": 0.3398, "step": 7986 }, { "epoch": 2.242279618192027, "grad_norm": 0.6085838675498962, "learning_rate": 1.8207398366028106e-06, "loss": 0.3733, "step": 7987 }, { "epoch": 2.2425603593486807, "grad_norm": 0.548533022403717, "learning_rate": 1.8194793658778898e-06, "loss": 0.3508, "step": 7988 }, { "epoch": 2.242841100505334, "grad_norm": 0.5656881928443909, "learning_rate": 1.8182192345583543e-06, "loss": 0.3089, "step": 7989 }, { "epoch": 2.2431218416619876, "grad_norm": 0.5688865780830383, "learning_rate": 1.8169594427786746e-06, "loss": 0.292, "step": 7990 }, { "epoch": 2.243402582818641, "grad_norm": 0.5680079460144043, "learning_rate": 1.8156999906732902e-06, "loss": 0.3077, "step": 7991 }, { "epoch": 2.243683323975295, "grad_norm": 0.5976494550704956, "learning_rate": 1.8144408783766e-06, "loss": 0.2918, "step": 7992 }, { "epoch": 2.2439640651319483, "grad_norm": 0.564488410949707, "learning_rate": 1.813182106022971e-06, "loss": 0.3047, "step": 7993 }, { "epoch": 2.244244806288602, "grad_norm": 0.4989648461341858, "learning_rate": 1.81192367374673e-06, "loss": 0.2928, "step": 7994 }, { "epoch": 2.2445255474452557, "grad_norm": 0.5590886473655701, "learning_rate": 1.8106655816821672e-06, "loss": 0.359, "step": 7995 }, { "epoch": 2.244806288601909, "grad_norm": 0.5778689384460449, "learning_rate": 1.8094078299635427e-06, "loss": 0.3032, "step": 7996 }, { "epoch": 2.2450870297585626, "grad_norm": 0.5303788185119629, "learning_rate": 1.8081504187250715e-06, "loss": 0.338, "step": 7997 }, { "epoch": 2.245367770915216, "grad_norm": 0.6084389090538025, "learning_rate": 1.8068933481009405e-06, "loss": 0.3278, "step": 7998 }, { "epoch": 2.24564851207187, "grad_norm": 0.5453808307647705, "learning_rate": 1.8056366182252949e-06, "loss": 0.3586, "step": 7999 }, { "epoch": 2.2459292532285233, "grad_norm": 0.5822224020957947, "learning_rate": 1.8043802292322433e-06, "loss": 0.3137, "step": 8000 }, { "epoch": 2.2462099943851768, "grad_norm": 0.5362311005592346, "learning_rate": 1.8031241812558631e-06, "loss": 0.3386, "step": 8001 }, { "epoch": 2.2464907355418307, "grad_norm": 0.5358651876449585, "learning_rate": 1.8018684744301867e-06, "loss": 0.3103, "step": 8002 }, { "epoch": 2.246771476698484, "grad_norm": 0.5495103001594543, "learning_rate": 1.8006131088892227e-06, "loss": 0.3178, "step": 8003 }, { "epoch": 2.2470522178551375, "grad_norm": 0.6086322069168091, "learning_rate": 1.7993580847669312e-06, "loss": 0.3393, "step": 8004 }, { "epoch": 2.247332959011791, "grad_norm": 0.5692078471183777, "learning_rate": 1.7981034021972404e-06, "loss": 0.3328, "step": 8005 }, { "epoch": 2.247613700168445, "grad_norm": 0.5917820334434509, "learning_rate": 1.7968490613140443e-06, "loss": 0.3158, "step": 8006 }, { "epoch": 2.2478944413250983, "grad_norm": 0.5158905982971191, "learning_rate": 1.7955950622511964e-06, "loss": 0.317, "step": 8007 }, { "epoch": 2.2481751824817517, "grad_norm": 0.5884225368499756, "learning_rate": 1.7943414051425178e-06, "loss": 0.3606, "step": 8008 }, { "epoch": 2.248455923638405, "grad_norm": 0.5456855893135071, "learning_rate": 1.7930880901217896e-06, "loss": 0.3239, "step": 8009 }, { "epoch": 2.248736664795059, "grad_norm": 0.5448585152626038, "learning_rate": 1.7918351173227566e-06, "loss": 0.3199, "step": 8010 }, { "epoch": 2.2490174059517125, "grad_norm": 0.6192921996116638, "learning_rate": 1.790582486879131e-06, "loss": 0.3379, "step": 8011 }, { "epoch": 2.249298147108366, "grad_norm": 0.5639538168907166, "learning_rate": 1.789330198924583e-06, "loss": 0.3365, "step": 8012 }, { "epoch": 2.24957888826502, "grad_norm": 0.5992386937141418, "learning_rate": 1.788078253592752e-06, "loss": 0.3108, "step": 8013 }, { "epoch": 2.2498596294216733, "grad_norm": 0.5644425749778748, "learning_rate": 1.7868266510172344e-06, "loss": 0.2993, "step": 8014 }, { "epoch": 2.2501403705783267, "grad_norm": 0.5378849506378174, "learning_rate": 1.785575391331597e-06, "loss": 0.3072, "step": 8015 }, { "epoch": 2.25042111173498, "grad_norm": 0.5695605874061584, "learning_rate": 1.7843244746693645e-06, "loss": 0.3759, "step": 8016 }, { "epoch": 2.250701852891634, "grad_norm": 0.5689758062362671, "learning_rate": 1.7830739011640252e-06, "loss": 0.3287, "step": 8017 }, { "epoch": 2.2509825940482875, "grad_norm": 0.6179699301719666, "learning_rate": 1.7818236709490362e-06, "loss": 0.3415, "step": 8018 }, { "epoch": 2.251263335204941, "grad_norm": 0.6113905906677246, "learning_rate": 1.7805737841578108e-06, "loss": 0.3135, "step": 8019 }, { "epoch": 2.2515440763615944, "grad_norm": 0.5384484529495239, "learning_rate": 1.7793242409237327e-06, "loss": 0.3439, "step": 8020 }, { "epoch": 2.2518248175182483, "grad_norm": 0.552721381187439, "learning_rate": 1.778075041380144e-06, "loss": 0.3488, "step": 8021 }, { "epoch": 2.2521055586749017, "grad_norm": 0.5636094808578491, "learning_rate": 1.7768261856603487e-06, "loss": 0.3086, "step": 8022 }, { "epoch": 2.252386299831555, "grad_norm": 0.6538780331611633, "learning_rate": 1.775577673897621e-06, "loss": 0.3233, "step": 8023 }, { "epoch": 2.252667040988209, "grad_norm": 0.5709100365638733, "learning_rate": 1.7743295062251913e-06, "loss": 0.339, "step": 8024 }, { "epoch": 2.2529477821448625, "grad_norm": 0.5108280181884766, "learning_rate": 1.7730816827762587e-06, "loss": 0.3322, "step": 8025 }, { "epoch": 2.253228523301516, "grad_norm": 0.5429946184158325, "learning_rate": 1.7718342036839808e-06, "loss": 0.3199, "step": 8026 }, { "epoch": 2.2535092644581693, "grad_norm": 0.5417314171791077, "learning_rate": 1.7705870690814835e-06, "loss": 0.3159, "step": 8027 }, { "epoch": 2.2537900056148232, "grad_norm": 0.5941969156265259, "learning_rate": 1.7693402791018505e-06, "loss": 0.3811, "step": 8028 }, { "epoch": 2.2540707467714767, "grad_norm": 0.5561227798461914, "learning_rate": 1.7680938338781322e-06, "loss": 0.2943, "step": 8029 }, { "epoch": 2.25435148792813, "grad_norm": 0.540425717830658, "learning_rate": 1.7668477335433443e-06, "loss": 0.3421, "step": 8030 }, { "epoch": 2.254632229084784, "grad_norm": 0.5694829821586609, "learning_rate": 1.7656019782304602e-06, "loss": 0.3186, "step": 8031 }, { "epoch": 2.2549129702414374, "grad_norm": 0.5782746076583862, "learning_rate": 1.7643565680724183e-06, "loss": 0.2984, "step": 8032 }, { "epoch": 2.255193711398091, "grad_norm": 0.6405271887779236, "learning_rate": 1.7631115032021235e-06, "loss": 0.3296, "step": 8033 }, { "epoch": 2.2554744525547443, "grad_norm": 0.5816053152084351, "learning_rate": 1.761866783752439e-06, "loss": 0.3575, "step": 8034 }, { "epoch": 2.255755193711398, "grad_norm": 0.5303795337677002, "learning_rate": 1.7606224098561957e-06, "loss": 0.3766, "step": 8035 }, { "epoch": 2.2560359348680517, "grad_norm": 0.5758079290390015, "learning_rate": 1.7593783816461852e-06, "loss": 0.2891, "step": 8036 }, { "epoch": 2.256316676024705, "grad_norm": 0.5049720406532288, "learning_rate": 1.7581346992551595e-06, "loss": 0.3721, "step": 8037 }, { "epoch": 2.256597417181359, "grad_norm": 0.5692103505134583, "learning_rate": 1.7568913628158412e-06, "loss": 0.3573, "step": 8038 }, { "epoch": 2.2568781583380124, "grad_norm": 0.5312740802764893, "learning_rate": 1.755648372460907e-06, "loss": 0.3375, "step": 8039 }, { "epoch": 2.257158899494666, "grad_norm": 0.5886383056640625, "learning_rate": 1.754405728323005e-06, "loss": 0.3297, "step": 8040 }, { "epoch": 2.2574396406513193, "grad_norm": 0.5831328630447388, "learning_rate": 1.753163430534739e-06, "loss": 0.3483, "step": 8041 }, { "epoch": 2.257720381807973, "grad_norm": 0.5989186763763428, "learning_rate": 1.751921479228682e-06, "loss": 0.4014, "step": 8042 }, { "epoch": 2.2580011229646266, "grad_norm": 0.6026795506477356, "learning_rate": 1.750679874537367e-06, "loss": 0.3295, "step": 8043 }, { "epoch": 2.25828186412128, "grad_norm": 0.5234981179237366, "learning_rate": 1.7494386165932875e-06, "loss": 0.291, "step": 8044 }, { "epoch": 2.258562605277934, "grad_norm": 0.5749183297157288, "learning_rate": 1.7481977055289067e-06, "loss": 0.3237, "step": 8045 }, { "epoch": 2.2588433464345874, "grad_norm": 0.5641282796859741, "learning_rate": 1.7469571414766439e-06, "loss": 0.3293, "step": 8046 }, { "epoch": 2.259124087591241, "grad_norm": 0.5993145108222961, "learning_rate": 1.745716924568887e-06, "loss": 0.3187, "step": 8047 }, { "epoch": 2.2594048287478943, "grad_norm": 0.6631541848182678, "learning_rate": 1.744477054937983e-06, "loss": 0.3293, "step": 8048 }, { "epoch": 2.259685569904548, "grad_norm": 0.563859760761261, "learning_rate": 1.743237532716241e-06, "loss": 0.3392, "step": 8049 }, { "epoch": 2.2599663110612016, "grad_norm": 0.6036688685417175, "learning_rate": 1.7419983580359383e-06, "loss": 0.3225, "step": 8050 }, { "epoch": 2.260247052217855, "grad_norm": 0.5790561437606812, "learning_rate": 1.7407595310293096e-06, "loss": 0.3134, "step": 8051 }, { "epoch": 2.260527793374509, "grad_norm": 0.527915894985199, "learning_rate": 1.7395210518285577e-06, "loss": 0.3712, "step": 8052 }, { "epoch": 2.2608085345311624, "grad_norm": 0.5608624815940857, "learning_rate": 1.7382829205658413e-06, "loss": 0.324, "step": 8053 }, { "epoch": 2.261089275687816, "grad_norm": 0.6710591912269592, "learning_rate": 1.737045137373289e-06, "loss": 0.3105, "step": 8054 }, { "epoch": 2.2613700168444693, "grad_norm": 0.6487722396850586, "learning_rate": 1.73580770238299e-06, "loss": 0.359, "step": 8055 }, { "epoch": 2.261650758001123, "grad_norm": 0.5920708775520325, "learning_rate": 1.7345706157269926e-06, "loss": 0.3082, "step": 8056 }, { "epoch": 2.2619314991577766, "grad_norm": 0.615537703037262, "learning_rate": 1.7333338775373142e-06, "loss": 0.3109, "step": 8057 }, { "epoch": 2.26221224031443, "grad_norm": 0.5814971327781677, "learning_rate": 1.7320974879459306e-06, "loss": 0.3012, "step": 8058 }, { "epoch": 2.262492981471084, "grad_norm": 0.5310434699058533, "learning_rate": 1.7308614470847802e-06, "loss": 0.329, "step": 8059 }, { "epoch": 2.2627737226277373, "grad_norm": 0.592069685459137, "learning_rate": 1.7296257550857677e-06, "loss": 0.351, "step": 8060 }, { "epoch": 2.263054463784391, "grad_norm": 0.6197977066040039, "learning_rate": 1.728390412080756e-06, "loss": 0.3425, "step": 8061 }, { "epoch": 2.2633352049410442, "grad_norm": 0.5771230459213257, "learning_rate": 1.7271554182015765e-06, "loss": 0.3037, "step": 8062 }, { "epoch": 2.2636159460976977, "grad_norm": 0.5480746030807495, "learning_rate": 1.725920773580016e-06, "loss": 0.3071, "step": 8063 }, { "epoch": 2.2638966872543516, "grad_norm": 0.5535979866981506, "learning_rate": 1.7246864783478329e-06, "loss": 0.3282, "step": 8064 }, { "epoch": 2.264177428411005, "grad_norm": 0.550595760345459, "learning_rate": 1.7234525326367408e-06, "loss": 0.2868, "step": 8065 }, { "epoch": 2.2644581695676584, "grad_norm": 0.6128308176994324, "learning_rate": 1.722218936578417e-06, "loss": 0.3421, "step": 8066 }, { "epoch": 2.2647389107243123, "grad_norm": 0.5360646843910217, "learning_rate": 1.720985690304507e-06, "loss": 0.371, "step": 8067 }, { "epoch": 2.2650196518809658, "grad_norm": 0.6131049990653992, "learning_rate": 1.7197527939466114e-06, "loss": 0.3451, "step": 8068 }, { "epoch": 2.265300393037619, "grad_norm": 0.5713410973548889, "learning_rate": 1.7185202476363005e-06, "loss": 0.3193, "step": 8069 }, { "epoch": 2.2655811341942727, "grad_norm": 0.5460421442985535, "learning_rate": 1.7172880515051033e-06, "loss": 0.3092, "step": 8070 }, { "epoch": 2.2658618753509265, "grad_norm": 0.5895035862922668, "learning_rate": 1.7160562056845092e-06, "loss": 0.3461, "step": 8071 }, { "epoch": 2.26614261650758, "grad_norm": 0.5344123244285583, "learning_rate": 1.7148247103059772e-06, "loss": 0.3431, "step": 8072 }, { "epoch": 2.2664233576642334, "grad_norm": 0.6651570796966553, "learning_rate": 1.7135935655009218e-06, "loss": 0.3151, "step": 8073 }, { "epoch": 2.2667040988208873, "grad_norm": 0.560825526714325, "learning_rate": 1.7123627714007252e-06, "loss": 0.3769, "step": 8074 }, { "epoch": 2.2669848399775407, "grad_norm": 0.597614049911499, "learning_rate": 1.7111323281367276e-06, "loss": 0.3434, "step": 8075 }, { "epoch": 2.267265581134194, "grad_norm": 0.581476628780365, "learning_rate": 1.709902235840238e-06, "loss": 0.3367, "step": 8076 }, { "epoch": 2.2675463222908476, "grad_norm": 0.5085705518722534, "learning_rate": 1.7086724946425215e-06, "loss": 0.2906, "step": 8077 }, { "epoch": 2.2678270634475015, "grad_norm": 0.4936918616294861, "learning_rate": 1.7074431046748075e-06, "loss": 0.3195, "step": 8078 }, { "epoch": 2.268107804604155, "grad_norm": 0.5442838668823242, "learning_rate": 1.7062140660682902e-06, "loss": 0.3419, "step": 8079 }, { "epoch": 2.2683885457608084, "grad_norm": 0.5474158525466919, "learning_rate": 1.704985378954127e-06, "loss": 0.3031, "step": 8080 }, { "epoch": 2.2686692869174623, "grad_norm": 0.5990607142448425, "learning_rate": 1.703757043463432e-06, "loss": 0.3129, "step": 8081 }, { "epoch": 2.2689500280741157, "grad_norm": 0.5595345497131348, "learning_rate": 1.7025290597272886e-06, "loss": 0.3561, "step": 8082 }, { "epoch": 2.269230769230769, "grad_norm": 0.610658586025238, "learning_rate": 1.7013014278767377e-06, "loss": 0.2928, "step": 8083 }, { "epoch": 2.2695115103874226, "grad_norm": 0.549555778503418, "learning_rate": 1.7000741480427856e-06, "loss": 0.3529, "step": 8084 }, { "epoch": 2.2697922515440765, "grad_norm": 0.4983183443546295, "learning_rate": 1.6988472203563988e-06, "loss": 0.3351, "step": 8085 }, { "epoch": 2.27007299270073, "grad_norm": 0.6268941760063171, "learning_rate": 1.6976206449485094e-06, "loss": 0.3294, "step": 8086 }, { "epoch": 2.2703537338573834, "grad_norm": 0.5816642642021179, "learning_rate": 1.6963944219500084e-06, "loss": 0.3192, "step": 8087 }, { "epoch": 2.2706344750140373, "grad_norm": 0.6187565922737122, "learning_rate": 1.6951685514917499e-06, "loss": 0.2926, "step": 8088 }, { "epoch": 2.2709152161706907, "grad_norm": 0.6386123299598694, "learning_rate": 1.6939430337045532e-06, "loss": 0.3618, "step": 8089 }, { "epoch": 2.271195957327344, "grad_norm": 0.5533019304275513, "learning_rate": 1.6927178687191953e-06, "loss": 0.3203, "step": 8090 }, { "epoch": 2.2714766984839976, "grad_norm": 0.5853825211524963, "learning_rate": 1.6914930566664216e-06, "loss": 0.3381, "step": 8091 }, { "epoch": 2.2717574396406515, "grad_norm": 0.5264586210250854, "learning_rate": 1.690268597676935e-06, "loss": 0.3196, "step": 8092 }, { "epoch": 2.272038180797305, "grad_norm": 0.6071246266365051, "learning_rate": 1.6890444918814002e-06, "loss": 0.3026, "step": 8093 }, { "epoch": 2.2723189219539583, "grad_norm": 0.6103940010070801, "learning_rate": 1.687820739410449e-06, "loss": 0.3541, "step": 8094 }, { "epoch": 2.2725996631106122, "grad_norm": 0.5096889734268188, "learning_rate": 1.68659734039467e-06, "loss": 0.3352, "step": 8095 }, { "epoch": 2.2728804042672657, "grad_norm": 0.5179657340049744, "learning_rate": 1.6853742949646197e-06, "loss": 0.3501, "step": 8096 }, { "epoch": 2.273161145423919, "grad_norm": 0.5765989422798157, "learning_rate": 1.6841516032508105e-06, "loss": 0.283, "step": 8097 }, { "epoch": 2.2734418865805726, "grad_norm": 0.7021377682685852, "learning_rate": 1.6829292653837243e-06, "loss": 0.2916, "step": 8098 }, { "epoch": 2.2737226277372264, "grad_norm": 0.5201624035835266, "learning_rate": 1.681707281493799e-06, "loss": 0.3449, "step": 8099 }, { "epoch": 2.27400336889388, "grad_norm": 0.6332237124443054, "learning_rate": 1.6804856517114359e-06, "loss": 0.3409, "step": 8100 }, { "epoch": 2.2742841100505333, "grad_norm": 0.5995384454727173, "learning_rate": 1.6792643761670035e-06, "loss": 0.2915, "step": 8101 }, { "epoch": 2.274564851207187, "grad_norm": 0.5878689289093018, "learning_rate": 1.6780434549908241e-06, "loss": 0.3581, "step": 8102 }, { "epoch": 2.2748455923638407, "grad_norm": 0.580745279788971, "learning_rate": 1.6768228883131921e-06, "loss": 0.3258, "step": 8103 }, { "epoch": 2.275126333520494, "grad_norm": 0.5451475381851196, "learning_rate": 1.6756026762643535e-06, "loss": 0.314, "step": 8104 }, { "epoch": 2.2754070746771475, "grad_norm": 0.5909281373023987, "learning_rate": 1.6743828189745243e-06, "loss": 0.3359, "step": 8105 }, { "epoch": 2.2756878158338014, "grad_norm": 0.5588069558143616, "learning_rate": 1.673163316573882e-06, "loss": 0.3037, "step": 8106 }, { "epoch": 2.275968556990455, "grad_norm": 0.5816650986671448, "learning_rate": 1.671944169192562e-06, "loss": 0.3286, "step": 8107 }, { "epoch": 2.2762492981471083, "grad_norm": 0.5617421269416809, "learning_rate": 1.670725376960663e-06, "loss": 0.316, "step": 8108 }, { "epoch": 2.2765300393037617, "grad_norm": 0.5178927779197693, "learning_rate": 1.6695069400082497e-06, "loss": 0.2995, "step": 8109 }, { "epoch": 2.2768107804604156, "grad_norm": 0.5963665246963501, "learning_rate": 1.6682888584653434e-06, "loss": 0.3311, "step": 8110 }, { "epoch": 2.277091521617069, "grad_norm": 0.5320101976394653, "learning_rate": 1.6670711324619332e-06, "loss": 0.3383, "step": 8111 }, { "epoch": 2.2773722627737225, "grad_norm": 0.5567086338996887, "learning_rate": 1.6658537621279642e-06, "loss": 0.3347, "step": 8112 }, { "epoch": 2.277653003930376, "grad_norm": 0.5196081399917603, "learning_rate": 1.6646367475933495e-06, "loss": 0.3445, "step": 8113 }, { "epoch": 2.27793374508703, "grad_norm": 0.6353957056999207, "learning_rate": 1.6634200889879592e-06, "loss": 0.3133, "step": 8114 }, { "epoch": 2.2782144862436833, "grad_norm": 0.5574871897697449, "learning_rate": 1.6622037864416274e-06, "loss": 0.3003, "step": 8115 }, { "epoch": 2.2784952274003367, "grad_norm": 0.5481862425804138, "learning_rate": 1.6609878400841527e-06, "loss": 0.382, "step": 8116 }, { "epoch": 2.2787759685569906, "grad_norm": 0.5305172801017761, "learning_rate": 1.6597722500452895e-06, "loss": 0.3853, "step": 8117 }, { "epoch": 2.279056709713644, "grad_norm": 0.5914360284805298, "learning_rate": 1.6585570164547627e-06, "loss": 0.3063, "step": 8118 }, { "epoch": 2.2793374508702975, "grad_norm": 0.5952426195144653, "learning_rate": 1.6573421394422519e-06, "loss": 0.3658, "step": 8119 }, { "epoch": 2.279618192026951, "grad_norm": 0.5759475827217102, "learning_rate": 1.6561276191373994e-06, "loss": 0.2958, "step": 8120 }, { "epoch": 2.279898933183605, "grad_norm": 0.5460476875305176, "learning_rate": 1.6549134556698148e-06, "loss": 0.2847, "step": 8121 }, { "epoch": 2.2801796743402583, "grad_norm": 0.5319930911064148, "learning_rate": 1.6536996491690627e-06, "loss": 0.3438, "step": 8122 }, { "epoch": 2.2804604154969117, "grad_norm": 0.5628823637962341, "learning_rate": 1.6524861997646769e-06, "loss": 0.3398, "step": 8123 }, { "epoch": 2.2807411566535656, "grad_norm": 0.5416834354400635, "learning_rate": 1.6512731075861455e-06, "loss": 0.333, "step": 8124 }, { "epoch": 2.281021897810219, "grad_norm": 0.5499876737594604, "learning_rate": 1.650060372762925e-06, "loss": 0.351, "step": 8125 }, { "epoch": 2.2813026389668725, "grad_norm": 0.5261113047599792, "learning_rate": 1.6488479954244297e-06, "loss": 0.3234, "step": 8126 }, { "epoch": 2.281583380123526, "grad_norm": 0.5825338959693909, "learning_rate": 1.6476359757000349e-06, "loss": 0.3616, "step": 8127 }, { "epoch": 2.28186412128018, "grad_norm": 0.6240234375, "learning_rate": 1.6464243137190838e-06, "loss": 0.3149, "step": 8128 }, { "epoch": 2.2821448624368332, "grad_norm": 0.5497562289237976, "learning_rate": 1.6452130096108738e-06, "loss": 0.3578, "step": 8129 }, { "epoch": 2.2824256035934867, "grad_norm": 0.5215038061141968, "learning_rate": 1.6440020635046695e-06, "loss": 0.3712, "step": 8130 }, { "epoch": 2.2827063447501406, "grad_norm": 0.5622836351394653, "learning_rate": 1.6427914755296964e-06, "loss": 0.3812, "step": 8131 }, { "epoch": 2.282987085906794, "grad_norm": 0.5552845001220703, "learning_rate": 1.6415812458151393e-06, "loss": 0.3419, "step": 8132 }, { "epoch": 2.2832678270634474, "grad_norm": 0.5544072985649109, "learning_rate": 1.6403713744901478e-06, "loss": 0.3489, "step": 8133 }, { "epoch": 2.283548568220101, "grad_norm": 0.5986542701721191, "learning_rate": 1.63916186168383e-06, "loss": 0.3148, "step": 8134 }, { "epoch": 2.2838293093767548, "grad_norm": 0.5655476450920105, "learning_rate": 1.6379527075252598e-06, "loss": 0.335, "step": 8135 }, { "epoch": 2.284110050533408, "grad_norm": 0.5394382476806641, "learning_rate": 1.6367439121434698e-06, "loss": 0.3325, "step": 8136 }, { "epoch": 2.2843907916900617, "grad_norm": 0.5876544713973999, "learning_rate": 1.635535475667453e-06, "loss": 0.3508, "step": 8137 }, { "epoch": 2.2846715328467155, "grad_norm": 0.5868120193481445, "learning_rate": 1.63432739822617e-06, "loss": 0.3155, "step": 8138 }, { "epoch": 2.284952274003369, "grad_norm": 0.5698676705360413, "learning_rate": 1.633119679948535e-06, "loss": 0.3509, "step": 8139 }, { "epoch": 2.2852330151600224, "grad_norm": 0.5355852246284485, "learning_rate": 1.6319123209634324e-06, "loss": 0.3949, "step": 8140 }, { "epoch": 2.285513756316676, "grad_norm": 0.5934165716171265, "learning_rate": 1.6307053213997026e-06, "loss": 0.3681, "step": 8141 }, { "epoch": 2.2857944974733297, "grad_norm": 0.5372290015220642, "learning_rate": 1.6294986813861462e-06, "loss": 0.2869, "step": 8142 }, { "epoch": 2.286075238629983, "grad_norm": 0.6086657643318176, "learning_rate": 1.6282924010515333e-06, "loss": 0.3051, "step": 8143 }, { "epoch": 2.2863559797866366, "grad_norm": 0.5466243624687195, "learning_rate": 1.6270864805245856e-06, "loss": 0.3235, "step": 8144 }, { "epoch": 2.2866367209432905, "grad_norm": 0.5670375823974609, "learning_rate": 1.6258809199339964e-06, "loss": 0.3234, "step": 8145 }, { "epoch": 2.286917462099944, "grad_norm": 0.5249202251434326, "learning_rate": 1.6246757194084111e-06, "loss": 0.3425, "step": 8146 }, { "epoch": 2.2871982032565974, "grad_norm": 0.5967442393302917, "learning_rate": 1.6234708790764446e-06, "loss": 0.3449, "step": 8147 }, { "epoch": 2.287478944413251, "grad_norm": 0.606689453125, "learning_rate": 1.6222663990666692e-06, "loss": 0.3257, "step": 8148 }, { "epoch": 2.2877596855699047, "grad_norm": 0.5632715821266174, "learning_rate": 1.621062279507617e-06, "loss": 0.3176, "step": 8149 }, { "epoch": 2.288040426726558, "grad_norm": 0.5461794137954712, "learning_rate": 1.6198585205277877e-06, "loss": 0.3526, "step": 8150 }, { "epoch": 2.2883211678832116, "grad_norm": 0.6035500764846802, "learning_rate": 1.6186551222556363e-06, "loss": 0.3312, "step": 8151 }, { "epoch": 2.2886019090398655, "grad_norm": 0.5271415114402771, "learning_rate": 1.617452084819584e-06, "loss": 0.3147, "step": 8152 }, { "epoch": 2.288882650196519, "grad_norm": 0.6394876837730408, "learning_rate": 1.6162494083480106e-06, "loss": 0.3276, "step": 8153 }, { "epoch": 2.2891633913531724, "grad_norm": 0.5247066617012024, "learning_rate": 1.615047092969257e-06, "loss": 0.3377, "step": 8154 }, { "epoch": 2.289444132509826, "grad_norm": 0.5702440142631531, "learning_rate": 1.6138451388116278e-06, "loss": 0.3458, "step": 8155 }, { "epoch": 2.2897248736664797, "grad_norm": 0.5576598644256592, "learning_rate": 1.6126435460033896e-06, "loss": 0.3359, "step": 8156 }, { "epoch": 2.290005614823133, "grad_norm": 0.5746631026268005, "learning_rate": 1.6114423146727664e-06, "loss": 0.3263, "step": 8157 }, { "epoch": 2.2902863559797866, "grad_norm": 0.5279629230499268, "learning_rate": 1.6102414449479487e-06, "loss": 0.339, "step": 8158 }, { "epoch": 2.29056709713644, "grad_norm": 0.5480526089668274, "learning_rate": 1.609040936957083e-06, "loss": 0.3069, "step": 8159 }, { "epoch": 2.290847838293094, "grad_norm": 0.5356866717338562, "learning_rate": 1.6078407908282829e-06, "loss": 0.3243, "step": 8160 }, { "epoch": 2.2911285794497473, "grad_norm": 0.4751942753791809, "learning_rate": 1.6066410066896177e-06, "loss": 0.3796, "step": 8161 }, { "epoch": 2.291409320606401, "grad_norm": 0.5966656804084778, "learning_rate": 1.605441584669124e-06, "loss": 0.3573, "step": 8162 }, { "epoch": 2.2916900617630542, "grad_norm": 0.6304299831390381, "learning_rate": 1.6042425248947951e-06, "loss": 0.3411, "step": 8163 }, { "epoch": 2.291970802919708, "grad_norm": 0.5414368510246277, "learning_rate": 1.6030438274945853e-06, "loss": 0.3656, "step": 8164 }, { "epoch": 2.2922515440763616, "grad_norm": 0.5494125485420227, "learning_rate": 1.6018454925964161e-06, "loss": 0.2972, "step": 8165 }, { "epoch": 2.292532285233015, "grad_norm": 0.5852463245391846, "learning_rate": 1.600647520328162e-06, "loss": 0.3375, "step": 8166 }, { "epoch": 2.292813026389669, "grad_norm": 0.5897251963615417, "learning_rate": 1.599449910817667e-06, "loss": 0.3411, "step": 8167 }, { "epoch": 2.2930937675463223, "grad_norm": 0.6259673237800598, "learning_rate": 1.598252664192731e-06, "loss": 0.3235, "step": 8168 }, { "epoch": 2.2933745087029758, "grad_norm": 0.5944607853889465, "learning_rate": 1.5970557805811148e-06, "loss": 0.3467, "step": 8169 }, { "epoch": 2.293655249859629, "grad_norm": 0.5877252817153931, "learning_rate": 1.5958592601105461e-06, "loss": 0.3139, "step": 8170 }, { "epoch": 2.293935991016283, "grad_norm": 0.6014631986618042, "learning_rate": 1.5946631029087068e-06, "loss": 0.3081, "step": 8171 }, { "epoch": 2.2942167321729365, "grad_norm": 0.6377054452896118, "learning_rate": 1.593467309103246e-06, "loss": 0.3714, "step": 8172 }, { "epoch": 2.29449747332959, "grad_norm": 0.5477756857872009, "learning_rate": 1.5922718788217683e-06, "loss": 0.3356, "step": 8173 }, { "epoch": 2.294778214486244, "grad_norm": 0.6328738331794739, "learning_rate": 1.5910768121918469e-06, "loss": 0.3859, "step": 8174 }, { "epoch": 2.2950589556428973, "grad_norm": 0.5691003799438477, "learning_rate": 1.589882109341009e-06, "loss": 0.3464, "step": 8175 }, { "epoch": 2.2953396967995507, "grad_norm": 0.5517013669013977, "learning_rate": 1.5886877703967441e-06, "loss": 0.3233, "step": 8176 }, { "epoch": 2.295620437956204, "grad_norm": 0.5734623670578003, "learning_rate": 1.587493795486509e-06, "loss": 0.3074, "step": 8177 }, { "epoch": 2.295901179112858, "grad_norm": 0.5163578987121582, "learning_rate": 1.586300184737713e-06, "loss": 0.3368, "step": 8178 }, { "epoch": 2.2961819202695115, "grad_norm": 0.5542207360267639, "learning_rate": 1.5851069382777352e-06, "loss": 0.3153, "step": 8179 }, { "epoch": 2.296462661426165, "grad_norm": 0.5119909644126892, "learning_rate": 1.5839140562339066e-06, "loss": 0.3607, "step": 8180 }, { "epoch": 2.296743402582819, "grad_norm": 0.4962103068828583, "learning_rate": 1.5827215387335277e-06, "loss": 0.3562, "step": 8181 }, { "epoch": 2.2970241437394723, "grad_norm": 0.5574684739112854, "learning_rate": 1.5815293859038566e-06, "loss": 0.2697, "step": 8182 }, { "epoch": 2.2973048848961257, "grad_norm": 0.6066998243331909, "learning_rate": 1.5803375978721096e-06, "loss": 0.3226, "step": 8183 }, { "epoch": 2.297585626052779, "grad_norm": 0.5233213901519775, "learning_rate": 1.5791461747654707e-06, "loss": 0.3167, "step": 8184 }, { "epoch": 2.297866367209433, "grad_norm": 0.5955953001976013, "learning_rate": 1.5779551167110784e-06, "loss": 0.3381, "step": 8185 }, { "epoch": 2.2981471083660865, "grad_norm": 0.5840866565704346, "learning_rate": 1.5767644238360352e-06, "loss": 0.358, "step": 8186 }, { "epoch": 2.29842784952274, "grad_norm": 0.5760275721549988, "learning_rate": 1.575574096267406e-06, "loss": 0.3016, "step": 8187 }, { "epoch": 2.298708590679394, "grad_norm": 0.5533139705657959, "learning_rate": 1.5743841341322125e-06, "loss": 0.314, "step": 8188 }, { "epoch": 2.2989893318360473, "grad_norm": 0.5782603025436401, "learning_rate": 1.5731945375574432e-06, "loss": 0.3718, "step": 8189 }, { "epoch": 2.2992700729927007, "grad_norm": 0.5638977885246277, "learning_rate": 1.5720053066700436e-06, "loss": 0.3342, "step": 8190 }, { "epoch": 2.299550814149354, "grad_norm": 0.54283607006073, "learning_rate": 1.570816441596918e-06, "loss": 0.3243, "step": 8191 }, { "epoch": 2.299831555306008, "grad_norm": 0.5435816645622253, "learning_rate": 1.569627942464939e-06, "loss": 0.3682, "step": 8192 }, { "epoch": 2.3001122964626615, "grad_norm": 0.5701005458831787, "learning_rate": 1.5684398094009322e-06, "loss": 0.3395, "step": 8193 }, { "epoch": 2.300393037619315, "grad_norm": 0.5799752473831177, "learning_rate": 1.5672520425316912e-06, "loss": 0.3671, "step": 8194 }, { "epoch": 2.300673778775969, "grad_norm": 0.5573103427886963, "learning_rate": 1.5660646419839642e-06, "loss": 0.3248, "step": 8195 }, { "epoch": 2.3009545199326222, "grad_norm": 0.5010234713554382, "learning_rate": 1.5648776078844653e-06, "loss": 0.3262, "step": 8196 }, { "epoch": 2.3012352610892757, "grad_norm": 0.5645644664764404, "learning_rate": 1.5636909403598665e-06, "loss": 0.2944, "step": 8197 }, { "epoch": 2.301516002245929, "grad_norm": 0.5264140367507935, "learning_rate": 1.5625046395368004e-06, "loss": 0.3516, "step": 8198 }, { "epoch": 2.301796743402583, "grad_norm": 0.5427291393280029, "learning_rate": 1.5613187055418643e-06, "loss": 0.3304, "step": 8199 }, { "epoch": 2.3020774845592364, "grad_norm": 0.585015058517456, "learning_rate": 1.5601331385016106e-06, "loss": 0.2747, "step": 8200 }, { "epoch": 2.30235822571589, "grad_norm": 0.5409886837005615, "learning_rate": 1.5589479385425581e-06, "loss": 0.4031, "step": 8201 }, { "epoch": 2.3026389668725433, "grad_norm": 0.6217318177223206, "learning_rate": 1.557763105791184e-06, "loss": 0.3104, "step": 8202 }, { "epoch": 2.302919708029197, "grad_norm": 0.5729625225067139, "learning_rate": 1.5565786403739236e-06, "loss": 0.3413, "step": 8203 }, { "epoch": 2.3032004491858507, "grad_norm": 0.6085329651832581, "learning_rate": 1.555394542417179e-06, "loss": 0.3203, "step": 8204 }, { "epoch": 2.303481190342504, "grad_norm": 0.5232425332069397, "learning_rate": 1.5542108120473065e-06, "loss": 0.3966, "step": 8205 }, { "epoch": 2.3037619314991575, "grad_norm": 0.6044387221336365, "learning_rate": 1.5530274493906282e-06, "loss": 0.3144, "step": 8206 }, { "epoch": 2.3040426726558114, "grad_norm": 0.5787413716316223, "learning_rate": 1.5518444545734267e-06, "loss": 0.2744, "step": 8207 }, { "epoch": 2.304323413812465, "grad_norm": 0.6461145281791687, "learning_rate": 1.550661827721941e-06, "loss": 0.3532, "step": 8208 }, { "epoch": 2.3046041549691183, "grad_norm": 0.5377730131149292, "learning_rate": 1.549479568962377e-06, "loss": 0.338, "step": 8209 }, { "epoch": 2.304884896125772, "grad_norm": 0.6564037203788757, "learning_rate": 1.5482976784208935e-06, "loss": 0.3503, "step": 8210 }, { "epoch": 2.3051656372824256, "grad_norm": 0.5975328087806702, "learning_rate": 1.5471161562236192e-06, "loss": 0.3131, "step": 8211 }, { "epoch": 2.305446378439079, "grad_norm": 0.5177893042564392, "learning_rate": 1.5459350024966368e-06, "loss": 0.3158, "step": 8212 }, { "epoch": 2.3057271195957325, "grad_norm": 0.5879108309745789, "learning_rate": 1.5447542173659897e-06, "loss": 0.3304, "step": 8213 }, { "epoch": 2.3060078607523864, "grad_norm": 0.5290200114250183, "learning_rate": 1.5435738009576872e-06, "loss": 0.3021, "step": 8214 }, { "epoch": 2.30628860190904, "grad_norm": 0.5285158753395081, "learning_rate": 1.5423937533976936e-06, "loss": 0.3485, "step": 8215 }, { "epoch": 2.3065693430656933, "grad_norm": 0.5717185735702515, "learning_rate": 1.541214074811938e-06, "loss": 0.3601, "step": 8216 }, { "epoch": 2.306850084222347, "grad_norm": 0.5635417699813843, "learning_rate": 1.5400347653263064e-06, "loss": 0.361, "step": 8217 }, { "epoch": 2.3071308253790006, "grad_norm": 0.5670722723007202, "learning_rate": 1.5388558250666502e-06, "loss": 0.3308, "step": 8218 }, { "epoch": 2.307411566535654, "grad_norm": 0.5696948170661926, "learning_rate": 1.5376772541587765e-06, "loss": 0.323, "step": 8219 }, { "epoch": 2.3076923076923075, "grad_norm": 0.6074270009994507, "learning_rate": 1.536499052728454e-06, "loss": 0.3151, "step": 8220 }, { "epoch": 2.3079730488489614, "grad_norm": 0.5729271173477173, "learning_rate": 1.5353212209014163e-06, "loss": 0.3709, "step": 8221 }, { "epoch": 2.308253790005615, "grad_norm": 0.5935203433036804, "learning_rate": 1.5341437588033508e-06, "loss": 0.3307, "step": 8222 }, { "epoch": 2.3085345311622683, "grad_norm": 0.5793057084083557, "learning_rate": 1.5329666665599125e-06, "loss": 0.338, "step": 8223 }, { "epoch": 2.308815272318922, "grad_norm": 0.5447109341621399, "learning_rate": 1.531789944296711e-06, "loss": 0.3057, "step": 8224 }, { "epoch": 2.3090960134755756, "grad_norm": 0.5514894723892212, "learning_rate": 1.5306135921393178e-06, "loss": 0.2681, "step": 8225 }, { "epoch": 2.309376754632229, "grad_norm": 0.6545277237892151, "learning_rate": 1.5294376102132691e-06, "loss": 0.3523, "step": 8226 }, { "epoch": 2.3096574957888825, "grad_norm": 0.5393643975257874, "learning_rate": 1.528261998644055e-06, "loss": 0.3486, "step": 8227 }, { "epoch": 2.3099382369455363, "grad_norm": 0.5711279511451721, "learning_rate": 1.527086757557133e-06, "loss": 0.3129, "step": 8228 }, { "epoch": 2.31021897810219, "grad_norm": 0.5342314839363098, "learning_rate": 1.5259118870779134e-06, "loss": 0.3083, "step": 8229 }, { "epoch": 2.3104997192588432, "grad_norm": 0.6208454966545105, "learning_rate": 1.524737387331775e-06, "loss": 0.3157, "step": 8230 }, { "epoch": 2.310780460415497, "grad_norm": 0.5687270164489746, "learning_rate": 1.5235632584440503e-06, "loss": 0.3459, "step": 8231 }, { "epoch": 2.3110612015721506, "grad_norm": 0.5974138379096985, "learning_rate": 1.5223895005400359e-06, "loss": 0.3014, "step": 8232 }, { "epoch": 2.311341942728804, "grad_norm": 0.5893130302429199, "learning_rate": 1.5212161137449892e-06, "loss": 0.3337, "step": 8233 }, { "epoch": 2.3116226838854574, "grad_norm": 0.5888399481773376, "learning_rate": 1.5200430981841262e-06, "loss": 0.3174, "step": 8234 }, { "epoch": 2.3119034250421113, "grad_norm": 0.5141733288764954, "learning_rate": 1.5188704539826222e-06, "loss": 0.3262, "step": 8235 }, { "epoch": 2.3121841661987648, "grad_norm": 0.5367722511291504, "learning_rate": 1.5176981812656166e-06, "loss": 0.3919, "step": 8236 }, { "epoch": 2.312464907355418, "grad_norm": 0.5129967331886292, "learning_rate": 1.5165262801582048e-06, "loss": 0.3121, "step": 8237 }, { "epoch": 2.312745648512072, "grad_norm": 0.6207329034805298, "learning_rate": 1.5153547507854472e-06, "loss": 0.3898, "step": 8238 }, { "epoch": 2.3130263896687255, "grad_norm": 0.5402520298957825, "learning_rate": 1.5141835932723615e-06, "loss": 0.3251, "step": 8239 }, { "epoch": 2.313307130825379, "grad_norm": 0.5945013761520386, "learning_rate": 1.5130128077439244e-06, "loss": 0.3428, "step": 8240 }, { "epoch": 2.3135878719820324, "grad_norm": 0.5576349496841431, "learning_rate": 1.511842394325077e-06, "loss": 0.3475, "step": 8241 }, { "epoch": 2.3138686131386863, "grad_norm": 0.5237705111503601, "learning_rate": 1.510672353140717e-06, "loss": 0.3041, "step": 8242 }, { "epoch": 2.3141493542953397, "grad_norm": 0.5582318902015686, "learning_rate": 1.5095026843157062e-06, "loss": 0.2963, "step": 8243 }, { "epoch": 2.314430095451993, "grad_norm": 0.5891752243041992, "learning_rate": 1.5083333879748618e-06, "loss": 0.2987, "step": 8244 }, { "epoch": 2.314710836608647, "grad_norm": 0.581871747970581, "learning_rate": 1.507164464242966e-06, "loss": 0.338, "step": 8245 }, { "epoch": 2.3149915777653005, "grad_norm": 0.5098099112510681, "learning_rate": 1.5059959132447582e-06, "loss": 0.2718, "step": 8246 }, { "epoch": 2.315272318921954, "grad_norm": 0.5287011861801147, "learning_rate": 1.5048277351049373e-06, "loss": 0.3499, "step": 8247 }, { "epoch": 2.3155530600786074, "grad_norm": 0.6619024872779846, "learning_rate": 1.503659929948167e-06, "loss": 0.3568, "step": 8248 }, { "epoch": 2.3158338012352613, "grad_norm": 0.5380387902259827, "learning_rate": 1.5024924978990651e-06, "loss": 0.3302, "step": 8249 }, { "epoch": 2.3161145423919147, "grad_norm": 0.5728728175163269, "learning_rate": 1.5013254390822158e-06, "loss": 0.3213, "step": 8250 }, { "epoch": 2.316395283548568, "grad_norm": 0.5615373849868774, "learning_rate": 1.500158753622159e-06, "loss": 0.3108, "step": 8251 }, { "epoch": 2.3166760247052216, "grad_norm": 0.6084820032119751, "learning_rate": 1.4989924416433943e-06, "loss": 0.2813, "step": 8252 }, { "epoch": 2.3169567658618755, "grad_norm": 0.5133932828903198, "learning_rate": 1.4978265032703864e-06, "loss": 0.3426, "step": 8253 }, { "epoch": 2.317237507018529, "grad_norm": 0.567959189414978, "learning_rate": 1.4966609386275538e-06, "loss": 0.3044, "step": 8254 }, { "epoch": 2.3175182481751824, "grad_norm": 0.5489754676818848, "learning_rate": 1.4954957478392818e-06, "loss": 0.3123, "step": 8255 }, { "epoch": 2.317798989331836, "grad_norm": 0.568737268447876, "learning_rate": 1.4943309310299086e-06, "loss": 0.3351, "step": 8256 }, { "epoch": 2.3180797304884897, "grad_norm": 0.5404559373855591, "learning_rate": 1.4931664883237385e-06, "loss": 0.3401, "step": 8257 }, { "epoch": 2.318360471645143, "grad_norm": 0.4898296296596527, "learning_rate": 1.4920024198450344e-06, "loss": 0.3225, "step": 8258 }, { "epoch": 2.3186412128017966, "grad_norm": 0.5398330688476562, "learning_rate": 1.4908387257180162e-06, "loss": 0.3108, "step": 8259 }, { "epoch": 2.3189219539584505, "grad_norm": 0.580632746219635, "learning_rate": 1.4896754060668684e-06, "loss": 0.2972, "step": 8260 }, { "epoch": 2.319202695115104, "grad_norm": 0.6417765021324158, "learning_rate": 1.488512461015732e-06, "loss": 0.2738, "step": 8261 }, { "epoch": 2.3194834362717573, "grad_norm": 0.580383837223053, "learning_rate": 1.4873498906887073e-06, "loss": 0.3589, "step": 8262 }, { "epoch": 2.319764177428411, "grad_norm": 0.5803306102752686, "learning_rate": 1.4861876952098609e-06, "loss": 0.3285, "step": 8263 }, { "epoch": 2.3200449185850647, "grad_norm": 0.5498411655426025, "learning_rate": 1.485025874703211e-06, "loss": 0.3219, "step": 8264 }, { "epoch": 2.320325659741718, "grad_norm": 0.6075876355171204, "learning_rate": 1.483864429292743e-06, "loss": 0.3251, "step": 8265 }, { "epoch": 2.3206064008983716, "grad_norm": 0.5450096726417542, "learning_rate": 1.4827033591023953e-06, "loss": 0.314, "step": 8266 }, { "epoch": 2.3208871420550254, "grad_norm": 0.5331777334213257, "learning_rate": 1.4815426642560753e-06, "loss": 0.2894, "step": 8267 }, { "epoch": 2.321167883211679, "grad_norm": 0.4995564818382263, "learning_rate": 1.4803823448776417e-06, "loss": 0.3716, "step": 8268 }, { "epoch": 2.3214486243683323, "grad_norm": 0.5682269930839539, "learning_rate": 1.4792224010909157e-06, "loss": 0.3462, "step": 8269 }, { "epoch": 2.3217293655249858, "grad_norm": 0.6323981881141663, "learning_rate": 1.4780628330196821e-06, "loss": 0.2919, "step": 8270 }, { "epoch": 2.3220101066816397, "grad_norm": 0.6075713634490967, "learning_rate": 1.4769036407876808e-06, "loss": 0.3343, "step": 8271 }, { "epoch": 2.322290847838293, "grad_norm": 0.607307493686676, "learning_rate": 1.475744824518615e-06, "loss": 0.3415, "step": 8272 }, { "epoch": 2.3225715889949465, "grad_norm": 0.5312021374702454, "learning_rate": 1.4745863843361459e-06, "loss": 0.3408, "step": 8273 }, { "epoch": 2.3228523301516004, "grad_norm": 0.5947307348251343, "learning_rate": 1.4734283203638934e-06, "loss": 0.3654, "step": 8274 }, { "epoch": 2.323133071308254, "grad_norm": 0.5386355519294739, "learning_rate": 1.4722706327254416e-06, "loss": 0.3531, "step": 8275 }, { "epoch": 2.3234138124649073, "grad_norm": 0.5545230507850647, "learning_rate": 1.4711133215443285e-06, "loss": 0.321, "step": 8276 }, { "epoch": 2.3236945536215607, "grad_norm": 0.5298532843589783, "learning_rate": 1.4699563869440592e-06, "loss": 0.3268, "step": 8277 }, { "epoch": 2.3239752947782146, "grad_norm": 0.5816928148269653, "learning_rate": 1.4687998290480904e-06, "loss": 0.3152, "step": 8278 }, { "epoch": 2.324256035934868, "grad_norm": 0.6305320262908936, "learning_rate": 1.4676436479798462e-06, "loss": 0.3181, "step": 8279 }, { "epoch": 2.3245367770915215, "grad_norm": 0.6032200455665588, "learning_rate": 1.4664878438627061e-06, "loss": 0.3855, "step": 8280 }, { "epoch": 2.3248175182481754, "grad_norm": 0.5271633267402649, "learning_rate": 1.4653324168200078e-06, "loss": 0.3717, "step": 8281 }, { "epoch": 2.325098259404829, "grad_norm": 0.5593137741088867, "learning_rate": 1.4641773669750537e-06, "loss": 0.3202, "step": 8282 }, { "epoch": 2.3253790005614823, "grad_norm": 0.524844765663147, "learning_rate": 1.4630226944511045e-06, "loss": 0.3913, "step": 8283 }, { "epoch": 2.3256597417181357, "grad_norm": 0.5058882236480713, "learning_rate": 1.4618683993713773e-06, "loss": 0.347, "step": 8284 }, { "epoch": 2.3259404828747896, "grad_norm": 0.5980703830718994, "learning_rate": 1.4607144818590536e-06, "loss": 0.3311, "step": 8285 }, { "epoch": 2.326221224031443, "grad_norm": 0.5376325249671936, "learning_rate": 1.4595609420372692e-06, "loss": 0.3088, "step": 8286 }, { "epoch": 2.3265019651880965, "grad_norm": 0.5730352401733398, "learning_rate": 1.4584077800291262e-06, "loss": 0.3595, "step": 8287 }, { "epoch": 2.3267827063447504, "grad_norm": 0.5882030725479126, "learning_rate": 1.4572549959576793e-06, "loss": 0.3208, "step": 8288 }, { "epoch": 2.327063447501404, "grad_norm": 0.545253336429596, "learning_rate": 1.4561025899459508e-06, "loss": 0.334, "step": 8289 }, { "epoch": 2.3273441886580573, "grad_norm": 0.5363314151763916, "learning_rate": 1.4549505621169152e-06, "loss": 0.3279, "step": 8290 }, { "epoch": 2.3276249298147107, "grad_norm": 0.5934737920761108, "learning_rate": 1.4537989125935086e-06, "loss": 0.3027, "step": 8291 }, { "epoch": 2.3279056709713646, "grad_norm": 0.5294120907783508, "learning_rate": 1.4526476414986318e-06, "loss": 0.3074, "step": 8292 }, { "epoch": 2.328186412128018, "grad_norm": 0.5200093984603882, "learning_rate": 1.4514967489551373e-06, "loss": 0.3323, "step": 8293 }, { "epoch": 2.3284671532846715, "grad_norm": 0.5722740888595581, "learning_rate": 1.450346235085845e-06, "loss": 0.3623, "step": 8294 }, { "epoch": 2.328747894441325, "grad_norm": 0.574876606464386, "learning_rate": 1.4491961000135285e-06, "loss": 0.3011, "step": 8295 }, { "epoch": 2.329028635597979, "grad_norm": 0.5320606231689453, "learning_rate": 1.4480463438609216e-06, "loss": 0.3321, "step": 8296 }, { "epoch": 2.3293093767546322, "grad_norm": 0.5752878785133362, "learning_rate": 1.4468969667507222e-06, "loss": 0.352, "step": 8297 }, { "epoch": 2.3295901179112857, "grad_norm": 0.5299527049064636, "learning_rate": 1.445747968805582e-06, "loss": 0.3436, "step": 8298 }, { "epoch": 2.329870859067939, "grad_norm": 0.5439935922622681, "learning_rate": 1.4445993501481171e-06, "loss": 0.3133, "step": 8299 }, { "epoch": 2.330151600224593, "grad_norm": 0.5759299993515015, "learning_rate": 1.4434511109008987e-06, "loss": 0.2979, "step": 8300 }, { "epoch": 2.3304323413812464, "grad_norm": 0.5390260219573975, "learning_rate": 1.4423032511864626e-06, "loss": 0.3311, "step": 8301 }, { "epoch": 2.3307130825379, "grad_norm": 0.5760179162025452, "learning_rate": 1.4411557711273e-06, "loss": 0.3311, "step": 8302 }, { "epoch": 2.3309938236945538, "grad_norm": 0.499437540769577, "learning_rate": 1.4400086708458605e-06, "loss": 0.315, "step": 8303 }, { "epoch": 2.331274564851207, "grad_norm": 0.6799649596214294, "learning_rate": 1.438861950464559e-06, "loss": 0.2776, "step": 8304 }, { "epoch": 2.3315553060078607, "grad_norm": 0.5366402268409729, "learning_rate": 1.4377156101057638e-06, "loss": 0.3265, "step": 8305 }, { "epoch": 2.331836047164514, "grad_norm": 0.5740495920181274, "learning_rate": 1.4365696498918074e-06, "loss": 0.3323, "step": 8306 }, { "epoch": 2.332116788321168, "grad_norm": 0.5453783273696899, "learning_rate": 1.435424069944979e-06, "loss": 0.3448, "step": 8307 }, { "epoch": 2.3323975294778214, "grad_norm": 0.5498390793800354, "learning_rate": 1.4342788703875237e-06, "loss": 0.2801, "step": 8308 }, { "epoch": 2.332678270634475, "grad_norm": 0.5661704540252686, "learning_rate": 1.433134051341657e-06, "loss": 0.3173, "step": 8309 }, { "epoch": 2.3329590117911287, "grad_norm": 0.5494989156723022, "learning_rate": 1.4319896129295429e-06, "loss": 0.3518, "step": 8310 }, { "epoch": 2.333239752947782, "grad_norm": 0.5851483345031738, "learning_rate": 1.4308455552733086e-06, "loss": 0.3302, "step": 8311 }, { "epoch": 2.3335204941044356, "grad_norm": 0.5645585060119629, "learning_rate": 1.4297018784950424e-06, "loss": 0.3258, "step": 8312 }, { "epoch": 2.333801235261089, "grad_norm": 0.5929242968559265, "learning_rate": 1.428558582716788e-06, "loss": 0.3174, "step": 8313 }, { "epoch": 2.334081976417743, "grad_norm": 0.5702686309814453, "learning_rate": 1.4274156680605543e-06, "loss": 0.278, "step": 8314 }, { "epoch": 2.3343627175743964, "grad_norm": 0.5488886833190918, "learning_rate": 1.4262731346483022e-06, "loss": 0.3334, "step": 8315 }, { "epoch": 2.33464345873105, "grad_norm": 0.5933936238288879, "learning_rate": 1.4251309826019593e-06, "loss": 0.3424, "step": 8316 }, { "epoch": 2.3349241998877037, "grad_norm": 0.5949031710624695, "learning_rate": 1.4239892120434073e-06, "loss": 0.3333, "step": 8317 }, { "epoch": 2.335204941044357, "grad_norm": 0.5480186939239502, "learning_rate": 1.422847823094487e-06, "loss": 0.327, "step": 8318 }, { "epoch": 2.3354856822010106, "grad_norm": 0.5760910511016846, "learning_rate": 1.4217068158770043e-06, "loss": 0.2974, "step": 8319 }, { "epoch": 2.335766423357664, "grad_norm": 0.5917199850082397, "learning_rate": 1.420566190512716e-06, "loss": 0.3202, "step": 8320 }, { "epoch": 2.336047164514318, "grad_norm": 0.5515584349632263, "learning_rate": 1.4194259471233468e-06, "loss": 0.3348, "step": 8321 }, { "epoch": 2.3363279056709714, "grad_norm": 0.5509028434753418, "learning_rate": 1.4182860858305747e-06, "loss": 0.3378, "step": 8322 }, { "epoch": 2.336608646827625, "grad_norm": 0.6656421422958374, "learning_rate": 1.4171466067560362e-06, "loss": 0.3618, "step": 8323 }, { "epoch": 2.3368893879842787, "grad_norm": 0.5880535244941711, "learning_rate": 1.4160075100213334e-06, "loss": 0.3502, "step": 8324 }, { "epoch": 2.337170129140932, "grad_norm": 0.5356268286705017, "learning_rate": 1.41486879574802e-06, "loss": 0.357, "step": 8325 }, { "epoch": 2.3374508702975856, "grad_norm": 0.5595723390579224, "learning_rate": 1.4137304640576161e-06, "loss": 0.3307, "step": 8326 }, { "epoch": 2.337731611454239, "grad_norm": 0.6235252618789673, "learning_rate": 1.4125925150715936e-06, "loss": 0.3056, "step": 8327 }, { "epoch": 2.338012352610893, "grad_norm": 0.56630539894104, "learning_rate": 1.411454948911391e-06, "loss": 0.3141, "step": 8328 }, { "epoch": 2.3382930937675463, "grad_norm": 0.5708787441253662, "learning_rate": 1.4103177656984007e-06, "loss": 0.2683, "step": 8329 }, { "epoch": 2.3385738349242, "grad_norm": 0.6209465861320496, "learning_rate": 1.409180965553974e-06, "loss": 0.2943, "step": 8330 }, { "epoch": 2.3388545760808537, "grad_norm": 0.535517156124115, "learning_rate": 1.4080445485994265e-06, "loss": 0.3374, "step": 8331 }, { "epoch": 2.339135317237507, "grad_norm": 0.5888431072235107, "learning_rate": 1.4069085149560264e-06, "loss": 0.3154, "step": 8332 }, { "epoch": 2.3394160583941606, "grad_norm": 0.5710810422897339, "learning_rate": 1.4057728647450053e-06, "loss": 0.3306, "step": 8333 }, { "epoch": 2.339696799550814, "grad_norm": 0.6118029952049255, "learning_rate": 1.404637598087555e-06, "loss": 0.3057, "step": 8334 }, { "epoch": 2.339977540707468, "grad_norm": 0.5413199663162231, "learning_rate": 1.4035027151048203e-06, "loss": 0.3222, "step": 8335 }, { "epoch": 2.3402582818641213, "grad_norm": 0.5166553854942322, "learning_rate": 1.402368215917912e-06, "loss": 0.3518, "step": 8336 }, { "epoch": 2.3405390230207748, "grad_norm": 0.5677252411842346, "learning_rate": 1.4012341006478947e-06, "loss": 0.3681, "step": 8337 }, { "epoch": 2.3408197641774287, "grad_norm": 0.6056877374649048, "learning_rate": 1.4001003694157955e-06, "loss": 0.3207, "step": 8338 }, { "epoch": 2.341100505334082, "grad_norm": 0.5278530716896057, "learning_rate": 1.3989670223425995e-06, "loss": 0.3372, "step": 8339 }, { "epoch": 2.3413812464907355, "grad_norm": 0.6560772061347961, "learning_rate": 1.3978340595492473e-06, "loss": 0.3587, "step": 8340 }, { "epoch": 2.341661987647389, "grad_norm": 0.49482297897338867, "learning_rate": 1.396701481156646e-06, "loss": 0.3258, "step": 8341 }, { "epoch": 2.341942728804043, "grad_norm": 0.6362783908843994, "learning_rate": 1.3955692872856535e-06, "loss": 0.3311, "step": 8342 }, { "epoch": 2.3422234699606963, "grad_norm": 0.6038870215415955, "learning_rate": 1.3944374780570936e-06, "loss": 0.3241, "step": 8343 }, { "epoch": 2.3425042111173497, "grad_norm": 0.5476205348968506, "learning_rate": 1.393306053591744e-06, "loss": 0.3351, "step": 8344 }, { "epoch": 2.342784952274003, "grad_norm": 0.5627130270004272, "learning_rate": 1.3921750140103429e-06, "loss": 0.3579, "step": 8345 }, { "epoch": 2.343065693430657, "grad_norm": 0.5221319794654846, "learning_rate": 1.3910443594335904e-06, "loss": 0.386, "step": 8346 }, { "epoch": 2.3433464345873105, "grad_norm": 0.6206731796264648, "learning_rate": 1.3899140899821396e-06, "loss": 0.323, "step": 8347 }, { "epoch": 2.343627175743964, "grad_norm": 0.6443973779678345, "learning_rate": 1.3887842057766089e-06, "loss": 0.353, "step": 8348 }, { "epoch": 2.3439079169006174, "grad_norm": 0.6118109226226807, "learning_rate": 1.3876547069375696e-06, "loss": 0.3177, "step": 8349 }, { "epoch": 2.3441886580572713, "grad_norm": 0.6209521293640137, "learning_rate": 1.3865255935855577e-06, "loss": 0.2785, "step": 8350 }, { "epoch": 2.3444693992139247, "grad_norm": 0.5720152854919434, "learning_rate": 1.3853968658410638e-06, "loss": 0.3032, "step": 8351 }, { "epoch": 2.344750140370578, "grad_norm": 0.5183261036872864, "learning_rate": 1.384268523824537e-06, "loss": 0.3791, "step": 8352 }, { "epoch": 2.345030881527232, "grad_norm": 0.5950300097465515, "learning_rate": 1.3831405676563902e-06, "loss": 0.2945, "step": 8353 }, { "epoch": 2.3453116226838855, "grad_norm": 0.5488893985748291, "learning_rate": 1.382012997456989e-06, "loss": 0.3289, "step": 8354 }, { "epoch": 2.345592363840539, "grad_norm": 0.666434109210968, "learning_rate": 1.380885813346663e-06, "loss": 0.2815, "step": 8355 }, { "epoch": 2.3458731049971924, "grad_norm": 0.5665557384490967, "learning_rate": 1.3797590154456975e-06, "loss": 0.3674, "step": 8356 }, { "epoch": 2.3461538461538463, "grad_norm": 0.6095634698867798, "learning_rate": 1.3786326038743354e-06, "loss": 0.3822, "step": 8357 }, { "epoch": 2.3464345873104997, "grad_norm": 0.5665314793586731, "learning_rate": 1.3775065787527837e-06, "loss": 0.3284, "step": 8358 }, { "epoch": 2.346715328467153, "grad_norm": 0.5768237709999084, "learning_rate": 1.3763809402012012e-06, "loss": 0.2781, "step": 8359 }, { "epoch": 2.346996069623807, "grad_norm": 0.5959749221801758, "learning_rate": 1.3752556883397116e-06, "loss": 0.289, "step": 8360 }, { "epoch": 2.3472768107804605, "grad_norm": 0.5341787338256836, "learning_rate": 1.3741308232883955e-06, "loss": 0.3232, "step": 8361 }, { "epoch": 2.347557551937114, "grad_norm": 0.4947063624858856, "learning_rate": 1.3730063451672882e-06, "loss": 0.3399, "step": 8362 }, { "epoch": 2.3478382930937673, "grad_norm": 0.6144747734069824, "learning_rate": 1.3718822540963906e-06, "loss": 0.3154, "step": 8363 }, { "epoch": 2.3481190342504212, "grad_norm": 0.5683850646018982, "learning_rate": 1.370758550195656e-06, "loss": 0.3686, "step": 8364 }, { "epoch": 2.3483997754070747, "grad_norm": 0.5459191799163818, "learning_rate": 1.369635233585001e-06, "loss": 0.3309, "step": 8365 }, { "epoch": 2.348680516563728, "grad_norm": 0.5615030527114868, "learning_rate": 1.3685123043842985e-06, "loss": 0.3489, "step": 8366 }, { "epoch": 2.348961257720382, "grad_norm": 0.546295702457428, "learning_rate": 1.3673897627133787e-06, "loss": 0.3599, "step": 8367 }, { "epoch": 2.3492419988770354, "grad_norm": 0.5746467709541321, "learning_rate": 1.3662676086920352e-06, "loss": 0.3449, "step": 8368 }, { "epoch": 2.349522740033689, "grad_norm": 0.6438571810722351, "learning_rate": 1.3651458424400143e-06, "loss": 0.3255, "step": 8369 }, { "epoch": 2.3498034811903423, "grad_norm": 0.5440747141838074, "learning_rate": 1.3640244640770266e-06, "loss": 0.3148, "step": 8370 }, { "epoch": 2.350084222346996, "grad_norm": 0.575641930103302, "learning_rate": 1.3629034737227382e-06, "loss": 0.3093, "step": 8371 }, { "epoch": 2.3503649635036497, "grad_norm": 0.5817500948905945, "learning_rate": 1.3617828714967713e-06, "loss": 0.3389, "step": 8372 }, { "epoch": 2.350645704660303, "grad_norm": 0.5214036107063293, "learning_rate": 1.3606626575187138e-06, "loss": 0.3328, "step": 8373 }, { "epoch": 2.350926445816957, "grad_norm": 0.5743655562400818, "learning_rate": 1.359542831908104e-06, "loss": 0.3295, "step": 8374 }, { "epoch": 2.3512071869736104, "grad_norm": 0.5960953235626221, "learning_rate": 1.358423394784446e-06, "loss": 0.3077, "step": 8375 }, { "epoch": 2.351487928130264, "grad_norm": 0.5571728348731995, "learning_rate": 1.357304346267197e-06, "loss": 0.3136, "step": 8376 }, { "epoch": 2.3517686692869173, "grad_norm": 0.5779780745506287, "learning_rate": 1.3561856864757767e-06, "loss": 0.3465, "step": 8377 }, { "epoch": 2.352049410443571, "grad_norm": 0.5787519216537476, "learning_rate": 1.3550674155295606e-06, "loss": 0.3557, "step": 8378 }, { "epoch": 2.3523301516002246, "grad_norm": 0.5545042157173157, "learning_rate": 1.3539495335478826e-06, "loss": 0.3076, "step": 8379 }, { "epoch": 2.352610892756878, "grad_norm": 0.564544141292572, "learning_rate": 1.3528320406500378e-06, "loss": 0.3521, "step": 8380 }, { "epoch": 2.352891633913532, "grad_norm": 0.6182708144187927, "learning_rate": 1.3517149369552762e-06, "loss": 0.3474, "step": 8381 }, { "epoch": 2.3531723750701854, "grad_norm": 0.587466299533844, "learning_rate": 1.3505982225828113e-06, "loss": 0.3348, "step": 8382 }, { "epoch": 2.353453116226839, "grad_norm": 0.5764768123626709, "learning_rate": 1.3494818976518093e-06, "loss": 0.3901, "step": 8383 }, { "epoch": 2.3537338573834923, "grad_norm": 0.5216163396835327, "learning_rate": 1.3483659622813954e-06, "loss": 0.3857, "step": 8384 }, { "epoch": 2.354014598540146, "grad_norm": 0.5445965528488159, "learning_rate": 1.3472504165906614e-06, "loss": 0.2882, "step": 8385 }, { "epoch": 2.3542953396967996, "grad_norm": 0.5460180044174194, "learning_rate": 1.3461352606986456e-06, "loss": 0.3148, "step": 8386 }, { "epoch": 2.354576080853453, "grad_norm": 0.6001867651939392, "learning_rate": 1.345020494724355e-06, "loss": 0.329, "step": 8387 }, { "epoch": 2.3548568220101065, "grad_norm": 0.5801257491111755, "learning_rate": 1.343906118786748e-06, "loss": 0.3104, "step": 8388 }, { "epoch": 2.3551375631667604, "grad_norm": 0.6232779026031494, "learning_rate": 1.3427921330047434e-06, "loss": 0.3066, "step": 8389 }, { "epoch": 2.355418304323414, "grad_norm": 0.6081386208534241, "learning_rate": 1.3416785374972208e-06, "loss": 0.2967, "step": 8390 }, { "epoch": 2.3556990454800673, "grad_norm": 0.6113626956939697, "learning_rate": 1.3405653323830136e-06, "loss": 0.2924, "step": 8391 }, { "epoch": 2.3559797866367207, "grad_norm": 0.5782427787780762, "learning_rate": 1.3394525177809187e-06, "loss": 0.3128, "step": 8392 }, { "epoch": 2.3562605277933746, "grad_norm": 0.5502467751502991, "learning_rate": 1.338340093809688e-06, "loss": 0.3099, "step": 8393 }, { "epoch": 2.356541268950028, "grad_norm": 0.5638527274131775, "learning_rate": 1.337228060588031e-06, "loss": 0.3496, "step": 8394 }, { "epoch": 2.3568220101066815, "grad_norm": 0.5481851100921631, "learning_rate": 1.3361164182346193e-06, "loss": 0.3298, "step": 8395 }, { "epoch": 2.3571027512633353, "grad_norm": 0.5715014934539795, "learning_rate": 1.3350051668680775e-06, "loss": 0.331, "step": 8396 }, { "epoch": 2.357383492419989, "grad_norm": 0.5994958877563477, "learning_rate": 1.3338943066069948e-06, "loss": 0.3384, "step": 8397 }, { "epoch": 2.3576642335766422, "grad_norm": 0.5965902805328369, "learning_rate": 1.3327838375699127e-06, "loss": 0.3061, "step": 8398 }, { "epoch": 2.3579449747332957, "grad_norm": 0.5985276103019714, "learning_rate": 1.3316737598753354e-06, "loss": 0.2946, "step": 8399 }, { "epoch": 2.3582257158899496, "grad_norm": 0.610192596912384, "learning_rate": 1.330564073641723e-06, "loss": 0.3425, "step": 8400 }, { "epoch": 2.358506457046603, "grad_norm": 0.5971916913986206, "learning_rate": 1.3294547789874924e-06, "loss": 0.3113, "step": 8401 }, { "epoch": 2.3587871982032564, "grad_norm": 0.5463565587997437, "learning_rate": 1.3283458760310237e-06, "loss": 0.3726, "step": 8402 }, { "epoch": 2.3590679393599103, "grad_norm": 0.5724221467971802, "learning_rate": 1.327237364890649e-06, "loss": 0.3495, "step": 8403 }, { "epoch": 2.3593486805165638, "grad_norm": 0.5457465648651123, "learning_rate": 1.3261292456846648e-06, "loss": 0.3181, "step": 8404 }, { "epoch": 2.359629421673217, "grad_norm": 0.5801199674606323, "learning_rate": 1.3250215185313208e-06, "loss": 0.3261, "step": 8405 }, { "epoch": 2.3599101628298707, "grad_norm": 0.5199431777000427, "learning_rate": 1.3239141835488261e-06, "loss": 0.3523, "step": 8406 }, { "epoch": 2.3601909039865245, "grad_norm": 0.5299031734466553, "learning_rate": 1.322807240855351e-06, "loss": 0.3292, "step": 8407 }, { "epoch": 2.360471645143178, "grad_norm": 0.5299385190010071, "learning_rate": 1.3217006905690189e-06, "loss": 0.3163, "step": 8408 }, { "epoch": 2.3607523862998314, "grad_norm": 0.5740717649459839, "learning_rate": 1.3205945328079157e-06, "loss": 0.3241, "step": 8409 }, { "epoch": 2.3610331274564853, "grad_norm": 0.5934680700302124, "learning_rate": 1.3194887676900841e-06, "loss": 0.3565, "step": 8410 }, { "epoch": 2.3613138686131387, "grad_norm": 0.5834106206893921, "learning_rate": 1.3183833953335224e-06, "loss": 0.3472, "step": 8411 }, { "epoch": 2.361594609769792, "grad_norm": 0.5752813220024109, "learning_rate": 1.3172784158561913e-06, "loss": 0.3529, "step": 8412 }, { "epoch": 2.3618753509264456, "grad_norm": 0.5484989285469055, "learning_rate": 1.3161738293760052e-06, "loss": 0.304, "step": 8413 }, { "epoch": 2.3621560920830995, "grad_norm": 0.5734193921089172, "learning_rate": 1.315069636010841e-06, "loss": 0.3073, "step": 8414 }, { "epoch": 2.362436833239753, "grad_norm": 0.5143022537231445, "learning_rate": 1.3139658358785306e-06, "loss": 0.3348, "step": 8415 }, { "epoch": 2.3627175743964064, "grad_norm": 0.5107776522636414, "learning_rate": 1.3128624290968628e-06, "loss": 0.3013, "step": 8416 }, { "epoch": 2.3629983155530603, "grad_norm": 0.5321611762046814, "learning_rate": 1.3117594157835895e-06, "loss": 0.3234, "step": 8417 }, { "epoch": 2.3632790567097137, "grad_norm": 0.5909276008605957, "learning_rate": 1.3106567960564136e-06, "loss": 0.3481, "step": 8418 }, { "epoch": 2.363559797866367, "grad_norm": 0.513523519039154, "learning_rate": 1.3095545700330037e-06, "loss": 0.303, "step": 8419 }, { "epoch": 2.3638405390230206, "grad_norm": 0.5815671682357788, "learning_rate": 1.3084527378309792e-06, "loss": 0.325, "step": 8420 }, { "epoch": 2.3641212801796745, "grad_norm": 0.5334650874137878, "learning_rate": 1.3073512995679238e-06, "loss": 0.3154, "step": 8421 }, { "epoch": 2.364402021336328, "grad_norm": 0.5904586911201477, "learning_rate": 1.3062502553613743e-06, "loss": 0.3228, "step": 8422 }, { "epoch": 2.3646827624929814, "grad_norm": 0.5955625176429749, "learning_rate": 1.3051496053288265e-06, "loss": 0.3448, "step": 8423 }, { "epoch": 2.3649635036496353, "grad_norm": 0.5929975509643555, "learning_rate": 1.3040493495877376e-06, "loss": 0.3479, "step": 8424 }, { "epoch": 2.3652442448062887, "grad_norm": 0.5295895934104919, "learning_rate": 1.3029494882555166e-06, "loss": 0.339, "step": 8425 }, { "epoch": 2.365524985962942, "grad_norm": 0.5702146887779236, "learning_rate": 1.3018500214495378e-06, "loss": 0.3571, "step": 8426 }, { "epoch": 2.3658057271195956, "grad_norm": 0.5665997862815857, "learning_rate": 1.3007509492871274e-06, "loss": 0.3202, "step": 8427 }, { "epoch": 2.3660864682762495, "grad_norm": 0.5769081711769104, "learning_rate": 1.2996522718855698e-06, "loss": 0.3324, "step": 8428 }, { "epoch": 2.366367209432903, "grad_norm": 0.6712439060211182, "learning_rate": 1.2985539893621123e-06, "loss": 0.3498, "step": 8429 }, { "epoch": 2.3666479505895563, "grad_norm": 0.5461412668228149, "learning_rate": 1.2974561018339537e-06, "loss": 0.3305, "step": 8430 }, { "epoch": 2.3669286917462102, "grad_norm": 0.5450289845466614, "learning_rate": 1.2963586094182573e-06, "loss": 0.3477, "step": 8431 }, { "epoch": 2.3672094329028637, "grad_norm": 0.5062228441238403, "learning_rate": 1.2952615122321366e-06, "loss": 0.3232, "step": 8432 }, { "epoch": 2.367490174059517, "grad_norm": 0.5167391300201416, "learning_rate": 1.2941648103926712e-06, "loss": 0.3583, "step": 8433 }, { "epoch": 2.3677709152161706, "grad_norm": 0.5224451422691345, "learning_rate": 1.2930685040168916e-06, "loss": 0.3486, "step": 8434 }, { "epoch": 2.3680516563728244, "grad_norm": 0.5944300293922424, "learning_rate": 1.2919725932217863e-06, "loss": 0.2567, "step": 8435 }, { "epoch": 2.368332397529478, "grad_norm": 0.5734946131706238, "learning_rate": 1.2908770781243108e-06, "loss": 0.3202, "step": 8436 }, { "epoch": 2.3686131386861313, "grad_norm": 0.5120568871498108, "learning_rate": 1.2897819588413675e-06, "loss": 0.3213, "step": 8437 }, { "epoch": 2.3688938798427848, "grad_norm": 0.5835923552513123, "learning_rate": 1.28868723548982e-06, "loss": 0.2747, "step": 8438 }, { "epoch": 2.3691746209994387, "grad_norm": 0.5205466151237488, "learning_rate": 1.287592908186493e-06, "loss": 0.3618, "step": 8439 }, { "epoch": 2.369455362156092, "grad_norm": 0.6142522692680359, "learning_rate": 1.2864989770481634e-06, "loss": 0.3048, "step": 8440 }, { "epoch": 2.3697361033127455, "grad_norm": 0.5458880662918091, "learning_rate": 1.2854054421915712e-06, "loss": 0.3434, "step": 8441 }, { "epoch": 2.370016844469399, "grad_norm": 0.5864067077636719, "learning_rate": 1.2843123037334115e-06, "loss": 0.2684, "step": 8442 }, { "epoch": 2.370297585626053, "grad_norm": 0.5875257253646851, "learning_rate": 1.2832195617903342e-06, "loss": 0.3211, "step": 8443 }, { "epoch": 2.3705783267827063, "grad_norm": 0.5107319951057434, "learning_rate": 1.2821272164789544e-06, "loss": 0.3581, "step": 8444 }, { "epoch": 2.3708590679393597, "grad_norm": 0.630216121673584, "learning_rate": 1.2810352679158362e-06, "loss": 0.3138, "step": 8445 }, { "epoch": 2.3711398090960136, "grad_norm": 0.5387038588523865, "learning_rate": 1.2799437162175087e-06, "loss": 0.3519, "step": 8446 }, { "epoch": 2.371420550252667, "grad_norm": 0.5554496049880981, "learning_rate": 1.2788525615004532e-06, "loss": 0.3683, "step": 8447 }, { "epoch": 2.3717012914093205, "grad_norm": 0.547758936882019, "learning_rate": 1.2777618038811134e-06, "loss": 0.3423, "step": 8448 }, { "epoch": 2.371982032565974, "grad_norm": 0.5472351312637329, "learning_rate": 1.276671443475887e-06, "loss": 0.3288, "step": 8449 }, { "epoch": 2.372262773722628, "grad_norm": 0.6356356143951416, "learning_rate": 1.275581480401129e-06, "loss": 0.3491, "step": 8450 }, { "epoch": 2.3725435148792813, "grad_norm": 0.5399888157844543, "learning_rate": 1.274491914773156e-06, "loss": 0.3149, "step": 8451 }, { "epoch": 2.3728242560359347, "grad_norm": 0.5299639105796814, "learning_rate": 1.2734027467082366e-06, "loss": 0.3121, "step": 8452 }, { "epoch": 2.3731049971925886, "grad_norm": 0.4775037467479706, "learning_rate": 1.2723139763226039e-06, "loss": 0.3176, "step": 8453 }, { "epoch": 2.373385738349242, "grad_norm": 0.534806489944458, "learning_rate": 1.2712256037324421e-06, "loss": 0.326, "step": 8454 }, { "epoch": 2.3736664795058955, "grad_norm": 0.6329243779182434, "learning_rate": 1.2701376290538952e-06, "loss": 0.3308, "step": 8455 }, { "epoch": 2.373947220662549, "grad_norm": 0.5237137079238892, "learning_rate": 1.2690500524030675e-06, "loss": 0.2707, "step": 8456 }, { "epoch": 2.374227961819203, "grad_norm": 0.5480474829673767, "learning_rate": 1.2679628738960155e-06, "loss": 0.3525, "step": 8457 }, { "epoch": 2.3745087029758563, "grad_norm": 0.5983497500419617, "learning_rate": 1.266876093648759e-06, "loss": 0.325, "step": 8458 }, { "epoch": 2.3747894441325097, "grad_norm": 0.6835537552833557, "learning_rate": 1.2657897117772695e-06, "loss": 0.3166, "step": 8459 }, { "epoch": 2.3750701852891636, "grad_norm": 0.5635731220245361, "learning_rate": 1.2647037283974805e-06, "loss": 0.3609, "step": 8460 }, { "epoch": 2.375350926445817, "grad_norm": 0.5700622797012329, "learning_rate": 1.2636181436252826e-06, "loss": 0.326, "step": 8461 }, { "epoch": 2.3756316676024705, "grad_norm": 0.5278427600860596, "learning_rate": 1.2625329575765198e-06, "loss": 0.3247, "step": 8462 }, { "epoch": 2.375912408759124, "grad_norm": 0.5857083797454834, "learning_rate": 1.261448170367e-06, "loss": 0.3155, "step": 8463 }, { "epoch": 2.376193149915778, "grad_norm": 0.5428717136383057, "learning_rate": 1.2603637821124825e-06, "loss": 0.3218, "step": 8464 }, { "epoch": 2.3764738910724312, "grad_norm": 0.5713076591491699, "learning_rate": 1.259279792928686e-06, "loss": 0.2934, "step": 8465 }, { "epoch": 2.3767546322290847, "grad_norm": 0.5731424689292908, "learning_rate": 1.2581962029312889e-06, "loss": 0.3246, "step": 8466 }, { "epoch": 2.3770353733857386, "grad_norm": 0.5826208591461182, "learning_rate": 1.2571130122359226e-06, "loss": 0.3603, "step": 8467 }, { "epoch": 2.377316114542392, "grad_norm": 0.5617663860321045, "learning_rate": 1.2560302209581822e-06, "loss": 0.3218, "step": 8468 }, { "epoch": 2.3775968556990454, "grad_norm": 0.5224429368972778, "learning_rate": 1.254947829213613e-06, "loss": 0.3335, "step": 8469 }, { "epoch": 2.377877596855699, "grad_norm": 0.5597840547561646, "learning_rate": 1.2538658371177236e-06, "loss": 0.3352, "step": 8470 }, { "epoch": 2.3781583380123528, "grad_norm": 0.5383083820343018, "learning_rate": 1.2527842447859762e-06, "loss": 0.3496, "step": 8471 }, { "epoch": 2.378439079169006, "grad_norm": 0.6109507083892822, "learning_rate": 1.2517030523337908e-06, "loss": 0.2995, "step": 8472 }, { "epoch": 2.3787198203256597, "grad_norm": 0.6926080584526062, "learning_rate": 1.2506222598765477e-06, "loss": 0.2796, "step": 8473 }, { "epoch": 2.3790005614823135, "grad_norm": 0.5966923236846924, "learning_rate": 1.24954186752958e-06, "loss": 0.3514, "step": 8474 }, { "epoch": 2.379281302638967, "grad_norm": 0.5545368194580078, "learning_rate": 1.248461875408183e-06, "loss": 0.3558, "step": 8475 }, { "epoch": 2.3795620437956204, "grad_norm": 0.5807587504386902, "learning_rate": 1.2473822836276056e-06, "loss": 0.307, "step": 8476 }, { "epoch": 2.379842784952274, "grad_norm": 0.5329170823097229, "learning_rate": 1.2463030923030527e-06, "loss": 0.351, "step": 8477 }, { "epoch": 2.3801235261089277, "grad_norm": 0.525782585144043, "learning_rate": 1.2452243015496934e-06, "loss": 0.3402, "step": 8478 }, { "epoch": 2.380404267265581, "grad_norm": 0.583706259727478, "learning_rate": 1.2441459114826454e-06, "loss": 0.3038, "step": 8479 }, { "epoch": 2.3806850084222346, "grad_norm": 0.5716387629508972, "learning_rate": 1.2430679222169911e-06, "loss": 0.3398, "step": 8480 }, { "epoch": 2.3809657495788885, "grad_norm": 0.5780821442604065, "learning_rate": 1.2419903338677636e-06, "loss": 0.319, "step": 8481 }, { "epoch": 2.381246490735542, "grad_norm": 0.6285926699638367, "learning_rate": 1.2409131465499602e-06, "loss": 0.3012, "step": 8482 }, { "epoch": 2.3815272318921954, "grad_norm": 0.5814008712768555, "learning_rate": 1.239836360378529e-06, "loss": 0.3092, "step": 8483 }, { "epoch": 2.381807973048849, "grad_norm": 0.5618540644645691, "learning_rate": 1.2387599754683777e-06, "loss": 0.3377, "step": 8484 }, { "epoch": 2.3820887142055023, "grad_norm": 0.5493171811103821, "learning_rate": 1.2376839919343731e-06, "loss": 0.3365, "step": 8485 }, { "epoch": 2.382369455362156, "grad_norm": 0.8610407114028931, "learning_rate": 1.236608409891335e-06, "loss": 0.3283, "step": 8486 }, { "epoch": 2.3826501965188096, "grad_norm": 0.6050732731819153, "learning_rate": 1.235533229454045e-06, "loss": 0.3433, "step": 8487 }, { "epoch": 2.382930937675463, "grad_norm": 0.5976834893226624, "learning_rate": 1.2344584507372404e-06, "loss": 0.3333, "step": 8488 }, { "epoch": 2.383211678832117, "grad_norm": 0.5333192944526672, "learning_rate": 1.233384073855612e-06, "loss": 0.3488, "step": 8489 }, { "epoch": 2.3834924199887704, "grad_norm": 0.5537493824958801, "learning_rate": 1.2323100989238136e-06, "loss": 0.3466, "step": 8490 }, { "epoch": 2.383773161145424, "grad_norm": 0.5674349665641785, "learning_rate": 1.2312365260564513e-06, "loss": 0.337, "step": 8491 }, { "epoch": 2.3840539023020773, "grad_norm": 0.546229362487793, "learning_rate": 1.2301633553680896e-06, "loss": 0.2806, "step": 8492 }, { "epoch": 2.384334643458731, "grad_norm": 0.5464627742767334, "learning_rate": 1.2290905869732529e-06, "loss": 0.3387, "step": 8493 }, { "epoch": 2.3846153846153846, "grad_norm": 0.5359675884246826, "learning_rate": 1.2280182209864177e-06, "loss": 0.3151, "step": 8494 }, { "epoch": 2.384896125772038, "grad_norm": 0.6023908853530884, "learning_rate": 1.2269462575220226e-06, "loss": 0.3012, "step": 8495 }, { "epoch": 2.385176866928692, "grad_norm": 0.4947608411312103, "learning_rate": 1.2258746966944591e-06, "loss": 0.339, "step": 8496 }, { "epoch": 2.3854576080853453, "grad_norm": 0.631801187992096, "learning_rate": 1.224803538618079e-06, "loss": 0.3245, "step": 8497 }, { "epoch": 2.385738349241999, "grad_norm": 0.5674340724945068, "learning_rate": 1.2237327834071893e-06, "loss": 0.2874, "step": 8498 }, { "epoch": 2.3860190903986522, "grad_norm": 0.5879778265953064, "learning_rate": 1.2226624311760521e-06, "loss": 0.3076, "step": 8499 }, { "epoch": 2.386299831555306, "grad_norm": 0.5510382652282715, "learning_rate": 1.2215924820388919e-06, "loss": 0.3161, "step": 8500 }, { "epoch": 2.3865805727119596, "grad_norm": 0.5921781063079834, "learning_rate": 1.2205229361098847e-06, "loss": 0.2966, "step": 8501 }, { "epoch": 2.386861313868613, "grad_norm": 0.6243314146995544, "learning_rate": 1.219453793503168e-06, "loss": 0.3108, "step": 8502 }, { "epoch": 2.387142055025267, "grad_norm": 0.5734645128250122, "learning_rate": 1.2183850543328313e-06, "loss": 0.331, "step": 8503 }, { "epoch": 2.3874227961819203, "grad_norm": 0.5308526158332825, "learning_rate": 1.2173167187129265e-06, "loss": 0.3404, "step": 8504 }, { "epoch": 2.3877035373385738, "grad_norm": 0.5437654852867126, "learning_rate": 1.216248786757459e-06, "loss": 0.3325, "step": 8505 }, { "epoch": 2.387984278495227, "grad_norm": 0.5825144648551941, "learning_rate": 1.2151812585803895e-06, "loss": 0.3626, "step": 8506 }, { "epoch": 2.388265019651881, "grad_norm": 0.5162522792816162, "learning_rate": 1.2141141342956414e-06, "loss": 0.3759, "step": 8507 }, { "epoch": 2.3885457608085345, "grad_norm": 0.5401203632354736, "learning_rate": 1.213047414017089e-06, "loss": 0.3429, "step": 8508 }, { "epoch": 2.388826501965188, "grad_norm": 0.5947383642196655, "learning_rate": 1.2119810978585678e-06, "loss": 0.3491, "step": 8509 }, { "epoch": 2.389107243121842, "grad_norm": 0.5353080630302429, "learning_rate": 1.210915185933868e-06, "loss": 0.3527, "step": 8510 }, { "epoch": 2.3893879842784953, "grad_norm": 0.5459492206573486, "learning_rate": 1.2098496783567343e-06, "loss": 0.349, "step": 8511 }, { "epoch": 2.3896687254351487, "grad_norm": 0.5531647801399231, "learning_rate": 1.208784575240876e-06, "loss": 0.3292, "step": 8512 }, { "epoch": 2.389949466591802, "grad_norm": 0.5723190903663635, "learning_rate": 1.207719876699952e-06, "loss": 0.3563, "step": 8513 }, { "epoch": 2.390230207748456, "grad_norm": 0.5542603135108948, "learning_rate": 1.2066555828475785e-06, "loss": 0.3116, "step": 8514 }, { "epoch": 2.3905109489051095, "grad_norm": 0.5417312979698181, "learning_rate": 1.2055916937973333e-06, "loss": 0.3537, "step": 8515 }, { "epoch": 2.390791690061763, "grad_norm": 0.5427179336547852, "learning_rate": 1.2045282096627453e-06, "loss": 0.38, "step": 8516 }, { "epoch": 2.391072431218417, "grad_norm": 0.5366474986076355, "learning_rate": 1.2034651305573059e-06, "loss": 0.2698, "step": 8517 }, { "epoch": 2.3913531723750703, "grad_norm": 0.5877828001976013, "learning_rate": 1.2024024565944576e-06, "loss": 0.3196, "step": 8518 }, { "epoch": 2.3916339135317237, "grad_norm": 0.5598554611206055, "learning_rate": 1.2013401878876042e-06, "loss": 0.3598, "step": 8519 }, { "epoch": 2.391914654688377, "grad_norm": 0.4990025460720062, "learning_rate": 1.2002783245501038e-06, "loss": 0.3537, "step": 8520 }, { "epoch": 2.392195395845031, "grad_norm": 0.5653421878814697, "learning_rate": 1.1992168666952703e-06, "loss": 0.3424, "step": 8521 }, { "epoch": 2.3924761370016845, "grad_norm": 0.543632447719574, "learning_rate": 1.1981558144363787e-06, "loss": 0.3377, "step": 8522 }, { "epoch": 2.392756878158338, "grad_norm": 0.5864419937133789, "learning_rate": 1.1970951678866555e-06, "loss": 0.3312, "step": 8523 }, { "epoch": 2.393037619314992, "grad_norm": 0.5528798699378967, "learning_rate": 1.196034927159288e-06, "loss": 0.3036, "step": 8524 }, { "epoch": 2.3933183604716453, "grad_norm": 0.5613815784454346, "learning_rate": 1.194975092367418e-06, "loss": 0.3248, "step": 8525 }, { "epoch": 2.3935991016282987, "grad_norm": 0.6046698689460754, "learning_rate": 1.1939156636241429e-06, "loss": 0.3092, "step": 8526 }, { "epoch": 2.393879842784952, "grad_norm": 0.5368297696113586, "learning_rate": 1.1928566410425213e-06, "loss": 0.3377, "step": 8527 }, { "epoch": 2.394160583941606, "grad_norm": 0.5937784314155579, "learning_rate": 1.1917980247355621e-06, "loss": 0.3526, "step": 8528 }, { "epoch": 2.3944413250982595, "grad_norm": 0.5347118377685547, "learning_rate": 1.190739814816238e-06, "loss": 0.2996, "step": 8529 }, { "epoch": 2.394722066254913, "grad_norm": 0.584606409072876, "learning_rate": 1.1896820113974705e-06, "loss": 0.3166, "step": 8530 }, { "epoch": 2.3950028074115663, "grad_norm": 0.6021196842193604, "learning_rate": 1.188624614592146e-06, "loss": 0.3179, "step": 8531 }, { "epoch": 2.3952835485682202, "grad_norm": 0.5946304202079773, "learning_rate": 1.1875676245131012e-06, "loss": 0.3623, "step": 8532 }, { "epoch": 2.3955642897248737, "grad_norm": 0.5681818723678589, "learning_rate": 1.1865110412731295e-06, "loss": 0.3432, "step": 8533 }, { "epoch": 2.395845030881527, "grad_norm": 0.5449780225753784, "learning_rate": 1.1854548649849874e-06, "loss": 0.3562, "step": 8534 }, { "epoch": 2.3961257720381806, "grad_norm": 0.6077841520309448, "learning_rate": 1.1843990957613787e-06, "loss": 0.3174, "step": 8535 }, { "epoch": 2.3964065131948344, "grad_norm": 0.7257047891616821, "learning_rate": 1.1833437337149728e-06, "loss": 0.2879, "step": 8536 }, { "epoch": 2.396687254351488, "grad_norm": 0.5597652196884155, "learning_rate": 1.1822887789583875e-06, "loss": 0.3213, "step": 8537 }, { "epoch": 2.3969679955081413, "grad_norm": 0.5362836718559265, "learning_rate": 1.1812342316042036e-06, "loss": 0.3392, "step": 8538 }, { "epoch": 2.397248736664795, "grad_norm": 0.5410301089286804, "learning_rate": 1.180180091764956e-06, "loss": 0.3385, "step": 8539 }, { "epoch": 2.3975294778214487, "grad_norm": 0.608157753944397, "learning_rate": 1.1791263595531338e-06, "loss": 0.3491, "step": 8540 }, { "epoch": 2.397810218978102, "grad_norm": 0.5700143575668335, "learning_rate": 1.1780730350811876e-06, "loss": 0.3542, "step": 8541 }, { "epoch": 2.3980909601347555, "grad_norm": 0.6143352389335632, "learning_rate": 1.1770201184615203e-06, "loss": 0.2471, "step": 8542 }, { "epoch": 2.3983717012914094, "grad_norm": 0.5531323552131653, "learning_rate": 1.1759676098064903e-06, "loss": 0.3531, "step": 8543 }, { "epoch": 2.398652442448063, "grad_norm": 0.5703702569007874, "learning_rate": 1.1749155092284192e-06, "loss": 0.2932, "step": 8544 }, { "epoch": 2.3989331836047163, "grad_norm": 0.5654466152191162, "learning_rate": 1.1738638168395767e-06, "loss": 0.3307, "step": 8545 }, { "epoch": 2.39921392476137, "grad_norm": 0.6061022281646729, "learning_rate": 1.1728125327521955e-06, "loss": 0.3081, "step": 8546 }, { "epoch": 2.3994946659180236, "grad_norm": 0.6017066836357117, "learning_rate": 1.1717616570784612e-06, "loss": 0.3175, "step": 8547 }, { "epoch": 2.399775407074677, "grad_norm": 0.5499140620231628, "learning_rate": 1.1707111899305151e-06, "loss": 0.3067, "step": 8548 }, { "epoch": 2.4000561482313305, "grad_norm": 0.6325089335441589, "learning_rate": 1.1696611314204599e-06, "loss": 0.3309, "step": 8549 }, { "epoch": 2.4003368893879844, "grad_norm": 0.5376364588737488, "learning_rate": 1.1686114816603477e-06, "loss": 0.3229, "step": 8550 }, { "epoch": 2.400617630544638, "grad_norm": 0.5829223394393921, "learning_rate": 1.167562240762194e-06, "loss": 0.3612, "step": 8551 }, { "epoch": 2.4008983717012913, "grad_norm": 0.6152477264404297, "learning_rate": 1.1665134088379643e-06, "loss": 0.2987, "step": 8552 }, { "epoch": 2.401179112857945, "grad_norm": 0.5731292366981506, "learning_rate": 1.165464985999586e-06, "loss": 0.3559, "step": 8553 }, { "epoch": 2.4014598540145986, "grad_norm": 0.5887482762336731, "learning_rate": 1.1644169723589389e-06, "loss": 0.3053, "step": 8554 }, { "epoch": 2.401740595171252, "grad_norm": 0.5587543845176697, "learning_rate": 1.1633693680278591e-06, "loss": 0.3231, "step": 8555 }, { "epoch": 2.4020213363279055, "grad_norm": 0.5400908589363098, "learning_rate": 1.1623221731181432e-06, "loss": 0.3114, "step": 8556 }, { "epoch": 2.4023020774845594, "grad_norm": 0.568158745765686, "learning_rate": 1.161275387741539e-06, "loss": 0.337, "step": 8557 }, { "epoch": 2.402582818641213, "grad_norm": 0.539108395576477, "learning_rate": 1.1602290120097548e-06, "loss": 0.3102, "step": 8558 }, { "epoch": 2.4028635597978663, "grad_norm": 0.639522135257721, "learning_rate": 1.1591830460344528e-06, "loss": 0.3415, "step": 8559 }, { "epoch": 2.40314430095452, "grad_norm": 0.55396968126297, "learning_rate": 1.1581374899272507e-06, "loss": 0.3387, "step": 8560 }, { "epoch": 2.4034250421111736, "grad_norm": 0.5616620182991028, "learning_rate": 1.1570923437997255e-06, "loss": 0.3225, "step": 8561 }, { "epoch": 2.403705783267827, "grad_norm": 0.5308200120925903, "learning_rate": 1.156047607763407e-06, "loss": 0.3412, "step": 8562 }, { "epoch": 2.4039865244244805, "grad_norm": 0.5579840540885925, "learning_rate": 1.1550032819297835e-06, "loss": 0.357, "step": 8563 }, { "epoch": 2.4042672655811343, "grad_norm": 0.5448474884033203, "learning_rate": 1.1539593664103005e-06, "loss": 0.3172, "step": 8564 }, { "epoch": 2.404548006737788, "grad_norm": 0.6003230810165405, "learning_rate": 1.152915861316356e-06, "loss": 0.3518, "step": 8565 }, { "epoch": 2.4048287478944412, "grad_norm": 0.5988787412643433, "learning_rate": 1.1518727667593087e-06, "loss": 0.3165, "step": 8566 }, { "epoch": 2.405109489051095, "grad_norm": 0.5354225635528564, "learning_rate": 1.1508300828504682e-06, "loss": 0.3445, "step": 8567 }, { "epoch": 2.4053902302077486, "grad_norm": 0.5439662337303162, "learning_rate": 1.1497878097011062e-06, "loss": 0.324, "step": 8568 }, { "epoch": 2.405670971364402, "grad_norm": 0.5637765526771545, "learning_rate": 1.1487459474224467e-06, "loss": 0.3504, "step": 8569 }, { "epoch": 2.4059517125210554, "grad_norm": 0.5570805072784424, "learning_rate": 1.1477044961256684e-06, "loss": 0.3299, "step": 8570 }, { "epoch": 2.4062324536777093, "grad_norm": 0.5669572949409485, "learning_rate": 1.146663455921912e-06, "loss": 0.3214, "step": 8571 }, { "epoch": 2.4065131948343628, "grad_norm": 0.5762567520141602, "learning_rate": 1.145622826922268e-06, "loss": 0.3, "step": 8572 }, { "epoch": 2.406793935991016, "grad_norm": 0.6352512836456299, "learning_rate": 1.1445826092377888e-06, "loss": 0.2981, "step": 8573 }, { "epoch": 2.40707467714767, "grad_norm": 0.5779180526733398, "learning_rate": 1.1435428029794775e-06, "loss": 0.346, "step": 8574 }, { "epoch": 2.4073554183043235, "grad_norm": 0.5574477910995483, "learning_rate": 1.1425034082582959e-06, "loss": 0.312, "step": 8575 }, { "epoch": 2.407636159460977, "grad_norm": 0.5137155055999756, "learning_rate": 1.1414644251851637e-06, "loss": 0.332, "step": 8576 }, { "epoch": 2.4079169006176304, "grad_norm": 0.5635234117507935, "learning_rate": 1.1404258538709512e-06, "loss": 0.3753, "step": 8577 }, { "epoch": 2.4081976417742843, "grad_norm": 0.5221818089485168, "learning_rate": 1.1393876944264926e-06, "loss": 0.3065, "step": 8578 }, { "epoch": 2.4084783829309377, "grad_norm": 0.5356088876724243, "learning_rate": 1.1383499469625702e-06, "loss": 0.3014, "step": 8579 }, { "epoch": 2.408759124087591, "grad_norm": 0.5905362963676453, "learning_rate": 1.1373126115899286e-06, "loss": 0.3379, "step": 8580 }, { "epoch": 2.4090398652442446, "grad_norm": 0.6346529126167297, "learning_rate": 1.136275688419265e-06, "loss": 0.296, "step": 8581 }, { "epoch": 2.4093206064008985, "grad_norm": 0.5467550754547119, "learning_rate": 1.1352391775612314e-06, "loss": 0.3535, "step": 8582 }, { "epoch": 2.409601347557552, "grad_norm": 0.4837294816970825, "learning_rate": 1.1342030791264408e-06, "loss": 0.315, "step": 8583 }, { "epoch": 2.4098820887142054, "grad_norm": 0.6097621321678162, "learning_rate": 1.133167393225456e-06, "loss": 0.3558, "step": 8584 }, { "epoch": 2.410162829870859, "grad_norm": 0.6209611296653748, "learning_rate": 1.132132119968803e-06, "loss": 0.3096, "step": 8585 }, { "epoch": 2.4104435710275127, "grad_norm": 0.5919151306152344, "learning_rate": 1.1310972594669567e-06, "loss": 0.3734, "step": 8586 }, { "epoch": 2.410724312184166, "grad_norm": 0.6211033463478088, "learning_rate": 1.130062811830351e-06, "loss": 0.3623, "step": 8587 }, { "epoch": 2.4110050533408196, "grad_norm": 0.5057561993598938, "learning_rate": 1.1290287771693759e-06, "loss": 0.3433, "step": 8588 }, { "epoch": 2.4112857944974735, "grad_norm": 0.5803185105323792, "learning_rate": 1.127995155594378e-06, "loss": 0.3041, "step": 8589 }, { "epoch": 2.411566535654127, "grad_norm": 0.4840635657310486, "learning_rate": 1.1269619472156602e-06, "loss": 0.351, "step": 8590 }, { "epoch": 2.4118472768107804, "grad_norm": 0.5588074922561646, "learning_rate": 1.1259291521434785e-06, "loss": 0.3051, "step": 8591 }, { "epoch": 2.412128017967434, "grad_norm": 0.5140504240989685, "learning_rate": 1.1248967704880449e-06, "loss": 0.3652, "step": 8592 }, { "epoch": 2.4124087591240877, "grad_norm": 0.5610706210136414, "learning_rate": 1.1238648023595316e-06, "loss": 0.3171, "step": 8593 }, { "epoch": 2.412689500280741, "grad_norm": 0.5871493816375732, "learning_rate": 1.1228332478680608e-06, "loss": 0.3214, "step": 8594 }, { "epoch": 2.4129702414373946, "grad_norm": 0.533639669418335, "learning_rate": 1.1218021071237173e-06, "loss": 0.29, "step": 8595 }, { "epoch": 2.4132509825940485, "grad_norm": 0.5451346635818481, "learning_rate": 1.1207713802365361e-06, "loss": 0.3251, "step": 8596 }, { "epoch": 2.413531723750702, "grad_norm": 0.5177164077758789, "learning_rate": 1.1197410673165077e-06, "loss": 0.3011, "step": 8597 }, { "epoch": 2.4138124649073553, "grad_norm": 0.5294882655143738, "learning_rate": 1.1187111684735853e-06, "loss": 0.3416, "step": 8598 }, { "epoch": 2.414093206064009, "grad_norm": 0.5082541704177856, "learning_rate": 1.1176816838176685e-06, "loss": 0.3313, "step": 8599 }, { "epoch": 2.4143739472206627, "grad_norm": 0.5761443972587585, "learning_rate": 1.1166526134586213e-06, "loss": 0.3376, "step": 8600 }, { "epoch": 2.414654688377316, "grad_norm": 0.5105448365211487, "learning_rate": 1.1156239575062578e-06, "loss": 0.3248, "step": 8601 }, { "epoch": 2.4149354295339696, "grad_norm": 0.5691644549369812, "learning_rate": 1.1145957160703508e-06, "loss": 0.3394, "step": 8602 }, { "epoch": 2.4152161706906234, "grad_norm": 0.5590173006057739, "learning_rate": 1.1135678892606273e-06, "loss": 0.3605, "step": 8603 }, { "epoch": 2.415496911847277, "grad_norm": 0.5871078372001648, "learning_rate": 1.1125404771867692e-06, "loss": 0.2855, "step": 8604 }, { "epoch": 2.4157776530039303, "grad_norm": 0.6195324659347534, "learning_rate": 1.1115134799584188e-06, "loss": 0.3279, "step": 8605 }, { "epoch": 2.4160583941605838, "grad_norm": 0.5766201615333557, "learning_rate": 1.1104868976851669e-06, "loss": 0.3279, "step": 8606 }, { "epoch": 2.4163391353172377, "grad_norm": 0.5775571465492249, "learning_rate": 1.1094607304765676e-06, "loss": 0.3226, "step": 8607 }, { "epoch": 2.416619876473891, "grad_norm": 0.5180070996284485, "learning_rate": 1.108434978442125e-06, "loss": 0.3393, "step": 8608 }, { "epoch": 2.4169006176305445, "grad_norm": 0.6077528595924377, "learning_rate": 1.1074096416913005e-06, "loss": 0.3564, "step": 8609 }, { "epoch": 2.4171813587871984, "grad_norm": 0.5559960603713989, "learning_rate": 1.1063847203335143e-06, "loss": 0.3235, "step": 8610 }, { "epoch": 2.417462099943852, "grad_norm": 0.5508143305778503, "learning_rate": 1.105360214478136e-06, "loss": 0.328, "step": 8611 }, { "epoch": 2.4177428411005053, "grad_norm": 0.5017294883728027, "learning_rate": 1.104336124234498e-06, "loss": 0.3276, "step": 8612 }, { "epoch": 2.4180235822571587, "grad_norm": 0.5135388374328613, "learning_rate": 1.1033124497118825e-06, "loss": 0.362, "step": 8613 }, { "epoch": 2.4183043234138126, "grad_norm": 0.5576337575912476, "learning_rate": 1.10228919101953e-06, "loss": 0.3307, "step": 8614 }, { "epoch": 2.418585064570466, "grad_norm": 0.6229658722877502, "learning_rate": 1.101266348266638e-06, "loss": 0.3165, "step": 8615 }, { "epoch": 2.4188658057271195, "grad_norm": 0.5190553665161133, "learning_rate": 1.100243921562355e-06, "loss": 0.3807, "step": 8616 }, { "epoch": 2.4191465468837734, "grad_norm": 0.5850105285644531, "learning_rate": 1.0992219110157914e-06, "loss": 0.3312, "step": 8617 }, { "epoch": 2.419427288040427, "grad_norm": 0.5060490369796753, "learning_rate": 1.0982003167360083e-06, "loss": 0.307, "step": 8618 }, { "epoch": 2.4197080291970803, "grad_norm": 0.5556020140647888, "learning_rate": 1.0971791388320224e-06, "loss": 0.3356, "step": 8619 }, { "epoch": 2.4199887703537337, "grad_norm": 0.5170776844024658, "learning_rate": 1.0961583774128099e-06, "loss": 0.3296, "step": 8620 }, { "epoch": 2.4202695115103876, "grad_norm": 0.5360212922096252, "learning_rate": 1.095138032587298e-06, "loss": 0.3244, "step": 8621 }, { "epoch": 2.420550252667041, "grad_norm": 0.5871847867965698, "learning_rate": 1.0941181044643734e-06, "loss": 0.3181, "step": 8622 }, { "epoch": 2.4208309938236945, "grad_norm": 0.6124431490898132, "learning_rate": 1.0930985931528742e-06, "loss": 0.2591, "step": 8623 }, { "epoch": 2.421111734980348, "grad_norm": 0.5897138118743896, "learning_rate": 1.0920794987615996e-06, "loss": 0.3427, "step": 8624 }, { "epoch": 2.421392476137002, "grad_norm": 0.5578790307044983, "learning_rate": 1.091060821399299e-06, "loss": 0.3305, "step": 8625 }, { "epoch": 2.4216732172936553, "grad_norm": 0.5649060010910034, "learning_rate": 1.090042561174678e-06, "loss": 0.3414, "step": 8626 }, { "epoch": 2.4219539584503087, "grad_norm": 0.6106213927268982, "learning_rate": 1.0890247181964015e-06, "loss": 0.3337, "step": 8627 }, { "epoch": 2.422234699606962, "grad_norm": 0.6144611835479736, "learning_rate": 1.0880072925730855e-06, "loss": 0.2506, "step": 8628 }, { "epoch": 2.422515440763616, "grad_norm": 0.5643687844276428, "learning_rate": 1.0869902844133051e-06, "loss": 0.3061, "step": 8629 }, { "epoch": 2.4227961819202695, "grad_norm": 0.5640206336975098, "learning_rate": 1.0859736938255882e-06, "loss": 0.3359, "step": 8630 }, { "epoch": 2.423076923076923, "grad_norm": 0.6235983967781067, "learning_rate": 1.0849575209184178e-06, "loss": 0.3169, "step": 8631 }, { "epoch": 2.423357664233577, "grad_norm": 0.5621570348739624, "learning_rate": 1.083941765800236e-06, "loss": 0.3162, "step": 8632 }, { "epoch": 2.4236384053902302, "grad_norm": 0.585750162601471, "learning_rate": 1.0829264285794349e-06, "loss": 0.2987, "step": 8633 }, { "epoch": 2.4239191465468837, "grad_norm": 0.5673215389251709, "learning_rate": 1.081911509364369e-06, "loss": 0.3337, "step": 8634 }, { "epoch": 2.424199887703537, "grad_norm": 0.5640286803245544, "learning_rate": 1.0808970082633396e-06, "loss": 0.3602, "step": 8635 }, { "epoch": 2.424480628860191, "grad_norm": 0.5741168856620789, "learning_rate": 1.0798829253846116e-06, "loss": 0.3386, "step": 8636 }, { "epoch": 2.4247613700168444, "grad_norm": 0.5366054177284241, "learning_rate": 1.0788692608364004e-06, "loss": 0.294, "step": 8637 }, { "epoch": 2.425042111173498, "grad_norm": 0.5743140578269958, "learning_rate": 1.0778560147268752e-06, "loss": 0.3018, "step": 8638 }, { "epoch": 2.4253228523301518, "grad_norm": 0.5586783289909363, "learning_rate": 1.0768431871641682e-06, "loss": 0.3436, "step": 8639 }, { "epoch": 2.425603593486805, "grad_norm": 0.6264934539794922, "learning_rate": 1.0758307782563604e-06, "loss": 0.3589, "step": 8640 }, { "epoch": 2.4258843346434587, "grad_norm": 0.5324342250823975, "learning_rate": 1.0748187881114874e-06, "loss": 0.3215, "step": 8641 }, { "epoch": 2.426165075800112, "grad_norm": 0.5290731191635132, "learning_rate": 1.0738072168375452e-06, "loss": 0.3394, "step": 8642 }, { "epoch": 2.426445816956766, "grad_norm": 0.5561985969543457, "learning_rate": 1.0727960645424806e-06, "loss": 0.315, "step": 8643 }, { "epoch": 2.4267265581134194, "grad_norm": 0.5355144143104553, "learning_rate": 1.0717853313341997e-06, "loss": 0.3503, "step": 8644 }, { "epoch": 2.427007299270073, "grad_norm": 0.5117299556732178, "learning_rate": 1.07077501732056e-06, "loss": 0.3726, "step": 8645 }, { "epoch": 2.4272880404267267, "grad_norm": 0.5098596215248108, "learning_rate": 1.0697651226093752e-06, "loss": 0.3065, "step": 8646 }, { "epoch": 2.42756878158338, "grad_norm": 0.5706070065498352, "learning_rate": 1.0687556473084172e-06, "loss": 0.2954, "step": 8647 }, { "epoch": 2.4278495227400336, "grad_norm": 0.5685199499130249, "learning_rate": 1.067746591525408e-06, "loss": 0.339, "step": 8648 }, { "epoch": 2.428130263896687, "grad_norm": 0.5293835997581482, "learning_rate": 1.0667379553680312e-06, "loss": 0.3147, "step": 8649 }, { "epoch": 2.428411005053341, "grad_norm": 0.565785825252533, "learning_rate": 1.0657297389439192e-06, "loss": 0.3235, "step": 8650 }, { "epoch": 2.4286917462099944, "grad_norm": 0.5743482112884521, "learning_rate": 1.0647219423606653e-06, "loss": 0.339, "step": 8651 }, { "epoch": 2.428972487366648, "grad_norm": 0.5861659646034241, "learning_rate": 1.0637145657258135e-06, "loss": 0.313, "step": 8652 }, { "epoch": 2.4292532285233017, "grad_norm": 0.6089012622833252, "learning_rate": 1.062707609146864e-06, "loss": 0.3356, "step": 8653 }, { "epoch": 2.429533969679955, "grad_norm": 0.5933595895767212, "learning_rate": 1.0617010727312755e-06, "loss": 0.3475, "step": 8654 }, { "epoch": 2.4298147108366086, "grad_norm": 0.4917030930519104, "learning_rate": 1.060694956586456e-06, "loss": 0.3547, "step": 8655 }, { "epoch": 2.430095451993262, "grad_norm": 0.5100833177566528, "learning_rate": 1.0596892608197756e-06, "loss": 0.3571, "step": 8656 }, { "epoch": 2.430376193149916, "grad_norm": 0.6266801357269287, "learning_rate": 1.0586839855385539e-06, "loss": 0.3453, "step": 8657 }, { "epoch": 2.4306569343065694, "grad_norm": 0.5679176449775696, "learning_rate": 1.0576791308500661e-06, "loss": 0.3349, "step": 8658 }, { "epoch": 2.430937675463223, "grad_norm": 0.5580161809921265, "learning_rate": 1.0566746968615476e-06, "loss": 0.3264, "step": 8659 }, { "epoch": 2.4312184166198767, "grad_norm": 0.5244404673576355, "learning_rate": 1.0556706836801822e-06, "loss": 0.3378, "step": 8660 }, { "epoch": 2.43149915777653, "grad_norm": 0.582090437412262, "learning_rate": 1.0546670914131147e-06, "loss": 0.2973, "step": 8661 }, { "epoch": 2.4317798989331836, "grad_norm": 0.5382874608039856, "learning_rate": 1.0536639201674393e-06, "loss": 0.3411, "step": 8662 }, { "epoch": 2.432060640089837, "grad_norm": 0.5697934031486511, "learning_rate": 1.052661170050211e-06, "loss": 0.3323, "step": 8663 }, { "epoch": 2.432341381246491, "grad_norm": 0.5195464491844177, "learning_rate": 1.0516588411684347e-06, "loss": 0.2925, "step": 8664 }, { "epoch": 2.4326221224031443, "grad_norm": 0.5455653667449951, "learning_rate": 1.0506569336290735e-06, "loss": 0.3294, "step": 8665 }, { "epoch": 2.432902863559798, "grad_norm": 0.48982128500938416, "learning_rate": 1.0496554475390464e-06, "loss": 0.3288, "step": 8666 }, { "epoch": 2.4331836047164517, "grad_norm": 0.5944242477416992, "learning_rate": 1.0486543830052243e-06, "loss": 0.3009, "step": 8667 }, { "epoch": 2.433464345873105, "grad_norm": 0.5586980581283569, "learning_rate": 1.0476537401344333e-06, "loss": 0.2843, "step": 8668 }, { "epoch": 2.4337450870297586, "grad_norm": 0.5792757272720337, "learning_rate": 1.0466535190334588e-06, "loss": 0.3174, "step": 8669 }, { "epoch": 2.434025828186412, "grad_norm": 0.5918002128601074, "learning_rate": 1.0456537198090343e-06, "loss": 0.3126, "step": 8670 }, { "epoch": 2.434306569343066, "grad_norm": 0.5627588629722595, "learning_rate": 1.0446543425678563e-06, "loss": 0.3387, "step": 8671 }, { "epoch": 2.4345873104997193, "grad_norm": 0.5633062720298767, "learning_rate": 1.043655387416569e-06, "loss": 0.3255, "step": 8672 }, { "epoch": 2.4348680516563728, "grad_norm": 0.6193362474441528, "learning_rate": 1.0426568544617771e-06, "loss": 0.3331, "step": 8673 }, { "epoch": 2.435148792813026, "grad_norm": 0.6241822242736816, "learning_rate": 1.0416587438100367e-06, "loss": 0.3104, "step": 8674 }, { "epoch": 2.43542953396968, "grad_norm": 0.5509600639343262, "learning_rate": 1.0406610555678582e-06, "loss": 0.3432, "step": 8675 }, { "epoch": 2.4357102751263335, "grad_norm": 0.5371003746986389, "learning_rate": 1.0396637898417117e-06, "loss": 0.3349, "step": 8676 }, { "epoch": 2.435991016282987, "grad_norm": 0.5923648476600647, "learning_rate": 1.0386669467380167e-06, "loss": 0.3058, "step": 8677 }, { "epoch": 2.4362717574396404, "grad_norm": 0.5829152464866638, "learning_rate": 1.0376705263631525e-06, "loss": 0.3529, "step": 8678 }, { "epoch": 2.4365524985962943, "grad_norm": 0.5810061693191528, "learning_rate": 1.0366745288234497e-06, "loss": 0.2951, "step": 8679 }, { "epoch": 2.4368332397529477, "grad_norm": 0.5497332811355591, "learning_rate": 1.0356789542251939e-06, "loss": 0.3297, "step": 8680 }, { "epoch": 2.437113980909601, "grad_norm": 0.5494989156723022, "learning_rate": 1.034683802674628e-06, "loss": 0.3442, "step": 8681 }, { "epoch": 2.437394722066255, "grad_norm": 0.5286225080490112, "learning_rate": 1.0336890742779476e-06, "loss": 0.3357, "step": 8682 }, { "epoch": 2.4376754632229085, "grad_norm": 0.52973473072052, "learning_rate": 1.0326947691413053e-06, "loss": 0.3748, "step": 8683 }, { "epoch": 2.437956204379562, "grad_norm": 0.5699828863143921, "learning_rate": 1.0317008873708045e-06, "loss": 0.3565, "step": 8684 }, { "epoch": 2.4382369455362154, "grad_norm": 0.5908522009849548, "learning_rate": 1.03070742907251e-06, "loss": 0.3268, "step": 8685 }, { "epoch": 2.4385176866928693, "grad_norm": 0.5856167674064636, "learning_rate": 1.0297143943524345e-06, "loss": 0.3308, "step": 8686 }, { "epoch": 2.4387984278495227, "grad_norm": 0.5940700173377991, "learning_rate": 1.028721783316548e-06, "loss": 0.2994, "step": 8687 }, { "epoch": 2.439079169006176, "grad_norm": 0.5850594639778137, "learning_rate": 1.0277295960707788e-06, "loss": 0.329, "step": 8688 }, { "epoch": 2.43935991016283, "grad_norm": 0.5705891847610474, "learning_rate": 1.0267378327210036e-06, "loss": 0.3335, "step": 8689 }, { "epoch": 2.4396406513194835, "grad_norm": 0.5475537180900574, "learning_rate": 1.0257464933730587e-06, "loss": 0.3237, "step": 8690 }, { "epoch": 2.439921392476137, "grad_norm": 0.5784679651260376, "learning_rate": 1.024755578132735e-06, "loss": 0.3418, "step": 8691 }, { "epoch": 2.4402021336327904, "grad_norm": 0.518255889415741, "learning_rate": 1.0237650871057746e-06, "loss": 0.3549, "step": 8692 }, { "epoch": 2.4404828747894443, "grad_norm": 0.5716167688369751, "learning_rate": 1.0227750203978787e-06, "loss": 0.3188, "step": 8693 }, { "epoch": 2.4407636159460977, "grad_norm": 0.5474283695220947, "learning_rate": 1.0217853781147003e-06, "loss": 0.307, "step": 8694 }, { "epoch": 2.441044357102751, "grad_norm": 0.5570739507675171, "learning_rate": 1.0207961603618456e-06, "loss": 0.3073, "step": 8695 }, { "epoch": 2.441325098259405, "grad_norm": 0.5603153109550476, "learning_rate": 1.0198073672448811e-06, "loss": 0.29, "step": 8696 }, { "epoch": 2.4416058394160585, "grad_norm": 0.5592935681343079, "learning_rate": 1.0188189988693215e-06, "loss": 0.3273, "step": 8697 }, { "epoch": 2.441886580572712, "grad_norm": 0.6249144077301025, "learning_rate": 1.0178310553406429e-06, "loss": 0.3274, "step": 8698 }, { "epoch": 2.4421673217293653, "grad_norm": 0.5414302349090576, "learning_rate": 1.0168435367642693e-06, "loss": 0.3427, "step": 8699 }, { "epoch": 2.4424480628860192, "grad_norm": 0.512814998626709, "learning_rate": 1.0158564432455847e-06, "loss": 0.3502, "step": 8700 }, { "epoch": 2.4427288040426727, "grad_norm": 0.5602161288261414, "learning_rate": 1.014869774889925e-06, "loss": 0.2995, "step": 8701 }, { "epoch": 2.443009545199326, "grad_norm": 0.5614134073257446, "learning_rate": 1.0138835318025796e-06, "loss": 0.3221, "step": 8702 }, { "epoch": 2.44329028635598, "grad_norm": 0.5190013647079468, "learning_rate": 1.0128977140887968e-06, "loss": 0.3099, "step": 8703 }, { "epoch": 2.4435710275126334, "grad_norm": 0.5597667694091797, "learning_rate": 1.0119123218537752e-06, "loss": 0.3066, "step": 8704 }, { "epoch": 2.443851768669287, "grad_norm": 0.5079295039176941, "learning_rate": 1.010927355202671e-06, "loss": 0.3045, "step": 8705 }, { "epoch": 2.4441325098259403, "grad_norm": 0.597145140171051, "learning_rate": 1.009942814240593e-06, "loss": 0.3087, "step": 8706 }, { "epoch": 2.444413250982594, "grad_norm": 0.5442847609519958, "learning_rate": 1.0089586990726047e-06, "loss": 0.3124, "step": 8707 }, { "epoch": 2.4446939921392477, "grad_norm": 0.5378947854042053, "learning_rate": 1.007975009803726e-06, "loss": 0.2801, "step": 8708 }, { "epoch": 2.444974733295901, "grad_norm": 0.609731137752533, "learning_rate": 1.0069917465389285e-06, "loss": 0.3478, "step": 8709 }, { "epoch": 2.445255474452555, "grad_norm": 0.5179837346076965, "learning_rate": 1.006008909383142e-06, "loss": 0.3424, "step": 8710 }, { "epoch": 2.4455362156092084, "grad_norm": 0.5271161794662476, "learning_rate": 1.0050264984412467e-06, "loss": 0.3298, "step": 8711 }, { "epoch": 2.445816956765862, "grad_norm": 0.555071234703064, "learning_rate": 1.0040445138180816e-06, "loss": 0.3291, "step": 8712 }, { "epoch": 2.4460976979225153, "grad_norm": 0.5495772957801819, "learning_rate": 1.0030629556184367e-06, "loss": 0.3501, "step": 8713 }, { "epoch": 2.446378439079169, "grad_norm": 0.5728296637535095, "learning_rate": 1.002081823947057e-06, "loss": 0.2925, "step": 8714 }, { "epoch": 2.4466591802358226, "grad_norm": 0.572092592716217, "learning_rate": 1.0011011189086428e-06, "loss": 0.2899, "step": 8715 }, { "epoch": 2.446939921392476, "grad_norm": 0.6001694202423096, "learning_rate": 1.0001208406078516e-06, "loss": 0.2697, "step": 8716 }, { "epoch": 2.4472206625491295, "grad_norm": 0.5329112410545349, "learning_rate": 9.991409891492892e-07, "loss": 0.3375, "step": 8717 }, { "epoch": 2.4475014037057834, "grad_norm": 0.5405532121658325, "learning_rate": 9.98161564637522e-07, "loss": 0.3606, "step": 8718 }, { "epoch": 2.447782144862437, "grad_norm": 0.630767285823822, "learning_rate": 9.971825671770658e-07, "loss": 0.3045, "step": 8719 }, { "epoch": 2.4480628860190903, "grad_norm": 0.6453242897987366, "learning_rate": 9.962039968723952e-07, "loss": 0.2963, "step": 8720 }, { "epoch": 2.4483436271757437, "grad_norm": 0.5536431074142456, "learning_rate": 9.952258538279348e-07, "loss": 0.3714, "step": 8721 }, { "epoch": 2.4486243683323976, "grad_norm": 0.5230050086975098, "learning_rate": 9.942481381480684e-07, "loss": 0.341, "step": 8722 }, { "epoch": 2.448905109489051, "grad_norm": 0.5292918086051941, "learning_rate": 9.932708499371307e-07, "loss": 0.3405, "step": 8723 }, { "epoch": 2.4491858506457045, "grad_norm": 0.5841137766838074, "learning_rate": 9.922939892994104e-07, "loss": 0.2919, "step": 8724 }, { "epoch": 2.4494665918023584, "grad_norm": 0.5978755354881287, "learning_rate": 9.913175563391546e-07, "loss": 0.3034, "step": 8725 }, { "epoch": 2.449747332959012, "grad_norm": 0.5780662894248962, "learning_rate": 9.903415511605597e-07, "loss": 0.3214, "step": 8726 }, { "epoch": 2.4500280741156653, "grad_norm": 0.5812617540359497, "learning_rate": 9.893659738677807e-07, "loss": 0.3084, "step": 8727 }, { "epoch": 2.4503088152723187, "grad_norm": 0.557697594165802, "learning_rate": 9.883908245649253e-07, "loss": 0.3399, "step": 8728 }, { "epoch": 2.4505895564289726, "grad_norm": 0.543389081954956, "learning_rate": 9.874161033560532e-07, "loss": 0.3696, "step": 8729 }, { "epoch": 2.450870297585626, "grad_norm": 0.517784595489502, "learning_rate": 9.86441810345183e-07, "loss": 0.359, "step": 8730 }, { "epoch": 2.4511510387422795, "grad_norm": 0.6222788691520691, "learning_rate": 9.854679456362825e-07, "loss": 0.2722, "step": 8731 }, { "epoch": 2.4514317798989333, "grad_norm": 0.5975311994552612, "learning_rate": 9.844945093332797e-07, "loss": 0.3275, "step": 8732 }, { "epoch": 2.451712521055587, "grad_norm": 0.5764299035072327, "learning_rate": 9.835215015400506e-07, "loss": 0.2994, "step": 8733 }, { "epoch": 2.4519932622122402, "grad_norm": 0.5655746459960938, "learning_rate": 9.825489223604313e-07, "loss": 0.382, "step": 8734 }, { "epoch": 2.4522740033688937, "grad_norm": 0.5662592649459839, "learning_rate": 9.815767718982083e-07, "loss": 0.3239, "step": 8735 }, { "epoch": 2.4525547445255476, "grad_norm": 0.5751045942306519, "learning_rate": 9.806050502571224e-07, "loss": 0.3109, "step": 8736 }, { "epoch": 2.452835485682201, "grad_norm": 0.5780230760574341, "learning_rate": 9.79633757540871e-07, "loss": 0.373, "step": 8737 }, { "epoch": 2.4531162268388544, "grad_norm": 0.6236237287521362, "learning_rate": 9.786628938531034e-07, "loss": 0.3232, "step": 8738 }, { "epoch": 2.4533969679955083, "grad_norm": 0.6261851191520691, "learning_rate": 9.776924592974257e-07, "loss": 0.2989, "step": 8739 }, { "epoch": 2.4536777091521618, "grad_norm": 0.5853842496871948, "learning_rate": 9.767224539773944e-07, "loss": 0.3541, "step": 8740 }, { "epoch": 2.453958450308815, "grad_norm": 0.5706650614738464, "learning_rate": 9.757528779965236e-07, "loss": 0.3027, "step": 8741 }, { "epoch": 2.4542391914654687, "grad_norm": 0.5657103061676025, "learning_rate": 9.74783731458282e-07, "loss": 0.3169, "step": 8742 }, { "epoch": 2.4545199326221225, "grad_norm": 0.5295966267585754, "learning_rate": 9.738150144660879e-07, "loss": 0.3372, "step": 8743 }, { "epoch": 2.454800673778776, "grad_norm": 0.5562377572059631, "learning_rate": 9.7284672712332e-07, "loss": 0.3145, "step": 8744 }, { "epoch": 2.4550814149354294, "grad_norm": 0.6606711745262146, "learning_rate": 9.71878869533306e-07, "loss": 0.321, "step": 8745 }, { "epoch": 2.4553621560920833, "grad_norm": 0.5965161919593811, "learning_rate": 9.709114417993283e-07, "loss": 0.3008, "step": 8746 }, { "epoch": 2.4556428972487367, "grad_norm": 0.5543991923332214, "learning_rate": 9.699444440246276e-07, "loss": 0.3368, "step": 8747 }, { "epoch": 2.45592363840539, "grad_norm": 0.5702335834503174, "learning_rate": 9.689778763123935e-07, "loss": 0.3354, "step": 8748 }, { "epoch": 2.4562043795620436, "grad_norm": 0.5386536121368408, "learning_rate": 9.680117387657744e-07, "loss": 0.3613, "step": 8749 }, { "epoch": 2.4564851207186975, "grad_norm": 0.5534363985061646, "learning_rate": 9.670460314878694e-07, "loss": 0.3161, "step": 8750 }, { "epoch": 2.456765861875351, "grad_norm": 0.5873655080795288, "learning_rate": 9.660807545817308e-07, "loss": 0.2932, "step": 8751 }, { "epoch": 2.4570466030320044, "grad_norm": 0.5368070602416992, "learning_rate": 9.651159081503704e-07, "loss": 0.3087, "step": 8752 }, { "epoch": 2.4573273441886583, "grad_norm": 0.5483406186103821, "learning_rate": 9.641514922967476e-07, "loss": 0.3239, "step": 8753 }, { "epoch": 2.4576080853453117, "grad_norm": 0.5662078261375427, "learning_rate": 9.631875071237816e-07, "loss": 0.3549, "step": 8754 }, { "epoch": 2.457888826501965, "grad_norm": 0.539288341999054, "learning_rate": 9.622239527343409e-07, "loss": 0.3218, "step": 8755 }, { "epoch": 2.4581695676586186, "grad_norm": 0.5616505742073059, "learning_rate": 9.61260829231251e-07, "loss": 0.3092, "step": 8756 }, { "epoch": 2.4584503088152725, "grad_norm": 0.5700971484184265, "learning_rate": 9.60298136717291e-07, "loss": 0.3107, "step": 8757 }, { "epoch": 2.458731049971926, "grad_norm": 0.6075683832168579, "learning_rate": 9.593358752951904e-07, "loss": 0.2727, "step": 8758 }, { "epoch": 2.4590117911285794, "grad_norm": 0.5623923540115356, "learning_rate": 9.583740450676398e-07, "loss": 0.3617, "step": 8759 }, { "epoch": 2.4592925322852333, "grad_norm": 0.5814056992530823, "learning_rate": 9.574126461372762e-07, "loss": 0.3344, "step": 8760 }, { "epoch": 2.4595732734418867, "grad_norm": 0.5781075358390808, "learning_rate": 9.56451678606698e-07, "loss": 0.3265, "step": 8761 }, { "epoch": 2.45985401459854, "grad_norm": 0.553530752658844, "learning_rate": 9.554911425784507e-07, "loss": 0.3337, "step": 8762 }, { "epoch": 2.4601347557551936, "grad_norm": 0.5906181931495667, "learning_rate": 9.545310381550365e-07, "loss": 0.3293, "step": 8763 }, { "epoch": 2.4604154969118475, "grad_norm": 0.5802428126335144, "learning_rate": 9.535713654389139e-07, "loss": 0.3848, "step": 8764 }, { "epoch": 2.460696238068501, "grad_norm": 0.5183466672897339, "learning_rate": 9.52612124532491e-07, "loss": 0.313, "step": 8765 }, { "epoch": 2.4609769792251543, "grad_norm": 0.5322921276092529, "learning_rate": 9.51653315538133e-07, "loss": 0.3588, "step": 8766 }, { "epoch": 2.461257720381808, "grad_norm": 0.48973557353019714, "learning_rate": 9.506949385581593e-07, "loss": 0.3562, "step": 8767 }, { "epoch": 2.4615384615384617, "grad_norm": 0.6054965853691101, "learning_rate": 9.497369936948397e-07, "loss": 0.3743, "step": 8768 }, { "epoch": 2.461819202695115, "grad_norm": 0.5307208299636841, "learning_rate": 9.487794810504025e-07, "loss": 0.2946, "step": 8769 }, { "epoch": 2.4620999438517686, "grad_norm": 0.5321015119552612, "learning_rate": 9.478224007270253e-07, "loss": 0.3368, "step": 8770 }, { "epoch": 2.462380685008422, "grad_norm": 0.5543622970581055, "learning_rate": 9.468657528268432e-07, "loss": 0.3846, "step": 8771 }, { "epoch": 2.462661426165076, "grad_norm": 0.5620099306106567, "learning_rate": 9.45909537451944e-07, "loss": 0.3575, "step": 8772 }, { "epoch": 2.4629421673217293, "grad_norm": 0.5877784490585327, "learning_rate": 9.449537547043664e-07, "loss": 0.3329, "step": 8773 }, { "epoch": 2.4632229084783828, "grad_norm": 0.5643718242645264, "learning_rate": 9.439984046861089e-07, "loss": 0.3624, "step": 8774 }, { "epoch": 2.4635036496350367, "grad_norm": 0.5258351564407349, "learning_rate": 9.430434874991173e-07, "loss": 0.3522, "step": 8775 }, { "epoch": 2.46378439079169, "grad_norm": 0.5531173348426819, "learning_rate": 9.420890032452984e-07, "loss": 0.2925, "step": 8776 }, { "epoch": 2.4640651319483435, "grad_norm": 0.5484004616737366, "learning_rate": 9.411349520265056e-07, "loss": 0.3327, "step": 8777 }, { "epoch": 2.464345873104997, "grad_norm": 0.5880656242370605, "learning_rate": 9.401813339445498e-07, "loss": 0.2846, "step": 8778 }, { "epoch": 2.464626614261651, "grad_norm": 0.5654290914535522, "learning_rate": 9.392281491011967e-07, "loss": 0.3181, "step": 8779 }, { "epoch": 2.4649073554183043, "grad_norm": 0.5417488813400269, "learning_rate": 9.382753975981617e-07, "loss": 0.3467, "step": 8780 }, { "epoch": 2.4651880965749577, "grad_norm": 0.5313130021095276, "learning_rate": 9.373230795371197e-07, "loss": 0.3315, "step": 8781 }, { "epoch": 2.4654688377316116, "grad_norm": 0.5896884202957153, "learning_rate": 9.363711950196929e-07, "loss": 0.2942, "step": 8782 }, { "epoch": 2.465749578888265, "grad_norm": 0.5709277391433716, "learning_rate": 9.354197441474638e-07, "loss": 0.2942, "step": 8783 }, { "epoch": 2.4660303200449185, "grad_norm": 0.5979763269424438, "learning_rate": 9.344687270219632e-07, "loss": 0.2903, "step": 8784 }, { "epoch": 2.466311061201572, "grad_norm": 0.5077071189880371, "learning_rate": 9.335181437446772e-07, "loss": 0.3625, "step": 8785 }, { "epoch": 2.466591802358226, "grad_norm": 0.6212291121482849, "learning_rate": 9.325679944170484e-07, "loss": 0.3066, "step": 8786 }, { "epoch": 2.4668725435148793, "grad_norm": 0.5817729234695435, "learning_rate": 9.316182791404682e-07, "loss": 0.3081, "step": 8787 }, { "epoch": 2.4671532846715327, "grad_norm": 0.5042723417282104, "learning_rate": 9.30668998016287e-07, "loss": 0.3098, "step": 8788 }, { "epoch": 2.4674340258281866, "grad_norm": 0.6605865359306335, "learning_rate": 9.297201511458049e-07, "loss": 0.363, "step": 8789 }, { "epoch": 2.46771476698484, "grad_norm": 0.5550678968429565, "learning_rate": 9.287717386302753e-07, "loss": 0.3285, "step": 8790 }, { "epoch": 2.4679955081414935, "grad_norm": 0.5506582260131836, "learning_rate": 9.278237605709089e-07, "loss": 0.3299, "step": 8791 }, { "epoch": 2.468276249298147, "grad_norm": 0.5243185758590698, "learning_rate": 9.268762170688673e-07, "loss": 0.3295, "step": 8792 }, { "epoch": 2.468556990454801, "grad_norm": 0.5579249858856201, "learning_rate": 9.25929108225268e-07, "loss": 0.3221, "step": 8793 }, { "epoch": 2.4688377316114543, "grad_norm": 0.5396980047225952, "learning_rate": 9.249824341411795e-07, "loss": 0.3491, "step": 8794 }, { "epoch": 2.4691184727681077, "grad_norm": 0.5205463767051697, "learning_rate": 9.240361949176229e-07, "loss": 0.3462, "step": 8795 }, { "epoch": 2.4693992139247616, "grad_norm": 0.5658536553382874, "learning_rate": 9.230903906555788e-07, "loss": 0.3009, "step": 8796 }, { "epoch": 2.469679955081415, "grad_norm": 0.5527428388595581, "learning_rate": 9.221450214559741e-07, "loss": 0.3384, "step": 8797 }, { "epoch": 2.4699606962380685, "grad_norm": 0.5233757495880127, "learning_rate": 9.212000874196953e-07, "loss": 0.4131, "step": 8798 }, { "epoch": 2.470241437394722, "grad_norm": 0.5014605522155762, "learning_rate": 9.202555886475789e-07, "loss": 0.3034, "step": 8799 }, { "epoch": 2.470522178551376, "grad_norm": 0.5935212969779968, "learning_rate": 9.193115252404144e-07, "loss": 0.3101, "step": 8800 }, { "epoch": 2.4708029197080292, "grad_norm": 0.547580897808075, "learning_rate": 9.18367897298949e-07, "loss": 0.3134, "step": 8801 }, { "epoch": 2.4710836608646827, "grad_norm": 0.5496137142181396, "learning_rate": 9.174247049238788e-07, "loss": 0.3206, "step": 8802 }, { "epoch": 2.4713644020213366, "grad_norm": 0.5276779532432556, "learning_rate": 9.164819482158571e-07, "loss": 0.3357, "step": 8803 }, { "epoch": 2.47164514317799, "grad_norm": 0.6099643111228943, "learning_rate": 9.155396272754874e-07, "loss": 0.3313, "step": 8804 }, { "epoch": 2.4719258843346434, "grad_norm": 0.6051143407821655, "learning_rate": 9.145977422033298e-07, "loss": 0.3285, "step": 8805 }, { "epoch": 2.472206625491297, "grad_norm": 0.5772519111633301, "learning_rate": 9.13656293099896e-07, "loss": 0.3106, "step": 8806 }, { "epoch": 2.4724873666479508, "grad_norm": 0.5344874262809753, "learning_rate": 9.127152800656497e-07, "loss": 0.369, "step": 8807 }, { "epoch": 2.472768107804604, "grad_norm": 0.5589173436164856, "learning_rate": 9.117747032010132e-07, "loss": 0.3101, "step": 8808 }, { "epoch": 2.4730488489612577, "grad_norm": 0.5959139466285706, "learning_rate": 9.108345626063559e-07, "loss": 0.3601, "step": 8809 }, { "epoch": 2.473329590117911, "grad_norm": 0.5017551183700562, "learning_rate": 9.098948583820066e-07, "loss": 0.3705, "step": 8810 }, { "epoch": 2.473610331274565, "grad_norm": 0.526187002658844, "learning_rate": 9.089555906282438e-07, "loss": 0.3363, "step": 8811 }, { "epoch": 2.4738910724312184, "grad_norm": 0.6104418635368347, "learning_rate": 9.080167594452977e-07, "loss": 0.3358, "step": 8812 }, { "epoch": 2.474171813587872, "grad_norm": 0.5797775387763977, "learning_rate": 9.070783649333587e-07, "loss": 0.332, "step": 8813 }, { "epoch": 2.4744525547445253, "grad_norm": 0.5935958027839661, "learning_rate": 9.061404071925628e-07, "loss": 0.3541, "step": 8814 }, { "epoch": 2.474733295901179, "grad_norm": 0.5519028306007385, "learning_rate": 9.052028863230056e-07, "loss": 0.3449, "step": 8815 }, { "epoch": 2.4750140370578326, "grad_norm": 0.5282933712005615, "learning_rate": 9.042658024247314e-07, "loss": 0.3393, "step": 8816 }, { "epoch": 2.475294778214486, "grad_norm": 0.5654563903808594, "learning_rate": 9.033291555977414e-07, "loss": 0.2902, "step": 8817 }, { "epoch": 2.47557551937114, "grad_norm": 0.556206464767456, "learning_rate": 9.023929459419894e-07, "loss": 0.3482, "step": 8818 }, { "epoch": 2.4758562605277934, "grad_norm": 0.6563774347305298, "learning_rate": 9.014571735573796e-07, "loss": 0.2735, "step": 8819 }, { "epoch": 2.476137001684447, "grad_norm": 0.5940157175064087, "learning_rate": 9.005218385437742e-07, "loss": 0.3285, "step": 8820 }, { "epoch": 2.4764177428411003, "grad_norm": 0.557159423828125, "learning_rate": 8.995869410009855e-07, "loss": 0.3247, "step": 8821 }, { "epoch": 2.476698483997754, "grad_norm": 0.4959602355957031, "learning_rate": 8.986524810287783e-07, "loss": 0.3567, "step": 8822 }, { "epoch": 2.4769792251544076, "grad_norm": 0.5449239611625671, "learning_rate": 8.97718458726875e-07, "loss": 0.3645, "step": 8823 }, { "epoch": 2.477259966311061, "grad_norm": 0.5151669383049011, "learning_rate": 8.967848741949464e-07, "loss": 0.3351, "step": 8824 }, { "epoch": 2.477540707467715, "grad_norm": 0.5070462822914124, "learning_rate": 8.958517275326207e-07, "loss": 0.3934, "step": 8825 }, { "epoch": 2.4778214486243684, "grad_norm": 0.5659955143928528, "learning_rate": 8.949190188394768e-07, "loss": 0.3105, "step": 8826 }, { "epoch": 2.478102189781022, "grad_norm": 0.6299929022789001, "learning_rate": 8.93986748215046e-07, "loss": 0.3294, "step": 8827 }, { "epoch": 2.4783829309376753, "grad_norm": 0.5061848759651184, "learning_rate": 8.93054915758817e-07, "loss": 0.3352, "step": 8828 }, { "epoch": 2.478663672094329, "grad_norm": 0.5764517784118652, "learning_rate": 8.921235215702268e-07, "loss": 0.3123, "step": 8829 }, { "epoch": 2.4789444132509826, "grad_norm": 0.5483461618423462, "learning_rate": 8.911925657486709e-07, "loss": 0.3129, "step": 8830 }, { "epoch": 2.479225154407636, "grad_norm": 0.5682786703109741, "learning_rate": 8.902620483934915e-07, "loss": 0.3263, "step": 8831 }, { "epoch": 2.47950589556429, "grad_norm": 0.5124207735061646, "learning_rate": 8.89331969603991e-07, "loss": 0.3681, "step": 8832 }, { "epoch": 2.4797866367209433, "grad_norm": 0.5345808267593384, "learning_rate": 8.884023294794202e-07, "loss": 0.3145, "step": 8833 }, { "epoch": 2.480067377877597, "grad_norm": 0.5861395597457886, "learning_rate": 8.87473128118983e-07, "loss": 0.3744, "step": 8834 }, { "epoch": 2.4803481190342502, "grad_norm": 0.533804178237915, "learning_rate": 8.865443656218409e-07, "loss": 0.3531, "step": 8835 }, { "epoch": 2.480628860190904, "grad_norm": 0.5505049824714661, "learning_rate": 8.856160420871029e-07, "loss": 0.3218, "step": 8836 }, { "epoch": 2.4809096013475576, "grad_norm": 0.572982132434845, "learning_rate": 8.846881576138366e-07, "loss": 0.3578, "step": 8837 }, { "epoch": 2.481190342504211, "grad_norm": 0.6158345341682434, "learning_rate": 8.837607123010572e-07, "loss": 0.3306, "step": 8838 }, { "epoch": 2.481471083660865, "grad_norm": 0.5887256860733032, "learning_rate": 8.828337062477387e-07, "loss": 0.3081, "step": 8839 }, { "epoch": 2.4817518248175183, "grad_norm": 0.5889377593994141, "learning_rate": 8.819071395528044e-07, "loss": 0.2857, "step": 8840 }, { "epoch": 2.4820325659741718, "grad_norm": 0.5727645754814148, "learning_rate": 8.809810123151302e-07, "loss": 0.3135, "step": 8841 }, { "epoch": 2.482313307130825, "grad_norm": 0.535312294960022, "learning_rate": 8.800553246335475e-07, "loss": 0.3102, "step": 8842 }, { "epoch": 2.482594048287479, "grad_norm": 0.5249013900756836, "learning_rate": 8.791300766068416e-07, "loss": 0.3297, "step": 8843 }, { "epoch": 2.4828747894441325, "grad_norm": 0.5815885663032532, "learning_rate": 8.782052683337466e-07, "loss": 0.352, "step": 8844 }, { "epoch": 2.483155530600786, "grad_norm": 0.64006108045578, "learning_rate": 8.772808999129551e-07, "loss": 0.2846, "step": 8845 }, { "epoch": 2.48343627175744, "grad_norm": 0.5870609879493713, "learning_rate": 8.763569714431075e-07, "loss": 0.3247, "step": 8846 }, { "epoch": 2.4837170129140933, "grad_norm": 0.6090686321258545, "learning_rate": 8.754334830228012e-07, "loss": 0.3026, "step": 8847 }, { "epoch": 2.4839977540707467, "grad_norm": 0.6023396253585815, "learning_rate": 8.74510434750585e-07, "loss": 0.3225, "step": 8848 }, { "epoch": 2.4842784952274, "grad_norm": 0.5896828770637512, "learning_rate": 8.735878267249593e-07, "loss": 0.3483, "step": 8849 }, { "epoch": 2.484559236384054, "grad_norm": 0.5239513516426086, "learning_rate": 8.726656590443816e-07, "loss": 0.3149, "step": 8850 }, { "epoch": 2.4848399775407075, "grad_norm": 0.5483566522598267, "learning_rate": 8.717439318072568e-07, "loss": 0.3037, "step": 8851 }, { "epoch": 2.485120718697361, "grad_norm": 0.5274941921234131, "learning_rate": 8.708226451119495e-07, "loss": 0.3586, "step": 8852 }, { "epoch": 2.485401459854015, "grad_norm": 0.6059607267379761, "learning_rate": 8.699017990567704e-07, "loss": 0.3171, "step": 8853 }, { "epoch": 2.4856822010106683, "grad_norm": 0.5647132992744446, "learning_rate": 8.689813937399887e-07, "loss": 0.2982, "step": 8854 }, { "epoch": 2.4859629421673217, "grad_norm": 0.5836697816848755, "learning_rate": 8.680614292598244e-07, "loss": 0.3333, "step": 8855 }, { "epoch": 2.486243683323975, "grad_norm": 0.5829404592514038, "learning_rate": 8.671419057144475e-07, "loss": 0.3308, "step": 8856 }, { "epoch": 2.486524424480629, "grad_norm": 0.5423857569694519, "learning_rate": 8.662228232019876e-07, "loss": 0.3304, "step": 8857 }, { "epoch": 2.4868051656372825, "grad_norm": 0.6207121014595032, "learning_rate": 8.653041818205204e-07, "loss": 0.3718, "step": 8858 }, { "epoch": 2.487085906793936, "grad_norm": 0.590457022190094, "learning_rate": 8.6438598166808e-07, "loss": 0.312, "step": 8859 }, { "epoch": 2.4873666479505894, "grad_norm": 0.5615829825401306, "learning_rate": 8.634682228426505e-07, "loss": 0.2747, "step": 8860 }, { "epoch": 2.4876473891072433, "grad_norm": 0.539747953414917, "learning_rate": 8.625509054421671e-07, "loss": 0.3144, "step": 8861 }, { "epoch": 2.4879281302638967, "grad_norm": 0.5582963228225708, "learning_rate": 8.616340295645231e-07, "loss": 0.3043, "step": 8862 }, { "epoch": 2.48820887142055, "grad_norm": 0.5768175721168518, "learning_rate": 8.607175953075597e-07, "loss": 0.2978, "step": 8863 }, { "epoch": 2.4884896125772036, "grad_norm": 0.561724841594696, "learning_rate": 8.598016027690753e-07, "loss": 0.3553, "step": 8864 }, { "epoch": 2.4887703537338575, "grad_norm": 0.517392098903656, "learning_rate": 8.588860520468167e-07, "loss": 0.332, "step": 8865 }, { "epoch": 2.489051094890511, "grad_norm": 0.5608408451080322, "learning_rate": 8.579709432384876e-07, "loss": 0.2792, "step": 8866 }, { "epoch": 2.4893318360471643, "grad_norm": 0.6274533271789551, "learning_rate": 8.570562764417406e-07, "loss": 0.3592, "step": 8867 }, { "epoch": 2.4896125772038182, "grad_norm": 0.5518200397491455, "learning_rate": 8.561420517541846e-07, "loss": 0.3241, "step": 8868 }, { "epoch": 2.4898933183604717, "grad_norm": 0.5387300848960876, "learning_rate": 8.55228269273381e-07, "loss": 0.2999, "step": 8869 }, { "epoch": 2.490174059517125, "grad_norm": 0.5818502902984619, "learning_rate": 8.543149290968422e-07, "loss": 0.3076, "step": 8870 }, { "epoch": 2.4904548006737786, "grad_norm": 0.4942573308944702, "learning_rate": 8.53402031322032e-07, "loss": 0.3066, "step": 8871 }, { "epoch": 2.4907355418304324, "grad_norm": 0.5426687002182007, "learning_rate": 8.52489576046373e-07, "loss": 0.3359, "step": 8872 }, { "epoch": 2.491016282987086, "grad_norm": 0.6341243982315063, "learning_rate": 8.51577563367233e-07, "loss": 0.364, "step": 8873 }, { "epoch": 2.4912970241437393, "grad_norm": 0.5375432372093201, "learning_rate": 8.506659933819395e-07, "loss": 0.3095, "step": 8874 }, { "epoch": 2.491577765300393, "grad_norm": 0.5210657119750977, "learning_rate": 8.497548661877669e-07, "loss": 0.3357, "step": 8875 }, { "epoch": 2.4918585064570467, "grad_norm": 0.5737097263336182, "learning_rate": 8.488441818819476e-07, "loss": 0.3275, "step": 8876 }, { "epoch": 2.4921392476137, "grad_norm": 0.5249507427215576, "learning_rate": 8.479339405616633e-07, "loss": 0.3435, "step": 8877 }, { "epoch": 2.4924199887703535, "grad_norm": 0.5626519322395325, "learning_rate": 8.470241423240472e-07, "loss": 0.3107, "step": 8878 }, { "epoch": 2.4927007299270074, "grad_norm": 0.5387868881225586, "learning_rate": 8.461147872661902e-07, "loss": 0.317, "step": 8879 }, { "epoch": 2.492981471083661, "grad_norm": 0.5167239308357239, "learning_rate": 8.452058754851306e-07, "loss": 0.3349, "step": 8880 }, { "epoch": 2.4932622122403143, "grad_norm": 0.5811373591423035, "learning_rate": 8.442974070778643e-07, "loss": 0.3044, "step": 8881 }, { "epoch": 2.493542953396968, "grad_norm": 0.59946209192276, "learning_rate": 8.433893821413358e-07, "loss": 0.3655, "step": 8882 }, { "epoch": 2.4938236945536216, "grad_norm": 0.49584391713142395, "learning_rate": 8.424818007724434e-07, "loss": 0.3858, "step": 8883 }, { "epoch": 2.494104435710275, "grad_norm": 0.5428626537322998, "learning_rate": 8.415746630680405e-07, "loss": 0.3397, "step": 8884 }, { "epoch": 2.4943851768669285, "grad_norm": 0.6168662905693054, "learning_rate": 8.406679691249281e-07, "loss": 0.3104, "step": 8885 }, { "epoch": 2.4946659180235824, "grad_norm": 0.6244155764579773, "learning_rate": 8.397617190398671e-07, "loss": 0.269, "step": 8886 }, { "epoch": 2.494946659180236, "grad_norm": 0.5118817090988159, "learning_rate": 8.388559129095625e-07, "loss": 0.3109, "step": 8887 }, { "epoch": 2.4952274003368893, "grad_norm": 0.5412524938583374, "learning_rate": 8.379505508306801e-07, "loss": 0.342, "step": 8888 }, { "epoch": 2.495508141493543, "grad_norm": 0.5388503670692444, "learning_rate": 8.370456328998333e-07, "loss": 0.3085, "step": 8889 }, { "epoch": 2.4957888826501966, "grad_norm": 0.5854045748710632, "learning_rate": 8.36141159213587e-07, "loss": 0.3123, "step": 8890 }, { "epoch": 2.49606962380685, "grad_norm": 0.6063563227653503, "learning_rate": 8.352371298684641e-07, "loss": 0.3666, "step": 8891 }, { "epoch": 2.4963503649635035, "grad_norm": 0.60853511095047, "learning_rate": 8.343335449609347e-07, "loss": 0.3091, "step": 8892 }, { "epoch": 2.4966311061201574, "grad_norm": 0.5416209697723389, "learning_rate": 8.334304045874248e-07, "loss": 0.3229, "step": 8893 }, { "epoch": 2.496911847276811, "grad_norm": 0.5660837292671204, "learning_rate": 8.325277088443129e-07, "loss": 0.2786, "step": 8894 }, { "epoch": 2.4971925884334643, "grad_norm": 0.5210088491439819, "learning_rate": 8.316254578279276e-07, "loss": 0.3188, "step": 8895 }, { "epoch": 2.497473329590118, "grad_norm": 0.6038588285446167, "learning_rate": 8.307236516345524e-07, "loss": 0.3151, "step": 8896 }, { "epoch": 2.4977540707467716, "grad_norm": 0.552193284034729, "learning_rate": 8.298222903604225e-07, "loss": 0.3382, "step": 8897 }, { "epoch": 2.498034811903425, "grad_norm": 0.5803182721138, "learning_rate": 8.289213741017238e-07, "loss": 0.3278, "step": 8898 }, { "epoch": 2.4983155530600785, "grad_norm": 0.6113681793212891, "learning_rate": 8.280209029545993e-07, "loss": 0.3152, "step": 8899 }, { "epoch": 2.4985962942167323, "grad_norm": 0.5139860510826111, "learning_rate": 8.27120877015139e-07, "loss": 0.3099, "step": 8900 }, { "epoch": 2.498877035373386, "grad_norm": 0.6146509051322937, "learning_rate": 8.262212963793903e-07, "loss": 0.2872, "step": 8901 }, { "epoch": 2.4991577765300392, "grad_norm": 0.6226741075515747, "learning_rate": 8.253221611433481e-07, "loss": 0.3635, "step": 8902 }, { "epoch": 2.499438517686693, "grad_norm": 0.5823273658752441, "learning_rate": 8.244234714029664e-07, "loss": 0.2683, "step": 8903 }, { "epoch": 2.4997192588433466, "grad_norm": 0.5751005411148071, "learning_rate": 8.23525227254145e-07, "loss": 0.3014, "step": 8904 }, { "epoch": 2.5, "grad_norm": 0.5652390718460083, "learning_rate": 8.226274287927388e-07, "loss": 0.3097, "step": 8905 }, { "epoch": 2.5002807411566534, "grad_norm": 0.5503231287002563, "learning_rate": 8.217300761145569e-07, "loss": 0.3563, "step": 8906 }, { "epoch": 2.500561482313307, "grad_norm": 0.5338963866233826, "learning_rate": 8.208331693153577e-07, "loss": 0.3417, "step": 8907 }, { "epoch": 2.5008422234699608, "grad_norm": 0.5708197951316833, "learning_rate": 8.199367084908544e-07, "loss": 0.3123, "step": 8908 }, { "epoch": 2.501122964626614, "grad_norm": 0.5358381271362305, "learning_rate": 8.190406937367123e-07, "loss": 0.3572, "step": 8909 }, { "epoch": 2.501403705783268, "grad_norm": 0.5544228553771973, "learning_rate": 8.181451251485461e-07, "loss": 0.3483, "step": 8910 }, { "epoch": 2.5016844469399215, "grad_norm": 0.49938660860061646, "learning_rate": 8.172500028219283e-07, "loss": 0.3211, "step": 8911 }, { "epoch": 2.501965188096575, "grad_norm": 0.5111168026924133, "learning_rate": 8.163553268523777e-07, "loss": 0.3327, "step": 8912 }, { "epoch": 2.5022459292532284, "grad_norm": 0.5480323433876038, "learning_rate": 8.154610973353722e-07, "loss": 0.3645, "step": 8913 }, { "epoch": 2.502526670409882, "grad_norm": 0.5683262348175049, "learning_rate": 8.145673143663347e-07, "loss": 0.3696, "step": 8914 }, { "epoch": 2.5028074115665357, "grad_norm": 0.5247148275375366, "learning_rate": 8.136739780406472e-07, "loss": 0.3248, "step": 8915 }, { "epoch": 2.503088152723189, "grad_norm": 0.5809712409973145, "learning_rate": 8.127810884536402e-07, "loss": 0.3315, "step": 8916 }, { "epoch": 2.5033688938798426, "grad_norm": 0.5839601755142212, "learning_rate": 8.118886457005954e-07, "loss": 0.2789, "step": 8917 }, { "epoch": 2.5036496350364965, "grad_norm": 0.5308005213737488, "learning_rate": 8.109966498767497e-07, "loss": 0.2718, "step": 8918 }, { "epoch": 2.50393037619315, "grad_norm": 0.5514929294586182, "learning_rate": 8.101051010772937e-07, "loss": 0.3064, "step": 8919 }, { "epoch": 2.5042111173498034, "grad_norm": 0.5309748649597168, "learning_rate": 8.092139993973641e-07, "loss": 0.3732, "step": 8920 }, { "epoch": 2.504491858506457, "grad_norm": 0.553950846195221, "learning_rate": 8.083233449320576e-07, "loss": 0.3305, "step": 8921 }, { "epoch": 2.5047725996631107, "grad_norm": 0.5077446103096008, "learning_rate": 8.07433137776416e-07, "loss": 0.3318, "step": 8922 }, { "epoch": 2.505053340819764, "grad_norm": 0.5609468817710876, "learning_rate": 8.065433780254395e-07, "loss": 0.3329, "step": 8923 }, { "epoch": 2.5053340819764176, "grad_norm": 0.5669708847999573, "learning_rate": 8.056540657740747e-07, "loss": 0.2633, "step": 8924 }, { "epoch": 2.5056148231330715, "grad_norm": 0.6168059706687927, "learning_rate": 8.047652011172264e-07, "loss": 0.3286, "step": 8925 }, { "epoch": 2.505895564289725, "grad_norm": 0.5560932755470276, "learning_rate": 8.038767841497475e-07, "loss": 0.3118, "step": 8926 }, { "epoch": 2.5061763054463784, "grad_norm": 0.5844700336456299, "learning_rate": 8.029888149664434e-07, "loss": 0.2957, "step": 8927 }, { "epoch": 2.506457046603032, "grad_norm": 0.5862390995025635, "learning_rate": 8.021012936620737e-07, "loss": 0.3175, "step": 8928 }, { "epoch": 2.5067377877596857, "grad_norm": 0.5474711656570435, "learning_rate": 8.012142203313484e-07, "loss": 0.331, "step": 8929 }, { "epoch": 2.507018528916339, "grad_norm": 0.5384641885757446, "learning_rate": 8.003275950689321e-07, "loss": 0.3374, "step": 8930 }, { "epoch": 2.5072992700729926, "grad_norm": 0.5157870650291443, "learning_rate": 7.994414179694388e-07, "loss": 0.278, "step": 8931 }, { "epoch": 2.5075800112296465, "grad_norm": 0.5530809760093689, "learning_rate": 7.985556891274349e-07, "loss": 0.3128, "step": 8932 }, { "epoch": 2.5078607523863, "grad_norm": 0.5196710228919983, "learning_rate": 7.976704086374415e-07, "loss": 0.3215, "step": 8933 }, { "epoch": 2.5081414935429533, "grad_norm": 0.5491336584091187, "learning_rate": 7.967855765939286e-07, "loss": 0.3174, "step": 8934 }, { "epoch": 2.508422234699607, "grad_norm": 0.5538433790206909, "learning_rate": 7.959011930913219e-07, "loss": 0.329, "step": 8935 }, { "epoch": 2.5087029758562607, "grad_norm": 0.5808168053627014, "learning_rate": 7.950172582239957e-07, "loss": 0.2845, "step": 8936 }, { "epoch": 2.508983717012914, "grad_norm": 0.491447389125824, "learning_rate": 7.94133772086279e-07, "loss": 0.3353, "step": 8937 }, { "epoch": 2.5092644581695676, "grad_norm": 0.5617589950561523, "learning_rate": 7.932507347724522e-07, "loss": 0.314, "step": 8938 }, { "epoch": 2.5095451993262214, "grad_norm": 0.48420363664627075, "learning_rate": 7.923681463767452e-07, "loss": 0.3696, "step": 8939 }, { "epoch": 2.509825940482875, "grad_norm": 0.5282022953033447, "learning_rate": 7.914860069933455e-07, "loss": 0.2646, "step": 8940 }, { "epoch": 2.5101066816395283, "grad_norm": 0.5548469424247742, "learning_rate": 7.906043167163874e-07, "loss": 0.3555, "step": 8941 }, { "epoch": 2.5103874227961818, "grad_norm": 0.482534259557724, "learning_rate": 7.89723075639961e-07, "loss": 0.3473, "step": 8942 }, { "epoch": 2.5106681639528357, "grad_norm": 0.5801131129264832, "learning_rate": 7.888422838581056e-07, "loss": 0.3502, "step": 8943 }, { "epoch": 2.510948905109489, "grad_norm": 0.5397019386291504, "learning_rate": 7.879619414648121e-07, "loss": 0.268, "step": 8944 }, { "epoch": 2.5112296462661425, "grad_norm": 0.46736371517181396, "learning_rate": 7.870820485540298e-07, "loss": 0.3665, "step": 8945 }, { "epoch": 2.5115103874227964, "grad_norm": 0.5036104321479797, "learning_rate": 7.862026052196514e-07, "loss": 0.3464, "step": 8946 }, { "epoch": 2.51179112857945, "grad_norm": 0.6011244654655457, "learning_rate": 7.853236115555285e-07, "loss": 0.3021, "step": 8947 }, { "epoch": 2.5120718697361033, "grad_norm": 0.5795788168907166, "learning_rate": 7.844450676554604e-07, "loss": 0.326, "step": 8948 }, { "epoch": 2.5123526108927567, "grad_norm": 0.5732229948043823, "learning_rate": 7.83566973613199e-07, "loss": 0.3181, "step": 8949 }, { "epoch": 2.51263335204941, "grad_norm": 0.5182971954345703, "learning_rate": 7.826893295224509e-07, "loss": 0.3418, "step": 8950 }, { "epoch": 2.512914093206064, "grad_norm": 0.6016291975975037, "learning_rate": 7.818121354768704e-07, "loss": 0.3706, "step": 8951 }, { "epoch": 2.5131948343627175, "grad_norm": 0.6650599837303162, "learning_rate": 7.80935391570069e-07, "loss": 0.2721, "step": 8952 }, { "epoch": 2.5134755755193714, "grad_norm": 0.49425604939460754, "learning_rate": 7.800590978956058e-07, "loss": 0.3059, "step": 8953 }, { "epoch": 2.513756316676025, "grad_norm": 0.5239137411117554, "learning_rate": 7.791832545469924e-07, "loss": 0.3131, "step": 8954 }, { "epoch": 2.5140370578326783, "grad_norm": 0.5117790699005127, "learning_rate": 7.783078616176959e-07, "loss": 0.3273, "step": 8955 }, { "epoch": 2.5143177989893317, "grad_norm": 0.5523049831390381, "learning_rate": 7.774329192011304e-07, "loss": 0.3449, "step": 8956 }, { "epoch": 2.514598540145985, "grad_norm": 0.5536803603172302, "learning_rate": 7.765584273906662e-07, "loss": 0.3483, "step": 8957 }, { "epoch": 2.514879281302639, "grad_norm": 0.5914592146873474, "learning_rate": 7.756843862796215e-07, "loss": 0.3217, "step": 8958 }, { "epoch": 2.5151600224592925, "grad_norm": 0.5371770858764648, "learning_rate": 7.748107959612711e-07, "loss": 0.3223, "step": 8959 }, { "epoch": 2.5154407636159464, "grad_norm": 0.5213318467140198, "learning_rate": 7.739376565288382e-07, "loss": 0.3054, "step": 8960 }, { "epoch": 2.5157215047726, "grad_norm": 0.4887320399284363, "learning_rate": 7.73064968075497e-07, "loss": 0.3586, "step": 8961 }, { "epoch": 2.5160022459292533, "grad_norm": 0.5178138017654419, "learning_rate": 7.721927306943783e-07, "loss": 0.3231, "step": 8962 }, { "epoch": 2.5162829870859067, "grad_norm": 0.5341764688491821, "learning_rate": 7.713209444785586e-07, "loss": 0.3341, "step": 8963 }, { "epoch": 2.51656372824256, "grad_norm": 0.5665843486785889, "learning_rate": 7.704496095210729e-07, "loss": 0.3415, "step": 8964 }, { "epoch": 2.516844469399214, "grad_norm": 0.5507887601852417, "learning_rate": 7.695787259149029e-07, "loss": 0.3028, "step": 8965 }, { "epoch": 2.5171252105558675, "grad_norm": 0.5506218671798706, "learning_rate": 7.687082937529827e-07, "loss": 0.3306, "step": 8966 }, { "epoch": 2.517405951712521, "grad_norm": 0.5599642395973206, "learning_rate": 7.678383131282024e-07, "loss": 0.3473, "step": 8967 }, { "epoch": 2.517686692869175, "grad_norm": 0.548728346824646, "learning_rate": 7.669687841333978e-07, "loss": 0.3222, "step": 8968 }, { "epoch": 2.5179674340258282, "grad_norm": 0.44374653697013855, "learning_rate": 7.660997068613607e-07, "loss": 0.3252, "step": 8969 }, { "epoch": 2.5182481751824817, "grad_norm": 0.5944491624832153, "learning_rate": 7.652310814048358e-07, "loss": 0.2953, "step": 8970 }, { "epoch": 2.518528916339135, "grad_norm": 0.5412534475326538, "learning_rate": 7.643629078565141e-07, "loss": 0.3049, "step": 8971 }, { "epoch": 2.518809657495789, "grad_norm": 0.5190032720565796, "learning_rate": 7.634951863090445e-07, "loss": 0.3413, "step": 8972 }, { "epoch": 2.5190903986524424, "grad_norm": 0.5591129660606384, "learning_rate": 7.626279168550221e-07, "loss": 0.3227, "step": 8973 }, { "epoch": 2.519371139809096, "grad_norm": 0.524470865726471, "learning_rate": 7.61761099586999e-07, "loss": 0.3242, "step": 8974 }, { "epoch": 2.5196518809657498, "grad_norm": 0.5753305554389954, "learning_rate": 7.60894734597476e-07, "loss": 0.2893, "step": 8975 }, { "epoch": 2.519932622122403, "grad_norm": 0.5870516896247864, "learning_rate": 7.600288219789043e-07, "loss": 0.3517, "step": 8976 }, { "epoch": 2.5202133632790567, "grad_norm": 0.5744442343711853, "learning_rate": 7.59163361823691e-07, "loss": 0.3036, "step": 8977 }, { "epoch": 2.52049410443571, "grad_norm": 0.6032195091247559, "learning_rate": 7.582983542241906e-07, "loss": 0.3084, "step": 8978 }, { "epoch": 2.520774845592364, "grad_norm": 0.4968772530555725, "learning_rate": 7.574337992727137e-07, "loss": 0.3374, "step": 8979 }, { "epoch": 2.5210555867490174, "grad_norm": 0.5651487112045288, "learning_rate": 7.565696970615188e-07, "loss": 0.3112, "step": 8980 }, { "epoch": 2.521336327905671, "grad_norm": 0.5269807577133179, "learning_rate": 7.557060476828171e-07, "loss": 0.3436, "step": 8981 }, { "epoch": 2.5216170690623247, "grad_norm": 0.5763477683067322, "learning_rate": 7.548428512287731e-07, "loss": 0.2804, "step": 8982 }, { "epoch": 2.521897810218978, "grad_norm": 0.5456498861312866, "learning_rate": 7.539801077914999e-07, "loss": 0.3214, "step": 8983 }, { "epoch": 2.5221785513756316, "grad_norm": 0.5414750576019287, "learning_rate": 7.531178174630671e-07, "loss": 0.3298, "step": 8984 }, { "epoch": 2.522459292532285, "grad_norm": 0.5692690014839172, "learning_rate": 7.522559803354895e-07, "loss": 0.3135, "step": 8985 }, { "epoch": 2.522740033688939, "grad_norm": 0.5702782869338989, "learning_rate": 7.513945965007396e-07, "loss": 0.3163, "step": 8986 }, { "epoch": 2.5230207748455924, "grad_norm": 0.6065912246704102, "learning_rate": 7.50533666050739e-07, "loss": 0.3094, "step": 8987 }, { "epoch": 2.523301516002246, "grad_norm": 0.5257893800735474, "learning_rate": 7.496731890773579e-07, "loss": 0.3321, "step": 8988 }, { "epoch": 2.5235822571588997, "grad_norm": 0.5629504919052124, "learning_rate": 7.488131656724246e-07, "loss": 0.3232, "step": 8989 }, { "epoch": 2.523862998315553, "grad_norm": 0.49127185344696045, "learning_rate": 7.479535959277123e-07, "loss": 0.3365, "step": 8990 }, { "epoch": 2.5241437394722066, "grad_norm": 0.535672664642334, "learning_rate": 7.470944799349517e-07, "loss": 0.3256, "step": 8991 }, { "epoch": 2.52442448062886, "grad_norm": 0.5545815825462341, "learning_rate": 7.462358177858214e-07, "loss": 0.2886, "step": 8992 }, { "epoch": 2.5247052217855135, "grad_norm": 0.5472292900085449, "learning_rate": 7.453776095719511e-07, "loss": 0.3466, "step": 8993 }, { "epoch": 2.5249859629421674, "grad_norm": 0.610140323638916, "learning_rate": 7.445198553849259e-07, "loss": 0.2881, "step": 8994 }, { "epoch": 2.525266704098821, "grad_norm": 0.5382416844367981, "learning_rate": 7.436625553162757e-07, "loss": 0.2802, "step": 8995 }, { "epoch": 2.5255474452554747, "grad_norm": 0.4972606599330902, "learning_rate": 7.42805709457492e-07, "loss": 0.3334, "step": 8996 }, { "epoch": 2.525828186412128, "grad_norm": 0.5735927224159241, "learning_rate": 7.419493179000087e-07, "loss": 0.2911, "step": 8997 }, { "epoch": 2.5261089275687816, "grad_norm": 0.5767256617546082, "learning_rate": 7.410933807352144e-07, "loss": 0.3525, "step": 8998 }, { "epoch": 2.526389668725435, "grad_norm": 0.6331986784934998, "learning_rate": 7.402378980544517e-07, "loss": 0.341, "step": 8999 }, { "epoch": 2.5266704098820885, "grad_norm": 0.5588213801383972, "learning_rate": 7.393828699490091e-07, "loss": 0.3509, "step": 9000 }, { "epoch": 2.5269511510387423, "grad_norm": 0.6067502498626709, "learning_rate": 7.385282965101326e-07, "loss": 0.3248, "step": 9001 }, { "epoch": 2.527231892195396, "grad_norm": 0.5410355925559998, "learning_rate": 7.376741778290164e-07, "loss": 0.3635, "step": 9002 }, { "epoch": 2.5275126333520497, "grad_norm": 0.6640228033065796, "learning_rate": 7.368205139968048e-07, "loss": 0.318, "step": 9003 }, { "epoch": 2.527793374508703, "grad_norm": 0.6127304434776306, "learning_rate": 7.359673051045979e-07, "loss": 0.29, "step": 9004 }, { "epoch": 2.5280741156653566, "grad_norm": 0.5498339533805847, "learning_rate": 7.351145512434432e-07, "loss": 0.3223, "step": 9005 }, { "epoch": 2.52835485682201, "grad_norm": 0.5274477005004883, "learning_rate": 7.34262252504343e-07, "loss": 0.3181, "step": 9006 }, { "epoch": 2.5286355979786634, "grad_norm": 0.5333122611045837, "learning_rate": 7.334104089782473e-07, "loss": 0.3366, "step": 9007 }, { "epoch": 2.5289163391353173, "grad_norm": 0.513627827167511, "learning_rate": 7.325590207560618e-07, "loss": 0.3505, "step": 9008 }, { "epoch": 2.5291970802919708, "grad_norm": 0.5493861436843872, "learning_rate": 7.317080879286398e-07, "loss": 0.3318, "step": 9009 }, { "epoch": 2.529477821448624, "grad_norm": 0.5757958292961121, "learning_rate": 7.308576105867871e-07, "loss": 0.3204, "step": 9010 }, { "epoch": 2.529758562605278, "grad_norm": 0.5586037039756775, "learning_rate": 7.300075888212632e-07, "loss": 0.3082, "step": 9011 }, { "epoch": 2.5300393037619315, "grad_norm": 0.5369079113006592, "learning_rate": 7.291580227227751e-07, "loss": 0.328, "step": 9012 }, { "epoch": 2.530320044918585, "grad_norm": 0.6014928221702576, "learning_rate": 7.283089123819853e-07, "loss": 0.3023, "step": 9013 }, { "epoch": 2.5306007860752384, "grad_norm": 0.5827710032463074, "learning_rate": 7.27460257889504e-07, "loss": 0.3045, "step": 9014 }, { "epoch": 2.5308815272318923, "grad_norm": 0.5509430766105652, "learning_rate": 7.266120593358944e-07, "loss": 0.3146, "step": 9015 }, { "epoch": 2.5311622683885457, "grad_norm": 0.5305706262588501, "learning_rate": 7.257643168116718e-07, "loss": 0.3146, "step": 9016 }, { "epoch": 2.531443009545199, "grad_norm": 0.5430615544319153, "learning_rate": 7.249170304073011e-07, "loss": 0.3394, "step": 9017 }, { "epoch": 2.531723750701853, "grad_norm": 0.581591010093689, "learning_rate": 7.240702002132005e-07, "loss": 0.289, "step": 9018 }, { "epoch": 2.5320044918585065, "grad_norm": 0.5528303384780884, "learning_rate": 7.232238263197372e-07, "loss": 0.3649, "step": 9019 }, { "epoch": 2.53228523301516, "grad_norm": 0.5842733979225159, "learning_rate": 7.223779088172317e-07, "loss": 0.3548, "step": 9020 }, { "epoch": 2.5325659741718134, "grad_norm": 0.5184828042984009, "learning_rate": 7.215324477959557e-07, "loss": 0.2878, "step": 9021 }, { "epoch": 2.5328467153284673, "grad_norm": 0.6485675573348999, "learning_rate": 7.206874433461298e-07, "loss": 0.3439, "step": 9022 }, { "epoch": 2.5331274564851207, "grad_norm": 0.6202267408370972, "learning_rate": 7.1984289555793e-07, "loss": 0.3471, "step": 9023 }, { "epoch": 2.533408197641774, "grad_norm": 0.5531259179115295, "learning_rate": 7.189988045214797e-07, "loss": 0.3124, "step": 9024 }, { "epoch": 2.533688938798428, "grad_norm": 0.6165199279785156, "learning_rate": 7.181551703268541e-07, "loss": 0.3464, "step": 9025 }, { "epoch": 2.5339696799550815, "grad_norm": 0.6090344190597534, "learning_rate": 7.173119930640826e-07, "loss": 0.3429, "step": 9026 }, { "epoch": 2.534250421111735, "grad_norm": 0.5615666508674622, "learning_rate": 7.16469272823142e-07, "loss": 0.3225, "step": 9027 }, { "epoch": 2.5345311622683884, "grad_norm": 0.5526992678642273, "learning_rate": 7.156270096939644e-07, "loss": 0.3639, "step": 9028 }, { "epoch": 2.5348119034250423, "grad_norm": 0.5863558053970337, "learning_rate": 7.147852037664293e-07, "loss": 0.32, "step": 9029 }, { "epoch": 2.5350926445816957, "grad_norm": 0.5933303833007812, "learning_rate": 7.139438551303684e-07, "loss": 0.3524, "step": 9030 }, { "epoch": 2.535373385738349, "grad_norm": 0.5584559440612793, "learning_rate": 7.131029638755666e-07, "loss": 0.2797, "step": 9031 }, { "epoch": 2.535654126895003, "grad_norm": 0.5528318881988525, "learning_rate": 7.122625300917574e-07, "loss": 0.3657, "step": 9032 }, { "epoch": 2.5359348680516565, "grad_norm": 0.5077992081642151, "learning_rate": 7.114225538686287e-07, "loss": 0.3182, "step": 9033 }, { "epoch": 2.53621560920831, "grad_norm": 0.5783612132072449, "learning_rate": 7.105830352958143e-07, "loss": 0.351, "step": 9034 }, { "epoch": 2.5364963503649633, "grad_norm": 0.6035152077674866, "learning_rate": 7.097439744629059e-07, "loss": 0.331, "step": 9035 }, { "epoch": 2.5367770915216172, "grad_norm": 0.5512037873268127, "learning_rate": 7.089053714594413e-07, "loss": 0.3505, "step": 9036 }, { "epoch": 2.5370578326782707, "grad_norm": 0.5658860802650452, "learning_rate": 7.080672263749094e-07, "loss": 0.3831, "step": 9037 }, { "epoch": 2.537338573834924, "grad_norm": 0.5604510307312012, "learning_rate": 7.072295392987543e-07, "loss": 0.3089, "step": 9038 }, { "epoch": 2.537619314991578, "grad_norm": 0.5483195185661316, "learning_rate": 7.063923103203668e-07, "loss": 0.3593, "step": 9039 }, { "epoch": 2.5379000561482314, "grad_norm": 0.5885195136070251, "learning_rate": 7.05555539529093e-07, "loss": 0.3606, "step": 9040 }, { "epoch": 2.538180797304885, "grad_norm": 0.6049413084983826, "learning_rate": 7.047192270142273e-07, "loss": 0.3124, "step": 9041 }, { "epoch": 2.5384615384615383, "grad_norm": 0.5033851861953735, "learning_rate": 7.03883372865013e-07, "loss": 0.3535, "step": 9042 }, { "epoch": 2.5387422796181918, "grad_norm": 0.5461069941520691, "learning_rate": 7.03047977170651e-07, "loss": 0.3108, "step": 9043 }, { "epoch": 2.5390230207748457, "grad_norm": 0.48883023858070374, "learning_rate": 7.022130400202864e-07, "loss": 0.3801, "step": 9044 }, { "epoch": 2.539303761931499, "grad_norm": 0.5367558002471924, "learning_rate": 7.013785615030217e-07, "loss": 0.3327, "step": 9045 }, { "epoch": 2.539584503088153, "grad_norm": 0.5318974256515503, "learning_rate": 7.005445417079038e-07, "loss": 0.3152, "step": 9046 }, { "epoch": 2.5398652442448064, "grad_norm": 0.5410260558128357, "learning_rate": 6.99710980723936e-07, "loss": 0.3635, "step": 9047 }, { "epoch": 2.54014598540146, "grad_norm": 0.5868778824806213, "learning_rate": 6.988778786400719e-07, "loss": 0.3084, "step": 9048 }, { "epoch": 2.5404267265581133, "grad_norm": 0.562307596206665, "learning_rate": 6.980452355452122e-07, "loss": 0.3624, "step": 9049 }, { "epoch": 2.5407074677147667, "grad_norm": 0.5488806962966919, "learning_rate": 6.972130515282144e-07, "loss": 0.3285, "step": 9050 }, { "epoch": 2.5409882088714206, "grad_norm": 0.6462432742118835, "learning_rate": 6.963813266778824e-07, "loss": 0.3234, "step": 9051 }, { "epoch": 2.541268950028074, "grad_norm": 0.5582271218299866, "learning_rate": 6.955500610829718e-07, "loss": 0.3333, "step": 9052 }, { "epoch": 2.541549691184728, "grad_norm": 0.6388147473335266, "learning_rate": 6.947192548321918e-07, "loss": 0.3011, "step": 9053 }, { "epoch": 2.5418304323413814, "grad_norm": 0.5151721835136414, "learning_rate": 6.938889080141992e-07, "loss": 0.3262, "step": 9054 }, { "epoch": 2.542111173498035, "grad_norm": 0.594851553440094, "learning_rate": 6.930590207176058e-07, "loss": 0.3543, "step": 9055 }, { "epoch": 2.5423919146546883, "grad_norm": 0.5733625292778015, "learning_rate": 6.922295930309691e-07, "loss": 0.3097, "step": 9056 }, { "epoch": 2.5426726558113417, "grad_norm": 0.556485652923584, "learning_rate": 6.914006250428034e-07, "loss": 0.3398, "step": 9057 }, { "epoch": 2.5429533969679956, "grad_norm": 0.5053539276123047, "learning_rate": 6.905721168415691e-07, "loss": 0.315, "step": 9058 }, { "epoch": 2.543234138124649, "grad_norm": 0.5656735301017761, "learning_rate": 6.897440685156792e-07, "loss": 0.3504, "step": 9059 }, { "epoch": 2.5435148792813025, "grad_norm": 0.644794225692749, "learning_rate": 6.889164801534998e-07, "loss": 0.3133, "step": 9060 }, { "epoch": 2.5437956204379564, "grad_norm": 0.5380450487136841, "learning_rate": 6.880893518433434e-07, "loss": 0.3205, "step": 9061 }, { "epoch": 2.54407636159461, "grad_norm": 0.5853850245475769, "learning_rate": 6.872626836734786e-07, "loss": 0.3657, "step": 9062 }, { "epoch": 2.5443571027512633, "grad_norm": 0.5349850058555603, "learning_rate": 6.864364757321213e-07, "loss": 0.3277, "step": 9063 }, { "epoch": 2.5446378439079167, "grad_norm": 0.584952175617218, "learning_rate": 6.856107281074376e-07, "loss": 0.3395, "step": 9064 }, { "epoch": 2.5449185850645706, "grad_norm": 0.5998843312263489, "learning_rate": 6.847854408875488e-07, "loss": 0.338, "step": 9065 }, { "epoch": 2.545199326221224, "grad_norm": 0.5139232277870178, "learning_rate": 6.839606141605221e-07, "loss": 0.3064, "step": 9066 }, { "epoch": 2.5454800673778775, "grad_norm": 0.559762716293335, "learning_rate": 6.831362480143799e-07, "loss": 0.32, "step": 9067 }, { "epoch": 2.5457608085345313, "grad_norm": 0.5240930914878845, "learning_rate": 6.823123425370914e-07, "loss": 0.3414, "step": 9068 }, { "epoch": 2.546041549691185, "grad_norm": 0.5897007584571838, "learning_rate": 6.814888978165812e-07, "loss": 0.2849, "step": 9069 }, { "epoch": 2.5463222908478382, "grad_norm": 0.4596761465072632, "learning_rate": 6.806659139407206e-07, "loss": 0.3747, "step": 9070 }, { "epoch": 2.5466030320044917, "grad_norm": 0.5650991797447205, "learning_rate": 6.798433909973312e-07, "loss": 0.3151, "step": 9071 }, { "epoch": 2.5468837731611456, "grad_norm": 0.5619720220565796, "learning_rate": 6.790213290741921e-07, "loss": 0.3338, "step": 9072 }, { "epoch": 2.547164514317799, "grad_norm": 0.5655146837234497, "learning_rate": 6.781997282590264e-07, "loss": 0.2953, "step": 9073 }, { "epoch": 2.5474452554744524, "grad_norm": 0.5359267592430115, "learning_rate": 6.773785886395096e-07, "loss": 0.3197, "step": 9074 }, { "epoch": 2.5477259966311063, "grad_norm": 0.5062718987464905, "learning_rate": 6.765579103032694e-07, "loss": 0.3348, "step": 9075 }, { "epoch": 2.5480067377877598, "grad_norm": 0.5986114740371704, "learning_rate": 6.757376933378829e-07, "loss": 0.2997, "step": 9076 }, { "epoch": 2.548287478944413, "grad_norm": 0.5603893995285034, "learning_rate": 6.749179378308795e-07, "loss": 0.2791, "step": 9077 }, { "epoch": 2.5485682201010667, "grad_norm": 0.5378441214561462, "learning_rate": 6.740986438697372e-07, "loss": 0.3339, "step": 9078 }, { "epoch": 2.5488489612577205, "grad_norm": 0.6141390800476074, "learning_rate": 6.732798115418876e-07, "loss": 0.3197, "step": 9079 }, { "epoch": 2.549129702414374, "grad_norm": 0.5450079441070557, "learning_rate": 6.724614409347102e-07, "loss": 0.3639, "step": 9080 }, { "epoch": 2.5494104435710274, "grad_norm": 0.5260674953460693, "learning_rate": 6.716435321355358e-07, "loss": 0.3182, "step": 9081 }, { "epoch": 2.5496911847276813, "grad_norm": 0.5757387280464172, "learning_rate": 6.708260852316483e-07, "loss": 0.3647, "step": 9082 }, { "epoch": 2.5499719258843347, "grad_norm": 0.5008442401885986, "learning_rate": 6.700091003102788e-07, "loss": 0.3448, "step": 9083 }, { "epoch": 2.550252667040988, "grad_norm": 0.5486941337585449, "learning_rate": 6.691925774586122e-07, "loss": 0.332, "step": 9084 }, { "epoch": 2.5505334081976416, "grad_norm": 0.5755200982093811, "learning_rate": 6.683765167637829e-07, "loss": 0.2807, "step": 9085 }, { "epoch": 2.550814149354295, "grad_norm": 0.5597274303436279, "learning_rate": 6.675609183128739e-07, "loss": 0.3065, "step": 9086 }, { "epoch": 2.551094890510949, "grad_norm": 0.5731322765350342, "learning_rate": 6.667457821929229e-07, "loss": 0.2875, "step": 9087 }, { "epoch": 2.5513756316676024, "grad_norm": 0.5955571532249451, "learning_rate": 6.659311084909143e-07, "loss": 0.3247, "step": 9088 }, { "epoch": 2.5516563728242563, "grad_norm": 0.5537461638450623, "learning_rate": 6.651168972937871e-07, "loss": 0.337, "step": 9089 }, { "epoch": 2.5519371139809097, "grad_norm": 0.5642498135566711, "learning_rate": 6.643031486884266e-07, "loss": 0.3071, "step": 9090 }, { "epoch": 2.552217855137563, "grad_norm": 0.6300172805786133, "learning_rate": 6.634898627616731e-07, "loss": 0.2783, "step": 9091 }, { "epoch": 2.5524985962942166, "grad_norm": 0.5638207793235779, "learning_rate": 6.626770396003145e-07, "loss": 0.3316, "step": 9092 }, { "epoch": 2.55277933745087, "grad_norm": 0.6052414774894714, "learning_rate": 6.618646792910893e-07, "loss": 0.3487, "step": 9093 }, { "epoch": 2.553060078607524, "grad_norm": 0.5608871579170227, "learning_rate": 6.610527819206897e-07, "loss": 0.3197, "step": 9094 }, { "epoch": 2.5533408197641774, "grad_norm": 0.5285017490386963, "learning_rate": 6.602413475757541e-07, "loss": 0.3102, "step": 9095 }, { "epoch": 2.5536215609208313, "grad_norm": 0.6314661502838135, "learning_rate": 6.594303763428744e-07, "loss": 0.2841, "step": 9096 }, { "epoch": 2.5539023020774847, "grad_norm": 0.5488671660423279, "learning_rate": 6.586198683085937e-07, "loss": 0.3093, "step": 9097 }, { "epoch": 2.554183043234138, "grad_norm": 0.5688106417655945, "learning_rate": 6.578098235594022e-07, "loss": 0.3421, "step": 9098 }, { "epoch": 2.5544637843907916, "grad_norm": 0.601155161857605, "learning_rate": 6.570002421817456e-07, "loss": 0.3196, "step": 9099 }, { "epoch": 2.554744525547445, "grad_norm": 0.5598655343055725, "learning_rate": 6.561911242620156e-07, "loss": 0.354, "step": 9100 }, { "epoch": 2.555025266704099, "grad_norm": 0.49863100051879883, "learning_rate": 6.553824698865552e-07, "loss": 0.3619, "step": 9101 }, { "epoch": 2.5553060078607523, "grad_norm": 0.56257164478302, "learning_rate": 6.545742791416615e-07, "loss": 0.3314, "step": 9102 }, { "epoch": 2.555586749017406, "grad_norm": 0.536172091960907, "learning_rate": 6.537665521135772e-07, "loss": 0.3177, "step": 9103 }, { "epoch": 2.5558674901740597, "grad_norm": 0.5639461874961853, "learning_rate": 6.529592888885e-07, "loss": 0.3537, "step": 9104 }, { "epoch": 2.556148231330713, "grad_norm": 0.6627373099327087, "learning_rate": 6.521524895525738e-07, "loss": 0.297, "step": 9105 }, { "epoch": 2.5564289724873666, "grad_norm": 0.5472077131271362, "learning_rate": 6.513461541918981e-07, "loss": 0.3651, "step": 9106 }, { "epoch": 2.55670971364402, "grad_norm": 0.6022294759750366, "learning_rate": 6.505402828925178e-07, "loss": 0.3175, "step": 9107 }, { "epoch": 2.556990454800674, "grad_norm": 0.580473780632019, "learning_rate": 6.497348757404298e-07, "loss": 0.3987, "step": 9108 }, { "epoch": 2.5572711959573273, "grad_norm": 0.570435643196106, "learning_rate": 6.489299328215848e-07, "loss": 0.331, "step": 9109 }, { "epoch": 2.5575519371139808, "grad_norm": 0.5406184792518616, "learning_rate": 6.48125454221879e-07, "loss": 0.353, "step": 9110 }, { "epoch": 2.5578326782706347, "grad_norm": 0.517946183681488, "learning_rate": 6.473214400271626e-07, "loss": 0.3527, "step": 9111 }, { "epoch": 2.558113419427288, "grad_norm": 0.6016600131988525, "learning_rate": 6.465178903232349e-07, "loss": 0.3488, "step": 9112 }, { "epoch": 2.5583941605839415, "grad_norm": 0.5373508930206299, "learning_rate": 6.457148051958445e-07, "loss": 0.3628, "step": 9113 }, { "epoch": 2.558674901740595, "grad_norm": 0.5815570950508118, "learning_rate": 6.449121847306938e-07, "loss": 0.3125, "step": 9114 }, { "epoch": 2.558955642897249, "grad_norm": 0.5918525457382202, "learning_rate": 6.441100290134312e-07, "loss": 0.2971, "step": 9115 }, { "epoch": 2.5592363840539023, "grad_norm": 0.561310350894928, "learning_rate": 6.433083381296596e-07, "loss": 0.3199, "step": 9116 }, { "epoch": 2.5595171252105557, "grad_norm": 0.5653798580169678, "learning_rate": 6.425071121649285e-07, "loss": 0.3219, "step": 9117 }, { "epoch": 2.5597978663672096, "grad_norm": 0.5669142007827759, "learning_rate": 6.417063512047417e-07, "loss": 0.3513, "step": 9118 }, { "epoch": 2.560078607523863, "grad_norm": 0.5990543961524963, "learning_rate": 6.409060553345515e-07, "loss": 0.3014, "step": 9119 }, { "epoch": 2.5603593486805165, "grad_norm": 0.586254894733429, "learning_rate": 6.401062246397582e-07, "loss": 0.339, "step": 9120 }, { "epoch": 2.56064008983717, "grad_norm": 0.5165368914604187, "learning_rate": 6.393068592057173e-07, "loss": 0.3488, "step": 9121 }, { "epoch": 2.560920830993824, "grad_norm": 0.5481555461883545, "learning_rate": 6.385079591177296e-07, "loss": 0.3288, "step": 9122 }, { "epoch": 2.5612015721504773, "grad_norm": 0.6165788173675537, "learning_rate": 6.377095244610504e-07, "loss": 0.3473, "step": 9123 }, { "epoch": 2.5614823133071307, "grad_norm": 0.5769089460372925, "learning_rate": 6.369115553208849e-07, "loss": 0.3266, "step": 9124 }, { "epoch": 2.5617630544637846, "grad_norm": 0.5593810081481934, "learning_rate": 6.361140517823844e-07, "loss": 0.3161, "step": 9125 }, { "epoch": 2.562043795620438, "grad_norm": 0.5495381951332092, "learning_rate": 6.353170139306563e-07, "loss": 0.3237, "step": 9126 }, { "epoch": 2.5623245367770915, "grad_norm": 0.5270056128501892, "learning_rate": 6.345204418507534e-07, "loss": 0.3618, "step": 9127 }, { "epoch": 2.562605277933745, "grad_norm": 0.5322282910346985, "learning_rate": 6.337243356276829e-07, "loss": 0.3555, "step": 9128 }, { "epoch": 2.562886019090399, "grad_norm": 0.5177984833717346, "learning_rate": 6.329286953463992e-07, "loss": 0.3413, "step": 9129 }, { "epoch": 2.5631667602470523, "grad_norm": 0.5783156156539917, "learning_rate": 6.321335210918067e-07, "loss": 0.2853, "step": 9130 }, { "epoch": 2.5634475014037057, "grad_norm": 0.5159136652946472, "learning_rate": 6.313388129487641e-07, "loss": 0.3684, "step": 9131 }, { "epoch": 2.5637282425603596, "grad_norm": 0.5714773535728455, "learning_rate": 6.305445710020758e-07, "loss": 0.3101, "step": 9132 }, { "epoch": 2.564008983717013, "grad_norm": 0.5122051239013672, "learning_rate": 6.297507953364995e-07, "loss": 0.3199, "step": 9133 }, { "epoch": 2.5642897248736665, "grad_norm": 0.4964951276779175, "learning_rate": 6.28957486036742e-07, "loss": 0.3599, "step": 9134 }, { "epoch": 2.56457046603032, "grad_norm": 0.5868041515350342, "learning_rate": 6.281646431874583e-07, "loss": 0.3152, "step": 9135 }, { "epoch": 2.5648512071869733, "grad_norm": 0.5245321393013, "learning_rate": 6.273722668732585e-07, "loss": 0.3497, "step": 9136 }, { "epoch": 2.5651319483436272, "grad_norm": 0.5406701564788818, "learning_rate": 6.265803571786983e-07, "loss": 0.2849, "step": 9137 }, { "epoch": 2.5654126895002807, "grad_norm": 0.6307325959205627, "learning_rate": 6.257889141882861e-07, "loss": 0.3115, "step": 9138 }, { "epoch": 2.5656934306569346, "grad_norm": 0.6060090065002441, "learning_rate": 6.24997937986479e-07, "loss": 0.3362, "step": 9139 }, { "epoch": 2.565974171813588, "grad_norm": 0.5877820253372192, "learning_rate": 6.242074286576866e-07, "loss": 0.3368, "step": 9140 }, { "epoch": 2.5662549129702414, "grad_norm": 0.5987187623977661, "learning_rate": 6.234173862862663e-07, "loss": 0.3253, "step": 9141 }, { "epoch": 2.566535654126895, "grad_norm": 0.5555630922317505, "learning_rate": 6.226278109565248e-07, "loss": 0.35, "step": 9142 }, { "epoch": 2.5668163952835483, "grad_norm": 0.5410875678062439, "learning_rate": 6.21838702752724e-07, "loss": 0.321, "step": 9143 }, { "epoch": 2.567097136440202, "grad_norm": 0.567570686340332, "learning_rate": 6.210500617590692e-07, "loss": 0.274, "step": 9144 }, { "epoch": 2.5673778775968557, "grad_norm": 0.5772649049758911, "learning_rate": 6.202618880597222e-07, "loss": 0.3114, "step": 9145 }, { "epoch": 2.5676586187535095, "grad_norm": 0.5099751949310303, "learning_rate": 6.194741817387906e-07, "loss": 0.3284, "step": 9146 }, { "epoch": 2.567939359910163, "grad_norm": 0.5567918419837952, "learning_rate": 6.186869428803316e-07, "loss": 0.3011, "step": 9147 }, { "epoch": 2.5682201010668164, "grad_norm": 0.6177340745925903, "learning_rate": 6.179001715683586e-07, "loss": 0.2984, "step": 9148 }, { "epoch": 2.56850084222347, "grad_norm": 0.6140207648277283, "learning_rate": 6.171138678868288e-07, "loss": 0.3346, "step": 9149 }, { "epoch": 2.5687815833801233, "grad_norm": 0.5163900852203369, "learning_rate": 6.163280319196507e-07, "loss": 0.3642, "step": 9150 }, { "epoch": 2.569062324536777, "grad_norm": 0.5513126254081726, "learning_rate": 6.155426637506856e-07, "loss": 0.3159, "step": 9151 }, { "epoch": 2.5693430656934306, "grad_norm": 0.516830563545227, "learning_rate": 6.147577634637413e-07, "loss": 0.3515, "step": 9152 }, { "epoch": 2.569623806850084, "grad_norm": 0.4917302429676056, "learning_rate": 6.139733311425794e-07, "loss": 0.3459, "step": 9153 }, { "epoch": 2.569904548006738, "grad_norm": 0.5159890055656433, "learning_rate": 6.131893668709082e-07, "loss": 0.3261, "step": 9154 }, { "epoch": 2.5701852891633914, "grad_norm": 0.581808865070343, "learning_rate": 6.124058707323888e-07, "loss": 0.2964, "step": 9155 }, { "epoch": 2.570466030320045, "grad_norm": 0.5300115346908569, "learning_rate": 6.116228428106296e-07, "loss": 0.3052, "step": 9156 }, { "epoch": 2.5707467714766983, "grad_norm": 0.5398166179656982, "learning_rate": 6.108402831891908e-07, "loss": 0.3167, "step": 9157 }, { "epoch": 2.571027512633352, "grad_norm": 0.6200629472732544, "learning_rate": 6.100581919515835e-07, "loss": 0.3212, "step": 9158 }, { "epoch": 2.5713082537900056, "grad_norm": 0.565707266330719, "learning_rate": 6.092765691812652e-07, "loss": 0.3105, "step": 9159 }, { "epoch": 2.571588994946659, "grad_norm": 0.49850425124168396, "learning_rate": 6.08495414961649e-07, "loss": 0.3017, "step": 9160 }, { "epoch": 2.571869736103313, "grad_norm": 0.5485131740570068, "learning_rate": 6.077147293760915e-07, "loss": 0.3204, "step": 9161 }, { "epoch": 2.5721504772599664, "grad_norm": 0.5504187345504761, "learning_rate": 6.069345125079057e-07, "loss": 0.327, "step": 9162 }, { "epoch": 2.57243121841662, "grad_norm": 0.6180916428565979, "learning_rate": 6.061547644403503e-07, "loss": 0.3257, "step": 9163 }, { "epoch": 2.5727119595732733, "grad_norm": 0.586897075176239, "learning_rate": 6.053754852566335e-07, "loss": 0.3314, "step": 9164 }, { "epoch": 2.572992700729927, "grad_norm": 0.568545401096344, "learning_rate": 6.045966750399174e-07, "loss": 0.3122, "step": 9165 }, { "epoch": 2.5732734418865806, "grad_norm": 0.5774040818214417, "learning_rate": 6.038183338733106e-07, "loss": 0.3405, "step": 9166 }, { "epoch": 2.573554183043234, "grad_norm": 0.5538953542709351, "learning_rate": 6.030404618398733e-07, "loss": 0.3245, "step": 9167 }, { "epoch": 2.573834924199888, "grad_norm": 0.5269328951835632, "learning_rate": 6.022630590226159e-07, "loss": 0.2977, "step": 9168 }, { "epoch": 2.5741156653565413, "grad_norm": 0.5575495362281799, "learning_rate": 6.014861255044951e-07, "loss": 0.3178, "step": 9169 }, { "epoch": 2.574396406513195, "grad_norm": 0.5304580926895142, "learning_rate": 6.007096613684243e-07, "loss": 0.3362, "step": 9170 }, { "epoch": 2.5746771476698482, "grad_norm": 0.5896769762039185, "learning_rate": 5.9993366669726e-07, "loss": 0.3436, "step": 9171 }, { "epoch": 2.574957888826502, "grad_norm": 0.5595166683197021, "learning_rate": 5.991581415738129e-07, "loss": 0.3608, "step": 9172 }, { "epoch": 2.5752386299831556, "grad_norm": 0.5666127800941467, "learning_rate": 5.983830860808415e-07, "loss": 0.3379, "step": 9173 }, { "epoch": 2.575519371139809, "grad_norm": 0.5501779317855835, "learning_rate": 5.976085003010551e-07, "loss": 0.2954, "step": 9174 }, { "epoch": 2.575800112296463, "grad_norm": 0.604441225528717, "learning_rate": 5.968343843171143e-07, "loss": 0.3082, "step": 9175 }, { "epoch": 2.5760808534531163, "grad_norm": 0.55019211769104, "learning_rate": 5.960607382116246e-07, "loss": 0.3036, "step": 9176 }, { "epoch": 2.5763615946097698, "grad_norm": 0.5779778957366943, "learning_rate": 5.952875620671484e-07, "loss": 0.3058, "step": 9177 }, { "epoch": 2.576642335766423, "grad_norm": 0.5864604115486145, "learning_rate": 5.945148559661922e-07, "loss": 0.3242, "step": 9178 }, { "epoch": 2.5769230769230766, "grad_norm": 0.5155860185623169, "learning_rate": 5.937426199912139e-07, "loss": 0.3247, "step": 9179 }, { "epoch": 2.5772038180797305, "grad_norm": 0.558754026889801, "learning_rate": 5.929708542246232e-07, "loss": 0.2928, "step": 9180 }, { "epoch": 2.577484559236384, "grad_norm": 0.4811382293701172, "learning_rate": 5.921995587487767e-07, "loss": 0.313, "step": 9181 }, { "epoch": 2.577765300393038, "grad_norm": 0.591773509979248, "learning_rate": 5.91428733645984e-07, "loss": 0.3276, "step": 9182 }, { "epoch": 2.5780460415496913, "grad_norm": 0.5202547311782837, "learning_rate": 5.906583789985015e-07, "loss": 0.2937, "step": 9183 }, { "epoch": 2.5783267827063447, "grad_norm": 0.5595214366912842, "learning_rate": 5.898884948885358e-07, "loss": 0.3421, "step": 9184 }, { "epoch": 2.578607523862998, "grad_norm": 0.5193902254104614, "learning_rate": 5.891190813982467e-07, "loss": 0.3356, "step": 9185 }, { "epoch": 2.5788882650196516, "grad_norm": 0.5778075456619263, "learning_rate": 5.883501386097385e-07, "loss": 0.378, "step": 9186 }, { "epoch": 2.5791690061763055, "grad_norm": 0.5058221817016602, "learning_rate": 5.875816666050699e-07, "loss": 0.3479, "step": 9187 }, { "epoch": 2.579449747332959, "grad_norm": 0.5434385538101196, "learning_rate": 5.868136654662465e-07, "loss": 0.362, "step": 9188 }, { "epoch": 2.579730488489613, "grad_norm": 0.5488899350166321, "learning_rate": 5.860461352752256e-07, "loss": 0.3278, "step": 9189 }, { "epoch": 2.5800112296462663, "grad_norm": 0.5754150152206421, "learning_rate": 5.852790761139126e-07, "loss": 0.393, "step": 9190 }, { "epoch": 2.5802919708029197, "grad_norm": 0.6625264286994934, "learning_rate": 5.845124880641623e-07, "loss": 0.3082, "step": 9191 }, { "epoch": 2.580572711959573, "grad_norm": 0.5571896433830261, "learning_rate": 5.837463712077824e-07, "loss": 0.3572, "step": 9192 }, { "epoch": 2.5808534531162266, "grad_norm": 0.5079872012138367, "learning_rate": 5.829807256265252e-07, "loss": 0.3753, "step": 9193 }, { "epoch": 2.5811341942728805, "grad_norm": 0.5559747815132141, "learning_rate": 5.822155514020988e-07, "loss": 0.3315, "step": 9194 }, { "epoch": 2.581414935429534, "grad_norm": 0.575465977191925, "learning_rate": 5.814508486161563e-07, "loss": 0.2907, "step": 9195 }, { "epoch": 2.5816956765861874, "grad_norm": 0.5370486378669739, "learning_rate": 5.806866173503012e-07, "loss": 0.3183, "step": 9196 }, { "epoch": 2.5819764177428413, "grad_norm": 0.5360647439956665, "learning_rate": 5.799228576860893e-07, "loss": 0.3152, "step": 9197 }, { "epoch": 2.5822571588994947, "grad_norm": 0.6080288290977478, "learning_rate": 5.791595697050217e-07, "loss": 0.347, "step": 9198 }, { "epoch": 2.582537900056148, "grad_norm": 0.5147066116333008, "learning_rate": 5.783967534885549e-07, "loss": 0.2961, "step": 9199 }, { "epoch": 2.5828186412128016, "grad_norm": 0.539100706577301, "learning_rate": 5.776344091180908e-07, "loss": 0.3404, "step": 9200 }, { "epoch": 2.5830993823694555, "grad_norm": 0.491840660572052, "learning_rate": 5.768725366749806e-07, "loss": 0.316, "step": 9201 }, { "epoch": 2.583380123526109, "grad_norm": 0.5694340467453003, "learning_rate": 5.761111362405286e-07, "loss": 0.3055, "step": 9202 }, { "epoch": 2.5836608646827623, "grad_norm": 0.5699926018714905, "learning_rate": 5.753502078959849e-07, "loss": 0.2624, "step": 9203 }, { "epoch": 2.5839416058394162, "grad_norm": 0.5050646662712097, "learning_rate": 5.745897517225529e-07, "loss": 0.3213, "step": 9204 }, { "epoch": 2.5842223469960697, "grad_norm": 0.5447824001312256, "learning_rate": 5.738297678013826e-07, "loss": 0.3377, "step": 9205 }, { "epoch": 2.584503088152723, "grad_norm": 0.5743123888969421, "learning_rate": 5.730702562135742e-07, "loss": 0.3165, "step": 9206 }, { "epoch": 2.5847838293093766, "grad_norm": 0.5601766109466553, "learning_rate": 5.723112170401796e-07, "loss": 0.3119, "step": 9207 }, { "epoch": 2.5850645704660304, "grad_norm": 0.5751959681510925, "learning_rate": 5.71552650362197e-07, "loss": 0.3503, "step": 9208 }, { "epoch": 2.585345311622684, "grad_norm": 0.5345215797424316, "learning_rate": 5.707945562605777e-07, "loss": 0.2974, "step": 9209 }, { "epoch": 2.5856260527793373, "grad_norm": 0.5181090235710144, "learning_rate": 5.700369348162194e-07, "loss": 0.3499, "step": 9210 }, { "epoch": 2.585906793935991, "grad_norm": 0.5359534025192261, "learning_rate": 5.692797861099719e-07, "loss": 0.3076, "step": 9211 }, { "epoch": 2.5861875350926447, "grad_norm": 0.5513017177581787, "learning_rate": 5.68523110222633e-07, "loss": 0.3664, "step": 9212 }, { "epoch": 2.586468276249298, "grad_norm": 0.5880671739578247, "learning_rate": 5.67766907234949e-07, "loss": 0.316, "step": 9213 }, { "epoch": 2.5867490174059515, "grad_norm": 0.5786539912223816, "learning_rate": 5.670111772276194e-07, "loss": 0.2962, "step": 9214 }, { "epoch": 2.5870297585626054, "grad_norm": 0.5368750691413879, "learning_rate": 5.662559202812895e-07, "loss": 0.2904, "step": 9215 }, { "epoch": 2.587310499719259, "grad_norm": 0.5923262238502502, "learning_rate": 5.655011364765566e-07, "loss": 0.3456, "step": 9216 }, { "epoch": 2.5875912408759123, "grad_norm": 0.6336987614631653, "learning_rate": 5.647468258939664e-07, "loss": 0.3157, "step": 9217 }, { "epoch": 2.587871982032566, "grad_norm": 0.5571448802947998, "learning_rate": 5.639929886140127e-07, "loss": 0.3128, "step": 9218 }, { "epoch": 2.5881527231892196, "grad_norm": 0.4952406585216522, "learning_rate": 5.632396247171429e-07, "loss": 0.3326, "step": 9219 }, { "epoch": 2.588433464345873, "grad_norm": 0.5353228449821472, "learning_rate": 5.624867342837487e-07, "loss": 0.3089, "step": 9220 }, { "epoch": 2.5887142055025265, "grad_norm": 0.5043750405311584, "learning_rate": 5.617343173941763e-07, "loss": 0.3354, "step": 9221 }, { "epoch": 2.5889949466591804, "grad_norm": 0.546557605266571, "learning_rate": 5.609823741287168e-07, "loss": 0.2781, "step": 9222 }, { "epoch": 2.589275687815834, "grad_norm": 0.593588650226593, "learning_rate": 5.602309045676146e-07, "loss": 0.3152, "step": 9223 }, { "epoch": 2.5895564289724873, "grad_norm": 0.6697573661804199, "learning_rate": 5.594799087910608e-07, "loss": 0.3085, "step": 9224 }, { "epoch": 2.589837170129141, "grad_norm": 0.5571876764297485, "learning_rate": 5.587293868791965e-07, "loss": 0.3765, "step": 9225 }, { "epoch": 2.5901179112857946, "grad_norm": 0.47442951798439026, "learning_rate": 5.579793389121152e-07, "loss": 0.3371, "step": 9226 }, { "epoch": 2.590398652442448, "grad_norm": 0.5200581550598145, "learning_rate": 5.572297649698555e-07, "loss": 0.3205, "step": 9227 }, { "epoch": 2.5906793935991015, "grad_norm": 0.5396103262901306, "learning_rate": 5.564806651324068e-07, "loss": 0.3413, "step": 9228 }, { "epoch": 2.590960134755755, "grad_norm": 0.6101888418197632, "learning_rate": 5.5573203947971e-07, "loss": 0.3241, "step": 9229 }, { "epoch": 2.591240875912409, "grad_norm": 0.561924159526825, "learning_rate": 5.549838880916514e-07, "loss": 0.297, "step": 9230 }, { "epoch": 2.5915216170690623, "grad_norm": 0.5239946246147156, "learning_rate": 5.54236211048072e-07, "loss": 0.3656, "step": 9231 }, { "epoch": 2.591802358225716, "grad_norm": 0.5240875482559204, "learning_rate": 5.534890084287575e-07, "loss": 0.3076, "step": 9232 }, { "epoch": 2.5920830993823696, "grad_norm": 0.5559367537498474, "learning_rate": 5.527422803134441e-07, "loss": 0.3052, "step": 9233 }, { "epoch": 2.592363840539023, "grad_norm": 0.5739917159080505, "learning_rate": 5.519960267818203e-07, "loss": 0.3132, "step": 9234 }, { "epoch": 2.5926445816956765, "grad_norm": 0.626494824886322, "learning_rate": 5.512502479135184e-07, "loss": 0.2805, "step": 9235 }, { "epoch": 2.59292532285233, "grad_norm": 0.5362425446510315, "learning_rate": 5.505049437881266e-07, "loss": 0.3514, "step": 9236 }, { "epoch": 2.593206064008984, "grad_norm": 0.5248806476593018, "learning_rate": 5.497601144851766e-07, "loss": 0.2949, "step": 9237 }, { "epoch": 2.5934868051656372, "grad_norm": 0.5375782251358032, "learning_rate": 5.490157600841539e-07, "loss": 0.3784, "step": 9238 }, { "epoch": 2.593767546322291, "grad_norm": 0.5520331859588623, "learning_rate": 5.482718806644904e-07, "loss": 0.3275, "step": 9239 }, { "epoch": 2.5940482874789446, "grad_norm": 0.5443271398544312, "learning_rate": 5.475284763055677e-07, "loss": 0.3245, "step": 9240 }, { "epoch": 2.594329028635598, "grad_norm": 0.583730161190033, "learning_rate": 5.467855470867184e-07, "loss": 0.3458, "step": 9241 }, { "epoch": 2.5946097697922514, "grad_norm": 0.5758969783782959, "learning_rate": 5.460430930872224e-07, "loss": 0.3141, "step": 9242 }, { "epoch": 2.594890510948905, "grad_norm": 0.5554253458976746, "learning_rate": 5.453011143863108e-07, "loss": 0.2876, "step": 9243 }, { "epoch": 2.5951712521055588, "grad_norm": 0.582573413848877, "learning_rate": 5.445596110631618e-07, "loss": 0.3385, "step": 9244 }, { "epoch": 2.595451993262212, "grad_norm": 0.5768382549285889, "learning_rate": 5.438185831969045e-07, "loss": 0.3346, "step": 9245 }, { "epoch": 2.5957327344188657, "grad_norm": 0.5702758431434631, "learning_rate": 5.430780308666173e-07, "loss": 0.3251, "step": 9246 }, { "epoch": 2.5960134755755195, "grad_norm": 0.5713242292404175, "learning_rate": 5.423379541513257e-07, "loss": 0.2683, "step": 9247 }, { "epoch": 2.596294216732173, "grad_norm": 0.5656282901763916, "learning_rate": 5.41598353130009e-07, "loss": 0.3176, "step": 9248 }, { "epoch": 2.5965749578888264, "grad_norm": 0.5462035536766052, "learning_rate": 5.408592278815894e-07, "loss": 0.2929, "step": 9249 }, { "epoch": 2.59685569904548, "grad_norm": 0.6136484146118164, "learning_rate": 5.401205784849433e-07, "loss": 0.3025, "step": 9250 }, { "epoch": 2.5971364402021337, "grad_norm": 0.6153603792190552, "learning_rate": 5.393824050188968e-07, "loss": 0.3261, "step": 9251 }, { "epoch": 2.597417181358787, "grad_norm": 0.6858318448066711, "learning_rate": 5.386447075622198e-07, "loss": 0.3102, "step": 9252 }, { "epoch": 2.5976979225154406, "grad_norm": 0.5621753931045532, "learning_rate": 5.379074861936367e-07, "loss": 0.3255, "step": 9253 }, { "epoch": 2.5979786636720945, "grad_norm": 0.5572348833084106, "learning_rate": 5.371707409918198e-07, "loss": 0.3697, "step": 9254 }, { "epoch": 2.598259404828748, "grad_norm": 0.5891318321228027, "learning_rate": 5.36434472035387e-07, "loss": 0.2778, "step": 9255 }, { "epoch": 2.5985401459854014, "grad_norm": 0.5778660774230957, "learning_rate": 5.356986794029117e-07, "loss": 0.3383, "step": 9256 }, { "epoch": 2.598820887142055, "grad_norm": 0.5736680030822754, "learning_rate": 5.349633631729106e-07, "loss": 0.3693, "step": 9257 }, { "epoch": 2.5991016282987087, "grad_norm": 0.6269557476043701, "learning_rate": 5.342285234238543e-07, "loss": 0.3204, "step": 9258 }, { "epoch": 2.599382369455362, "grad_norm": 0.5626348257064819, "learning_rate": 5.334941602341581e-07, "loss": 0.3115, "step": 9259 }, { "epoch": 2.5996631106120156, "grad_norm": 0.5542964339256287, "learning_rate": 5.327602736821907e-07, "loss": 0.3284, "step": 9260 }, { "epoch": 2.5999438517686695, "grad_norm": 0.5452356338500977, "learning_rate": 5.320268638462667e-07, "loss": 0.3377, "step": 9261 }, { "epoch": 2.600224592925323, "grad_norm": 0.5376703143119812, "learning_rate": 5.312939308046505e-07, "loss": 0.3275, "step": 9262 }, { "epoch": 2.6005053340819764, "grad_norm": 0.5631587505340576, "learning_rate": 5.305614746355581e-07, "loss": 0.3034, "step": 9263 }, { "epoch": 2.60078607523863, "grad_norm": 0.5472807884216309, "learning_rate": 5.298294954171506e-07, "loss": 0.3011, "step": 9264 }, { "epoch": 2.6010668163952837, "grad_norm": 0.5509679317474365, "learning_rate": 5.290979932275419e-07, "loss": 0.3329, "step": 9265 }, { "epoch": 2.601347557551937, "grad_norm": 0.585071325302124, "learning_rate": 5.283669681447928e-07, "loss": 0.3164, "step": 9266 }, { "epoch": 2.6016282987085906, "grad_norm": 0.540922999382019, "learning_rate": 5.276364202469131e-07, "loss": 0.3554, "step": 9267 }, { "epoch": 2.6019090398652445, "grad_norm": 0.557780385017395, "learning_rate": 5.269063496118632e-07, "loss": 0.3168, "step": 9268 }, { "epoch": 2.602189781021898, "grad_norm": 0.522637128829956, "learning_rate": 5.261767563175501e-07, "loss": 0.3183, "step": 9269 }, { "epoch": 2.6024705221785513, "grad_norm": 0.5406872630119324, "learning_rate": 5.254476404418341e-07, "loss": 0.3343, "step": 9270 }, { "epoch": 2.602751263335205, "grad_norm": 0.5162304043769836, "learning_rate": 5.247190020625197e-07, "loss": 0.352, "step": 9271 }, { "epoch": 2.6030320044918582, "grad_norm": 0.5424394011497498, "learning_rate": 5.239908412573641e-07, "loss": 0.3061, "step": 9272 }, { "epoch": 2.603312745648512, "grad_norm": 0.5532450675964355, "learning_rate": 5.23263158104072e-07, "loss": 0.3388, "step": 9273 }, { "epoch": 2.6035934868051656, "grad_norm": 0.5204339623451233, "learning_rate": 5.225359526802942e-07, "loss": 0.3182, "step": 9274 }, { "epoch": 2.6038742279618194, "grad_norm": 0.5085756182670593, "learning_rate": 5.21809225063638e-07, "loss": 0.3007, "step": 9275 }, { "epoch": 2.604154969118473, "grad_norm": 0.5846678614616394, "learning_rate": 5.210829753316532e-07, "loss": 0.3605, "step": 9276 }, { "epoch": 2.6044357102751263, "grad_norm": 0.5946673154830933, "learning_rate": 5.203572035618398e-07, "loss": 0.3474, "step": 9277 }, { "epoch": 2.6047164514317798, "grad_norm": 0.5541054010391235, "learning_rate": 5.196319098316499e-07, "loss": 0.3412, "step": 9278 }, { "epoch": 2.604997192588433, "grad_norm": 0.5580927729606628, "learning_rate": 5.189070942184799e-07, "loss": 0.3187, "step": 9279 }, { "epoch": 2.605277933745087, "grad_norm": 0.535959780216217, "learning_rate": 5.181827567996795e-07, "loss": 0.2686, "step": 9280 }, { "epoch": 2.6055586749017405, "grad_norm": 0.5632882714271545, "learning_rate": 5.174588976525441e-07, "loss": 0.3003, "step": 9281 }, { "epoch": 2.6058394160583944, "grad_norm": 0.5435212850570679, "learning_rate": 5.167355168543203e-07, "loss": 0.2932, "step": 9282 }, { "epoch": 2.606120157215048, "grad_norm": 0.7202975153923035, "learning_rate": 5.160126144822031e-07, "loss": 0.3529, "step": 9283 }, { "epoch": 2.6064008983717013, "grad_norm": 0.6050068736076355, "learning_rate": 5.15290190613335e-07, "loss": 0.3536, "step": 9284 }, { "epoch": 2.6066816395283547, "grad_norm": 0.5328752994537354, "learning_rate": 5.145682453248096e-07, "loss": 0.3385, "step": 9285 }, { "epoch": 2.606962380685008, "grad_norm": 0.6006103157997131, "learning_rate": 5.138467786936669e-07, "loss": 0.3407, "step": 9286 }, { "epoch": 2.607243121841662, "grad_norm": 0.5150094628334045, "learning_rate": 5.131257907969001e-07, "loss": 0.3551, "step": 9287 }, { "epoch": 2.6075238629983155, "grad_norm": 0.5361217260360718, "learning_rate": 5.124052817114461e-07, "loss": 0.3093, "step": 9288 }, { "epoch": 2.607804604154969, "grad_norm": 0.5722095966339111, "learning_rate": 5.116852515141934e-07, "loss": 0.3301, "step": 9289 }, { "epoch": 2.608085345311623, "grad_norm": 0.6124836802482605, "learning_rate": 5.109657002819807e-07, "loss": 0.3366, "step": 9290 }, { "epoch": 2.6083660864682763, "grad_norm": 0.5385775566101074, "learning_rate": 5.102466280915918e-07, "loss": 0.3525, "step": 9291 }, { "epoch": 2.6086468276249297, "grad_norm": 0.5474715232849121, "learning_rate": 5.09528035019764e-07, "loss": 0.3547, "step": 9292 }, { "epoch": 2.608927568781583, "grad_norm": 0.5858557820320129, "learning_rate": 5.088099211431785e-07, "loss": 0.2946, "step": 9293 }, { "epoch": 2.609208309938237, "grad_norm": 0.5125181078910828, "learning_rate": 5.080922865384707e-07, "loss": 0.3123, "step": 9294 }, { "epoch": 2.6094890510948905, "grad_norm": 0.5887751579284668, "learning_rate": 5.073751312822206e-07, "loss": 0.3226, "step": 9295 }, { "epoch": 2.609769792251544, "grad_norm": 0.5322958827018738, "learning_rate": 5.066584554509579e-07, "loss": 0.3807, "step": 9296 }, { "epoch": 2.610050533408198, "grad_norm": 0.5683243274688721, "learning_rate": 5.059422591211633e-07, "loss": 0.363, "step": 9297 }, { "epoch": 2.6103312745648513, "grad_norm": 0.5509128570556641, "learning_rate": 5.052265423692631e-07, "loss": 0.3405, "step": 9298 }, { "epoch": 2.6106120157215047, "grad_norm": 0.5450987815856934, "learning_rate": 5.045113052716366e-07, "loss": 0.299, "step": 9299 }, { "epoch": 2.610892756878158, "grad_norm": 0.4994717240333557, "learning_rate": 5.037965479046064e-07, "loss": 0.3371, "step": 9300 }, { "epoch": 2.611173498034812, "grad_norm": 0.7137396335601807, "learning_rate": 5.030822703444494e-07, "loss": 0.3466, "step": 9301 }, { "epoch": 2.6114542391914655, "grad_norm": 0.4880484640598297, "learning_rate": 5.023684726673883e-07, "loss": 0.3518, "step": 9302 }, { "epoch": 2.611734980348119, "grad_norm": 0.5722585916519165, "learning_rate": 5.016551549495951e-07, "loss": 0.2926, "step": 9303 }, { "epoch": 2.612015721504773, "grad_norm": 0.6080985069274902, "learning_rate": 5.009423172671896e-07, "loss": 0.3066, "step": 9304 }, { "epoch": 2.6122964626614262, "grad_norm": 0.6243522763252258, "learning_rate": 5.002299596962429e-07, "loss": 0.359, "step": 9305 }, { "epoch": 2.6125772038180797, "grad_norm": 0.5547509789466858, "learning_rate": 4.995180823127716e-07, "loss": 0.3057, "step": 9306 }, { "epoch": 2.612857944974733, "grad_norm": 0.5454524755477905, "learning_rate": 4.988066851927448e-07, "loss": 0.308, "step": 9307 }, { "epoch": 2.613138686131387, "grad_norm": 0.4972596764564514, "learning_rate": 4.980957684120768e-07, "loss": 0.3304, "step": 9308 }, { "epoch": 2.6134194272880404, "grad_norm": 0.5153542160987854, "learning_rate": 4.973853320466338e-07, "loss": 0.2921, "step": 9309 }, { "epoch": 2.613700168444694, "grad_norm": 0.5744361877441406, "learning_rate": 4.966753761722287e-07, "loss": 0.291, "step": 9310 }, { "epoch": 2.6139809096013478, "grad_norm": 0.5168066024780273, "learning_rate": 4.959659008646217e-07, "loss": 0.3278, "step": 9311 }, { "epoch": 2.614261650758001, "grad_norm": 0.5387728810310364, "learning_rate": 4.95256906199526e-07, "loss": 0.3342, "step": 9312 }, { "epoch": 2.6145423919146547, "grad_norm": 0.6054450869560242, "learning_rate": 4.94548392252599e-07, "loss": 0.3109, "step": 9313 }, { "epoch": 2.614823133071308, "grad_norm": 0.5160964727401733, "learning_rate": 4.938403590994512e-07, "loss": 0.3252, "step": 9314 }, { "epoch": 2.615103874227962, "grad_norm": 0.6011884212493896, "learning_rate": 4.931328068156383e-07, "loss": 0.3263, "step": 9315 }, { "epoch": 2.6153846153846154, "grad_norm": 0.5339115858078003, "learning_rate": 4.924257354766648e-07, "loss": 0.322, "step": 9316 }, { "epoch": 2.615665356541269, "grad_norm": 0.5518389344215393, "learning_rate": 4.917191451579873e-07, "loss": 0.3496, "step": 9317 }, { "epoch": 2.6159460976979227, "grad_norm": 0.479958176612854, "learning_rate": 4.910130359350063e-07, "loss": 0.3357, "step": 9318 }, { "epoch": 2.616226838854576, "grad_norm": 0.5352818965911865, "learning_rate": 4.903074078830755e-07, "loss": 0.3491, "step": 9319 }, { "epoch": 2.6165075800112296, "grad_norm": 0.5416983366012573, "learning_rate": 4.896022610774937e-07, "loss": 0.3617, "step": 9320 }, { "epoch": 2.616788321167883, "grad_norm": 0.5425775647163391, "learning_rate": 4.888975955935104e-07, "loss": 0.3296, "step": 9321 }, { "epoch": 2.6170690623245365, "grad_norm": 0.5622072219848633, "learning_rate": 4.881934115063237e-07, "loss": 0.3529, "step": 9322 }, { "epoch": 2.6173498034811904, "grad_norm": 0.5656238198280334, "learning_rate": 4.87489708891078e-07, "loss": 0.3612, "step": 9323 }, { "epoch": 2.617630544637844, "grad_norm": 0.600321888923645, "learning_rate": 4.867864878228701e-07, "loss": 0.3256, "step": 9324 }, { "epoch": 2.6179112857944977, "grad_norm": 0.5435383319854736, "learning_rate": 4.860837483767411e-07, "loss": 0.286, "step": 9325 }, { "epoch": 2.618192026951151, "grad_norm": 0.561693012714386, "learning_rate": 4.853814906276844e-07, "loss": 0.358, "step": 9326 }, { "epoch": 2.6184727681078046, "grad_norm": 0.4986342787742615, "learning_rate": 4.846797146506416e-07, "loss": 0.3737, "step": 9327 }, { "epoch": 2.618753509264458, "grad_norm": 0.5988184809684753, "learning_rate": 4.839784205204995e-07, "loss": 0.3074, "step": 9328 }, { "epoch": 2.6190342504211115, "grad_norm": 0.5872687697410583, "learning_rate": 4.832776083120983e-07, "loss": 0.2894, "step": 9329 }, { "epoch": 2.6193149915777654, "grad_norm": 0.601529061794281, "learning_rate": 4.825772781002219e-07, "loss": 0.3043, "step": 9330 }, { "epoch": 2.619595732734419, "grad_norm": 0.5467483401298523, "learning_rate": 4.818774299596079e-07, "loss": 0.3301, "step": 9331 }, { "epoch": 2.6198764738910727, "grad_norm": 0.5491088032722473, "learning_rate": 4.811780639649377e-07, "loss": 0.3151, "step": 9332 }, { "epoch": 2.620157215047726, "grad_norm": 0.5768588185310364, "learning_rate": 4.804791801908432e-07, "loss": 0.2798, "step": 9333 }, { "epoch": 2.6204379562043796, "grad_norm": 0.5211851596832275, "learning_rate": 4.797807787119058e-07, "loss": 0.2931, "step": 9334 }, { "epoch": 2.620718697361033, "grad_norm": 0.5591154098510742, "learning_rate": 4.790828596026542e-07, "loss": 0.2922, "step": 9335 }, { "epoch": 2.6209994385176865, "grad_norm": 0.5015286803245544, "learning_rate": 4.783854229375667e-07, "loss": 0.3445, "step": 9336 }, { "epoch": 2.6212801796743403, "grad_norm": 0.5570569634437561, "learning_rate": 4.776884687910688e-07, "loss": 0.3092, "step": 9337 }, { "epoch": 2.621560920830994, "grad_norm": 0.5584204196929932, "learning_rate": 4.769919972375337e-07, "loss": 0.3034, "step": 9338 }, { "epoch": 2.6218416619876472, "grad_norm": 0.543012797832489, "learning_rate": 4.762960083512874e-07, "loss": 0.3559, "step": 9339 }, { "epoch": 2.622122403144301, "grad_norm": 0.540846049785614, "learning_rate": 4.7560050220659903e-07, "loss": 0.3432, "step": 9340 }, { "epoch": 2.6224031443009546, "grad_norm": 0.6288012862205505, "learning_rate": 4.749054788776908e-07, "loss": 0.3164, "step": 9341 }, { "epoch": 2.622683885457608, "grad_norm": 0.5445126891136169, "learning_rate": 4.742109384387289e-07, "loss": 0.3356, "step": 9342 }, { "epoch": 2.6229646266142614, "grad_norm": 0.6313918828964233, "learning_rate": 4.735168809638324e-07, "loss": 0.3443, "step": 9343 }, { "epoch": 2.6232453677709153, "grad_norm": 0.5379695892333984, "learning_rate": 4.728233065270665e-07, "loss": 0.3011, "step": 9344 }, { "epoch": 2.6235261089275688, "grad_norm": 0.5338721871376038, "learning_rate": 4.7213021520244375e-07, "loss": 0.3584, "step": 9345 }, { "epoch": 2.623806850084222, "grad_norm": 0.5549765825271606, "learning_rate": 4.7143760706392816e-07, "loss": 0.3672, "step": 9346 }, { "epoch": 2.624087591240876, "grad_norm": 0.5060943961143494, "learning_rate": 4.707454821854285e-07, "loss": 0.2816, "step": 9347 }, { "epoch": 2.6243683323975295, "grad_norm": 0.5631430149078369, "learning_rate": 4.700538406408073e-07, "loss": 0.3635, "step": 9348 }, { "epoch": 2.624649073554183, "grad_norm": 0.5169394016265869, "learning_rate": 4.693626825038694e-07, "loss": 0.333, "step": 9349 }, { "epoch": 2.6249298147108364, "grad_norm": 0.5193637609481812, "learning_rate": 4.686720078483714e-07, "loss": 0.3496, "step": 9350 }, { "epoch": 2.6252105558674903, "grad_norm": 0.556094229221344, "learning_rate": 4.6798181674801825e-07, "loss": 0.333, "step": 9351 }, { "epoch": 2.6254912970241437, "grad_norm": 0.5475212931632996, "learning_rate": 4.6729210927646374e-07, "loss": 0.3557, "step": 9352 }, { "epoch": 2.625772038180797, "grad_norm": 0.5579310059547424, "learning_rate": 4.6660288550730735e-07, "loss": 0.3264, "step": 9353 }, { "epoch": 2.626052779337451, "grad_norm": 0.5508068203926086, "learning_rate": 4.6591414551410085e-07, "loss": 0.3392, "step": 9354 }, { "epoch": 2.6263335204941045, "grad_norm": 0.5637610554695129, "learning_rate": 4.6522588937033986e-07, "loss": 0.3241, "step": 9355 }, { "epoch": 2.626614261650758, "grad_norm": 0.5632300972938538, "learning_rate": 4.6453811714947283e-07, "loss": 0.3393, "step": 9356 }, { "epoch": 2.6268950028074114, "grad_norm": 0.525488018989563, "learning_rate": 4.6385082892489333e-07, "loss": 0.3684, "step": 9357 }, { "epoch": 2.6271757439640653, "grad_norm": 0.5839850306510925, "learning_rate": 4.631640247699459e-07, "loss": 0.323, "step": 9358 }, { "epoch": 2.6274564851207187, "grad_norm": 0.5301842093467712, "learning_rate": 4.624777047579204e-07, "loss": 0.303, "step": 9359 }, { "epoch": 2.627737226277372, "grad_norm": 0.5381922721862793, "learning_rate": 4.6179186896205654e-07, "loss": 0.3423, "step": 9360 }, { "epoch": 2.628017967434026, "grad_norm": 0.5731533765792847, "learning_rate": 4.611065174555446e-07, "loss": 0.3234, "step": 9361 }, { "epoch": 2.6282987085906795, "grad_norm": 0.5349149107933044, "learning_rate": 4.6042165031151833e-07, "loss": 0.3192, "step": 9362 }, { "epoch": 2.628579449747333, "grad_norm": 0.5049703121185303, "learning_rate": 4.5973726760306427e-07, "loss": 0.3534, "step": 9363 }, { "epoch": 2.6288601909039864, "grad_norm": 0.5449021458625793, "learning_rate": 4.5905336940321565e-07, "loss": 0.3401, "step": 9364 }, { "epoch": 2.62914093206064, "grad_norm": 0.5794591903686523, "learning_rate": 4.5836995578495193e-07, "loss": 0.3083, "step": 9365 }, { "epoch": 2.6294216732172937, "grad_norm": 0.5394207835197449, "learning_rate": 4.576870268212047e-07, "loss": 0.3048, "step": 9366 }, { "epoch": 2.629702414373947, "grad_norm": 0.542289674282074, "learning_rate": 4.570045825848507e-07, "loss": 0.309, "step": 9367 }, { "epoch": 2.629983155530601, "grad_norm": 0.5799707770347595, "learning_rate": 4.563226231487172e-07, "loss": 0.3015, "step": 9368 }, { "epoch": 2.6302638966872545, "grad_norm": 0.5405160188674927, "learning_rate": 4.5564114858557776e-07, "loss": 0.3421, "step": 9369 }, { "epoch": 2.630544637843908, "grad_norm": 0.6025461554527283, "learning_rate": 4.5496015896815573e-07, "loss": 0.3223, "step": 9370 }, { "epoch": 2.6308253790005613, "grad_norm": 0.5442836284637451, "learning_rate": 4.542796543691219e-07, "loss": 0.316, "step": 9371 }, { "epoch": 2.631106120157215, "grad_norm": 0.5988059639930725, "learning_rate": 4.5359963486109495e-07, "loss": 0.2992, "step": 9372 }, { "epoch": 2.6313868613138687, "grad_norm": 0.5295849442481995, "learning_rate": 4.529201005166428e-07, "loss": 0.3463, "step": 9373 }, { "epoch": 2.631667602470522, "grad_norm": 0.5595494508743286, "learning_rate": 4.5224105140828087e-07, "loss": 0.3246, "step": 9374 }, { "epoch": 2.631948343627176, "grad_norm": 0.5467063188552856, "learning_rate": 4.51562487608474e-07, "loss": 0.3578, "step": 9375 }, { "epoch": 2.6322290847838294, "grad_norm": 0.5460591912269592, "learning_rate": 4.508844091896325e-07, "loss": 0.303, "step": 9376 }, { "epoch": 2.632509825940483, "grad_norm": 0.6249129772186279, "learning_rate": 4.50206816224118e-07, "loss": 0.3391, "step": 9377 }, { "epoch": 2.6327905670971363, "grad_norm": 0.5546107292175293, "learning_rate": 4.4952970878423983e-07, "loss": 0.3268, "step": 9378 }, { "epoch": 2.6330713082537898, "grad_norm": 0.5169398784637451, "learning_rate": 4.488530869422525e-07, "loss": 0.3755, "step": 9379 }, { "epoch": 2.6333520494104437, "grad_norm": 0.6199008822441101, "learning_rate": 4.4817695077036316e-07, "loss": 0.3415, "step": 9380 }, { "epoch": 2.633632790567097, "grad_norm": 0.5584743618965149, "learning_rate": 4.4750130034072356e-07, "loss": 0.3203, "step": 9381 }, { "epoch": 2.633913531723751, "grad_norm": 0.546150267124176, "learning_rate": 4.468261357254339e-07, "loss": 0.3239, "step": 9382 }, { "epoch": 2.6341942728804044, "grad_norm": 0.4746125638484955, "learning_rate": 4.4615145699654585e-07, "loss": 0.3121, "step": 9383 }, { "epoch": 2.634475014037058, "grad_norm": 0.5220149755477905, "learning_rate": 4.454772642260552e-07, "loss": 0.3466, "step": 9384 }, { "epoch": 2.6347557551937113, "grad_norm": 0.5786832571029663, "learning_rate": 4.4480355748590834e-07, "loss": 0.2597, "step": 9385 }, { "epoch": 2.6350364963503647, "grad_norm": 0.5669474601745605, "learning_rate": 4.4413033684799935e-07, "loss": 0.3449, "step": 9386 }, { "epoch": 2.6353172375070186, "grad_norm": 0.622798502445221, "learning_rate": 4.434576023841691e-07, "loss": 0.3406, "step": 9387 }, { "epoch": 2.635597978663672, "grad_norm": 0.5079540610313416, "learning_rate": 4.4278535416620914e-07, "loss": 0.296, "step": 9388 }, { "epoch": 2.6358787198203255, "grad_norm": 0.5355873703956604, "learning_rate": 4.4211359226585536e-07, "loss": 0.3236, "step": 9389 }, { "epoch": 2.6361594609769794, "grad_norm": 0.5529024600982666, "learning_rate": 4.4144231675479656e-07, "loss": 0.3035, "step": 9390 }, { "epoch": 2.636440202133633, "grad_norm": 0.5050484538078308, "learning_rate": 4.407715277046648e-07, "loss": 0.3403, "step": 9391 }, { "epoch": 2.6367209432902863, "grad_norm": 0.49440085887908936, "learning_rate": 4.401012251870451e-07, "loss": 0.3166, "step": 9392 }, { "epoch": 2.6370016844469397, "grad_norm": 0.5921834111213684, "learning_rate": 4.3943140927346584e-07, "loss": 0.3188, "step": 9393 }, { "epoch": 2.6372824256035936, "grad_norm": 0.5881319046020508, "learning_rate": 4.387620800354059e-07, "loss": 0.3478, "step": 9394 }, { "epoch": 2.637563166760247, "grad_norm": 0.6179415583610535, "learning_rate": 4.380932375442931e-07, "loss": 0.2947, "step": 9395 }, { "epoch": 2.6378439079169005, "grad_norm": 0.5373092889785767, "learning_rate": 4.374248818714999e-07, "loss": 0.3413, "step": 9396 }, { "epoch": 2.6381246490735544, "grad_norm": 0.5521765351295471, "learning_rate": 4.3675701308835196e-07, "loss": 0.344, "step": 9397 }, { "epoch": 2.638405390230208, "grad_norm": 0.5564184784889221, "learning_rate": 4.360896312661189e-07, "loss": 0.3405, "step": 9398 }, { "epoch": 2.6386861313868613, "grad_norm": 0.5610907077789307, "learning_rate": 4.3542273647601774e-07, "loss": 0.3451, "step": 9399 }, { "epoch": 2.6389668725435147, "grad_norm": 0.593033492565155, "learning_rate": 4.3475632878921816e-07, "loss": 0.3315, "step": 9400 }, { "epoch": 2.6392476137001686, "grad_norm": 0.5673864483833313, "learning_rate": 4.340904082768332e-07, "loss": 0.3257, "step": 9401 }, { "epoch": 2.639528354856822, "grad_norm": 0.5801091194152832, "learning_rate": 4.3342497500992566e-07, "loss": 0.3212, "step": 9402 }, { "epoch": 2.6398090960134755, "grad_norm": 0.5522968769073486, "learning_rate": 4.3276002905950853e-07, "loss": 0.3043, "step": 9403 }, { "epoch": 2.6400898371701293, "grad_norm": 0.6049322485923767, "learning_rate": 4.320955704965385e-07, "loss": 0.3057, "step": 9404 }, { "epoch": 2.640370578326783, "grad_norm": 0.5833464860916138, "learning_rate": 4.314315993919238e-07, "loss": 0.3147, "step": 9405 }, { "epoch": 2.6406513194834362, "grad_norm": 0.5223578810691833, "learning_rate": 4.3076811581651777e-07, "loss": 0.3306, "step": 9406 }, { "epoch": 2.6409320606400897, "grad_norm": 0.5660110116004944, "learning_rate": 4.301051198411255e-07, "loss": 0.3624, "step": 9407 }, { "epoch": 2.6412128017967436, "grad_norm": 0.4879720211029053, "learning_rate": 4.294426115364964e-07, "loss": 0.3068, "step": 9408 }, { "epoch": 2.641493542953397, "grad_norm": 0.5538328289985657, "learning_rate": 4.2878059097332834e-07, "loss": 0.3644, "step": 9409 }, { "epoch": 2.6417742841100504, "grad_norm": 0.5390783548355103, "learning_rate": 4.281190582222705e-07, "loss": 0.3033, "step": 9410 }, { "epoch": 2.6420550252667043, "grad_norm": 0.5444378852844238, "learning_rate": 4.274580133539147e-07, "loss": 0.3538, "step": 9411 }, { "epoch": 2.6423357664233578, "grad_norm": 0.5433520674705505, "learning_rate": 4.267974564388061e-07, "loss": 0.3333, "step": 9412 }, { "epoch": 2.642616507580011, "grad_norm": 0.5778615474700928, "learning_rate": 4.261373875474328e-07, "loss": 0.3487, "step": 9413 }, { "epoch": 2.6428972487366647, "grad_norm": 0.5516786575317383, "learning_rate": 4.2547780675023577e-07, "loss": 0.3227, "step": 9414 }, { "epoch": 2.643177989893318, "grad_norm": 0.5535200238227844, "learning_rate": 4.248187141176002e-07, "loss": 0.3145, "step": 9415 }, { "epoch": 2.643458731049972, "grad_norm": 0.6026257872581482, "learning_rate": 4.2416010971985945e-07, "loss": 0.2817, "step": 9416 }, { "epoch": 2.6437394722066254, "grad_norm": 0.512552797794342, "learning_rate": 4.2350199362729717e-07, "loss": 0.324, "step": 9417 }, { "epoch": 2.6440202133632793, "grad_norm": 0.5500130653381348, "learning_rate": 4.2284436591014166e-07, "loss": 0.3638, "step": 9418 }, { "epoch": 2.6443009545199327, "grad_norm": 0.6028361320495605, "learning_rate": 4.2218722663857294e-07, "loss": 0.3141, "step": 9419 }, { "epoch": 2.644581695676586, "grad_norm": 0.5345044136047363, "learning_rate": 4.2153057588271597e-07, "loss": 0.3172, "step": 9420 }, { "epoch": 2.6448624368332396, "grad_norm": 0.5637393593788147, "learning_rate": 4.208744137126436e-07, "loss": 0.3486, "step": 9421 }, { "epoch": 2.645143177989893, "grad_norm": 0.5475842356681824, "learning_rate": 4.2021874019837874e-07, "loss": 0.3525, "step": 9422 }, { "epoch": 2.645423919146547, "grad_norm": 0.5665484666824341, "learning_rate": 4.195635554098898e-07, "loss": 0.346, "step": 9423 }, { "epoch": 2.6457046603032004, "grad_norm": 0.5510707497596741, "learning_rate": 4.189088594170948e-07, "loss": 0.3382, "step": 9424 }, { "epoch": 2.6459854014598543, "grad_norm": 0.5881587862968445, "learning_rate": 4.182546522898573e-07, "loss": 0.3405, "step": 9425 }, { "epoch": 2.6462661426165077, "grad_norm": 0.6297908425331116, "learning_rate": 4.1760093409799253e-07, "loss": 0.2861, "step": 9426 }, { "epoch": 2.646546883773161, "grad_norm": 0.5179921388626099, "learning_rate": 4.1694770491125914e-07, "loss": 0.3369, "step": 9427 }, { "epoch": 2.6468276249298146, "grad_norm": 0.522391140460968, "learning_rate": 4.1629496479936636e-07, "loss": 0.2874, "step": 9428 }, { "epoch": 2.647108366086468, "grad_norm": 0.5518567562103271, "learning_rate": 4.1564271383197183e-07, "loss": 0.3344, "step": 9429 }, { "epoch": 2.647389107243122, "grad_norm": 0.5623995661735535, "learning_rate": 4.1499095207867877e-07, "loss": 0.3644, "step": 9430 }, { "epoch": 2.6476698483997754, "grad_norm": 0.5590502023696899, "learning_rate": 4.1433967960903764e-07, "loss": 0.335, "step": 9431 }, { "epoch": 2.647950589556429, "grad_norm": 0.5279700756072998, "learning_rate": 4.136888964925506e-07, "loss": 0.2864, "step": 9432 }, { "epoch": 2.6482313307130827, "grad_norm": 0.5270088315010071, "learning_rate": 4.1303860279866383e-07, "loss": 0.325, "step": 9433 }, { "epoch": 2.648512071869736, "grad_norm": 0.5713874697685242, "learning_rate": 4.123887985967734e-07, "loss": 0.3212, "step": 9434 }, { "epoch": 2.6487928130263896, "grad_norm": 0.5349494218826294, "learning_rate": 4.1173948395622167e-07, "loss": 0.3325, "step": 9435 }, { "epoch": 2.649073554183043, "grad_norm": 0.5535030364990234, "learning_rate": 4.110906589462993e-07, "loss": 0.3663, "step": 9436 }, { "epoch": 2.649354295339697, "grad_norm": 0.4877673089504242, "learning_rate": 4.104423236362459e-07, "loss": 0.3166, "step": 9437 }, { "epoch": 2.6496350364963503, "grad_norm": 0.5749571919441223, "learning_rate": 4.097944780952462e-07, "loss": 0.3235, "step": 9438 }, { "epoch": 2.649915777653004, "grad_norm": 0.5387641191482544, "learning_rate": 4.0914712239243595e-07, "loss": 0.3377, "step": 9439 }, { "epoch": 2.6501965188096577, "grad_norm": 0.563923716545105, "learning_rate": 4.085002565968954e-07, "loss": 0.3065, "step": 9440 }, { "epoch": 2.650477259966311, "grad_norm": 0.5082716345787048, "learning_rate": 4.0785388077765606e-07, "loss": 0.3413, "step": 9441 }, { "epoch": 2.6507580011229646, "grad_norm": 0.5434393286705017, "learning_rate": 4.0720799500369337e-07, "loss": 0.3174, "step": 9442 }, { "epoch": 2.651038742279618, "grad_norm": 0.5248544812202454, "learning_rate": 4.065625993439321e-07, "loss": 0.3546, "step": 9443 }, { "epoch": 2.651319483436272, "grad_norm": 0.5421655774116516, "learning_rate": 4.0591769386724656e-07, "loss": 0.3244, "step": 9444 }, { "epoch": 2.6516002245929253, "grad_norm": 0.5521301627159119, "learning_rate": 4.052732786424551e-07, "loss": 0.3213, "step": 9445 }, { "epoch": 2.6518809657495788, "grad_norm": 0.5440464615821838, "learning_rate": 4.0462935373832725e-07, "loss": 0.322, "step": 9446 }, { "epoch": 2.6521617069062327, "grad_norm": 0.5045026540756226, "learning_rate": 4.0398591922357787e-07, "loss": 0.3576, "step": 9447 }, { "epoch": 2.652442448062886, "grad_norm": 0.5185729265213013, "learning_rate": 4.0334297516686994e-07, "loss": 0.2952, "step": 9448 }, { "epoch": 2.6527231892195395, "grad_norm": 0.5884493589401245, "learning_rate": 4.0270052163681627e-07, "loss": 0.3419, "step": 9449 }, { "epoch": 2.653003930376193, "grad_norm": 0.5225335955619812, "learning_rate": 4.020585587019726e-07, "loss": 0.3643, "step": 9450 }, { "epoch": 2.653284671532847, "grad_norm": 0.5333012938499451, "learning_rate": 4.014170864308481e-07, "loss": 0.3384, "step": 9451 }, { "epoch": 2.6535654126895003, "grad_norm": 0.5922673344612122, "learning_rate": 4.0077610489189453e-07, "loss": 0.3454, "step": 9452 }, { "epoch": 2.6538461538461537, "grad_norm": 0.5295975208282471, "learning_rate": 4.00135614153514e-07, "loss": 0.3451, "step": 9453 }, { "epoch": 2.6541268950028076, "grad_norm": 0.5840491652488708, "learning_rate": 3.9949561428405723e-07, "loss": 0.3554, "step": 9454 }, { "epoch": 2.654407636159461, "grad_norm": 0.4888450801372528, "learning_rate": 3.988561053518192e-07, "loss": 0.3776, "step": 9455 }, { "epoch": 2.6546883773161145, "grad_norm": 0.5314975380897522, "learning_rate": 3.9821708742504573e-07, "loss": 0.352, "step": 9456 }, { "epoch": 2.654969118472768, "grad_norm": 0.5290461778640747, "learning_rate": 3.975785605719279e-07, "loss": 0.351, "step": 9457 }, { "epoch": 2.655249859629422, "grad_norm": 0.534783661365509, "learning_rate": 3.9694052486060453e-07, "loss": 0.3387, "step": 9458 }, { "epoch": 2.6555306007860753, "grad_norm": 0.5146978497505188, "learning_rate": 3.9630298035916503e-07, "loss": 0.3371, "step": 9459 }, { "epoch": 2.6558113419427287, "grad_norm": 0.5695974826812744, "learning_rate": 3.956659271356422e-07, "loss": 0.327, "step": 9460 }, { "epoch": 2.6560920830993826, "grad_norm": 0.5320312976837158, "learning_rate": 3.950293652580195e-07, "loss": 0.3497, "step": 9461 }, { "epoch": 2.656372824256036, "grad_norm": 0.5372816324234009, "learning_rate": 3.9439329479422585e-07, "loss": 0.3192, "step": 9462 }, { "epoch": 2.6566535654126895, "grad_norm": 0.6621048450469971, "learning_rate": 3.937577158121408e-07, "loss": 0.3481, "step": 9463 }, { "epoch": 2.656934306569343, "grad_norm": 0.5602954030036926, "learning_rate": 3.9312262837958746e-07, "loss": 0.3059, "step": 9464 }, { "epoch": 2.6572150477259964, "grad_norm": 0.5516539812088013, "learning_rate": 3.924880325643382e-07, "loss": 0.2998, "step": 9465 }, { "epoch": 2.6574957888826503, "grad_norm": 0.5282114148139954, "learning_rate": 3.918539284341144e-07, "loss": 0.3176, "step": 9466 }, { "epoch": 2.6577765300393037, "grad_norm": 0.5805308222770691, "learning_rate": 3.912203160565825e-07, "loss": 0.2986, "step": 9467 }, { "epoch": 2.6580572711959576, "grad_norm": 0.5293039083480835, "learning_rate": 3.9058719549935953e-07, "loss": 0.3304, "step": 9468 }, { "epoch": 2.658338012352611, "grad_norm": 0.5936756134033203, "learning_rate": 3.8995456683000696e-07, "loss": 0.3178, "step": 9469 }, { "epoch": 2.6586187535092645, "grad_norm": 0.502144455909729, "learning_rate": 3.893224301160342e-07, "loss": 0.3548, "step": 9470 }, { "epoch": 2.658899494665918, "grad_norm": 0.5930432081222534, "learning_rate": 3.886907854249e-07, "loss": 0.3562, "step": 9471 }, { "epoch": 2.6591802358225713, "grad_norm": 0.5416750311851501, "learning_rate": 3.8805963282400936e-07, "loss": 0.3252, "step": 9472 }, { "epoch": 2.6594609769792252, "grad_norm": 0.5488835573196411, "learning_rate": 3.874289723807151e-07, "loss": 0.3299, "step": 9473 }, { "epoch": 2.6597417181358787, "grad_norm": 0.5911818146705627, "learning_rate": 3.8679880416231666e-07, "loss": 0.3079, "step": 9474 }, { "epoch": 2.6600224592925326, "grad_norm": 0.6673837304115295, "learning_rate": 3.8616912823606357e-07, "loss": 0.3159, "step": 9475 }, { "epoch": 2.660303200449186, "grad_norm": 0.5358628034591675, "learning_rate": 3.855399446691488e-07, "loss": 0.3095, "step": 9476 }, { "epoch": 2.6605839416058394, "grad_norm": 0.5243021845817566, "learning_rate": 3.849112535287153e-07, "loss": 0.33, "step": 9477 }, { "epoch": 2.660864682762493, "grad_norm": 0.5456988215446472, "learning_rate": 3.8428305488185327e-07, "loss": 0.323, "step": 9478 }, { "epoch": 2.6611454239191463, "grad_norm": 0.57991623878479, "learning_rate": 3.836553487956013e-07, "loss": 0.3371, "step": 9479 }, { "epoch": 2.6614261650758, "grad_norm": 0.492904931306839, "learning_rate": 3.830281353369425e-07, "loss": 0.3397, "step": 9480 }, { "epoch": 2.6617069062324537, "grad_norm": 0.5380684733390808, "learning_rate": 3.824014145728111e-07, "loss": 0.3147, "step": 9481 }, { "epoch": 2.661987647389107, "grad_norm": 0.5540799498558044, "learning_rate": 3.817751865700847e-07, "loss": 0.3165, "step": 9482 }, { "epoch": 2.662268388545761, "grad_norm": 0.5207505226135254, "learning_rate": 3.8114945139559214e-07, "loss": 0.3543, "step": 9483 }, { "epoch": 2.6625491297024144, "grad_norm": 0.587072491645813, "learning_rate": 3.8052420911610767e-07, "loss": 0.3555, "step": 9484 }, { "epoch": 2.662829870859068, "grad_norm": 0.47210097312927246, "learning_rate": 3.7989945979835243e-07, "loss": 0.337, "step": 9485 }, { "epoch": 2.6631106120157213, "grad_norm": 0.5797653794288635, "learning_rate": 3.7927520350899693e-07, "loss": 0.3285, "step": 9486 }, { "epoch": 2.663391353172375, "grad_norm": 0.5478522777557373, "learning_rate": 3.786514403146563e-07, "loss": 0.2741, "step": 9487 }, { "epoch": 2.6636720943290286, "grad_norm": 0.5957590341567993, "learning_rate": 3.780281702818966e-07, "loss": 0.3463, "step": 9488 }, { "epoch": 2.663952835485682, "grad_norm": 0.5520449280738831, "learning_rate": 3.774053934772276e-07, "loss": 0.3376, "step": 9489 }, { "epoch": 2.664233576642336, "grad_norm": 0.5618157982826233, "learning_rate": 3.767831099671099e-07, "loss": 0.3067, "step": 9490 }, { "epoch": 2.6645143177989894, "grad_norm": 0.5358468294143677, "learning_rate": 3.7616131981794925e-07, "loss": 0.3111, "step": 9491 }, { "epoch": 2.664795058955643, "grad_norm": 0.5917156934738159, "learning_rate": 3.7554002309609707e-07, "loss": 0.332, "step": 9492 }, { "epoch": 2.6650758001122963, "grad_norm": 0.5606355667114258, "learning_rate": 3.749192198678575e-07, "loss": 0.2889, "step": 9493 }, { "epoch": 2.66535654126895, "grad_norm": 0.5559099316596985, "learning_rate": 3.742989101994765e-07, "loss": 0.3282, "step": 9494 }, { "epoch": 2.6656372824256036, "grad_norm": 0.5656825304031372, "learning_rate": 3.7367909415715054e-07, "loss": 0.3306, "step": 9495 }, { "epoch": 2.665918023582257, "grad_norm": 0.5568356513977051, "learning_rate": 3.7305977180702223e-07, "loss": 0.328, "step": 9496 }, { "epoch": 2.666198764738911, "grad_norm": 0.5428913235664368, "learning_rate": 3.724409432151832e-07, "loss": 0.3465, "step": 9497 }, { "epoch": 2.6664795058955644, "grad_norm": 0.6108860373497009, "learning_rate": 3.718226084476689e-07, "loss": 0.3329, "step": 9498 }, { "epoch": 2.666760247052218, "grad_norm": 0.5239104628562927, "learning_rate": 3.7120476757046496e-07, "loss": 0.3625, "step": 9499 }, { "epoch": 2.6670409882088713, "grad_norm": 0.6110329627990723, "learning_rate": 3.7058742064950417e-07, "loss": 0.3056, "step": 9500 }, { "epoch": 2.667321729365525, "grad_norm": 0.5739771127700806, "learning_rate": 3.6997056775066486e-07, "loss": 0.3379, "step": 9501 }, { "epoch": 2.6676024705221786, "grad_norm": 0.5730254650115967, "learning_rate": 3.6935420893977503e-07, "loss": 0.3685, "step": 9502 }, { "epoch": 2.667883211678832, "grad_norm": 0.568720281124115, "learning_rate": 3.68738344282607e-07, "loss": 0.33, "step": 9503 }, { "epoch": 2.668163952835486, "grad_norm": 0.5455431342124939, "learning_rate": 3.6812297384488326e-07, "loss": 0.3252, "step": 9504 }, { "epoch": 2.6684446939921393, "grad_norm": 0.5412228107452393, "learning_rate": 3.6750809769227237e-07, "loss": 0.3587, "step": 9505 }, { "epoch": 2.668725435148793, "grad_norm": 0.5856388211250305, "learning_rate": 3.6689371589039013e-07, "loss": 0.359, "step": 9506 }, { "epoch": 2.6690061763054462, "grad_norm": 0.5271070003509521, "learning_rate": 3.6627982850479805e-07, "loss": 0.3157, "step": 9507 }, { "epoch": 2.6692869174620997, "grad_norm": 0.551708459854126, "learning_rate": 3.656664356010081e-07, "loss": 0.301, "step": 9508 }, { "epoch": 2.6695676586187536, "grad_norm": 0.5328904986381531, "learning_rate": 3.650535372444769e-07, "loss": 0.3416, "step": 9509 }, { "epoch": 2.669848399775407, "grad_norm": 0.5363731384277344, "learning_rate": 3.6444113350060985e-07, "loss": 0.3554, "step": 9510 }, { "epoch": 2.670129140932061, "grad_norm": 0.5621405839920044, "learning_rate": 3.6382922443475743e-07, "loss": 0.3185, "step": 9511 }, { "epoch": 2.6704098820887143, "grad_norm": 0.640352189540863, "learning_rate": 3.632178101122208e-07, "loss": 0.2812, "step": 9512 }, { "epoch": 2.6706906232453678, "grad_norm": 0.5635489821434021, "learning_rate": 3.6260689059824495e-07, "loss": 0.3339, "step": 9513 }, { "epoch": 2.670971364402021, "grad_norm": 0.5003281831741333, "learning_rate": 3.619964659580233e-07, "loss": 0.3388, "step": 9514 }, { "epoch": 2.6712521055586746, "grad_norm": 0.5414652228355408, "learning_rate": 3.6138653625669764e-07, "loss": 0.3217, "step": 9515 }, { "epoch": 2.6715328467153285, "grad_norm": 0.5872387886047363, "learning_rate": 3.607771015593542e-07, "loss": 0.3054, "step": 9516 }, { "epoch": 2.671813587871982, "grad_norm": 0.5588954091072083, "learning_rate": 3.601681619310299e-07, "loss": 0.34, "step": 9517 }, { "epoch": 2.672094329028636, "grad_norm": 0.5404657125473022, "learning_rate": 3.5955971743670605e-07, "loss": 0.3437, "step": 9518 }, { "epoch": 2.6723750701852893, "grad_norm": 0.5519921183586121, "learning_rate": 3.589517681413118e-07, "loss": 0.3459, "step": 9519 }, { "epoch": 2.6726558113419427, "grad_norm": 0.5478466153144836, "learning_rate": 3.583443141097248e-07, "loss": 0.3472, "step": 9520 }, { "epoch": 2.672936552498596, "grad_norm": 0.557296872138977, "learning_rate": 3.57737355406767e-07, "loss": 0.298, "step": 9521 }, { "epoch": 2.6732172936552496, "grad_norm": 0.5240044593811035, "learning_rate": 3.571308920972111e-07, "loss": 0.3584, "step": 9522 }, { "epoch": 2.6734980348119035, "grad_norm": 0.5655061602592468, "learning_rate": 3.565249242457736e-07, "loss": 0.3218, "step": 9523 }, { "epoch": 2.673778775968557, "grad_norm": 0.5311112403869629, "learning_rate": 3.5591945191712105e-07, "loss": 0.3307, "step": 9524 }, { "epoch": 2.6740595171252104, "grad_norm": 0.547884464263916, "learning_rate": 3.553144751758647e-07, "loss": 0.3308, "step": 9525 }, { "epoch": 2.6743402582818643, "grad_norm": 0.5580441355705261, "learning_rate": 3.547099940865639e-07, "loss": 0.353, "step": 9526 }, { "epoch": 2.6746209994385177, "grad_norm": 0.5507350564002991, "learning_rate": 3.54106008713726e-07, "loss": 0.3312, "step": 9527 }, { "epoch": 2.674901740595171, "grad_norm": 0.5310389399528503, "learning_rate": 3.5350251912180277e-07, "loss": 0.3565, "step": 9528 }, { "epoch": 2.6751824817518246, "grad_norm": 0.570912778377533, "learning_rate": 3.5289952537519654e-07, "loss": 0.3128, "step": 9529 }, { "epoch": 2.6754632229084785, "grad_norm": 0.5651665329933167, "learning_rate": 3.5229702753825536e-07, "loss": 0.3056, "step": 9530 }, { "epoch": 2.675743964065132, "grad_norm": 0.5954676270484924, "learning_rate": 3.5169502567527215e-07, "loss": 0.3314, "step": 9531 }, { "epoch": 2.6760247052217854, "grad_norm": 0.5380265116691589, "learning_rate": 3.5109351985049054e-07, "loss": 0.3208, "step": 9532 }, { "epoch": 2.6763054463784393, "grad_norm": 0.6027633547782898, "learning_rate": 3.504925101280981e-07, "loss": 0.3069, "step": 9533 }, { "epoch": 2.6765861875350927, "grad_norm": 0.5807909965515137, "learning_rate": 3.4989199657223307e-07, "loss": 0.3048, "step": 9534 }, { "epoch": 2.676866928691746, "grad_norm": 0.4826158881187439, "learning_rate": 3.4929197924697623e-07, "loss": 0.3025, "step": 9535 }, { "epoch": 2.6771476698483996, "grad_norm": 0.6137301921844482, "learning_rate": 3.486924582163581e-07, "loss": 0.3067, "step": 9536 }, { "epoch": 2.6774284110050535, "grad_norm": 0.5462205410003662, "learning_rate": 3.48093433544357e-07, "loss": 0.291, "step": 9537 }, { "epoch": 2.677709152161707, "grad_norm": 0.5179881453514099, "learning_rate": 3.474949052948956e-07, "loss": 0.2973, "step": 9538 }, { "epoch": 2.6779898933183603, "grad_norm": 0.514231264591217, "learning_rate": 3.4689687353184675e-07, "loss": 0.3531, "step": 9539 }, { "epoch": 2.6782706344750142, "grad_norm": 0.5977672934532166, "learning_rate": 3.462993383190277e-07, "loss": 0.3408, "step": 9540 }, { "epoch": 2.6785513756316677, "grad_norm": 0.5542689561843872, "learning_rate": 3.4570229972020306e-07, "loss": 0.3148, "step": 9541 }, { "epoch": 2.678832116788321, "grad_norm": 0.5289836525917053, "learning_rate": 3.451057577990868e-07, "loss": 0.3332, "step": 9542 }, { "epoch": 2.6791128579449746, "grad_norm": 0.611068844795227, "learning_rate": 3.4450971261933643e-07, "loss": 0.3282, "step": 9543 }, { "epoch": 2.6793935991016284, "grad_norm": 0.5822266936302185, "learning_rate": 3.4391416424455925e-07, "loss": 0.3309, "step": 9544 }, { "epoch": 2.679674340258282, "grad_norm": 0.6132161617279053, "learning_rate": 3.433191127383079e-07, "loss": 0.3556, "step": 9545 }, { "epoch": 2.6799550814149353, "grad_norm": 0.5779435038566589, "learning_rate": 3.427245581640831e-07, "loss": 0.3137, "step": 9546 }, { "epoch": 2.680235822571589, "grad_norm": 0.5796428322792053, "learning_rate": 3.4213050058533203e-07, "loss": 0.3158, "step": 9547 }, { "epoch": 2.6805165637282427, "grad_norm": 0.6340121626853943, "learning_rate": 3.415369400654478e-07, "loss": 0.3287, "step": 9548 }, { "epoch": 2.680797304884896, "grad_norm": 0.560840368270874, "learning_rate": 3.4094387666777305e-07, "loss": 0.3184, "step": 9549 }, { "epoch": 2.6810780460415495, "grad_norm": 0.5851090550422668, "learning_rate": 3.4035131045559445e-07, "loss": 0.3167, "step": 9550 }, { "epoch": 2.6813587871982034, "grad_norm": 0.5190525650978088, "learning_rate": 3.39759241492148e-07, "loss": 0.2957, "step": 9551 }, { "epoch": 2.681639528354857, "grad_norm": 0.5649513006210327, "learning_rate": 3.3916766984061546e-07, "loss": 0.3569, "step": 9552 }, { "epoch": 2.6819202695115103, "grad_norm": 0.5288940072059631, "learning_rate": 3.3857659556412457e-07, "loss": 0.3483, "step": 9553 }, { "epoch": 2.682201010668164, "grad_norm": 0.5213239192962646, "learning_rate": 3.379860187257517e-07, "loss": 0.2801, "step": 9554 }, { "epoch": 2.6824817518248176, "grad_norm": 0.5677089095115662, "learning_rate": 3.3739593938852065e-07, "loss": 0.3039, "step": 9555 }, { "epoch": 2.682762492981471, "grad_norm": 0.5153822302818298, "learning_rate": 3.368063576153996e-07, "loss": 0.3373, "step": 9556 }, { "epoch": 2.6830432341381245, "grad_norm": 0.5741904377937317, "learning_rate": 3.362172734693059e-07, "loss": 0.2974, "step": 9557 }, { "epoch": 2.683323975294778, "grad_norm": 0.6181908845901489, "learning_rate": 3.35628687013102e-07, "loss": 0.3169, "step": 9558 }, { "epoch": 2.683604716451432, "grad_norm": 0.5768583416938782, "learning_rate": 3.3504059830960003e-07, "loss": 0.3216, "step": 9559 }, { "epoch": 2.6838854576080853, "grad_norm": 0.5114100575447083, "learning_rate": 3.344530074215546e-07, "loss": 0.323, "step": 9560 }, { "epoch": 2.684166198764739, "grad_norm": 0.5469650626182556, "learning_rate": 3.3386591441167184e-07, "loss": 0.3455, "step": 9561 }, { "epoch": 2.6844469399213926, "grad_norm": 0.6090538501739502, "learning_rate": 3.3327931934260206e-07, "loss": 0.3312, "step": 9562 }, { "epoch": 2.684727681078046, "grad_norm": 0.5837964415550232, "learning_rate": 3.3269322227694244e-07, "loss": 0.3085, "step": 9563 }, { "epoch": 2.6850084222346995, "grad_norm": 0.5404969453811646, "learning_rate": 3.321076232772386e-07, "loss": 0.3169, "step": 9564 }, { "epoch": 2.685289163391353, "grad_norm": 0.5449265241622925, "learning_rate": 3.315225224059809e-07, "loss": 0.3241, "step": 9565 }, { "epoch": 2.685569904548007, "grad_norm": 0.5977044701576233, "learning_rate": 3.309379197256085e-07, "loss": 0.3257, "step": 9566 }, { "epoch": 2.6858506457046603, "grad_norm": 0.5311598181724548, "learning_rate": 3.3035381529850697e-07, "loss": 0.3325, "step": 9567 }, { "epoch": 2.686131386861314, "grad_norm": 0.542149007320404, "learning_rate": 3.2977020918700644e-07, "loss": 0.3159, "step": 9568 }, { "epoch": 2.6864121280179676, "grad_norm": 0.5439414978027344, "learning_rate": 3.2918710145338817e-07, "loss": 0.2904, "step": 9569 }, { "epoch": 2.686692869174621, "grad_norm": 0.5617645978927612, "learning_rate": 3.286044921598752e-07, "loss": 0.3225, "step": 9570 }, { "epoch": 2.6869736103312745, "grad_norm": 0.5254599452018738, "learning_rate": 3.280223813686423e-07, "loss": 0.2832, "step": 9571 }, { "epoch": 2.687254351487928, "grad_norm": 0.5410608053207397, "learning_rate": 3.2744076914180746e-07, "loss": 0.3792, "step": 9572 }, { "epoch": 2.687535092644582, "grad_norm": 0.6197337508201599, "learning_rate": 3.268596555414372e-07, "loss": 0.3085, "step": 9573 }, { "epoch": 2.6878158338012352, "grad_norm": 0.5445768237113953, "learning_rate": 3.2627904062954463e-07, "loss": 0.311, "step": 9574 }, { "epoch": 2.6880965749578887, "grad_norm": 0.5660192966461182, "learning_rate": 3.256989244680875e-07, "loss": 0.3236, "step": 9575 }, { "epoch": 2.6883773161145426, "grad_norm": 0.5556873679161072, "learning_rate": 3.251193071189751e-07, "loss": 0.3166, "step": 9576 }, { "epoch": 2.688658057271196, "grad_norm": 0.6064473986625671, "learning_rate": 3.2454018864405745e-07, "loss": 0.2923, "step": 9577 }, { "epoch": 2.6889387984278494, "grad_norm": 0.5116872191429138, "learning_rate": 3.239615691051379e-07, "loss": 0.329, "step": 9578 }, { "epoch": 2.689219539584503, "grad_norm": 0.5713472962379456, "learning_rate": 3.233834485639603e-07, "loss": 0.3312, "step": 9579 }, { "epoch": 2.6895002807411568, "grad_norm": 0.5208572149276733, "learning_rate": 3.2280582708221817e-07, "loss": 0.3446, "step": 9580 }, { "epoch": 2.68978102189781, "grad_norm": 0.5725234150886536, "learning_rate": 3.2222870472155386e-07, "loss": 0.3107, "step": 9581 }, { "epoch": 2.6900617630544637, "grad_norm": 0.564706027507782, "learning_rate": 3.2165208154355253e-07, "loss": 0.3257, "step": 9582 }, { "epoch": 2.6903425042111175, "grad_norm": 0.6131207346916199, "learning_rate": 3.2107595760974944e-07, "loss": 0.3005, "step": 9583 }, { "epoch": 2.690623245367771, "grad_norm": 0.5989841222763062, "learning_rate": 3.2050033298162307e-07, "loss": 0.3195, "step": 9584 }, { "epoch": 2.6909039865244244, "grad_norm": 0.52616947889328, "learning_rate": 3.199252077206011e-07, "loss": 0.3425, "step": 9585 }, { "epoch": 2.691184727681078, "grad_norm": 0.5609388947486877, "learning_rate": 3.1935058188805825e-07, "loss": 0.3325, "step": 9586 }, { "epoch": 2.6914654688377317, "grad_norm": 0.46792733669281006, "learning_rate": 3.187764555453132e-07, "loss": 0.3408, "step": 9587 }, { "epoch": 2.691746209994385, "grad_norm": 0.51032555103302, "learning_rate": 3.182028287536348e-07, "loss": 0.311, "step": 9588 }, { "epoch": 2.6920269511510386, "grad_norm": 0.5743250846862793, "learning_rate": 3.176297015742369e-07, "loss": 0.3284, "step": 9589 }, { "epoch": 2.6923076923076925, "grad_norm": 0.5259221196174622, "learning_rate": 3.1705707406827825e-07, "loss": 0.3167, "step": 9590 }, { "epoch": 2.692588433464346, "grad_norm": 0.49799326062202454, "learning_rate": 3.164849462968683e-07, "loss": 0.3735, "step": 9591 }, { "epoch": 2.6928691746209994, "grad_norm": 0.5619614720344543, "learning_rate": 3.159133183210594e-07, "loss": 0.386, "step": 9592 }, { "epoch": 2.693149915777653, "grad_norm": 0.5929741859436035, "learning_rate": 3.153421902018533e-07, "loss": 0.3129, "step": 9593 }, { "epoch": 2.6934306569343067, "grad_norm": 0.5148611664772034, "learning_rate": 3.147715620001962e-07, "loss": 0.2732, "step": 9594 }, { "epoch": 2.69371139809096, "grad_norm": 0.511158287525177, "learning_rate": 3.1420143377698274e-07, "loss": 0.3327, "step": 9595 }, { "epoch": 2.6939921392476136, "grad_norm": 0.5217373967170715, "learning_rate": 3.1363180559305374e-07, "loss": 0.308, "step": 9596 }, { "epoch": 2.6942728804042675, "grad_norm": 0.5783630013465881, "learning_rate": 3.1306267750919496e-07, "loss": 0.3468, "step": 9597 }, { "epoch": 2.694553621560921, "grad_norm": 0.5551561713218689, "learning_rate": 3.124940495861417e-07, "loss": 0.303, "step": 9598 }, { "epoch": 2.6948343627175744, "grad_norm": 0.5402063727378845, "learning_rate": 3.119259218845733e-07, "loss": 0.3598, "step": 9599 }, { "epoch": 2.695115103874228, "grad_norm": 0.5575801134109497, "learning_rate": 3.1135829446511776e-07, "loss": 0.2999, "step": 9600 }, { "epoch": 2.6953958450308813, "grad_norm": 0.5121349096298218, "learning_rate": 3.1079116738834845e-07, "loss": 0.2835, "step": 9601 }, { "epoch": 2.695676586187535, "grad_norm": 0.5719684362411499, "learning_rate": 3.1022454071478467e-07, "loss": 0.3326, "step": 9602 }, { "epoch": 2.6959573273441886, "grad_norm": 0.5816136002540588, "learning_rate": 3.0965841450489474e-07, "loss": 0.3486, "step": 9603 }, { "epoch": 2.6962380685008425, "grad_norm": 0.5508344173431396, "learning_rate": 3.090927888190909e-07, "loss": 0.3371, "step": 9604 }, { "epoch": 2.696518809657496, "grad_norm": 0.5628803372383118, "learning_rate": 3.0852766371773423e-07, "loss": 0.3606, "step": 9605 }, { "epoch": 2.6967995508141493, "grad_norm": 0.5306931138038635, "learning_rate": 3.07963039261131e-07, "loss": 0.3052, "step": 9606 }, { "epoch": 2.697080291970803, "grad_norm": 0.5864467024803162, "learning_rate": 3.0739891550953415e-07, "loss": 0.2937, "step": 9607 }, { "epoch": 2.6973610331274562, "grad_norm": 0.5703273415565491, "learning_rate": 3.0683529252314436e-07, "loss": 0.321, "step": 9608 }, { "epoch": 2.69764177428411, "grad_norm": 0.5476502776145935, "learning_rate": 3.062721703621063e-07, "loss": 0.3399, "step": 9609 }, { "epoch": 2.6979225154407636, "grad_norm": 0.5061193108558655, "learning_rate": 3.0570954908651474e-07, "loss": 0.2823, "step": 9610 }, { "epoch": 2.6982032565974174, "grad_norm": 0.6391820311546326, "learning_rate": 3.0514742875640825e-07, "loss": 0.3066, "step": 9611 }, { "epoch": 2.698483997754071, "grad_norm": 0.5198980569839478, "learning_rate": 3.0458580943177165e-07, "loss": 0.3138, "step": 9612 }, { "epoch": 2.6987647389107243, "grad_norm": 0.5558304190635681, "learning_rate": 3.040246911725397e-07, "loss": 0.3262, "step": 9613 }, { "epoch": 2.6990454800673778, "grad_norm": 0.5105278491973877, "learning_rate": 3.034640740385897e-07, "loss": 0.3743, "step": 9614 }, { "epoch": 2.699326221224031, "grad_norm": 0.5367564558982849, "learning_rate": 3.029039580897486e-07, "loss": 0.3189, "step": 9615 }, { "epoch": 2.699606962380685, "grad_norm": 0.5472331643104553, "learning_rate": 3.02344343385787e-07, "loss": 0.3645, "step": 9616 }, { "epoch": 2.6998877035373385, "grad_norm": 0.5079192519187927, "learning_rate": 3.017852299864249e-07, "loss": 0.3778, "step": 9617 }, { "epoch": 2.700168444693992, "grad_norm": 0.4927424490451813, "learning_rate": 3.012266179513268e-07, "loss": 0.295, "step": 9618 }, { "epoch": 2.700449185850646, "grad_norm": 0.550288736820221, "learning_rate": 3.006685073401039e-07, "loss": 0.3289, "step": 9619 }, { "epoch": 2.7007299270072993, "grad_norm": 0.467318594455719, "learning_rate": 3.001108982123152e-07, "loss": 0.308, "step": 9620 }, { "epoch": 2.7010106681639527, "grad_norm": 0.5019981265068054, "learning_rate": 2.995537906274637e-07, "loss": 0.3235, "step": 9621 }, { "epoch": 2.701291409320606, "grad_norm": 0.563401460647583, "learning_rate": 2.989971846450024e-07, "loss": 0.3368, "step": 9622 }, { "epoch": 2.70157215047726, "grad_norm": 0.4814218282699585, "learning_rate": 2.984410803243282e-07, "loss": 0.2999, "step": 9623 }, { "epoch": 2.7018528916339135, "grad_norm": 0.6057947874069214, "learning_rate": 2.9788547772478416e-07, "loss": 0.3172, "step": 9624 }, { "epoch": 2.702133632790567, "grad_norm": 0.6055577993392944, "learning_rate": 2.973303769056618e-07, "loss": 0.35, "step": 9625 }, { "epoch": 2.702414373947221, "grad_norm": 0.5092797875404358, "learning_rate": 2.9677577792619704e-07, "loss": 0.3372, "step": 9626 }, { "epoch": 2.7026951151038743, "grad_norm": 0.5349711179733276, "learning_rate": 2.962216808455748e-07, "loss": 0.3499, "step": 9627 }, { "epoch": 2.7029758562605277, "grad_norm": 0.5831418037414551, "learning_rate": 2.956680857229233e-07, "loss": 0.2988, "step": 9628 }, { "epoch": 2.703256597417181, "grad_norm": 0.4941293001174927, "learning_rate": 2.951149926173197e-07, "loss": 0.3223, "step": 9629 }, { "epoch": 2.703537338573835, "grad_norm": 0.5175655484199524, "learning_rate": 2.9456240158778627e-07, "loss": 0.3444, "step": 9630 }, { "epoch": 2.7038180797304885, "grad_norm": 0.5821202397346497, "learning_rate": 2.9401031269329085e-07, "loss": 0.3082, "step": 9631 }, { "epoch": 2.704098820887142, "grad_norm": 0.5866091847419739, "learning_rate": 2.934587259927518e-07, "loss": 0.3112, "step": 9632 }, { "epoch": 2.704379562043796, "grad_norm": 0.536228597164154, "learning_rate": 2.929076415450294e-07, "loss": 0.326, "step": 9633 }, { "epoch": 2.7046603032004493, "grad_norm": 0.6105281710624695, "learning_rate": 2.9235705940893144e-07, "loss": 0.2728, "step": 9634 }, { "epoch": 2.7049410443571027, "grad_norm": 0.5519971251487732, "learning_rate": 2.918069796432138e-07, "loss": 0.3387, "step": 9635 }, { "epoch": 2.705221785513756, "grad_norm": 0.5336389541625977, "learning_rate": 2.912574023065762e-07, "loss": 0.3229, "step": 9636 }, { "epoch": 2.70550252667041, "grad_norm": 0.5339955687522888, "learning_rate": 2.9070832745766774e-07, "loss": 0.3273, "step": 9637 }, { "epoch": 2.7057832678270635, "grad_norm": 0.6147384643554688, "learning_rate": 2.901597551550811e-07, "loss": 0.2974, "step": 9638 }, { "epoch": 2.706064008983717, "grad_norm": 0.6201896071434021, "learning_rate": 2.8961168545735606e-07, "loss": 0.3193, "step": 9639 }, { "epoch": 2.706344750140371, "grad_norm": 0.5300193428993225, "learning_rate": 2.8906411842298144e-07, "loss": 0.3012, "step": 9640 }, { "epoch": 2.7066254912970242, "grad_norm": 0.52329021692276, "learning_rate": 2.8851705411038713e-07, "loss": 0.3662, "step": 9641 }, { "epoch": 2.7069062324536777, "grad_norm": 0.5149202942848206, "learning_rate": 2.879704925779553e-07, "loss": 0.3317, "step": 9642 }, { "epoch": 2.707186973610331, "grad_norm": 0.5491557121276855, "learning_rate": 2.874244338840093e-07, "loss": 0.3249, "step": 9643 }, { "epoch": 2.707467714766985, "grad_norm": 0.6020238399505615, "learning_rate": 2.8687887808682315e-07, "loss": 0.2963, "step": 9644 }, { "epoch": 2.7077484559236384, "grad_norm": 0.510621964931488, "learning_rate": 2.8633382524461406e-07, "loss": 0.309, "step": 9645 }, { "epoch": 2.708029197080292, "grad_norm": 0.5275996923446655, "learning_rate": 2.8578927541554614e-07, "loss": 0.3554, "step": 9646 }, { "epoch": 2.7083099382369458, "grad_norm": 0.5425759553909302, "learning_rate": 2.8524522865773177e-07, "loss": 0.3255, "step": 9647 }, { "epoch": 2.708590679393599, "grad_norm": 0.5639587640762329, "learning_rate": 2.847016850292261e-07, "loss": 0.3371, "step": 9648 }, { "epoch": 2.7088714205502527, "grad_norm": 0.5653936266899109, "learning_rate": 2.8415864458803566e-07, "loss": 0.3142, "step": 9649 }, { "epoch": 2.709152161706906, "grad_norm": 0.5165392160415649, "learning_rate": 2.8361610739210845e-07, "loss": 0.3942, "step": 9650 }, { "epoch": 2.7094329028635595, "grad_norm": 0.5510459542274475, "learning_rate": 2.8307407349934033e-07, "loss": 0.2903, "step": 9651 }, { "epoch": 2.7097136440202134, "grad_norm": 0.49656468629837036, "learning_rate": 2.8253254296757514e-07, "loss": 0.3084, "step": 9652 }, { "epoch": 2.709994385176867, "grad_norm": 0.5687635540962219, "learning_rate": 2.8199151585459996e-07, "loss": 0.3359, "step": 9653 }, { "epoch": 2.7102751263335207, "grad_norm": 0.5923601984977722, "learning_rate": 2.814509922181519e-07, "loss": 0.3585, "step": 9654 }, { "epoch": 2.710555867490174, "grad_norm": 0.5608054399490356, "learning_rate": 2.8091097211591034e-07, "loss": 0.3368, "step": 9655 }, { "epoch": 2.7108366086468276, "grad_norm": 0.5620177388191223, "learning_rate": 2.8037145560550425e-07, "loss": 0.3204, "step": 9656 }, { "epoch": 2.711117349803481, "grad_norm": 0.5754275918006897, "learning_rate": 2.79832442744507e-07, "loss": 0.3021, "step": 9657 }, { "epoch": 2.7113980909601345, "grad_norm": 0.551679790019989, "learning_rate": 2.7929393359043875e-07, "loss": 0.3189, "step": 9658 }, { "epoch": 2.7116788321167884, "grad_norm": 0.5252971649169922, "learning_rate": 2.787559282007657e-07, "loss": 0.3555, "step": 9659 }, { "epoch": 2.711959573273442, "grad_norm": 0.5090240836143494, "learning_rate": 2.7821842663290076e-07, "loss": 0.3369, "step": 9660 }, { "epoch": 2.7122403144300957, "grad_norm": 0.6105270981788635, "learning_rate": 2.77681428944202e-07, "loss": 0.4077, "step": 9661 }, { "epoch": 2.712521055586749, "grad_norm": 0.597703754901886, "learning_rate": 2.7714493519197585e-07, "loss": 0.2904, "step": 9662 }, { "epoch": 2.7128017967434026, "grad_norm": 0.5896347165107727, "learning_rate": 2.766089454334714e-07, "loss": 0.3206, "step": 9663 }, { "epoch": 2.713082537900056, "grad_norm": 0.5729497075080872, "learning_rate": 2.760734597258885e-07, "loss": 0.3567, "step": 9664 }, { "epoch": 2.7133632790567095, "grad_norm": 0.48528119921684265, "learning_rate": 2.7553847812636924e-07, "loss": 0.3175, "step": 9665 }, { "epoch": 2.7136440202133634, "grad_norm": 0.49910029768943787, "learning_rate": 2.750040006920046e-07, "loss": 0.3582, "step": 9666 }, { "epoch": 2.713924761370017, "grad_norm": 0.5457202792167664, "learning_rate": 2.7447002747983065e-07, "loss": 0.3206, "step": 9667 }, { "epoch": 2.7142055025266703, "grad_norm": 0.5576246976852417, "learning_rate": 2.739365585468279e-07, "loss": 0.3442, "step": 9668 }, { "epoch": 2.714486243683324, "grad_norm": 0.6320303678512573, "learning_rate": 2.7340359394992687e-07, "loss": 0.3267, "step": 9669 }, { "epoch": 2.7147669848399776, "grad_norm": 0.5264926552772522, "learning_rate": 2.728711337460016e-07, "loss": 0.3346, "step": 9670 }, { "epoch": 2.715047725996631, "grad_norm": 0.5229409337043762, "learning_rate": 2.723391779918727e-07, "loss": 0.3021, "step": 9671 }, { "epoch": 2.7153284671532845, "grad_norm": 0.6011293530464172, "learning_rate": 2.7180772674430813e-07, "loss": 0.3007, "step": 9672 }, { "epoch": 2.7156092083099383, "grad_norm": 0.5639219880104065, "learning_rate": 2.712767800600191e-07, "loss": 0.3188, "step": 9673 }, { "epoch": 2.715889949466592, "grad_norm": 0.5480575561523438, "learning_rate": 2.7074633799566654e-07, "loss": 0.3108, "step": 9674 }, { "epoch": 2.7161706906232452, "grad_norm": 0.5085447430610657, "learning_rate": 2.7021640060785505e-07, "loss": 0.3419, "step": 9675 }, { "epoch": 2.716451431779899, "grad_norm": 0.5802251696586609, "learning_rate": 2.6968696795313773e-07, "loss": 0.3111, "step": 9676 }, { "epoch": 2.7167321729365526, "grad_norm": 0.5498368740081787, "learning_rate": 2.6915804008801005e-07, "loss": 0.3295, "step": 9677 }, { "epoch": 2.717012914093206, "grad_norm": 0.5393154621124268, "learning_rate": 2.6862961706891844e-07, "loss": 0.3269, "step": 9678 }, { "epoch": 2.7172936552498594, "grad_norm": 0.5487037897109985, "learning_rate": 2.6810169895225116e-07, "loss": 0.317, "step": 9679 }, { "epoch": 2.7175743964065133, "grad_norm": 0.564078152179718, "learning_rate": 2.6757428579434473e-07, "loss": 0.3155, "step": 9680 }, { "epoch": 2.7178551375631668, "grad_norm": 0.6106180548667908, "learning_rate": 2.670473776514815e-07, "loss": 0.2887, "step": 9681 }, { "epoch": 2.71813587871982, "grad_norm": 0.5764226317405701, "learning_rate": 2.665209745798897e-07, "loss": 0.3344, "step": 9682 }, { "epoch": 2.718416619876474, "grad_norm": 0.5557388067245483, "learning_rate": 2.6599507663574387e-07, "loss": 0.3504, "step": 9683 }, { "epoch": 2.7186973610331275, "grad_norm": 0.5256271362304688, "learning_rate": 2.654696838751647e-07, "loss": 0.3304, "step": 9684 }, { "epoch": 2.718978102189781, "grad_norm": 0.5853570699691772, "learning_rate": 2.64944796354219e-07, "loss": 0.3509, "step": 9685 }, { "epoch": 2.7192588433464344, "grad_norm": 0.5093727707862854, "learning_rate": 2.6442041412891916e-07, "loss": 0.3266, "step": 9686 }, { "epoch": 2.7195395845030883, "grad_norm": 0.54444819688797, "learning_rate": 2.6389653725522433e-07, "loss": 0.3373, "step": 9687 }, { "epoch": 2.7198203256597417, "grad_norm": 0.5237269401550293, "learning_rate": 2.633731657890387e-07, "loss": 0.3299, "step": 9688 }, { "epoch": 2.720101066816395, "grad_norm": 0.5486893653869629, "learning_rate": 2.6285029978621426e-07, "loss": 0.3253, "step": 9689 }, { "epoch": 2.720381807973049, "grad_norm": 0.5041185617446899, "learning_rate": 2.6232793930254686e-07, "loss": 0.3452, "step": 9690 }, { "epoch": 2.7206625491297025, "grad_norm": 0.538627564907074, "learning_rate": 2.618060843937809e-07, "loss": 0.3322, "step": 9691 }, { "epoch": 2.720943290286356, "grad_norm": 0.5140835642814636, "learning_rate": 2.6128473511560337e-07, "loss": 0.3225, "step": 9692 }, { "epoch": 2.7212240314430094, "grad_norm": 0.513480007648468, "learning_rate": 2.607638915236521e-07, "loss": 0.3451, "step": 9693 }, { "epoch": 2.721504772599663, "grad_norm": 0.612460196018219, "learning_rate": 2.6024355367350706e-07, "loss": 0.31, "step": 9694 }, { "epoch": 2.7217855137563167, "grad_norm": 0.5481112599372864, "learning_rate": 2.5972372162069436e-07, "loss": 0.3359, "step": 9695 }, { "epoch": 2.72206625491297, "grad_norm": 0.527559220790863, "learning_rate": 2.592043954206891e-07, "loss": 0.3419, "step": 9696 }, { "epoch": 2.722346996069624, "grad_norm": 0.5547905564308167, "learning_rate": 2.5868557512890855e-07, "loss": 0.3135, "step": 9697 }, { "epoch": 2.7226277372262775, "grad_norm": 0.5954114198684692, "learning_rate": 2.581672608007202e-07, "loss": 0.3347, "step": 9698 }, { "epoch": 2.722908478382931, "grad_norm": 0.5858810544013977, "learning_rate": 2.576494524914347e-07, "loss": 0.306, "step": 9699 }, { "epoch": 2.7231892195395844, "grad_norm": 0.5046973824501038, "learning_rate": 2.5713215025630734e-07, "loss": 0.2988, "step": 9700 }, { "epoch": 2.723469960696238, "grad_norm": 0.6144183278083801, "learning_rate": 2.566153541505445e-07, "loss": 0.3357, "step": 9701 }, { "epoch": 2.7237507018528917, "grad_norm": 0.5328025817871094, "learning_rate": 2.5609906422929263e-07, "loss": 0.2969, "step": 9702 }, { "epoch": 2.724031443009545, "grad_norm": 0.5346313714981079, "learning_rate": 2.555832805476488e-07, "loss": 0.3241, "step": 9703 }, { "epoch": 2.724312184166199, "grad_norm": 0.5095492601394653, "learning_rate": 2.550680031606534e-07, "loss": 0.3276, "step": 9704 }, { "epoch": 2.7245929253228525, "grad_norm": 0.5440551042556763, "learning_rate": 2.5455323212329463e-07, "loss": 0.2901, "step": 9705 }, { "epoch": 2.724873666479506, "grad_norm": 0.5555832982063293, "learning_rate": 2.5403896749050474e-07, "loss": 0.2796, "step": 9706 }, { "epoch": 2.7251544076361593, "grad_norm": 0.5191340446472168, "learning_rate": 2.5352520931716196e-07, "loss": 0.3383, "step": 9707 }, { "epoch": 2.725435148792813, "grad_norm": 0.5697038173675537, "learning_rate": 2.530119576580936e-07, "loss": 0.3102, "step": 9708 }, { "epoch": 2.7257158899494667, "grad_norm": 0.5125386714935303, "learning_rate": 2.5249921256806976e-07, "loss": 0.3257, "step": 9709 }, { "epoch": 2.72599663110612, "grad_norm": 0.546018660068512, "learning_rate": 2.5198697410180606e-07, "loss": 0.3398, "step": 9710 }, { "epoch": 2.7262773722627736, "grad_norm": 0.5035435557365417, "learning_rate": 2.514752423139677e-07, "loss": 0.3365, "step": 9711 }, { "epoch": 2.7265581134194274, "grad_norm": 0.5132114887237549, "learning_rate": 2.509640172591615e-07, "loss": 0.3278, "step": 9712 }, { "epoch": 2.726838854576081, "grad_norm": 0.5680822134017944, "learning_rate": 2.5045329899194337e-07, "loss": 0.3014, "step": 9713 }, { "epoch": 2.7271195957327343, "grad_norm": 0.49828410148620605, "learning_rate": 2.4994308756681354e-07, "loss": 0.2894, "step": 9714 }, { "epoch": 2.7274003368893878, "grad_norm": 0.6366356611251831, "learning_rate": 2.4943338303821894e-07, "loss": 0.3601, "step": 9715 }, { "epoch": 2.7276810780460417, "grad_norm": 0.5409765243530273, "learning_rate": 2.489241854605523e-07, "loss": 0.3342, "step": 9716 }, { "epoch": 2.727961819202695, "grad_norm": 0.5092395544052124, "learning_rate": 2.484154948881512e-07, "loss": 0.3359, "step": 9717 }, { "epoch": 2.7282425603593485, "grad_norm": 0.5486072301864624, "learning_rate": 2.4790731137530054e-07, "loss": 0.323, "step": 9718 }, { "epoch": 2.7285233015160024, "grad_norm": 0.5532756447792053, "learning_rate": 2.4739963497622977e-07, "loss": 0.3258, "step": 9719 }, { "epoch": 2.728804042672656, "grad_norm": 0.6096033453941345, "learning_rate": 2.4689246574511604e-07, "loss": 0.2796, "step": 9720 }, { "epoch": 2.7290847838293093, "grad_norm": 0.5525265336036682, "learning_rate": 2.4638580373608057e-07, "loss": 0.323, "step": 9721 }, { "epoch": 2.7293655249859627, "grad_norm": 0.5786679983139038, "learning_rate": 2.4587964900319117e-07, "loss": 0.3448, "step": 9722 }, { "epoch": 2.7296462661426166, "grad_norm": 0.5298022031784058, "learning_rate": 2.4537400160046186e-07, "loss": 0.2942, "step": 9723 }, { "epoch": 2.72992700729927, "grad_norm": 0.5679615139961243, "learning_rate": 2.4486886158185175e-07, "loss": 0.3366, "step": 9724 }, { "epoch": 2.7302077484559235, "grad_norm": 0.5167908668518066, "learning_rate": 2.443642290012671e-07, "loss": 0.3446, "step": 9725 }, { "epoch": 2.7304884896125774, "grad_norm": 0.4989408850669861, "learning_rate": 2.4386010391255763e-07, "loss": 0.3423, "step": 9726 }, { "epoch": 2.730769230769231, "grad_norm": 0.46948695182800293, "learning_rate": 2.4335648636952256e-07, "loss": 0.3425, "step": 9727 }, { "epoch": 2.7310499719258843, "grad_norm": 0.5441704988479614, "learning_rate": 2.4285337642590336e-07, "loss": 0.3221, "step": 9728 }, { "epoch": 2.7313307130825377, "grad_norm": 0.4902036786079407, "learning_rate": 2.423507741353881e-07, "loss": 0.3498, "step": 9729 }, { "epoch": 2.7316114542391916, "grad_norm": 0.5878719687461853, "learning_rate": 2.418486795516134e-07, "loss": 0.3362, "step": 9730 }, { "epoch": 2.731892195395845, "grad_norm": 0.5594978928565979, "learning_rate": 2.4134709272815803e-07, "loss": 0.304, "step": 9731 }, { "epoch": 2.7321729365524985, "grad_norm": 0.5778236389160156, "learning_rate": 2.4084601371854867e-07, "loss": 0.3144, "step": 9732 }, { "epoch": 2.7324536777091524, "grad_norm": 0.5218297243118286, "learning_rate": 2.4034544257625805e-07, "loss": 0.3276, "step": 9733 }, { "epoch": 2.732734418865806, "grad_norm": 0.5348289012908936, "learning_rate": 2.398453793547034e-07, "loss": 0.3064, "step": 9734 }, { "epoch": 2.7330151600224593, "grad_norm": 0.530484676361084, "learning_rate": 2.3934582410724827e-07, "loss": 0.3049, "step": 9735 }, { "epoch": 2.7332959011791127, "grad_norm": 0.4909449517726898, "learning_rate": 2.3884677688720216e-07, "loss": 0.3272, "step": 9736 }, { "epoch": 2.7335766423357666, "grad_norm": 0.4785850942134857, "learning_rate": 2.383482377478208e-07, "loss": 0.3365, "step": 9737 }, { "epoch": 2.73385738349242, "grad_norm": 0.5998890995979309, "learning_rate": 2.3785020674230443e-07, "loss": 0.3, "step": 9738 }, { "epoch": 2.7341381246490735, "grad_norm": 0.5561297535896301, "learning_rate": 2.3735268392379995e-07, "loss": 0.3644, "step": 9739 }, { "epoch": 2.7344188658057273, "grad_norm": 0.5706725716590881, "learning_rate": 2.368556693454005e-07, "loss": 0.335, "step": 9740 }, { "epoch": 2.734699606962381, "grad_norm": 0.5220150351524353, "learning_rate": 2.3635916306014362e-07, "loss": 0.3158, "step": 9741 }, { "epoch": 2.7349803481190342, "grad_norm": 0.5619484186172485, "learning_rate": 2.3586316512101416e-07, "loss": 0.3397, "step": 9742 }, { "epoch": 2.7352610892756877, "grad_norm": 0.5048370361328125, "learning_rate": 2.3536767558094142e-07, "loss": 0.3369, "step": 9743 }, { "epoch": 2.735541830432341, "grad_norm": 0.5645469427108765, "learning_rate": 2.3487269449280037e-07, "loss": 0.3028, "step": 9744 }, { "epoch": 2.735822571588995, "grad_norm": 0.5247581005096436, "learning_rate": 2.343782219094143e-07, "loss": 0.3171, "step": 9745 }, { "epoch": 2.7361033127456484, "grad_norm": 0.571412980556488, "learning_rate": 2.3388425788354762e-07, "loss": 0.317, "step": 9746 }, { "epoch": 2.7363840539023023, "grad_norm": 0.5135009288787842, "learning_rate": 2.333908024679149e-07, "loss": 0.2874, "step": 9747 }, { "epoch": 2.7366647950589558, "grad_norm": 0.49205532670021057, "learning_rate": 2.3289785571517398e-07, "loss": 0.3548, "step": 9748 }, { "epoch": 2.736945536215609, "grad_norm": 0.5593763589859009, "learning_rate": 2.3240541767793002e-07, "loss": 0.3716, "step": 9749 }, { "epoch": 2.7372262773722627, "grad_norm": 0.5323165059089661, "learning_rate": 2.319134884087315e-07, "loss": 0.2951, "step": 9750 }, { "epoch": 2.737507018528916, "grad_norm": 0.5595327615737915, "learning_rate": 2.3142206796007484e-07, "loss": 0.3389, "step": 9751 }, { "epoch": 2.73778775968557, "grad_norm": 0.5712656378746033, "learning_rate": 2.3093115638440134e-07, "loss": 0.336, "step": 9752 }, { "epoch": 2.7380685008422234, "grad_norm": 0.5395691394805908, "learning_rate": 2.3044075373409746e-07, "loss": 0.3375, "step": 9753 }, { "epoch": 2.7383492419988773, "grad_norm": 0.6070802211761475, "learning_rate": 2.2995086006149746e-07, "loss": 0.3286, "step": 9754 }, { "epoch": 2.7386299831555307, "grad_norm": 0.6212032437324524, "learning_rate": 2.2946147541887788e-07, "loss": 0.2668, "step": 9755 }, { "epoch": 2.738910724312184, "grad_norm": 0.5324410200119019, "learning_rate": 2.2897259985846355e-07, "loss": 0.3187, "step": 9756 }, { "epoch": 2.7391914654688376, "grad_norm": 0.5174353122711182, "learning_rate": 2.2848423343242498e-07, "loss": 0.3398, "step": 9757 }, { "epoch": 2.739472206625491, "grad_norm": 0.5650909543037415, "learning_rate": 2.2799637619287606e-07, "loss": 0.3077, "step": 9758 }, { "epoch": 2.739752947782145, "grad_norm": 0.5459592938423157, "learning_rate": 2.2750902819187903e-07, "loss": 0.3049, "step": 9759 }, { "epoch": 2.7400336889387984, "grad_norm": 0.5758468508720398, "learning_rate": 2.2702218948144061e-07, "loss": 0.3751, "step": 9760 }, { "epoch": 2.740314430095452, "grad_norm": 0.5374598503112793, "learning_rate": 2.2653586011351259e-07, "loss": 0.3028, "step": 9761 }, { "epoch": 2.7405951712521057, "grad_norm": 0.5994312167167664, "learning_rate": 2.2605004013999343e-07, "loss": 0.3111, "step": 9762 }, { "epoch": 2.740875912408759, "grad_norm": 0.6063372492790222, "learning_rate": 2.2556472961272668e-07, "loss": 0.3272, "step": 9763 }, { "epoch": 2.7411566535654126, "grad_norm": 0.5221341252326965, "learning_rate": 2.2507992858350258e-07, "loss": 0.3493, "step": 9764 }, { "epoch": 2.741437394722066, "grad_norm": 0.5699635148048401, "learning_rate": 2.2459563710405473e-07, "loss": 0.3177, "step": 9765 }, { "epoch": 2.74171813587872, "grad_norm": 0.6553797721862793, "learning_rate": 2.241118552260635e-07, "loss": 0.3737, "step": 9766 }, { "epoch": 2.7419988770353734, "grad_norm": 0.5036535859107971, "learning_rate": 2.2362858300115696e-07, "loss": 0.323, "step": 9767 }, { "epoch": 2.742279618192027, "grad_norm": 0.5880960822105408, "learning_rate": 2.231458204809045e-07, "loss": 0.3428, "step": 9768 }, { "epoch": 2.7425603593486807, "grad_norm": 0.6233805418014526, "learning_rate": 2.2266356771682596e-07, "loss": 0.3174, "step": 9769 }, { "epoch": 2.742841100505334, "grad_norm": 0.5725235939025879, "learning_rate": 2.2218182476038298e-07, "loss": 0.3382, "step": 9770 }, { "epoch": 2.7431218416619876, "grad_norm": 0.5659641027450562, "learning_rate": 2.2170059166298386e-07, "loss": 0.3004, "step": 9771 }, { "epoch": 2.743402582818641, "grad_norm": 0.5155832767486572, "learning_rate": 2.2121986847598364e-07, "loss": 0.329, "step": 9772 }, { "epoch": 2.743683323975295, "grad_norm": 0.48419222235679626, "learning_rate": 2.207396552506813e-07, "loss": 0.3647, "step": 9773 }, { "epoch": 2.7439640651319483, "grad_norm": 0.5316529870033264, "learning_rate": 2.202599520383236e-07, "loss": 0.3126, "step": 9774 }, { "epoch": 2.744244806288602, "grad_norm": 0.5338003039360046, "learning_rate": 2.1978075889009965e-07, "loss": 0.3122, "step": 9775 }, { "epoch": 2.7445255474452557, "grad_norm": 0.6163437962532043, "learning_rate": 2.1930207585714736e-07, "loss": 0.3474, "step": 9776 }, { "epoch": 2.744806288601909, "grad_norm": 0.56536865234375, "learning_rate": 2.1882390299054867e-07, "loss": 0.2856, "step": 9777 }, { "epoch": 2.7450870297585626, "grad_norm": 0.563823938369751, "learning_rate": 2.1834624034133002e-07, "loss": 0.2931, "step": 9778 }, { "epoch": 2.745367770915216, "grad_norm": 0.548951268196106, "learning_rate": 2.1786908796046612e-07, "loss": 0.3462, "step": 9779 }, { "epoch": 2.74564851207187, "grad_norm": 0.5613840818405151, "learning_rate": 2.1739244589887464e-07, "loss": 0.2883, "step": 9780 }, { "epoch": 2.7459292532285233, "grad_norm": 0.5726778507232666, "learning_rate": 2.1691631420742043e-07, "loss": 0.2636, "step": 9781 }, { "epoch": 2.7462099943851768, "grad_norm": 0.5685061812400818, "learning_rate": 2.1644069293691338e-07, "loss": 0.3302, "step": 9782 }, { "epoch": 2.7464907355418307, "grad_norm": 0.5521758794784546, "learning_rate": 2.1596558213810735e-07, "loss": 0.3433, "step": 9783 }, { "epoch": 2.746771476698484, "grad_norm": 0.5658281445503235, "learning_rate": 2.154909818617057e-07, "loss": 0.3486, "step": 9784 }, { "epoch": 2.7470522178551375, "grad_norm": 0.6052652597427368, "learning_rate": 2.1501689215835342e-07, "loss": 0.2884, "step": 9785 }, { "epoch": 2.747332959011791, "grad_norm": 0.5344831347465515, "learning_rate": 2.1454331307864229e-07, "loss": 0.3386, "step": 9786 }, { "epoch": 2.7476137001684444, "grad_norm": 0.553074836730957, "learning_rate": 2.1407024467311077e-07, "loss": 0.3124, "step": 9787 }, { "epoch": 2.7478944413250983, "grad_norm": 0.539045512676239, "learning_rate": 2.1359768699224015e-07, "loss": 0.3238, "step": 9788 }, { "epoch": 2.7481751824817517, "grad_norm": 0.5671885013580322, "learning_rate": 2.1312564008646008e-07, "loss": 0.3491, "step": 9789 }, { "epoch": 2.7484559236384056, "grad_norm": 0.5347363352775574, "learning_rate": 2.126541040061436e-07, "loss": 0.3759, "step": 9790 }, { "epoch": 2.748736664795059, "grad_norm": 0.5233283638954163, "learning_rate": 2.1218307880161105e-07, "loss": 0.3282, "step": 9791 }, { "epoch": 2.7490174059517125, "grad_norm": 0.5441765785217285, "learning_rate": 2.1171256452312717e-07, "loss": 0.3173, "step": 9792 }, { "epoch": 2.749298147108366, "grad_norm": 0.4917449951171875, "learning_rate": 2.1124256122090125e-07, "loss": 0.3337, "step": 9793 }, { "epoch": 2.7495788882650194, "grad_norm": 0.5290191769599915, "learning_rate": 2.1077306894509042e-07, "loss": 0.338, "step": 9794 }, { "epoch": 2.7498596294216733, "grad_norm": 0.5983152985572815, "learning_rate": 2.1030408774579515e-07, "loss": 0.2917, "step": 9795 }, { "epoch": 2.7501403705783267, "grad_norm": 0.46865564584732056, "learning_rate": 2.0983561767306314e-07, "loss": 0.3288, "step": 9796 }, { "epoch": 2.7504211117349806, "grad_norm": 0.5375168919563293, "learning_rate": 2.0936765877688504e-07, "loss": 0.3532, "step": 9797 }, { "epoch": 2.750701852891634, "grad_norm": 0.5457127690315247, "learning_rate": 2.0890021110719972e-07, "loss": 0.3202, "step": 9798 }, { "epoch": 2.7509825940482875, "grad_norm": 0.51783287525177, "learning_rate": 2.0843327471389063e-07, "loss": 0.3116, "step": 9799 }, { "epoch": 2.751263335204941, "grad_norm": 0.6021425127983093, "learning_rate": 2.079668496467846e-07, "loss": 0.3255, "step": 9800 }, { "epoch": 2.7515440763615944, "grad_norm": 0.5214225649833679, "learning_rate": 2.0750093595565735e-07, "loss": 0.3233, "step": 9801 }, { "epoch": 2.7518248175182483, "grad_norm": 0.5541335940361023, "learning_rate": 2.0703553369022743e-07, "loss": 0.3285, "step": 9802 }, { "epoch": 2.7521055586749017, "grad_norm": 0.49332740902900696, "learning_rate": 2.0657064290016015e-07, "loss": 0.3281, "step": 9803 }, { "epoch": 2.7523862998315556, "grad_norm": 0.5509899854660034, "learning_rate": 2.0610626363506526e-07, "loss": 0.3498, "step": 9804 }, { "epoch": 2.752667040988209, "grad_norm": 0.5894120931625366, "learning_rate": 2.0564239594449863e-07, "loss": 0.2912, "step": 9805 }, { "epoch": 2.7529477821448625, "grad_norm": 0.5687708258628845, "learning_rate": 2.051790398779613e-07, "loss": 0.3523, "step": 9806 }, { "epoch": 2.753228523301516, "grad_norm": 0.4712930917739868, "learning_rate": 2.0471619548489974e-07, "loss": 0.3269, "step": 9807 }, { "epoch": 2.7535092644581693, "grad_norm": 0.46517688035964966, "learning_rate": 2.0425386281470617e-07, "loss": 0.315, "step": 9808 }, { "epoch": 2.7537900056148232, "grad_norm": 0.6015969514846802, "learning_rate": 2.037920419167172e-07, "loss": 0.3251, "step": 9809 }, { "epoch": 2.7540707467714767, "grad_norm": 0.5469324588775635, "learning_rate": 2.0333073284021565e-07, "loss": 0.3286, "step": 9810 }, { "epoch": 2.75435148792813, "grad_norm": 0.5498746037483215, "learning_rate": 2.0286993563443048e-07, "loss": 0.3157, "step": 9811 }, { "epoch": 2.754632229084784, "grad_norm": 0.5221884846687317, "learning_rate": 2.02409650348534e-07, "loss": 0.3367, "step": 9812 }, { "epoch": 2.7549129702414374, "grad_norm": 0.49796751141548157, "learning_rate": 2.0194987703164582e-07, "loss": 0.2977, "step": 9813 }, { "epoch": 2.755193711398091, "grad_norm": 0.5357893109321594, "learning_rate": 2.0149061573283003e-07, "loss": 0.3371, "step": 9814 }, { "epoch": 2.7554744525547443, "grad_norm": 0.5246144533157349, "learning_rate": 2.0103186650109462e-07, "loss": 0.2993, "step": 9815 }, { "epoch": 2.755755193711398, "grad_norm": 0.5692760348320007, "learning_rate": 2.005736293853966e-07, "loss": 0.3415, "step": 9816 }, { "epoch": 2.7560359348680517, "grad_norm": 0.48902472853660583, "learning_rate": 2.0011590443463403e-07, "loss": 0.3486, "step": 9817 }, { "epoch": 2.756316676024705, "grad_norm": 0.5468211770057678, "learning_rate": 1.996586916976545e-07, "loss": 0.2888, "step": 9818 }, { "epoch": 2.756597417181359, "grad_norm": 0.5358859300613403, "learning_rate": 1.992019912232479e-07, "loss": 0.3376, "step": 9819 }, { "epoch": 2.7568781583380124, "grad_norm": 0.50605309009552, "learning_rate": 1.987458030601508e-07, "loss": 0.342, "step": 9820 }, { "epoch": 2.757158899494666, "grad_norm": 0.5235331058502197, "learning_rate": 1.982901272570442e-07, "loss": 0.3184, "step": 9821 }, { "epoch": 2.7574396406513193, "grad_norm": 0.5272335410118103, "learning_rate": 1.978349638625554e-07, "loss": 0.3314, "step": 9822 }, { "epoch": 2.757720381807973, "grad_norm": 0.5786486268043518, "learning_rate": 1.973803129252566e-07, "loss": 0.32, "step": 9823 }, { "epoch": 2.7580011229646266, "grad_norm": 0.5336734056472778, "learning_rate": 1.9692617449366514e-07, "loss": 0.3584, "step": 9824 }, { "epoch": 2.75828186412128, "grad_norm": 0.5181218981742859, "learning_rate": 1.9647254861624444e-07, "loss": 0.3429, "step": 9825 }, { "epoch": 2.758562605277934, "grad_norm": 0.5494160652160645, "learning_rate": 1.9601943534140245e-07, "loss": 0.3301, "step": 9826 }, { "epoch": 2.7588433464345874, "grad_norm": 0.5143555998802185, "learning_rate": 1.955668347174916e-07, "loss": 0.297, "step": 9827 }, { "epoch": 2.759124087591241, "grad_norm": 0.5042095184326172, "learning_rate": 1.951147467928116e-07, "loss": 0.3044, "step": 9828 }, { "epoch": 2.7594048287478943, "grad_norm": 0.5449483394622803, "learning_rate": 1.9466317161560556e-07, "loss": 0.2934, "step": 9829 }, { "epoch": 2.759685569904548, "grad_norm": 0.5260158777236938, "learning_rate": 1.942121092340643e-07, "loss": 0.3321, "step": 9830 }, { "epoch": 2.7599663110612016, "grad_norm": 0.5834680199623108, "learning_rate": 1.9376155969632104e-07, "loss": 0.3181, "step": 9831 }, { "epoch": 2.760247052217855, "grad_norm": 0.6096001863479614, "learning_rate": 1.9331152305045674e-07, "loss": 0.3585, "step": 9832 }, { "epoch": 2.760527793374509, "grad_norm": 0.5205704569816589, "learning_rate": 1.9286199934449578e-07, "loss": 0.3308, "step": 9833 }, { "epoch": 2.7608085345311624, "grad_norm": 0.6398050785064697, "learning_rate": 1.9241298862640757e-07, "loss": 0.3051, "step": 9834 }, { "epoch": 2.761089275687816, "grad_norm": 0.6154297590255737, "learning_rate": 1.9196449094410985e-07, "loss": 0.2986, "step": 9835 }, { "epoch": 2.7613700168444693, "grad_norm": 0.5259007215499878, "learning_rate": 1.9151650634546214e-07, "loss": 0.3218, "step": 9836 }, { "epoch": 2.7616507580011227, "grad_norm": 0.571379542350769, "learning_rate": 1.9106903487827067e-07, "loss": 0.328, "step": 9837 }, { "epoch": 2.7619314991577766, "grad_norm": 0.5834273099899292, "learning_rate": 1.9062207659028774e-07, "loss": 0.3255, "step": 9838 }, { "epoch": 2.76221224031443, "grad_norm": 0.5143601894378662, "learning_rate": 1.9017563152920803e-07, "loss": 0.3265, "step": 9839 }, { "epoch": 2.762492981471084, "grad_norm": 0.5301283001899719, "learning_rate": 1.8972969974267564e-07, "loss": 0.3052, "step": 9840 }, { "epoch": 2.7627737226277373, "grad_norm": 0.6125536561012268, "learning_rate": 1.8928428127827693e-07, "loss": 0.2915, "step": 9841 }, { "epoch": 2.763054463784391, "grad_norm": 0.5684979557991028, "learning_rate": 1.8883937618354275e-07, "loss": 0.3369, "step": 9842 }, { "epoch": 2.7633352049410442, "grad_norm": 0.5631262063980103, "learning_rate": 1.883949845059524e-07, "loss": 0.3139, "step": 9843 }, { "epoch": 2.7636159460976977, "grad_norm": 0.620158851146698, "learning_rate": 1.8795110629292734e-07, "loss": 0.3624, "step": 9844 }, { "epoch": 2.7638966872543516, "grad_norm": 0.5072607398033142, "learning_rate": 1.8750774159183693e-07, "loss": 0.3445, "step": 9845 }, { "epoch": 2.764177428411005, "grad_norm": 0.5696941614151001, "learning_rate": 1.8706489044999222e-07, "loss": 0.3293, "step": 9846 }, { "epoch": 2.764458169567659, "grad_norm": 0.591693103313446, "learning_rate": 1.866225529146537e-07, "loss": 0.3516, "step": 9847 }, { "epoch": 2.7647389107243123, "grad_norm": 0.5637592077255249, "learning_rate": 1.861807290330242e-07, "loss": 0.2612, "step": 9848 }, { "epoch": 2.7650196518809658, "grad_norm": 0.5399445295333862, "learning_rate": 1.85739418852251e-07, "loss": 0.3225, "step": 9849 }, { "epoch": 2.765300393037619, "grad_norm": 0.5351146459579468, "learning_rate": 1.8529862241942975e-07, "loss": 0.3085, "step": 9850 }, { "epoch": 2.7655811341942727, "grad_norm": 0.528683066368103, "learning_rate": 1.8485833978159895e-07, "loss": 0.3321, "step": 9851 }, { "epoch": 2.7658618753509265, "grad_norm": 0.5455690026283264, "learning_rate": 1.8441857098574267e-07, "loss": 0.3029, "step": 9852 }, { "epoch": 2.76614261650758, "grad_norm": 0.5639652013778687, "learning_rate": 1.839793160787906e-07, "loss": 0.3232, "step": 9853 }, { "epoch": 2.7664233576642334, "grad_norm": 0.5558152794837952, "learning_rate": 1.8354057510761637e-07, "loss": 0.327, "step": 9854 }, { "epoch": 2.7667040988208873, "grad_norm": 0.546768307685852, "learning_rate": 1.831023481190408e-07, "loss": 0.3422, "step": 9855 }, { "epoch": 2.7669848399775407, "grad_norm": 0.5269338488578796, "learning_rate": 1.8266463515982824e-07, "loss": 0.3338, "step": 9856 }, { "epoch": 2.767265581134194, "grad_norm": 0.575153648853302, "learning_rate": 1.8222743627668903e-07, "loss": 0.3071, "step": 9857 }, { "epoch": 2.7675463222908476, "grad_norm": 0.593867838382721, "learning_rate": 1.8179075151627755e-07, "loss": 0.3008, "step": 9858 }, { "epoch": 2.7678270634475015, "grad_norm": 0.49975305795669556, "learning_rate": 1.8135458092519485e-07, "loss": 0.3043, "step": 9859 }, { "epoch": 2.768107804604155, "grad_norm": 0.6041407585144043, "learning_rate": 1.8091892454998595e-07, "loss": 0.3608, "step": 9860 }, { "epoch": 2.7683885457608084, "grad_norm": 0.5721468925476074, "learning_rate": 1.8048378243714136e-07, "loss": 0.3171, "step": 9861 }, { "epoch": 2.7686692869174623, "grad_norm": 0.5866236090660095, "learning_rate": 1.8004915463309792e-07, "loss": 0.2997, "step": 9862 }, { "epoch": 2.7689500280741157, "grad_norm": 0.5634253025054932, "learning_rate": 1.7961504118423512e-07, "loss": 0.3062, "step": 9863 }, { "epoch": 2.769230769230769, "grad_norm": 0.6289180517196655, "learning_rate": 1.7918144213687815e-07, "loss": 0.3098, "step": 9864 }, { "epoch": 2.7695115103874226, "grad_norm": 0.5639164447784424, "learning_rate": 1.7874835753730003e-07, "loss": 0.3309, "step": 9865 }, { "epoch": 2.7697922515440765, "grad_norm": 0.5672398209571838, "learning_rate": 1.7831578743171484e-07, "loss": 0.2969, "step": 9866 }, { "epoch": 2.77007299270073, "grad_norm": 0.5080732703208923, "learning_rate": 1.7788373186628573e-07, "loss": 0.2991, "step": 9867 }, { "epoch": 2.7703537338573834, "grad_norm": 0.54532390832901, "learning_rate": 1.774521908871174e-07, "loss": 0.336, "step": 9868 }, { "epoch": 2.7706344750140373, "grad_norm": 0.5791751146316528, "learning_rate": 1.7702116454026196e-07, "loss": 0.3353, "step": 9869 }, { "epoch": 2.7709152161706907, "grad_norm": 0.4919031858444214, "learning_rate": 1.7659065287171594e-07, "loss": 0.3385, "step": 9870 }, { "epoch": 2.771195957327344, "grad_norm": 0.5607175827026367, "learning_rate": 1.7616065592742038e-07, "loss": 0.3511, "step": 9871 }, { "epoch": 2.7714766984839976, "grad_norm": 0.5476047396659851, "learning_rate": 1.7573117375326242e-07, "loss": 0.3253, "step": 9872 }, { "epoch": 2.7717574396406515, "grad_norm": 0.6203597187995911, "learning_rate": 1.7530220639507268e-07, "loss": 0.3501, "step": 9873 }, { "epoch": 2.772038180797305, "grad_norm": 0.5313006043434143, "learning_rate": 1.748737538986295e-07, "loss": 0.3248, "step": 9874 }, { "epoch": 2.7723189219539583, "grad_norm": 0.5611987709999084, "learning_rate": 1.7444581630965406e-07, "loss": 0.3257, "step": 9875 }, { "epoch": 2.7725996631106122, "grad_norm": 0.4884902536869049, "learning_rate": 1.7401839367381213e-07, "loss": 0.3458, "step": 9876 }, { "epoch": 2.7728804042672657, "grad_norm": 0.5368293523788452, "learning_rate": 1.735914860367166e-07, "loss": 0.3392, "step": 9877 }, { "epoch": 2.773161145423919, "grad_norm": 0.521256685256958, "learning_rate": 1.731650934439244e-07, "loss": 0.3374, "step": 9878 }, { "epoch": 2.7734418865805726, "grad_norm": 0.49649372696876526, "learning_rate": 1.7273921594093746e-07, "loss": 0.3483, "step": 9879 }, { "epoch": 2.7737226277372264, "grad_norm": 0.5162923336029053, "learning_rate": 1.723138535732022e-07, "loss": 0.3163, "step": 9880 }, { "epoch": 2.77400336889388, "grad_norm": 0.5270166397094727, "learning_rate": 1.7188900638611172e-07, "loss": 0.3463, "step": 9881 }, { "epoch": 2.7742841100505333, "grad_norm": 0.5457121133804321, "learning_rate": 1.7146467442500203e-07, "loss": 0.3815, "step": 9882 }, { "epoch": 2.774564851207187, "grad_norm": 0.5218682289123535, "learning_rate": 1.710408577351552e-07, "loss": 0.317, "step": 9883 }, { "epoch": 2.7748455923638407, "grad_norm": 0.5575658679008484, "learning_rate": 1.7061755636179954e-07, "loss": 0.3254, "step": 9884 }, { "epoch": 2.775126333520494, "grad_norm": 0.5529207587242126, "learning_rate": 1.7019477035010557e-07, "loss": 0.369, "step": 9885 }, { "epoch": 2.7754070746771475, "grad_norm": 0.5120636224746704, "learning_rate": 1.6977249974519106e-07, "loss": 0.362, "step": 9886 }, { "epoch": 2.775687815833801, "grad_norm": 0.537596583366394, "learning_rate": 1.6935074459211887e-07, "loss": 0.3167, "step": 9887 }, { "epoch": 2.775968556990455, "grad_norm": 0.4775805175304413, "learning_rate": 1.6892950493589522e-07, "loss": 0.3413, "step": 9888 }, { "epoch": 2.7762492981471083, "grad_norm": 0.5533557534217834, "learning_rate": 1.685087808214725e-07, "loss": 0.3336, "step": 9889 }, { "epoch": 2.776530039303762, "grad_norm": 0.5454351305961609, "learning_rate": 1.6808857229374753e-07, "loss": 0.3243, "step": 9890 }, { "epoch": 2.7768107804604156, "grad_norm": 0.4986509382724762, "learning_rate": 1.6766887939756227e-07, "loss": 0.3571, "step": 9891 }, { "epoch": 2.777091521617069, "grad_norm": 0.5733097791671753, "learning_rate": 1.672497021777042e-07, "loss": 0.3009, "step": 9892 }, { "epoch": 2.7773722627737225, "grad_norm": 0.5357571244239807, "learning_rate": 1.6683104067890422e-07, "loss": 0.3498, "step": 9893 }, { "epoch": 2.777653003930376, "grad_norm": 0.5690387487411499, "learning_rate": 1.6641289494584102e-07, "loss": 0.2982, "step": 9894 }, { "epoch": 2.77793374508703, "grad_norm": 0.5264323353767395, "learning_rate": 1.6599526502313502e-07, "loss": 0.3467, "step": 9895 }, { "epoch": 2.7782144862436833, "grad_norm": 0.4939902126789093, "learning_rate": 1.655781509553539e-07, "loss": 0.3609, "step": 9896 }, { "epoch": 2.778495227400337, "grad_norm": 0.6071324944496155, "learning_rate": 1.651615527870093e-07, "loss": 0.3193, "step": 9897 }, { "epoch": 2.7787759685569906, "grad_norm": 0.5301510095596313, "learning_rate": 1.6474547056255785e-07, "loss": 0.2884, "step": 9898 }, { "epoch": 2.779056709713644, "grad_norm": 0.563073992729187, "learning_rate": 1.6432990432640127e-07, "loss": 0.3197, "step": 9899 }, { "epoch": 2.7793374508702975, "grad_norm": 0.5243821740150452, "learning_rate": 1.6391485412288576e-07, "loss": 0.3203, "step": 9900 }, { "epoch": 2.779618192026951, "grad_norm": 0.4988243579864502, "learning_rate": 1.6350031999630423e-07, "loss": 0.3656, "step": 9901 }, { "epoch": 2.779898933183605, "grad_norm": 0.5599173903465271, "learning_rate": 1.6308630199089238e-07, "loss": 0.3034, "step": 9902 }, { "epoch": 2.7801796743402583, "grad_norm": 0.5435174107551575, "learning_rate": 1.6267280015083098e-07, "loss": 0.298, "step": 9903 }, { "epoch": 2.7804604154969117, "grad_norm": 0.6139850616455078, "learning_rate": 1.6225981452024752e-07, "loss": 0.3715, "step": 9904 }, { "epoch": 2.7807411566535656, "grad_norm": 0.546872615814209, "learning_rate": 1.6184734514321177e-07, "loss": 0.3276, "step": 9905 }, { "epoch": 2.781021897810219, "grad_norm": 0.5780566334724426, "learning_rate": 1.614353920637418e-07, "loss": 0.295, "step": 9906 }, { "epoch": 2.7813026389668725, "grad_norm": 0.539276659488678, "learning_rate": 1.6102395532579695e-07, "loss": 0.3031, "step": 9907 }, { "epoch": 2.781583380123526, "grad_norm": 0.550692617893219, "learning_rate": 1.6061303497328485e-07, "loss": 0.311, "step": 9908 }, { "epoch": 2.78186412128018, "grad_norm": 0.601198673248291, "learning_rate": 1.602026310500554e-07, "loss": 0.3091, "step": 9909 }, { "epoch": 2.7821448624368332, "grad_norm": 0.5254162549972534, "learning_rate": 1.5979274359990415e-07, "loss": 0.3718, "step": 9910 }, { "epoch": 2.7824256035934867, "grad_norm": 0.5281869769096375, "learning_rate": 1.5938337266657167e-07, "loss": 0.3488, "step": 9911 }, { "epoch": 2.7827063447501406, "grad_norm": 0.5763053894042969, "learning_rate": 1.5897451829374465e-07, "loss": 0.3154, "step": 9912 }, { "epoch": 2.782987085906794, "grad_norm": 0.5088951587677002, "learning_rate": 1.5856618052505157e-07, "loss": 0.3292, "step": 9913 }, { "epoch": 2.7832678270634474, "grad_norm": 0.5531498193740845, "learning_rate": 1.5815835940406977e-07, "loss": 0.3289, "step": 9914 }, { "epoch": 2.783548568220101, "grad_norm": 0.5799394249916077, "learning_rate": 1.5775105497431831e-07, "loss": 0.3133, "step": 9915 }, { "epoch": 2.7838293093767548, "grad_norm": 0.5850483775138855, "learning_rate": 1.5734426727926245e-07, "loss": 0.2798, "step": 9916 }, { "epoch": 2.784110050533408, "grad_norm": 0.594656765460968, "learning_rate": 1.5693799636231134e-07, "loss": 0.3612, "step": 9917 }, { "epoch": 2.7843907916900617, "grad_norm": 0.5652356147766113, "learning_rate": 1.5653224226682085e-07, "loss": 0.3407, "step": 9918 }, { "epoch": 2.7846715328467155, "grad_norm": 0.5384714007377625, "learning_rate": 1.561270050360897e-07, "loss": 0.3086, "step": 9919 }, { "epoch": 2.784952274003369, "grad_norm": 0.5557889342308044, "learning_rate": 1.5572228471336214e-07, "loss": 0.3134, "step": 9920 }, { "epoch": 2.7852330151600224, "grad_norm": 0.5542265772819519, "learning_rate": 1.5531808134182813e-07, "loss": 0.3073, "step": 9921 }, { "epoch": 2.785513756316676, "grad_norm": 0.5558714866638184, "learning_rate": 1.549143949646209e-07, "loss": 0.2906, "step": 9922 }, { "epoch": 2.7857944974733297, "grad_norm": 0.5353521704673767, "learning_rate": 1.5451122562482047e-07, "loss": 0.3463, "step": 9923 }, { "epoch": 2.786075238629983, "grad_norm": 0.5866934061050415, "learning_rate": 1.5410857336545015e-07, "loss": 0.3059, "step": 9924 }, { "epoch": 2.7863559797866366, "grad_norm": 0.5464940667152405, "learning_rate": 1.5370643822947784e-07, "loss": 0.3245, "step": 9925 }, { "epoch": 2.7866367209432905, "grad_norm": 0.5502282381057739, "learning_rate": 1.5330482025981753e-07, "loss": 0.3118, "step": 9926 }, { "epoch": 2.786917462099944, "grad_norm": 0.5689045786857605, "learning_rate": 1.5290371949932657e-07, "loss": 0.3105, "step": 9927 }, { "epoch": 2.7871982032565974, "grad_norm": 0.5704795718193054, "learning_rate": 1.5250313599080913e-07, "loss": 0.2706, "step": 9928 }, { "epoch": 2.787478944413251, "grad_norm": 0.5281431674957275, "learning_rate": 1.5210306977701205e-07, "loss": 0.3107, "step": 9929 }, { "epoch": 2.7877596855699043, "grad_norm": 0.551979124546051, "learning_rate": 1.51703520900629e-07, "loss": 0.2933, "step": 9930 }, { "epoch": 2.788040426726558, "grad_norm": 0.6004064083099365, "learning_rate": 1.5130448940429644e-07, "loss": 0.3244, "step": 9931 }, { "epoch": 2.7883211678832116, "grad_norm": 0.5763236284255981, "learning_rate": 1.509059753305958e-07, "loss": 0.3288, "step": 9932 }, { "epoch": 2.7886019090398655, "grad_norm": 0.5568984746932983, "learning_rate": 1.5050797872205592e-07, "loss": 0.3074, "step": 9933 }, { "epoch": 2.788882650196519, "grad_norm": 0.5835351347923279, "learning_rate": 1.501104996211472e-07, "loss": 0.2962, "step": 9934 }, { "epoch": 2.7891633913531724, "grad_norm": 0.5246619582176208, "learning_rate": 1.4971353807028688e-07, "loss": 0.2898, "step": 9935 }, { "epoch": 2.789444132509826, "grad_norm": 0.5604928731918335, "learning_rate": 1.4931709411183493e-07, "loss": 0.3153, "step": 9936 }, { "epoch": 2.7897248736664793, "grad_norm": 0.48893657326698303, "learning_rate": 1.4892116778809863e-07, "loss": 0.3509, "step": 9937 }, { "epoch": 2.790005614823133, "grad_norm": 0.5521307587623596, "learning_rate": 1.485257591413286e-07, "loss": 0.3234, "step": 9938 }, { "epoch": 2.7902863559797866, "grad_norm": 0.5598009824752808, "learning_rate": 1.4813086821372003e-07, "loss": 0.3583, "step": 9939 }, { "epoch": 2.7905670971364405, "grad_norm": 0.5937865376472473, "learning_rate": 1.4773649504741417e-07, "loss": 0.3402, "step": 9940 }, { "epoch": 2.790847838293094, "grad_norm": 0.6012153029441833, "learning_rate": 1.4734263968449515e-07, "loss": 0.3428, "step": 9941 }, { "epoch": 2.7911285794497473, "grad_norm": 0.5403911471366882, "learning_rate": 1.469493021669921e-07, "loss": 0.3261, "step": 9942 }, { "epoch": 2.791409320606401, "grad_norm": 0.505204439163208, "learning_rate": 1.4655648253688094e-07, "loss": 0.2905, "step": 9943 }, { "epoch": 2.7916900617630542, "grad_norm": 0.5365025401115417, "learning_rate": 1.461641808360803e-07, "loss": 0.3105, "step": 9944 }, { "epoch": 2.791970802919708, "grad_norm": 0.5731824636459351, "learning_rate": 1.4577239710645452e-07, "loss": 0.3012, "step": 9945 }, { "epoch": 2.7922515440763616, "grad_norm": 0.5721343159675598, "learning_rate": 1.453811313898118e-07, "loss": 0.3647, "step": 9946 }, { "epoch": 2.792532285233015, "grad_norm": 0.5556032657623291, "learning_rate": 1.4499038372790596e-07, "loss": 0.3482, "step": 9947 }, { "epoch": 2.792813026389669, "grad_norm": 0.5744494795799255, "learning_rate": 1.4460015416243534e-07, "loss": 0.3327, "step": 9948 }, { "epoch": 2.7930937675463223, "grad_norm": 0.564821183681488, "learning_rate": 1.442104427350416e-07, "loss": 0.3467, "step": 9949 }, { "epoch": 2.7933745087029758, "grad_norm": 0.5284962058067322, "learning_rate": 1.4382124948731423e-07, "loss": 0.3323, "step": 9950 }, { "epoch": 2.793655249859629, "grad_norm": 0.5517900586128235, "learning_rate": 1.4343257446078397e-07, "loss": 0.3577, "step": 9951 }, { "epoch": 2.793935991016283, "grad_norm": 0.5161297917366028, "learning_rate": 1.4304441769692867e-07, "loss": 0.3377, "step": 9952 }, { "epoch": 2.7942167321729365, "grad_norm": 0.5578047633171082, "learning_rate": 1.426567792371697e-07, "loss": 0.2868, "step": 9953 }, { "epoch": 2.79449747332959, "grad_norm": 0.5168033838272095, "learning_rate": 1.4226965912287282e-07, "loss": 0.3105, "step": 9954 }, { "epoch": 2.794778214486244, "grad_norm": 0.5163038969039917, "learning_rate": 1.4188305739535059e-07, "loss": 0.3419, "step": 9955 }, { "epoch": 2.7950589556428973, "grad_norm": 0.5863468050956726, "learning_rate": 1.4149697409585715e-07, "loss": 0.3107, "step": 9956 }, { "epoch": 2.7953396967995507, "grad_norm": 0.5861634612083435, "learning_rate": 1.411114092655941e-07, "loss": 0.279, "step": 9957 }, { "epoch": 2.795620437956204, "grad_norm": 0.617355465888977, "learning_rate": 1.4072636294570617e-07, "loss": 0.3253, "step": 9958 }, { "epoch": 2.795901179112858, "grad_norm": 0.5366426706314087, "learning_rate": 1.4034183517728229e-07, "loss": 0.3507, "step": 9959 }, { "epoch": 2.7961819202695115, "grad_norm": 0.556643009185791, "learning_rate": 1.3995782600135787e-07, "loss": 0.3296, "step": 9960 }, { "epoch": 2.796462661426165, "grad_norm": 0.5481691956520081, "learning_rate": 1.395743354589113e-07, "loss": 0.3096, "step": 9961 }, { "epoch": 2.796743402582819, "grad_norm": 0.5889233350753784, "learning_rate": 1.3919136359086703e-07, "loss": 0.3071, "step": 9962 }, { "epoch": 2.7970241437394723, "grad_norm": 0.465064138174057, "learning_rate": 1.3880891043809296e-07, "loss": 0.2922, "step": 9963 }, { "epoch": 2.7973048848961257, "grad_norm": 0.5425115823745728, "learning_rate": 1.3842697604140198e-07, "loss": 0.3441, "step": 9964 }, { "epoch": 2.797585626052779, "grad_norm": 0.5827770233154297, "learning_rate": 1.380455604415526e-07, "loss": 0.309, "step": 9965 }, { "epoch": 2.797866367209433, "grad_norm": 0.6112386584281921, "learning_rate": 1.3766466367924557e-07, "loss": 0.3407, "step": 9966 }, { "epoch": 2.7981471083660865, "grad_norm": 0.5571935176849365, "learning_rate": 1.3728428579513008e-07, "loss": 0.3261, "step": 9967 }, { "epoch": 2.79842784952274, "grad_norm": 0.5710159540176392, "learning_rate": 1.3690442682979588e-07, "loss": 0.3388, "step": 9968 }, { "epoch": 2.798708590679394, "grad_norm": 0.5367956161499023, "learning_rate": 1.3652508682377886e-07, "loss": 0.3387, "step": 9969 }, { "epoch": 2.7989893318360473, "grad_norm": 0.5501680970191956, "learning_rate": 1.3614626581756164e-07, "loss": 0.2838, "step": 9970 }, { "epoch": 2.7992700729927007, "grad_norm": 0.5452075600624084, "learning_rate": 1.35767963851568e-07, "loss": 0.3137, "step": 9971 }, { "epoch": 2.799550814149354, "grad_norm": 0.5247623920440674, "learning_rate": 1.3539018096616897e-07, "loss": 0.3196, "step": 9972 }, { "epoch": 2.799831555306008, "grad_norm": 0.5177823901176453, "learning_rate": 1.3501291720167898e-07, "loss": 0.3287, "step": 9973 }, { "epoch": 2.8001122964626615, "grad_norm": 0.5358867645263672, "learning_rate": 1.3463617259835639e-07, "loss": 0.3494, "step": 9974 }, { "epoch": 2.800393037619315, "grad_norm": 0.5144773721694946, "learning_rate": 1.3425994719640622e-07, "loss": 0.3313, "step": 9975 }, { "epoch": 2.800673778775969, "grad_norm": 0.5704407691955566, "learning_rate": 1.3388424103597586e-07, "loss": 0.3092, "step": 9976 }, { "epoch": 2.8009545199326222, "grad_norm": 0.5049776434898376, "learning_rate": 1.3350905415715986e-07, "loss": 0.3522, "step": 9977 }, { "epoch": 2.8012352610892757, "grad_norm": 0.5009602308273315, "learning_rate": 1.33134386599994e-07, "loss": 0.361, "step": 9978 }, { "epoch": 2.801516002245929, "grad_norm": 0.5368480086326599, "learning_rate": 1.3276023840446183e-07, "loss": 0.3301, "step": 9979 }, { "epoch": 2.8017967434025826, "grad_norm": 0.5960638523101807, "learning_rate": 1.3238660961049033e-07, "loss": 0.3122, "step": 9980 }, { "epoch": 2.8020774845592364, "grad_norm": 0.4748612344264984, "learning_rate": 1.3201350025794924e-07, "loss": 0.3104, "step": 9981 }, { "epoch": 2.80235822571589, "grad_norm": 0.5358529686927795, "learning_rate": 1.316409103866556e-07, "loss": 0.324, "step": 9982 }, { "epoch": 2.8026389668725438, "grad_norm": 0.6039599776268005, "learning_rate": 1.312688400363693e-07, "loss": 0.3326, "step": 9983 }, { "epoch": 2.802919708029197, "grad_norm": 0.5407767295837402, "learning_rate": 1.3089728924679634e-07, "loss": 0.3457, "step": 9984 }, { "epoch": 2.8032004491858507, "grad_norm": 0.5642867088317871, "learning_rate": 1.3052625805758556e-07, "loss": 0.3, "step": 9985 }, { "epoch": 2.803481190342504, "grad_norm": 0.533231258392334, "learning_rate": 1.3015574650833141e-07, "loss": 0.3457, "step": 9986 }, { "epoch": 2.8037619314991575, "grad_norm": 0.5869655609130859, "learning_rate": 1.2978575463857168e-07, "loss": 0.328, "step": 9987 }, { "epoch": 2.8040426726558114, "grad_norm": 0.5910897254943848, "learning_rate": 1.294162824877909e-07, "loss": 0.3339, "step": 9988 }, { "epoch": 2.804323413812465, "grad_norm": 0.573411226272583, "learning_rate": 1.2904733009541647e-07, "loss": 0.2916, "step": 9989 }, { "epoch": 2.8046041549691187, "grad_norm": 0.5155426263809204, "learning_rate": 1.2867889750082018e-07, "loss": 0.357, "step": 9990 }, { "epoch": 2.804884896125772, "grad_norm": 0.5490040183067322, "learning_rate": 1.283109847433195e-07, "loss": 0.3393, "step": 9991 }, { "epoch": 2.8051656372824256, "grad_norm": 0.5404068827629089, "learning_rate": 1.279435918621752e-07, "loss": 0.3893, "step": 9992 }, { "epoch": 2.805446378439079, "grad_norm": 0.5030194520950317, "learning_rate": 1.2757671889659373e-07, "loss": 0.3112, "step": 9993 }, { "epoch": 2.8057271195957325, "grad_norm": 0.5610947608947754, "learning_rate": 1.272103658857249e-07, "loss": 0.3198, "step": 9994 }, { "epoch": 2.8060078607523864, "grad_norm": 0.5328671336174011, "learning_rate": 1.268445328686646e-07, "loss": 0.3142, "step": 9995 }, { "epoch": 2.80628860190904, "grad_norm": 0.5556824207305908, "learning_rate": 1.264792198844511e-07, "loss": 0.3467, "step": 9996 }, { "epoch": 2.8065693430656933, "grad_norm": 0.5286175608634949, "learning_rate": 1.2611442697206877e-07, "loss": 0.3342, "step": 9997 }, { "epoch": 2.806850084222347, "grad_norm": 0.5325221419334412, "learning_rate": 1.257501541704459e-07, "loss": 0.3327, "step": 9998 }, { "epoch": 2.8071308253790006, "grad_norm": 0.5068479776382446, "learning_rate": 1.2538640151845638e-07, "loss": 0.3201, "step": 9999 }, { "epoch": 2.807411566535654, "grad_norm": 0.5140798687934875, "learning_rate": 1.2502316905491584e-07, "loss": 0.3031, "step": 10000 }, { "epoch": 2.8076923076923075, "grad_norm": 0.4957268238067627, "learning_rate": 1.246604568185883e-07, "loss": 0.2899, "step": 10001 }, { "epoch": 2.8079730488489614, "grad_norm": 0.5943296551704407, "learning_rate": 1.2429826484817887e-07, "loss": 0.3311, "step": 10002 }, { "epoch": 2.808253790005615, "grad_norm": 0.5997329354286194, "learning_rate": 1.2393659318233775e-07, "loss": 0.3807, "step": 10003 }, { "epoch": 2.8085345311622683, "grad_norm": 0.553098738193512, "learning_rate": 1.2357544185966187e-07, "loss": 0.3146, "step": 10004 }, { "epoch": 2.808815272318922, "grad_norm": 0.5032588839530945, "learning_rate": 1.2321481091869035e-07, "loss": 0.3575, "step": 10005 }, { "epoch": 2.8090960134755756, "grad_norm": 0.5524049401283264, "learning_rate": 1.2285470039790749e-07, "loss": 0.3072, "step": 10006 }, { "epoch": 2.809376754632229, "grad_norm": 0.5532103180885315, "learning_rate": 1.224951103357419e-07, "loss": 0.3417, "step": 10007 }, { "epoch": 2.8096574957888825, "grad_norm": 0.504477322101593, "learning_rate": 1.2213604077056685e-07, "loss": 0.3161, "step": 10008 }, { "epoch": 2.8099382369455363, "grad_norm": 0.5959299802780151, "learning_rate": 1.2177749174070053e-07, "loss": 0.2957, "step": 10009 }, { "epoch": 2.81021897810219, "grad_norm": 0.565331757068634, "learning_rate": 1.21419463284404e-07, "loss": 0.3706, "step": 10010 }, { "epoch": 2.8104997192588432, "grad_norm": 0.5657942295074463, "learning_rate": 1.2106195543988454e-07, "loss": 0.3318, "step": 10011 }, { "epoch": 2.810780460415497, "grad_norm": 0.5270640254020691, "learning_rate": 1.207049682452932e-07, "loss": 0.34, "step": 10012 }, { "epoch": 2.8110612015721506, "grad_norm": 0.5368057489395142, "learning_rate": 1.2034850173872515e-07, "loss": 0.3541, "step": 10013 }, { "epoch": 2.811341942728804, "grad_norm": 0.5489338636398315, "learning_rate": 1.19992555958221e-07, "loss": 0.3014, "step": 10014 }, { "epoch": 2.8116226838854574, "grad_norm": 0.5464556217193604, "learning_rate": 1.1963713094176376e-07, "loss": 0.3026, "step": 10015 }, { "epoch": 2.8119034250421113, "grad_norm": 0.5182561278343201, "learning_rate": 1.1928222672728363e-07, "loss": 0.3132, "step": 10016 }, { "epoch": 2.8121841661987648, "grad_norm": 0.5293217897415161, "learning_rate": 1.1892784335265307e-07, "loss": 0.3176, "step": 10017 }, { "epoch": 2.812464907355418, "grad_norm": 0.5431877970695496, "learning_rate": 1.1857398085568905e-07, "loss": 0.3152, "step": 10018 }, { "epoch": 2.812745648512072, "grad_norm": 0.5799314379692078, "learning_rate": 1.1822063927415527e-07, "loss": 0.2787, "step": 10019 }, { "epoch": 2.8130263896687255, "grad_norm": 0.500160276889801, "learning_rate": 1.1786781864575602e-07, "loss": 0.2854, "step": 10020 }, { "epoch": 2.813307130825379, "grad_norm": 0.5474427938461304, "learning_rate": 1.1751551900814395e-07, "loss": 0.2829, "step": 10021 }, { "epoch": 2.8135878719820324, "grad_norm": 0.5582706332206726, "learning_rate": 1.1716374039891288e-07, "loss": 0.3442, "step": 10022 }, { "epoch": 2.813868613138686, "grad_norm": 0.5029194951057434, "learning_rate": 1.1681248285560332e-07, "loss": 0.3481, "step": 10023 }, { "epoch": 2.8141493542953397, "grad_norm": 0.5261446237564087, "learning_rate": 1.1646174641569919e-07, "loss": 0.3322, "step": 10024 }, { "epoch": 2.814430095451993, "grad_norm": 0.5505539774894714, "learning_rate": 1.1611153111662832e-07, "loss": 0.3508, "step": 10025 }, { "epoch": 2.814710836608647, "grad_norm": 0.5470914244651794, "learning_rate": 1.1576183699576471e-07, "loss": 0.3486, "step": 10026 }, { "epoch": 2.8149915777653005, "grad_norm": 0.5789533853530884, "learning_rate": 1.1541266409042406e-07, "loss": 0.3233, "step": 10027 }, { "epoch": 2.815272318921954, "grad_norm": 0.5578635334968567, "learning_rate": 1.1506401243786935e-07, "loss": 0.3194, "step": 10028 }, { "epoch": 2.8155530600786074, "grad_norm": 0.522255003452301, "learning_rate": 1.1471588207530527e-07, "loss": 0.3624, "step": 10029 }, { "epoch": 2.815833801235261, "grad_norm": 0.5245084166526794, "learning_rate": 1.1436827303988263e-07, "loss": 0.3684, "step": 10030 }, { "epoch": 2.8161145423919147, "grad_norm": 0.5508833527565002, "learning_rate": 1.1402118536869677e-07, "loss": 0.3311, "step": 10031 }, { "epoch": 2.816395283548568, "grad_norm": 0.6131346225738525, "learning_rate": 1.136746190987853e-07, "loss": 0.3566, "step": 10032 }, { "epoch": 2.816676024705222, "grad_norm": 0.581041693687439, "learning_rate": 1.1332857426713307e-07, "loss": 0.3211, "step": 10033 }, { "epoch": 2.8169567658618755, "grad_norm": 0.616783082485199, "learning_rate": 1.1298305091066664e-07, "loss": 0.2943, "step": 10034 }, { "epoch": 2.817237507018529, "grad_norm": 0.5316537618637085, "learning_rate": 1.1263804906625931e-07, "loss": 0.3107, "step": 10035 }, { "epoch": 2.8175182481751824, "grad_norm": 0.5256446599960327, "learning_rate": 1.1229356877072662e-07, "loss": 0.2897, "step": 10036 }, { "epoch": 2.817798989331836, "grad_norm": 0.6676996350288391, "learning_rate": 1.1194961006082972e-07, "loss": 0.314, "step": 10037 }, { "epoch": 2.8180797304884897, "grad_norm": 0.5410981178283691, "learning_rate": 1.1160617297327314e-07, "loss": 0.3493, "step": 10038 }, { "epoch": 2.818360471645143, "grad_norm": 0.5171387195587158, "learning_rate": 1.1126325754470701e-07, "loss": 0.3107, "step": 10039 }, { "epoch": 2.8186412128017966, "grad_norm": 0.5379522442817688, "learning_rate": 1.109208638117254e-07, "loss": 0.2837, "step": 10040 }, { "epoch": 2.8189219539584505, "grad_norm": 0.5606611371040344, "learning_rate": 1.1057899181086573e-07, "loss": 0.298, "step": 10041 }, { "epoch": 2.819202695115104, "grad_norm": 0.5114135146141052, "learning_rate": 1.102376415786105e-07, "loss": 0.3479, "step": 10042 }, { "epoch": 2.8194834362717573, "grad_norm": 0.5463590025901794, "learning_rate": 1.0989681315138778e-07, "loss": 0.3068, "step": 10043 }, { "epoch": 2.819764177428411, "grad_norm": 0.549858570098877, "learning_rate": 1.0955650656556682e-07, "loss": 0.3423, "step": 10044 }, { "epoch": 2.8200449185850647, "grad_norm": 0.5742153525352478, "learning_rate": 1.0921672185746357e-07, "loss": 0.3147, "step": 10045 }, { "epoch": 2.820325659741718, "grad_norm": 0.5294710397720337, "learning_rate": 1.0887745906333846e-07, "loss": 0.363, "step": 10046 }, { "epoch": 2.8206064008983716, "grad_norm": 0.5256567001342773, "learning_rate": 1.0853871821939477e-07, "loss": 0.3615, "step": 10047 }, { "epoch": 2.8208871420550254, "grad_norm": 0.49465903639793396, "learning_rate": 1.0820049936178134e-07, "loss": 0.3489, "step": 10048 }, { "epoch": 2.821167883211679, "grad_norm": 0.5517958998680115, "learning_rate": 1.0786280252659043e-07, "loss": 0.3118, "step": 10049 }, { "epoch": 2.8214486243683323, "grad_norm": 0.6105058193206787, "learning_rate": 1.075256277498593e-07, "loss": 0.3198, "step": 10050 }, { "epoch": 2.8217293655249858, "grad_norm": 0.5564125776290894, "learning_rate": 1.0718897506756865e-07, "loss": 0.3476, "step": 10051 }, { "epoch": 2.8220101066816397, "grad_norm": 0.5239488482475281, "learning_rate": 1.0685284451564415e-07, "loss": 0.3679, "step": 10052 }, { "epoch": 2.822290847838293, "grad_norm": 0.52907395362854, "learning_rate": 1.0651723612995546e-07, "loss": 0.3131, "step": 10053 }, { "epoch": 2.8225715889949465, "grad_norm": 0.5235008597373962, "learning_rate": 1.061821499463167e-07, "loss": 0.2992, "step": 10054 }, { "epoch": 2.8228523301516004, "grad_norm": 0.6019278764724731, "learning_rate": 1.058475860004865e-07, "loss": 0.3487, "step": 10055 }, { "epoch": 2.823133071308254, "grad_norm": 0.5207324624061584, "learning_rate": 1.055135443281674e-07, "loss": 0.3389, "step": 10056 }, { "epoch": 2.8234138124649073, "grad_norm": 0.6004019379615784, "learning_rate": 1.051800249650059e-07, "loss": 0.342, "step": 10057 }, { "epoch": 2.8236945536215607, "grad_norm": 0.5514563322067261, "learning_rate": 1.0484702794659352e-07, "loss": 0.3262, "step": 10058 }, { "epoch": 2.8239752947782146, "grad_norm": 0.5341305732727051, "learning_rate": 1.0451455330846461e-07, "loss": 0.3119, "step": 10059 }, { "epoch": 2.824256035934868, "grad_norm": 0.5418851375579834, "learning_rate": 1.0418260108610023e-07, "loss": 0.3104, "step": 10060 }, { "epoch": 2.8245367770915215, "grad_norm": 0.5163307785987854, "learning_rate": 1.0385117131492318e-07, "loss": 0.2921, "step": 10061 }, { "epoch": 2.8248175182481754, "grad_norm": 0.6473627090454102, "learning_rate": 1.0352026403030235e-07, "loss": 0.3064, "step": 10062 }, { "epoch": 2.825098259404829, "grad_norm": 0.47515952587127686, "learning_rate": 1.0318987926754897e-07, "loss": 0.333, "step": 10063 }, { "epoch": 2.8253790005614823, "grad_norm": 0.5817738175392151, "learning_rate": 1.0286001706192095e-07, "loss": 0.2753, "step": 10064 }, { "epoch": 2.8256597417181357, "grad_norm": 0.5453835725784302, "learning_rate": 1.02530677448619e-07, "loss": 0.3077, "step": 10065 }, { "epoch": 2.8259404828747896, "grad_norm": 0.5364516973495483, "learning_rate": 1.0220186046278724e-07, "loss": 0.3615, "step": 10066 }, { "epoch": 2.826221224031443, "grad_norm": 0.5471426248550415, "learning_rate": 1.0187356613951538e-07, "loss": 0.3171, "step": 10067 }, { "epoch": 2.8265019651880965, "grad_norm": 0.5084347724914551, "learning_rate": 1.0154579451383817e-07, "loss": 0.2998, "step": 10068 }, { "epoch": 2.8267827063447504, "grad_norm": 0.4818313717842102, "learning_rate": 1.0121854562073097e-07, "loss": 0.311, "step": 10069 }, { "epoch": 2.827063447501404, "grad_norm": 0.537496030330658, "learning_rate": 1.0089181949511805e-07, "loss": 0.3544, "step": 10070 }, { "epoch": 2.8273441886580573, "grad_norm": 0.5626868009567261, "learning_rate": 1.0056561617186378e-07, "loss": 0.3284, "step": 10071 }, { "epoch": 2.8276249298147107, "grad_norm": 0.5381502509117126, "learning_rate": 1.0023993568578027e-07, "loss": 0.3354, "step": 10072 }, { "epoch": 2.827905670971364, "grad_norm": 0.5141575932502747, "learning_rate": 9.991477807162086e-08, "loss": 0.3409, "step": 10073 }, { "epoch": 2.828186412128018, "grad_norm": 0.5125014185905457, "learning_rate": 9.959014336408446e-08, "loss": 0.3468, "step": 10074 }, { "epoch": 2.8284671532846715, "grad_norm": 0.5192347168922424, "learning_rate": 9.926603159781444e-08, "loss": 0.3509, "step": 10075 }, { "epoch": 2.8287478944413254, "grad_norm": 0.5120210647583008, "learning_rate": 9.894244280739817e-08, "loss": 0.3216, "step": 10076 }, { "epoch": 2.829028635597979, "grad_norm": 0.5238515138626099, "learning_rate": 9.861937702736635e-08, "loss": 0.2914, "step": 10077 }, { "epoch": 2.8293093767546322, "grad_norm": 0.5473514199256897, "learning_rate": 9.829683429219528e-08, "loss": 0.3048, "step": 10078 }, { "epoch": 2.8295901179112857, "grad_norm": 0.5520949363708496, "learning_rate": 9.79748146363041e-08, "loss": 0.3543, "step": 10079 }, { "epoch": 2.829870859067939, "grad_norm": 0.5224297642707825, "learning_rate": 9.765331809405754e-08, "loss": 0.2951, "step": 10080 }, { "epoch": 2.830151600224593, "grad_norm": 0.5464485883712769, "learning_rate": 9.73323446997626e-08, "loss": 0.3872, "step": 10081 }, { "epoch": 2.8304323413812464, "grad_norm": 0.5790408849716187, "learning_rate": 9.701189448767245e-08, "loss": 0.2835, "step": 10082 }, { "epoch": 2.8307130825379003, "grad_norm": 0.5932109951972961, "learning_rate": 9.669196749198251e-08, "loss": 0.2984, "step": 10083 }, { "epoch": 2.8309938236945538, "grad_norm": 0.5314530730247498, "learning_rate": 9.637256374683546e-08, "loss": 0.3159, "step": 10084 }, { "epoch": 2.831274564851207, "grad_norm": 0.5765442252159119, "learning_rate": 9.605368328631403e-08, "loss": 0.3292, "step": 10085 }, { "epoch": 2.8315553060078607, "grad_norm": 0.5692158937454224, "learning_rate": 9.57353261444477e-08, "loss": 0.3028, "step": 10086 }, { "epoch": 2.831836047164514, "grad_norm": 0.5491392612457275, "learning_rate": 9.541749235521036e-08, "loss": 0.3378, "step": 10087 }, { "epoch": 2.832116788321168, "grad_norm": 0.5618224740028381, "learning_rate": 9.510018195251769e-08, "loss": 0.2745, "step": 10088 }, { "epoch": 2.8323975294778214, "grad_norm": 0.5175615549087524, "learning_rate": 9.478339497023259e-08, "loss": 0.3758, "step": 10089 }, { "epoch": 2.832678270634475, "grad_norm": 0.5344045758247375, "learning_rate": 9.446713144216024e-08, "loss": 0.3304, "step": 10090 }, { "epoch": 2.8329590117911287, "grad_norm": 0.5420215725898743, "learning_rate": 9.415139140204977e-08, "loss": 0.3353, "step": 10091 }, { "epoch": 2.833239752947782, "grad_norm": 0.5115945339202881, "learning_rate": 9.383617488359587e-08, "loss": 0.3446, "step": 10092 }, { "epoch": 2.8335204941044356, "grad_norm": 0.5203591585159302, "learning_rate": 9.352148192043553e-08, "loss": 0.3056, "step": 10093 }, { "epoch": 2.833801235261089, "grad_norm": 0.48789289593696594, "learning_rate": 9.320731254615134e-08, "loss": 0.3755, "step": 10094 }, { "epoch": 2.834081976417743, "grad_norm": 0.6028507351875305, "learning_rate": 9.289366679426926e-08, "loss": 0.3039, "step": 10095 }, { "epoch": 2.8343627175743964, "grad_norm": 0.5670675039291382, "learning_rate": 9.258054469825972e-08, "loss": 0.343, "step": 10096 }, { "epoch": 2.83464345873105, "grad_norm": 0.566779375076294, "learning_rate": 9.226794629153768e-08, "loss": 0.3404, "step": 10097 }, { "epoch": 2.8349241998877037, "grad_norm": 0.5995916128158569, "learning_rate": 9.195587160746089e-08, "loss": 0.3581, "step": 10098 }, { "epoch": 2.835204941044357, "grad_norm": 0.5468376278877258, "learning_rate": 9.164432067933271e-08, "loss": 0.3174, "step": 10099 }, { "epoch": 2.8354856822010106, "grad_norm": 0.5315907597541809, "learning_rate": 9.133329354039988e-08, "loss": 0.3299, "step": 10100 }, { "epoch": 2.835766423357664, "grad_norm": 0.5144544243812561, "learning_rate": 9.102279022385196e-08, "loss": 0.3711, "step": 10101 }, { "epoch": 2.836047164514318, "grad_norm": 0.5138227343559265, "learning_rate": 9.071281076282579e-08, "loss": 0.307, "step": 10102 }, { "epoch": 2.8363279056709714, "grad_norm": 0.5745804309844971, "learning_rate": 9.040335519039933e-08, "loss": 0.3517, "step": 10103 }, { "epoch": 2.836608646827625, "grad_norm": 0.6126704216003418, "learning_rate": 9.009442353959618e-08, "loss": 0.332, "step": 10104 }, { "epoch": 2.8368893879842787, "grad_norm": 0.6069706678390503, "learning_rate": 8.978601584338332e-08, "loss": 0.332, "step": 10105 }, { "epoch": 2.837170129140932, "grad_norm": 0.5251445174217224, "learning_rate": 8.947813213467216e-08, "loss": 0.3303, "step": 10106 }, { "epoch": 2.8374508702975856, "grad_norm": 0.5183876752853394, "learning_rate": 8.917077244631812e-08, "loss": 0.3543, "step": 10107 }, { "epoch": 2.837731611454239, "grad_norm": 0.5323402285575867, "learning_rate": 8.886393681112104e-08, "loss": 0.2954, "step": 10108 }, { "epoch": 2.838012352610893, "grad_norm": 0.5713376402854919, "learning_rate": 8.85576252618242e-08, "loss": 0.3086, "step": 10109 }, { "epoch": 2.8382930937675463, "grad_norm": 0.5591835975646973, "learning_rate": 8.825183783111535e-08, "loss": 0.3298, "step": 10110 }, { "epoch": 2.8385738349242, "grad_norm": 0.5651745796203613, "learning_rate": 8.794657455162615e-08, "loss": 0.3382, "step": 10111 }, { "epoch": 2.8388545760808537, "grad_norm": 0.5637382864952087, "learning_rate": 8.764183545593275e-08, "loss": 0.32, "step": 10112 }, { "epoch": 2.839135317237507, "grad_norm": 0.5656781792640686, "learning_rate": 8.73376205765547e-08, "loss": 0.3162, "step": 10113 }, { "epoch": 2.8394160583941606, "grad_norm": 0.48276302218437195, "learning_rate": 8.70339299459555e-08, "loss": 0.3243, "step": 10114 }, { "epoch": 2.839696799550814, "grad_norm": 0.6217085719108582, "learning_rate": 8.673076359654364e-08, "loss": 0.3169, "step": 10115 }, { "epoch": 2.8399775407074674, "grad_norm": 0.5485671162605286, "learning_rate": 8.642812156067104e-08, "loss": 0.3507, "step": 10116 }, { "epoch": 2.8402582818641213, "grad_norm": 0.6354616284370422, "learning_rate": 8.612600387063463e-08, "loss": 0.3233, "step": 10117 }, { "epoch": 2.8405390230207748, "grad_norm": 0.5699709057807922, "learning_rate": 8.582441055867308e-08, "loss": 0.3316, "step": 10118 }, { "epoch": 2.8408197641774287, "grad_norm": 0.5277790427207947, "learning_rate": 8.552334165697118e-08, "loss": 0.3452, "step": 10119 }, { "epoch": 2.841100505334082, "grad_norm": 0.5202212333679199, "learning_rate": 8.522279719765714e-08, "loss": 0.3006, "step": 10120 }, { "epoch": 2.8413812464907355, "grad_norm": 0.5469048619270325, "learning_rate": 8.492277721280362e-08, "loss": 0.373, "step": 10121 }, { "epoch": 2.841661987647389, "grad_norm": 0.5882004499435425, "learning_rate": 8.462328173442613e-08, "loss": 0.2982, "step": 10122 }, { "epoch": 2.8419427288040424, "grad_norm": 0.4816424250602722, "learning_rate": 8.432431079448521e-08, "loss": 0.3833, "step": 10123 }, { "epoch": 2.8422234699606963, "grad_norm": 0.5815948247909546, "learning_rate": 8.402586442488536e-08, "loss": 0.3541, "step": 10124 }, { "epoch": 2.8425042111173497, "grad_norm": 0.5320097208023071, "learning_rate": 8.372794265747498e-08, "loss": 0.3211, "step": 10125 }, { "epoch": 2.8427849522740036, "grad_norm": 0.5010483860969543, "learning_rate": 8.343054552404639e-08, "loss": 0.2919, "step": 10126 }, { "epoch": 2.843065693430657, "grad_norm": 0.5685129165649414, "learning_rate": 8.313367305633591e-08, "loss": 0.3144, "step": 10127 }, { "epoch": 2.8433464345873105, "grad_norm": 0.5361132621765137, "learning_rate": 8.283732528602318e-08, "loss": 0.3237, "step": 10128 }, { "epoch": 2.843627175743964, "grad_norm": 0.5101833343505859, "learning_rate": 8.2541502244734e-08, "loss": 0.3141, "step": 10129 }, { "epoch": 2.8439079169006174, "grad_norm": 0.5523453950881958, "learning_rate": 8.224620396403537e-08, "loss": 0.3032, "step": 10130 }, { "epoch": 2.8441886580572713, "grad_norm": 0.6426838040351868, "learning_rate": 8.195143047544096e-08, "loss": 0.2995, "step": 10131 }, { "epoch": 2.8444693992139247, "grad_norm": 0.514690637588501, "learning_rate": 8.165718181040617e-08, "loss": 0.3156, "step": 10132 }, { "epoch": 2.844750140370578, "grad_norm": 0.532554566860199, "learning_rate": 8.136345800033196e-08, "loss": 0.2773, "step": 10133 }, { "epoch": 2.845030881527232, "grad_norm": 0.4946853518486023, "learning_rate": 8.107025907656274e-08, "loss": 0.3494, "step": 10134 }, { "epoch": 2.8453116226838855, "grad_norm": 0.55220627784729, "learning_rate": 8.077758507038624e-08, "loss": 0.3246, "step": 10135 }, { "epoch": 2.845592363840539, "grad_norm": 0.5223219394683838, "learning_rate": 8.048543601303583e-08, "loss": 0.3113, "step": 10136 }, { "epoch": 2.8458731049971924, "grad_norm": 0.5246886610984802, "learning_rate": 8.019381193568654e-08, "loss": 0.3525, "step": 10137 }, { "epoch": 2.8461538461538463, "grad_norm": 0.5363090634346008, "learning_rate": 7.990271286945961e-08, "loss": 0.3618, "step": 10138 }, { "epoch": 2.8464345873104997, "grad_norm": 0.5171090960502625, "learning_rate": 7.961213884541962e-08, "loss": 0.3434, "step": 10139 }, { "epoch": 2.846715328467153, "grad_norm": 0.5945302248001099, "learning_rate": 7.93220898945729e-08, "loss": 0.3501, "step": 10140 }, { "epoch": 2.846996069623807, "grad_norm": 0.4916573762893677, "learning_rate": 7.903256604787468e-08, "loss": 0.3742, "step": 10141 }, { "epoch": 2.8472768107804605, "grad_norm": 0.5513736009597778, "learning_rate": 7.874356733621913e-08, "loss": 0.3375, "step": 10142 }, { "epoch": 2.847557551937114, "grad_norm": 0.545196533203125, "learning_rate": 7.845509379044603e-08, "loss": 0.3572, "step": 10143 }, { "epoch": 2.8478382930937673, "grad_norm": 0.5878639817237854, "learning_rate": 7.816714544134074e-08, "loss": 0.3035, "step": 10144 }, { "epoch": 2.8481190342504212, "grad_norm": 0.5589436888694763, "learning_rate": 7.787972231963092e-08, "loss": 0.3272, "step": 10145 }, { "epoch": 2.8483997754070747, "grad_norm": 0.5432109832763672, "learning_rate": 7.759282445598871e-08, "loss": 0.3198, "step": 10146 }, { "epoch": 2.848680516563728, "grad_norm": 0.6179593205451965, "learning_rate": 7.730645188102904e-08, "loss": 0.2991, "step": 10147 }, { "epoch": 2.848961257720382, "grad_norm": 0.5356544852256775, "learning_rate": 7.702060462531358e-08, "loss": 0.3185, "step": 10148 }, { "epoch": 2.8492419988770354, "grad_norm": 0.5758293271064758, "learning_rate": 7.673528271934516e-08, "loss": 0.307, "step": 10149 }, { "epoch": 2.849522740033689, "grad_norm": 0.5547780990600586, "learning_rate": 7.645048619357054e-08, "loss": 0.3399, "step": 10150 }, { "epoch": 2.8498034811903423, "grad_norm": 0.5531817674636841, "learning_rate": 7.616621507838374e-08, "loss": 0.341, "step": 10151 }, { "epoch": 2.850084222346996, "grad_norm": 0.5724533200263977, "learning_rate": 7.588246940411825e-08, "loss": 0.3, "step": 10152 }, { "epoch": 2.8503649635036497, "grad_norm": 0.5944581031799316, "learning_rate": 7.559924920105488e-08, "loss": 0.3358, "step": 10153 }, { "epoch": 2.850645704660303, "grad_norm": 0.5550400614738464, "learning_rate": 7.531655449941666e-08, "loss": 0.337, "step": 10154 }, { "epoch": 2.850926445816957, "grad_norm": 0.5747030377388, "learning_rate": 7.503438532937169e-08, "loss": 0.344, "step": 10155 }, { "epoch": 2.8512071869736104, "grad_norm": 0.5691621899604797, "learning_rate": 7.475274172103086e-08, "loss": 0.3255, "step": 10156 }, { "epoch": 2.851487928130264, "grad_norm": 0.5421184301376343, "learning_rate": 7.447162370444849e-08, "loss": 0.3737, "step": 10157 }, { "epoch": 2.8517686692869173, "grad_norm": 0.5573785901069641, "learning_rate": 7.419103130962502e-08, "loss": 0.3367, "step": 10158 }, { "epoch": 2.852049410443571, "grad_norm": 0.5164793133735657, "learning_rate": 7.391096456650315e-08, "loss": 0.3171, "step": 10159 }, { "epoch": 2.8523301516002246, "grad_norm": 0.5594348311424255, "learning_rate": 7.36314235049701e-08, "loss": 0.337, "step": 10160 }, { "epoch": 2.852610892756878, "grad_norm": 0.5490220785140991, "learning_rate": 7.335240815485589e-08, "loss": 0.3511, "step": 10161 }, { "epoch": 2.852891633913532, "grad_norm": 0.5237599015235901, "learning_rate": 7.307391854593615e-08, "loss": 0.3314, "step": 10162 }, { "epoch": 2.8531723750701854, "grad_norm": 0.5433495044708252, "learning_rate": 7.279595470792932e-08, "loss": 0.2959, "step": 10163 }, { "epoch": 2.853453116226839, "grad_norm": 0.5113106966018677, "learning_rate": 7.25185166704978e-08, "loss": 0.3614, "step": 10164 }, { "epoch": 2.8537338573834923, "grad_norm": 0.5449342727661133, "learning_rate": 7.22416044632479e-08, "loss": 0.2881, "step": 10165 }, { "epoch": 2.8540145985401457, "grad_norm": 0.5747097134590149, "learning_rate": 7.196521811573098e-08, "loss": 0.3041, "step": 10166 }, { "epoch": 2.8542953396967996, "grad_norm": 0.5181044340133667, "learning_rate": 7.168935765744012e-08, "loss": 0.329, "step": 10167 }, { "epoch": 2.854576080853453, "grad_norm": 0.5626211166381836, "learning_rate": 7.141402311781398e-08, "loss": 0.3659, "step": 10168 }, { "epoch": 2.854856822010107, "grad_norm": 0.5382546782493591, "learning_rate": 7.113921452623462e-08, "loss": 0.3173, "step": 10169 }, { "epoch": 2.8551375631667604, "grad_norm": 0.5320886373519897, "learning_rate": 7.086493191202747e-08, "loss": 0.3004, "step": 10170 }, { "epoch": 2.855418304323414, "grad_norm": 0.6260315179824829, "learning_rate": 7.059117530446303e-08, "loss": 0.3578, "step": 10171 }, { "epoch": 2.8556990454800673, "grad_norm": 0.602780282497406, "learning_rate": 7.031794473275344e-08, "loss": 0.3261, "step": 10172 }, { "epoch": 2.8559797866367207, "grad_norm": 0.6174184083938599, "learning_rate": 7.004524022605764e-08, "loss": 0.3228, "step": 10173 }, { "epoch": 2.8562605277933746, "grad_norm": 0.6240619421005249, "learning_rate": 6.977306181347677e-08, "loss": 0.3005, "step": 10174 }, { "epoch": 2.856541268950028, "grad_norm": 0.5377023816108704, "learning_rate": 6.950140952405538e-08, "loss": 0.3116, "step": 10175 }, { "epoch": 2.856822010106682, "grad_norm": 0.579302966594696, "learning_rate": 6.923028338678306e-08, "loss": 0.3184, "step": 10176 }, { "epoch": 2.8571027512633353, "grad_norm": 0.5167502760887146, "learning_rate": 6.895968343059168e-08, "loss": 0.2948, "step": 10177 }, { "epoch": 2.857383492419989, "grad_norm": 0.625468373298645, "learning_rate": 6.868960968435978e-08, "loss": 0.3118, "step": 10178 }, { "epoch": 2.8576642335766422, "grad_norm": 0.5315325260162354, "learning_rate": 6.8420062176906e-08, "loss": 0.3405, "step": 10179 }, { "epoch": 2.8579449747332957, "grad_norm": 0.4759010970592499, "learning_rate": 6.815104093699621e-08, "loss": 0.3338, "step": 10180 }, { "epoch": 2.8582257158899496, "grad_norm": 0.5294516682624817, "learning_rate": 6.788254599333799e-08, "loss": 0.3237, "step": 10181 }, { "epoch": 2.858506457046603, "grad_norm": 0.540122926235199, "learning_rate": 6.761457737458399e-08, "loss": 0.3345, "step": 10182 }, { "epoch": 2.8587871982032564, "grad_norm": 0.5500849485397339, "learning_rate": 6.734713510932967e-08, "loss": 0.3381, "step": 10183 }, { "epoch": 2.8590679393599103, "grad_norm": 0.5662973523139954, "learning_rate": 6.708021922611496e-08, "loss": 0.3283, "step": 10184 }, { "epoch": 2.8593486805165638, "grad_norm": 0.5160301923751831, "learning_rate": 6.681382975342321e-08, "loss": 0.3076, "step": 10185 }, { "epoch": 2.859629421673217, "grad_norm": 0.5323373675346375, "learning_rate": 6.654796671968222e-08, "loss": 0.3389, "step": 10186 }, { "epoch": 2.8599101628298707, "grad_norm": 0.577246904373169, "learning_rate": 6.62826301532632e-08, "loss": 0.3038, "step": 10187 }, { "epoch": 2.8601909039865245, "grad_norm": 0.5509032011032104, "learning_rate": 6.601782008248126e-08, "loss": 0.3164, "step": 10188 }, { "epoch": 2.860471645143178, "grad_norm": 0.606834352016449, "learning_rate": 6.575353653559491e-08, "loss": 0.2821, "step": 10189 }, { "epoch": 2.8607523862998314, "grad_norm": 0.5302348136901855, "learning_rate": 6.548977954080716e-08, "loss": 0.2887, "step": 10190 }, { "epoch": 2.8610331274564853, "grad_norm": 0.4936233162879944, "learning_rate": 6.522654912626381e-08, "loss": 0.3539, "step": 10191 }, { "epoch": 2.8613138686131387, "grad_norm": 0.5368813872337341, "learning_rate": 6.496384532005684e-08, "loss": 0.3217, "step": 10192 }, { "epoch": 2.861594609769792, "grad_norm": 0.5451339483261108, "learning_rate": 6.470166815021884e-08, "loss": 0.3348, "step": 10193 }, { "epoch": 2.8618753509264456, "grad_norm": 0.49208346009254456, "learning_rate": 6.444001764472852e-08, "loss": 0.3632, "step": 10194 }, { "epoch": 2.8621560920830995, "grad_norm": 0.5209183692932129, "learning_rate": 6.417889383150688e-08, "loss": 0.3281, "step": 10195 }, { "epoch": 2.862436833239753, "grad_norm": 0.5655257105827332, "learning_rate": 6.391829673841998e-08, "loss": 0.3641, "step": 10196 }, { "epoch": 2.8627175743964064, "grad_norm": 0.5721009969711304, "learning_rate": 6.365822639327724e-08, "loss": 0.3308, "step": 10197 }, { "epoch": 2.8629983155530603, "grad_norm": 0.5337321758270264, "learning_rate": 6.339868282383144e-08, "loss": 0.3526, "step": 10198 }, { "epoch": 2.8632790567097137, "grad_norm": 0.5727617144584656, "learning_rate": 6.313966605777932e-08, "loss": 0.3172, "step": 10199 }, { "epoch": 2.863559797866367, "grad_norm": 0.5758257508277893, "learning_rate": 6.288117612276157e-08, "loss": 0.3114, "step": 10200 }, { "epoch": 2.8638405390230206, "grad_norm": 0.5676164627075195, "learning_rate": 6.262321304636277e-08, "loss": 0.3292, "step": 10201 }, { "epoch": 2.8641212801796745, "grad_norm": 0.5245554447174072, "learning_rate": 6.2365776856112e-08, "loss": 0.3388, "step": 10202 }, { "epoch": 2.864402021336328, "grad_norm": 0.5958057045936584, "learning_rate": 6.210886757947954e-08, "loss": 0.2981, "step": 10203 }, { "epoch": 2.8646827624929814, "grad_norm": 0.6062753796577454, "learning_rate": 6.185248524388232e-08, "loss": 0.3074, "step": 10204 }, { "epoch": 2.8649635036496353, "grad_norm": 0.49634841084480286, "learning_rate": 6.159662987667959e-08, "loss": 0.3604, "step": 10205 }, { "epoch": 2.8652442448062887, "grad_norm": 0.5320731401443481, "learning_rate": 6.134130150517447e-08, "loss": 0.3814, "step": 10206 }, { "epoch": 2.865524985962942, "grad_norm": 0.5452108383178711, "learning_rate": 6.10865001566141e-08, "loss": 0.3404, "step": 10207 }, { "epoch": 2.8658057271195956, "grad_norm": 0.5631430149078369, "learning_rate": 6.083222585818949e-08, "loss": 0.327, "step": 10208 }, { "epoch": 2.866086468276249, "grad_norm": 0.5746709108352661, "learning_rate": 6.057847863703503e-08, "loss": 0.304, "step": 10209 }, { "epoch": 2.866367209432903, "grad_norm": 0.5286504030227661, "learning_rate": 6.032525852022964e-08, "loss": 0.3492, "step": 10210 }, { "epoch": 2.8666479505895563, "grad_norm": 0.5584084987640381, "learning_rate": 6.00725655347939e-08, "loss": 0.3283, "step": 10211 }, { "epoch": 2.8669286917462102, "grad_norm": 0.566763162612915, "learning_rate": 5.982039970769515e-08, "loss": 0.3203, "step": 10212 }, { "epoch": 2.8672094329028637, "grad_norm": 0.5794817209243774, "learning_rate": 5.956876106584242e-08, "loss": 0.285, "step": 10213 }, { "epoch": 2.867490174059517, "grad_norm": 0.5237037539482117, "learning_rate": 5.9317649636088656e-08, "loss": 0.3399, "step": 10214 }, { "epoch": 2.8677709152161706, "grad_norm": 0.5269162654876709, "learning_rate": 5.906706544523133e-08, "loss": 0.3061, "step": 10215 }, { "epoch": 2.868051656372824, "grad_norm": 0.6088763475418091, "learning_rate": 5.881700852001127e-08, "loss": 0.3223, "step": 10216 }, { "epoch": 2.868332397529478, "grad_norm": 0.5291255712509155, "learning_rate": 5.856747888711267e-08, "loss": 0.3254, "step": 10217 }, { "epoch": 2.8686131386861313, "grad_norm": 0.5879034996032715, "learning_rate": 5.831847657316425e-08, "loss": 0.3235, "step": 10218 }, { "epoch": 2.868893879842785, "grad_norm": 0.5519848465919495, "learning_rate": 5.8070001604737525e-08, "loss": 0.329, "step": 10219 }, { "epoch": 2.8691746209994387, "grad_norm": 0.5223598480224609, "learning_rate": 5.782205400834906e-08, "loss": 0.319, "step": 10220 }, { "epoch": 2.869455362156092, "grad_norm": 0.5825893878936768, "learning_rate": 5.757463381045658e-08, "loss": 0.2851, "step": 10221 }, { "epoch": 2.8697361033127455, "grad_norm": 0.48623377084732056, "learning_rate": 5.732774103746508e-08, "loss": 0.3602, "step": 10222 }, { "epoch": 2.870016844469399, "grad_norm": 0.5634300112724304, "learning_rate": 5.7081375715720146e-08, "loss": 0.2964, "step": 10223 }, { "epoch": 2.870297585626053, "grad_norm": 0.5560899972915649, "learning_rate": 5.683553787151297e-08, "loss": 0.3085, "step": 10224 }, { "epoch": 2.8705783267827063, "grad_norm": 0.578284502029419, "learning_rate": 5.659022753107757e-08, "loss": 0.3338, "step": 10225 }, { "epoch": 2.87085906793936, "grad_norm": 0.5929065942764282, "learning_rate": 5.6345444720591894e-08, "loss": 0.3214, "step": 10226 }, { "epoch": 2.8711398090960136, "grad_norm": 0.5859715938568115, "learning_rate": 5.610118946617837e-08, "loss": 0.3054, "step": 10227 }, { "epoch": 2.871420550252667, "grad_norm": 0.5601416230201721, "learning_rate": 5.585746179390117e-08, "loss": 0.3447, "step": 10228 }, { "epoch": 2.8717012914093205, "grad_norm": 0.5210142135620117, "learning_rate": 5.561426172977058e-08, "loss": 0.3095, "step": 10229 }, { "epoch": 2.871982032565974, "grad_norm": 0.542915940284729, "learning_rate": 5.537158929973863e-08, "loss": 0.3809, "step": 10230 }, { "epoch": 2.872262773722628, "grad_norm": 0.517369270324707, "learning_rate": 5.512944452970237e-08, "loss": 0.3331, "step": 10231 }, { "epoch": 2.8725435148792813, "grad_norm": 0.5070340037345886, "learning_rate": 5.4887827445501144e-08, "loss": 0.328, "step": 10232 }, { "epoch": 2.8728242560359347, "grad_norm": 0.49687615036964417, "learning_rate": 5.464673807291987e-08, "loss": 0.3369, "step": 10233 }, { "epoch": 2.8731049971925886, "grad_norm": 0.5236063003540039, "learning_rate": 5.44061764376852e-08, "loss": 0.3544, "step": 10234 }, { "epoch": 2.873385738349242, "grad_norm": 0.5609530806541443, "learning_rate": 5.4166142565468815e-08, "loss": 0.2939, "step": 10235 }, { "epoch": 2.8736664795058955, "grad_norm": 0.5667310953140259, "learning_rate": 5.3926636481885786e-08, "loss": 0.3501, "step": 10236 }, { "epoch": 2.873947220662549, "grad_norm": 0.5604002475738525, "learning_rate": 5.3687658212494554e-08, "loss": 0.3014, "step": 10237 }, { "epoch": 2.874227961819203, "grad_norm": 0.5266476273536682, "learning_rate": 5.344920778279694e-08, "loss": 0.2882, "step": 10238 }, { "epoch": 2.8745087029758563, "grad_norm": 0.5349099636077881, "learning_rate": 5.321128521823982e-08, "loss": 0.3287, "step": 10239 }, { "epoch": 2.8747894441325097, "grad_norm": 0.5059753656387329, "learning_rate": 5.297389054421176e-08, "loss": 0.3048, "step": 10240 }, { "epoch": 2.8750701852891636, "grad_norm": 0.5411710739135742, "learning_rate": 5.273702378604639e-08, "loss": 0.3586, "step": 10241 }, { "epoch": 2.875350926445817, "grad_norm": 0.5979899764060974, "learning_rate": 5.250068496902183e-08, "loss": 0.2859, "step": 10242 }, { "epoch": 2.8756316676024705, "grad_norm": 0.5573270916938782, "learning_rate": 5.226487411835679e-08, "loss": 0.3023, "step": 10243 }, { "epoch": 2.875912408759124, "grad_norm": 0.5321760177612305, "learning_rate": 5.202959125921725e-08, "loss": 0.3501, "step": 10244 }, { "epoch": 2.876193149915778, "grad_norm": 0.5406895875930786, "learning_rate": 5.1794836416709236e-08, "loss": 0.2834, "step": 10245 }, { "epoch": 2.8764738910724312, "grad_norm": 0.5286221504211426, "learning_rate": 5.15606096158866e-08, "loss": 0.347, "step": 10246 }, { "epoch": 2.8767546322290847, "grad_norm": 0.5209830403327942, "learning_rate": 5.132691088174269e-08, "loss": 0.3234, "step": 10247 }, { "epoch": 2.8770353733857386, "grad_norm": 0.5012113451957703, "learning_rate": 5.109374023921754e-08, "loss": 0.3785, "step": 10248 }, { "epoch": 2.877316114542392, "grad_norm": 0.5220000743865967, "learning_rate": 5.0861097713192916e-08, "loss": 0.3395, "step": 10249 }, { "epoch": 2.8775968556990454, "grad_norm": 0.539867103099823, "learning_rate": 5.062898332849509e-08, "loss": 0.3119, "step": 10250 }, { "epoch": 2.877877596855699, "grad_norm": 0.5832822918891907, "learning_rate": 5.039739710989422e-08, "loss": 0.3235, "step": 10251 }, { "epoch": 2.8781583380123528, "grad_norm": 0.5702241659164429, "learning_rate": 5.016633908210389e-08, "loss": 0.304, "step": 10252 }, { "epoch": 2.878439079169006, "grad_norm": 0.5818907022476196, "learning_rate": 4.993580926978048e-08, "loss": 0.3146, "step": 10253 }, { "epoch": 2.8787198203256597, "grad_norm": 0.5696885585784912, "learning_rate": 4.970580769752542e-08, "loss": 0.3038, "step": 10254 }, { "epoch": 2.8790005614823135, "grad_norm": 0.5527303814888, "learning_rate": 4.9476334389882416e-08, "loss": 0.3359, "step": 10255 }, { "epoch": 2.879281302638967, "grad_norm": 0.5346391797065735, "learning_rate": 4.924738937133966e-08, "loss": 0.3166, "step": 10256 }, { "epoch": 2.8795620437956204, "grad_norm": 0.5558182597160339, "learning_rate": 4.901897266632927e-08, "loss": 0.342, "step": 10257 }, { "epoch": 2.879842784952274, "grad_norm": 0.537028968334198, "learning_rate": 4.8791084299225635e-08, "loss": 0.3139, "step": 10258 }, { "epoch": 2.8801235261089273, "grad_norm": 0.5365895628929138, "learning_rate": 4.856372429434819e-08, "loss": 0.3217, "step": 10259 }, { "epoch": 2.880404267265581, "grad_norm": 0.5496370196342468, "learning_rate": 4.8336892675958646e-08, "loss": 0.3511, "step": 10260 }, { "epoch": 2.8806850084222346, "grad_norm": 0.5810548067092896, "learning_rate": 4.81105894682643e-08, "loss": 0.2826, "step": 10261 }, { "epoch": 2.8809657495788885, "grad_norm": 0.60880446434021, "learning_rate": 4.788481469541306e-08, "loss": 0.2954, "step": 10262 }, { "epoch": 2.881246490735542, "grad_norm": 0.600534200668335, "learning_rate": 4.76595683815001e-08, "loss": 0.2726, "step": 10263 }, { "epoch": 2.8815272318921954, "grad_norm": 0.5583833456039429, "learning_rate": 4.7434850550561185e-08, "loss": 0.3538, "step": 10264 }, { "epoch": 2.881807973048849, "grad_norm": 0.547808825969696, "learning_rate": 4.721066122657714e-08, "loss": 0.3073, "step": 10265 }, { "epoch": 2.8820887142055023, "grad_norm": 0.5400701761245728, "learning_rate": 4.698700043347215e-08, "loss": 0.3383, "step": 10266 }, { "epoch": 2.882369455362156, "grad_norm": 0.6265240907669067, "learning_rate": 4.6763868195112695e-08, "loss": 0.3008, "step": 10267 }, { "epoch": 2.8826501965188096, "grad_norm": 0.5000604391098022, "learning_rate": 4.6541264535311936e-08, "loss": 0.3396, "step": 10268 }, { "epoch": 2.8829309376754635, "grad_norm": 0.5225700736045837, "learning_rate": 4.631918947782421e-08, "loss": 0.338, "step": 10269 }, { "epoch": 2.883211678832117, "grad_norm": 0.505322277545929, "learning_rate": 4.6097643046346674e-08, "loss": 0.3291, "step": 10270 }, { "epoch": 2.8834924199887704, "grad_norm": 0.4945646822452545, "learning_rate": 4.587662526452319e-08, "loss": 0.3683, "step": 10271 }, { "epoch": 2.883773161145424, "grad_norm": 0.5467312335968018, "learning_rate": 4.565613615593822e-08, "loss": 0.2948, "step": 10272 }, { "epoch": 2.8840539023020773, "grad_norm": 0.5793159604072571, "learning_rate": 4.543617574412185e-08, "loss": 0.3087, "step": 10273 }, { "epoch": 2.884334643458731, "grad_norm": 0.5565694570541382, "learning_rate": 4.521674405254583e-08, "loss": 0.3424, "step": 10274 }, { "epoch": 2.8846153846153846, "grad_norm": 0.565380334854126, "learning_rate": 4.499784110462757e-08, "loss": 0.3108, "step": 10275 }, { "epoch": 2.884896125772038, "grad_norm": 0.49810847640037537, "learning_rate": 4.4779466923726146e-08, "loss": 0.3562, "step": 10276 }, { "epoch": 2.885176866928692, "grad_norm": 0.5254920125007629, "learning_rate": 4.456162153314569e-08, "loss": 0.3223, "step": 10277 }, { "epoch": 2.8854576080853453, "grad_norm": 0.5809634327888489, "learning_rate": 4.434430495613318e-08, "loss": 0.33, "step": 10278 }, { "epoch": 2.885738349241999, "grad_norm": 0.5467097163200378, "learning_rate": 4.412751721587949e-08, "loss": 0.3482, "step": 10279 }, { "epoch": 2.8860190903986522, "grad_norm": 0.5433565378189087, "learning_rate": 4.3911258335518345e-08, "loss": 0.3757, "step": 10280 }, { "epoch": 2.886299831555306, "grad_norm": 0.5442503094673157, "learning_rate": 4.369552833812796e-08, "loss": 0.3516, "step": 10281 }, { "epoch": 2.8865805727119596, "grad_norm": 0.5326297879219055, "learning_rate": 4.3480327246729347e-08, "loss": 0.352, "step": 10282 }, { "epoch": 2.886861313868613, "grad_norm": 0.5138195753097534, "learning_rate": 4.326565508428804e-08, "loss": 0.3385, "step": 10283 }, { "epoch": 2.887142055025267, "grad_norm": 0.5652940273284912, "learning_rate": 4.305151187371182e-08, "loss": 0.3256, "step": 10284 }, { "epoch": 2.8874227961819203, "grad_norm": 0.6104983687400818, "learning_rate": 4.283789763785295e-08, "loss": 0.2974, "step": 10285 }, { "epoch": 2.8877035373385738, "grad_norm": 0.5927769541740417, "learning_rate": 4.262481239950711e-08, "loss": 0.3275, "step": 10286 }, { "epoch": 2.887984278495227, "grad_norm": 0.5427842140197754, "learning_rate": 4.241225618141387e-08, "loss": 0.3128, "step": 10287 }, { "epoch": 2.888265019651881, "grad_norm": 0.5188747048377991, "learning_rate": 4.220022900625509e-08, "loss": 0.3239, "step": 10288 }, { "epoch": 2.8885457608085345, "grad_norm": 0.5585922002792358, "learning_rate": 4.198873089665711e-08, "loss": 0.2919, "step": 10289 }, { "epoch": 2.888826501965188, "grad_norm": 0.507307231426239, "learning_rate": 4.177776187519023e-08, "loss": 0.3559, "step": 10290 }, { "epoch": 2.889107243121842, "grad_norm": 0.5435081720352173, "learning_rate": 4.156732196436752e-08, "loss": 0.3373, "step": 10291 }, { "epoch": 2.8893879842784953, "grad_norm": 0.5974767804145813, "learning_rate": 4.135741118664549e-08, "loss": 0.3264, "step": 10292 }, { "epoch": 2.8896687254351487, "grad_norm": 0.5404211282730103, "learning_rate": 4.1148029564425094e-08, "loss": 0.3582, "step": 10293 }, { "epoch": 2.889949466591802, "grad_norm": 0.5471585392951965, "learning_rate": 4.0939177120049576e-08, "loss": 0.3166, "step": 10294 }, { "epoch": 2.890230207748456, "grad_norm": 0.5118330717086792, "learning_rate": 4.073085387580722e-08, "loss": 0.3692, "step": 10295 }, { "epoch": 2.8905109489051095, "grad_norm": 0.5458824634552002, "learning_rate": 4.0523059853928014e-08, "loss": 0.3055, "step": 10296 }, { "epoch": 2.890791690061763, "grad_norm": 0.5640842914581299, "learning_rate": 4.0315795076587005e-08, "loss": 0.3125, "step": 10297 }, { "epoch": 2.891072431218417, "grad_norm": 0.48543086647987366, "learning_rate": 4.010905956590205e-08, "loss": 0.3545, "step": 10298 }, { "epoch": 2.8913531723750703, "grad_norm": 0.5426521301269531, "learning_rate": 3.9902853343934955e-08, "loss": 0.3301, "step": 10299 }, { "epoch": 2.8916339135317237, "grad_norm": 0.5406230092048645, "learning_rate": 3.9697176432690335e-08, "loss": 0.3159, "step": 10300 }, { "epoch": 2.891914654688377, "grad_norm": 0.5597209334373474, "learning_rate": 3.949202885411674e-08, "loss": 0.2952, "step": 10301 }, { "epoch": 2.892195395845031, "grad_norm": 0.6307613253593445, "learning_rate": 3.928741063010721e-08, "loss": 0.3169, "step": 10302 }, { "epoch": 2.8924761370016845, "grad_norm": 0.5918838977813721, "learning_rate": 3.9083321782495965e-08, "loss": 0.3306, "step": 10303 }, { "epoch": 2.892756878158338, "grad_norm": 0.5424016714096069, "learning_rate": 3.887976233306279e-08, "loss": 0.3429, "step": 10304 }, { "epoch": 2.893037619314992, "grad_norm": 0.536504328250885, "learning_rate": 3.867673230353031e-08, "loss": 0.328, "step": 10305 }, { "epoch": 2.8933183604716453, "grad_norm": 0.6102229952812195, "learning_rate": 3.847423171556452e-08, "loss": 0.2996, "step": 10306 }, { "epoch": 2.8935991016282987, "grad_norm": 0.5004302263259888, "learning_rate": 3.827226059077538e-08, "loss": 0.3217, "step": 10307 }, { "epoch": 2.893879842784952, "grad_norm": 0.5153526663780212, "learning_rate": 3.807081895071507e-08, "loss": 0.3303, "step": 10308 }, { "epoch": 2.8941605839416056, "grad_norm": 0.5676251649856567, "learning_rate": 3.7869906816880855e-08, "loss": 0.3022, "step": 10309 }, { "epoch": 2.8944413250982595, "grad_norm": 0.5440424680709839, "learning_rate": 3.766952421071335e-08, "loss": 0.3073, "step": 10310 }, { "epoch": 2.894722066254913, "grad_norm": 0.5407348275184631, "learning_rate": 3.7469671153594346e-08, "loss": 0.3455, "step": 10311 }, { "epoch": 2.895002807411567, "grad_norm": 0.4998387396335602, "learning_rate": 3.7270347666853446e-08, "loss": 0.3108, "step": 10312 }, { "epoch": 2.8952835485682202, "grad_norm": 0.5646181702613831, "learning_rate": 3.707155377175864e-08, "loss": 0.2744, "step": 10313 }, { "epoch": 2.8955642897248737, "grad_norm": 0.5334526300430298, "learning_rate": 3.6873289489526285e-08, "loss": 0.418, "step": 10314 }, { "epoch": 2.895845030881527, "grad_norm": 0.5950364470481873, "learning_rate": 3.6675554841312246e-08, "loss": 0.3107, "step": 10315 }, { "epoch": 2.8961257720381806, "grad_norm": 0.5439245700836182, "learning_rate": 3.6478349848217966e-08, "loss": 0.2894, "step": 10316 }, { "epoch": 2.8964065131948344, "grad_norm": 0.5425600409507751, "learning_rate": 3.628167453128828e-08, "loss": 0.3334, "step": 10317 }, { "epoch": 2.896687254351488, "grad_norm": 0.5517589449882507, "learning_rate": 3.608552891151085e-08, "loss": 0.3591, "step": 10318 }, { "epoch": 2.8969679955081418, "grad_norm": 0.5716979503631592, "learning_rate": 3.588991300981726e-08, "loss": 0.329, "step": 10319 }, { "epoch": 2.897248736664795, "grad_norm": 0.5769723057746887, "learning_rate": 3.569482684708247e-08, "loss": 0.3438, "step": 10320 }, { "epoch": 2.8975294778214487, "grad_norm": 0.5434415936470032, "learning_rate": 3.550027044412485e-08, "loss": 0.3272, "step": 10321 }, { "epoch": 2.897810218978102, "grad_norm": 0.517772912979126, "learning_rate": 3.530624382170611e-08, "loss": 0.3453, "step": 10322 }, { "epoch": 2.8980909601347555, "grad_norm": 0.5583859086036682, "learning_rate": 3.5112747000531355e-08, "loss": 0.3527, "step": 10323 }, { "epoch": 2.8983717012914094, "grad_norm": 0.5648369193077087, "learning_rate": 3.491978000125018e-08, "loss": 0.3211, "step": 10324 }, { "epoch": 2.898652442448063, "grad_norm": 0.5798605680465698, "learning_rate": 3.472734284445445e-08, "loss": 0.2961, "step": 10325 }, { "epoch": 2.8989331836047163, "grad_norm": 0.5161697864532471, "learning_rate": 3.4535435550678844e-08, "loss": 0.3224, "step": 10326 }, { "epoch": 2.89921392476137, "grad_norm": 0.5376103520393372, "learning_rate": 3.43440581404042e-08, "loss": 0.2998, "step": 10327 }, { "epoch": 2.8994946659180236, "grad_norm": 0.5266596078872681, "learning_rate": 3.415321063405141e-08, "loss": 0.3251, "step": 10328 }, { "epoch": 2.899775407074677, "grad_norm": 0.5706929564476013, "learning_rate": 3.3962893051988077e-08, "loss": 0.3823, "step": 10329 }, { "epoch": 2.9000561482313305, "grad_norm": 0.5149383544921875, "learning_rate": 3.3773105414523496e-08, "loss": 0.3331, "step": 10330 }, { "epoch": 2.9003368893879844, "grad_norm": 0.512887179851532, "learning_rate": 3.358384774190926e-08, "loss": 0.3314, "step": 10331 }, { "epoch": 2.900617630544638, "grad_norm": 0.5874810814857483, "learning_rate": 3.339512005434309e-08, "loss": 0.2891, "step": 10332 }, { "epoch": 2.9008983717012913, "grad_norm": 0.522426962852478, "learning_rate": 3.3206922371964436e-08, "loss": 0.3386, "step": 10333 }, { "epoch": 2.901179112857945, "grad_norm": 0.5868736505508423, "learning_rate": 3.301925471485612e-08, "loss": 0.3159, "step": 10334 }, { "epoch": 2.9014598540145986, "grad_norm": 0.5494204163551331, "learning_rate": 3.283211710304601e-08, "loss": 0.333, "step": 10335 }, { "epoch": 2.901740595171252, "grad_norm": 0.5404460430145264, "learning_rate": 3.264550955650314e-08, "loss": 0.3383, "step": 10336 }, { "epoch": 2.9020213363279055, "grad_norm": 0.526391863822937, "learning_rate": 3.245943209514213e-08, "loss": 0.3108, "step": 10337 }, { "epoch": 2.9023020774845594, "grad_norm": 0.5620700716972351, "learning_rate": 3.227388473881876e-08, "loss": 0.2952, "step": 10338 }, { "epoch": 2.902582818641213, "grad_norm": 0.49800369143486023, "learning_rate": 3.208886750733442e-08, "loss": 0.3496, "step": 10339 }, { "epoch": 2.9028635597978663, "grad_norm": 0.5326073169708252, "learning_rate": 3.190438042043276e-08, "loss": 0.342, "step": 10340 }, { "epoch": 2.90314430095452, "grad_norm": 0.6173860430717468, "learning_rate": 3.17204234978008e-08, "loss": 0.3089, "step": 10341 }, { "epoch": 2.9034250421111736, "grad_norm": 0.5504783987998962, "learning_rate": 3.153699675907007e-08, "loss": 0.3424, "step": 10342 }, { "epoch": 2.903705783267827, "grad_norm": 0.5313858389854431, "learning_rate": 3.1354100223813246e-08, "loss": 0.3203, "step": 10343 }, { "epoch": 2.9039865244244805, "grad_norm": 0.4968644678592682, "learning_rate": 3.117173391154971e-08, "loss": 0.3211, "step": 10344 }, { "epoch": 2.9042672655811343, "grad_norm": 0.5251296758651733, "learning_rate": 3.0989897841739446e-08, "loss": 0.3061, "step": 10345 }, { "epoch": 2.904548006737788, "grad_norm": 0.5955836772918701, "learning_rate": 3.0808592033786944e-08, "loss": 0.3351, "step": 10346 }, { "epoch": 2.9048287478944412, "grad_norm": 0.5214530825614929, "learning_rate": 3.062781650704061e-08, "loss": 0.3383, "step": 10347 }, { "epoch": 2.905109489051095, "grad_norm": 0.561315655708313, "learning_rate": 3.044757128079057e-08, "loss": 0.2763, "step": 10348 }, { "epoch": 2.9053902302077486, "grad_norm": 0.545353353023529, "learning_rate": 3.026785637427254e-08, "loss": 0.3542, "step": 10349 }, { "epoch": 2.905670971364402, "grad_norm": 0.5503994822502136, "learning_rate": 3.008867180666397e-08, "loss": 0.356, "step": 10350 }, { "epoch": 2.9059517125210554, "grad_norm": 0.5180359482765198, "learning_rate": 2.991001759708678e-08, "loss": 0.3488, "step": 10351 }, { "epoch": 2.906232453677709, "grad_norm": 0.48081329464912415, "learning_rate": 2.973189376460517e-08, "loss": 0.323, "step": 10352 }, { "epoch": 2.9065131948343628, "grad_norm": 0.5101129412651062, "learning_rate": 2.9554300328228368e-08, "loss": 0.3278, "step": 10353 }, { "epoch": 2.906793935991016, "grad_norm": 0.5722399353981018, "learning_rate": 2.937723730690678e-08, "loss": 0.2704, "step": 10354 }, { "epoch": 2.90707467714767, "grad_norm": 0.5677437782287598, "learning_rate": 2.92007047195364e-08, "loss": 0.2834, "step": 10355 }, { "epoch": 2.9073554183043235, "grad_norm": 0.5312627553939819, "learning_rate": 2.902470258495549e-08, "loss": 0.3123, "step": 10356 }, { "epoch": 2.907636159460977, "grad_norm": 0.6231878399848938, "learning_rate": 2.8849230921946248e-08, "loss": 0.3309, "step": 10357 }, { "epoch": 2.9079169006176304, "grad_norm": 0.5991750359535217, "learning_rate": 2.8674289749233142e-08, "loss": 0.3519, "step": 10358 }, { "epoch": 2.908197641774284, "grad_norm": 0.5414668321609497, "learning_rate": 2.8499879085485128e-08, "loss": 0.3843, "step": 10359 }, { "epoch": 2.9084783829309377, "grad_norm": 0.5410469174385071, "learning_rate": 2.8325998949314536e-08, "loss": 0.3001, "step": 10360 }, { "epoch": 2.908759124087591, "grad_norm": 0.4624980390071869, "learning_rate": 2.815264935927653e-08, "loss": 0.3158, "step": 10361 }, { "epoch": 2.909039865244245, "grad_norm": 0.5556575655937195, "learning_rate": 2.7979830333869638e-08, "loss": 0.29, "step": 10362 }, { "epoch": 2.9093206064008985, "grad_norm": 0.506561279296875, "learning_rate": 2.780754189153634e-08, "loss": 0.3363, "step": 10363 }, { "epoch": 2.909601347557552, "grad_norm": 0.5680440664291382, "learning_rate": 2.7635784050662474e-08, "loss": 0.315, "step": 10364 }, { "epoch": 2.9098820887142054, "grad_norm": 0.47068169713020325, "learning_rate": 2.746455682957616e-08, "loss": 0.3098, "step": 10365 }, { "epoch": 2.910162829870859, "grad_norm": 0.48363471031188965, "learning_rate": 2.7293860246550563e-08, "loss": 0.3483, "step": 10366 }, { "epoch": 2.9104435710275127, "grad_norm": 0.5012058019638062, "learning_rate": 2.7123694319800552e-08, "loss": 0.3316, "step": 10367 }, { "epoch": 2.910724312184166, "grad_norm": 0.5747319459915161, "learning_rate": 2.695405906748605e-08, "loss": 0.3357, "step": 10368 }, { "epoch": 2.9110050533408196, "grad_norm": 0.5871933698654175, "learning_rate": 2.678495450770924e-08, "loss": 0.3277, "step": 10369 }, { "epoch": 2.9112857944974735, "grad_norm": 0.5979270935058594, "learning_rate": 2.6616380658515128e-08, "loss": 0.3435, "step": 10370 }, { "epoch": 2.911566535654127, "grad_norm": 0.5709941387176514, "learning_rate": 2.6448337537893776e-08, "loss": 0.3324, "step": 10371 }, { "epoch": 2.9118472768107804, "grad_norm": 0.5479961633682251, "learning_rate": 2.6280825163776946e-08, "loss": 0.3391, "step": 10372 }, { "epoch": 2.912128017967434, "grad_norm": 0.560149073600769, "learning_rate": 2.6113843554041453e-08, "loss": 0.3575, "step": 10373 }, { "epoch": 2.9124087591240877, "grad_norm": 0.5506590604782104, "learning_rate": 2.5947392726505817e-08, "loss": 0.3279, "step": 10374 }, { "epoch": 2.912689500280741, "grad_norm": 0.4642602801322937, "learning_rate": 2.578147269893305e-08, "loss": 0.3367, "step": 10375 }, { "epoch": 2.9129702414373946, "grad_norm": 0.5406062602996826, "learning_rate": 2.5616083489028443e-08, "loss": 0.3271, "step": 10376 }, { "epoch": 2.9132509825940485, "grad_norm": 0.5081760883331299, "learning_rate": 2.5451225114441758e-08, "loss": 0.3172, "step": 10377 }, { "epoch": 2.913531723750702, "grad_norm": 0.5476431846618652, "learning_rate": 2.5286897592766147e-08, "loss": 0.3141, "step": 10378 }, { "epoch": 2.9138124649073553, "grad_norm": 0.569713294506073, "learning_rate": 2.5123100941537027e-08, "loss": 0.337, "step": 10379 }, { "epoch": 2.914093206064009, "grad_norm": 0.5425527691841125, "learning_rate": 2.4959835178233748e-08, "loss": 0.3072, "step": 10380 }, { "epoch": 2.9143739472206627, "grad_norm": 0.5655215978622437, "learning_rate": 2.4797100320279045e-08, "loss": 0.3058, "step": 10381 }, { "epoch": 2.914654688377316, "grad_norm": 0.504270613193512, "learning_rate": 2.463489638503902e-08, "loss": 0.3181, "step": 10382 }, { "epoch": 2.9149354295339696, "grad_norm": 0.5506932139396667, "learning_rate": 2.4473223389823166e-08, "loss": 0.3079, "step": 10383 }, { "epoch": 2.9152161706906234, "grad_norm": 0.5552608370780945, "learning_rate": 2.4312081351883786e-08, "loss": 0.3365, "step": 10384 }, { "epoch": 2.915496911847277, "grad_norm": 0.5960777997970581, "learning_rate": 2.4151470288418246e-08, "loss": 0.3114, "step": 10385 }, { "epoch": 2.9157776530039303, "grad_norm": 0.5495597720146179, "learning_rate": 2.3991390216564492e-08, "loss": 0.3443, "step": 10386 }, { "epoch": 2.9160583941605838, "grad_norm": 0.5533434152603149, "learning_rate": 2.3831841153405532e-08, "loss": 0.2783, "step": 10387 }, { "epoch": 2.9163391353172377, "grad_norm": 0.4872790575027466, "learning_rate": 2.3672823115968303e-08, "loss": 0.3496, "step": 10388 }, { "epoch": 2.916619876473891, "grad_norm": 0.49798262119293213, "learning_rate": 2.3514336121220893e-08, "loss": 0.3551, "step": 10389 }, { "epoch": 2.9169006176305445, "grad_norm": 0.5557563900947571, "learning_rate": 2.3356380186077554e-08, "loss": 0.3619, "step": 10390 }, { "epoch": 2.9171813587871984, "grad_norm": 0.5699899792671204, "learning_rate": 2.319895532739369e-08, "loss": 0.3509, "step": 10391 }, { "epoch": 2.917462099943852, "grad_norm": 0.632973849773407, "learning_rate": 2.3042061561968087e-08, "loss": 0.3064, "step": 10392 }, { "epoch": 2.9177428411005053, "grad_norm": 0.4675182104110718, "learning_rate": 2.2885698906544017e-08, "loss": 0.3706, "step": 10393 }, { "epoch": 2.9180235822571587, "grad_norm": 0.534770131111145, "learning_rate": 2.272986737780758e-08, "loss": 0.3383, "step": 10394 }, { "epoch": 2.9183043234138126, "grad_norm": 0.5844979286193848, "learning_rate": 2.2574566992388247e-08, "loss": 0.3407, "step": 10395 }, { "epoch": 2.918585064570466, "grad_norm": 0.584099292755127, "learning_rate": 2.2419797766858876e-08, "loss": 0.3281, "step": 10396 }, { "epoch": 2.9188658057271195, "grad_norm": 0.5600420236587524, "learning_rate": 2.2265559717734586e-08, "loss": 0.3283, "step": 10397 }, { "epoch": 2.9191465468837734, "grad_norm": 0.5826917886734009, "learning_rate": 2.2111852861475546e-08, "loss": 0.3408, "step": 10398 }, { "epoch": 2.919427288040427, "grad_norm": 0.5664740204811096, "learning_rate": 2.1958677214484192e-08, "loss": 0.3267, "step": 10399 }, { "epoch": 2.9197080291970803, "grad_norm": 0.5925627946853638, "learning_rate": 2.1806032793106334e-08, "loss": 0.3083, "step": 10400 }, { "epoch": 2.9199887703537337, "grad_norm": 0.525027871131897, "learning_rate": 2.165391961363117e-08, "loss": 0.3482, "step": 10401 }, { "epoch": 2.920269511510387, "grad_norm": 0.48973405361175537, "learning_rate": 2.1502337692291818e-08, "loss": 0.3958, "step": 10402 }, { "epoch": 2.920550252667041, "grad_norm": 0.6105417609214783, "learning_rate": 2.1351287045263124e-08, "loss": 0.3459, "step": 10403 }, { "epoch": 2.9208309938236945, "grad_norm": 0.5968075394630432, "learning_rate": 2.1200767688665524e-08, "loss": 0.3289, "step": 10404 }, { "epoch": 2.9211117349803484, "grad_norm": 0.5124432444572449, "learning_rate": 2.1050779638560616e-08, "loss": 0.3492, "step": 10405 }, { "epoch": 2.921392476137002, "grad_norm": 0.5225157141685486, "learning_rate": 2.090132291095448e-08, "loss": 0.323, "step": 10406 }, { "epoch": 2.9216732172936553, "grad_norm": 0.5560155510902405, "learning_rate": 2.075239752179603e-08, "loss": 0.3574, "step": 10407 }, { "epoch": 2.9219539584503087, "grad_norm": 0.5677082538604736, "learning_rate": 2.060400348697811e-08, "loss": 0.322, "step": 10408 }, { "epoch": 2.922234699606962, "grad_norm": 0.5468083620071411, "learning_rate": 2.0456140822335825e-08, "loss": 0.2917, "step": 10409 }, { "epoch": 2.922515440763616, "grad_norm": 0.5514059662818909, "learning_rate": 2.0308809543648776e-08, "loss": 0.29, "step": 10410 }, { "epoch": 2.9227961819202695, "grad_norm": 0.5410345196723938, "learning_rate": 2.0162009666638837e-08, "loss": 0.335, "step": 10411 }, { "epoch": 2.9230769230769234, "grad_norm": 0.5883322358131409, "learning_rate": 2.0015741206971252e-08, "loss": 0.3183, "step": 10412 }, { "epoch": 2.923357664233577, "grad_norm": 0.5663148164749146, "learning_rate": 1.9870004180255755e-08, "loss": 0.3271, "step": 10413 }, { "epoch": 2.9236384053902302, "grad_norm": 0.5233526229858398, "learning_rate": 1.9724798602043793e-08, "loss": 0.3407, "step": 10414 }, { "epoch": 2.9239191465468837, "grad_norm": 0.6190729737281799, "learning_rate": 1.958012448783131e-08, "loss": 0.3041, "step": 10415 }, { "epoch": 2.924199887703537, "grad_norm": 0.5732908248901367, "learning_rate": 1.9435981853056506e-08, "loss": 0.3322, "step": 10416 }, { "epoch": 2.924480628860191, "grad_norm": 0.5757434964179993, "learning_rate": 1.9292370713101527e-08, "loss": 0.3411, "step": 10417 }, { "epoch": 2.9247613700168444, "grad_norm": 0.5287437438964844, "learning_rate": 1.9149291083291888e-08, "loss": 0.3659, "step": 10418 }, { "epoch": 2.925042111173498, "grad_norm": 0.5786102414131165, "learning_rate": 1.9006742978895933e-08, "loss": 0.3072, "step": 10419 }, { "epoch": 2.9253228523301518, "grad_norm": 0.5517265200614929, "learning_rate": 1.8864726415125933e-08, "loss": 0.2873, "step": 10420 }, { "epoch": 2.925603593486805, "grad_norm": 0.5496549606323242, "learning_rate": 1.872324140713644e-08, "loss": 0.3576, "step": 10421 }, { "epoch": 2.9258843346434587, "grad_norm": 0.5972222685813904, "learning_rate": 1.858228797002648e-08, "loss": 0.3111, "step": 10422 }, { "epoch": 2.926165075800112, "grad_norm": 0.5477586388587952, "learning_rate": 1.8441866118836804e-08, "loss": 0.2791, "step": 10423 }, { "epoch": 2.926445816956766, "grad_norm": 0.5522471070289612, "learning_rate": 1.8301975868553202e-08, "loss": 0.3107, "step": 10424 }, { "epoch": 2.9267265581134194, "grad_norm": 0.5395142436027527, "learning_rate": 1.816261723410373e-08, "loss": 0.3256, "step": 10425 }, { "epoch": 2.927007299270073, "grad_norm": 0.515011191368103, "learning_rate": 1.802379023035927e-08, "loss": 0.3307, "step": 10426 }, { "epoch": 2.9272880404267267, "grad_norm": 0.5615693926811218, "learning_rate": 1.7885494872135754e-08, "loss": 0.3205, "step": 10427 }, { "epoch": 2.92756878158338, "grad_norm": 0.5720718502998352, "learning_rate": 1.7747731174190262e-08, "loss": 0.3105, "step": 10428 }, { "epoch": 2.9278495227400336, "grad_norm": 0.5090105533599854, "learning_rate": 1.7610499151223813e-08, "loss": 0.3065, "step": 10429 }, { "epoch": 2.928130263896687, "grad_norm": 0.5333269834518433, "learning_rate": 1.7473798817881915e-08, "loss": 0.3635, "step": 10430 }, { "epoch": 2.928411005053341, "grad_norm": 0.5919604897499084, "learning_rate": 1.7337630188751787e-08, "loss": 0.2897, "step": 10431 }, { "epoch": 2.9286917462099944, "grad_norm": 0.5328156352043152, "learning_rate": 1.7201993278364582e-08, "loss": 0.3172, "step": 10432 }, { "epoch": 2.928972487366648, "grad_norm": 0.6113194227218628, "learning_rate": 1.7066888101194835e-08, "loss": 0.29, "step": 10433 }, { "epoch": 2.9292532285233017, "grad_norm": 0.5573399066925049, "learning_rate": 1.69323146716599e-08, "loss": 0.3267, "step": 10434 }, { "epoch": 2.929533969679955, "grad_norm": 0.4959973990917206, "learning_rate": 1.6798273004121067e-08, "loss": 0.356, "step": 10435 }, { "epoch": 2.9298147108366086, "grad_norm": 0.5739654302597046, "learning_rate": 1.6664763112881342e-08, "loss": 0.3563, "step": 10436 }, { "epoch": 2.930095451993262, "grad_norm": 0.5687186121940613, "learning_rate": 1.6531785012189327e-08, "loss": 0.2725, "step": 10437 }, { "epoch": 2.930376193149916, "grad_norm": 0.5686548352241516, "learning_rate": 1.639933871623478e-08, "loss": 0.3233, "step": 10438 }, { "epoch": 2.9306569343065694, "grad_norm": 0.5329210758209229, "learning_rate": 1.6267424239151953e-08, "loss": 0.2906, "step": 10439 }, { "epoch": 2.930937675463223, "grad_norm": 0.5315520167350769, "learning_rate": 1.6136041595017914e-08, "loss": 0.3553, "step": 10440 }, { "epoch": 2.9312184166198767, "grad_norm": 0.5640670657157898, "learning_rate": 1.6005190797852564e-08, "loss": 0.2898, "step": 10441 }, { "epoch": 2.93149915777653, "grad_norm": 0.539916455745697, "learning_rate": 1.5874871861620287e-08, "loss": 0.3293, "step": 10442 }, { "epoch": 2.9317798989331836, "grad_norm": 0.5563673377037048, "learning_rate": 1.5745084800227184e-08, "loss": 0.3042, "step": 10443 }, { "epoch": 2.932060640089837, "grad_norm": 0.5389270782470703, "learning_rate": 1.5615829627523837e-08, "loss": 0.2954, "step": 10444 }, { "epoch": 2.9323413812464905, "grad_norm": 0.5811926126480103, "learning_rate": 1.5487106357303106e-08, "loss": 0.3445, "step": 10445 }, { "epoch": 2.9326221224031443, "grad_norm": 0.566461980342865, "learning_rate": 1.5358915003301223e-08, "loss": 0.2767, "step": 10446 }, { "epoch": 2.932902863559798, "grad_norm": 0.49240151047706604, "learning_rate": 1.5231255579199466e-08, "loss": 0.3875, "step": 10447 }, { "epoch": 2.9331836047164517, "grad_norm": 0.5289565920829773, "learning_rate": 1.5104128098619164e-08, "loss": 0.3177, "step": 10448 }, { "epoch": 2.933464345873105, "grad_norm": 0.5947088599205017, "learning_rate": 1.4977532575127794e-08, "loss": 0.3637, "step": 10449 }, { "epoch": 2.9337450870297586, "grad_norm": 0.5179443955421448, "learning_rate": 1.4851469022234e-08, "loss": 0.3051, "step": 10450 }, { "epoch": 2.934025828186412, "grad_norm": 0.5526657104492188, "learning_rate": 1.4725937453390904e-08, "loss": 0.3195, "step": 10451 }, { "epoch": 2.9343065693430654, "grad_norm": 0.5307288765907288, "learning_rate": 1.460093788199446e-08, "loss": 0.3406, "step": 10452 }, { "epoch": 2.9345873104997193, "grad_norm": 0.6137859225273132, "learning_rate": 1.4476470321383995e-08, "loss": 0.3571, "step": 10453 }, { "epoch": 2.9348680516563728, "grad_norm": 0.5237303376197815, "learning_rate": 1.435253478484111e-08, "loss": 0.3229, "step": 10454 }, { "epoch": 2.9351487928130267, "grad_norm": 0.5243408679962158, "learning_rate": 1.4229131285592446e-08, "loss": 0.2663, "step": 10455 }, { "epoch": 2.93542953396968, "grad_norm": 0.5741708278656006, "learning_rate": 1.4106259836806357e-08, "loss": 0.3031, "step": 10456 }, { "epoch": 2.9357102751263335, "grad_norm": 0.5652405023574829, "learning_rate": 1.3983920451595135e-08, "loss": 0.3058, "step": 10457 }, { "epoch": 2.935991016282987, "grad_norm": 0.5818977355957031, "learning_rate": 1.3862113143013888e-08, "loss": 0.281, "step": 10458 }, { "epoch": 2.9362717574396404, "grad_norm": 0.5077506899833679, "learning_rate": 1.3740837924061112e-08, "loss": 0.3462, "step": 10459 }, { "epoch": 2.9365524985962943, "grad_norm": 0.5771107077598572, "learning_rate": 1.362009480767812e-08, "loss": 0.3328, "step": 10460 }, { "epoch": 2.9368332397529477, "grad_norm": 0.5271692276000977, "learning_rate": 1.3499883806751269e-08, "loss": 0.2982, "step": 10461 }, { "epoch": 2.937113980909601, "grad_norm": 0.6047289967536926, "learning_rate": 1.3380204934106967e-08, "loss": 0.3102, "step": 10462 }, { "epoch": 2.937394722066255, "grad_norm": 0.47763630747795105, "learning_rate": 1.3261058202517773e-08, "loss": 0.3464, "step": 10463 }, { "epoch": 2.9376754632229085, "grad_norm": 0.5546920895576477, "learning_rate": 1.314244362469852e-08, "loss": 0.3326, "step": 10464 }, { "epoch": 2.937956204379562, "grad_norm": 0.5359768867492676, "learning_rate": 1.3024361213305747e-08, "loss": 0.2888, "step": 10465 }, { "epoch": 2.9382369455362154, "grad_norm": 0.5531989336013794, "learning_rate": 1.2906810980941597e-08, "loss": 0.3443, "step": 10466 }, { "epoch": 2.9385176866928693, "grad_norm": 0.5018088221549988, "learning_rate": 1.2789792940149371e-08, "loss": 0.3229, "step": 10467 }, { "epoch": 2.9387984278495227, "grad_norm": 0.5590019822120667, "learning_rate": 1.267330710341741e-08, "loss": 0.3599, "step": 10468 }, { "epoch": 2.939079169006176, "grad_norm": 0.5875823497772217, "learning_rate": 1.2557353483176327e-08, "loss": 0.3281, "step": 10469 }, { "epoch": 2.93935991016283, "grad_norm": 0.5337052941322327, "learning_rate": 1.2441932091799004e-08, "loss": 0.2883, "step": 10470 }, { "epoch": 2.9396406513194835, "grad_norm": 0.5880594253540039, "learning_rate": 1.2327042941603362e-08, "loss": 0.318, "step": 10471 }, { "epoch": 2.939921392476137, "grad_norm": 0.6057711839675903, "learning_rate": 1.2212686044849598e-08, "loss": 0.3212, "step": 10472 }, { "epoch": 2.9402021336327904, "grad_norm": 0.5037532448768616, "learning_rate": 1.2098861413740726e-08, "loss": 0.3392, "step": 10473 }, { "epoch": 2.9404828747894443, "grad_norm": 0.5388426184654236, "learning_rate": 1.1985569060423696e-08, "loss": 0.3077, "step": 10474 }, { "epoch": 2.9407636159460977, "grad_norm": 0.5098231434822083, "learning_rate": 1.1872808996988284e-08, "loss": 0.3176, "step": 10475 }, { "epoch": 2.941044357102751, "grad_norm": 0.5242395401000977, "learning_rate": 1.176058123546764e-08, "loss": 0.3001, "step": 10476 }, { "epoch": 2.941325098259405, "grad_norm": 0.5007601380348206, "learning_rate": 1.1648885787837737e-08, "loss": 0.3105, "step": 10477 }, { "epoch": 2.9416058394160585, "grad_norm": 0.494960218667984, "learning_rate": 1.1537722666018492e-08, "loss": 0.3354, "step": 10478 }, { "epoch": 2.941886580572712, "grad_norm": 0.4698236584663391, "learning_rate": 1.1427091881872077e-08, "loss": 0.3497, "step": 10479 }, { "epoch": 2.9421673217293653, "grad_norm": 0.532472550868988, "learning_rate": 1.1316993447204604e-08, "loss": 0.3354, "step": 10480 }, { "epoch": 2.9424480628860192, "grad_norm": 0.565513014793396, "learning_rate": 1.1207427373765568e-08, "loss": 0.3438, "step": 10481 }, { "epoch": 2.9427288040426727, "grad_norm": 0.5432757139205933, "learning_rate": 1.1098393673246166e-08, "loss": 0.3479, "step": 10482 }, { "epoch": 2.943009545199326, "grad_norm": 0.6216778755187988, "learning_rate": 1.0989892357282095e-08, "loss": 0.3328, "step": 10483 }, { "epoch": 2.94329028635598, "grad_norm": 0.48868751525878906, "learning_rate": 1.0881923437452424e-08, "loss": 0.3117, "step": 10484 }, { "epoch": 2.9435710275126334, "grad_norm": 0.5677542090415955, "learning_rate": 1.0774486925278493e-08, "loss": 0.2997, "step": 10485 }, { "epoch": 2.943851768669287, "grad_norm": 0.5757440328598022, "learning_rate": 1.066758283222502e-08, "loss": 0.2953, "step": 10486 }, { "epoch": 2.9441325098259403, "grad_norm": 0.5152274966239929, "learning_rate": 1.0561211169700658e-08, "loss": 0.3133, "step": 10487 }, { "epoch": 2.944413250982594, "grad_norm": 0.5793607234954834, "learning_rate": 1.045537194905688e-08, "loss": 0.3091, "step": 10488 }, { "epoch": 2.9446939921392477, "grad_norm": 0.5389032363891602, "learning_rate": 1.0350065181587432e-08, "loss": 0.2924, "step": 10489 }, { "epoch": 2.944974733295901, "grad_norm": 0.6232114434242249, "learning_rate": 1.02452908785311e-08, "loss": 0.3007, "step": 10490 }, { "epoch": 2.945255474452555, "grad_norm": 0.5648753046989441, "learning_rate": 1.014104905106783e-08, "loss": 0.3289, "step": 10491 }, { "epoch": 2.9455362156092084, "grad_norm": 0.5341949462890625, "learning_rate": 1.0037339710321503e-08, "loss": 0.3247, "step": 10492 }, { "epoch": 2.945816956765862, "grad_norm": 0.5344690084457397, "learning_rate": 9.934162867359932e-09, "loss": 0.3034, "step": 10493 }, { "epoch": 2.9460976979225153, "grad_norm": 0.5142850279808044, "learning_rate": 9.83151853319375e-09, "loss": 0.3155, "step": 10494 }, { "epoch": 2.9463784390791687, "grad_norm": 0.5972554087638855, "learning_rate": 9.72940671877587e-09, "loss": 0.359, "step": 10495 }, { "epoch": 2.9466591802358226, "grad_norm": 0.5689883232116699, "learning_rate": 9.627827435003124e-09, "loss": 0.3496, "step": 10496 }, { "epoch": 2.946939921392476, "grad_norm": 0.539682149887085, "learning_rate": 9.526780692715177e-09, "loss": 0.3097, "step": 10497 }, { "epoch": 2.94722066254913, "grad_norm": 0.5398123264312744, "learning_rate": 9.42626650269618e-09, "loss": 0.331, "step": 10498 }, { "epoch": 2.9475014037057834, "grad_norm": 0.5512509346008301, "learning_rate": 9.326284875671444e-09, "loss": 0.3332, "step": 10499 }, { "epoch": 2.947782144862437, "grad_norm": 0.5428307056427002, "learning_rate": 9.226835822310765e-09, "loss": 0.3715, "step": 10500 }, { "epoch": 2.9480628860190903, "grad_norm": 0.5412346124649048, "learning_rate": 9.127919353226212e-09, "loss": 0.3278, "step": 10501 }, { "epoch": 2.9483436271757437, "grad_norm": 0.5379180908203125, "learning_rate": 9.029535478974339e-09, "loss": 0.3038, "step": 10502 }, { "epoch": 2.9486243683323976, "grad_norm": 0.5824816226959229, "learning_rate": 8.931684210053415e-09, "loss": 0.3405, "step": 10503 }, { "epoch": 2.948905109489051, "grad_norm": 0.5193791389465332, "learning_rate": 8.834365556905644e-09, "loss": 0.3748, "step": 10504 }, { "epoch": 2.949185850645705, "grad_norm": 0.5310022234916687, "learning_rate": 8.737579529916607e-09, "loss": 0.3754, "step": 10505 }, { "epoch": 2.9494665918023584, "grad_norm": 0.510654866695404, "learning_rate": 8.641326139414707e-09, "loss": 0.2935, "step": 10506 }, { "epoch": 2.949747332959012, "grad_norm": 0.5400134921073914, "learning_rate": 8.54560539567062e-09, "loss": 0.3278, "step": 10507 }, { "epoch": 2.9500280741156653, "grad_norm": 0.5767861008644104, "learning_rate": 8.45041730890006e-09, "loss": 0.3373, "step": 10508 }, { "epoch": 2.9503088152723187, "grad_norm": 0.51285320520401, "learning_rate": 8.35576188926046e-09, "loss": 0.3096, "step": 10509 }, { "epoch": 2.9505895564289726, "grad_norm": 0.5136780738830566, "learning_rate": 8.261639146853185e-09, "loss": 0.3167, "step": 10510 }, { "epoch": 2.950870297585626, "grad_norm": 0.5744150280952454, "learning_rate": 8.168049091722418e-09, "loss": 0.347, "step": 10511 }, { "epoch": 2.9511510387422795, "grad_norm": 0.5855783224105835, "learning_rate": 8.07499173385462e-09, "loss": 0.3265, "step": 10512 }, { "epoch": 2.9514317798989333, "grad_norm": 0.5832756757736206, "learning_rate": 7.982467083181845e-09, "loss": 0.3291, "step": 10513 }, { "epoch": 2.951712521055587, "grad_norm": 0.548391580581665, "learning_rate": 7.890475149576194e-09, "loss": 0.3197, "step": 10514 }, { "epoch": 2.9519932622122402, "grad_norm": 0.5297378301620483, "learning_rate": 7.79901594285537e-09, "loss": 0.3109, "step": 10515 }, { "epoch": 2.9522740033688937, "grad_norm": 0.48342037200927734, "learning_rate": 7.70808947277879e-09, "loss": 0.3334, "step": 10516 }, { "epoch": 2.9525547445255476, "grad_norm": 0.5580374002456665, "learning_rate": 7.617695749050358e-09, "loss": 0.3156, "step": 10517 }, { "epoch": 2.952835485682201, "grad_norm": 0.5306932330131531, "learning_rate": 7.52783478131569e-09, "loss": 0.3414, "step": 10518 }, { "epoch": 2.9531162268388544, "grad_norm": 0.5230298042297363, "learning_rate": 7.43850657916434e-09, "loss": 0.3603, "step": 10519 }, { "epoch": 2.9533969679955083, "grad_norm": 0.5968591570854187, "learning_rate": 7.34971115212868e-09, "loss": 0.3154, "step": 10520 }, { "epoch": 2.9536777091521618, "grad_norm": 0.633278489112854, "learning_rate": 7.2614485096850205e-09, "loss": 0.3056, "step": 10521 }, { "epoch": 2.953958450308815, "grad_norm": 0.5836353302001953, "learning_rate": 7.173718661251383e-09, "loss": 0.3193, "step": 10522 }, { "epoch": 2.9542391914654687, "grad_norm": 0.5063651204109192, "learning_rate": 7.0865216161902785e-09, "loss": 0.2898, "step": 10523 }, { "epoch": 2.9545199326221225, "grad_norm": 0.6093495488166809, "learning_rate": 6.999857383806485e-09, "loss": 0.3116, "step": 10524 }, { "epoch": 2.954800673778776, "grad_norm": 0.5958014726638794, "learning_rate": 6.913725973349272e-09, "loss": 0.2975, "step": 10525 }, { "epoch": 2.9550814149354294, "grad_norm": 0.5749132633209229, "learning_rate": 6.82812739400851e-09, "loss": 0.3268, "step": 10526 }, { "epoch": 2.9553621560920833, "grad_norm": 0.517746090888977, "learning_rate": 6.743061654919669e-09, "loss": 0.3395, "step": 10527 }, { "epoch": 2.9556428972487367, "grad_norm": 0.6040874123573303, "learning_rate": 6.658528765160488e-09, "loss": 0.3363, "step": 10528 }, { "epoch": 2.95592363840539, "grad_norm": 0.5603330135345459, "learning_rate": 6.574528733751529e-09, "loss": 0.3631, "step": 10529 }, { "epoch": 2.9562043795620436, "grad_norm": 0.5852132439613342, "learning_rate": 6.491061569656731e-09, "loss": 0.3742, "step": 10530 }, { "epoch": 2.9564851207186975, "grad_norm": 0.515753984451294, "learning_rate": 6.408127281782861e-09, "loss": 0.3291, "step": 10531 }, { "epoch": 2.956765861875351, "grad_norm": 0.5279064774513245, "learning_rate": 6.32572587898117e-09, "loss": 0.3308, "step": 10532 }, { "epoch": 2.9570466030320044, "grad_norm": 0.5763012766838074, "learning_rate": 6.2438573700440706e-09, "loss": 0.3341, "step": 10533 }, { "epoch": 2.9573273441886583, "grad_norm": 0.5592288374900818, "learning_rate": 6.162521763708462e-09, "loss": 0.2718, "step": 10534 }, { "epoch": 2.9576080853453117, "grad_norm": 0.5332738757133484, "learning_rate": 6.0817190686540685e-09, "loss": 0.2813, "step": 10535 }, { "epoch": 2.957888826501965, "grad_norm": 0.5409986972808838, "learning_rate": 6.001449293503436e-09, "loss": 0.3257, "step": 10536 }, { "epoch": 2.9581695676586186, "grad_norm": 0.5411288142204285, "learning_rate": 5.921712446822492e-09, "loss": 0.3446, "step": 10537 }, { "epoch": 2.958450308815272, "grad_norm": 0.5346503853797913, "learning_rate": 5.842508537119984e-09, "loss": 0.3407, "step": 10538 }, { "epoch": 2.958731049971926, "grad_norm": 0.528346598148346, "learning_rate": 5.763837572848596e-09, "loss": 0.3092, "step": 10539 }, { "epoch": 2.9590117911285794, "grad_norm": 0.5470328330993652, "learning_rate": 5.685699562403279e-09, "loss": 0.3298, "step": 10540 }, { "epoch": 2.9592925322852333, "grad_norm": 0.5878907442092896, "learning_rate": 5.6080945141223644e-09, "loss": 0.3496, "step": 10541 }, { "epoch": 2.9595732734418867, "grad_norm": 0.4949032664299011, "learning_rate": 5.531022436288114e-09, "loss": 0.3576, "step": 10542 }, { "epoch": 2.95985401459854, "grad_norm": 0.6270462870597839, "learning_rate": 5.45448333712395e-09, "loss": 0.3467, "step": 10543 }, { "epoch": 2.9601347557551936, "grad_norm": 0.5856950879096985, "learning_rate": 5.378477224798895e-09, "loss": 0.2873, "step": 10544 }, { "epoch": 2.960415496911847, "grad_norm": 0.5382655262947083, "learning_rate": 5.303004107422571e-09, "loss": 0.3211, "step": 10545 }, { "epoch": 2.960696238068501, "grad_norm": 0.699613094329834, "learning_rate": 5.228063993050203e-09, "loss": 0.2839, "step": 10546 }, { "epoch": 2.9609769792251543, "grad_norm": 0.5352074503898621, "learning_rate": 5.1536568896781715e-09, "loss": 0.3989, "step": 10547 }, { "epoch": 2.9612577203818082, "grad_norm": 0.49492260813713074, "learning_rate": 5.079782805246791e-09, "loss": 0.3326, "step": 10548 }, { "epoch": 2.9615384615384617, "grad_norm": 0.49701163172721863, "learning_rate": 5.0064417476403115e-09, "loss": 0.3489, "step": 10549 }, { "epoch": 2.961819202695115, "grad_norm": 0.489573210477829, "learning_rate": 4.9336337246841394e-09, "loss": 0.3358, "step": 10550 }, { "epoch": 2.9620999438517686, "grad_norm": 0.5300890207290649, "learning_rate": 4.86135874414817e-09, "loss": 0.3099, "step": 10551 }, { "epoch": 2.962380685008422, "grad_norm": 0.6492616534233093, "learning_rate": 4.789616813745123e-09, "loss": 0.3033, "step": 10552 }, { "epoch": 2.962661426165076, "grad_norm": 0.5822144150733948, "learning_rate": 4.7184079411310975e-09, "loss": 0.28, "step": 10553 }, { "epoch": 2.9629421673217293, "grad_norm": 0.5453940629959106, "learning_rate": 4.6477321339055695e-09, "loss": 0.3671, "step": 10554 }, { "epoch": 2.9632229084783828, "grad_norm": 0.610227644443512, "learning_rate": 4.5775893996097274e-09, "loss": 0.3249, "step": 10555 }, { "epoch": 2.9635036496350367, "grad_norm": 0.5688523054122925, "learning_rate": 4.5079797457286965e-09, "loss": 0.3318, "step": 10556 }, { "epoch": 2.96378439079169, "grad_norm": 0.5592189431190491, "learning_rate": 4.438903179691534e-09, "loss": 0.3837, "step": 10557 }, { "epoch": 2.9640651319483435, "grad_norm": 0.6163564324378967, "learning_rate": 4.37035970886901e-09, "loss": 0.2799, "step": 10558 }, { "epoch": 2.964345873104997, "grad_norm": 0.5381830930709839, "learning_rate": 4.302349340575829e-09, "loss": 0.3028, "step": 10559 }, { "epoch": 2.964626614261651, "grad_norm": 0.5643508434295654, "learning_rate": 4.234872082070074e-09, "loss": 0.3514, "step": 10560 }, { "epoch": 2.9649073554183043, "grad_norm": 0.5094709992408752, "learning_rate": 4.167927940552097e-09, "loss": 0.354, "step": 10561 }, { "epoch": 2.9651880965749577, "grad_norm": 0.5500958561897278, "learning_rate": 4.101516923165627e-09, "loss": 0.3048, "step": 10562 }, { "epoch": 2.9654688377316116, "grad_norm": 0.5777549743652344, "learning_rate": 4.035639036998329e-09, "loss": 0.3187, "step": 10563 }, { "epoch": 2.965749578888265, "grad_norm": 0.5151559114456177, "learning_rate": 3.970294289079024e-09, "loss": 0.3021, "step": 10564 }, { "epoch": 2.9660303200449185, "grad_norm": 0.5703250765800476, "learning_rate": 3.905482686382134e-09, "loss": 0.3246, "step": 10565 }, { "epoch": 2.966311061201572, "grad_norm": 0.5516538023948669, "learning_rate": 3.841204235823792e-09, "loss": 0.3308, "step": 10566 }, { "epoch": 2.966591802358226, "grad_norm": 0.5556989908218384, "learning_rate": 3.777458944262402e-09, "loss": 0.3584, "step": 10567 }, { "epoch": 2.9668725435148793, "grad_norm": 0.5369669795036316, "learning_rate": 3.71424681850141e-09, "loss": 0.2965, "step": 10568 }, { "epoch": 2.9671532846715327, "grad_norm": 0.5599948167800903, "learning_rate": 3.6515678652859765e-09, "loss": 0.3284, "step": 10569 }, { "epoch": 2.9674340258281866, "grad_norm": 0.5642825365066528, "learning_rate": 3.589422091305195e-09, "loss": 0.285, "step": 10570 }, { "epoch": 2.96771476698484, "grad_norm": 0.5726443529129028, "learning_rate": 3.527809503190982e-09, "loss": 0.3092, "step": 10571 }, { "epoch": 2.9679955081414935, "grad_norm": 0.4911079704761505, "learning_rate": 3.4667301075175242e-09, "loss": 0.3249, "step": 10572 }, { "epoch": 2.968276249298147, "grad_norm": 0.5511907935142517, "learning_rate": 3.4061839108029403e-09, "loss": 0.326, "step": 10573 }, { "epoch": 2.968556990454801, "grad_norm": 0.5117613673210144, "learning_rate": 3.346170919509284e-09, "loss": 0.3266, "step": 10574 }, { "epoch": 2.9688377316114543, "grad_norm": 0.5381171703338623, "learning_rate": 3.286691140039766e-09, "loss": 0.307, "step": 10575 }, { "epoch": 2.9691184727681077, "grad_norm": 0.5498996376991272, "learning_rate": 3.227744578742087e-09, "loss": 0.3189, "step": 10576 }, { "epoch": 2.9693992139247616, "grad_norm": 0.5805063843727112, "learning_rate": 3.16933124190677e-09, "loss": 0.3325, "step": 10577 }, { "epoch": 2.969679955081415, "grad_norm": 0.5250602960586548, "learning_rate": 3.1114511357666077e-09, "loss": 0.3277, "step": 10578 }, { "epoch": 2.9699606962380685, "grad_norm": 0.5555799603462219, "learning_rate": 3.054104266499436e-09, "loss": 0.3127, "step": 10579 }, { "epoch": 2.970241437394722, "grad_norm": 0.5048649311065674, "learning_rate": 2.9972906402242487e-09, "loss": 0.3484, "step": 10580 }, { "epoch": 2.970522178551376, "grad_norm": 0.5382528901100159, "learning_rate": 2.9410102630034186e-09, "loss": 0.3327, "step": 10581 }, { "epoch": 2.9708029197080292, "grad_norm": 0.5924113988876343, "learning_rate": 2.885263140843808e-09, "loss": 0.3206, "step": 10582 }, { "epoch": 2.9710836608646827, "grad_norm": 0.519474446773529, "learning_rate": 2.8300492796939914e-09, "loss": 0.3459, "step": 10583 }, { "epoch": 2.9713644020213366, "grad_norm": 0.5960259437561035, "learning_rate": 2.7753686854453676e-09, "loss": 0.3431, "step": 10584 }, { "epoch": 2.97164514317799, "grad_norm": 0.5507221817970276, "learning_rate": 2.721221363934379e-09, "loss": 0.3204, "step": 10585 }, { "epoch": 2.9719258843346434, "grad_norm": 0.5993731617927551, "learning_rate": 2.6676073209380705e-09, "loss": 0.2847, "step": 10586 }, { "epoch": 2.972206625491297, "grad_norm": 0.5828808546066284, "learning_rate": 2.614526562178532e-09, "loss": 0.3446, "step": 10587 }, { "epoch": 2.9724873666479503, "grad_norm": 0.6000937223434448, "learning_rate": 2.5619790933201216e-09, "loss": 0.3496, "step": 10588 }, { "epoch": 2.972768107804604, "grad_norm": 0.484211266040802, "learning_rate": 2.5099649199705754e-09, "loss": 0.3505, "step": 10589 }, { "epoch": 2.9730488489612577, "grad_norm": 0.58460932970047, "learning_rate": 2.4584840476798986e-09, "loss": 0.322, "step": 10590 }, { "epoch": 2.9733295901179115, "grad_norm": 0.5212740898132324, "learning_rate": 2.40753648194203e-09, "loss": 0.316, "step": 10591 }, { "epoch": 2.973610331274565, "grad_norm": 0.5156008005142212, "learning_rate": 2.3571222281937312e-09, "loss": 0.3286, "step": 10592 }, { "epoch": 2.9738910724312184, "grad_norm": 0.5211127996444702, "learning_rate": 2.3072412918156984e-09, "loss": 0.3035, "step": 10593 }, { "epoch": 2.974171813587872, "grad_norm": 0.4743029475212097, "learning_rate": 2.2578936781297854e-09, "loss": 0.3533, "step": 10594 }, { "epoch": 2.9744525547445253, "grad_norm": 0.6124950647354126, "learning_rate": 2.209079392402891e-09, "loss": 0.2876, "step": 10595 }, { "epoch": 2.974733295901179, "grad_norm": 0.4953879415988922, "learning_rate": 2.1607984398436255e-09, "loss": 0.3059, "step": 10596 }, { "epoch": 2.9750140370578326, "grad_norm": 0.543163001537323, "learning_rate": 2.1130508256039793e-09, "loss": 0.3122, "step": 10597 }, { "epoch": 2.9752947782144865, "grad_norm": 0.5467824339866638, "learning_rate": 2.0658365547804316e-09, "loss": 0.3232, "step": 10598 }, { "epoch": 2.97557551937114, "grad_norm": 0.5721521377563477, "learning_rate": 2.0191556324106186e-09, "loss": 0.3285, "step": 10599 }, { "epoch": 2.9758562605277934, "grad_norm": 0.5498932600021362, "learning_rate": 1.9730080634761116e-09, "loss": 0.306, "step": 10600 }, { "epoch": 2.976137001684447, "grad_norm": 0.5505990386009216, "learning_rate": 1.927393852901305e-09, "loss": 0.3281, "step": 10601 }, { "epoch": 2.9764177428411003, "grad_norm": 0.5039509534835815, "learning_rate": 1.8823130055539706e-09, "loss": 0.3456, "step": 10602 }, { "epoch": 2.976698483997754, "grad_norm": 0.5800801515579224, "learning_rate": 1.837765526245261e-09, "loss": 0.2784, "step": 10603 }, { "epoch": 2.9769792251544076, "grad_norm": 0.5249902009963989, "learning_rate": 1.7937514197285955e-09, "loss": 0.315, "step": 10604 }, { "epoch": 2.977259966311061, "grad_norm": 0.5883903503417969, "learning_rate": 1.7502706907007727e-09, "loss": 0.3186, "step": 10605 }, { "epoch": 2.977540707467715, "grad_norm": 0.5466181039810181, "learning_rate": 1.7073233438019699e-09, "loss": 0.3521, "step": 10606 }, { "epoch": 2.9778214486243684, "grad_norm": 0.5664665699005127, "learning_rate": 1.6649093836157427e-09, "loss": 0.3201, "step": 10607 }, { "epoch": 2.978102189781022, "grad_norm": 0.5520034432411194, "learning_rate": 1.62302881466736e-09, "loss": 0.3534, "step": 10608 }, { "epoch": 2.9783829309376753, "grad_norm": 0.54378741979599, "learning_rate": 1.58168164142658e-09, "loss": 0.3412, "step": 10609 }, { "epoch": 2.978663672094329, "grad_norm": 0.5794954895973206, "learning_rate": 1.540867868305984e-09, "loss": 0.3537, "step": 10610 }, { "epoch": 2.9789444132509826, "grad_norm": 0.5040106773376465, "learning_rate": 1.500587499660422e-09, "loss": 0.3113, "step": 10611 }, { "epoch": 2.979225154407636, "grad_norm": 0.5427335500717163, "learning_rate": 1.460840539788122e-09, "loss": 0.4027, "step": 10612 }, { "epoch": 2.97950589556429, "grad_norm": 0.49653446674346924, "learning_rate": 1.4216269929318017e-09, "loss": 0.3556, "step": 10613 }, { "epoch": 2.9797866367209433, "grad_norm": 0.5319122672080994, "learning_rate": 1.382946863274781e-09, "loss": 0.2885, "step": 10614 }, { "epoch": 2.980067377877597, "grad_norm": 0.5083991289138794, "learning_rate": 1.3448001549454248e-09, "loss": 0.3235, "step": 10615 }, { "epoch": 2.9803481190342502, "grad_norm": 0.5581716895103455, "learning_rate": 1.3071868720143654e-09, "loss": 0.2853, "step": 10616 }, { "epoch": 2.980628860190904, "grad_norm": 0.47545793652534485, "learning_rate": 1.2701070184956143e-09, "loss": 0.3366, "step": 10617 }, { "epoch": 2.9809096013475576, "grad_norm": 0.5832540988922119, "learning_rate": 1.2335605983460065e-09, "loss": 0.3102, "step": 10618 }, { "epoch": 2.981190342504211, "grad_norm": 0.5522453784942627, "learning_rate": 1.1975476154651999e-09, "loss": 0.2653, "step": 10619 }, { "epoch": 2.981471083660865, "grad_norm": 0.5310642123222351, "learning_rate": 1.1620680736973422e-09, "loss": 0.3552, "step": 10620 }, { "epoch": 2.9817518248175183, "grad_norm": 0.565291702747345, "learning_rate": 1.1271219768271836e-09, "loss": 0.2752, "step": 10621 }, { "epoch": 2.9820325659741718, "grad_norm": 0.5362760424613953, "learning_rate": 1.0927093285850732e-09, "loss": 0.3523, "step": 10622 }, { "epoch": 2.982313307130825, "grad_norm": 0.6323783993721008, "learning_rate": 1.0588301326425187e-09, "loss": 0.2652, "step": 10623 }, { "epoch": 2.982594048287479, "grad_norm": 0.5766808986663818, "learning_rate": 1.0254843926155167e-09, "loss": 0.3853, "step": 10624 }, { "epoch": 2.9828747894441325, "grad_norm": 0.5770233273506165, "learning_rate": 9.926721120617766e-10, "loss": 0.3102, "step": 10625 }, { "epoch": 2.983155530600786, "grad_norm": 0.5527682304382324, "learning_rate": 9.603932944840522e-10, "loss": 0.3594, "step": 10626 }, { "epoch": 2.98343627175744, "grad_norm": 0.5960839986801147, "learning_rate": 9.286479433257e-10, "loss": 0.3154, "step": 10627 }, { "epoch": 2.9837170129140933, "grad_norm": 0.49269023537635803, "learning_rate": 8.97436061975121e-10, "loss": 0.3657, "step": 10628 }, { "epoch": 2.9839977540707467, "grad_norm": 0.5490446090698242, "learning_rate": 8.667576537624289e-10, "loss": 0.3287, "step": 10629 }, { "epoch": 2.9842784952274, "grad_norm": 0.48584094643592834, "learning_rate": 8.366127219616715e-10, "loss": 0.3202, "step": 10630 }, { "epoch": 2.9845592363840536, "grad_norm": 0.5821020007133484, "learning_rate": 8.070012697902752e-10, "loss": 0.354, "step": 10631 }, { "epoch": 2.9848399775407075, "grad_norm": 0.5731359124183655, "learning_rate": 7.779233004079345e-10, "loss": 0.3505, "step": 10632 }, { "epoch": 2.985120718697361, "grad_norm": 0.5254268050193787, "learning_rate": 7.493788169171678e-10, "loss": 0.2915, "step": 10633 }, { "epoch": 2.985401459854015, "grad_norm": 0.5600931644439697, "learning_rate": 7.213678223644272e-10, "loss": 0.2926, "step": 10634 }, { "epoch": 2.9856822010106683, "grad_norm": 0.4954153597354889, "learning_rate": 6.938903197389879e-10, "loss": 0.3117, "step": 10635 }, { "epoch": 2.9859629421673217, "grad_norm": 0.578407347202301, "learning_rate": 6.669463119729491e-10, "loss": 0.3263, "step": 10636 }, { "epoch": 2.986243683323975, "grad_norm": 0.6071785092353821, "learning_rate": 6.405358019412333e-10, "loss": 0.3116, "step": 10637 }, { "epoch": 2.9865244244806286, "grad_norm": 0.5326305627822876, "learning_rate": 6.146587924632519e-10, "loss": 0.3328, "step": 10638 }, { "epoch": 2.9868051656372825, "grad_norm": 0.5341914296150208, "learning_rate": 5.893152862990192e-10, "loss": 0.3398, "step": 10639 }, { "epoch": 2.987085906793936, "grad_norm": 0.6067649722099304, "learning_rate": 5.645052861541489e-10, "loss": 0.3017, "step": 10640 }, { "epoch": 2.98736664795059, "grad_norm": 0.567701518535614, "learning_rate": 5.402287946759676e-10, "loss": 0.2839, "step": 10641 }, { "epoch": 2.9876473891072433, "grad_norm": 0.5179830193519592, "learning_rate": 5.164858144546259e-10, "loss": 0.3089, "step": 10642 }, { "epoch": 2.9879281302638967, "grad_norm": 0.5601654648780823, "learning_rate": 4.932763480247626e-10, "loss": 0.3635, "step": 10643 }, { "epoch": 2.98820887142055, "grad_norm": 0.5821573138237, "learning_rate": 4.706003978621754e-10, "loss": 0.3229, "step": 10644 }, { "epoch": 2.9884896125772036, "grad_norm": 0.6123842000961304, "learning_rate": 4.484579663871502e-10, "loss": 0.2975, "step": 10645 }, { "epoch": 2.9887703537338575, "grad_norm": 0.5240119099617004, "learning_rate": 4.268490559622418e-10, "loss": 0.361, "step": 10646 }, { "epoch": 2.989051094890511, "grad_norm": 0.5306237936019897, "learning_rate": 4.0577366889393844e-10, "loss": 0.339, "step": 10647 }, { "epoch": 2.9893318360471643, "grad_norm": 0.5565869212150574, "learning_rate": 3.8523180743099686e-10, "loss": 0.3589, "step": 10648 }, { "epoch": 2.9896125772038182, "grad_norm": 0.5355308651924133, "learning_rate": 3.6522347376555244e-10, "loss": 0.3194, "step": 10649 }, { "epoch": 2.9898933183604717, "grad_norm": 0.5698487758636475, "learning_rate": 3.4574867003311916e-10, "loss": 0.3167, "step": 10650 }, { "epoch": 2.990174059517125, "grad_norm": 0.5874865055084229, "learning_rate": 3.2680739831092436e-10, "loss": 0.3066, "step": 10651 }, { "epoch": 2.9904548006737786, "grad_norm": 0.5256145000457764, "learning_rate": 3.083996606217943e-10, "loss": 0.3004, "step": 10652 }, { "epoch": 2.9907355418304324, "grad_norm": 0.584301769733429, "learning_rate": 2.905254589286033e-10, "loss": 0.3049, "step": 10653 }, { "epoch": 2.991016282987086, "grad_norm": 0.5106728672981262, "learning_rate": 2.7318479513926965e-10, "loss": 0.3625, "step": 10654 }, { "epoch": 2.9912970241437393, "grad_norm": 0.607122004032135, "learning_rate": 2.5637767110509025e-10, "loss": 0.3115, "step": 10655 }, { "epoch": 2.991577765300393, "grad_norm": 0.5652472376823425, "learning_rate": 2.401040886185202e-10, "loss": 0.3221, "step": 10656 }, { "epoch": 2.9918585064570467, "grad_norm": 0.5534455180168152, "learning_rate": 2.2436404941650337e-10, "loss": 0.2889, "step": 10657 }, { "epoch": 2.9921392476137, "grad_norm": 0.47856977581977844, "learning_rate": 2.091575551788072e-10, "loss": 0.3409, "step": 10658 }, { "epoch": 2.9924199887703535, "grad_norm": 0.561577320098877, "learning_rate": 1.9448460752802267e-10, "loss": 0.355, "step": 10659 }, { "epoch": 2.9927007299270074, "grad_norm": 0.5409138202667236, "learning_rate": 1.8034520803067444e-10, "loss": 0.3475, "step": 10660 }, { "epoch": 2.992981471083661, "grad_norm": 0.5205484628677368, "learning_rate": 1.6673935819444541e-10, "loss": 0.3434, "step": 10661 }, { "epoch": 2.9932622122403143, "grad_norm": 0.5401335954666138, "learning_rate": 1.536670594720624e-10, "loss": 0.2729, "step": 10662 }, { "epoch": 2.993542953396968, "grad_norm": 0.5215033888816833, "learning_rate": 1.411283132585206e-10, "loss": 0.3518, "step": 10663 }, { "epoch": 2.9938236945536216, "grad_norm": 0.49377742409706116, "learning_rate": 1.291231208916388e-10, "loss": 0.2978, "step": 10664 }, { "epoch": 2.994104435710275, "grad_norm": 0.5893886685371399, "learning_rate": 1.1765148365261437e-10, "loss": 0.2775, "step": 10665 }, { "epoch": 2.9943851768669285, "grad_norm": 0.5212074518203735, "learning_rate": 1.0671340276546816e-10, "loss": 0.3573, "step": 10666 }, { "epoch": 2.9946659180235824, "grad_norm": 0.5782735347747803, "learning_rate": 9.63088793975997e-11, "loss": 0.2998, "step": 10667 }, { "epoch": 2.994946659180236, "grad_norm": 0.47576746344566345, "learning_rate": 8.643791465978712e-11, "loss": 0.3351, "step": 10668 }, { "epoch": 2.9952274003368893, "grad_norm": 0.6338368654251099, "learning_rate": 7.710050960452187e-11, "loss": 0.3259, "step": 10669 }, { "epoch": 2.995508141493543, "grad_norm": 0.6172406673431396, "learning_rate": 6.829666522822909e-11, "loss": 0.274, "step": 10670 }, { "epoch": 2.9957888826501966, "grad_norm": 0.513821005821228, "learning_rate": 6.002638247126769e-11, "loss": 0.3604, "step": 10671 }, { "epoch": 2.99606962380685, "grad_norm": 0.5321049094200134, "learning_rate": 5.228966221570986e-11, "loss": 0.3484, "step": 10672 }, { "epoch": 2.9963503649635035, "grad_norm": 0.5471032857894897, "learning_rate": 4.508650528700642e-11, "loss": 0.3083, "step": 10673 }, { "epoch": 2.9966311061201574, "grad_norm": 0.5512019395828247, "learning_rate": 3.841691245398682e-11, "loss": 0.3014, "step": 10674 }, { "epoch": 2.996911847276811, "grad_norm": 0.5173888206481934, "learning_rate": 3.228088442830402e-11, "loss": 0.3355, "step": 10675 }, { "epoch": 2.9971925884334643, "grad_norm": 0.5431347489356995, "learning_rate": 2.667842186498959e-11, "loss": 0.3199, "step": 10676 }, { "epoch": 2.997473329590118, "grad_norm": 0.5168270468711853, "learning_rate": 2.1609525361898643e-11, "loss": 0.3701, "step": 10677 }, { "epoch": 2.9977540707467716, "grad_norm": 0.5044336318969727, "learning_rate": 1.7074195459709784e-11, "loss": 0.321, "step": 10678 }, { "epoch": 2.998034811903425, "grad_norm": 0.5126956105232239, "learning_rate": 1.307243264248026e-11, "loss": 0.3203, "step": 10679 }, { "epoch": 2.9983155530600785, "grad_norm": 0.5498796105384827, "learning_rate": 9.604237337645928e-12, "loss": 0.3084, "step": 10680 }, { "epoch": 2.998596294216732, "grad_norm": 0.5440199971199036, "learning_rate": 6.669609914911057e-12, "loss": 0.3357, "step": 10681 }, { "epoch": 2.998877035373386, "grad_norm": 0.49261048436164856, "learning_rate": 4.26855068680343e-12, "loss": 0.3433, "step": 10682 }, { "epoch": 2.9991577765300392, "grad_norm": 0.5796825289726257, "learning_rate": 2.4010599108947876e-12, "loss": 0.3425, "step": 10683 }, { "epoch": 2.999438517686693, "grad_norm": 0.5342565178871155, "learning_rate": 1.0671377853599395e-12, "loss": 0.319, "step": 10684 }, { "epoch": 2.9997192588433466, "grad_norm": 0.6024678349494934, "learning_rate": 2.667844534176567e-13, "loss": 0.3298, "step": 10685 }, { "epoch": 3.0, "grad_norm": 0.4760485291481018, "learning_rate": 0.0, "loss": 0.3339, "step": 10686 }, { "epoch": 3.0, "step": 10686, "total_flos": 4620716816613376.0, "train_loss": 0.39878227835304614, "train_runtime": 132014.4933, "train_samples_per_second": 2.59, "train_steps_per_second": 0.081 } ], "logging_steps": 1.0, "max_steps": 10686, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4620716816613376.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }