Qwen_Math_high_1e5 / checkpoint-2775 /trainer_state.json
redsgnaoh's picture
Upload folder using huggingface_hub
487e8fc verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.262511373976342,
"eval_steps": 500,
"global_step": 2775,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00045495905368516835,
"grad_norm": 9.461428161462043,
"learning_rate": 1e-05,
"loss": 0.1263,
"step": 1
},
{
"epoch": 0.0009099181073703367,
"grad_norm": 5.190780450250769,
"learning_rate": 9.99999979571129e-06,
"loss": 0.1723,
"step": 2
},
{
"epoch": 0.001364877161055505,
"grad_norm": 7.521926017130347,
"learning_rate": 9.999999182845177e-06,
"loss": 0.1327,
"step": 3
},
{
"epoch": 0.0018198362147406734,
"grad_norm": 2.5665810200307217,
"learning_rate": 9.99999816140171e-06,
"loss": 0.1095,
"step": 4
},
{
"epoch": 0.0022747952684258415,
"grad_norm": 2.738508706395883,
"learning_rate": 9.999996731380973e-06,
"loss": 0.1151,
"step": 5
},
{
"epoch": 0.00272975432211101,
"grad_norm": 2.67941899677245,
"learning_rate": 9.999994892783083e-06,
"loss": 0.0821,
"step": 6
},
{
"epoch": 0.0031847133757961785,
"grad_norm": 2.137586234420784,
"learning_rate": 9.99999264560819e-06,
"loss": 0.0729,
"step": 7
},
{
"epoch": 0.003639672429481347,
"grad_norm": 2.8221590420989164,
"learning_rate": 9.999989989856477e-06,
"loss": 0.0929,
"step": 8
},
{
"epoch": 0.004094631483166515,
"grad_norm": 1.6167314639784554,
"learning_rate": 9.999986925528164e-06,
"loss": 0.0466,
"step": 9
},
{
"epoch": 0.004549590536851683,
"grad_norm": 2.1773262431631313,
"learning_rate": 9.999983452623498e-06,
"loss": 0.0709,
"step": 10
},
{
"epoch": 0.005004549590536852,
"grad_norm": 7.6444390817806465,
"learning_rate": 9.999979571142765e-06,
"loss": 0.0809,
"step": 11
},
{
"epoch": 0.00545950864422202,
"grad_norm": 2.034523884241798,
"learning_rate": 9.999975281086278e-06,
"loss": 0.0839,
"step": 12
},
{
"epoch": 0.005914467697907188,
"grad_norm": 3.576108282005355,
"learning_rate": 9.999970582454392e-06,
"loss": 0.0728,
"step": 13
},
{
"epoch": 0.006369426751592357,
"grad_norm": 2.623641566468802,
"learning_rate": 9.999965475247491e-06,
"loss": 0.1052,
"step": 14
},
{
"epoch": 0.006824385805277525,
"grad_norm": 2.1413574998269085,
"learning_rate": 9.99995995946599e-06,
"loss": 0.0885,
"step": 15
},
{
"epoch": 0.007279344858962694,
"grad_norm": 1.4859066724415246,
"learning_rate": 9.999954035110342e-06,
"loss": 0.0644,
"step": 16
},
{
"epoch": 0.0077343039126478615,
"grad_norm": 2.851793157608408,
"learning_rate": 9.999947702181027e-06,
"loss": 0.1057,
"step": 17
},
{
"epoch": 0.00818926296633303,
"grad_norm": 4.693829546662477,
"learning_rate": 9.999940960678568e-06,
"loss": 0.0867,
"step": 18
},
{
"epoch": 0.008644222020018199,
"grad_norm": 2.2728033563417362,
"learning_rate": 9.999933810603513e-06,
"loss": 0.0789,
"step": 19
},
{
"epoch": 0.009099181073703366,
"grad_norm": 1.6705986173507794,
"learning_rate": 9.999926251956447e-06,
"loss": 0.0683,
"step": 20
},
{
"epoch": 0.009554140127388535,
"grad_norm": 2.187579869114393,
"learning_rate": 9.999918284737986e-06,
"loss": 0.0984,
"step": 21
},
{
"epoch": 0.010009099181073703,
"grad_norm": 2.328040268012338,
"learning_rate": 9.999909908948782e-06,
"loss": 0.0699,
"step": 22
},
{
"epoch": 0.010464058234758872,
"grad_norm": 5.572389775693198,
"learning_rate": 9.999901124589519e-06,
"loss": 0.0912,
"step": 23
},
{
"epoch": 0.01091901728844404,
"grad_norm": 1.84796719674859,
"learning_rate": 9.999891931660916e-06,
"loss": 0.1015,
"step": 24
},
{
"epoch": 0.011373976342129208,
"grad_norm": 1.7501762990792236,
"learning_rate": 9.999882330163725e-06,
"loss": 0.0909,
"step": 25
},
{
"epoch": 0.011828935395814377,
"grad_norm": 0.9922115950592263,
"learning_rate": 9.999872320098729e-06,
"loss": 0.0656,
"step": 26
},
{
"epoch": 0.012283894449499545,
"grad_norm": 1.5612370560987539,
"learning_rate": 9.999861901466746e-06,
"loss": 0.0974,
"step": 27
},
{
"epoch": 0.012738853503184714,
"grad_norm": 1.4617271794930395,
"learning_rate": 9.999851074268625e-06,
"loss": 0.0853,
"step": 28
},
{
"epoch": 0.013193812556869881,
"grad_norm": 1.8127085104491556,
"learning_rate": 9.999839838505257e-06,
"loss": 0.1081,
"step": 29
},
{
"epoch": 0.01364877161055505,
"grad_norm": 1.4710105512612208,
"learning_rate": 9.999828194177555e-06,
"loss": 0.0868,
"step": 30
},
{
"epoch": 0.014103730664240218,
"grad_norm": 1.3474487189311888,
"learning_rate": 9.999816141286472e-06,
"loss": 0.0817,
"step": 31
},
{
"epoch": 0.014558689717925387,
"grad_norm": 1.0967596652549403,
"learning_rate": 9.99980367983299e-06,
"loss": 0.0637,
"step": 32
},
{
"epoch": 0.015013648771610554,
"grad_norm": 3.179425671823194,
"learning_rate": 9.999790809818134e-06,
"loss": 0.069,
"step": 33
},
{
"epoch": 0.015468607825295723,
"grad_norm": 4.482257681577152,
"learning_rate": 9.999777531242951e-06,
"loss": 0.0915,
"step": 34
},
{
"epoch": 0.01592356687898089,
"grad_norm": 3.953299040475791,
"learning_rate": 9.999763844108528e-06,
"loss": 0.0562,
"step": 35
},
{
"epoch": 0.01637852593266606,
"grad_norm": 1.1127201050382067,
"learning_rate": 9.999749748415982e-06,
"loss": 0.0556,
"step": 36
},
{
"epoch": 0.01683348498635123,
"grad_norm": 79.45756094624792,
"learning_rate": 9.999735244166464e-06,
"loss": 0.1223,
"step": 37
},
{
"epoch": 0.017288444040036398,
"grad_norm": 2777.9092912017113,
"learning_rate": 9.99972033136116e-06,
"loss": 0.3211,
"step": 38
},
{
"epoch": 0.017743403093721567,
"grad_norm": 2.5204693177238466,
"learning_rate": 9.999705010001291e-06,
"loss": 0.0723,
"step": 39
},
{
"epoch": 0.018198362147406732,
"grad_norm": 2.2975907071135655,
"learning_rate": 9.999689280088105e-06,
"loss": 0.0696,
"step": 40
},
{
"epoch": 0.0186533212010919,
"grad_norm": 2.998434349074003,
"learning_rate": 9.99967314162289e-06,
"loss": 0.083,
"step": 41
},
{
"epoch": 0.01910828025477707,
"grad_norm": 3.882239448575704,
"learning_rate": 9.999656594606966e-06,
"loss": 0.1015,
"step": 42
},
{
"epoch": 0.019563239308462238,
"grad_norm": 3.5286596480512493,
"learning_rate": 9.999639639041681e-06,
"loss": 0.0817,
"step": 43
},
{
"epoch": 0.020018198362147407,
"grad_norm": 1.6933989447443707,
"learning_rate": 9.999622274928424e-06,
"loss": 0.1003,
"step": 44
},
{
"epoch": 0.020473157415832575,
"grad_norm": 1.2483160046323276,
"learning_rate": 9.999604502268614e-06,
"loss": 0.0952,
"step": 45
},
{
"epoch": 0.020928116469517744,
"grad_norm": 0.9417906124383243,
"learning_rate": 9.9995863210637e-06,
"loss": 0.0731,
"step": 46
},
{
"epoch": 0.021383075523202913,
"grad_norm": 2.8195414757816897,
"learning_rate": 9.99956773131517e-06,
"loss": 0.1845,
"step": 47
},
{
"epoch": 0.02183803457688808,
"grad_norm": 2.74390379471345,
"learning_rate": 9.999548733024545e-06,
"loss": 0.1826,
"step": 48
},
{
"epoch": 0.022292993630573247,
"grad_norm": 1.5138494619527987,
"learning_rate": 9.999529326193373e-06,
"loss": 0.0857,
"step": 49
},
{
"epoch": 0.022747952684258416,
"grad_norm": 1.215379974181271,
"learning_rate": 9.999509510823242e-06,
"loss": 0.0686,
"step": 50
},
{
"epoch": 0.023202911737943584,
"grad_norm": 1.292187967807859,
"learning_rate": 9.999489286915773e-06,
"loss": 0.0707,
"step": 51
},
{
"epoch": 0.023657870791628753,
"grad_norm": 1.7888013203563982,
"learning_rate": 9.999468654472614e-06,
"loss": 0.0682,
"step": 52
},
{
"epoch": 0.024112829845313922,
"grad_norm": 0.8979425621703144,
"learning_rate": 9.999447613495457e-06,
"loss": 0.0508,
"step": 53
},
{
"epoch": 0.02456778889899909,
"grad_norm": 1.9123835444775663,
"learning_rate": 9.99942616398602e-06,
"loss": 0.0689,
"step": 54
},
{
"epoch": 0.02502274795268426,
"grad_norm": 0.9393581994096443,
"learning_rate": 9.99940430594605e-06,
"loss": 0.0496,
"step": 55
},
{
"epoch": 0.025477707006369428,
"grad_norm": 1.0234476513644222,
"learning_rate": 9.999382039377339e-06,
"loss": 0.0601,
"step": 56
},
{
"epoch": 0.025932666060054597,
"grad_norm": 0.9291387208138827,
"learning_rate": 9.999359364281704e-06,
"loss": 0.0377,
"step": 57
},
{
"epoch": 0.026387625113739762,
"grad_norm": 1.8209170803663992,
"learning_rate": 9.999336280660999e-06,
"loss": 0.1144,
"step": 58
},
{
"epoch": 0.02684258416742493,
"grad_norm": 1.1214625046464874,
"learning_rate": 9.99931278851711e-06,
"loss": 0.0622,
"step": 59
},
{
"epoch": 0.0272975432211101,
"grad_norm": 1.0331723997917317,
"learning_rate": 9.999288887851956e-06,
"loss": 0.0667,
"step": 60
},
{
"epoch": 0.027752502274795268,
"grad_norm": 1.0412381501406744,
"learning_rate": 9.999264578667493e-06,
"loss": 0.0566,
"step": 61
},
{
"epoch": 0.028207461328480437,
"grad_norm": 1.4510603110658047,
"learning_rate": 9.999239860965703e-06,
"loss": 0.0845,
"step": 62
},
{
"epoch": 0.028662420382165606,
"grad_norm": 1.301162540669183,
"learning_rate": 9.999214734748609e-06,
"loss": 0.0759,
"step": 63
},
{
"epoch": 0.029117379435850774,
"grad_norm": 0.9977688847603402,
"learning_rate": 9.999189200018263e-06,
"loss": 0.0528,
"step": 64
},
{
"epoch": 0.029572338489535943,
"grad_norm": 1.2894688842348854,
"learning_rate": 9.99916325677675e-06,
"loss": 0.0899,
"step": 65
},
{
"epoch": 0.03002729754322111,
"grad_norm": 1.4627871680702638,
"learning_rate": 9.999136905026194e-06,
"loss": 0.1456,
"step": 66
},
{
"epoch": 0.030482256596906277,
"grad_norm": 1.2304385710214434,
"learning_rate": 9.999110144768745e-06,
"loss": 0.079,
"step": 67
},
{
"epoch": 0.030937215650591446,
"grad_norm": 1.085016380732753,
"learning_rate": 9.99908297600659e-06,
"loss": 0.0696,
"step": 68
},
{
"epoch": 0.03139217470427662,
"grad_norm": 0.989450558642297,
"learning_rate": 9.99905539874195e-06,
"loss": 0.069,
"step": 69
},
{
"epoch": 0.03184713375796178,
"grad_norm": 1.0510491151133208,
"learning_rate": 9.99902741297708e-06,
"loss": 0.0555,
"step": 70
},
{
"epoch": 0.03230209281164695,
"grad_norm": 0.8938033562648371,
"learning_rate": 9.998999018714264e-06,
"loss": 0.0783,
"step": 71
},
{
"epoch": 0.03275705186533212,
"grad_norm": 2.902512108322722,
"learning_rate": 9.998970215955824e-06,
"loss": 0.0702,
"step": 72
},
{
"epoch": 0.033212010919017286,
"grad_norm": 0.7661831894133686,
"learning_rate": 9.998941004704113e-06,
"loss": 0.0519,
"step": 73
},
{
"epoch": 0.03366696997270246,
"grad_norm": 1.1047249497744047,
"learning_rate": 9.998911384961518e-06,
"loss": 0.0773,
"step": 74
},
{
"epoch": 0.034121929026387623,
"grad_norm": 0.7750047299312716,
"learning_rate": 9.998881356730458e-06,
"loss": 0.0598,
"step": 75
},
{
"epoch": 0.034576888080072796,
"grad_norm": 0.9815801555720315,
"learning_rate": 9.99885092001339e-06,
"loss": 0.0661,
"step": 76
},
{
"epoch": 0.03503184713375796,
"grad_norm": 1.3090963451351905,
"learning_rate": 9.998820074812799e-06,
"loss": 0.0713,
"step": 77
},
{
"epoch": 0.03548680618744313,
"grad_norm": 1.1489338732270693,
"learning_rate": 9.998788821131207e-06,
"loss": 0.0946,
"step": 78
},
{
"epoch": 0.0359417652411283,
"grad_norm": 0.9040381990998293,
"learning_rate": 9.998757158971164e-06,
"loss": 0.067,
"step": 79
},
{
"epoch": 0.036396724294813464,
"grad_norm": 1.1019926198229115,
"learning_rate": 9.998725088335263e-06,
"loss": 0.0874,
"step": 80
},
{
"epoch": 0.036851683348498636,
"grad_norm": 0.5779852750462403,
"learning_rate": 9.99869260922612e-06,
"loss": 0.0492,
"step": 81
},
{
"epoch": 0.0373066424021838,
"grad_norm": 1.2769852710418472,
"learning_rate": 9.998659721646393e-06,
"loss": 0.0781,
"step": 82
},
{
"epoch": 0.03776160145586897,
"grad_norm": 0.9020624084974485,
"learning_rate": 9.998626425598766e-06,
"loss": 0.0734,
"step": 83
},
{
"epoch": 0.03821656050955414,
"grad_norm": 0.9626764462141776,
"learning_rate": 9.99859272108596e-06,
"loss": 0.0719,
"step": 84
},
{
"epoch": 0.03867151956323931,
"grad_norm": 0.9435885887029873,
"learning_rate": 9.998558608110733e-06,
"loss": 0.0835,
"step": 85
},
{
"epoch": 0.039126478616924476,
"grad_norm": 1.0578725525123687,
"learning_rate": 9.998524086675867e-06,
"loss": 0.0746,
"step": 86
},
{
"epoch": 0.03958143767060965,
"grad_norm": 1.0366588534208079,
"learning_rate": 9.998489156784188e-06,
"loss": 0.0933,
"step": 87
},
{
"epoch": 0.040036396724294813,
"grad_norm": 1.0595948680723846,
"learning_rate": 9.998453818438547e-06,
"loss": 0.0846,
"step": 88
},
{
"epoch": 0.04049135577797998,
"grad_norm": 0.8807515753016749,
"learning_rate": 9.998418071641833e-06,
"loss": 0.0649,
"step": 89
},
{
"epoch": 0.04094631483166515,
"grad_norm": 0.9034225145874141,
"learning_rate": 9.998381916396967e-06,
"loss": 0.0621,
"step": 90
},
{
"epoch": 0.041401273885350316,
"grad_norm": 0.6732889821553815,
"learning_rate": 9.998345352706901e-06,
"loss": 0.0367,
"step": 91
},
{
"epoch": 0.04185623293903549,
"grad_norm": 0.7136967603743426,
"learning_rate": 9.998308380574628e-06,
"loss": 0.0569,
"step": 92
},
{
"epoch": 0.042311191992720654,
"grad_norm": 1.1459385364035048,
"learning_rate": 9.998271000003166e-06,
"loss": 0.1184,
"step": 93
},
{
"epoch": 0.042766151046405826,
"grad_norm": 0.8224906129097734,
"learning_rate": 9.998233210995569e-06,
"loss": 0.0682,
"step": 94
},
{
"epoch": 0.04322111010009099,
"grad_norm": 1.5182946932236698,
"learning_rate": 9.998195013554926e-06,
"loss": 0.0875,
"step": 95
},
{
"epoch": 0.04367606915377616,
"grad_norm": 0.9355855711018981,
"learning_rate": 9.998156407684359e-06,
"loss": 0.0939,
"step": 96
},
{
"epoch": 0.04413102820746133,
"grad_norm": 0.7329840867165283,
"learning_rate": 9.998117393387022e-06,
"loss": 0.0466,
"step": 97
},
{
"epoch": 0.044585987261146494,
"grad_norm": 0.8701001036058451,
"learning_rate": 9.9980779706661e-06,
"loss": 0.0729,
"step": 98
},
{
"epoch": 0.045040946314831666,
"grad_norm": 1.0218896298663185,
"learning_rate": 9.99803813952482e-06,
"loss": 0.0828,
"step": 99
},
{
"epoch": 0.04549590536851683,
"grad_norm": 0.9044995357273884,
"learning_rate": 9.997997899966433e-06,
"loss": 0.0709,
"step": 100
},
{
"epoch": 0.045950864422202004,
"grad_norm": 0.9877796099816964,
"learning_rate": 9.99795725199423e-06,
"loss": 0.0903,
"step": 101
},
{
"epoch": 0.04640582347588717,
"grad_norm": 1.0061501994463906,
"learning_rate": 9.99791619561153e-06,
"loss": 0.0831,
"step": 102
},
{
"epoch": 0.04686078252957234,
"grad_norm": 0.8789173954818107,
"learning_rate": 9.997874730821689e-06,
"loss": 0.0714,
"step": 103
},
{
"epoch": 0.047315741583257506,
"grad_norm": 15.480920098194954,
"learning_rate": 9.997832857628093e-06,
"loss": 0.2603,
"step": 104
},
{
"epoch": 0.04777070063694268,
"grad_norm": 1.3806761301603454,
"learning_rate": 9.99779057603417e-06,
"loss": 0.1227,
"step": 105
},
{
"epoch": 0.048225659690627844,
"grad_norm": 0.8462176607269959,
"learning_rate": 9.997747886043368e-06,
"loss": 0.0605,
"step": 106
},
{
"epoch": 0.04868061874431301,
"grad_norm": 0.7467169847716549,
"learning_rate": 9.997704787659179e-06,
"loss": 0.0618,
"step": 107
},
{
"epoch": 0.04913557779799818,
"grad_norm": 1.5653334818977065,
"learning_rate": 9.997661280885125e-06,
"loss": 0.1253,
"step": 108
},
{
"epoch": 0.049590536851683346,
"grad_norm": 0.871706038604149,
"learning_rate": 9.99761736572476e-06,
"loss": 0.0716,
"step": 109
},
{
"epoch": 0.05004549590536852,
"grad_norm": 1.1398296008355844,
"learning_rate": 9.997573042181672e-06,
"loss": 0.0698,
"step": 110
},
{
"epoch": 0.050500454959053684,
"grad_norm": 1.0487992691419916,
"learning_rate": 9.997528310259485e-06,
"loss": 0.1102,
"step": 111
},
{
"epoch": 0.050955414012738856,
"grad_norm": 0.9112684449646818,
"learning_rate": 9.997483169961852e-06,
"loss": 0.1032,
"step": 112
},
{
"epoch": 0.05141037306642402,
"grad_norm": 0.9418790141923585,
"learning_rate": 9.997437621292463e-06,
"loss": 0.0771,
"step": 113
},
{
"epoch": 0.051865332120109194,
"grad_norm": 0.7796140692842074,
"learning_rate": 9.99739166425504e-06,
"loss": 0.0627,
"step": 114
},
{
"epoch": 0.05232029117379436,
"grad_norm": 1.5434421216734795,
"learning_rate": 9.997345298853339e-06,
"loss": 0.1495,
"step": 115
},
{
"epoch": 0.052775250227479524,
"grad_norm": 0.8898179660551836,
"learning_rate": 9.997298525091148e-06,
"loss": 0.0735,
"step": 116
},
{
"epoch": 0.053230209281164696,
"grad_norm": 0.8585916871524272,
"learning_rate": 9.997251342972288e-06,
"loss": 0.068,
"step": 117
},
{
"epoch": 0.05368516833484986,
"grad_norm": 0.812806800238708,
"learning_rate": 9.997203752500616e-06,
"loss": 0.0689,
"step": 118
},
{
"epoch": 0.054140127388535034,
"grad_norm": 0.9677722064277628,
"learning_rate": 9.997155753680021e-06,
"loss": 0.0795,
"step": 119
},
{
"epoch": 0.0545950864422202,
"grad_norm": 1.621934591654054,
"learning_rate": 9.997107346514425e-06,
"loss": 0.0707,
"step": 120
},
{
"epoch": 0.05505004549590537,
"grad_norm": 0.6750452750311531,
"learning_rate": 9.997058531007782e-06,
"loss": 0.0588,
"step": 121
},
{
"epoch": 0.055505004549590536,
"grad_norm": 0.9583870506818666,
"learning_rate": 9.997009307164083e-06,
"loss": 0.0859,
"step": 122
},
{
"epoch": 0.05595996360327571,
"grad_norm": 1.247483970027119,
"learning_rate": 9.99695967498735e-06,
"loss": 0.0952,
"step": 123
},
{
"epoch": 0.056414922656960874,
"grad_norm": 0.7937903902273558,
"learning_rate": 9.996909634481639e-06,
"loss": 0.0614,
"step": 124
},
{
"epoch": 0.05686988171064604,
"grad_norm": 4.855426128828546,
"learning_rate": 9.996859185651038e-06,
"loss": 0.1629,
"step": 125
},
{
"epoch": 0.05732484076433121,
"grad_norm": 1.0499970639607177,
"learning_rate": 9.99680832849967e-06,
"loss": 0.1031,
"step": 126
},
{
"epoch": 0.05777979981801638,
"grad_norm": 0.8730447821488512,
"learning_rate": 9.99675706303169e-06,
"loss": 0.0606,
"step": 127
},
{
"epoch": 0.05823475887170155,
"grad_norm": 1.2779985416162813,
"learning_rate": 9.99670538925129e-06,
"loss": 0.074,
"step": 128
},
{
"epoch": 0.058689717925386714,
"grad_norm": 0.8606157718419157,
"learning_rate": 9.996653307162687e-06,
"loss": 0.0703,
"step": 129
},
{
"epoch": 0.059144676979071886,
"grad_norm": 0.8920761218762643,
"learning_rate": 9.996600816770144e-06,
"loss": 0.0818,
"step": 130
},
{
"epoch": 0.05959963603275705,
"grad_norm": 1.1603462045917847,
"learning_rate": 9.996547918077944e-06,
"loss": 0.1148,
"step": 131
},
{
"epoch": 0.06005459508644222,
"grad_norm": 0.9108713801214797,
"learning_rate": 9.996494611090414e-06,
"loss": 0.0884,
"step": 132
},
{
"epoch": 0.06050955414012739,
"grad_norm": 0.6523725468628359,
"learning_rate": 9.996440895811907e-06,
"loss": 0.0535,
"step": 133
},
{
"epoch": 0.060964513193812554,
"grad_norm": 0.8812777694752004,
"learning_rate": 9.996386772246816e-06,
"loss": 0.087,
"step": 134
},
{
"epoch": 0.061419472247497726,
"grad_norm": 1.0622191207422995,
"learning_rate": 9.99633224039956e-06,
"loss": 0.0982,
"step": 135
},
{
"epoch": 0.06187443130118289,
"grad_norm": 3.7961077321923025,
"learning_rate": 9.996277300274596e-06,
"loss": 0.1526,
"step": 136
},
{
"epoch": 0.062329390354868064,
"grad_norm": 0.9444433559435487,
"learning_rate": 9.996221951876415e-06,
"loss": 0.0996,
"step": 137
},
{
"epoch": 0.06278434940855324,
"grad_norm": 1.444871481552235,
"learning_rate": 9.996166195209539e-06,
"loss": 0.1075,
"step": 138
},
{
"epoch": 0.0632393084622384,
"grad_norm": 0.7446446480732116,
"learning_rate": 9.996110030278522e-06,
"loss": 0.0561,
"step": 139
},
{
"epoch": 0.06369426751592357,
"grad_norm": 0.8913010543094952,
"learning_rate": 9.996053457087958e-06,
"loss": 0.0715,
"step": 140
},
{
"epoch": 0.06414922656960874,
"grad_norm": 0.7815821404043856,
"learning_rate": 9.995996475642466e-06,
"loss": 0.0796,
"step": 141
},
{
"epoch": 0.0646041856232939,
"grad_norm": 0.74337588448595,
"learning_rate": 9.995939085946704e-06,
"loss": 0.0661,
"step": 142
},
{
"epoch": 0.06505914467697907,
"grad_norm": 0.9974255688753435,
"learning_rate": 9.995881288005363e-06,
"loss": 0.0869,
"step": 143
},
{
"epoch": 0.06551410373066424,
"grad_norm": 1.2260290141946268,
"learning_rate": 9.995823081823162e-06,
"loss": 0.0766,
"step": 144
},
{
"epoch": 0.06596906278434941,
"grad_norm": 0.9751795993584637,
"learning_rate": 9.99576446740486e-06,
"loss": 0.091,
"step": 145
},
{
"epoch": 0.06642402183803457,
"grad_norm": 1.6175476325168967,
"learning_rate": 9.995705444755249e-06,
"loss": 0.1208,
"step": 146
},
{
"epoch": 0.06687898089171974,
"grad_norm": 0.7580083688127299,
"learning_rate": 9.995646013879147e-06,
"loss": 0.0622,
"step": 147
},
{
"epoch": 0.06733393994540492,
"grad_norm": 1.0194887039793072,
"learning_rate": 9.995586174781413e-06,
"loss": 0.0753,
"step": 148
},
{
"epoch": 0.06778889899909009,
"grad_norm": 0.9065646408503975,
"learning_rate": 9.995525927466936e-06,
"loss": 0.0848,
"step": 149
},
{
"epoch": 0.06824385805277525,
"grad_norm": 0.8871078738477127,
"learning_rate": 9.995465271940641e-06,
"loss": 0.0607,
"step": 150
},
{
"epoch": 0.06869881710646042,
"grad_norm": 1.1486707652049646,
"learning_rate": 9.995404208207485e-06,
"loss": 0.0809,
"step": 151
},
{
"epoch": 0.06915377616014559,
"grad_norm": 1.1473150526096232,
"learning_rate": 9.995342736272453e-06,
"loss": 0.1035,
"step": 152
},
{
"epoch": 0.06960873521383075,
"grad_norm": 1.3025683052462544,
"learning_rate": 9.995280856140572e-06,
"loss": 0.1197,
"step": 153
},
{
"epoch": 0.07006369426751592,
"grad_norm": 0.8069596755970996,
"learning_rate": 9.9952185678169e-06,
"loss": 0.0526,
"step": 154
},
{
"epoch": 0.0705186533212011,
"grad_norm": 0.8153700064848134,
"learning_rate": 9.995155871306524e-06,
"loss": 0.0613,
"step": 155
},
{
"epoch": 0.07097361237488627,
"grad_norm": 0.7319023745966868,
"learning_rate": 9.995092766614567e-06,
"loss": 0.0512,
"step": 156
},
{
"epoch": 0.07142857142857142,
"grad_norm": 1.0146656175738817,
"learning_rate": 9.995029253746186e-06,
"loss": 0.0846,
"step": 157
},
{
"epoch": 0.0718835304822566,
"grad_norm": 0.8015254985373994,
"learning_rate": 9.994965332706574e-06,
"loss": 0.0619,
"step": 158
},
{
"epoch": 0.07233848953594177,
"grad_norm": 1.0630207312416284,
"learning_rate": 9.994901003500952e-06,
"loss": 0.0796,
"step": 159
},
{
"epoch": 0.07279344858962693,
"grad_norm": 0.9431304991088505,
"learning_rate": 9.994836266134575e-06,
"loss": 0.0743,
"step": 160
},
{
"epoch": 0.0732484076433121,
"grad_norm": 1.023738915097686,
"learning_rate": 9.994771120612737e-06,
"loss": 0.0888,
"step": 161
},
{
"epoch": 0.07370336669699727,
"grad_norm": 0.9272637744585672,
"learning_rate": 9.994705566940757e-06,
"loss": 0.084,
"step": 162
},
{
"epoch": 0.07415832575068244,
"grad_norm": 1.122378326253592,
"learning_rate": 9.994639605123994e-06,
"loss": 0.0961,
"step": 163
},
{
"epoch": 0.0746132848043676,
"grad_norm": 0.753531768411978,
"learning_rate": 9.994573235167839e-06,
"loss": 0.0736,
"step": 164
},
{
"epoch": 0.07506824385805277,
"grad_norm": 0.9314766958597749,
"learning_rate": 9.994506457077715e-06,
"loss": 0.0838,
"step": 165
},
{
"epoch": 0.07552320291173795,
"grad_norm": 0.996008388557059,
"learning_rate": 9.994439270859077e-06,
"loss": 0.1076,
"step": 166
},
{
"epoch": 0.07597816196542312,
"grad_norm": 0.9199332464612126,
"learning_rate": 9.994371676517418e-06,
"loss": 0.0724,
"step": 167
},
{
"epoch": 0.07643312101910828,
"grad_norm": 0.8652292283168678,
"learning_rate": 9.994303674058259e-06,
"loss": 0.0628,
"step": 168
},
{
"epoch": 0.07688808007279345,
"grad_norm": 0.8176262426438138,
"learning_rate": 9.994235263487158e-06,
"loss": 0.0743,
"step": 169
},
{
"epoch": 0.07734303912647862,
"grad_norm": 0.8147855247941459,
"learning_rate": 9.994166444809705e-06,
"loss": 0.0559,
"step": 170
},
{
"epoch": 0.07779799818016378,
"grad_norm": 0.7853019575635352,
"learning_rate": 9.994097218031524e-06,
"loss": 0.0681,
"step": 171
},
{
"epoch": 0.07825295723384895,
"grad_norm": 0.8445610480134321,
"learning_rate": 9.994027583158272e-06,
"loss": 0.0785,
"step": 172
},
{
"epoch": 0.07870791628753412,
"grad_norm": 0.8555498692388026,
"learning_rate": 9.993957540195638e-06,
"loss": 0.077,
"step": 173
},
{
"epoch": 0.0791628753412193,
"grad_norm": 0.8281270493499452,
"learning_rate": 9.993887089149346e-06,
"loss": 0.0848,
"step": 174
},
{
"epoch": 0.07961783439490445,
"grad_norm": 0.7180425978661062,
"learning_rate": 9.993816230025152e-06,
"loss": 0.0588,
"step": 175
},
{
"epoch": 0.08007279344858963,
"grad_norm": 0.9287545326980071,
"learning_rate": 9.99374496282885e-06,
"loss": 0.0874,
"step": 176
},
{
"epoch": 0.0805277525022748,
"grad_norm": 1.5950603980195528,
"learning_rate": 9.993673287566261e-06,
"loss": 0.1301,
"step": 177
},
{
"epoch": 0.08098271155595996,
"grad_norm": 0.505966633973175,
"learning_rate": 9.99360120424324e-06,
"loss": 0.0459,
"step": 178
},
{
"epoch": 0.08143767060964513,
"grad_norm": 0.6170796905443107,
"learning_rate": 9.993528712865681e-06,
"loss": 0.0666,
"step": 179
},
{
"epoch": 0.0818926296633303,
"grad_norm": 0.8965600572228928,
"learning_rate": 9.993455813439507e-06,
"loss": 0.0648,
"step": 180
},
{
"epoch": 0.08234758871701547,
"grad_norm": 0.7555745664692847,
"learning_rate": 9.993382505970673e-06,
"loss": 0.0479,
"step": 181
},
{
"epoch": 0.08280254777070063,
"grad_norm": 0.7885826993774436,
"learning_rate": 9.99330879046517e-06,
"loss": 0.0605,
"step": 182
},
{
"epoch": 0.0832575068243858,
"grad_norm": 0.6970911126559147,
"learning_rate": 9.993234666929024e-06,
"loss": 0.0545,
"step": 183
},
{
"epoch": 0.08371246587807098,
"grad_norm": 0.8281240642020996,
"learning_rate": 9.99316013536829e-06,
"loss": 0.0651,
"step": 184
},
{
"epoch": 0.08416742493175614,
"grad_norm": 0.8497823551734951,
"learning_rate": 9.993085195789057e-06,
"loss": 0.098,
"step": 185
},
{
"epoch": 0.08462238398544131,
"grad_norm": 0.8425278224044996,
"learning_rate": 9.993009848197452e-06,
"loss": 0.0861,
"step": 186
},
{
"epoch": 0.08507734303912648,
"grad_norm": 0.729342450692031,
"learning_rate": 9.992934092599629e-06,
"loss": 0.0651,
"step": 187
},
{
"epoch": 0.08553230209281165,
"grad_norm": 0.8810253378927329,
"learning_rate": 9.99285792900178e-06,
"loss": 0.0995,
"step": 188
},
{
"epoch": 0.08598726114649681,
"grad_norm": 1.0402457083445067,
"learning_rate": 9.992781357410131e-06,
"loss": 0.1061,
"step": 189
},
{
"epoch": 0.08644222020018198,
"grad_norm": 0.7397036090930822,
"learning_rate": 9.992704377830934e-06,
"loss": 0.0571,
"step": 190
},
{
"epoch": 0.08689717925386715,
"grad_norm": 1.4783630598693296,
"learning_rate": 9.992626990270484e-06,
"loss": 0.1154,
"step": 191
},
{
"epoch": 0.08735213830755233,
"grad_norm": 1.1100322283473036,
"learning_rate": 9.992549194735101e-06,
"loss": 0.1179,
"step": 192
},
{
"epoch": 0.08780709736123748,
"grad_norm": 0.5797984556503705,
"learning_rate": 9.992470991231144e-06,
"loss": 0.0466,
"step": 193
},
{
"epoch": 0.08826205641492266,
"grad_norm": 1.059908713900853,
"learning_rate": 9.992392379765005e-06,
"loss": 0.0994,
"step": 194
},
{
"epoch": 0.08871701546860783,
"grad_norm": 1.1187885391430794,
"learning_rate": 9.992313360343104e-06,
"loss": 0.0986,
"step": 195
},
{
"epoch": 0.08917197452229299,
"grad_norm": 0.7509441330173129,
"learning_rate": 9.992233932971901e-06,
"loss": 0.0634,
"step": 196
},
{
"epoch": 0.08962693357597816,
"grad_norm": 0.9426276516690344,
"learning_rate": 9.992154097657888e-06,
"loss": 0.0857,
"step": 197
},
{
"epoch": 0.09008189262966333,
"grad_norm": 0.8754039034503873,
"learning_rate": 9.992073854407585e-06,
"loss": 0.0881,
"step": 198
},
{
"epoch": 0.0905368516833485,
"grad_norm": 2.8697219156120712,
"learning_rate": 9.99199320322755e-06,
"loss": 0.0851,
"step": 199
},
{
"epoch": 0.09099181073703366,
"grad_norm": 0.7429242681646778,
"learning_rate": 9.991912144124375e-06,
"loss": 0.0729,
"step": 200
},
{
"epoch": 0.09144676979071883,
"grad_norm": 1.0552979449251756,
"learning_rate": 9.991830677104682e-06,
"loss": 0.1066,
"step": 201
},
{
"epoch": 0.09190172884440401,
"grad_norm": 0.8812651371324355,
"learning_rate": 9.99174880217513e-06,
"loss": 0.0732,
"step": 202
},
{
"epoch": 0.09235668789808917,
"grad_norm": 1.0755107845413352,
"learning_rate": 9.991666519342407e-06,
"loss": 0.0977,
"step": 203
},
{
"epoch": 0.09281164695177434,
"grad_norm": 0.8925063431256136,
"learning_rate": 9.99158382861324e-06,
"loss": 0.0904,
"step": 204
},
{
"epoch": 0.09326660600545951,
"grad_norm": 0.8190206986922173,
"learning_rate": 9.991500729994384e-06,
"loss": 0.0729,
"step": 205
},
{
"epoch": 0.09372156505914468,
"grad_norm": 0.6635798147425112,
"learning_rate": 9.991417223492629e-06,
"loss": 0.0631,
"step": 206
},
{
"epoch": 0.09417652411282984,
"grad_norm": 1.0314655306023923,
"learning_rate": 9.991333309114798e-06,
"loss": 0.0852,
"step": 207
},
{
"epoch": 0.09463148316651501,
"grad_norm": 0.8533496857694978,
"learning_rate": 9.991248986867753e-06,
"loss": 0.0868,
"step": 208
},
{
"epoch": 0.09508644222020018,
"grad_norm": 1.039085255997433,
"learning_rate": 9.991164256758378e-06,
"loss": 0.095,
"step": 209
},
{
"epoch": 0.09554140127388536,
"grad_norm": 1.1484522866350177,
"learning_rate": 9.9910791187936e-06,
"loss": 0.1333,
"step": 210
},
{
"epoch": 0.09599636032757052,
"grad_norm": 0.8277820800102422,
"learning_rate": 9.99099357298038e-06,
"loss": 0.0664,
"step": 211
},
{
"epoch": 0.09645131938125569,
"grad_norm": 0.821796111319934,
"learning_rate": 9.9909076193257e-06,
"loss": 0.083,
"step": 212
},
{
"epoch": 0.09690627843494086,
"grad_norm": 0.9448800546720313,
"learning_rate": 9.990821257836589e-06,
"loss": 0.0873,
"step": 213
},
{
"epoch": 0.09736123748862602,
"grad_norm": 0.9002810379340489,
"learning_rate": 9.990734488520103e-06,
"loss": 0.099,
"step": 214
},
{
"epoch": 0.09781619654231119,
"grad_norm": 0.6145149717344348,
"learning_rate": 9.990647311383334e-06,
"loss": 0.0425,
"step": 215
},
{
"epoch": 0.09827115559599636,
"grad_norm": 1.1377497370761045,
"learning_rate": 9.990559726433404e-06,
"loss": 0.0903,
"step": 216
},
{
"epoch": 0.09872611464968153,
"grad_norm": 0.8401357673155365,
"learning_rate": 9.99047173367747e-06,
"loss": 0.0812,
"step": 217
},
{
"epoch": 0.09918107370336669,
"grad_norm": 0.6977882365614015,
"learning_rate": 9.990383333122722e-06,
"loss": 0.0613,
"step": 218
},
{
"epoch": 0.09963603275705187,
"grad_norm": 0.6751056796776193,
"learning_rate": 9.990294524776384e-06,
"loss": 0.0636,
"step": 219
},
{
"epoch": 0.10009099181073704,
"grad_norm": 0.7973250315161167,
"learning_rate": 9.990205308645716e-06,
"loss": 0.0655,
"step": 220
},
{
"epoch": 0.1005459508644222,
"grad_norm": 0.6494979859380491,
"learning_rate": 9.990115684738005e-06,
"loss": 0.0461,
"step": 221
},
{
"epoch": 0.10100090991810737,
"grad_norm": 0.7863907355652456,
"learning_rate": 9.990025653060574e-06,
"loss": 0.0881,
"step": 222
},
{
"epoch": 0.10145586897179254,
"grad_norm": 1.2756737972223395,
"learning_rate": 9.98993521362078e-06,
"loss": 0.1102,
"step": 223
},
{
"epoch": 0.10191082802547771,
"grad_norm": 1.1992554133605928,
"learning_rate": 9.989844366426018e-06,
"loss": 0.1147,
"step": 224
},
{
"epoch": 0.10236578707916287,
"grad_norm": 0.5034605400337953,
"learning_rate": 9.989753111483707e-06,
"loss": 0.0462,
"step": 225
},
{
"epoch": 0.10282074613284804,
"grad_norm": 0.9881921480518578,
"learning_rate": 9.989661448801305e-06,
"loss": 0.0848,
"step": 226
},
{
"epoch": 0.10327570518653321,
"grad_norm": 0.7581777568438945,
"learning_rate": 9.989569378386303e-06,
"loss": 0.079,
"step": 227
},
{
"epoch": 0.10373066424021839,
"grad_norm": 0.6464731162067388,
"learning_rate": 9.989476900246223e-06,
"loss": 0.0617,
"step": 228
},
{
"epoch": 0.10418562329390355,
"grad_norm": 0.8780639185859085,
"learning_rate": 9.989384014388624e-06,
"loss": 0.086,
"step": 229
},
{
"epoch": 0.10464058234758872,
"grad_norm": 0.6623808171307163,
"learning_rate": 9.989290720821095e-06,
"loss": 0.0694,
"step": 230
},
{
"epoch": 0.10509554140127389,
"grad_norm": 0.721054554263859,
"learning_rate": 9.98919701955126e-06,
"loss": 0.0735,
"step": 231
},
{
"epoch": 0.10555050045495905,
"grad_norm": 0.7868134014829404,
"learning_rate": 9.989102910586776e-06,
"loss": 0.0546,
"step": 232
},
{
"epoch": 0.10600545950864422,
"grad_norm": 0.9137158371163484,
"learning_rate": 9.989008393935331e-06,
"loss": 0.0771,
"step": 233
},
{
"epoch": 0.10646041856232939,
"grad_norm": 0.8326009579593463,
"learning_rate": 9.98891346960465e-06,
"loss": 0.0667,
"step": 234
},
{
"epoch": 0.10691537761601456,
"grad_norm": 0.6462724580348628,
"learning_rate": 9.988818137602494e-06,
"loss": 0.0717,
"step": 235
},
{
"epoch": 0.10737033666969972,
"grad_norm": 0.7513725247558808,
"learning_rate": 9.988722397936646e-06,
"loss": 0.0733,
"step": 236
},
{
"epoch": 0.1078252957233849,
"grad_norm": 1.094509848236789,
"learning_rate": 9.988626250614932e-06,
"loss": 0.1009,
"step": 237
},
{
"epoch": 0.10828025477707007,
"grad_norm": 0.8200579138639758,
"learning_rate": 9.98852969564521e-06,
"loss": 0.0844,
"step": 238
},
{
"epoch": 0.10873521383075523,
"grad_norm": 0.7417763562196316,
"learning_rate": 9.988432733035369e-06,
"loss": 0.0611,
"step": 239
},
{
"epoch": 0.1091901728844404,
"grad_norm": 0.8476475869820355,
"learning_rate": 9.988335362793333e-06,
"loss": 0.0863,
"step": 240
},
{
"epoch": 0.10964513193812557,
"grad_norm": 0.9998642783878469,
"learning_rate": 9.988237584927058e-06,
"loss": 0.0909,
"step": 241
},
{
"epoch": 0.11010009099181074,
"grad_norm": 1.1689324698997519,
"learning_rate": 9.988139399444534e-06,
"loss": 0.124,
"step": 242
},
{
"epoch": 0.1105550500454959,
"grad_norm": 0.790901332269412,
"learning_rate": 9.988040806353786e-06,
"loss": 0.0855,
"step": 243
},
{
"epoch": 0.11101000909918107,
"grad_norm": 0.8931785977847209,
"learning_rate": 9.987941805662869e-06,
"loss": 0.1023,
"step": 244
},
{
"epoch": 0.11146496815286625,
"grad_norm": 0.7352781929773609,
"learning_rate": 9.98784239737987e-06,
"loss": 0.0563,
"step": 245
},
{
"epoch": 0.11191992720655142,
"grad_norm": 0.7169092611535308,
"learning_rate": 9.987742581512919e-06,
"loss": 0.0683,
"step": 246
},
{
"epoch": 0.11237488626023658,
"grad_norm": 0.6767560569792272,
"learning_rate": 9.987642358070167e-06,
"loss": 0.0669,
"step": 247
},
{
"epoch": 0.11282984531392175,
"grad_norm": 0.8442319805699996,
"learning_rate": 9.987541727059805e-06,
"loss": 0.0768,
"step": 248
},
{
"epoch": 0.11328480436760692,
"grad_norm": 0.7700876798522618,
"learning_rate": 9.987440688490058e-06,
"loss": 0.0643,
"step": 249
},
{
"epoch": 0.11373976342129208,
"grad_norm": 0.7286087978317647,
"learning_rate": 9.98733924236918e-06,
"loss": 0.0698,
"step": 250
},
{
"epoch": 0.11419472247497725,
"grad_norm": 0.7917355018437868,
"learning_rate": 9.98723738870546e-06,
"loss": 0.0791,
"step": 251
},
{
"epoch": 0.11464968152866242,
"grad_norm": 1.0469499693242315,
"learning_rate": 9.987135127507226e-06,
"loss": 0.0761,
"step": 252
},
{
"epoch": 0.1151046405823476,
"grad_norm": 0.8361714930383379,
"learning_rate": 9.987032458782828e-06,
"loss": 0.0789,
"step": 253
},
{
"epoch": 0.11555959963603275,
"grad_norm": 0.5902853873046482,
"learning_rate": 9.986929382540662e-06,
"loss": 0.0479,
"step": 254
},
{
"epoch": 0.11601455868971793,
"grad_norm": 0.7349436304465384,
"learning_rate": 9.986825898789145e-06,
"loss": 0.0668,
"step": 255
},
{
"epoch": 0.1164695177434031,
"grad_norm": 0.7657107039148755,
"learning_rate": 9.986722007536737e-06,
"loss": 0.0617,
"step": 256
},
{
"epoch": 0.11692447679708826,
"grad_norm": 0.6450631027744769,
"learning_rate": 9.986617708791926e-06,
"loss": 0.0679,
"step": 257
},
{
"epoch": 0.11737943585077343,
"grad_norm": 0.6292930010016882,
"learning_rate": 9.986513002563236e-06,
"loss": 0.0482,
"step": 258
},
{
"epoch": 0.1178343949044586,
"grad_norm": 0.8758541343517451,
"learning_rate": 9.986407888859221e-06,
"loss": 0.0994,
"step": 259
},
{
"epoch": 0.11828935395814377,
"grad_norm": 0.6537445862223847,
"learning_rate": 9.986302367688473e-06,
"loss": 0.07,
"step": 260
},
{
"epoch": 0.11874431301182893,
"grad_norm": 0.8029660816844667,
"learning_rate": 9.986196439059613e-06,
"loss": 0.0623,
"step": 261
},
{
"epoch": 0.1191992720655141,
"grad_norm": 0.7339528606524214,
"learning_rate": 9.986090102981297e-06,
"loss": 0.0791,
"step": 262
},
{
"epoch": 0.11965423111919928,
"grad_norm": 0.7934112522002073,
"learning_rate": 9.985983359462215e-06,
"loss": 0.0672,
"step": 263
},
{
"epoch": 0.12010919017288443,
"grad_norm": 1.0186962263060808,
"learning_rate": 9.98587620851109e-06,
"loss": 0.1213,
"step": 264
},
{
"epoch": 0.1205641492265696,
"grad_norm": 0.6769843647605545,
"learning_rate": 9.985768650136679e-06,
"loss": 0.0685,
"step": 265
},
{
"epoch": 0.12101910828025478,
"grad_norm": 0.7543020935976431,
"learning_rate": 9.985660684347765e-06,
"loss": 0.0861,
"step": 266
},
{
"epoch": 0.12147406733393995,
"grad_norm": 0.9552124731299731,
"learning_rate": 9.985552311153178e-06,
"loss": 0.0922,
"step": 267
},
{
"epoch": 0.12192902638762511,
"grad_norm": 0.7436699167226903,
"learning_rate": 9.985443530561769e-06,
"loss": 0.0885,
"step": 268
},
{
"epoch": 0.12238398544131028,
"grad_norm": 1.329058937551934,
"learning_rate": 9.98533434258243e-06,
"loss": 0.1115,
"step": 269
},
{
"epoch": 0.12283894449499545,
"grad_norm": 0.6835909813818813,
"learning_rate": 9.985224747224083e-06,
"loss": 0.0586,
"step": 270
},
{
"epoch": 0.12329390354868063,
"grad_norm": 1.0733107060854794,
"learning_rate": 9.98511474449568e-06,
"loss": 0.0811,
"step": 271
},
{
"epoch": 0.12374886260236578,
"grad_norm": 0.5916007278667166,
"learning_rate": 9.985004334406215e-06,
"loss": 0.0696,
"step": 272
},
{
"epoch": 0.12420382165605096,
"grad_norm": 0.9149357508392912,
"learning_rate": 9.984893516964707e-06,
"loss": 0.0704,
"step": 273
},
{
"epoch": 0.12465878070973613,
"grad_norm": 1.1634742377762608,
"learning_rate": 9.984782292180212e-06,
"loss": 0.1178,
"step": 274
},
{
"epoch": 0.1251137397634213,
"grad_norm": 0.603957454908005,
"learning_rate": 9.98467066006182e-06,
"loss": 0.0585,
"step": 275
},
{
"epoch": 0.12556869881710647,
"grad_norm": 0.7735087790025026,
"learning_rate": 9.984558620618651e-06,
"loss": 0.0953,
"step": 276
},
{
"epoch": 0.12602365787079162,
"grad_norm": 1.2570182633873541,
"learning_rate": 9.984446173859863e-06,
"loss": 0.1353,
"step": 277
},
{
"epoch": 0.1264786169244768,
"grad_norm": 0.7275895818672663,
"learning_rate": 9.984333319794642e-06,
"loss": 0.0774,
"step": 278
},
{
"epoch": 0.12693357597816196,
"grad_norm": 0.6395006056363333,
"learning_rate": 9.984220058432212e-06,
"loss": 0.0591,
"step": 279
},
{
"epoch": 0.12738853503184713,
"grad_norm": 0.6563921850032347,
"learning_rate": 9.984106389781828e-06,
"loss": 0.0573,
"step": 280
},
{
"epoch": 0.1278434940855323,
"grad_norm": 0.9399157526953884,
"learning_rate": 9.983992313852776e-06,
"loss": 0.0793,
"step": 281
},
{
"epoch": 0.12829845313921748,
"grad_norm": 0.93528061821534,
"learning_rate": 9.983877830654381e-06,
"loss": 0.0807,
"step": 282
},
{
"epoch": 0.12875341219290265,
"grad_norm": 0.7192448233352142,
"learning_rate": 9.983762940195996e-06,
"loss": 0.0773,
"step": 283
},
{
"epoch": 0.1292083712465878,
"grad_norm": 0.7097381072031733,
"learning_rate": 9.98364764248701e-06,
"loss": 0.0698,
"step": 284
},
{
"epoch": 0.12966333030027297,
"grad_norm": 1.1635566012920768,
"learning_rate": 9.983531937536844e-06,
"loss": 0.0893,
"step": 285
},
{
"epoch": 0.13011828935395814,
"grad_norm": 0.8456555685011555,
"learning_rate": 9.983415825354954e-06,
"loss": 0.0628,
"step": 286
},
{
"epoch": 0.1305732484076433,
"grad_norm": 0.7151838393189083,
"learning_rate": 9.983299305950828e-06,
"loss": 0.0557,
"step": 287
},
{
"epoch": 0.13102820746132848,
"grad_norm": 0.7095193783870621,
"learning_rate": 9.983182379333989e-06,
"loss": 0.0604,
"step": 288
},
{
"epoch": 0.13148316651501366,
"grad_norm": 0.8581434444337498,
"learning_rate": 9.983065045513986e-06,
"loss": 0.0781,
"step": 289
},
{
"epoch": 0.13193812556869883,
"grad_norm": 0.5600994934804626,
"learning_rate": 9.982947304500414e-06,
"loss": 0.0498,
"step": 290
},
{
"epoch": 0.13239308462238397,
"grad_norm": 0.7355720212694087,
"learning_rate": 9.98282915630289e-06,
"loss": 0.0692,
"step": 291
},
{
"epoch": 0.13284804367606914,
"grad_norm": 1.6846985851500909,
"learning_rate": 9.98271060093107e-06,
"loss": 0.1687,
"step": 292
},
{
"epoch": 0.13330300272975432,
"grad_norm": 0.7959406174268434,
"learning_rate": 9.98259163839464e-06,
"loss": 0.0718,
"step": 293
},
{
"epoch": 0.1337579617834395,
"grad_norm": 0.6005858848115938,
"learning_rate": 9.982472268703323e-06,
"loss": 0.0465,
"step": 294
},
{
"epoch": 0.13421292083712466,
"grad_norm": 0.7865103977061746,
"learning_rate": 9.982352491866874e-06,
"loss": 0.071,
"step": 295
},
{
"epoch": 0.13466787989080983,
"grad_norm": 0.7167219429964851,
"learning_rate": 9.982232307895077e-06,
"loss": 0.0658,
"step": 296
},
{
"epoch": 0.135122838944495,
"grad_norm": 1.206398567596641,
"learning_rate": 9.982111716797758e-06,
"loss": 0.101,
"step": 297
},
{
"epoch": 0.13557779799818018,
"grad_norm": 1.0085912508470862,
"learning_rate": 9.981990718584768e-06,
"loss": 0.0959,
"step": 298
},
{
"epoch": 0.13603275705186532,
"grad_norm": 0.8594135430057543,
"learning_rate": 9.981869313265995e-06,
"loss": 0.0912,
"step": 299
},
{
"epoch": 0.1364877161055505,
"grad_norm": 0.9903339586980618,
"learning_rate": 9.981747500851357e-06,
"loss": 0.0692,
"step": 300
},
{
"epoch": 0.13694267515923567,
"grad_norm": 0.7623380548666351,
"learning_rate": 9.981625281350812e-06,
"loss": 0.0699,
"step": 301
},
{
"epoch": 0.13739763421292084,
"grad_norm": 0.6267143484055344,
"learning_rate": 9.981502654774349e-06,
"loss": 0.0499,
"step": 302
},
{
"epoch": 0.137852593266606,
"grad_norm": 0.8234150836820757,
"learning_rate": 9.98137962113198e-06,
"loss": 0.0788,
"step": 303
},
{
"epoch": 0.13830755232029118,
"grad_norm": 0.8158733102806115,
"learning_rate": 9.98125618043377e-06,
"loss": 0.089,
"step": 304
},
{
"epoch": 0.13876251137397635,
"grad_norm": 0.6372656549463032,
"learning_rate": 9.981132332689796e-06,
"loss": 0.0517,
"step": 305
},
{
"epoch": 0.1392174704276615,
"grad_norm": 0.7713863813548327,
"learning_rate": 9.981008077910184e-06,
"loss": 0.0769,
"step": 306
},
{
"epoch": 0.13967242948134667,
"grad_norm": 0.8883775702857831,
"learning_rate": 9.980883416105084e-06,
"loss": 0.0828,
"step": 307
},
{
"epoch": 0.14012738853503184,
"grad_norm": 0.6490936355626988,
"learning_rate": 9.980758347284687e-06,
"loss": 0.0618,
"step": 308
},
{
"epoch": 0.14058234758871702,
"grad_norm": 0.8359554084586713,
"learning_rate": 9.980632871459209e-06,
"loss": 0.0714,
"step": 309
},
{
"epoch": 0.1410373066424022,
"grad_norm": 0.7373523328454649,
"learning_rate": 9.980506988638906e-06,
"loss": 0.0836,
"step": 310
},
{
"epoch": 0.14149226569608736,
"grad_norm": 0.6644370731485183,
"learning_rate": 9.980380698834064e-06,
"loss": 0.0777,
"step": 311
},
{
"epoch": 0.14194722474977253,
"grad_norm": 0.870883965477211,
"learning_rate": 9.980254002055003e-06,
"loss": 0.0847,
"step": 312
},
{
"epoch": 0.14240218380345768,
"grad_norm": 0.6021065409531002,
"learning_rate": 9.980126898312074e-06,
"loss": 0.0583,
"step": 313
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.8705461588189498,
"learning_rate": 9.979999387615665e-06,
"loss": 0.0895,
"step": 314
},
{
"epoch": 0.14331210191082802,
"grad_norm": 0.9639410731114018,
"learning_rate": 9.979871469976197e-06,
"loss": 0.0901,
"step": 315
},
{
"epoch": 0.1437670609645132,
"grad_norm": 0.7554126383153169,
"learning_rate": 9.97974314540412e-06,
"loss": 0.0699,
"step": 316
},
{
"epoch": 0.14422202001819837,
"grad_norm": 1.1039648440512544,
"learning_rate": 9.979614413909922e-06,
"loss": 0.1013,
"step": 317
},
{
"epoch": 0.14467697907188354,
"grad_norm": 0.5258831871743486,
"learning_rate": 9.979485275504121e-06,
"loss": 0.0544,
"step": 318
},
{
"epoch": 0.1451319381255687,
"grad_norm": 1.3025897394440575,
"learning_rate": 9.979355730197271e-06,
"loss": 0.1067,
"step": 319
},
{
"epoch": 0.14558689717925385,
"grad_norm": 0.5206132423310033,
"learning_rate": 9.979225777999956e-06,
"loss": 0.0497,
"step": 320
},
{
"epoch": 0.14604185623293903,
"grad_norm": 0.7202189397663867,
"learning_rate": 9.9790954189228e-06,
"loss": 0.0807,
"step": 321
},
{
"epoch": 0.1464968152866242,
"grad_norm": 0.5738667169449175,
"learning_rate": 9.97896465297645e-06,
"loss": 0.0614,
"step": 322
},
{
"epoch": 0.14695177434030937,
"grad_norm": 0.7972440737628133,
"learning_rate": 9.978833480171592e-06,
"loss": 0.0906,
"step": 323
},
{
"epoch": 0.14740673339399454,
"grad_norm": 0.7697423454053598,
"learning_rate": 9.978701900518947e-06,
"loss": 0.0632,
"step": 324
},
{
"epoch": 0.14786169244767972,
"grad_norm": 0.8259885564233931,
"learning_rate": 9.978569914029267e-06,
"loss": 0.0944,
"step": 325
},
{
"epoch": 0.1483166515013649,
"grad_norm": 0.8450006655868962,
"learning_rate": 9.978437520713335e-06,
"loss": 0.0862,
"step": 326
},
{
"epoch": 0.14877161055505003,
"grad_norm": 0.7746078278616594,
"learning_rate": 9.978304720581973e-06,
"loss": 0.088,
"step": 327
},
{
"epoch": 0.1492265696087352,
"grad_norm": 0.9977734940815816,
"learning_rate": 9.97817151364603e-06,
"loss": 0.1036,
"step": 328
},
{
"epoch": 0.14968152866242038,
"grad_norm": 0.7800752301510507,
"learning_rate": 9.978037899916393e-06,
"loss": 0.0778,
"step": 329
},
{
"epoch": 0.15013648771610555,
"grad_norm": 0.7521153273438224,
"learning_rate": 9.97790387940398e-06,
"loss": 0.0532,
"step": 330
},
{
"epoch": 0.15059144676979072,
"grad_norm": 0.8046420256419254,
"learning_rate": 9.977769452119741e-06,
"loss": 0.0708,
"step": 331
},
{
"epoch": 0.1510464058234759,
"grad_norm": 0.9071770528791517,
"learning_rate": 9.97763461807466e-06,
"loss": 0.1006,
"step": 332
},
{
"epoch": 0.15150136487716107,
"grad_norm": 0.8824570234268595,
"learning_rate": 9.97749937727976e-06,
"loss": 0.0855,
"step": 333
},
{
"epoch": 0.15195632393084624,
"grad_norm": 0.8286075823730068,
"learning_rate": 9.977363729746088e-06,
"loss": 0.077,
"step": 334
},
{
"epoch": 0.15241128298453138,
"grad_norm": 0.6791233851472963,
"learning_rate": 9.977227675484729e-06,
"loss": 0.0698,
"step": 335
},
{
"epoch": 0.15286624203821655,
"grad_norm": 0.9813875260679181,
"learning_rate": 9.977091214506803e-06,
"loss": 0.0838,
"step": 336
},
{
"epoch": 0.15332120109190173,
"grad_norm": 0.9986284190120469,
"learning_rate": 9.976954346823456e-06,
"loss": 0.0789,
"step": 337
},
{
"epoch": 0.1537761601455869,
"grad_norm": 0.6456071732838817,
"learning_rate": 9.976817072445878e-06,
"loss": 0.0566,
"step": 338
},
{
"epoch": 0.15423111919927207,
"grad_norm": 0.7707362352402762,
"learning_rate": 9.976679391385283e-06,
"loss": 0.0677,
"step": 339
},
{
"epoch": 0.15468607825295724,
"grad_norm": 0.5804713825378958,
"learning_rate": 9.976541303652923e-06,
"loss": 0.0547,
"step": 340
},
{
"epoch": 0.15514103730664242,
"grad_norm": 0.7705377953828665,
"learning_rate": 9.976402809260083e-06,
"loss": 0.0673,
"step": 341
},
{
"epoch": 0.15559599636032756,
"grad_norm": 0.651002355082985,
"learning_rate": 9.976263908218076e-06,
"loss": 0.066,
"step": 342
},
{
"epoch": 0.15605095541401273,
"grad_norm": 1.0075230687249708,
"learning_rate": 9.976124600538257e-06,
"loss": 0.1151,
"step": 343
},
{
"epoch": 0.1565059144676979,
"grad_norm": 0.7110146200064966,
"learning_rate": 9.975984886232006e-06,
"loss": 0.0693,
"step": 344
},
{
"epoch": 0.15696087352138308,
"grad_norm": 0.782615076662302,
"learning_rate": 9.975844765310743e-06,
"loss": 0.071,
"step": 345
},
{
"epoch": 0.15741583257506825,
"grad_norm": 1.091513822496144,
"learning_rate": 9.975704237785915e-06,
"loss": 0.1277,
"step": 346
},
{
"epoch": 0.15787079162875342,
"grad_norm": 0.8244942271322709,
"learning_rate": 9.975563303669006e-06,
"loss": 0.092,
"step": 347
},
{
"epoch": 0.1583257506824386,
"grad_norm": 1.0997264747524325,
"learning_rate": 9.975421962971536e-06,
"loss": 0.102,
"step": 348
},
{
"epoch": 0.15878070973612374,
"grad_norm": 1.0471722358260585,
"learning_rate": 9.97528021570505e-06,
"loss": 0.1112,
"step": 349
},
{
"epoch": 0.1592356687898089,
"grad_norm": 0.6366013160292697,
"learning_rate": 9.975138061881135e-06,
"loss": 0.0629,
"step": 350
},
{
"epoch": 0.15969062784349408,
"grad_norm": 0.7145502784859615,
"learning_rate": 9.974995501511404e-06,
"loss": 0.0567,
"step": 351
},
{
"epoch": 0.16014558689717925,
"grad_norm": 1.0825694007542435,
"learning_rate": 9.974852534607506e-06,
"loss": 0.0897,
"step": 352
},
{
"epoch": 0.16060054595086443,
"grad_norm": 0.8874195306329471,
"learning_rate": 9.974709161181126e-06,
"loss": 0.0879,
"step": 353
},
{
"epoch": 0.1610555050045496,
"grad_norm": 0.8193025449594961,
"learning_rate": 9.974565381243982e-06,
"loss": 0.0969,
"step": 354
},
{
"epoch": 0.16151046405823477,
"grad_norm": 0.76528422131405,
"learning_rate": 9.974421194807815e-06,
"loss": 0.0786,
"step": 355
},
{
"epoch": 0.16196542311191992,
"grad_norm": 0.8836543328533641,
"learning_rate": 9.974276601884416e-06,
"loss": 0.0744,
"step": 356
},
{
"epoch": 0.1624203821656051,
"grad_norm": 0.7482952108426273,
"learning_rate": 9.974131602485596e-06,
"loss": 0.0772,
"step": 357
},
{
"epoch": 0.16287534121929026,
"grad_norm": 0.9122723647083647,
"learning_rate": 9.973986196623203e-06,
"loss": 0.0851,
"step": 358
},
{
"epoch": 0.16333030027297543,
"grad_norm": 0.8373653902978805,
"learning_rate": 9.973840384309121e-06,
"loss": 0.0865,
"step": 359
},
{
"epoch": 0.1637852593266606,
"grad_norm": 0.6360069343077157,
"learning_rate": 9.973694165555264e-06,
"loss": 0.0618,
"step": 360
},
{
"epoch": 0.16424021838034578,
"grad_norm": 0.7967304456611868,
"learning_rate": 9.973547540373582e-06,
"loss": 0.0865,
"step": 361
},
{
"epoch": 0.16469517743403095,
"grad_norm": 1.1699452577832765,
"learning_rate": 9.973400508776054e-06,
"loss": 0.1144,
"step": 362
},
{
"epoch": 0.1651501364877161,
"grad_norm": 0.6282867599706373,
"learning_rate": 9.973253070774698e-06,
"loss": 0.0633,
"step": 363
},
{
"epoch": 0.16560509554140126,
"grad_norm": 0.79942272506218,
"learning_rate": 9.973105226381559e-06,
"loss": 0.069,
"step": 364
},
{
"epoch": 0.16606005459508644,
"grad_norm": 0.9348674828410355,
"learning_rate": 9.972956975608719e-06,
"loss": 0.1019,
"step": 365
},
{
"epoch": 0.1665150136487716,
"grad_norm": 1.0942665884463076,
"learning_rate": 9.972808318468292e-06,
"loss": 0.0859,
"step": 366
},
{
"epoch": 0.16696997270245678,
"grad_norm": 0.6283579225277517,
"learning_rate": 9.972659254972426e-06,
"loss": 0.0589,
"step": 367
},
{
"epoch": 0.16742493175614195,
"grad_norm": 1.0989677054167046,
"learning_rate": 9.972509785133304e-06,
"loss": 0.1081,
"step": 368
},
{
"epoch": 0.16787989080982713,
"grad_norm": 0.7310198219540203,
"learning_rate": 9.972359908963137e-06,
"loss": 0.0675,
"step": 369
},
{
"epoch": 0.16833484986351227,
"grad_norm": 0.757671629194488,
"learning_rate": 9.972209626474172e-06,
"loss": 0.0734,
"step": 370
},
{
"epoch": 0.16878980891719744,
"grad_norm": 0.7966175159886519,
"learning_rate": 9.972058937678692e-06,
"loss": 0.075,
"step": 371
},
{
"epoch": 0.16924476797088261,
"grad_norm": 0.9805514159267839,
"learning_rate": 9.97190784258901e-06,
"loss": 0.1071,
"step": 372
},
{
"epoch": 0.1696997270245678,
"grad_norm": 0.7000612574442994,
"learning_rate": 9.971756341217471e-06,
"loss": 0.0526,
"step": 373
},
{
"epoch": 0.17015468607825296,
"grad_norm": 0.7917466702374949,
"learning_rate": 9.971604433576456e-06,
"loss": 0.0698,
"step": 374
},
{
"epoch": 0.17060964513193813,
"grad_norm": 0.8412692631182211,
"learning_rate": 9.97145211967838e-06,
"loss": 0.0783,
"step": 375
},
{
"epoch": 0.1710646041856233,
"grad_norm": 0.5615038895232536,
"learning_rate": 9.971299399535685e-06,
"loss": 0.053,
"step": 376
},
{
"epoch": 0.17151956323930848,
"grad_norm": 0.6849745369298482,
"learning_rate": 9.971146273160854e-06,
"loss": 0.0774,
"step": 377
},
{
"epoch": 0.17197452229299362,
"grad_norm": 0.6466596777060115,
"learning_rate": 9.9709927405664e-06,
"loss": 0.0606,
"step": 378
},
{
"epoch": 0.1724294813466788,
"grad_norm": 0.7169884074840761,
"learning_rate": 9.970838801764866e-06,
"loss": 0.0839,
"step": 379
},
{
"epoch": 0.17288444040036396,
"grad_norm": 0.9393396355410675,
"learning_rate": 9.970684456768836e-06,
"loss": 0.1132,
"step": 380
},
{
"epoch": 0.17333939945404914,
"grad_norm": 12.197098173453568,
"learning_rate": 9.970529705590918e-06,
"loss": 0.4858,
"step": 381
},
{
"epoch": 0.1737943585077343,
"grad_norm": 0.7355841274771772,
"learning_rate": 9.97037454824376e-06,
"loss": 0.0714,
"step": 382
},
{
"epoch": 0.17424931756141948,
"grad_norm": 1.050385265783733,
"learning_rate": 9.97021898474004e-06,
"loss": 0.1024,
"step": 383
},
{
"epoch": 0.17470427661510465,
"grad_norm": 0.8612087678995594,
"learning_rate": 9.970063015092469e-06,
"loss": 0.085,
"step": 384
},
{
"epoch": 0.1751592356687898,
"grad_norm": 1.3886472100476919,
"learning_rate": 9.969906639313793e-06,
"loss": 0.1212,
"step": 385
},
{
"epoch": 0.17561419472247497,
"grad_norm": 0.8238176964814595,
"learning_rate": 9.96974985741679e-06,
"loss": 0.0721,
"step": 386
},
{
"epoch": 0.17606915377616014,
"grad_norm": 0.8718897735731601,
"learning_rate": 9.969592669414272e-06,
"loss": 0.0959,
"step": 387
},
{
"epoch": 0.17652411282984531,
"grad_norm": 6.796752422837202,
"learning_rate": 9.969435075319083e-06,
"loss": 0.115,
"step": 388
},
{
"epoch": 0.1769790718835305,
"grad_norm": 0.58176536820322,
"learning_rate": 9.969277075144104e-06,
"loss": 0.0459,
"step": 389
},
{
"epoch": 0.17743403093721566,
"grad_norm": 0.7267253435076165,
"learning_rate": 9.969118668902242e-06,
"loss": 0.07,
"step": 390
},
{
"epoch": 0.17788898999090083,
"grad_norm": 0.7682389367523258,
"learning_rate": 9.968959856606442e-06,
"loss": 0.0542,
"step": 391
},
{
"epoch": 0.17834394904458598,
"grad_norm": 0.7873348185837048,
"learning_rate": 9.968800638269682e-06,
"loss": 0.0598,
"step": 392
},
{
"epoch": 0.17879890809827115,
"grad_norm": 1.287713292390112,
"learning_rate": 9.968641013904974e-06,
"loss": 0.1442,
"step": 393
},
{
"epoch": 0.17925386715195632,
"grad_norm": 1.085650814952146,
"learning_rate": 9.968480983525359e-06,
"loss": 0.0926,
"step": 394
},
{
"epoch": 0.1797088262056415,
"grad_norm": 0.6716676596759695,
"learning_rate": 9.968320547143918e-06,
"loss": 0.0767,
"step": 395
},
{
"epoch": 0.18016378525932666,
"grad_norm": 0.8467396807693714,
"learning_rate": 9.968159704773757e-06,
"loss": 0.0977,
"step": 396
},
{
"epoch": 0.18061874431301184,
"grad_norm": 0.6438855833782786,
"learning_rate": 9.967998456428021e-06,
"loss": 0.0586,
"step": 397
},
{
"epoch": 0.181073703366697,
"grad_norm": 0.7254140122399564,
"learning_rate": 9.967836802119886e-06,
"loss": 0.06,
"step": 398
},
{
"epoch": 0.18152866242038215,
"grad_norm": 0.87517545358881,
"learning_rate": 9.967674741862563e-06,
"loss": 0.1016,
"step": 399
},
{
"epoch": 0.18198362147406733,
"grad_norm": 1.0624206936058178,
"learning_rate": 9.967512275669294e-06,
"loss": 0.1296,
"step": 400
},
{
"epoch": 0.1824385805277525,
"grad_norm": 1.0284720738314184,
"learning_rate": 9.967349403553353e-06,
"loss": 0.0862,
"step": 401
},
{
"epoch": 0.18289353958143767,
"grad_norm": 0.8342932737384292,
"learning_rate": 9.967186125528053e-06,
"loss": 0.0873,
"step": 402
},
{
"epoch": 0.18334849863512284,
"grad_norm": 1.543095569701571,
"learning_rate": 9.967022441606734e-06,
"loss": 0.1209,
"step": 403
},
{
"epoch": 0.18380345768880801,
"grad_norm": 0.70731586616612,
"learning_rate": 9.966858351802773e-06,
"loss": 0.0726,
"step": 404
},
{
"epoch": 0.1842584167424932,
"grad_norm": 0.6660531988680356,
"learning_rate": 9.966693856129576e-06,
"loss": 0.0562,
"step": 405
},
{
"epoch": 0.18471337579617833,
"grad_norm": 0.8503640969928286,
"learning_rate": 9.966528954600587e-06,
"loss": 0.0838,
"step": 406
},
{
"epoch": 0.1851683348498635,
"grad_norm": 0.6021534124846688,
"learning_rate": 9.96636364722928e-06,
"loss": 0.0673,
"step": 407
},
{
"epoch": 0.18562329390354868,
"grad_norm": 0.8782816795828058,
"learning_rate": 9.966197934029165e-06,
"loss": 0.0845,
"step": 408
},
{
"epoch": 0.18607825295723385,
"grad_norm": 0.9030990654346936,
"learning_rate": 9.966031815013781e-06,
"loss": 0.0839,
"step": 409
},
{
"epoch": 0.18653321201091902,
"grad_norm": 0.8567507299712805,
"learning_rate": 9.965865290196703e-06,
"loss": 0.0935,
"step": 410
},
{
"epoch": 0.1869881710646042,
"grad_norm": 0.8099856489670021,
"learning_rate": 9.96569835959154e-06,
"loss": 0.0747,
"step": 411
},
{
"epoch": 0.18744313011828936,
"grad_norm": 0.8938878675243255,
"learning_rate": 9.965531023211931e-06,
"loss": 0.0854,
"step": 412
},
{
"epoch": 0.18789808917197454,
"grad_norm": 0.735313860104022,
"learning_rate": 9.965363281071551e-06,
"loss": 0.0865,
"step": 413
},
{
"epoch": 0.18835304822565968,
"grad_norm": 0.5495229598132649,
"learning_rate": 9.965195133184108e-06,
"loss": 0.0403,
"step": 414
},
{
"epoch": 0.18880800727934485,
"grad_norm": 1.0700416713113117,
"learning_rate": 9.965026579563342e-06,
"loss": 0.1086,
"step": 415
},
{
"epoch": 0.18926296633303002,
"grad_norm": 0.7118653717355078,
"learning_rate": 9.964857620223024e-06,
"loss": 0.0691,
"step": 416
},
{
"epoch": 0.1897179253867152,
"grad_norm": 0.6871481686027417,
"learning_rate": 9.964688255176963e-06,
"loss": 0.0667,
"step": 417
},
{
"epoch": 0.19017288444040037,
"grad_norm": 0.9848841869658392,
"learning_rate": 9.964518484438998e-06,
"loss": 0.0813,
"step": 418
},
{
"epoch": 0.19062784349408554,
"grad_norm": 0.6311750922074311,
"learning_rate": 9.964348308023001e-06,
"loss": 0.0592,
"step": 419
},
{
"epoch": 0.1910828025477707,
"grad_norm": 0.7813168734245782,
"learning_rate": 9.964177725942881e-06,
"loss": 0.0826,
"step": 420
},
{
"epoch": 0.19153776160145586,
"grad_norm": 0.8572110622332836,
"learning_rate": 9.964006738212574e-06,
"loss": 0.0853,
"step": 421
},
{
"epoch": 0.19199272065514103,
"grad_norm": 0.5304433423014596,
"learning_rate": 9.963835344846056e-06,
"loss": 0.048,
"step": 422
},
{
"epoch": 0.1924476797088262,
"grad_norm": 0.7598521228122416,
"learning_rate": 9.963663545857328e-06,
"loss": 0.0757,
"step": 423
},
{
"epoch": 0.19290263876251137,
"grad_norm": 1.1542546683489703,
"learning_rate": 9.963491341260432e-06,
"loss": 0.104,
"step": 424
},
{
"epoch": 0.19335759781619655,
"grad_norm": 0.7766563582253432,
"learning_rate": 9.963318731069437e-06,
"loss": 0.0952,
"step": 425
},
{
"epoch": 0.19381255686988172,
"grad_norm": 1.1319194983916299,
"learning_rate": 9.96314571529845e-06,
"loss": 0.1005,
"step": 426
},
{
"epoch": 0.1942675159235669,
"grad_norm": 0.7230559135257585,
"learning_rate": 9.962972293961608e-06,
"loss": 0.0647,
"step": 427
},
{
"epoch": 0.19472247497725204,
"grad_norm": 0.9863934566369588,
"learning_rate": 9.962798467073083e-06,
"loss": 0.0763,
"step": 428
},
{
"epoch": 0.1951774340309372,
"grad_norm": 0.8259784410005646,
"learning_rate": 9.96262423464708e-06,
"loss": 0.087,
"step": 429
},
{
"epoch": 0.19563239308462238,
"grad_norm": 0.7987139095182185,
"learning_rate": 9.962449596697834e-06,
"loss": 0.0671,
"step": 430
},
{
"epoch": 0.19608735213830755,
"grad_norm": 1.130208173229934,
"learning_rate": 9.962274553239619e-06,
"loss": 0.119,
"step": 431
},
{
"epoch": 0.19654231119199272,
"grad_norm": 0.7399696243677417,
"learning_rate": 9.962099104286735e-06,
"loss": 0.064,
"step": 432
},
{
"epoch": 0.1969972702456779,
"grad_norm": 1.156015767405528,
"learning_rate": 9.961923249853523e-06,
"loss": 0.1102,
"step": 433
},
{
"epoch": 0.19745222929936307,
"grad_norm": 0.972422739757894,
"learning_rate": 9.961746989954349e-06,
"loss": 0.1093,
"step": 434
},
{
"epoch": 0.1979071883530482,
"grad_norm": 0.7766700420403171,
"learning_rate": 9.96157032460362e-06,
"loss": 0.0655,
"step": 435
},
{
"epoch": 0.19836214740673339,
"grad_norm": 0.7460679115751414,
"learning_rate": 9.961393253815767e-06,
"loss": 0.0751,
"step": 436
},
{
"epoch": 0.19881710646041856,
"grad_norm": 1.0684214450487566,
"learning_rate": 9.961215777605266e-06,
"loss": 0.0789,
"step": 437
},
{
"epoch": 0.19927206551410373,
"grad_norm": 0.7683994291392229,
"learning_rate": 9.961037895986615e-06,
"loss": 0.0849,
"step": 438
},
{
"epoch": 0.1997270245677889,
"grad_norm": 0.7270368453251704,
"learning_rate": 9.960859608974352e-06,
"loss": 0.0779,
"step": 439
},
{
"epoch": 0.20018198362147407,
"grad_norm": 0.701460207303568,
"learning_rate": 9.960680916583042e-06,
"loss": 0.0639,
"step": 440
},
{
"epoch": 0.20063694267515925,
"grad_norm": 0.6784619280926262,
"learning_rate": 9.960501818827292e-06,
"loss": 0.077,
"step": 441
},
{
"epoch": 0.2010919017288444,
"grad_norm": 0.8064075868568972,
"learning_rate": 9.960322315721735e-06,
"loss": 0.0827,
"step": 442
},
{
"epoch": 0.20154686078252956,
"grad_norm": 0.9155026735417204,
"learning_rate": 9.960142407281039e-06,
"loss": 0.0841,
"step": 443
},
{
"epoch": 0.20200181983621474,
"grad_norm": 0.6167749294869733,
"learning_rate": 9.959962093519904e-06,
"loss": 0.054,
"step": 444
},
{
"epoch": 0.2024567788898999,
"grad_norm": 0.8127781985331358,
"learning_rate": 9.959781374453066e-06,
"loss": 0.0751,
"step": 445
},
{
"epoch": 0.20291173794358508,
"grad_norm": 0.98306444688532,
"learning_rate": 9.959600250095294e-06,
"loss": 0.075,
"step": 446
},
{
"epoch": 0.20336669699727025,
"grad_norm": 0.7982130269360888,
"learning_rate": 9.959418720461384e-06,
"loss": 0.0834,
"step": 447
},
{
"epoch": 0.20382165605095542,
"grad_norm": 0.7862225023823932,
"learning_rate": 9.959236785566175e-06,
"loss": 0.0704,
"step": 448
},
{
"epoch": 0.20427661510464057,
"grad_norm": 0.562107514296544,
"learning_rate": 9.959054445424532e-06,
"loss": 0.0644,
"step": 449
},
{
"epoch": 0.20473157415832574,
"grad_norm": 0.6089607791855781,
"learning_rate": 9.958871700051353e-06,
"loss": 0.0512,
"step": 450
},
{
"epoch": 0.2051865332120109,
"grad_norm": 0.6962095067981563,
"learning_rate": 9.958688549461573e-06,
"loss": 0.0712,
"step": 451
},
{
"epoch": 0.20564149226569609,
"grad_norm": 1.155217046291275,
"learning_rate": 9.958504993670158e-06,
"loss": 0.1049,
"step": 452
},
{
"epoch": 0.20609645131938126,
"grad_norm": 1.0913314226134752,
"learning_rate": 9.958321032692107e-06,
"loss": 0.1226,
"step": 453
},
{
"epoch": 0.20655141037306643,
"grad_norm": 22.735025633907238,
"learning_rate": 9.958136666542455e-06,
"loss": 0.8419,
"step": 454
},
{
"epoch": 0.2070063694267516,
"grad_norm": 1.184019553325164,
"learning_rate": 9.957951895236262e-06,
"loss": 0.1113,
"step": 455
},
{
"epoch": 0.20746132848043677,
"grad_norm": 0.7664792046331882,
"learning_rate": 9.957766718788632e-06,
"loss": 0.104,
"step": 456
},
{
"epoch": 0.20791628753412192,
"grad_norm": 0.8672883026786035,
"learning_rate": 9.957581137214695e-06,
"loss": 0.074,
"step": 457
},
{
"epoch": 0.2083712465878071,
"grad_norm": 0.8772220264781722,
"learning_rate": 9.957395150529615e-06,
"loss": 0.0986,
"step": 458
},
{
"epoch": 0.20882620564149226,
"grad_norm": 0.7016331971826193,
"learning_rate": 9.95720875874859e-06,
"loss": 0.0752,
"step": 459
},
{
"epoch": 0.20928116469517744,
"grad_norm": 0.6308822051977305,
"learning_rate": 9.957021961886855e-06,
"loss": 0.0608,
"step": 460
},
{
"epoch": 0.2097361237488626,
"grad_norm": 0.9803601042372939,
"learning_rate": 9.956834759959669e-06,
"loss": 0.0908,
"step": 461
},
{
"epoch": 0.21019108280254778,
"grad_norm": 0.7674462109758159,
"learning_rate": 9.95664715298233e-06,
"loss": 0.074,
"step": 462
},
{
"epoch": 0.21064604185623295,
"grad_norm": 0.7450186566335193,
"learning_rate": 9.95645914097017e-06,
"loss": 0.0817,
"step": 463
},
{
"epoch": 0.2111010009099181,
"grad_norm": 0.7225723661612439,
"learning_rate": 9.956270723938553e-06,
"loss": 0.0849,
"step": 464
},
{
"epoch": 0.21155595996360327,
"grad_norm": 0.7190355211871646,
"learning_rate": 9.956081901902875e-06,
"loss": 0.0748,
"step": 465
},
{
"epoch": 0.21201091901728844,
"grad_norm": 1.210684562087392,
"learning_rate": 9.955892674878565e-06,
"loss": 0.1272,
"step": 466
},
{
"epoch": 0.2124658780709736,
"grad_norm": 0.834170476650907,
"learning_rate": 9.955703042881087e-06,
"loss": 0.0992,
"step": 467
},
{
"epoch": 0.21292083712465878,
"grad_norm": 0.874478173291907,
"learning_rate": 9.955513005925934e-06,
"loss": 0.0858,
"step": 468
},
{
"epoch": 0.21337579617834396,
"grad_norm": 0.5510320150423565,
"learning_rate": 9.95532256402864e-06,
"loss": 0.0574,
"step": 469
},
{
"epoch": 0.21383075523202913,
"grad_norm": 0.5657171871822584,
"learning_rate": 9.955131717204762e-06,
"loss": 0.0671,
"step": 470
},
{
"epoch": 0.21428571428571427,
"grad_norm": 0.7564664653864259,
"learning_rate": 9.954940465469898e-06,
"loss": 0.085,
"step": 471
},
{
"epoch": 0.21474067333939945,
"grad_norm": 0.7594501005901694,
"learning_rate": 9.954748808839675e-06,
"loss": 0.0733,
"step": 472
},
{
"epoch": 0.21519563239308462,
"grad_norm": 0.6748092428366178,
"learning_rate": 9.954556747329754e-06,
"loss": 0.0707,
"step": 473
},
{
"epoch": 0.2156505914467698,
"grad_norm": 1.715089789819449,
"learning_rate": 9.954364280955832e-06,
"loss": 0.1045,
"step": 474
},
{
"epoch": 0.21610555050045496,
"grad_norm": 0.6668751648778155,
"learning_rate": 9.954171409733634e-06,
"loss": 0.0573,
"step": 475
},
{
"epoch": 0.21656050955414013,
"grad_norm": 0.5963716475430643,
"learning_rate": 9.95397813367892e-06,
"loss": 0.0752,
"step": 476
},
{
"epoch": 0.2170154686078253,
"grad_norm": 0.9917190233932158,
"learning_rate": 9.953784452807487e-06,
"loss": 0.1049,
"step": 477
},
{
"epoch": 0.21747042766151045,
"grad_norm": 0.5638529401686616,
"learning_rate": 9.953590367135159e-06,
"loss": 0.0547,
"step": 478
},
{
"epoch": 0.21792538671519562,
"grad_norm": 0.6477110515460727,
"learning_rate": 9.953395876677796e-06,
"loss": 0.0564,
"step": 479
},
{
"epoch": 0.2183803457688808,
"grad_norm": 0.5492055118574499,
"learning_rate": 9.95320098145129e-06,
"loss": 0.0505,
"step": 480
},
{
"epoch": 0.21883530482256597,
"grad_norm": 0.8954528378372288,
"learning_rate": 9.95300568147157e-06,
"loss": 0.126,
"step": 481
},
{
"epoch": 0.21929026387625114,
"grad_norm": 0.6155736143826033,
"learning_rate": 9.952809976754593e-06,
"loss": 0.0518,
"step": 482
},
{
"epoch": 0.2197452229299363,
"grad_norm": 1.1486004986445648,
"learning_rate": 9.952613867316351e-06,
"loss": 0.1142,
"step": 483
},
{
"epoch": 0.22020018198362148,
"grad_norm": 0.8236924325360948,
"learning_rate": 9.95241735317287e-06,
"loss": 0.1047,
"step": 484
},
{
"epoch": 0.22065514103730663,
"grad_norm": 0.832372102653505,
"learning_rate": 9.952220434340209e-06,
"loss": 0.0729,
"step": 485
},
{
"epoch": 0.2211101000909918,
"grad_norm": 0.7288716722109786,
"learning_rate": 9.952023110834456e-06,
"loss": 0.068,
"step": 486
},
{
"epoch": 0.22156505914467697,
"grad_norm": 0.5327254294033283,
"learning_rate": 9.951825382671739e-06,
"loss": 0.0614,
"step": 487
},
{
"epoch": 0.22202001819836215,
"grad_norm": 0.7204991379763186,
"learning_rate": 9.951627249868213e-06,
"loss": 0.0666,
"step": 488
},
{
"epoch": 0.22247497725204732,
"grad_norm": 0.7485835393026234,
"learning_rate": 9.95142871244007e-06,
"loss": 0.068,
"step": 489
},
{
"epoch": 0.2229299363057325,
"grad_norm": 0.45602532896445397,
"learning_rate": 9.951229770403531e-06,
"loss": 0.0414,
"step": 490
},
{
"epoch": 0.22338489535941766,
"grad_norm": 0.7240661348572547,
"learning_rate": 9.951030423774858e-06,
"loss": 0.0798,
"step": 491
},
{
"epoch": 0.22383985441310283,
"grad_norm": 0.7716352477687572,
"learning_rate": 9.950830672570337e-06,
"loss": 0.071,
"step": 492
},
{
"epoch": 0.22429481346678798,
"grad_norm": 1.22677184750836,
"learning_rate": 9.95063051680629e-06,
"loss": 0.1373,
"step": 493
},
{
"epoch": 0.22474977252047315,
"grad_norm": 0.7365431233953595,
"learning_rate": 9.950429956499074e-06,
"loss": 0.0699,
"step": 494
},
{
"epoch": 0.22520473157415832,
"grad_norm": 0.705654951368504,
"learning_rate": 9.950228991665078e-06,
"loss": 0.0741,
"step": 495
},
{
"epoch": 0.2256596906278435,
"grad_norm": 0.8261497906057415,
"learning_rate": 9.950027622320724e-06,
"loss": 0.0764,
"step": 496
},
{
"epoch": 0.22611464968152867,
"grad_norm": 0.9965395262255518,
"learning_rate": 9.949825848482465e-06,
"loss": 0.0852,
"step": 497
},
{
"epoch": 0.22656960873521384,
"grad_norm": 0.6807161957389707,
"learning_rate": 9.949623670166794e-06,
"loss": 0.074,
"step": 498
},
{
"epoch": 0.227024567788899,
"grad_norm": 1.1216390709095547,
"learning_rate": 9.949421087390228e-06,
"loss": 0.0931,
"step": 499
},
{
"epoch": 0.22747952684258416,
"grad_norm": 1.1278655216416786,
"learning_rate": 9.949218100169322e-06,
"loss": 0.1177,
"step": 500
},
{
"epoch": 0.22793448589626933,
"grad_norm": 0.9160591457448575,
"learning_rate": 9.949014708520664e-06,
"loss": 0.1015,
"step": 501
},
{
"epoch": 0.2283894449499545,
"grad_norm": 0.9377363057118697,
"learning_rate": 9.948810912460872e-06,
"loss": 0.1059,
"step": 502
},
{
"epoch": 0.22884440400363967,
"grad_norm": 0.8760932101779023,
"learning_rate": 9.948606712006601e-06,
"loss": 0.0812,
"step": 503
},
{
"epoch": 0.22929936305732485,
"grad_norm": 0.6962605051289937,
"learning_rate": 9.948402107174537e-06,
"loss": 0.0735,
"step": 504
},
{
"epoch": 0.22975432211101002,
"grad_norm": 0.6501265713488487,
"learning_rate": 9.948197097981401e-06,
"loss": 0.0551,
"step": 505
},
{
"epoch": 0.2302092811646952,
"grad_norm": 1.2156011775652311,
"learning_rate": 9.947991684443942e-06,
"loss": 0.1066,
"step": 506
},
{
"epoch": 0.23066424021838033,
"grad_norm": 0.9679794435610901,
"learning_rate": 9.947785866578951e-06,
"loss": 0.0981,
"step": 507
},
{
"epoch": 0.2311191992720655,
"grad_norm": 0.7195724631231237,
"learning_rate": 9.94757964440324e-06,
"loss": 0.0777,
"step": 508
},
{
"epoch": 0.23157415832575068,
"grad_norm": 0.549427502610929,
"learning_rate": 9.947373017933665e-06,
"loss": 0.0516,
"step": 509
},
{
"epoch": 0.23202911737943585,
"grad_norm": 0.5667212336170355,
"learning_rate": 9.947165987187108e-06,
"loss": 0.0583,
"step": 510
},
{
"epoch": 0.23248407643312102,
"grad_norm": 0.6638127935874616,
"learning_rate": 9.946958552180489e-06,
"loss": 0.0723,
"step": 511
},
{
"epoch": 0.2329390354868062,
"grad_norm": 0.5226768129517959,
"learning_rate": 9.946750712930756e-06,
"loss": 0.0482,
"step": 512
},
{
"epoch": 0.23339399454049137,
"grad_norm": 0.8358986518129136,
"learning_rate": 9.946542469454894e-06,
"loss": 0.1037,
"step": 513
},
{
"epoch": 0.2338489535941765,
"grad_norm": 0.6695809647699968,
"learning_rate": 9.94633382176992e-06,
"loss": 0.0728,
"step": 514
},
{
"epoch": 0.23430391264786168,
"grad_norm": 1.0608546974350634,
"learning_rate": 9.946124769892884e-06,
"loss": 0.1192,
"step": 515
},
{
"epoch": 0.23475887170154686,
"grad_norm": 0.5090717025630993,
"learning_rate": 9.945915313840869e-06,
"loss": 0.0612,
"step": 516
},
{
"epoch": 0.23521383075523203,
"grad_norm": 0.8105130307542814,
"learning_rate": 9.94570545363099e-06,
"loss": 0.0838,
"step": 517
},
{
"epoch": 0.2356687898089172,
"grad_norm": 0.7752986876049957,
"learning_rate": 9.945495189280394e-06,
"loss": 0.092,
"step": 518
},
{
"epoch": 0.23612374886260237,
"grad_norm": 0.869801315379322,
"learning_rate": 9.945284520806267e-06,
"loss": 0.077,
"step": 519
},
{
"epoch": 0.23657870791628755,
"grad_norm": 0.5427153243822386,
"learning_rate": 9.94507344822582e-06,
"loss": 0.0592,
"step": 520
},
{
"epoch": 0.2370336669699727,
"grad_norm": 0.7368670007832758,
"learning_rate": 9.944861971556305e-06,
"loss": 0.0608,
"step": 521
},
{
"epoch": 0.23748862602365786,
"grad_norm": 0.8141430793460733,
"learning_rate": 9.944650090814998e-06,
"loss": 0.0616,
"step": 522
},
{
"epoch": 0.23794358507734303,
"grad_norm": 2.1096588720516425,
"learning_rate": 9.944437806019216e-06,
"loss": 0.0938,
"step": 523
},
{
"epoch": 0.2383985441310282,
"grad_norm": 0.7014907085161215,
"learning_rate": 9.944225117186306e-06,
"loss": 0.0812,
"step": 524
},
{
"epoch": 0.23885350318471338,
"grad_norm": 0.5078467158211916,
"learning_rate": 9.944012024333647e-06,
"loss": 0.0561,
"step": 525
},
{
"epoch": 0.23930846223839855,
"grad_norm": 0.6379031604907951,
"learning_rate": 9.943798527478652e-06,
"loss": 0.0678,
"step": 526
},
{
"epoch": 0.23976342129208372,
"grad_norm": 0.799876019099874,
"learning_rate": 9.943584626638768e-06,
"loss": 0.0914,
"step": 527
},
{
"epoch": 0.24021838034576887,
"grad_norm": 0.6550229607349646,
"learning_rate": 9.943370321831474e-06,
"loss": 0.0668,
"step": 528
},
{
"epoch": 0.24067333939945404,
"grad_norm": 0.767534839542607,
"learning_rate": 9.943155613074279e-06,
"loss": 0.0711,
"step": 529
},
{
"epoch": 0.2411282984531392,
"grad_norm": 0.7571838990000624,
"learning_rate": 9.942940500384733e-06,
"loss": 0.0893,
"step": 530
},
{
"epoch": 0.24158325750682438,
"grad_norm": 17.807000846945513,
"learning_rate": 9.942724983780409e-06,
"loss": 0.3419,
"step": 531
},
{
"epoch": 0.24203821656050956,
"grad_norm": 1.2088422410181228,
"learning_rate": 9.942509063278922e-06,
"loss": 0.1173,
"step": 532
},
{
"epoch": 0.24249317561419473,
"grad_norm": 0.8811842157145667,
"learning_rate": 9.942292738897914e-06,
"loss": 0.1006,
"step": 533
},
{
"epoch": 0.2429481346678799,
"grad_norm": 0.7726281786442553,
"learning_rate": 9.942076010655063e-06,
"loss": 0.0909,
"step": 534
},
{
"epoch": 0.24340309372156507,
"grad_norm": 0.9942256398778268,
"learning_rate": 9.941858878568078e-06,
"loss": 0.134,
"step": 535
},
{
"epoch": 0.24385805277525022,
"grad_norm": 1.001596627292525,
"learning_rate": 9.941641342654702e-06,
"loss": 0.0977,
"step": 536
},
{
"epoch": 0.2443130118289354,
"grad_norm": 0.5064863363900076,
"learning_rate": 9.941423402932713e-06,
"loss": 0.0559,
"step": 537
},
{
"epoch": 0.24476797088262056,
"grad_norm": 0.8589680374278897,
"learning_rate": 9.94120505941992e-06,
"loss": 0.0992,
"step": 538
},
{
"epoch": 0.24522292993630573,
"grad_norm": 0.7830880681851201,
"learning_rate": 9.940986312134162e-06,
"loss": 0.0825,
"step": 539
},
{
"epoch": 0.2456778889899909,
"grad_norm": 0.5778344550660577,
"learning_rate": 9.940767161093316e-06,
"loss": 0.0637,
"step": 540
},
{
"epoch": 0.24613284804367608,
"grad_norm": 0.8661775200374767,
"learning_rate": 9.94054760631529e-06,
"loss": 0.0958,
"step": 541
},
{
"epoch": 0.24658780709736125,
"grad_norm": 0.6976226834296251,
"learning_rate": 9.940327647818026e-06,
"loss": 0.0752,
"step": 542
},
{
"epoch": 0.2470427661510464,
"grad_norm": 0.7530160135685138,
"learning_rate": 9.940107285619495e-06,
"loss": 0.077,
"step": 543
},
{
"epoch": 0.24749772520473157,
"grad_norm": 0.7997106896354084,
"learning_rate": 9.939886519737707e-06,
"loss": 0.0958,
"step": 544
},
{
"epoch": 0.24795268425841674,
"grad_norm": 0.8918061918047896,
"learning_rate": 9.939665350190702e-06,
"loss": 0.0822,
"step": 545
},
{
"epoch": 0.2484076433121019,
"grad_norm": 0.804115756264787,
"learning_rate": 9.93944377699655e-06,
"loss": 0.0915,
"step": 546
},
{
"epoch": 0.24886260236578708,
"grad_norm": 0.6234057941022288,
"learning_rate": 9.93922180017336e-06,
"loss": 0.0672,
"step": 547
},
{
"epoch": 0.24931756141947226,
"grad_norm": 0.8269450754551354,
"learning_rate": 9.93899941973927e-06,
"loss": 0.1102,
"step": 548
},
{
"epoch": 0.24977252047315743,
"grad_norm": 0.9233841316663005,
"learning_rate": 9.93877663571245e-06,
"loss": 0.0963,
"step": 549
},
{
"epoch": 0.2502274795268426,
"grad_norm": 0.9944861568923805,
"learning_rate": 9.938553448111108e-06,
"loss": 0.1127,
"step": 550
},
{
"epoch": 0.25068243858052774,
"grad_norm": 0.8423641298780182,
"learning_rate": 9.938329856953482e-06,
"loss": 0.0788,
"step": 551
},
{
"epoch": 0.25113739763421294,
"grad_norm": 0.8124861649110975,
"learning_rate": 9.938105862257839e-06,
"loss": 0.0831,
"step": 552
},
{
"epoch": 0.2515923566878981,
"grad_norm": 0.6612222253979325,
"learning_rate": 9.937881464042485e-06,
"loss": 0.0703,
"step": 553
},
{
"epoch": 0.25204731574158323,
"grad_norm": 0.854447666921162,
"learning_rate": 9.937656662325759e-06,
"loss": 0.1074,
"step": 554
},
{
"epoch": 0.25250227479526843,
"grad_norm": 0.74521770368624,
"learning_rate": 9.937431457126028e-06,
"loss": 0.0777,
"step": 555
},
{
"epoch": 0.2529572338489536,
"grad_norm": 0.5044600553216889,
"learning_rate": 9.937205848461694e-06,
"loss": 0.0482,
"step": 556
},
{
"epoch": 0.2534121929026388,
"grad_norm": 1.0949051966397356,
"learning_rate": 9.936979836351197e-06,
"loss": 0.0945,
"step": 557
},
{
"epoch": 0.2538671519563239,
"grad_norm": 1.0332199252594778,
"learning_rate": 9.936753420813003e-06,
"loss": 0.092,
"step": 558
},
{
"epoch": 0.2543221110100091,
"grad_norm": 0.7029577630748303,
"learning_rate": 9.936526601865612e-06,
"loss": 0.0612,
"step": 559
},
{
"epoch": 0.25477707006369427,
"grad_norm": 0.5251640812064944,
"learning_rate": 9.936299379527561e-06,
"loss": 0.0569,
"step": 560
},
{
"epoch": 0.2552320291173794,
"grad_norm": 0.6689496924283664,
"learning_rate": 9.936071753817416e-06,
"loss": 0.0831,
"step": 561
},
{
"epoch": 0.2556869881710646,
"grad_norm": 0.8094390650978945,
"learning_rate": 9.935843724753778e-06,
"loss": 0.0897,
"step": 562
},
{
"epoch": 0.25614194722474976,
"grad_norm": 0.9168849457874456,
"learning_rate": 9.935615292355283e-06,
"loss": 0.1002,
"step": 563
},
{
"epoch": 0.25659690627843496,
"grad_norm": 0.8829987760246157,
"learning_rate": 9.935386456640593e-06,
"loss": 0.0997,
"step": 564
},
{
"epoch": 0.2570518653321201,
"grad_norm": 0.9381858557170412,
"learning_rate": 9.93515721762841e-06,
"loss": 0.0926,
"step": 565
},
{
"epoch": 0.2575068243858053,
"grad_norm": 0.6555630906162114,
"learning_rate": 9.934927575337469e-06,
"loss": 0.0805,
"step": 566
},
{
"epoch": 0.25796178343949044,
"grad_norm": 0.49897284031908906,
"learning_rate": 9.93469752978653e-06,
"loss": 0.0545,
"step": 567
},
{
"epoch": 0.2584167424931756,
"grad_norm": 0.8528689809178094,
"learning_rate": 9.934467080994394e-06,
"loss": 0.071,
"step": 568
},
{
"epoch": 0.2588717015468608,
"grad_norm": 0.7999188284583189,
"learning_rate": 9.934236228979893e-06,
"loss": 0.0675,
"step": 569
},
{
"epoch": 0.25932666060054593,
"grad_norm": 0.6603615540899209,
"learning_rate": 9.934004973761888e-06,
"loss": 0.0584,
"step": 570
},
{
"epoch": 0.25978161965423113,
"grad_norm": 0.907545218090885,
"learning_rate": 9.933773315359281e-06,
"loss": 0.0912,
"step": 571
},
{
"epoch": 0.2602365787079163,
"grad_norm": 1.2225854103436529,
"learning_rate": 9.933541253790998e-06,
"loss": 0.0996,
"step": 572
},
{
"epoch": 0.2606915377616015,
"grad_norm": 0.821182112953313,
"learning_rate": 9.933308789076004e-06,
"loss": 0.0886,
"step": 573
},
{
"epoch": 0.2611464968152866,
"grad_norm": 0.5608593716975471,
"learning_rate": 9.933075921233292e-06,
"loss": 0.0597,
"step": 574
},
{
"epoch": 0.26160145586897177,
"grad_norm": 0.977094581221023,
"learning_rate": 9.932842650281897e-06,
"loss": 0.0796,
"step": 575
},
{
"epoch": 0.26205641492265697,
"grad_norm": 1.0086738407073246,
"learning_rate": 9.932608976240875e-06,
"loss": 0.1245,
"step": 576
},
{
"epoch": 0.2625113739763421,
"grad_norm": 0.7841605184531412,
"learning_rate": 9.932374899129323e-06,
"loss": 0.0798,
"step": 577
},
{
"epoch": 0.2629663330300273,
"grad_norm": 0.6360279282536222,
"learning_rate": 9.932140418966369e-06,
"loss": 0.0714,
"step": 578
},
{
"epoch": 0.26342129208371245,
"grad_norm": 0.8673569892639119,
"learning_rate": 9.931905535771174e-06,
"loss": 0.0805,
"step": 579
},
{
"epoch": 0.26387625113739765,
"grad_norm": 1.0489822111787226,
"learning_rate": 9.93167024956293e-06,
"loss": 0.1046,
"step": 580
},
{
"epoch": 0.2643312101910828,
"grad_norm": 0.5670611684906575,
"learning_rate": 9.931434560360864e-06,
"loss": 0.0662,
"step": 581
},
{
"epoch": 0.26478616924476794,
"grad_norm": 0.6786486717931198,
"learning_rate": 9.931198468184236e-06,
"loss": 0.0705,
"step": 582
},
{
"epoch": 0.26524112829845314,
"grad_norm": 0.7580601459978998,
"learning_rate": 9.93096197305234e-06,
"loss": 0.0852,
"step": 583
},
{
"epoch": 0.2656960873521383,
"grad_norm": 0.8802141056853473,
"learning_rate": 9.930725074984498e-06,
"loss": 0.0989,
"step": 584
},
{
"epoch": 0.2661510464058235,
"grad_norm": 0.6365186853726369,
"learning_rate": 9.930487774000071e-06,
"loss": 0.0639,
"step": 585
},
{
"epoch": 0.26660600545950863,
"grad_norm": 0.5301331320559389,
"learning_rate": 9.930250070118448e-06,
"loss": 0.0628,
"step": 586
},
{
"epoch": 0.26706096451319383,
"grad_norm": 0.6982626314754508,
"learning_rate": 9.930011963359055e-06,
"loss": 0.071,
"step": 587
},
{
"epoch": 0.267515923566879,
"grad_norm": 1.0151988128038116,
"learning_rate": 9.929773453741346e-06,
"loss": 0.1074,
"step": 588
},
{
"epoch": 0.2679708826205642,
"grad_norm": 0.809050548171497,
"learning_rate": 9.929534541284814e-06,
"loss": 0.0715,
"step": 589
},
{
"epoch": 0.2684258416742493,
"grad_norm": 0.8254901916718546,
"learning_rate": 9.929295226008981e-06,
"loss": 0.0867,
"step": 590
},
{
"epoch": 0.26888080072793447,
"grad_norm": 0.695875393623419,
"learning_rate": 9.929055507933403e-06,
"loss": 0.0667,
"step": 591
},
{
"epoch": 0.26933575978161967,
"grad_norm": 0.6569370607259161,
"learning_rate": 9.928815387077668e-06,
"loss": 0.0667,
"step": 592
},
{
"epoch": 0.2697907188353048,
"grad_norm": 0.8509989554819866,
"learning_rate": 9.9285748634614e-06,
"loss": 0.0964,
"step": 593
},
{
"epoch": 0.27024567788899,
"grad_norm": 0.7743154017799978,
"learning_rate": 9.928333937104249e-06,
"loss": 0.1008,
"step": 594
},
{
"epoch": 0.27070063694267515,
"grad_norm": 0.6810806452813069,
"learning_rate": 9.928092608025905e-06,
"loss": 0.0623,
"step": 595
},
{
"epoch": 0.27115559599636035,
"grad_norm": 0.6757764847225584,
"learning_rate": 9.927850876246087e-06,
"loss": 0.0621,
"step": 596
},
{
"epoch": 0.2716105550500455,
"grad_norm": 0.7561897396028232,
"learning_rate": 9.927608741784551e-06,
"loss": 0.0769,
"step": 597
},
{
"epoch": 0.27206551410373064,
"grad_norm": 0.9087608421567758,
"learning_rate": 9.927366204661081e-06,
"loss": 0.1064,
"step": 598
},
{
"epoch": 0.27252047315741584,
"grad_norm": 0.6090969825991095,
"learning_rate": 9.927123264895497e-06,
"loss": 0.0596,
"step": 599
},
{
"epoch": 0.272975432211101,
"grad_norm": 0.5838273869575724,
"learning_rate": 9.926879922507651e-06,
"loss": 0.0581,
"step": 600
},
{
"epoch": 0.2734303912647862,
"grad_norm": 41.16319851924577,
"learning_rate": 9.926636177517427e-06,
"loss": 0.7305,
"step": 601
},
{
"epoch": 0.27388535031847133,
"grad_norm": 0.7159907538362364,
"learning_rate": 9.926392029944743e-06,
"loss": 0.0655,
"step": 602
},
{
"epoch": 0.27434030937215653,
"grad_norm": 0.6649118967721417,
"learning_rate": 9.92614747980955e-06,
"loss": 0.0676,
"step": 603
},
{
"epoch": 0.2747952684258417,
"grad_norm": 0.6955588874689645,
"learning_rate": 9.92590252713183e-06,
"loss": 0.0691,
"step": 604
},
{
"epoch": 0.2752502274795268,
"grad_norm": 1.0093833512385355,
"learning_rate": 9.925657171931603e-06,
"loss": 0.0788,
"step": 605
},
{
"epoch": 0.275705186533212,
"grad_norm": 0.7222760734094591,
"learning_rate": 9.925411414228913e-06,
"loss": 0.0765,
"step": 606
},
{
"epoch": 0.27616014558689717,
"grad_norm": 0.7901083190949632,
"learning_rate": 9.925165254043846e-06,
"loss": 0.0899,
"step": 607
},
{
"epoch": 0.27661510464058237,
"grad_norm": 0.9417411536264935,
"learning_rate": 9.924918691396516e-06,
"loss": 0.105,
"step": 608
},
{
"epoch": 0.2770700636942675,
"grad_norm": 0.8531576003982281,
"learning_rate": 9.924671726307073e-06,
"loss": 0.0943,
"step": 609
},
{
"epoch": 0.2775250227479527,
"grad_norm": 0.5771833327707789,
"learning_rate": 9.924424358795694e-06,
"loss": 0.0649,
"step": 610
},
{
"epoch": 0.27797998180163785,
"grad_norm": 0.6804808150530418,
"learning_rate": 9.924176588882597e-06,
"loss": 0.0591,
"step": 611
},
{
"epoch": 0.278434940855323,
"grad_norm": 0.6916110773643345,
"learning_rate": 9.923928416588027e-06,
"loss": 0.082,
"step": 612
},
{
"epoch": 0.2788898999090082,
"grad_norm": 0.7302341341594485,
"learning_rate": 9.923679841932261e-06,
"loss": 0.0858,
"step": 613
},
{
"epoch": 0.27934485896269334,
"grad_norm": 0.7190514572276734,
"learning_rate": 9.923430864935615e-06,
"loss": 0.0658,
"step": 614
},
{
"epoch": 0.27979981801637854,
"grad_norm": 0.6872892360375661,
"learning_rate": 9.923181485618432e-06,
"loss": 0.0639,
"step": 615
},
{
"epoch": 0.2802547770700637,
"grad_norm": 0.6937876338258171,
"learning_rate": 9.92293170400109e-06,
"loss": 0.0759,
"step": 616
},
{
"epoch": 0.2807097361237489,
"grad_norm": 0.8498928251372749,
"learning_rate": 9.922681520104002e-06,
"loss": 0.0777,
"step": 617
},
{
"epoch": 0.28116469517743403,
"grad_norm": 0.7409609990217324,
"learning_rate": 9.922430933947612e-06,
"loss": 0.0665,
"step": 618
},
{
"epoch": 0.2816196542311192,
"grad_norm": 1.2216942184143182,
"learning_rate": 9.922179945552393e-06,
"loss": 0.1405,
"step": 619
},
{
"epoch": 0.2820746132848044,
"grad_norm": 0.6637234254274302,
"learning_rate": 9.921928554938857e-06,
"loss": 0.062,
"step": 620
},
{
"epoch": 0.2825295723384895,
"grad_norm": 0.9463087936758936,
"learning_rate": 9.921676762127548e-06,
"loss": 0.0767,
"step": 621
},
{
"epoch": 0.2829845313921747,
"grad_norm": 1.089309305809361,
"learning_rate": 9.921424567139042e-06,
"loss": 0.1171,
"step": 622
},
{
"epoch": 0.28343949044585987,
"grad_norm": 0.8752119302288704,
"learning_rate": 9.921171969993942e-06,
"loss": 0.0813,
"step": 623
},
{
"epoch": 0.28389444949954507,
"grad_norm": 0.7870883299373892,
"learning_rate": 9.920918970712894e-06,
"loss": 0.0993,
"step": 624
},
{
"epoch": 0.2843494085532302,
"grad_norm": 0.6504873266789636,
"learning_rate": 9.92066556931657e-06,
"loss": 0.073,
"step": 625
},
{
"epoch": 0.28480436760691535,
"grad_norm": 1.1098031698420505,
"learning_rate": 9.920411765825679e-06,
"loss": 0.1218,
"step": 626
},
{
"epoch": 0.28525932666060055,
"grad_norm": 1.217844501512982,
"learning_rate": 9.920157560260957e-06,
"loss": 0.1549,
"step": 627
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.9728161223416268,
"learning_rate": 9.919902952643179e-06,
"loss": 0.0984,
"step": 628
},
{
"epoch": 0.2861692447679709,
"grad_norm": 0.5217007184455262,
"learning_rate": 9.91964794299315e-06,
"loss": 0.0636,
"step": 629
},
{
"epoch": 0.28662420382165604,
"grad_norm": 1.7394407973312302,
"learning_rate": 9.919392531331706e-06,
"loss": 0.1686,
"step": 630
},
{
"epoch": 0.28707916287534124,
"grad_norm": 0.5702940927618096,
"learning_rate": 9.919136717679723e-06,
"loss": 0.0465,
"step": 631
},
{
"epoch": 0.2875341219290264,
"grad_norm": 0.5990973378462472,
"learning_rate": 9.9188805020581e-06,
"loss": 0.0678,
"step": 632
},
{
"epoch": 0.28798908098271153,
"grad_norm": 0.9343816967111115,
"learning_rate": 9.918623884487777e-06,
"loss": 0.1068,
"step": 633
},
{
"epoch": 0.28844404003639673,
"grad_norm": 0.5997939637509836,
"learning_rate": 9.91836686498972e-06,
"loss": 0.0629,
"step": 634
},
{
"epoch": 0.2888989990900819,
"grad_norm": 0.8063617612610782,
"learning_rate": 9.918109443584938e-06,
"loss": 0.0904,
"step": 635
},
{
"epoch": 0.2893539581437671,
"grad_norm": 0.6625405697250593,
"learning_rate": 9.917851620294461e-06,
"loss": 0.0638,
"step": 636
},
{
"epoch": 0.2898089171974522,
"grad_norm": 0.7423789779714624,
"learning_rate": 9.917593395139358e-06,
"loss": 0.0714,
"step": 637
},
{
"epoch": 0.2902638762511374,
"grad_norm": 0.6102576569607258,
"learning_rate": 9.91733476814073e-06,
"loss": 0.0563,
"step": 638
},
{
"epoch": 0.29071883530482256,
"grad_norm": 0.8342620452233175,
"learning_rate": 9.91707573931971e-06,
"loss": 0.0934,
"step": 639
},
{
"epoch": 0.2911737943585077,
"grad_norm": 0.6397583044633867,
"learning_rate": 9.916816308697468e-06,
"loss": 0.0608,
"step": 640
},
{
"epoch": 0.2916287534121929,
"grad_norm": 0.7837909798874247,
"learning_rate": 9.9165564762952e-06,
"loss": 0.0936,
"step": 641
},
{
"epoch": 0.29208371246587805,
"grad_norm": 0.9915309549496408,
"learning_rate": 9.916296242134142e-06,
"loss": 0.1364,
"step": 642
},
{
"epoch": 0.29253867151956325,
"grad_norm": 0.7722166587924495,
"learning_rate": 9.916035606235555e-06,
"loss": 0.1022,
"step": 643
},
{
"epoch": 0.2929936305732484,
"grad_norm": 0.6446192951972597,
"learning_rate": 9.915774568620739e-06,
"loss": 0.0794,
"step": 644
},
{
"epoch": 0.2934485896269336,
"grad_norm": 0.7655996282008942,
"learning_rate": 9.915513129311025e-06,
"loss": 0.083,
"step": 645
},
{
"epoch": 0.29390354868061874,
"grad_norm": 0.7358761993420325,
"learning_rate": 9.915251288327776e-06,
"loss": 0.0927,
"step": 646
},
{
"epoch": 0.2943585077343039,
"grad_norm": 0.8417441236168001,
"learning_rate": 9.914989045692388e-06,
"loss": 0.0791,
"step": 647
},
{
"epoch": 0.2948134667879891,
"grad_norm": 0.8847229450668922,
"learning_rate": 9.914726401426293e-06,
"loss": 0.1114,
"step": 648
},
{
"epoch": 0.29526842584167423,
"grad_norm": 0.6805089048669102,
"learning_rate": 9.91446335555095e-06,
"loss": 0.0645,
"step": 649
},
{
"epoch": 0.29572338489535943,
"grad_norm": 0.9967907781154212,
"learning_rate": 9.914199908087856e-06,
"loss": 0.1125,
"step": 650
},
{
"epoch": 0.2961783439490446,
"grad_norm": 0.7069764233646496,
"learning_rate": 9.913936059058537e-06,
"loss": 0.0961,
"step": 651
},
{
"epoch": 0.2966333030027298,
"grad_norm": 0.8237259808163154,
"learning_rate": 9.913671808484554e-06,
"loss": 0.0863,
"step": 652
},
{
"epoch": 0.2970882620564149,
"grad_norm": 0.5595221349609915,
"learning_rate": 9.913407156387503e-06,
"loss": 0.0477,
"step": 653
},
{
"epoch": 0.29754322111010006,
"grad_norm": 0.8322598543263076,
"learning_rate": 9.913142102789005e-06,
"loss": 0.0785,
"step": 654
},
{
"epoch": 0.29799818016378526,
"grad_norm": 0.9426946452527044,
"learning_rate": 9.912876647710723e-06,
"loss": 0.0993,
"step": 655
},
{
"epoch": 0.2984531392174704,
"grad_norm": 0.8902481236790349,
"learning_rate": 9.912610791174348e-06,
"loss": 0.0981,
"step": 656
},
{
"epoch": 0.2989080982711556,
"grad_norm": 0.6714333609160019,
"learning_rate": 9.912344533201604e-06,
"loss": 0.0716,
"step": 657
},
{
"epoch": 0.29936305732484075,
"grad_norm": 0.6721636461789662,
"learning_rate": 9.91207787381425e-06,
"loss": 0.0675,
"step": 658
},
{
"epoch": 0.29981801637852595,
"grad_norm": 0.628744075340254,
"learning_rate": 9.911810813034073e-06,
"loss": 0.0583,
"step": 659
},
{
"epoch": 0.3002729754322111,
"grad_norm": 0.9172548581720068,
"learning_rate": 9.9115433508829e-06,
"loss": 0.0972,
"step": 660
},
{
"epoch": 0.30072793448589624,
"grad_norm": 0.914462327674233,
"learning_rate": 9.911275487382583e-06,
"loss": 0.089,
"step": 661
},
{
"epoch": 0.30118289353958144,
"grad_norm": 0.7410939383575923,
"learning_rate": 9.911007222555011e-06,
"loss": 0.0744,
"step": 662
},
{
"epoch": 0.3016378525932666,
"grad_norm": 0.6952942958219819,
"learning_rate": 9.91073855642211e-06,
"loss": 0.0627,
"step": 663
},
{
"epoch": 0.3020928116469518,
"grad_norm": 0.8802064643150562,
"learning_rate": 9.910469489005828e-06,
"loss": 0.0836,
"step": 664
},
{
"epoch": 0.30254777070063693,
"grad_norm": 0.9015922573736656,
"learning_rate": 9.910200020328158e-06,
"loss": 0.0934,
"step": 665
},
{
"epoch": 0.30300272975432213,
"grad_norm": 0.6635682732023674,
"learning_rate": 9.909930150411113e-06,
"loss": 0.0623,
"step": 666
},
{
"epoch": 0.3034576888080073,
"grad_norm": 1.928152977107998,
"learning_rate": 9.909659879276751e-06,
"loss": 0.1457,
"step": 667
},
{
"epoch": 0.3039126478616925,
"grad_norm": 0.7754006092902415,
"learning_rate": 9.909389206947156e-06,
"loss": 0.0621,
"step": 668
},
{
"epoch": 0.3043676069153776,
"grad_norm": 1.0461982822616211,
"learning_rate": 9.909118133444444e-06,
"loss": 0.1087,
"step": 669
},
{
"epoch": 0.30482256596906276,
"grad_norm": 0.7981897376851527,
"learning_rate": 9.90884665879077e-06,
"loss": 0.0921,
"step": 670
},
{
"epoch": 0.30527752502274796,
"grad_norm": 0.8941901965354629,
"learning_rate": 9.908574783008313e-06,
"loss": 0.1055,
"step": 671
},
{
"epoch": 0.3057324840764331,
"grad_norm": 1.0219508428898654,
"learning_rate": 9.908302506119291e-06,
"loss": 0.1152,
"step": 672
},
{
"epoch": 0.3061874431301183,
"grad_norm": 0.7623168423299865,
"learning_rate": 9.908029828145956e-06,
"loss": 0.0837,
"step": 673
},
{
"epoch": 0.30664240218380345,
"grad_norm": 0.7026665400337327,
"learning_rate": 9.907756749110587e-06,
"loss": 0.0785,
"step": 674
},
{
"epoch": 0.30709736123748865,
"grad_norm": 1.0861630797383492,
"learning_rate": 9.9074832690355e-06,
"loss": 0.1121,
"step": 675
},
{
"epoch": 0.3075523202911738,
"grad_norm": 0.8171913655631801,
"learning_rate": 9.907209387943042e-06,
"loss": 0.0759,
"step": 676
},
{
"epoch": 0.30800727934485894,
"grad_norm": 0.695009650682766,
"learning_rate": 9.906935105855595e-06,
"loss": 0.0508,
"step": 677
},
{
"epoch": 0.30846223839854414,
"grad_norm": 1.1629680848047237,
"learning_rate": 9.906660422795569e-06,
"loss": 0.1123,
"step": 678
},
{
"epoch": 0.3089171974522293,
"grad_norm": 1.1028006392582481,
"learning_rate": 9.906385338785411e-06,
"loss": 0.1048,
"step": 679
},
{
"epoch": 0.3093721565059145,
"grad_norm": 0.8590661780887954,
"learning_rate": 9.906109853847601e-06,
"loss": 0.0947,
"step": 680
},
{
"epoch": 0.30982711555959963,
"grad_norm": 0.9160314729851723,
"learning_rate": 9.90583396800465e-06,
"loss": 0.0928,
"step": 681
},
{
"epoch": 0.31028207461328483,
"grad_norm": 0.8935511298088069,
"learning_rate": 9.9055576812791e-06,
"loss": 0.0996,
"step": 682
},
{
"epoch": 0.31073703366697,
"grad_norm": 0.7005723015579258,
"learning_rate": 9.905280993693533e-06,
"loss": 0.0863,
"step": 683
},
{
"epoch": 0.3111919927206551,
"grad_norm": 0.6441434987399284,
"learning_rate": 9.905003905270553e-06,
"loss": 0.0682,
"step": 684
},
{
"epoch": 0.3116469517743403,
"grad_norm": 0.9609160991558658,
"learning_rate": 9.904726416032803e-06,
"loss": 0.1095,
"step": 685
},
{
"epoch": 0.31210191082802546,
"grad_norm": 0.723787688745946,
"learning_rate": 9.904448526002963e-06,
"loss": 0.0637,
"step": 686
},
{
"epoch": 0.31255686988171066,
"grad_norm": 0.5250433090776031,
"learning_rate": 9.904170235203737e-06,
"loss": 0.0587,
"step": 687
},
{
"epoch": 0.3130118289353958,
"grad_norm": 0.8819438583914972,
"learning_rate": 9.903891543657866e-06,
"loss": 0.1112,
"step": 688
},
{
"epoch": 0.313466787989081,
"grad_norm": 0.5413774773467063,
"learning_rate": 9.903612451388122e-06,
"loss": 0.0722,
"step": 689
},
{
"epoch": 0.31392174704276615,
"grad_norm": 0.8913097595158456,
"learning_rate": 9.903332958417315e-06,
"loss": 0.0893,
"step": 690
},
{
"epoch": 0.3143767060964513,
"grad_norm": 0.6466979890354269,
"learning_rate": 9.903053064768283e-06,
"loss": 0.0709,
"step": 691
},
{
"epoch": 0.3148316651501365,
"grad_norm": 0.8428101951038133,
"learning_rate": 9.902772770463892e-06,
"loss": 0.0814,
"step": 692
},
{
"epoch": 0.31528662420382164,
"grad_norm": 0.5832299371816577,
"learning_rate": 9.902492075527057e-06,
"loss": 0.0597,
"step": 693
},
{
"epoch": 0.31574158325750684,
"grad_norm": 0.7856263020740725,
"learning_rate": 9.902210979980705e-06,
"loss": 0.074,
"step": 694
},
{
"epoch": 0.316196542311192,
"grad_norm": 0.8507681095680276,
"learning_rate": 9.90192948384781e-06,
"loss": 0.0941,
"step": 695
},
{
"epoch": 0.3166515013648772,
"grad_norm": 0.7777857824270489,
"learning_rate": 9.901647587151376e-06,
"loss": 0.0708,
"step": 696
},
{
"epoch": 0.31710646041856233,
"grad_norm": 1.068022521735614,
"learning_rate": 9.901365289914437e-06,
"loss": 0.108,
"step": 697
},
{
"epoch": 0.3175614194722475,
"grad_norm": 1.1320770025873614,
"learning_rate": 9.901082592160059e-06,
"loss": 0.108,
"step": 698
},
{
"epoch": 0.3180163785259327,
"grad_norm": 0.803518334023751,
"learning_rate": 9.900799493911346e-06,
"loss": 0.0871,
"step": 699
},
{
"epoch": 0.3184713375796178,
"grad_norm": 0.8188444942805464,
"learning_rate": 9.900515995191431e-06,
"loss": 0.0808,
"step": 700
},
{
"epoch": 0.318926296633303,
"grad_norm": 0.8993527964087475,
"learning_rate": 9.900232096023478e-06,
"loss": 0.0821,
"step": 701
},
{
"epoch": 0.31938125568698816,
"grad_norm": 0.5600271316880729,
"learning_rate": 9.899947796430687e-06,
"loss": 0.0478,
"step": 702
},
{
"epoch": 0.31983621474067336,
"grad_norm": 0.8369718087747545,
"learning_rate": 9.899663096436292e-06,
"loss": 0.0871,
"step": 703
},
{
"epoch": 0.3202911737943585,
"grad_norm": 0.8993771893247359,
"learning_rate": 9.899377996063554e-06,
"loss": 0.0858,
"step": 704
},
{
"epoch": 0.32074613284804365,
"grad_norm": 0.6615773523414142,
"learning_rate": 9.899092495335772e-06,
"loss": 0.0601,
"step": 705
},
{
"epoch": 0.32120109190172885,
"grad_norm": 0.8278593900178107,
"learning_rate": 9.898806594276273e-06,
"loss": 0.0769,
"step": 706
},
{
"epoch": 0.321656050955414,
"grad_norm": 0.7866286577186284,
"learning_rate": 9.898520292908425e-06,
"loss": 0.0894,
"step": 707
},
{
"epoch": 0.3221110100090992,
"grad_norm": 0.8050313615570786,
"learning_rate": 9.89823359125562e-06,
"loss": 0.0732,
"step": 708
},
{
"epoch": 0.32256596906278434,
"grad_norm": 1.0243914254387991,
"learning_rate": 9.897946489341286e-06,
"loss": 0.0901,
"step": 709
},
{
"epoch": 0.32302092811646954,
"grad_norm": 0.7036337195424629,
"learning_rate": 9.897658987188882e-06,
"loss": 0.0686,
"step": 710
},
{
"epoch": 0.3234758871701547,
"grad_norm": 0.5593772745397846,
"learning_rate": 9.897371084821905e-06,
"loss": 0.045,
"step": 711
},
{
"epoch": 0.32393084622383983,
"grad_norm": 0.608867956874154,
"learning_rate": 9.897082782263878e-06,
"loss": 0.0692,
"step": 712
},
{
"epoch": 0.32438580527752503,
"grad_norm": 0.6488333561840038,
"learning_rate": 9.896794079538362e-06,
"loss": 0.0513,
"step": 713
},
{
"epoch": 0.3248407643312102,
"grad_norm": 0.5593745607285364,
"learning_rate": 9.896504976668948e-06,
"loss": 0.0437,
"step": 714
},
{
"epoch": 0.3252957233848954,
"grad_norm": 0.5072427035814352,
"learning_rate": 9.896215473679259e-06,
"loss": 0.0566,
"step": 715
},
{
"epoch": 0.3257506824385805,
"grad_norm": 0.7088539736923404,
"learning_rate": 9.895925570592952e-06,
"loss": 0.0878,
"step": 716
},
{
"epoch": 0.3262056414922657,
"grad_norm": 0.9653520712469312,
"learning_rate": 9.895635267433719e-06,
"loss": 0.101,
"step": 717
},
{
"epoch": 0.32666060054595086,
"grad_norm": 1.2323140645024868,
"learning_rate": 9.895344564225277e-06,
"loss": 0.1359,
"step": 718
},
{
"epoch": 0.327115559599636,
"grad_norm": 0.6826807669546061,
"learning_rate": 9.895053460991389e-06,
"loss": 0.0799,
"step": 719
},
{
"epoch": 0.3275705186533212,
"grad_norm": 0.9496304010026827,
"learning_rate": 9.894761957755834e-06,
"loss": 0.0928,
"step": 720
},
{
"epoch": 0.32802547770700635,
"grad_norm": 0.8578622125964999,
"learning_rate": 9.894470054542438e-06,
"loss": 0.1149,
"step": 721
},
{
"epoch": 0.32848043676069155,
"grad_norm": 0.5483719717114235,
"learning_rate": 9.894177751375053e-06,
"loss": 0.0621,
"step": 722
},
{
"epoch": 0.3289353958143767,
"grad_norm": 0.6341198897869947,
"learning_rate": 9.893885048277564e-06,
"loss": 0.0568,
"step": 723
},
{
"epoch": 0.3293903548680619,
"grad_norm": 0.7169738278552924,
"learning_rate": 9.893591945273888e-06,
"loss": 0.0752,
"step": 724
},
{
"epoch": 0.32984531392174704,
"grad_norm": 0.9839905963719277,
"learning_rate": 9.89329844238798e-06,
"loss": 0.1167,
"step": 725
},
{
"epoch": 0.3303002729754322,
"grad_norm": 0.6825969142747964,
"learning_rate": 9.89300453964382e-06,
"loss": 0.0693,
"step": 726
},
{
"epoch": 0.3307552320291174,
"grad_norm": 1.0420794853330364,
"learning_rate": 9.892710237065423e-06,
"loss": 0.1561,
"step": 727
},
{
"epoch": 0.33121019108280253,
"grad_norm": 1.0109988913697336,
"learning_rate": 9.892415534676844e-06,
"loss": 0.0813,
"step": 728
},
{
"epoch": 0.33166515013648773,
"grad_norm": 0.6237179977245606,
"learning_rate": 9.892120432502161e-06,
"loss": 0.063,
"step": 729
},
{
"epoch": 0.3321201091901729,
"grad_norm": 0.7047649578988654,
"learning_rate": 9.891824930565488e-06,
"loss": 0.0757,
"step": 730
},
{
"epoch": 0.3325750682438581,
"grad_norm": 0.8381336709785119,
"learning_rate": 9.891529028890974e-06,
"loss": 0.1137,
"step": 731
},
{
"epoch": 0.3330300272975432,
"grad_norm": 1.108812928457643,
"learning_rate": 9.891232727502797e-06,
"loss": 0.0971,
"step": 732
},
{
"epoch": 0.33348498635122836,
"grad_norm": 0.8911550238765422,
"learning_rate": 9.89093602642517e-06,
"loss": 0.0869,
"step": 733
},
{
"epoch": 0.33393994540491356,
"grad_norm": 0.7527062298816352,
"learning_rate": 9.890638925682339e-06,
"loss": 0.085,
"step": 734
},
{
"epoch": 0.3343949044585987,
"grad_norm": 0.8028637093759472,
"learning_rate": 9.89034142529858e-06,
"loss": 0.0866,
"step": 735
},
{
"epoch": 0.3348498635122839,
"grad_norm": 0.6620365400447171,
"learning_rate": 9.890043525298203e-06,
"loss": 0.053,
"step": 736
},
{
"epoch": 0.33530482256596905,
"grad_norm": 0.6606838089782118,
"learning_rate": 9.889745225705555e-06,
"loss": 0.0783,
"step": 737
},
{
"epoch": 0.33575978161965425,
"grad_norm": 0.6719238881234298,
"learning_rate": 9.889446526545007e-06,
"loss": 0.079,
"step": 738
},
{
"epoch": 0.3362147406733394,
"grad_norm": 0.7379881342173255,
"learning_rate": 9.88914742784097e-06,
"loss": 0.0848,
"step": 739
},
{
"epoch": 0.33666969972702454,
"grad_norm": 1.9725398231448836,
"learning_rate": 9.888847929617887e-06,
"loss": 0.1666,
"step": 740
},
{
"epoch": 0.33712465878070974,
"grad_norm": 0.7800667095330575,
"learning_rate": 9.888548031900226e-06,
"loss": 0.0779,
"step": 741
},
{
"epoch": 0.3375796178343949,
"grad_norm": 0.9725198572426639,
"learning_rate": 9.888247734712497e-06,
"loss": 0.0719,
"step": 742
},
{
"epoch": 0.3380345768880801,
"grad_norm": 0.9547104503470986,
"learning_rate": 9.887947038079238e-06,
"loss": 0.1119,
"step": 743
},
{
"epoch": 0.33848953594176523,
"grad_norm": 0.5879353672489683,
"learning_rate": 9.887645942025022e-06,
"loss": 0.0553,
"step": 744
},
{
"epoch": 0.33894449499545043,
"grad_norm": 0.5485885922626542,
"learning_rate": 9.887344446574452e-06,
"loss": 0.0494,
"step": 745
},
{
"epoch": 0.3393994540491356,
"grad_norm": 0.9640668269863656,
"learning_rate": 9.887042551752163e-06,
"loss": 0.1104,
"step": 746
},
{
"epoch": 0.3398544131028208,
"grad_norm": 0.8639463935480832,
"learning_rate": 9.886740257582827e-06,
"loss": 0.0655,
"step": 747
},
{
"epoch": 0.3403093721565059,
"grad_norm": 0.6489702107287116,
"learning_rate": 9.886437564091148e-06,
"loss": 0.0777,
"step": 748
},
{
"epoch": 0.34076433121019106,
"grad_norm": 0.8236523684362178,
"learning_rate": 9.886134471301854e-06,
"loss": 0.0916,
"step": 749
},
{
"epoch": 0.34121929026387626,
"grad_norm": 0.8459143900125461,
"learning_rate": 9.885830979239718e-06,
"loss": 0.1017,
"step": 750
},
{
"epoch": 0.3416742493175614,
"grad_norm": 0.7496065352262437,
"learning_rate": 9.885527087929541e-06,
"loss": 0.0861,
"step": 751
},
{
"epoch": 0.3421292083712466,
"grad_norm": 0.849292513666517,
"learning_rate": 9.88522279739615e-06,
"loss": 0.0839,
"step": 752
},
{
"epoch": 0.34258416742493175,
"grad_norm": 0.7756671663835698,
"learning_rate": 9.884918107664417e-06,
"loss": 0.0809,
"step": 753
},
{
"epoch": 0.34303912647861695,
"grad_norm": 0.7338987681003677,
"learning_rate": 9.884613018759234e-06,
"loss": 0.0721,
"step": 754
},
{
"epoch": 0.3434940855323021,
"grad_norm": 0.6003946948163056,
"learning_rate": 9.884307530705534e-06,
"loss": 0.0782,
"step": 755
},
{
"epoch": 0.34394904458598724,
"grad_norm": 0.5309561440373582,
"learning_rate": 9.88400164352828e-06,
"loss": 0.0563,
"step": 756
},
{
"epoch": 0.34440400363967244,
"grad_norm": 0.6551261739802692,
"learning_rate": 9.883695357252467e-06,
"loss": 0.061,
"step": 757
},
{
"epoch": 0.3448589626933576,
"grad_norm": 0.6598139820416582,
"learning_rate": 9.883388671903125e-06,
"loss": 0.084,
"step": 758
},
{
"epoch": 0.3453139217470428,
"grad_norm": 0.8678451615084499,
"learning_rate": 9.883081587505315e-06,
"loss": 0.0893,
"step": 759
},
{
"epoch": 0.34576888080072793,
"grad_norm": 0.8849976199871086,
"learning_rate": 9.882774104084127e-06,
"loss": 0.0938,
"step": 760
},
{
"epoch": 0.34622383985441313,
"grad_norm": 0.6157555054475868,
"learning_rate": 9.882466221664691e-06,
"loss": 0.0535,
"step": 761
},
{
"epoch": 0.3466787989080983,
"grad_norm": 0.9555128068667961,
"learning_rate": 9.882157940272165e-06,
"loss": 0.0984,
"step": 762
},
{
"epoch": 0.3471337579617834,
"grad_norm": 0.8431106213501941,
"learning_rate": 9.881849259931738e-06,
"loss": 0.1062,
"step": 763
},
{
"epoch": 0.3475887170154686,
"grad_norm": 0.6608166650909644,
"learning_rate": 9.881540180668637e-06,
"loss": 0.0589,
"step": 764
},
{
"epoch": 0.34804367606915376,
"grad_norm": 0.7177237690901401,
"learning_rate": 9.881230702508118e-06,
"loss": 0.0721,
"step": 765
},
{
"epoch": 0.34849863512283896,
"grad_norm": 0.49396541889218665,
"learning_rate": 9.880920825475468e-06,
"loss": 0.0582,
"step": 766
},
{
"epoch": 0.3489535941765241,
"grad_norm": 0.7008727540015932,
"learning_rate": 9.88061054959601e-06,
"loss": 0.0689,
"step": 767
},
{
"epoch": 0.3494085532302093,
"grad_norm": 0.6417543130209264,
"learning_rate": 9.880299874895098e-06,
"loss": 0.0859,
"step": 768
},
{
"epoch": 0.34986351228389445,
"grad_norm": 0.5325758158155319,
"learning_rate": 9.879988801398121e-06,
"loss": 0.0508,
"step": 769
},
{
"epoch": 0.3503184713375796,
"grad_norm": 0.653129374155715,
"learning_rate": 9.879677329130496e-06,
"loss": 0.0822,
"step": 770
},
{
"epoch": 0.3507734303912648,
"grad_norm": 0.6044703796770591,
"learning_rate": 9.879365458117678e-06,
"loss": 0.0662,
"step": 771
},
{
"epoch": 0.35122838944494994,
"grad_norm": 0.6417796330386928,
"learning_rate": 9.879053188385148e-06,
"loss": 0.0649,
"step": 772
},
{
"epoch": 0.35168334849863514,
"grad_norm": 0.6127493684308597,
"learning_rate": 9.878740519958425e-06,
"loss": 0.0601,
"step": 773
},
{
"epoch": 0.3521383075523203,
"grad_norm": 0.9092296350808027,
"learning_rate": 9.878427452863059e-06,
"loss": 0.1138,
"step": 774
},
{
"epoch": 0.3525932666060055,
"grad_norm": 0.8850379239223551,
"learning_rate": 9.878113987124633e-06,
"loss": 0.1135,
"step": 775
},
{
"epoch": 0.35304822565969063,
"grad_norm": 0.8106864823035035,
"learning_rate": 9.877800122768761e-06,
"loss": 0.084,
"step": 776
},
{
"epoch": 0.3535031847133758,
"grad_norm": 0.6717791100158048,
"learning_rate": 9.877485859821092e-06,
"loss": 0.0764,
"step": 777
},
{
"epoch": 0.353958143767061,
"grad_norm": 0.4266356830653338,
"learning_rate": 9.877171198307304e-06,
"loss": 0.0496,
"step": 778
},
{
"epoch": 0.3544131028207461,
"grad_norm": 0.7839112755574695,
"learning_rate": 9.87685613825311e-06,
"loss": 0.0864,
"step": 779
},
{
"epoch": 0.3548680618744313,
"grad_norm": 0.8928629316475961,
"learning_rate": 9.876540679684257e-06,
"loss": 0.0802,
"step": 780
},
{
"epoch": 0.35532302092811646,
"grad_norm": 0.7427060191976654,
"learning_rate": 9.876224822626522e-06,
"loss": 0.0809,
"step": 781
},
{
"epoch": 0.35577797998180166,
"grad_norm": 0.6618589317208607,
"learning_rate": 9.875908567105716e-06,
"loss": 0.0633,
"step": 782
},
{
"epoch": 0.3562329390354868,
"grad_norm": 0.9168643329932029,
"learning_rate": 9.87559191314768e-06,
"loss": 0.0977,
"step": 783
},
{
"epoch": 0.35668789808917195,
"grad_norm": 1.010661772545197,
"learning_rate": 9.87527486077829e-06,
"loss": 0.112,
"step": 784
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.7355960177801563,
"learning_rate": 9.874957410023458e-06,
"loss": 0.0578,
"step": 785
},
{
"epoch": 0.3575978161965423,
"grad_norm": 0.7012046376593928,
"learning_rate": 9.874639560909118e-06,
"loss": 0.0856,
"step": 786
},
{
"epoch": 0.3580527752502275,
"grad_norm": 0.629856671324697,
"learning_rate": 9.87432131346125e-06,
"loss": 0.079,
"step": 787
},
{
"epoch": 0.35850773430391264,
"grad_norm": 0.6605442679933491,
"learning_rate": 9.874002667705855e-06,
"loss": 0.0713,
"step": 788
},
{
"epoch": 0.35896269335759784,
"grad_norm": 0.6036439966816435,
"learning_rate": 9.873683623668972e-06,
"loss": 0.0734,
"step": 789
},
{
"epoch": 0.359417652411283,
"grad_norm": 0.9098464282834562,
"learning_rate": 9.873364181376674e-06,
"loss": 0.1273,
"step": 790
},
{
"epoch": 0.35987261146496813,
"grad_norm": 0.725232432410699,
"learning_rate": 9.873044340855062e-06,
"loss": 0.0704,
"step": 791
},
{
"epoch": 0.36032757051865333,
"grad_norm": 0.8275864687946802,
"learning_rate": 9.872724102130273e-06,
"loss": 0.0722,
"step": 792
},
{
"epoch": 0.3607825295723385,
"grad_norm": 0.6908762665090429,
"learning_rate": 9.872403465228476e-06,
"loss": 0.068,
"step": 793
},
{
"epoch": 0.3612374886260237,
"grad_norm": 0.8007479624540592,
"learning_rate": 9.872082430175871e-06,
"loss": 0.0792,
"step": 794
},
{
"epoch": 0.3616924476797088,
"grad_norm": 0.7580697654486878,
"learning_rate": 9.871760996998692e-06,
"loss": 0.0662,
"step": 795
},
{
"epoch": 0.362147406733394,
"grad_norm": 1.0378802589927232,
"learning_rate": 9.871439165723207e-06,
"loss": 0.0905,
"step": 796
},
{
"epoch": 0.36260236578707916,
"grad_norm": 0.9366156924362913,
"learning_rate": 9.87111693637571e-06,
"loss": 0.0966,
"step": 797
},
{
"epoch": 0.3630573248407643,
"grad_norm": 0.9568919919938076,
"learning_rate": 9.870794308982536e-06,
"loss": 0.1092,
"step": 798
},
{
"epoch": 0.3635122838944495,
"grad_norm": 1.0303944561108107,
"learning_rate": 9.870471283570046e-06,
"loss": 0.1214,
"step": 799
},
{
"epoch": 0.36396724294813465,
"grad_norm": 0.7123988620535131,
"learning_rate": 9.870147860164639e-06,
"loss": 0.0952,
"step": 800
},
{
"epoch": 0.36442220200181985,
"grad_norm": 0.6461145025804255,
"learning_rate": 9.86982403879274e-06,
"loss": 0.0653,
"step": 801
},
{
"epoch": 0.364877161055505,
"grad_norm": 0.761176238728339,
"learning_rate": 9.869499819480815e-06,
"loss": 0.0911,
"step": 802
},
{
"epoch": 0.3653321201091902,
"grad_norm": 0.6778284620896282,
"learning_rate": 9.869175202255354e-06,
"loss": 0.0726,
"step": 803
},
{
"epoch": 0.36578707916287534,
"grad_norm": 0.6378934869683002,
"learning_rate": 9.868850187142885e-06,
"loss": 0.0721,
"step": 804
},
{
"epoch": 0.3662420382165605,
"grad_norm": 0.725078464245391,
"learning_rate": 9.868524774169968e-06,
"loss": 0.0774,
"step": 805
},
{
"epoch": 0.3666969972702457,
"grad_norm": 0.7707907185217752,
"learning_rate": 9.86819896336319e-06,
"loss": 0.067,
"step": 806
},
{
"epoch": 0.36715195632393083,
"grad_norm": 0.8162851407409059,
"learning_rate": 9.867872754749178e-06,
"loss": 0.0908,
"step": 807
},
{
"epoch": 0.36760691537761603,
"grad_norm": 0.5330499489332517,
"learning_rate": 9.867546148354586e-06,
"loss": 0.066,
"step": 808
},
{
"epoch": 0.3680618744313012,
"grad_norm": 0.6649993383235931,
"learning_rate": 9.867219144206105e-06,
"loss": 0.0672,
"step": 809
},
{
"epoch": 0.3685168334849864,
"grad_norm": 0.9824606570699352,
"learning_rate": 9.866891742330458e-06,
"loss": 0.11,
"step": 810
},
{
"epoch": 0.3689717925386715,
"grad_norm": 0.6507791006697302,
"learning_rate": 9.866563942754394e-06,
"loss": 0.0622,
"step": 811
},
{
"epoch": 0.36942675159235666,
"grad_norm": 0.7455907568930894,
"learning_rate": 9.866235745504705e-06,
"loss": 0.0833,
"step": 812
},
{
"epoch": 0.36988171064604186,
"grad_norm": 0.9927293122267482,
"learning_rate": 9.865907150608203e-06,
"loss": 0.0978,
"step": 813
},
{
"epoch": 0.370336669699727,
"grad_norm": 0.817279180213694,
"learning_rate": 9.865578158091746e-06,
"loss": 0.1036,
"step": 814
},
{
"epoch": 0.3707916287534122,
"grad_norm": 0.9966504261459711,
"learning_rate": 9.865248767982211e-06,
"loss": 0.1027,
"step": 815
},
{
"epoch": 0.37124658780709735,
"grad_norm": 0.9561727776097537,
"learning_rate": 9.864918980306521e-06,
"loss": 0.1136,
"step": 816
},
{
"epoch": 0.37170154686078255,
"grad_norm": 0.6718095123705313,
"learning_rate": 9.86458879509162e-06,
"loss": 0.0762,
"step": 817
},
{
"epoch": 0.3721565059144677,
"grad_norm": 0.9803345299998187,
"learning_rate": 9.864258212364492e-06,
"loss": 0.0791,
"step": 818
},
{
"epoch": 0.37261146496815284,
"grad_norm": 0.8058679812037255,
"learning_rate": 9.86392723215215e-06,
"loss": 0.069,
"step": 819
},
{
"epoch": 0.37306642402183804,
"grad_norm": 0.5836160590759203,
"learning_rate": 9.86359585448164e-06,
"loss": 0.0621,
"step": 820
},
{
"epoch": 0.3735213830755232,
"grad_norm": 0.6511599091669776,
"learning_rate": 9.863264079380039e-06,
"loss": 0.0745,
"step": 821
},
{
"epoch": 0.3739763421292084,
"grad_norm": 0.9308266206126162,
"learning_rate": 9.862931906874461e-06,
"loss": 0.1132,
"step": 822
},
{
"epoch": 0.37443130118289353,
"grad_norm": 0.613775373571284,
"learning_rate": 9.862599336992048e-06,
"loss": 0.0545,
"step": 823
},
{
"epoch": 0.37488626023657873,
"grad_norm": 0.6991388893487894,
"learning_rate": 9.862266369759976e-06,
"loss": 0.0754,
"step": 824
},
{
"epoch": 0.37534121929026387,
"grad_norm": 0.6352968005261165,
"learning_rate": 9.861933005205454e-06,
"loss": 0.0576,
"step": 825
},
{
"epoch": 0.37579617834394907,
"grad_norm": 1.109194467922723,
"learning_rate": 9.861599243355725e-06,
"loss": 0.1281,
"step": 826
},
{
"epoch": 0.3762511373976342,
"grad_norm": 0.9742134289860664,
"learning_rate": 9.86126508423806e-06,
"loss": 0.1067,
"step": 827
},
{
"epoch": 0.37670609645131936,
"grad_norm": 0.6015820455914206,
"learning_rate": 9.860930527879763e-06,
"loss": 0.055,
"step": 828
},
{
"epoch": 0.37716105550500456,
"grad_norm": 1.0894948091440197,
"learning_rate": 9.860595574308179e-06,
"loss": 0.1147,
"step": 829
},
{
"epoch": 0.3776160145586897,
"grad_norm": 0.7023892750192133,
"learning_rate": 9.860260223550672e-06,
"loss": 0.0815,
"step": 830
},
{
"epoch": 0.3780709736123749,
"grad_norm": 0.4943868719085533,
"learning_rate": 9.859924475634649e-06,
"loss": 0.0476,
"step": 831
},
{
"epoch": 0.37852593266606005,
"grad_norm": 0.9974648765413693,
"learning_rate": 9.859588330587545e-06,
"loss": 0.1068,
"step": 832
},
{
"epoch": 0.37898089171974525,
"grad_norm": 0.5960289391531881,
"learning_rate": 9.859251788436829e-06,
"loss": 0.0715,
"step": 833
},
{
"epoch": 0.3794358507734304,
"grad_norm": 0.907079582974149,
"learning_rate": 9.85891484921e-06,
"loss": 0.0905,
"step": 834
},
{
"epoch": 0.37989080982711554,
"grad_norm": 0.8133034306250352,
"learning_rate": 9.858577512934592e-06,
"loss": 0.1012,
"step": 835
},
{
"epoch": 0.38034576888080074,
"grad_norm": 0.7828785203637737,
"learning_rate": 9.858239779638173e-06,
"loss": 0.0726,
"step": 836
},
{
"epoch": 0.3808007279344859,
"grad_norm": 1.3138864597148558,
"learning_rate": 9.857901649348338e-06,
"loss": 0.1307,
"step": 837
},
{
"epoch": 0.3812556869881711,
"grad_norm": 0.7000750227265026,
"learning_rate": 9.857563122092717e-06,
"loss": 0.0777,
"step": 838
},
{
"epoch": 0.3817106460418562,
"grad_norm": 0.757283984575844,
"learning_rate": 9.857224197898975e-06,
"loss": 0.083,
"step": 839
},
{
"epoch": 0.3821656050955414,
"grad_norm": 0.7113754486134378,
"learning_rate": 9.856884876794805e-06,
"loss": 0.0795,
"step": 840
},
{
"epoch": 0.38262056414922657,
"grad_norm": 0.6891370217065743,
"learning_rate": 9.856545158807938e-06,
"loss": 0.0576,
"step": 841
},
{
"epoch": 0.3830755232029117,
"grad_norm": 0.7230826558764609,
"learning_rate": 9.856205043966134e-06,
"loss": 0.0973,
"step": 842
},
{
"epoch": 0.3835304822565969,
"grad_norm": 0.9951638416419379,
"learning_rate": 9.855864532297181e-06,
"loss": 0.1225,
"step": 843
},
{
"epoch": 0.38398544131028206,
"grad_norm": 0.8272776971451865,
"learning_rate": 9.85552362382891e-06,
"loss": 0.0928,
"step": 844
},
{
"epoch": 0.38444040036396726,
"grad_norm": 0.662562460388915,
"learning_rate": 9.855182318589174e-06,
"loss": 0.0711,
"step": 845
},
{
"epoch": 0.3848953594176524,
"grad_norm": 1.185659176011977,
"learning_rate": 9.854840616605866e-06,
"loss": 0.0922,
"step": 846
},
{
"epoch": 0.3853503184713376,
"grad_norm": 0.7002426118833048,
"learning_rate": 9.854498517906908e-06,
"loss": 0.0828,
"step": 847
},
{
"epoch": 0.38580527752502275,
"grad_norm": 0.8957633348930525,
"learning_rate": 9.854156022520252e-06,
"loss": 0.0809,
"step": 848
},
{
"epoch": 0.3862602365787079,
"grad_norm": 1.0593251614278854,
"learning_rate": 9.853813130473887e-06,
"loss": 0.1109,
"step": 849
},
{
"epoch": 0.3867151956323931,
"grad_norm": 0.7751748709357449,
"learning_rate": 9.853469841795832e-06,
"loss": 0.0823,
"step": 850
},
{
"epoch": 0.38717015468607824,
"grad_norm": 0.5943868690351954,
"learning_rate": 9.853126156514142e-06,
"loss": 0.0758,
"step": 851
},
{
"epoch": 0.38762511373976344,
"grad_norm": 0.4901349757557767,
"learning_rate": 9.852782074656897e-06,
"loss": 0.064,
"step": 852
},
{
"epoch": 0.3880800727934486,
"grad_norm": 0.7531191508768753,
"learning_rate": 9.852437596252216e-06,
"loss": 0.0824,
"step": 853
},
{
"epoch": 0.3885350318471338,
"grad_norm": 0.7684236261792305,
"learning_rate": 9.852092721328248e-06,
"loss": 0.0674,
"step": 854
},
{
"epoch": 0.3889899909008189,
"grad_norm": 0.8624513661560378,
"learning_rate": 9.851747449913176e-06,
"loss": 0.09,
"step": 855
},
{
"epoch": 0.38944494995450407,
"grad_norm": 0.9125725996183891,
"learning_rate": 9.851401782035213e-06,
"loss": 0.129,
"step": 856
},
{
"epoch": 0.38989990900818927,
"grad_norm": 0.7630714638300728,
"learning_rate": 9.851055717722604e-06,
"loss": 0.068,
"step": 857
},
{
"epoch": 0.3903548680618744,
"grad_norm": 0.834756070401477,
"learning_rate": 9.850709257003628e-06,
"loss": 0.0831,
"step": 858
},
{
"epoch": 0.3908098271155596,
"grad_norm": 0.9864776662717517,
"learning_rate": 9.850362399906598e-06,
"loss": 0.0904,
"step": 859
},
{
"epoch": 0.39126478616924476,
"grad_norm": 0.6242730295284743,
"learning_rate": 9.850015146459857e-06,
"loss": 0.0754,
"step": 860
},
{
"epoch": 0.39171974522292996,
"grad_norm": 0.838271649072902,
"learning_rate": 9.84966749669178e-06,
"loss": 0.0899,
"step": 861
},
{
"epoch": 0.3921747042766151,
"grad_norm": 0.6826448278617049,
"learning_rate": 9.849319450630777e-06,
"loss": 0.0698,
"step": 862
},
{
"epoch": 0.39262966333030025,
"grad_norm": 0.5533993282250775,
"learning_rate": 9.848971008305288e-06,
"loss": 0.0688,
"step": 863
},
{
"epoch": 0.39308462238398545,
"grad_norm": 0.838673412156409,
"learning_rate": 9.848622169743784e-06,
"loss": 0.0815,
"step": 864
},
{
"epoch": 0.3935395814376706,
"grad_norm": 0.9783580500729582,
"learning_rate": 9.848272934974774e-06,
"loss": 0.0745,
"step": 865
},
{
"epoch": 0.3939945404913558,
"grad_norm": 0.5976030953641746,
"learning_rate": 9.847923304026793e-06,
"loss": 0.0664,
"step": 866
},
{
"epoch": 0.39444949954504094,
"grad_norm": 0.6999143793652887,
"learning_rate": 9.847573276928415e-06,
"loss": 0.0804,
"step": 867
},
{
"epoch": 0.39490445859872614,
"grad_norm": 0.6338725165728231,
"learning_rate": 9.847222853708239e-06,
"loss": 0.0655,
"step": 868
},
{
"epoch": 0.3953594176524113,
"grad_norm": 0.7010627446349382,
"learning_rate": 9.846872034394902e-06,
"loss": 0.0667,
"step": 869
},
{
"epoch": 0.3958143767060964,
"grad_norm": 0.6173227181881447,
"learning_rate": 9.84652081901707e-06,
"loss": 0.0674,
"step": 870
},
{
"epoch": 0.3962693357597816,
"grad_norm": 0.9673042020268607,
"learning_rate": 9.846169207603443e-06,
"loss": 0.1267,
"step": 871
},
{
"epoch": 0.39672429481346677,
"grad_norm": 0.6294912489479282,
"learning_rate": 9.845817200182755e-06,
"loss": 0.0588,
"step": 872
},
{
"epoch": 0.39717925386715197,
"grad_norm": 0.8477152807126976,
"learning_rate": 9.845464796783767e-06,
"loss": 0.1219,
"step": 873
},
{
"epoch": 0.3976342129208371,
"grad_norm": 0.5887483684825674,
"learning_rate": 9.845111997435279e-06,
"loss": 0.0731,
"step": 874
},
{
"epoch": 0.3980891719745223,
"grad_norm": 0.5630369277247907,
"learning_rate": 9.844758802166116e-06,
"loss": 0.0579,
"step": 875
},
{
"epoch": 0.39854413102820746,
"grad_norm": 0.6717541815357567,
"learning_rate": 9.844405211005145e-06,
"loss": 0.0711,
"step": 876
},
{
"epoch": 0.3989990900818926,
"grad_norm": 0.6571828619535791,
"learning_rate": 9.844051223981258e-06,
"loss": 0.0638,
"step": 877
},
{
"epoch": 0.3994540491355778,
"grad_norm": 0.6723710552364174,
"learning_rate": 9.84369684112338e-06,
"loss": 0.0676,
"step": 878
},
{
"epoch": 0.39990900818926295,
"grad_norm": 0.7014173744195523,
"learning_rate": 9.84334206246047e-06,
"loss": 0.0751,
"step": 879
},
{
"epoch": 0.40036396724294815,
"grad_norm": 0.7999660318519703,
"learning_rate": 9.842986888021518e-06,
"loss": 0.0895,
"step": 880
},
{
"epoch": 0.4008189262966333,
"grad_norm": 0.5578605501955606,
"learning_rate": 9.842631317835548e-06,
"loss": 0.0637,
"step": 881
},
{
"epoch": 0.4012738853503185,
"grad_norm": 0.6615256090849237,
"learning_rate": 9.842275351931617e-06,
"loss": 0.0664,
"step": 882
},
{
"epoch": 0.40172884440400364,
"grad_norm": 0.5263094198672195,
"learning_rate": 9.841918990338812e-06,
"loss": 0.0611,
"step": 883
},
{
"epoch": 0.4021838034576888,
"grad_norm": 0.8080883575450535,
"learning_rate": 9.841562233086252e-06,
"loss": 0.0912,
"step": 884
},
{
"epoch": 0.402638762511374,
"grad_norm": 0.6655757939327012,
"learning_rate": 9.841205080203092e-06,
"loss": 0.0601,
"step": 885
},
{
"epoch": 0.4030937215650591,
"grad_norm": 0.8701903481119097,
"learning_rate": 9.840847531718515e-06,
"loss": 0.0914,
"step": 886
},
{
"epoch": 0.4035486806187443,
"grad_norm": 0.7730206436987713,
"learning_rate": 9.840489587661738e-06,
"loss": 0.0747,
"step": 887
},
{
"epoch": 0.40400363967242947,
"grad_norm": 0.7410839527981146,
"learning_rate": 9.840131248062012e-06,
"loss": 0.079,
"step": 888
},
{
"epoch": 0.40445859872611467,
"grad_norm": 0.627620281196765,
"learning_rate": 9.839772512948618e-06,
"loss": 0.0715,
"step": 889
},
{
"epoch": 0.4049135577797998,
"grad_norm": 0.8746014124114054,
"learning_rate": 9.83941338235087e-06,
"loss": 0.0824,
"step": 890
},
{
"epoch": 0.40536851683348496,
"grad_norm": 1.0112737589697485,
"learning_rate": 9.839053856298116e-06,
"loss": 0.1251,
"step": 891
},
{
"epoch": 0.40582347588717016,
"grad_norm": 0.72216805525771,
"learning_rate": 9.838693934819734e-06,
"loss": 0.0893,
"step": 892
},
{
"epoch": 0.4062784349408553,
"grad_norm": 0.7544949830136005,
"learning_rate": 9.838333617945134e-06,
"loss": 0.0968,
"step": 893
},
{
"epoch": 0.4067333939945405,
"grad_norm": 0.9543024355165705,
"learning_rate": 9.837972905703762e-06,
"loss": 0.102,
"step": 894
},
{
"epoch": 0.40718835304822565,
"grad_norm": 1.02061795078975,
"learning_rate": 9.83761179812509e-06,
"loss": 0.0649,
"step": 895
},
{
"epoch": 0.40764331210191085,
"grad_norm": 0.39738812842187227,
"learning_rate": 9.837250295238629e-06,
"loss": 0.0428,
"step": 896
},
{
"epoch": 0.408098271155596,
"grad_norm": 0.8873895570319217,
"learning_rate": 9.836888397073919e-06,
"loss": 0.1068,
"step": 897
},
{
"epoch": 0.40855323020928114,
"grad_norm": 0.7492126364897504,
"learning_rate": 9.836526103660533e-06,
"loss": 0.0953,
"step": 898
},
{
"epoch": 0.40900818926296634,
"grad_norm": 0.821575499525911,
"learning_rate": 9.836163415028075e-06,
"loss": 0.0712,
"step": 899
},
{
"epoch": 0.4094631483166515,
"grad_norm": 1.0052579979241618,
"learning_rate": 9.835800331206183e-06,
"loss": 0.1138,
"step": 900
},
{
"epoch": 0.4099181073703367,
"grad_norm": 0.7848465428804848,
"learning_rate": 9.835436852224525e-06,
"loss": 0.0978,
"step": 901
},
{
"epoch": 0.4103730664240218,
"grad_norm": 0.9719856735481065,
"learning_rate": 9.835072978112804e-06,
"loss": 0.0846,
"step": 902
},
{
"epoch": 0.410828025477707,
"grad_norm": 0.6607308818506346,
"learning_rate": 9.834708708900755e-06,
"loss": 0.0654,
"step": 903
},
{
"epoch": 0.41128298453139217,
"grad_norm": 0.5191597312034261,
"learning_rate": 9.834344044618144e-06,
"loss": 0.0518,
"step": 904
},
{
"epoch": 0.41173794358507737,
"grad_norm": 0.5336391872354229,
"learning_rate": 9.83397898529477e-06,
"loss": 0.0535,
"step": 905
},
{
"epoch": 0.4121929026387625,
"grad_norm": 0.5687342550017563,
"learning_rate": 9.833613530960462e-06,
"loss": 0.0578,
"step": 906
},
{
"epoch": 0.41264786169244766,
"grad_norm": 0.8793783198642894,
"learning_rate": 9.833247681645083e-06,
"loss": 0.1286,
"step": 907
},
{
"epoch": 0.41310282074613286,
"grad_norm": 0.8073005899800644,
"learning_rate": 9.832881437378534e-06,
"loss": 0.0853,
"step": 908
},
{
"epoch": 0.413557779799818,
"grad_norm": 0.511699500000588,
"learning_rate": 9.832514798190738e-06,
"loss": 0.0504,
"step": 909
},
{
"epoch": 0.4140127388535032,
"grad_norm": 0.5082793074725768,
"learning_rate": 9.832147764111655e-06,
"loss": 0.056,
"step": 910
},
{
"epoch": 0.41446769790718835,
"grad_norm": 0.9876041013395295,
"learning_rate": 9.83178033517128e-06,
"loss": 0.0984,
"step": 911
},
{
"epoch": 0.41492265696087355,
"grad_norm": 0.7511273129930924,
"learning_rate": 9.831412511399633e-06,
"loss": 0.0969,
"step": 912
},
{
"epoch": 0.4153776160145587,
"grad_norm": 1.0144870263760433,
"learning_rate": 9.831044292826778e-06,
"loss": 0.1482,
"step": 913
},
{
"epoch": 0.41583257506824384,
"grad_norm": 0.70444400073401,
"learning_rate": 9.830675679482797e-06,
"loss": 0.0802,
"step": 914
},
{
"epoch": 0.41628753412192904,
"grad_norm": 1.0357251397748677,
"learning_rate": 9.830306671397816e-06,
"loss": 0.1061,
"step": 915
},
{
"epoch": 0.4167424931756142,
"grad_norm": 0.895894802940119,
"learning_rate": 9.829937268601988e-06,
"loss": 0.1005,
"step": 916
},
{
"epoch": 0.4171974522292994,
"grad_norm": 0.6004589977630954,
"learning_rate": 9.829567471125497e-06,
"loss": 0.0664,
"step": 917
},
{
"epoch": 0.4176524112829845,
"grad_norm": 0.6058859475834909,
"learning_rate": 9.829197278998562e-06,
"loss": 0.0728,
"step": 918
},
{
"epoch": 0.4181073703366697,
"grad_norm": 0.5886912548442098,
"learning_rate": 9.828826692251435e-06,
"loss": 0.074,
"step": 919
},
{
"epoch": 0.41856232939035487,
"grad_norm": 0.5982473215332103,
"learning_rate": 9.828455710914398e-06,
"loss": 0.0653,
"step": 920
},
{
"epoch": 0.41901728844404,
"grad_norm": 0.8647804622811079,
"learning_rate": 9.828084335017763e-06,
"loss": 0.0741,
"step": 921
},
{
"epoch": 0.4194722474977252,
"grad_norm": 0.653767178815679,
"learning_rate": 9.827712564591883e-06,
"loss": 0.0604,
"step": 922
},
{
"epoch": 0.41992720655141036,
"grad_norm": 0.7812500085225947,
"learning_rate": 9.827340399667132e-06,
"loss": 0.0708,
"step": 923
},
{
"epoch": 0.42038216560509556,
"grad_norm": 0.7314008563711142,
"learning_rate": 9.826967840273921e-06,
"loss": 0.0721,
"step": 924
},
{
"epoch": 0.4208371246587807,
"grad_norm": 0.8727413076803472,
"learning_rate": 9.8265948864427e-06,
"loss": 0.0892,
"step": 925
},
{
"epoch": 0.4212920837124659,
"grad_norm": 0.6051379056710864,
"learning_rate": 9.826221538203942e-06,
"loss": 0.0685,
"step": 926
},
{
"epoch": 0.42174704276615105,
"grad_norm": 0.7279887191787228,
"learning_rate": 9.825847795588154e-06,
"loss": 0.0766,
"step": 927
},
{
"epoch": 0.4222020018198362,
"grad_norm": 0.7126811268305303,
"learning_rate": 9.825473658625876e-06,
"loss": 0.0821,
"step": 928
},
{
"epoch": 0.4226569608735214,
"grad_norm": 0.8812960827967533,
"learning_rate": 9.825099127347684e-06,
"loss": 0.0982,
"step": 929
},
{
"epoch": 0.42311191992720654,
"grad_norm": 0.7462955906438729,
"learning_rate": 9.824724201784182e-06,
"loss": 0.1073,
"step": 930
},
{
"epoch": 0.42356687898089174,
"grad_norm": 0.5448066050338419,
"learning_rate": 9.824348881966004e-06,
"loss": 0.0637,
"step": 931
},
{
"epoch": 0.4240218380345769,
"grad_norm": 0.7750150802923693,
"learning_rate": 9.823973167923823e-06,
"loss": 0.09,
"step": 932
},
{
"epoch": 0.4244767970882621,
"grad_norm": 0.8695175796556455,
"learning_rate": 9.82359705968834e-06,
"loss": 0.0857,
"step": 933
},
{
"epoch": 0.4249317561419472,
"grad_norm": 0.653112477618241,
"learning_rate": 9.823220557290289e-06,
"loss": 0.0722,
"step": 934
},
{
"epoch": 0.42538671519563237,
"grad_norm": 0.7764742726938813,
"learning_rate": 9.822843660760434e-06,
"loss": 0.0582,
"step": 935
},
{
"epoch": 0.42584167424931757,
"grad_norm": 0.8338160462571067,
"learning_rate": 9.822466370129576e-06,
"loss": 0.0993,
"step": 936
},
{
"epoch": 0.4262966333030027,
"grad_norm": 0.7416650975880095,
"learning_rate": 9.822088685428543e-06,
"loss": 0.0782,
"step": 937
},
{
"epoch": 0.4267515923566879,
"grad_norm": 0.5969422348364739,
"learning_rate": 9.821710606688199e-06,
"loss": 0.0546,
"step": 938
},
{
"epoch": 0.42720655141037306,
"grad_norm": 0.6235404067325917,
"learning_rate": 9.82133213393944e-06,
"loss": 0.0638,
"step": 939
},
{
"epoch": 0.42766151046405826,
"grad_norm": 0.7910461101358781,
"learning_rate": 9.820953267213194e-06,
"loss": 0.0775,
"step": 940
},
{
"epoch": 0.4281164695177434,
"grad_norm": 0.692978452923811,
"learning_rate": 9.820574006540415e-06,
"loss": 0.053,
"step": 941
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.7310389759017597,
"learning_rate": 9.820194351952098e-06,
"loss": 0.0716,
"step": 942
},
{
"epoch": 0.42902638762511375,
"grad_norm": 0.6553331509390902,
"learning_rate": 9.819814303479268e-06,
"loss": 0.0612,
"step": 943
},
{
"epoch": 0.4294813466787989,
"grad_norm": 1.1310076957610966,
"learning_rate": 9.819433861152978e-06,
"loss": 0.1116,
"step": 944
},
{
"epoch": 0.4299363057324841,
"grad_norm": 0.6933766894953944,
"learning_rate": 9.819053025004316e-06,
"loss": 0.0932,
"step": 945
},
{
"epoch": 0.43039126478616924,
"grad_norm": 0.7823571557493696,
"learning_rate": 9.818671795064405e-06,
"loss": 0.0847,
"step": 946
},
{
"epoch": 0.43084622383985444,
"grad_norm": 0.8000794358590197,
"learning_rate": 9.818290171364396e-06,
"loss": 0.0916,
"step": 947
},
{
"epoch": 0.4313011828935396,
"grad_norm": 0.6207042654318157,
"learning_rate": 9.817908153935473e-06,
"loss": 0.0568,
"step": 948
},
{
"epoch": 0.4317561419472247,
"grad_norm": 0.7957970680354334,
"learning_rate": 9.817525742808854e-06,
"loss": 0.1203,
"step": 949
},
{
"epoch": 0.4322111010009099,
"grad_norm": 0.6607960765057979,
"learning_rate": 9.817142938015786e-06,
"loss": 0.069,
"step": 950
},
{
"epoch": 0.43266606005459507,
"grad_norm": 0.8132102265727185,
"learning_rate": 9.816759739587552e-06,
"loss": 0.0821,
"step": 951
},
{
"epoch": 0.43312101910828027,
"grad_norm": 0.6410149691778323,
"learning_rate": 9.816376147555464e-06,
"loss": 0.0612,
"step": 952
},
{
"epoch": 0.4335759781619654,
"grad_norm": 1.0196998859089288,
"learning_rate": 9.815992161950867e-06,
"loss": 0.1183,
"step": 953
},
{
"epoch": 0.4340309372156506,
"grad_norm": 0.5899375116434804,
"learning_rate": 9.81560778280514e-06,
"loss": 0.0604,
"step": 954
},
{
"epoch": 0.43448589626933576,
"grad_norm": 1.0046158107797931,
"learning_rate": 9.815223010149693e-06,
"loss": 0.0876,
"step": 955
},
{
"epoch": 0.4349408553230209,
"grad_norm": 0.7980339738331416,
"learning_rate": 9.814837844015966e-06,
"loss": 0.0894,
"step": 956
},
{
"epoch": 0.4353958143767061,
"grad_norm": 0.6974524248281853,
"learning_rate": 9.814452284435433e-06,
"loss": 0.0741,
"step": 957
},
{
"epoch": 0.43585077343039125,
"grad_norm": 0.7679692797858835,
"learning_rate": 9.814066331439603e-06,
"loss": 0.0796,
"step": 958
},
{
"epoch": 0.43630573248407645,
"grad_norm": 0.8183774417740679,
"learning_rate": 9.813679985060012e-06,
"loss": 0.0963,
"step": 959
},
{
"epoch": 0.4367606915377616,
"grad_norm": 0.7950656053104391,
"learning_rate": 9.81329324532823e-06,
"loss": 0.0837,
"step": 960
},
{
"epoch": 0.4372156505914468,
"grad_norm": 0.6056809369995887,
"learning_rate": 9.812906112275862e-06,
"loss": 0.0465,
"step": 961
},
{
"epoch": 0.43767060964513194,
"grad_norm": 1.0980359635620318,
"learning_rate": 9.81251858593454e-06,
"loss": 0.1206,
"step": 962
},
{
"epoch": 0.4381255686988171,
"grad_norm": 0.6123483237764059,
"learning_rate": 9.812130666335933e-06,
"loss": 0.08,
"step": 963
},
{
"epoch": 0.4385805277525023,
"grad_norm": 0.8151730014839008,
"learning_rate": 9.81174235351174e-06,
"loss": 0.0983,
"step": 964
},
{
"epoch": 0.4390354868061874,
"grad_norm": 0.7143828681073273,
"learning_rate": 9.811353647493691e-06,
"loss": 0.0809,
"step": 965
},
{
"epoch": 0.4394904458598726,
"grad_norm": 0.5647036962239634,
"learning_rate": 9.810964548313549e-06,
"loss": 0.0581,
"step": 966
},
{
"epoch": 0.43994540491355777,
"grad_norm": 0.7594400506736699,
"learning_rate": 9.81057505600311e-06,
"loss": 0.078,
"step": 967
},
{
"epoch": 0.44040036396724297,
"grad_norm": 0.6515426202345832,
"learning_rate": 9.810185170594205e-06,
"loss": 0.0688,
"step": 968
},
{
"epoch": 0.4408553230209281,
"grad_norm": 0.8798906332352223,
"learning_rate": 9.809794892118687e-06,
"loss": 0.0915,
"step": 969
},
{
"epoch": 0.44131028207461326,
"grad_norm": 0.7350866900672135,
"learning_rate": 9.809404220608451e-06,
"loss": 0.0671,
"step": 970
},
{
"epoch": 0.44176524112829846,
"grad_norm": 0.7216847217866104,
"learning_rate": 9.809013156095424e-06,
"loss": 0.0726,
"step": 971
},
{
"epoch": 0.4422202001819836,
"grad_norm": 0.8179702740752783,
"learning_rate": 9.808621698611557e-06,
"loss": 0.0758,
"step": 972
},
{
"epoch": 0.4426751592356688,
"grad_norm": 0.5533105745807706,
"learning_rate": 9.808229848188842e-06,
"loss": 0.0528,
"step": 973
},
{
"epoch": 0.44313011828935395,
"grad_norm": 0.7503486538749657,
"learning_rate": 9.807837604859296e-06,
"loss": 0.0878,
"step": 974
},
{
"epoch": 0.44358507734303915,
"grad_norm": 0.40510949005498975,
"learning_rate": 9.807444968654975e-06,
"loss": 0.0424,
"step": 975
},
{
"epoch": 0.4440400363967243,
"grad_norm": 0.8540666353042626,
"learning_rate": 9.807051939607959e-06,
"loss": 0.1108,
"step": 976
},
{
"epoch": 0.44449499545040944,
"grad_norm": 0.7543284179304937,
"learning_rate": 9.806658517750369e-06,
"loss": 0.0719,
"step": 977
},
{
"epoch": 0.44494995450409464,
"grad_norm": 0.6982493359241757,
"learning_rate": 9.80626470311435e-06,
"loss": 0.0777,
"step": 978
},
{
"epoch": 0.4454049135577798,
"grad_norm": 0.7275511253894157,
"learning_rate": 9.805870495732085e-06,
"loss": 0.0693,
"step": 979
},
{
"epoch": 0.445859872611465,
"grad_norm": 0.8647890459895436,
"learning_rate": 9.805475895635787e-06,
"loss": 0.0882,
"step": 980
},
{
"epoch": 0.4463148316651501,
"grad_norm": 0.757804762973183,
"learning_rate": 9.8050809028577e-06,
"loss": 0.0724,
"step": 981
},
{
"epoch": 0.4467697907188353,
"grad_norm": 0.7515219153063712,
"learning_rate": 9.8046855174301e-06,
"loss": 0.0659,
"step": 982
},
{
"epoch": 0.44722474977252047,
"grad_norm": 1.0502681583017184,
"learning_rate": 9.804289739385297e-06,
"loss": 0.1207,
"step": 983
},
{
"epoch": 0.44767970882620567,
"grad_norm": 0.5780062486364612,
"learning_rate": 9.803893568755633e-06,
"loss": 0.0772,
"step": 984
},
{
"epoch": 0.4481346678798908,
"grad_norm": 0.5515644567052078,
"learning_rate": 9.80349700557348e-06,
"loss": 0.0628,
"step": 985
},
{
"epoch": 0.44858962693357596,
"grad_norm": 0.6432677095504179,
"learning_rate": 9.803100049871246e-06,
"loss": 0.0817,
"step": 986
},
{
"epoch": 0.44904458598726116,
"grad_norm": 0.5424958391196154,
"learning_rate": 9.802702701681366e-06,
"loss": 0.0649,
"step": 987
},
{
"epoch": 0.4494995450409463,
"grad_norm": 0.6556126282036931,
"learning_rate": 9.80230496103631e-06,
"loss": 0.0579,
"step": 988
},
{
"epoch": 0.4499545040946315,
"grad_norm": 0.5632646083130022,
"learning_rate": 9.801906827968578e-06,
"loss": 0.0591,
"step": 989
},
{
"epoch": 0.45040946314831665,
"grad_norm": 1.0464719217252296,
"learning_rate": 9.801508302510707e-06,
"loss": 0.124,
"step": 990
},
{
"epoch": 0.45086442220200185,
"grad_norm": 0.7231067459050019,
"learning_rate": 9.801109384695261e-06,
"loss": 0.0631,
"step": 991
},
{
"epoch": 0.451319381255687,
"grad_norm": 0.775594128230074,
"learning_rate": 9.800710074554837e-06,
"loss": 0.0924,
"step": 992
},
{
"epoch": 0.45177434030937214,
"grad_norm": 0.6340180385643369,
"learning_rate": 9.800310372122066e-06,
"loss": 0.068,
"step": 993
},
{
"epoch": 0.45222929936305734,
"grad_norm": 0.9703750136380557,
"learning_rate": 9.799910277429609e-06,
"loss": 0.0902,
"step": 994
},
{
"epoch": 0.4526842584167425,
"grad_norm": 0.5881925827197537,
"learning_rate": 9.79950979051016e-06,
"loss": 0.0662,
"step": 995
},
{
"epoch": 0.4531392174704277,
"grad_norm": 0.7583235380843109,
"learning_rate": 9.799108911396446e-06,
"loss": 0.0755,
"step": 996
},
{
"epoch": 0.4535941765241128,
"grad_norm": 0.6585135755735663,
"learning_rate": 9.798707640121224e-06,
"loss": 0.0669,
"step": 997
},
{
"epoch": 0.454049135577798,
"grad_norm": 0.9344579240939844,
"learning_rate": 9.798305976717286e-06,
"loss": 0.1028,
"step": 998
},
{
"epoch": 0.45450409463148317,
"grad_norm": 0.6238360425747993,
"learning_rate": 9.79790392121745e-06,
"loss": 0.0608,
"step": 999
},
{
"epoch": 0.4549590536851683,
"grad_norm": 0.715680092291253,
"learning_rate": 9.797501473654573e-06,
"loss": 0.0792,
"step": 1000
},
{
"epoch": 0.4554140127388535,
"grad_norm": 0.8167758856821831,
"learning_rate": 9.797098634061543e-06,
"loss": 0.0948,
"step": 1001
},
{
"epoch": 0.45586897179253866,
"grad_norm": 0.8318764431867516,
"learning_rate": 9.796695402471275e-06,
"loss": 0.0967,
"step": 1002
},
{
"epoch": 0.45632393084622386,
"grad_norm": 0.9700547030363569,
"learning_rate": 9.79629177891672e-06,
"loss": 0.1138,
"step": 1003
},
{
"epoch": 0.456778889899909,
"grad_norm": 0.7702596501705347,
"learning_rate": 9.79588776343086e-06,
"loss": 0.0826,
"step": 1004
},
{
"epoch": 0.4572338489535942,
"grad_norm": 0.833778163717652,
"learning_rate": 9.795483356046711e-06,
"loss": 0.0927,
"step": 1005
},
{
"epoch": 0.45768880800727935,
"grad_norm": 0.7006737675801851,
"learning_rate": 9.795078556797318e-06,
"loss": 0.0747,
"step": 1006
},
{
"epoch": 0.4581437670609645,
"grad_norm": 0.8810114143185821,
"learning_rate": 9.794673365715761e-06,
"loss": 0.0921,
"step": 1007
},
{
"epoch": 0.4585987261146497,
"grad_norm": 0.7286145380478113,
"learning_rate": 9.794267782835148e-06,
"loss": 0.0832,
"step": 1008
},
{
"epoch": 0.45905368516833484,
"grad_norm": 0.8181887559127218,
"learning_rate": 9.793861808188622e-06,
"loss": 0.0729,
"step": 1009
},
{
"epoch": 0.45950864422202004,
"grad_norm": 1.0821839097582124,
"learning_rate": 9.793455441809359e-06,
"loss": 0.1025,
"step": 1010
},
{
"epoch": 0.4599636032757052,
"grad_norm": 0.515896949523265,
"learning_rate": 9.793048683730564e-06,
"loss": 0.0512,
"step": 1011
},
{
"epoch": 0.4604185623293904,
"grad_norm": 0.7800604571516774,
"learning_rate": 9.792641533985474e-06,
"loss": 0.1065,
"step": 1012
},
{
"epoch": 0.4608735213830755,
"grad_norm": 0.48365424866268936,
"learning_rate": 9.792233992607365e-06,
"loss": 0.0622,
"step": 1013
},
{
"epoch": 0.46132848043676067,
"grad_norm": 0.8472876133123602,
"learning_rate": 9.791826059629532e-06,
"loss": 0.0713,
"step": 1014
},
{
"epoch": 0.46178343949044587,
"grad_norm": 0.935522534168844,
"learning_rate": 9.791417735085316e-06,
"loss": 0.0853,
"step": 1015
},
{
"epoch": 0.462238398544131,
"grad_norm": 0.8028819334602026,
"learning_rate": 9.791009019008078e-06,
"loss": 0.0795,
"step": 1016
},
{
"epoch": 0.4626933575978162,
"grad_norm": 0.6458928385673616,
"learning_rate": 9.79059991143122e-06,
"loss": 0.0836,
"step": 1017
},
{
"epoch": 0.46314831665150136,
"grad_norm": 0.8309912415690437,
"learning_rate": 9.790190412388173e-06,
"loss": 0.0895,
"step": 1018
},
{
"epoch": 0.46360327570518656,
"grad_norm": 0.6953691809158898,
"learning_rate": 9.789780521912396e-06,
"loss": 0.0686,
"step": 1019
},
{
"epoch": 0.4640582347588717,
"grad_norm": 0.7563151979586233,
"learning_rate": 9.789370240037385e-06,
"loss": 0.0879,
"step": 1020
},
{
"epoch": 0.46451319381255685,
"grad_norm": 0.6646619102460968,
"learning_rate": 9.788959566796667e-06,
"loss": 0.0761,
"step": 1021
},
{
"epoch": 0.46496815286624205,
"grad_norm": 0.8092527562913561,
"learning_rate": 9.788548502223801e-06,
"loss": 0.0863,
"step": 1022
},
{
"epoch": 0.4654231119199272,
"grad_norm": 2.0284506817542396,
"learning_rate": 9.788137046352374e-06,
"loss": 0.2011,
"step": 1023
},
{
"epoch": 0.4658780709736124,
"grad_norm": 0.6524644993097855,
"learning_rate": 9.787725199216011e-06,
"loss": 0.0765,
"step": 1024
},
{
"epoch": 0.46633303002729753,
"grad_norm": 0.48134373932870766,
"learning_rate": 9.787312960848368e-06,
"loss": 0.0505,
"step": 1025
},
{
"epoch": 0.46678798908098273,
"grad_norm": 0.6646547386252114,
"learning_rate": 9.786900331283128e-06,
"loss": 0.0825,
"step": 1026
},
{
"epoch": 0.4672429481346679,
"grad_norm": 0.5655812014606527,
"learning_rate": 9.78648731055401e-06,
"loss": 0.0659,
"step": 1027
},
{
"epoch": 0.467697907188353,
"grad_norm": 0.680196435092224,
"learning_rate": 9.786073898694766e-06,
"loss": 0.0734,
"step": 1028
},
{
"epoch": 0.4681528662420382,
"grad_norm": 0.6198434008496165,
"learning_rate": 9.785660095739176e-06,
"loss": 0.0687,
"step": 1029
},
{
"epoch": 0.46860782529572337,
"grad_norm": 0.5967309034966486,
"learning_rate": 9.785245901721054e-06,
"loss": 0.0443,
"step": 1030
},
{
"epoch": 0.46906278434940857,
"grad_norm": 0.588565790719301,
"learning_rate": 9.784831316674246e-06,
"loss": 0.0741,
"step": 1031
},
{
"epoch": 0.4695177434030937,
"grad_norm": 0.6384508627867143,
"learning_rate": 9.784416340632634e-06,
"loss": 0.0639,
"step": 1032
},
{
"epoch": 0.4699727024567789,
"grad_norm": 0.528980291125106,
"learning_rate": 9.784000973630124e-06,
"loss": 0.0506,
"step": 1033
},
{
"epoch": 0.47042766151046406,
"grad_norm": 0.6297922247581061,
"learning_rate": 9.783585215700656e-06,
"loss": 0.0704,
"step": 1034
},
{
"epoch": 0.4708826205641492,
"grad_norm": 1.1014615381108162,
"learning_rate": 9.783169066878208e-06,
"loss": 0.1063,
"step": 1035
},
{
"epoch": 0.4713375796178344,
"grad_norm": 0.7370811970547196,
"learning_rate": 9.782752527196785e-06,
"loss": 0.0888,
"step": 1036
},
{
"epoch": 0.47179253867151955,
"grad_norm": 0.6272964856361817,
"learning_rate": 9.782335596690425e-06,
"loss": 0.0683,
"step": 1037
},
{
"epoch": 0.47224749772520475,
"grad_norm": 0.9675945822898259,
"learning_rate": 9.781918275393196e-06,
"loss": 0.1031,
"step": 1038
},
{
"epoch": 0.4727024567788899,
"grad_norm": 0.8448129794628584,
"learning_rate": 9.781500563339202e-06,
"loss": 0.0818,
"step": 1039
},
{
"epoch": 0.4731574158325751,
"grad_norm": 0.5148120993988892,
"learning_rate": 9.781082460562574e-06,
"loss": 0.0525,
"step": 1040
},
{
"epoch": 0.47361237488626023,
"grad_norm": 0.7767251927940846,
"learning_rate": 9.780663967097477e-06,
"loss": 0.0869,
"step": 1041
},
{
"epoch": 0.4740673339399454,
"grad_norm": 0.9661754574144388,
"learning_rate": 9.780245082978112e-06,
"loss": 0.0923,
"step": 1042
},
{
"epoch": 0.4745222929936306,
"grad_norm": 0.780061387882855,
"learning_rate": 9.779825808238705e-06,
"loss": 0.095,
"step": 1043
},
{
"epoch": 0.4749772520473157,
"grad_norm": 0.8513172657519864,
"learning_rate": 9.77940614291352e-06,
"loss": 0.0772,
"step": 1044
},
{
"epoch": 0.4754322111010009,
"grad_norm": 0.6199453465731616,
"learning_rate": 9.778986087036846e-06,
"loss": 0.0701,
"step": 1045
},
{
"epoch": 0.47588717015468607,
"grad_norm": 0.5327629714743946,
"learning_rate": 9.778565640643011e-06,
"loss": 0.0447,
"step": 1046
},
{
"epoch": 0.47634212920837127,
"grad_norm": 0.8882337205809296,
"learning_rate": 9.778144803766375e-06,
"loss": 0.0788,
"step": 1047
},
{
"epoch": 0.4767970882620564,
"grad_norm": 0.6023343672839219,
"learning_rate": 9.77772357644132e-06,
"loss": 0.0693,
"step": 1048
},
{
"epoch": 0.47725204731574156,
"grad_norm": 0.8031515985448552,
"learning_rate": 9.777301958702273e-06,
"loss": 0.0911,
"step": 1049
},
{
"epoch": 0.47770700636942676,
"grad_norm": 0.8695877166802147,
"learning_rate": 9.776879950583683e-06,
"loss": 0.12,
"step": 1050
},
{
"epoch": 0.4781619654231119,
"grad_norm": 0.6077253389668626,
"learning_rate": 9.776457552120034e-06,
"loss": 0.0722,
"step": 1051
},
{
"epoch": 0.4786169244767971,
"grad_norm": 0.7976020915977983,
"learning_rate": 9.776034763345845e-06,
"loss": 0.0783,
"step": 1052
},
{
"epoch": 0.47907188353048225,
"grad_norm": 0.7091049596783572,
"learning_rate": 9.775611584295663e-06,
"loss": 0.0739,
"step": 1053
},
{
"epoch": 0.47952684258416745,
"grad_norm": 0.7919907245184465,
"learning_rate": 9.775188015004072e-06,
"loss": 0.0728,
"step": 1054
},
{
"epoch": 0.4799818016378526,
"grad_norm": 0.9227645018819045,
"learning_rate": 9.774764055505676e-06,
"loss": 0.0905,
"step": 1055
},
{
"epoch": 0.48043676069153773,
"grad_norm": 0.7130315690029604,
"learning_rate": 9.774339705835127e-06,
"loss": 0.09,
"step": 1056
},
{
"epoch": 0.48089171974522293,
"grad_norm": 0.7993270676292756,
"learning_rate": 9.773914966027098e-06,
"loss": 0.1011,
"step": 1057
},
{
"epoch": 0.4813466787989081,
"grad_norm": 0.8955668988276211,
"learning_rate": 9.773489836116297e-06,
"loss": 0.0963,
"step": 1058
},
{
"epoch": 0.4818016378525933,
"grad_norm": 0.7582155580680914,
"learning_rate": 9.773064316137464e-06,
"loss": 0.0766,
"step": 1059
},
{
"epoch": 0.4822565969062784,
"grad_norm": 0.6939955066308027,
"learning_rate": 9.772638406125367e-06,
"loss": 0.0687,
"step": 1060
},
{
"epoch": 0.4827115559599636,
"grad_norm": 0.8091635860789653,
"learning_rate": 9.772212106114816e-06,
"loss": 0.0754,
"step": 1061
},
{
"epoch": 0.48316651501364877,
"grad_norm": 0.8236012040739623,
"learning_rate": 9.77178541614064e-06,
"loss": 0.0951,
"step": 1062
},
{
"epoch": 0.48362147406733397,
"grad_norm": 0.6622501946117725,
"learning_rate": 9.77135833623771e-06,
"loss": 0.083,
"step": 1063
},
{
"epoch": 0.4840764331210191,
"grad_norm": 0.8689743387052602,
"learning_rate": 9.770930866440927e-06,
"loss": 0.1074,
"step": 1064
},
{
"epoch": 0.48453139217470426,
"grad_norm": 0.6733750246744147,
"learning_rate": 9.770503006785214e-06,
"loss": 0.0639,
"step": 1065
},
{
"epoch": 0.48498635122838946,
"grad_norm": 0.9485233745498586,
"learning_rate": 9.770074757305541e-06,
"loss": 0.1106,
"step": 1066
},
{
"epoch": 0.4854413102820746,
"grad_norm": 0.8288392949652397,
"learning_rate": 9.769646118036902e-06,
"loss": 0.0661,
"step": 1067
},
{
"epoch": 0.4858962693357598,
"grad_norm": 0.7475423805914638,
"learning_rate": 9.76921708901432e-06,
"loss": 0.0686,
"step": 1068
},
{
"epoch": 0.48635122838944495,
"grad_norm": 0.54120364671088,
"learning_rate": 9.768787670272855e-06,
"loss": 0.0629,
"step": 1069
},
{
"epoch": 0.48680618744313015,
"grad_norm": 0.7281619635509152,
"learning_rate": 9.768357861847598e-06,
"loss": 0.0723,
"step": 1070
},
{
"epoch": 0.4872611464968153,
"grad_norm": 0.8883321717067604,
"learning_rate": 9.767927663773668e-06,
"loss": 0.0832,
"step": 1071
},
{
"epoch": 0.48771610555050043,
"grad_norm": 0.7681469789077073,
"learning_rate": 9.767497076086223e-06,
"loss": 0.0786,
"step": 1072
},
{
"epoch": 0.48817106460418563,
"grad_norm": 0.6590861395931087,
"learning_rate": 9.767066098820446e-06,
"loss": 0.0704,
"step": 1073
},
{
"epoch": 0.4886260236578708,
"grad_norm": 0.7944203702948146,
"learning_rate": 9.766634732011557e-06,
"loss": 0.0867,
"step": 1074
},
{
"epoch": 0.489080982711556,
"grad_norm": 0.7832480468570255,
"learning_rate": 9.766202975694801e-06,
"loss": 0.0873,
"step": 1075
},
{
"epoch": 0.4895359417652411,
"grad_norm": 0.7232266679451883,
"learning_rate": 9.765770829905464e-06,
"loss": 0.0785,
"step": 1076
},
{
"epoch": 0.4899909008189263,
"grad_norm": 0.5406798309730716,
"learning_rate": 9.765338294678856e-06,
"loss": 0.0469,
"step": 1077
},
{
"epoch": 0.49044585987261147,
"grad_norm": 0.5866548164219128,
"learning_rate": 9.764905370050321e-06,
"loss": 0.0524,
"step": 1078
},
{
"epoch": 0.4909008189262966,
"grad_norm": 0.9915720236606885,
"learning_rate": 9.76447205605524e-06,
"loss": 0.1019,
"step": 1079
},
{
"epoch": 0.4913557779799818,
"grad_norm": 0.6838845303274752,
"learning_rate": 9.764038352729018e-06,
"loss": 0.0891,
"step": 1080
},
{
"epoch": 0.49181073703366696,
"grad_norm": 0.9385660559352969,
"learning_rate": 9.763604260107096e-06,
"loss": 0.1058,
"step": 1081
},
{
"epoch": 0.49226569608735216,
"grad_norm": 0.6710872617569944,
"learning_rate": 9.763169778224946e-06,
"loss": 0.0665,
"step": 1082
},
{
"epoch": 0.4927206551410373,
"grad_norm": 0.7878885609137168,
"learning_rate": 9.762734907118072e-06,
"loss": 0.0876,
"step": 1083
},
{
"epoch": 0.4931756141947225,
"grad_norm": 0.6302166766090778,
"learning_rate": 9.76229964682201e-06,
"loss": 0.0507,
"step": 1084
},
{
"epoch": 0.49363057324840764,
"grad_norm": 0.5833462678864086,
"learning_rate": 9.761863997372325e-06,
"loss": 0.0612,
"step": 1085
},
{
"epoch": 0.4940855323020928,
"grad_norm": 1.036522158484448,
"learning_rate": 9.761427958804621e-06,
"loss": 0.1395,
"step": 1086
},
{
"epoch": 0.494540491355778,
"grad_norm": 1.1502320115946314,
"learning_rate": 9.760991531154526e-06,
"loss": 0.1149,
"step": 1087
},
{
"epoch": 0.49499545040946313,
"grad_norm": 0.7616054217825209,
"learning_rate": 9.760554714457704e-06,
"loss": 0.0684,
"step": 1088
},
{
"epoch": 0.49545040946314833,
"grad_norm": 0.5129309167340426,
"learning_rate": 9.760117508749846e-06,
"loss": 0.0614,
"step": 1089
},
{
"epoch": 0.4959053685168335,
"grad_norm": 0.7147170789642256,
"learning_rate": 9.759679914066686e-06,
"loss": 0.0842,
"step": 1090
},
{
"epoch": 0.4963603275705187,
"grad_norm": 0.7513123367978354,
"learning_rate": 9.759241930443975e-06,
"loss": 0.0749,
"step": 1091
},
{
"epoch": 0.4968152866242038,
"grad_norm": 0.5462870672862663,
"learning_rate": 9.75880355791751e-06,
"loss": 0.0588,
"step": 1092
},
{
"epoch": 0.49727024567788897,
"grad_norm": 0.6158644897786469,
"learning_rate": 9.758364796523105e-06,
"loss": 0.0578,
"step": 1093
},
{
"epoch": 0.49772520473157417,
"grad_norm": 0.5248367448810554,
"learning_rate": 9.757925646296617e-06,
"loss": 0.0504,
"step": 1094
},
{
"epoch": 0.4981801637852593,
"grad_norm": 0.7801307646100064,
"learning_rate": 9.757486107273935e-06,
"loss": 0.0819,
"step": 1095
},
{
"epoch": 0.4986351228389445,
"grad_norm": 0.6822936325355138,
"learning_rate": 9.75704617949097e-06,
"loss": 0.0828,
"step": 1096
},
{
"epoch": 0.49909008189262966,
"grad_norm": 0.49379397863131413,
"learning_rate": 9.756605862983675e-06,
"loss": 0.0606,
"step": 1097
},
{
"epoch": 0.49954504094631486,
"grad_norm": 0.5236513133369656,
"learning_rate": 9.756165157788029e-06,
"loss": 0.0493,
"step": 1098
},
{
"epoch": 0.5,
"grad_norm": 0.7323812225903658,
"learning_rate": 9.755724063940047e-06,
"loss": 0.0794,
"step": 1099
},
{
"epoch": 0.5004549590536852,
"grad_norm": 0.853156508842135,
"learning_rate": 9.755282581475769e-06,
"loss": 0.08,
"step": 1100
},
{
"epoch": 0.5009099181073703,
"grad_norm": 0.7117091061791435,
"learning_rate": 9.754840710431274e-06,
"loss": 0.0773,
"step": 1101
},
{
"epoch": 0.5013648771610555,
"grad_norm": 0.9350752111669145,
"learning_rate": 9.754398450842668e-06,
"loss": 0.1046,
"step": 1102
},
{
"epoch": 0.5018198362147407,
"grad_norm": 0.8834833642233855,
"learning_rate": 9.753955802746091e-06,
"loss": 0.1284,
"step": 1103
},
{
"epoch": 0.5022747952684259,
"grad_norm": 0.9022387216275947,
"learning_rate": 9.753512766177717e-06,
"loss": 0.0898,
"step": 1104
},
{
"epoch": 0.502729754322111,
"grad_norm": 0.551248880180483,
"learning_rate": 9.753069341173745e-06,
"loss": 0.0596,
"step": 1105
},
{
"epoch": 0.5031847133757962,
"grad_norm": 0.5970423480352659,
"learning_rate": 9.752625527770409e-06,
"loss": 0.0723,
"step": 1106
},
{
"epoch": 0.5036396724294814,
"grad_norm": 0.7620108531589319,
"learning_rate": 9.75218132600398e-06,
"loss": 0.0856,
"step": 1107
},
{
"epoch": 0.5040946314831665,
"grad_norm": 0.7720887684681512,
"learning_rate": 9.751736735910753e-06,
"loss": 0.0904,
"step": 1108
},
{
"epoch": 0.5045495905368517,
"grad_norm": 0.8672659681858957,
"learning_rate": 9.75129175752706e-06,
"loss": 0.1043,
"step": 1109
},
{
"epoch": 0.5050045495905369,
"grad_norm": 0.7511079874116621,
"learning_rate": 9.75084639088926e-06,
"loss": 0.0719,
"step": 1110
},
{
"epoch": 0.5054595086442221,
"grad_norm": 0.7442062138473109,
"learning_rate": 9.750400636033746e-06,
"loss": 0.0805,
"step": 1111
},
{
"epoch": 0.5059144676979072,
"grad_norm": 0.716157443156474,
"learning_rate": 9.749954492996947e-06,
"loss": 0.0902,
"step": 1112
},
{
"epoch": 0.5063694267515924,
"grad_norm": 0.7655895172099163,
"learning_rate": 9.749507961815317e-06,
"loss": 0.0973,
"step": 1113
},
{
"epoch": 0.5068243858052776,
"grad_norm": 0.6288294239038802,
"learning_rate": 9.749061042525343e-06,
"loss": 0.0646,
"step": 1114
},
{
"epoch": 0.5072793448589626,
"grad_norm": 0.6709452216437115,
"learning_rate": 9.74861373516355e-06,
"loss": 0.0717,
"step": 1115
},
{
"epoch": 0.5077343039126478,
"grad_norm": 0.6522838269502338,
"learning_rate": 9.748166039766484e-06,
"loss": 0.0475,
"step": 1116
},
{
"epoch": 0.508189262966333,
"grad_norm": 0.7999784990978867,
"learning_rate": 9.747717956370735e-06,
"loss": 0.0925,
"step": 1117
},
{
"epoch": 0.5086442220200182,
"grad_norm": 1.0917998243863505,
"learning_rate": 9.747269485012913e-06,
"loss": 0.1293,
"step": 1118
},
{
"epoch": 0.5090991810737033,
"grad_norm": 0.7636715530766439,
"learning_rate": 9.746820625729667e-06,
"loss": 0.0774,
"step": 1119
},
{
"epoch": 0.5095541401273885,
"grad_norm": 0.6701230428761437,
"learning_rate": 9.746371378557677e-06,
"loss": 0.0623,
"step": 1120
},
{
"epoch": 0.5100090991810737,
"grad_norm": 0.972334707766994,
"learning_rate": 9.745921743533653e-06,
"loss": 0.113,
"step": 1121
},
{
"epoch": 0.5104640582347588,
"grad_norm": 0.6630727679984025,
"learning_rate": 9.745471720694335e-06,
"loss": 0.0828,
"step": 1122
},
{
"epoch": 0.510919017288444,
"grad_norm": 0.8798279960192045,
"learning_rate": 9.745021310076498e-06,
"loss": 0.0772,
"step": 1123
},
{
"epoch": 0.5113739763421292,
"grad_norm": 0.6337737332675445,
"learning_rate": 9.744570511716952e-06,
"loss": 0.0805,
"step": 1124
},
{
"epoch": 0.5118289353958144,
"grad_norm": 0.9171053674032225,
"learning_rate": 9.744119325652526e-06,
"loss": 0.0901,
"step": 1125
},
{
"epoch": 0.5122838944494995,
"grad_norm": 0.7437420002919692,
"learning_rate": 9.743667751920093e-06,
"loss": 0.0789,
"step": 1126
},
{
"epoch": 0.5127388535031847,
"grad_norm": 0.692440215965907,
"learning_rate": 9.743215790556556e-06,
"loss": 0.0885,
"step": 1127
},
{
"epoch": 0.5131938125568699,
"grad_norm": 0.5830998661595514,
"learning_rate": 9.742763441598841e-06,
"loss": 0.0571,
"step": 1128
},
{
"epoch": 0.513648771610555,
"grad_norm": 0.7409283851806759,
"learning_rate": 9.742310705083919e-06,
"loss": 0.0819,
"step": 1129
},
{
"epoch": 0.5141037306642402,
"grad_norm": 0.6329559817029019,
"learning_rate": 9.74185758104878e-06,
"loss": 0.0732,
"step": 1130
},
{
"epoch": 0.5145586897179254,
"grad_norm": 0.47102788261692413,
"learning_rate": 9.741404069530455e-06,
"loss": 0.0496,
"step": 1131
},
{
"epoch": 0.5150136487716106,
"grad_norm": 0.7193278988032876,
"learning_rate": 9.740950170566002e-06,
"loss": 0.0797,
"step": 1132
},
{
"epoch": 0.5154686078252957,
"grad_norm": 0.7827454423152818,
"learning_rate": 9.740495884192509e-06,
"loss": 0.0863,
"step": 1133
},
{
"epoch": 0.5159235668789809,
"grad_norm": 0.5187125000260286,
"learning_rate": 9.740041210447101e-06,
"loss": 0.048,
"step": 1134
},
{
"epoch": 0.5163785259326661,
"grad_norm": 0.7621657915309645,
"learning_rate": 9.739586149366932e-06,
"loss": 0.076,
"step": 1135
},
{
"epoch": 0.5168334849863512,
"grad_norm": 1.0691498364952807,
"learning_rate": 9.739130700989185e-06,
"loss": 0.1085,
"step": 1136
},
{
"epoch": 0.5172884440400364,
"grad_norm": 1.126943089011516,
"learning_rate": 9.738674865351081e-06,
"loss": 0.1197,
"step": 1137
},
{
"epoch": 0.5177434030937216,
"grad_norm": 0.5967935472543325,
"learning_rate": 9.738218642489864e-06,
"loss": 0.0715,
"step": 1138
},
{
"epoch": 0.5181983621474068,
"grad_norm": 0.6520369417533736,
"learning_rate": 9.73776203244282e-06,
"loss": 0.0812,
"step": 1139
},
{
"epoch": 0.5186533212010919,
"grad_norm": 0.6923655317783546,
"learning_rate": 9.737305035247258e-06,
"loss": 0.0607,
"step": 1140
},
{
"epoch": 0.5191082802547771,
"grad_norm": 0.5971267035932937,
"learning_rate": 9.73684765094052e-06,
"loss": 0.0597,
"step": 1141
},
{
"epoch": 0.5195632393084623,
"grad_norm": 0.6102979031011873,
"learning_rate": 9.736389879559984e-06,
"loss": 0.0464,
"step": 1142
},
{
"epoch": 0.5200181983621474,
"grad_norm": 0.5971210330968472,
"learning_rate": 9.735931721143058e-06,
"loss": 0.0674,
"step": 1143
},
{
"epoch": 0.5204731574158326,
"grad_norm": 0.9014574419537533,
"learning_rate": 9.735473175727178e-06,
"loss": 0.1071,
"step": 1144
},
{
"epoch": 0.5209281164695178,
"grad_norm": 1.024240239778721,
"learning_rate": 9.735014243349814e-06,
"loss": 0.1058,
"step": 1145
},
{
"epoch": 0.521383075523203,
"grad_norm": 0.740240244958144,
"learning_rate": 9.73455492404847e-06,
"loss": 0.0716,
"step": 1146
},
{
"epoch": 0.521838034576888,
"grad_norm": 0.8552793125149327,
"learning_rate": 9.734095217860679e-06,
"loss": 0.1116,
"step": 1147
},
{
"epoch": 0.5222929936305732,
"grad_norm": 0.8388846880500271,
"learning_rate": 9.733635124824007e-06,
"loss": 0.1195,
"step": 1148
},
{
"epoch": 0.5227479526842584,
"grad_norm": 0.7476616795889469,
"learning_rate": 9.733174644976047e-06,
"loss": 0.0982,
"step": 1149
},
{
"epoch": 0.5232029117379435,
"grad_norm": 1.247104578949049,
"learning_rate": 9.732713778354431e-06,
"loss": 0.1339,
"step": 1150
},
{
"epoch": 0.5236578707916287,
"grad_norm": 0.8127429979477634,
"learning_rate": 9.732252524996818e-06,
"loss": 0.0994,
"step": 1151
},
{
"epoch": 0.5241128298453139,
"grad_norm": 1.1678300434583342,
"learning_rate": 9.731790884940899e-06,
"loss": 0.1152,
"step": 1152
},
{
"epoch": 0.5245677888989991,
"grad_norm": 0.5209287069427062,
"learning_rate": 9.731328858224398e-06,
"loss": 0.0546,
"step": 1153
},
{
"epoch": 0.5250227479526842,
"grad_norm": 0.8363023252623251,
"learning_rate": 9.730866444885069e-06,
"loss": 0.0894,
"step": 1154
},
{
"epoch": 0.5254777070063694,
"grad_norm": 0.8202924553152645,
"learning_rate": 9.730403644960697e-06,
"loss": 0.0914,
"step": 1155
},
{
"epoch": 0.5259326660600546,
"grad_norm": 0.4900409376406188,
"learning_rate": 9.729940458489105e-06,
"loss": 0.0454,
"step": 1156
},
{
"epoch": 0.5263876251137397,
"grad_norm": 0.5631225499534328,
"learning_rate": 9.729476885508136e-06,
"loss": 0.0542,
"step": 1157
},
{
"epoch": 0.5268425841674249,
"grad_norm": 0.566596895824316,
"learning_rate": 9.729012926055674e-06,
"loss": 0.0625,
"step": 1158
},
{
"epoch": 0.5272975432211101,
"grad_norm": 0.9035766920121469,
"learning_rate": 9.728548580169632e-06,
"loss": 0.1013,
"step": 1159
},
{
"epoch": 0.5277525022747953,
"grad_norm": 0.8241016260766749,
"learning_rate": 9.728083847887955e-06,
"loss": 0.078,
"step": 1160
},
{
"epoch": 0.5282074613284804,
"grad_norm": 0.7435557294319748,
"learning_rate": 9.727618729248617e-06,
"loss": 0.0864,
"step": 1161
},
{
"epoch": 0.5286624203821656,
"grad_norm": 0.6611375262646607,
"learning_rate": 9.727153224289627e-06,
"loss": 0.0769,
"step": 1162
},
{
"epoch": 0.5291173794358508,
"grad_norm": 0.8275931946782299,
"learning_rate": 9.726687333049024e-06,
"loss": 0.0889,
"step": 1163
},
{
"epoch": 0.5295723384895359,
"grad_norm": 1.057751919756087,
"learning_rate": 9.726221055564874e-06,
"loss": 0.0851,
"step": 1164
},
{
"epoch": 0.5300272975432211,
"grad_norm": 0.7884543920060787,
"learning_rate": 9.725754391875287e-06,
"loss": 0.0746,
"step": 1165
},
{
"epoch": 0.5304822565969063,
"grad_norm": 0.8593529313000522,
"learning_rate": 9.72528734201839e-06,
"loss": 0.0828,
"step": 1166
},
{
"epoch": 0.5309372156505915,
"grad_norm": 0.5225417485901063,
"learning_rate": 9.72481990603235e-06,
"loss": 0.0794,
"step": 1167
},
{
"epoch": 0.5313921747042766,
"grad_norm": 0.8820660720540598,
"learning_rate": 9.724352083955366e-06,
"loss": 0.1059,
"step": 1168
},
{
"epoch": 0.5318471337579618,
"grad_norm": 0.6775105748188827,
"learning_rate": 9.723883875825664e-06,
"loss": 0.079,
"step": 1169
},
{
"epoch": 0.532302092811647,
"grad_norm": 0.5969175177573056,
"learning_rate": 9.723415281681505e-06,
"loss": 0.061,
"step": 1170
},
{
"epoch": 0.5327570518653321,
"grad_norm": 0.7165111743049339,
"learning_rate": 9.722946301561179e-06,
"loss": 0.0824,
"step": 1171
},
{
"epoch": 0.5332120109190173,
"grad_norm": 0.7771351455478163,
"learning_rate": 9.722476935503011e-06,
"loss": 0.0936,
"step": 1172
},
{
"epoch": 0.5336669699727025,
"grad_norm": 0.5612071801020553,
"learning_rate": 9.722007183545353e-06,
"loss": 0.0584,
"step": 1173
},
{
"epoch": 0.5341219290263877,
"grad_norm": 0.7630759308283642,
"learning_rate": 9.721537045726594e-06,
"loss": 0.0711,
"step": 1174
},
{
"epoch": 0.5345768880800728,
"grad_norm": 0.7415951616336062,
"learning_rate": 9.721066522085148e-06,
"loss": 0.0786,
"step": 1175
},
{
"epoch": 0.535031847133758,
"grad_norm": 0.6697058559185771,
"learning_rate": 9.720595612659467e-06,
"loss": 0.0943,
"step": 1176
},
{
"epoch": 0.5354868061874432,
"grad_norm": 0.8294561042543531,
"learning_rate": 9.720124317488031e-06,
"loss": 0.0766,
"step": 1177
},
{
"epoch": 0.5359417652411284,
"grad_norm": 0.8069252663248169,
"learning_rate": 9.719652636609351e-06,
"loss": 0.1036,
"step": 1178
},
{
"epoch": 0.5363967242948134,
"grad_norm": 0.5216393236723873,
"learning_rate": 9.719180570061973e-06,
"loss": 0.0681,
"step": 1179
},
{
"epoch": 0.5368516833484986,
"grad_norm": 0.7561882785891234,
"learning_rate": 9.718708117884468e-06,
"loss": 0.0888,
"step": 1180
},
{
"epoch": 0.5373066424021838,
"grad_norm": 0.7101886443887773,
"learning_rate": 9.718235280115446e-06,
"loss": 0.0841,
"step": 1181
},
{
"epoch": 0.5377616014558689,
"grad_norm": 0.93883085852681,
"learning_rate": 9.717762056793545e-06,
"loss": 0.1116,
"step": 1182
},
{
"epoch": 0.5382165605095541,
"grad_norm": 0.8029318164759022,
"learning_rate": 9.717288447957433e-06,
"loss": 0.0817,
"step": 1183
},
{
"epoch": 0.5386715195632393,
"grad_norm": 0.7189629467174897,
"learning_rate": 9.716814453645811e-06,
"loss": 0.0913,
"step": 1184
},
{
"epoch": 0.5391264786169245,
"grad_norm": 0.6194922793353296,
"learning_rate": 9.716340073897414e-06,
"loss": 0.073,
"step": 1185
},
{
"epoch": 0.5395814376706096,
"grad_norm": 0.5862599296496694,
"learning_rate": 9.715865308751006e-06,
"loss": 0.0599,
"step": 1186
},
{
"epoch": 0.5400363967242948,
"grad_norm": 1.0638863826866105,
"learning_rate": 9.715390158245381e-06,
"loss": 0.1412,
"step": 1187
},
{
"epoch": 0.54049135577798,
"grad_norm": 0.6031416289368001,
"learning_rate": 9.714914622419367e-06,
"loss": 0.0694,
"step": 1188
},
{
"epoch": 0.5409463148316651,
"grad_norm": 0.5762096954254395,
"learning_rate": 9.714438701311822e-06,
"loss": 0.0627,
"step": 1189
},
{
"epoch": 0.5414012738853503,
"grad_norm": 0.6077021479661606,
"learning_rate": 9.713962394961636e-06,
"loss": 0.067,
"step": 1190
},
{
"epoch": 0.5418562329390355,
"grad_norm": 0.5381873559759192,
"learning_rate": 9.713485703407732e-06,
"loss": 0.0595,
"step": 1191
},
{
"epoch": 0.5423111919927207,
"grad_norm": 0.7866618609648011,
"learning_rate": 9.713008626689063e-06,
"loss": 0.1064,
"step": 1192
},
{
"epoch": 0.5427661510464058,
"grad_norm": 0.7100862231154079,
"learning_rate": 9.712531164844611e-06,
"loss": 0.07,
"step": 1193
},
{
"epoch": 0.543221110100091,
"grad_norm": 0.5579932774059501,
"learning_rate": 9.712053317913394e-06,
"loss": 0.0525,
"step": 1194
},
{
"epoch": 0.5436760691537762,
"grad_norm": 0.5454543895601387,
"learning_rate": 9.711575085934459e-06,
"loss": 0.0741,
"step": 1195
},
{
"epoch": 0.5441310282074613,
"grad_norm": 0.6754854519258514,
"learning_rate": 9.711096468946888e-06,
"loss": 0.101,
"step": 1196
},
{
"epoch": 0.5445859872611465,
"grad_norm": 0.8125002765504534,
"learning_rate": 9.710617466989787e-06,
"loss": 0.0937,
"step": 1197
},
{
"epoch": 0.5450409463148317,
"grad_norm": 0.5893498973936582,
"learning_rate": 9.710138080102298e-06,
"loss": 0.0658,
"step": 1198
},
{
"epoch": 0.5454959053685169,
"grad_norm": 0.8107633297228217,
"learning_rate": 9.709658308323597e-06,
"loss": 0.0955,
"step": 1199
},
{
"epoch": 0.545950864422202,
"grad_norm": 0.6726060122769176,
"learning_rate": 9.70917815169289e-06,
"loss": 0.084,
"step": 1200
},
{
"epoch": 0.5464058234758872,
"grad_norm": 0.6077011277694447,
"learning_rate": 9.708697610249407e-06,
"loss": 0.0756,
"step": 1201
},
{
"epoch": 0.5468607825295724,
"grad_norm": 0.7073007110523803,
"learning_rate": 9.70821668403242e-06,
"loss": 0.0818,
"step": 1202
},
{
"epoch": 0.5473157415832575,
"grad_norm": 0.9420816064988972,
"learning_rate": 9.707735373081231e-06,
"loss": 0.1197,
"step": 1203
},
{
"epoch": 0.5477707006369427,
"grad_norm": 0.552138579735494,
"learning_rate": 9.707253677435165e-06,
"loss": 0.0594,
"step": 1204
},
{
"epoch": 0.5482256596906279,
"grad_norm": 0.6375758502862188,
"learning_rate": 9.706771597133587e-06,
"loss": 0.0572,
"step": 1205
},
{
"epoch": 0.5486806187443131,
"grad_norm": 0.6581691945271008,
"learning_rate": 9.706289132215889e-06,
"loss": 0.0707,
"step": 1206
},
{
"epoch": 0.5491355777979982,
"grad_norm": 0.820106985355047,
"learning_rate": 9.705806282721498e-06,
"loss": 0.0865,
"step": 1207
},
{
"epoch": 0.5495905368516834,
"grad_norm": 0.5258555939105785,
"learning_rate": 9.705323048689866e-06,
"loss": 0.0462,
"step": 1208
},
{
"epoch": 0.5500454959053686,
"grad_norm": 0.7818892498713288,
"learning_rate": 9.704839430160487e-06,
"loss": 0.1005,
"step": 1209
},
{
"epoch": 0.5505004549590536,
"grad_norm": 0.6371281646305975,
"learning_rate": 9.704355427172874e-06,
"loss": 0.0712,
"step": 1210
},
{
"epoch": 0.5509554140127388,
"grad_norm": 0.5981165031558572,
"learning_rate": 9.70387103976658e-06,
"loss": 0.0669,
"step": 1211
},
{
"epoch": 0.551410373066424,
"grad_norm": 0.640233382171881,
"learning_rate": 9.703386267981188e-06,
"loss": 0.0629,
"step": 1212
},
{
"epoch": 0.5518653321201092,
"grad_norm": 0.5436666812285462,
"learning_rate": 9.70290111185631e-06,
"loss": 0.0527,
"step": 1213
},
{
"epoch": 0.5523202911737943,
"grad_norm": 0.9264418893677014,
"learning_rate": 9.702415571431594e-06,
"loss": 0.1392,
"step": 1214
},
{
"epoch": 0.5527752502274795,
"grad_norm": 0.6659444469982292,
"learning_rate": 9.70192964674671e-06,
"loss": 0.0948,
"step": 1215
},
{
"epoch": 0.5532302092811647,
"grad_norm": 0.5526163080676849,
"learning_rate": 9.70144333784137e-06,
"loss": 0.0661,
"step": 1216
},
{
"epoch": 0.5536851683348498,
"grad_norm": 0.7994476768514381,
"learning_rate": 9.700956644755313e-06,
"loss": 0.0966,
"step": 1217
},
{
"epoch": 0.554140127388535,
"grad_norm": 0.7919884013199107,
"learning_rate": 9.700469567528307e-06,
"loss": 0.1082,
"step": 1218
},
{
"epoch": 0.5545950864422202,
"grad_norm": 0.7366932972024113,
"learning_rate": 9.699982106200155e-06,
"loss": 0.0841,
"step": 1219
},
{
"epoch": 0.5550500454959054,
"grad_norm": 0.8558659635343526,
"learning_rate": 9.699494260810692e-06,
"loss": 0.0866,
"step": 1220
},
{
"epoch": 0.5555050045495905,
"grad_norm": 0.8060928626360002,
"learning_rate": 9.699006031399779e-06,
"loss": 0.0777,
"step": 1221
},
{
"epoch": 0.5559599636032757,
"grad_norm": 0.6914626835020681,
"learning_rate": 9.698517418007314e-06,
"loss": 0.0775,
"step": 1222
},
{
"epoch": 0.5564149226569609,
"grad_norm": 0.8706739684427142,
"learning_rate": 9.698028420673224e-06,
"loss": 0.0984,
"step": 1223
},
{
"epoch": 0.556869881710646,
"grad_norm": 0.7863016327992207,
"learning_rate": 9.697539039437468e-06,
"loss": 0.1118,
"step": 1224
},
{
"epoch": 0.5573248407643312,
"grad_norm": 0.7719453440565228,
"learning_rate": 9.697049274340036e-06,
"loss": 0.0824,
"step": 1225
},
{
"epoch": 0.5577797998180164,
"grad_norm": 1.1509899845731206,
"learning_rate": 9.696559125420949e-06,
"loss": 0.1254,
"step": 1226
},
{
"epoch": 0.5582347588717016,
"grad_norm": 0.5202193771917482,
"learning_rate": 9.696068592720257e-06,
"loss": 0.0538,
"step": 1227
},
{
"epoch": 0.5586897179253867,
"grad_norm": 0.5880633286090164,
"learning_rate": 9.69557767627805e-06,
"loss": 0.0711,
"step": 1228
},
{
"epoch": 0.5591446769790719,
"grad_norm": 0.6342846572654288,
"learning_rate": 9.695086376134438e-06,
"loss": 0.0671,
"step": 1229
},
{
"epoch": 0.5595996360327571,
"grad_norm": 0.7541651906429654,
"learning_rate": 9.694594692329571e-06,
"loss": 0.0813,
"step": 1230
},
{
"epoch": 0.5600545950864422,
"grad_norm": 0.6416731945433944,
"learning_rate": 9.694102624903627e-06,
"loss": 0.0733,
"step": 1231
},
{
"epoch": 0.5605095541401274,
"grad_norm": 1.0012992796464886,
"learning_rate": 9.693610173896815e-06,
"loss": 0.096,
"step": 1232
},
{
"epoch": 0.5609645131938126,
"grad_norm": 0.725396699259508,
"learning_rate": 9.693117339349376e-06,
"loss": 0.0665,
"step": 1233
},
{
"epoch": 0.5614194722474978,
"grad_norm": 0.7481457641805567,
"learning_rate": 9.692624121301581e-06,
"loss": 0.0715,
"step": 1234
},
{
"epoch": 0.5618744313011829,
"grad_norm": 0.969766282604155,
"learning_rate": 9.692130519793734e-06,
"loss": 0.0991,
"step": 1235
},
{
"epoch": 0.5623293903548681,
"grad_norm": 0.8522169509206354,
"learning_rate": 9.691636534866172e-06,
"loss": 0.1025,
"step": 1236
},
{
"epoch": 0.5627843494085533,
"grad_norm": 0.7682304561659135,
"learning_rate": 9.691142166559259e-06,
"loss": 0.0846,
"step": 1237
},
{
"epoch": 0.5632393084622384,
"grad_norm": 0.5495617218791536,
"learning_rate": 9.690647414913392e-06,
"loss": 0.0766,
"step": 1238
},
{
"epoch": 0.5636942675159236,
"grad_norm": 0.6826816911759014,
"learning_rate": 9.690152279969003e-06,
"loss": 0.0729,
"step": 1239
},
{
"epoch": 0.5641492265696088,
"grad_norm": 0.8352406959674302,
"learning_rate": 9.689656761766548e-06,
"loss": 0.0896,
"step": 1240
},
{
"epoch": 0.564604185623294,
"grad_norm": 0.5908696548320724,
"learning_rate": 9.689160860346522e-06,
"loss": 0.0753,
"step": 1241
},
{
"epoch": 0.565059144676979,
"grad_norm": 0.4283914528398344,
"learning_rate": 9.688664575749447e-06,
"loss": 0.0414,
"step": 1242
},
{
"epoch": 0.5655141037306642,
"grad_norm": 0.6584468440229382,
"learning_rate": 9.688167908015877e-06,
"loss": 0.0733,
"step": 1243
},
{
"epoch": 0.5659690627843494,
"grad_norm": 0.9211218848648471,
"learning_rate": 9.687670857186396e-06,
"loss": 0.1171,
"step": 1244
},
{
"epoch": 0.5664240218380345,
"grad_norm": 0.9250852893692096,
"learning_rate": 9.68717342330162e-06,
"loss": 0.1061,
"step": 1245
},
{
"epoch": 0.5668789808917197,
"grad_norm": 0.8688266055790496,
"learning_rate": 9.686675606402203e-06,
"loss": 0.1213,
"step": 1246
},
{
"epoch": 0.5673339399454049,
"grad_norm": 0.7110325678190088,
"learning_rate": 9.686177406528819e-06,
"loss": 0.0836,
"step": 1247
},
{
"epoch": 0.5677888989990901,
"grad_norm": 0.8260984800022192,
"learning_rate": 9.685678823722178e-06,
"loss": 0.0907,
"step": 1248
},
{
"epoch": 0.5682438580527752,
"grad_norm": 0.6625042460625208,
"learning_rate": 9.685179858023026e-06,
"loss": 0.0777,
"step": 1249
},
{
"epoch": 0.5686988171064604,
"grad_norm": 0.711324638729454,
"learning_rate": 9.684680509472133e-06,
"loss": 0.0815,
"step": 1250
},
{
"epoch": 0.5691537761601456,
"grad_norm": 0.6863010294874783,
"learning_rate": 9.684180778110306e-06,
"loss": 0.0642,
"step": 1251
},
{
"epoch": 0.5696087352138307,
"grad_norm": 0.5978880624303593,
"learning_rate": 9.683680663978377e-06,
"loss": 0.065,
"step": 1252
},
{
"epoch": 0.5700636942675159,
"grad_norm": 0.6322068932784428,
"learning_rate": 9.683180167117216e-06,
"loss": 0.0681,
"step": 1253
},
{
"epoch": 0.5705186533212011,
"grad_norm": 0.7826720403434554,
"learning_rate": 9.682679287567722e-06,
"loss": 0.0881,
"step": 1254
},
{
"epoch": 0.5709736123748863,
"grad_norm": 0.794807695787425,
"learning_rate": 9.682178025370824e-06,
"loss": 0.1118,
"step": 1255
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.7050268620804678,
"learning_rate": 9.681676380567482e-06,
"loss": 0.0839,
"step": 1256
},
{
"epoch": 0.5718835304822566,
"grad_norm": 0.5581694578677082,
"learning_rate": 9.681174353198687e-06,
"loss": 0.0482,
"step": 1257
},
{
"epoch": 0.5723384895359418,
"grad_norm": 0.6766600070725707,
"learning_rate": 9.680671943305465e-06,
"loss": 0.0679,
"step": 1258
},
{
"epoch": 0.5727934485896269,
"grad_norm": 0.6995276308642288,
"learning_rate": 9.680169150928868e-06,
"loss": 0.0823,
"step": 1259
},
{
"epoch": 0.5732484076433121,
"grad_norm": 0.6008334474427011,
"learning_rate": 9.679665976109985e-06,
"loss": 0.0669,
"step": 1260
},
{
"epoch": 0.5737033666969973,
"grad_norm": 0.6951316344905618,
"learning_rate": 9.679162418889932e-06,
"loss": 0.0644,
"step": 1261
},
{
"epoch": 0.5741583257506825,
"grad_norm": 0.7661270676130627,
"learning_rate": 9.678658479309854e-06,
"loss": 0.0837,
"step": 1262
},
{
"epoch": 0.5746132848043676,
"grad_norm": 0.7593531327031607,
"learning_rate": 9.678154157410937e-06,
"loss": 0.0646,
"step": 1263
},
{
"epoch": 0.5750682438580528,
"grad_norm": 0.7824619403016152,
"learning_rate": 9.677649453234388e-06,
"loss": 0.0907,
"step": 1264
},
{
"epoch": 0.575523202911738,
"grad_norm": 0.8187746029529864,
"learning_rate": 9.67714436682145e-06,
"loss": 0.0906,
"step": 1265
},
{
"epoch": 0.5759781619654231,
"grad_norm": 0.7676559233650921,
"learning_rate": 9.676638898213394e-06,
"loss": 0.0839,
"step": 1266
},
{
"epoch": 0.5764331210191083,
"grad_norm": 0.5944493207466681,
"learning_rate": 9.676133047451528e-06,
"loss": 0.0588,
"step": 1267
},
{
"epoch": 0.5768880800727935,
"grad_norm": 0.6734586229257056,
"learning_rate": 9.675626814577188e-06,
"loss": 0.0804,
"step": 1268
},
{
"epoch": 0.5773430391264787,
"grad_norm": 0.6315388478681175,
"learning_rate": 9.675120199631738e-06,
"loss": 0.0636,
"step": 1269
},
{
"epoch": 0.5777979981801638,
"grad_norm": 0.7252277920198784,
"learning_rate": 9.674613202656577e-06,
"loss": 0.0842,
"step": 1270
},
{
"epoch": 0.578252957233849,
"grad_norm": 0.58556718084403,
"learning_rate": 9.674105823693139e-06,
"loss": 0.0764,
"step": 1271
},
{
"epoch": 0.5787079162875342,
"grad_norm": 0.7635901125586164,
"learning_rate": 9.673598062782878e-06,
"loss": 0.0907,
"step": 1272
},
{
"epoch": 0.5791628753412192,
"grad_norm": 0.33852379656119563,
"learning_rate": 9.67308991996729e-06,
"loss": 0.0387,
"step": 1273
},
{
"epoch": 0.5796178343949044,
"grad_norm": 0.8984557509320932,
"learning_rate": 9.672581395287897e-06,
"loss": 0.0969,
"step": 1274
},
{
"epoch": 0.5800727934485896,
"grad_norm": 0.881696210059407,
"learning_rate": 9.672072488786254e-06,
"loss": 0.115,
"step": 1275
},
{
"epoch": 0.5805277525022748,
"grad_norm": 0.805394208652388,
"learning_rate": 9.671563200503947e-06,
"loss": 0.0916,
"step": 1276
},
{
"epoch": 0.5809827115559599,
"grad_norm": 0.5947193670178038,
"learning_rate": 9.67105353048259e-06,
"loss": 0.0645,
"step": 1277
},
{
"epoch": 0.5814376706096451,
"grad_norm": 0.9345719582841384,
"learning_rate": 9.670543478763834e-06,
"loss": 0.0853,
"step": 1278
},
{
"epoch": 0.5818926296633303,
"grad_norm": 0.46822310121822047,
"learning_rate": 9.670033045389356e-06,
"loss": 0.06,
"step": 1279
},
{
"epoch": 0.5823475887170154,
"grad_norm": 0.882335352298928,
"learning_rate": 9.669522230400868e-06,
"loss": 0.1288,
"step": 1280
},
{
"epoch": 0.5828025477707006,
"grad_norm": 0.7155876804587362,
"learning_rate": 9.66901103384011e-06,
"loss": 0.0923,
"step": 1281
},
{
"epoch": 0.5832575068243858,
"grad_norm": 0.758339057709363,
"learning_rate": 9.668499455748857e-06,
"loss": 0.0866,
"step": 1282
},
{
"epoch": 0.583712465878071,
"grad_norm": 0.5929990208040478,
"learning_rate": 9.66798749616891e-06,
"loss": 0.0571,
"step": 1283
},
{
"epoch": 0.5841674249317561,
"grad_norm": 0.5486564328594907,
"learning_rate": 9.667475155142104e-06,
"loss": 0.0551,
"step": 1284
},
{
"epoch": 0.5846223839854413,
"grad_norm": 0.6958253493282612,
"learning_rate": 9.666962432710307e-06,
"loss": 0.0731,
"step": 1285
},
{
"epoch": 0.5850773430391265,
"grad_norm": 1.1984701204529857,
"learning_rate": 9.666449328915418e-06,
"loss": 0.1248,
"step": 1286
},
{
"epoch": 0.5855323020928116,
"grad_norm": 1.07466414021835,
"learning_rate": 9.66593584379936e-06,
"loss": 0.0969,
"step": 1287
},
{
"epoch": 0.5859872611464968,
"grad_norm": 0.7365065558485686,
"learning_rate": 9.6654219774041e-06,
"loss": 0.0768,
"step": 1288
},
{
"epoch": 0.586442220200182,
"grad_norm": 0.7278778525375763,
"learning_rate": 9.664907729771622e-06,
"loss": 0.0931,
"step": 1289
},
{
"epoch": 0.5868971792538672,
"grad_norm": 0.6940342908894654,
"learning_rate": 9.664393100943951e-06,
"loss": 0.0716,
"step": 1290
},
{
"epoch": 0.5873521383075523,
"grad_norm": 0.7046475563496115,
"learning_rate": 9.663878090963142e-06,
"loss": 0.0833,
"step": 1291
},
{
"epoch": 0.5878070973612375,
"grad_norm": 0.6554863862272154,
"learning_rate": 9.663362699871275e-06,
"loss": 0.0705,
"step": 1292
},
{
"epoch": 0.5882620564149227,
"grad_norm": 0.610296786595235,
"learning_rate": 9.66284692771047e-06,
"loss": 0.0592,
"step": 1293
},
{
"epoch": 0.5887170154686078,
"grad_norm": 0.6866815075031769,
"learning_rate": 9.662330774522869e-06,
"loss": 0.0748,
"step": 1294
},
{
"epoch": 0.589171974522293,
"grad_norm": 0.5654106713312388,
"learning_rate": 9.661814240350653e-06,
"loss": 0.0546,
"step": 1295
},
{
"epoch": 0.5896269335759782,
"grad_norm": 1.271034489401823,
"learning_rate": 9.66129732523603e-06,
"loss": 0.1473,
"step": 1296
},
{
"epoch": 0.5900818926296634,
"grad_norm": 0.45734781465896296,
"learning_rate": 9.66078002922124e-06,
"loss": 0.0452,
"step": 1297
},
{
"epoch": 0.5905368516833485,
"grad_norm": 0.8001910391102482,
"learning_rate": 9.660262352348553e-06,
"loss": 0.0801,
"step": 1298
},
{
"epoch": 0.5909918107370337,
"grad_norm": 0.8095822615697389,
"learning_rate": 9.659744294660272e-06,
"loss": 0.0851,
"step": 1299
},
{
"epoch": 0.5914467697907189,
"grad_norm": 0.6222175915293906,
"learning_rate": 9.659225856198732e-06,
"loss": 0.0725,
"step": 1300
},
{
"epoch": 0.591901728844404,
"grad_norm": 0.5098172411498206,
"learning_rate": 9.658707037006294e-06,
"loss": 0.0586,
"step": 1301
},
{
"epoch": 0.5923566878980892,
"grad_norm": 0.5056342525545805,
"learning_rate": 9.658187837125357e-06,
"loss": 0.0552,
"step": 1302
},
{
"epoch": 0.5928116469517744,
"grad_norm": 0.8298114087640572,
"learning_rate": 9.657668256598347e-06,
"loss": 0.0976,
"step": 1303
},
{
"epoch": 0.5932666060054596,
"grad_norm": 0.9354418819253106,
"learning_rate": 9.657148295467719e-06,
"loss": 0.1128,
"step": 1304
},
{
"epoch": 0.5937215650591446,
"grad_norm": 0.732222390896743,
"learning_rate": 9.656627953775964e-06,
"loss": 0.0719,
"step": 1305
},
{
"epoch": 0.5941765241128298,
"grad_norm": 0.817074061431315,
"learning_rate": 9.6561072315656e-06,
"loss": 0.097,
"step": 1306
},
{
"epoch": 0.594631483166515,
"grad_norm": 0.6993010225350191,
"learning_rate": 9.655586128879185e-06,
"loss": 0.0866,
"step": 1307
},
{
"epoch": 0.5950864422202001,
"grad_norm": 0.6036033167422408,
"learning_rate": 9.655064645759291e-06,
"loss": 0.0615,
"step": 1308
},
{
"epoch": 0.5955414012738853,
"grad_norm": 0.4333029170805267,
"learning_rate": 9.654542782248539e-06,
"loss": 0.0333,
"step": 1309
},
{
"epoch": 0.5959963603275705,
"grad_norm": 0.5158856954901245,
"learning_rate": 9.65402053838957e-06,
"loss": 0.0534,
"step": 1310
},
{
"epoch": 0.5964513193812557,
"grad_norm": 0.8439407413306237,
"learning_rate": 9.653497914225059e-06,
"loss": 0.0886,
"step": 1311
},
{
"epoch": 0.5969062784349408,
"grad_norm": 1.097335021441692,
"learning_rate": 9.652974909797714e-06,
"loss": 0.1184,
"step": 1312
},
{
"epoch": 0.597361237488626,
"grad_norm": 0.6552117042192046,
"learning_rate": 9.652451525150272e-06,
"loss": 0.0719,
"step": 1313
},
{
"epoch": 0.5978161965423112,
"grad_norm": 0.6353863518066384,
"learning_rate": 9.651927760325504e-06,
"loss": 0.0696,
"step": 1314
},
{
"epoch": 0.5982711555959963,
"grad_norm": 0.9048456403488727,
"learning_rate": 9.651403615366204e-06,
"loss": 0.0859,
"step": 1315
},
{
"epoch": 0.5987261146496815,
"grad_norm": 0.7176841695337582,
"learning_rate": 9.650879090315207e-06,
"loss": 0.0821,
"step": 1316
},
{
"epoch": 0.5991810737033667,
"grad_norm": 0.696539124420045,
"learning_rate": 9.650354185215374e-06,
"loss": 0.0875,
"step": 1317
},
{
"epoch": 0.5996360327570519,
"grad_norm": 0.5924500205612657,
"learning_rate": 9.649828900109599e-06,
"loss": 0.0646,
"step": 1318
},
{
"epoch": 0.600090991810737,
"grad_norm": 0.5430407542910594,
"learning_rate": 9.649303235040803e-06,
"loss": 0.0486,
"step": 1319
},
{
"epoch": 0.6005459508644222,
"grad_norm": 0.6459813862779727,
"learning_rate": 9.648777190051944e-06,
"loss": 0.0903,
"step": 1320
},
{
"epoch": 0.6010009099181074,
"grad_norm": 0.6531397749427512,
"learning_rate": 9.648250765186006e-06,
"loss": 0.0638,
"step": 1321
},
{
"epoch": 0.6014558689717925,
"grad_norm": 0.6616813941465042,
"learning_rate": 9.647723960486006e-06,
"loss": 0.0861,
"step": 1322
},
{
"epoch": 0.6019108280254777,
"grad_norm": 0.8426003399558685,
"learning_rate": 9.647196775994995e-06,
"loss": 0.0928,
"step": 1323
},
{
"epoch": 0.6023657870791629,
"grad_norm": 0.6908471872127779,
"learning_rate": 9.646669211756049e-06,
"loss": 0.064,
"step": 1324
},
{
"epoch": 0.6028207461328481,
"grad_norm": 0.6969433310817453,
"learning_rate": 9.64614126781228e-06,
"loss": 0.0683,
"step": 1325
},
{
"epoch": 0.6032757051865332,
"grad_norm": 0.7506047981065134,
"learning_rate": 9.645612944206826e-06,
"loss": 0.0849,
"step": 1326
},
{
"epoch": 0.6037306642402184,
"grad_norm": 0.5624997977779479,
"learning_rate": 9.645084240982862e-06,
"loss": 0.064,
"step": 1327
},
{
"epoch": 0.6041856232939036,
"grad_norm": 0.43671100502349636,
"learning_rate": 9.644555158183592e-06,
"loss": 0.0615,
"step": 1328
},
{
"epoch": 0.6046405823475887,
"grad_norm": 0.553762280713577,
"learning_rate": 9.64402569585225e-06,
"loss": 0.0596,
"step": 1329
},
{
"epoch": 0.6050955414012739,
"grad_norm": 0.6580653378362663,
"learning_rate": 9.643495854032099e-06,
"loss": 0.0558,
"step": 1330
},
{
"epoch": 0.6055505004549591,
"grad_norm": 0.7656128172437318,
"learning_rate": 9.642965632766437e-06,
"loss": 0.0915,
"step": 1331
},
{
"epoch": 0.6060054595086443,
"grad_norm": 0.49008300515141723,
"learning_rate": 9.642435032098591e-06,
"loss": 0.0553,
"step": 1332
},
{
"epoch": 0.6064604185623294,
"grad_norm": 0.6058179105933948,
"learning_rate": 9.64190405207192e-06,
"loss": 0.0709,
"step": 1333
},
{
"epoch": 0.6069153776160146,
"grad_norm": 0.6707142568108124,
"learning_rate": 9.641372692729811e-06,
"loss": 0.0715,
"step": 1334
},
{
"epoch": 0.6073703366696998,
"grad_norm": 0.8710319334113071,
"learning_rate": 9.640840954115686e-06,
"loss": 0.091,
"step": 1335
},
{
"epoch": 0.607825295723385,
"grad_norm": 0.7496993600003082,
"learning_rate": 9.640308836272996e-06,
"loss": 0.0932,
"step": 1336
},
{
"epoch": 0.60828025477707,
"grad_norm": 0.9684583450547241,
"learning_rate": 9.639776339245225e-06,
"loss": 0.087,
"step": 1337
},
{
"epoch": 0.6087352138307552,
"grad_norm": 0.7857186962980957,
"learning_rate": 9.639243463075884e-06,
"loss": 0.1084,
"step": 1338
},
{
"epoch": 0.6091901728844404,
"grad_norm": 1.1677743182021476,
"learning_rate": 9.638710207808518e-06,
"loss": 0.0712,
"step": 1339
},
{
"epoch": 0.6096451319381255,
"grad_norm": 0.725604064535932,
"learning_rate": 9.6381765734867e-06,
"loss": 0.077,
"step": 1340
},
{
"epoch": 0.6101000909918107,
"grad_norm": 0.5923782964843433,
"learning_rate": 9.63764256015404e-06,
"loss": 0.0641,
"step": 1341
},
{
"epoch": 0.6105550500454959,
"grad_norm": 0.7069177546563966,
"learning_rate": 9.637108167854173e-06,
"loss": 0.0747,
"step": 1342
},
{
"epoch": 0.6110100090991811,
"grad_norm": 0.780384533965345,
"learning_rate": 9.636573396630767e-06,
"loss": 0.0709,
"step": 1343
},
{
"epoch": 0.6114649681528662,
"grad_norm": 0.7305821703239879,
"learning_rate": 9.636038246527523e-06,
"loss": 0.0955,
"step": 1344
},
{
"epoch": 0.6119199272065514,
"grad_norm": 0.6274215993935015,
"learning_rate": 9.635502717588168e-06,
"loss": 0.0656,
"step": 1345
},
{
"epoch": 0.6123748862602366,
"grad_norm": 0.6018866737558257,
"learning_rate": 9.634966809856465e-06,
"loss": 0.0729,
"step": 1346
},
{
"epoch": 0.6128298453139217,
"grad_norm": 0.9406786913650838,
"learning_rate": 9.634430523376207e-06,
"loss": 0.1105,
"step": 1347
},
{
"epoch": 0.6132848043676069,
"grad_norm": 0.6910930219074588,
"learning_rate": 9.633893858191214e-06,
"loss": 0.0652,
"step": 1348
},
{
"epoch": 0.6137397634212921,
"grad_norm": 0.6641071332456526,
"learning_rate": 9.633356814345342e-06,
"loss": 0.0896,
"step": 1349
},
{
"epoch": 0.6141947224749773,
"grad_norm": 0.6463461735454817,
"learning_rate": 9.632819391882475e-06,
"loss": 0.0691,
"step": 1350
},
{
"epoch": 0.6146496815286624,
"grad_norm": 0.6570738741447356,
"learning_rate": 9.63228159084653e-06,
"loss": 0.0726,
"step": 1351
},
{
"epoch": 0.6151046405823476,
"grad_norm": 0.9251372605740943,
"learning_rate": 9.631743411281451e-06,
"loss": 0.1089,
"step": 1352
},
{
"epoch": 0.6155595996360328,
"grad_norm": 1.0354136522724409,
"learning_rate": 9.631204853231219e-06,
"loss": 0.1065,
"step": 1353
},
{
"epoch": 0.6160145586897179,
"grad_norm": 0.7577345531084587,
"learning_rate": 9.630665916739839e-06,
"loss": 0.083,
"step": 1354
},
{
"epoch": 0.6164695177434031,
"grad_norm": 0.6775679844485006,
"learning_rate": 9.630126601851353e-06,
"loss": 0.065,
"step": 1355
},
{
"epoch": 0.6169244767970883,
"grad_norm": 0.6510409015870585,
"learning_rate": 9.62958690860983e-06,
"loss": 0.0842,
"step": 1356
},
{
"epoch": 0.6173794358507735,
"grad_norm": 0.6541401291987898,
"learning_rate": 9.629046837059373e-06,
"loss": 0.0809,
"step": 1357
},
{
"epoch": 0.6178343949044586,
"grad_norm": 0.6773644747284383,
"learning_rate": 9.628506387244111e-06,
"loss": 0.08,
"step": 1358
},
{
"epoch": 0.6182893539581438,
"grad_norm": 0.7401243921784199,
"learning_rate": 9.627965559208212e-06,
"loss": 0.0632,
"step": 1359
},
{
"epoch": 0.618744313011829,
"grad_norm": 0.6255731586329286,
"learning_rate": 9.627424352995866e-06,
"loss": 0.0836,
"step": 1360
},
{
"epoch": 0.6191992720655141,
"grad_norm": 0.8684189032240879,
"learning_rate": 9.626882768651298e-06,
"loss": 0.0918,
"step": 1361
},
{
"epoch": 0.6196542311191993,
"grad_norm": 0.5565014005760545,
"learning_rate": 9.626340806218765e-06,
"loss": 0.0508,
"step": 1362
},
{
"epoch": 0.6201091901728845,
"grad_norm": 0.580066419485805,
"learning_rate": 9.625798465742555e-06,
"loss": 0.0691,
"step": 1363
},
{
"epoch": 0.6205641492265697,
"grad_norm": 0.5980127746625918,
"learning_rate": 9.625255747266984e-06,
"loss": 0.0674,
"step": 1364
},
{
"epoch": 0.6210191082802548,
"grad_norm": 0.8518146992949526,
"learning_rate": 9.6247126508364e-06,
"loss": 0.1112,
"step": 1365
},
{
"epoch": 0.62147406733394,
"grad_norm": 0.8485700961520207,
"learning_rate": 9.624169176495185e-06,
"loss": 0.0966,
"step": 1366
},
{
"epoch": 0.6219290263876252,
"grad_norm": 0.9962639418238284,
"learning_rate": 9.623625324287747e-06,
"loss": 0.1047,
"step": 1367
},
{
"epoch": 0.6223839854413102,
"grad_norm": 0.7706385402975253,
"learning_rate": 9.623081094258527e-06,
"loss": 0.1229,
"step": 1368
},
{
"epoch": 0.6228389444949954,
"grad_norm": 0.9185957443221413,
"learning_rate": 9.622536486451997e-06,
"loss": 0.0981,
"step": 1369
},
{
"epoch": 0.6232939035486806,
"grad_norm": 0.5737112203779396,
"learning_rate": 9.621991500912662e-06,
"loss": 0.0615,
"step": 1370
},
{
"epoch": 0.6237488626023658,
"grad_norm": 0.8225187377418599,
"learning_rate": 9.621446137685051e-06,
"loss": 0.1032,
"step": 1371
},
{
"epoch": 0.6242038216560509,
"grad_norm": 0.911993563924521,
"learning_rate": 9.620900396813734e-06,
"loss": 0.1052,
"step": 1372
},
{
"epoch": 0.6246587807097361,
"grad_norm": 1.1969877300226637,
"learning_rate": 9.620354278343306e-06,
"loss": 0.1323,
"step": 1373
},
{
"epoch": 0.6251137397634213,
"grad_norm": 0.49674299728731663,
"learning_rate": 9.61980778231839e-06,
"loss": 0.0469,
"step": 1374
},
{
"epoch": 0.6255686988171064,
"grad_norm": 0.9419790098064809,
"learning_rate": 9.619260908783645e-06,
"loss": 0.0829,
"step": 1375
},
{
"epoch": 0.6260236578707916,
"grad_norm": 0.8648992102518269,
"learning_rate": 9.61871365778376e-06,
"loss": 0.1227,
"step": 1376
},
{
"epoch": 0.6264786169244768,
"grad_norm": 0.6855921150752273,
"learning_rate": 9.618166029363452e-06,
"loss": 0.0893,
"step": 1377
},
{
"epoch": 0.626933575978162,
"grad_norm": 0.7460350385490577,
"learning_rate": 9.61761802356747e-06,
"loss": 0.1029,
"step": 1378
},
{
"epoch": 0.6273885350318471,
"grad_norm": 0.6238948896650269,
"learning_rate": 9.617069640440598e-06,
"loss": 0.0671,
"step": 1379
},
{
"epoch": 0.6278434940855323,
"grad_norm": 0.8484782740935036,
"learning_rate": 9.616520880027645e-06,
"loss": 0.1094,
"step": 1380
},
{
"epoch": 0.6282984531392175,
"grad_norm": 0.4929008515621752,
"learning_rate": 9.615971742373453e-06,
"loss": 0.0621,
"step": 1381
},
{
"epoch": 0.6287534121929026,
"grad_norm": 0.8230508842215047,
"learning_rate": 9.615422227522897e-06,
"loss": 0.0873,
"step": 1382
},
{
"epoch": 0.6292083712465878,
"grad_norm": 0.8269677617343545,
"learning_rate": 9.614872335520879e-06,
"loss": 0.0996,
"step": 1383
},
{
"epoch": 0.629663330300273,
"grad_norm": 0.7039938726965704,
"learning_rate": 9.614322066412335e-06,
"loss": 0.084,
"step": 1384
},
{
"epoch": 0.6301182893539582,
"grad_norm": 0.7376546247757936,
"learning_rate": 9.613771420242229e-06,
"loss": 0.0857,
"step": 1385
},
{
"epoch": 0.6305732484076433,
"grad_norm": 0.6736142636267153,
"learning_rate": 9.613220397055558e-06,
"loss": 0.0732,
"step": 1386
},
{
"epoch": 0.6310282074613285,
"grad_norm": 0.7476942520500481,
"learning_rate": 9.612668996897351e-06,
"loss": 0.0713,
"step": 1387
},
{
"epoch": 0.6314831665150137,
"grad_norm": 0.7359465201312233,
"learning_rate": 9.612117219812662e-06,
"loss": 0.0847,
"step": 1388
},
{
"epoch": 0.6319381255686988,
"grad_norm": 0.9663363466846744,
"learning_rate": 9.611565065846583e-06,
"loss": 0.1015,
"step": 1389
},
{
"epoch": 0.632393084622384,
"grad_norm": 0.7893446645403931,
"learning_rate": 9.611012535044232e-06,
"loss": 0.0983,
"step": 1390
},
{
"epoch": 0.6328480436760692,
"grad_norm": 1.024989133088754,
"learning_rate": 9.61045962745076e-06,
"loss": 0.1102,
"step": 1391
},
{
"epoch": 0.6333030027297544,
"grad_norm": 0.4979683651622851,
"learning_rate": 9.609906343111348e-06,
"loss": 0.0586,
"step": 1392
},
{
"epoch": 0.6337579617834395,
"grad_norm": 1.1009002383858189,
"learning_rate": 9.609352682071209e-06,
"loss": 0.0963,
"step": 1393
},
{
"epoch": 0.6342129208371247,
"grad_norm": 1.0522149389130615,
"learning_rate": 9.608798644375583e-06,
"loss": 0.1189,
"step": 1394
},
{
"epoch": 0.6346678798908099,
"grad_norm": 0.9812979427333788,
"learning_rate": 9.608244230069745e-06,
"loss": 0.1216,
"step": 1395
},
{
"epoch": 0.635122838944495,
"grad_norm": 0.7352050689297358,
"learning_rate": 9.607689439199e-06,
"loss": 0.0875,
"step": 1396
},
{
"epoch": 0.6355777979981801,
"grad_norm": 0.8346962373874338,
"learning_rate": 9.60713427180868e-06,
"loss": 0.0872,
"step": 1397
},
{
"epoch": 0.6360327570518653,
"grad_norm": 0.9100484302304894,
"learning_rate": 9.606578727944156e-06,
"loss": 0.1014,
"step": 1398
},
{
"epoch": 0.6364877161055505,
"grad_norm": 0.6397054531308819,
"learning_rate": 9.606022807650819e-06,
"loss": 0.0661,
"step": 1399
},
{
"epoch": 0.6369426751592356,
"grad_norm": 0.7013671405977515,
"learning_rate": 9.6054665109741e-06,
"loss": 0.0788,
"step": 1400
},
{
"epoch": 0.6373976342129208,
"grad_norm": 0.7177935827049716,
"learning_rate": 9.604909837959456e-06,
"loss": 0.0739,
"step": 1401
},
{
"epoch": 0.637852593266606,
"grad_norm": 1.0034339624615456,
"learning_rate": 9.604352788652375e-06,
"loss": 0.125,
"step": 1402
},
{
"epoch": 0.6383075523202911,
"grad_norm": 0.7908500695821505,
"learning_rate": 9.603795363098377e-06,
"loss": 0.0626,
"step": 1403
},
{
"epoch": 0.6387625113739763,
"grad_norm": 0.7396845097003291,
"learning_rate": 9.603237561343013e-06,
"loss": 0.0845,
"step": 1404
},
{
"epoch": 0.6392174704276615,
"grad_norm": 0.6132031146325181,
"learning_rate": 9.602679383431864e-06,
"loss": 0.0832,
"step": 1405
},
{
"epoch": 0.6396724294813467,
"grad_norm": 0.5848815265706712,
"learning_rate": 9.602120829410539e-06,
"loss": 0.0609,
"step": 1406
},
{
"epoch": 0.6401273885350318,
"grad_norm": 1.1396916096380878,
"learning_rate": 9.601561899324685e-06,
"loss": 0.089,
"step": 1407
},
{
"epoch": 0.640582347588717,
"grad_norm": 0.6243784477376835,
"learning_rate": 9.601002593219972e-06,
"loss": 0.0629,
"step": 1408
},
{
"epoch": 0.6410373066424022,
"grad_norm": 0.7693306930944409,
"learning_rate": 9.600442911142107e-06,
"loss": 0.0975,
"step": 1409
},
{
"epoch": 0.6414922656960873,
"grad_norm": 0.5824222441008058,
"learning_rate": 9.599882853136821e-06,
"loss": 0.0668,
"step": 1410
},
{
"epoch": 0.6419472247497725,
"grad_norm": 0.7486427214965261,
"learning_rate": 9.59932241924988e-06,
"loss": 0.0885,
"step": 1411
},
{
"epoch": 0.6424021838034577,
"grad_norm": 0.7403442425812181,
"learning_rate": 9.598761609527084e-06,
"loss": 0.0764,
"step": 1412
},
{
"epoch": 0.6428571428571429,
"grad_norm": 0.8444168000337251,
"learning_rate": 9.598200424014255e-06,
"loss": 0.0901,
"step": 1413
},
{
"epoch": 0.643312101910828,
"grad_norm": 0.6214870203253012,
"learning_rate": 9.597638862757255e-06,
"loss": 0.0641,
"step": 1414
},
{
"epoch": 0.6437670609645132,
"grad_norm": 0.45639812216740483,
"learning_rate": 9.597076925801967e-06,
"loss": 0.0525,
"step": 1415
},
{
"epoch": 0.6442220200181984,
"grad_norm": 0.5879645013041995,
"learning_rate": 9.596514613194313e-06,
"loss": 0.0664,
"step": 1416
},
{
"epoch": 0.6446769790718835,
"grad_norm": 0.723485890557837,
"learning_rate": 9.595951924980245e-06,
"loss": 0.0878,
"step": 1417
},
{
"epoch": 0.6451319381255687,
"grad_norm": 0.49190939142236517,
"learning_rate": 9.595388861205738e-06,
"loss": 0.0446,
"step": 1418
},
{
"epoch": 0.6455868971792539,
"grad_norm": 0.8244975390610266,
"learning_rate": 9.59482542191681e-06,
"loss": 0.0927,
"step": 1419
},
{
"epoch": 0.6460418562329391,
"grad_norm": 0.8365340393723969,
"learning_rate": 9.594261607159494e-06,
"loss": 0.0944,
"step": 1420
},
{
"epoch": 0.6464968152866242,
"grad_norm": 0.9246231982112141,
"learning_rate": 9.59369741697987e-06,
"loss": 0.1132,
"step": 1421
},
{
"epoch": 0.6469517743403094,
"grad_norm": 0.7576903487594321,
"learning_rate": 9.593132851424036e-06,
"loss": 0.0968,
"step": 1422
},
{
"epoch": 0.6474067333939946,
"grad_norm": 0.7385455319846311,
"learning_rate": 9.59256791053813e-06,
"loss": 0.1045,
"step": 1423
},
{
"epoch": 0.6478616924476797,
"grad_norm": 0.8466333605064674,
"learning_rate": 9.592002594368312e-06,
"loss": 0.1058,
"step": 1424
},
{
"epoch": 0.6483166515013649,
"grad_norm": 0.9463191649116842,
"learning_rate": 9.59143690296078e-06,
"loss": 0.1179,
"step": 1425
},
{
"epoch": 0.6487716105550501,
"grad_norm": 0.49506567565602905,
"learning_rate": 9.590870836361758e-06,
"loss": 0.0679,
"step": 1426
},
{
"epoch": 0.6492265696087353,
"grad_norm": 0.9070193484568203,
"learning_rate": 9.590304394617506e-06,
"loss": 0.0889,
"step": 1427
},
{
"epoch": 0.6496815286624203,
"grad_norm": 0.4746970963167155,
"learning_rate": 9.589737577774308e-06,
"loss": 0.0474,
"step": 1428
},
{
"epoch": 0.6501364877161055,
"grad_norm": 0.7625565873276676,
"learning_rate": 9.58917038587848e-06,
"loss": 0.1052,
"step": 1429
},
{
"epoch": 0.6505914467697907,
"grad_norm": 0.5544350713091404,
"learning_rate": 9.588602818976374e-06,
"loss": 0.0602,
"step": 1430
},
{
"epoch": 0.6510464058234758,
"grad_norm": 0.8043877114109435,
"learning_rate": 9.588034877114367e-06,
"loss": 0.0714,
"step": 1431
},
{
"epoch": 0.651501364877161,
"grad_norm": 0.6177719048805246,
"learning_rate": 9.58746656033887e-06,
"loss": 0.0822,
"step": 1432
},
{
"epoch": 0.6519563239308462,
"grad_norm": 1.070732220715245,
"learning_rate": 9.586897868696323e-06,
"loss": 0.1203,
"step": 1433
},
{
"epoch": 0.6524112829845314,
"grad_norm": 1.183590915899486,
"learning_rate": 9.586328802233195e-06,
"loss": 0.0935,
"step": 1434
},
{
"epoch": 0.6528662420382165,
"grad_norm": 0.581772493938091,
"learning_rate": 9.58575936099599e-06,
"loss": 0.0682,
"step": 1435
},
{
"epoch": 0.6533212010919017,
"grad_norm": 0.7377901301818582,
"learning_rate": 9.58518954503124e-06,
"loss": 0.0824,
"step": 1436
},
{
"epoch": 0.6537761601455869,
"grad_norm": 0.9292214040800371,
"learning_rate": 9.584619354385505e-06,
"loss": 0.1138,
"step": 1437
},
{
"epoch": 0.654231119199272,
"grad_norm": 0.7573270642921373,
"learning_rate": 9.58404878910538e-06,
"loss": 0.074,
"step": 1438
},
{
"epoch": 0.6546860782529572,
"grad_norm": 0.5838864743945036,
"learning_rate": 9.58347784923749e-06,
"loss": 0.067,
"step": 1439
},
{
"epoch": 0.6551410373066424,
"grad_norm": 0.6730458126896756,
"learning_rate": 9.58290653482849e-06,
"loss": 0.0632,
"step": 1440
},
{
"epoch": 0.6555959963603276,
"grad_norm": 0.7216545389315259,
"learning_rate": 9.582334845925063e-06,
"loss": 0.0757,
"step": 1441
},
{
"epoch": 0.6560509554140127,
"grad_norm": 0.929819001740202,
"learning_rate": 9.581762782573926e-06,
"loss": 0.0973,
"step": 1442
},
{
"epoch": 0.6565059144676979,
"grad_norm": 0.7680577896195074,
"learning_rate": 9.581190344821827e-06,
"loss": 0.086,
"step": 1443
},
{
"epoch": 0.6569608735213831,
"grad_norm": 0.8746535076926352,
"learning_rate": 9.58061753271554e-06,
"loss": 0.1085,
"step": 1444
},
{
"epoch": 0.6574158325750682,
"grad_norm": 0.6364512825611769,
"learning_rate": 9.580044346301875e-06,
"loss": 0.0764,
"step": 1445
},
{
"epoch": 0.6578707916287534,
"grad_norm": 0.47118649986170347,
"learning_rate": 9.57947078562767e-06,
"loss": 0.0506,
"step": 1446
},
{
"epoch": 0.6583257506824386,
"grad_norm": 0.6564703457147261,
"learning_rate": 9.578896850739792e-06,
"loss": 0.0702,
"step": 1447
},
{
"epoch": 0.6587807097361238,
"grad_norm": 0.6786314185300042,
"learning_rate": 9.578322541685142e-06,
"loss": 0.0778,
"step": 1448
},
{
"epoch": 0.6592356687898089,
"grad_norm": 0.7866249519519628,
"learning_rate": 9.577747858510647e-06,
"loss": 0.1066,
"step": 1449
},
{
"epoch": 0.6596906278434941,
"grad_norm": 0.8352652198110325,
"learning_rate": 9.577172801263272e-06,
"loss": 0.0973,
"step": 1450
},
{
"epoch": 0.6601455868971793,
"grad_norm": 0.6694090591857538,
"learning_rate": 9.576597369990006e-06,
"loss": 0.077,
"step": 1451
},
{
"epoch": 0.6606005459508644,
"grad_norm": 0.6613042389515336,
"learning_rate": 9.576021564737871e-06,
"loss": 0.0608,
"step": 1452
},
{
"epoch": 0.6610555050045496,
"grad_norm": 0.7515982683897205,
"learning_rate": 9.575445385553917e-06,
"loss": 0.1003,
"step": 1453
},
{
"epoch": 0.6615104640582348,
"grad_norm": 0.9769815693335377,
"learning_rate": 9.57486883248523e-06,
"loss": 0.0946,
"step": 1454
},
{
"epoch": 0.66196542311192,
"grad_norm": 1.1665424395125852,
"learning_rate": 9.574291905578922e-06,
"loss": 0.1317,
"step": 1455
},
{
"epoch": 0.6624203821656051,
"grad_norm": 0.6942177292436024,
"learning_rate": 9.573714604882138e-06,
"loss": 0.0615,
"step": 1456
},
{
"epoch": 0.6628753412192903,
"grad_norm": 0.9194225981756011,
"learning_rate": 9.57313693044205e-06,
"loss": 0.0975,
"step": 1457
},
{
"epoch": 0.6633303002729755,
"grad_norm": 0.7117926275391128,
"learning_rate": 9.572558882305863e-06,
"loss": 0.0847,
"step": 1458
},
{
"epoch": 0.6637852593266605,
"grad_norm": 0.9546376743105418,
"learning_rate": 9.571980460520815e-06,
"loss": 0.1196,
"step": 1459
},
{
"epoch": 0.6642402183803457,
"grad_norm": 0.8937437496424256,
"learning_rate": 9.57140166513417e-06,
"loss": 0.096,
"step": 1460
},
{
"epoch": 0.664695177434031,
"grad_norm": 0.5937947199850856,
"learning_rate": 9.570822496193225e-06,
"loss": 0.058,
"step": 1461
},
{
"epoch": 0.6651501364877161,
"grad_norm": 0.5756039867728808,
"learning_rate": 9.570242953745307e-06,
"loss": 0.082,
"step": 1462
},
{
"epoch": 0.6656050955414012,
"grad_norm": 0.7416722804778516,
"learning_rate": 9.569663037837776e-06,
"loss": 0.098,
"step": 1463
},
{
"epoch": 0.6660600545950864,
"grad_norm": 0.6377485683281849,
"learning_rate": 9.569082748518017e-06,
"loss": 0.0723,
"step": 1464
},
{
"epoch": 0.6665150136487716,
"grad_norm": 0.7884664768500067,
"learning_rate": 9.568502085833449e-06,
"loss": 0.0884,
"step": 1465
},
{
"epoch": 0.6669699727024567,
"grad_norm": 0.7723350087530905,
"learning_rate": 9.567921049831522e-06,
"loss": 0.0967,
"step": 1466
},
{
"epoch": 0.6674249317561419,
"grad_norm": 0.7260885892233983,
"learning_rate": 9.567339640559716e-06,
"loss": 0.0812,
"step": 1467
},
{
"epoch": 0.6678798908098271,
"grad_norm": 0.5596294621225263,
"learning_rate": 9.566757858065538e-06,
"loss": 0.0631,
"step": 1468
},
{
"epoch": 0.6683348498635123,
"grad_norm": 0.7286352648100037,
"learning_rate": 9.566175702396534e-06,
"loss": 0.0823,
"step": 1469
},
{
"epoch": 0.6687898089171974,
"grad_norm": 0.9301493673689373,
"learning_rate": 9.565593173600271e-06,
"loss": 0.0987,
"step": 1470
},
{
"epoch": 0.6692447679708826,
"grad_norm": 0.6817718703338496,
"learning_rate": 9.565010271724353e-06,
"loss": 0.0755,
"step": 1471
},
{
"epoch": 0.6696997270245678,
"grad_norm": 0.7526239018301766,
"learning_rate": 9.56442699681641e-06,
"loss": 0.0876,
"step": 1472
},
{
"epoch": 0.6701546860782529,
"grad_norm": 0.7279647211742274,
"learning_rate": 9.563843348924105e-06,
"loss": 0.0681,
"step": 1473
},
{
"epoch": 0.6706096451319381,
"grad_norm": 0.8487044021854026,
"learning_rate": 9.563259328095132e-06,
"loss": 0.0903,
"step": 1474
},
{
"epoch": 0.6710646041856233,
"grad_norm": 0.609495225783116,
"learning_rate": 9.562674934377214e-06,
"loss": 0.0801,
"step": 1475
},
{
"epoch": 0.6715195632393085,
"grad_norm": 0.7638645194963899,
"learning_rate": 9.562090167818107e-06,
"loss": 0.0874,
"step": 1476
},
{
"epoch": 0.6719745222929936,
"grad_norm": 1.4076317151154771,
"learning_rate": 9.561505028465593e-06,
"loss": 0.0874,
"step": 1477
},
{
"epoch": 0.6724294813466788,
"grad_norm": 0.6311161675673277,
"learning_rate": 9.560919516367486e-06,
"loss": 0.0738,
"step": 1478
},
{
"epoch": 0.672884440400364,
"grad_norm": 0.638266808298586,
"learning_rate": 9.560333631571634e-06,
"loss": 0.0682,
"step": 1479
},
{
"epoch": 0.6733393994540491,
"grad_norm": 0.7097356519617585,
"learning_rate": 9.559747374125911e-06,
"loss": 0.0987,
"step": 1480
},
{
"epoch": 0.6737943585077343,
"grad_norm": 0.6502346745698145,
"learning_rate": 9.559160744078226e-06,
"loss": 0.0644,
"step": 1481
},
{
"epoch": 0.6742493175614195,
"grad_norm": 1.056681303492363,
"learning_rate": 9.558573741476513e-06,
"loss": 0.0939,
"step": 1482
},
{
"epoch": 0.6747042766151047,
"grad_norm": 0.7992268675141662,
"learning_rate": 9.557986366368742e-06,
"loss": 0.0733,
"step": 1483
},
{
"epoch": 0.6751592356687898,
"grad_norm": 1.0832399406974047,
"learning_rate": 9.557398618802907e-06,
"loss": 0.1123,
"step": 1484
},
{
"epoch": 0.675614194722475,
"grad_norm": 0.6543008513198456,
"learning_rate": 9.556810498827039e-06,
"loss": 0.0794,
"step": 1485
},
{
"epoch": 0.6760691537761602,
"grad_norm": 0.6306597614421026,
"learning_rate": 9.556222006489193e-06,
"loss": 0.0786,
"step": 1486
},
{
"epoch": 0.6765241128298453,
"grad_norm": 0.5618899284499352,
"learning_rate": 9.555633141837462e-06,
"loss": 0.0618,
"step": 1487
},
{
"epoch": 0.6769790718835305,
"grad_norm": 0.6434016854657288,
"learning_rate": 9.555043904919963e-06,
"loss": 0.0796,
"step": 1488
},
{
"epoch": 0.6774340309372157,
"grad_norm": 0.7512094182824542,
"learning_rate": 9.554454295784848e-06,
"loss": 0.0745,
"step": 1489
},
{
"epoch": 0.6778889899909009,
"grad_norm": 0.662429978970196,
"learning_rate": 9.553864314480294e-06,
"loss": 0.0788,
"step": 1490
},
{
"epoch": 0.678343949044586,
"grad_norm": 0.7125824073483379,
"learning_rate": 9.553273961054514e-06,
"loss": 0.072,
"step": 1491
},
{
"epoch": 0.6787989080982711,
"grad_norm": 0.8599367957772613,
"learning_rate": 9.552683235555749e-06,
"loss": 0.0765,
"step": 1492
},
{
"epoch": 0.6792538671519563,
"grad_norm": 0.7900843446637873,
"learning_rate": 9.55209213803227e-06,
"loss": 0.0861,
"step": 1493
},
{
"epoch": 0.6797088262056415,
"grad_norm": 0.9492542185178791,
"learning_rate": 9.551500668532377e-06,
"loss": 0.1036,
"step": 1494
},
{
"epoch": 0.6801637852593266,
"grad_norm": 0.5324340095596853,
"learning_rate": 9.550908827104404e-06,
"loss": 0.0509,
"step": 1495
},
{
"epoch": 0.6806187443130118,
"grad_norm": 1.4654919772375794,
"learning_rate": 9.550316613796716e-06,
"loss": 0.0891,
"step": 1496
},
{
"epoch": 0.681073703366697,
"grad_norm": 0.6964909028346599,
"learning_rate": 9.549724028657698e-06,
"loss": 0.0814,
"step": 1497
},
{
"epoch": 0.6815286624203821,
"grad_norm": 0.7118346157191014,
"learning_rate": 9.549131071735784e-06,
"loss": 0.0711,
"step": 1498
},
{
"epoch": 0.6819836214740673,
"grad_norm": 0.9814989838911676,
"learning_rate": 9.54853774307942e-06,
"loss": 0.0981,
"step": 1499
},
{
"epoch": 0.6824385805277525,
"grad_norm": 0.8030617514029292,
"learning_rate": 9.547944042737092e-06,
"loss": 0.0944,
"step": 1500
},
{
"epoch": 0.6828935395814377,
"grad_norm": 0.9091821467413523,
"learning_rate": 9.547349970757317e-06,
"loss": 0.1419,
"step": 1501
},
{
"epoch": 0.6833484986351228,
"grad_norm": 0.7604842345576438,
"learning_rate": 9.546755527188638e-06,
"loss": 0.0616,
"step": 1502
},
{
"epoch": 0.683803457688808,
"grad_norm": 0.7795635296832277,
"learning_rate": 9.546160712079629e-06,
"loss": 0.0819,
"step": 1503
},
{
"epoch": 0.6842584167424932,
"grad_norm": 0.6155010796235886,
"learning_rate": 9.545565525478896e-06,
"loss": 0.0737,
"step": 1504
},
{
"epoch": 0.6847133757961783,
"grad_norm": 0.6981564617213015,
"learning_rate": 9.544969967435079e-06,
"loss": 0.0786,
"step": 1505
},
{
"epoch": 0.6851683348498635,
"grad_norm": 0.8590705218017948,
"learning_rate": 9.54437403799684e-06,
"loss": 0.0835,
"step": 1506
},
{
"epoch": 0.6856232939035487,
"grad_norm": 0.8783591706447448,
"learning_rate": 9.543777737212876e-06,
"loss": 0.118,
"step": 1507
},
{
"epoch": 0.6860782529572339,
"grad_norm": 0.5312480753344904,
"learning_rate": 9.543181065131914e-06,
"loss": 0.0535,
"step": 1508
},
{
"epoch": 0.686533212010919,
"grad_norm": 0.6911478055364548,
"learning_rate": 9.542584021802715e-06,
"loss": 0.0651,
"step": 1509
},
{
"epoch": 0.6869881710646042,
"grad_norm": 0.910176403224045,
"learning_rate": 9.54198660727406e-06,
"loss": 0.0916,
"step": 1510
},
{
"epoch": 0.6874431301182894,
"grad_norm": 0.5369469100452242,
"learning_rate": 9.541388821594774e-06,
"loss": 0.064,
"step": 1511
},
{
"epoch": 0.6878980891719745,
"grad_norm": 0.7242695685667516,
"learning_rate": 9.540790664813702e-06,
"loss": 0.0725,
"step": 1512
},
{
"epoch": 0.6883530482256597,
"grad_norm": 0.7527422721071317,
"learning_rate": 9.540192136979722e-06,
"loss": 0.0863,
"step": 1513
},
{
"epoch": 0.6888080072793449,
"grad_norm": 0.5409793571909967,
"learning_rate": 9.539593238141745e-06,
"loss": 0.0678,
"step": 1514
},
{
"epoch": 0.6892629663330301,
"grad_norm": 0.5059270742296627,
"learning_rate": 9.538993968348706e-06,
"loss": 0.0613,
"step": 1515
},
{
"epoch": 0.6897179253867152,
"grad_norm": 0.8092866682697022,
"learning_rate": 9.538394327649581e-06,
"loss": 0.0816,
"step": 1516
},
{
"epoch": 0.6901728844404004,
"grad_norm": 0.7416822411067572,
"learning_rate": 9.537794316093366e-06,
"loss": 0.0736,
"step": 1517
},
{
"epoch": 0.6906278434940856,
"grad_norm": 0.6013123530792879,
"learning_rate": 9.537193933729092e-06,
"loss": 0.0637,
"step": 1518
},
{
"epoch": 0.6910828025477707,
"grad_norm": 1.0953662823641266,
"learning_rate": 9.53659318060582e-06,
"loss": 0.1381,
"step": 1519
},
{
"epoch": 0.6915377616014559,
"grad_norm": 0.7906081758139587,
"learning_rate": 9.535992056772639e-06,
"loss": 0.088,
"step": 1520
},
{
"epoch": 0.6919927206551411,
"grad_norm": 0.9984370937403453,
"learning_rate": 9.535390562278673e-06,
"loss": 0.086,
"step": 1521
},
{
"epoch": 0.6924476797088263,
"grad_norm": 0.7438661675719108,
"learning_rate": 9.53478869717307e-06,
"loss": 0.0771,
"step": 1522
},
{
"epoch": 0.6929026387625113,
"grad_norm": 0.85189844123529,
"learning_rate": 9.534186461505015e-06,
"loss": 0.1109,
"step": 1523
},
{
"epoch": 0.6933575978161965,
"grad_norm": 0.7215256903381998,
"learning_rate": 9.533583855323717e-06,
"loss": 0.0947,
"step": 1524
},
{
"epoch": 0.6938125568698817,
"grad_norm": 0.8936614524747819,
"learning_rate": 9.532980878678422e-06,
"loss": 0.0731,
"step": 1525
},
{
"epoch": 0.6942675159235668,
"grad_norm": 0.7734700292932609,
"learning_rate": 9.5323775316184e-06,
"loss": 0.0844,
"step": 1526
},
{
"epoch": 0.694722474977252,
"grad_norm": 0.7521845435610183,
"learning_rate": 9.531773814192953e-06,
"loss": 0.0878,
"step": 1527
},
{
"epoch": 0.6951774340309372,
"grad_norm": 0.890089227377408,
"learning_rate": 9.531169726451417e-06,
"loss": 0.1128,
"step": 1528
},
{
"epoch": 0.6956323930846224,
"grad_norm": 0.7682866565773229,
"learning_rate": 9.530565268443153e-06,
"loss": 0.0956,
"step": 1529
},
{
"epoch": 0.6960873521383075,
"grad_norm": 0.9617852359873308,
"learning_rate": 9.529960440217554e-06,
"loss": 0.1088,
"step": 1530
},
{
"epoch": 0.6965423111919927,
"grad_norm": 0.9775947633570551,
"learning_rate": 9.529355241824045e-06,
"loss": 0.107,
"step": 1531
},
{
"epoch": 0.6969972702456779,
"grad_norm": 0.6007455012792351,
"learning_rate": 9.528749673312082e-06,
"loss": 0.0743,
"step": 1532
},
{
"epoch": 0.697452229299363,
"grad_norm": 0.5419764603212612,
"learning_rate": 9.528143734731143e-06,
"loss": 0.0822,
"step": 1533
},
{
"epoch": 0.6979071883530482,
"grad_norm": 0.8185575482665152,
"learning_rate": 9.52753742613075e-06,
"loss": 0.0832,
"step": 1534
},
{
"epoch": 0.6983621474067334,
"grad_norm": 0.9643638751029543,
"learning_rate": 9.526930747560446e-06,
"loss": 0.1026,
"step": 1535
},
{
"epoch": 0.6988171064604186,
"grad_norm": 0.8502651132594353,
"learning_rate": 9.526323699069803e-06,
"loss": 0.0902,
"step": 1536
},
{
"epoch": 0.6992720655141037,
"grad_norm": 0.5376181329235236,
"learning_rate": 9.525716280708428e-06,
"loss": 0.068,
"step": 1537
},
{
"epoch": 0.6997270245677889,
"grad_norm": 0.7166675033334694,
"learning_rate": 9.525108492525957e-06,
"loss": 0.0752,
"step": 1538
},
{
"epoch": 0.7001819836214741,
"grad_norm": 0.43432195935007917,
"learning_rate": 9.524500334572054e-06,
"loss": 0.0417,
"step": 1539
},
{
"epoch": 0.7006369426751592,
"grad_norm": 0.8369054167821826,
"learning_rate": 9.523891806896417e-06,
"loss": 0.1098,
"step": 1540
},
{
"epoch": 0.7010919017288444,
"grad_norm": 0.49781336551041033,
"learning_rate": 9.523282909548773e-06,
"loss": 0.0618,
"step": 1541
},
{
"epoch": 0.7015468607825296,
"grad_norm": 0.9187882410427298,
"learning_rate": 9.522673642578873e-06,
"loss": 0.1247,
"step": 1542
},
{
"epoch": 0.7020018198362148,
"grad_norm": 0.5007920591193696,
"learning_rate": 9.522064006036509e-06,
"loss": 0.0601,
"step": 1543
},
{
"epoch": 0.7024567788898999,
"grad_norm": 0.582945252861272,
"learning_rate": 9.521453999971497e-06,
"loss": 0.0585,
"step": 1544
},
{
"epoch": 0.7029117379435851,
"grad_norm": 0.5749885951853907,
"learning_rate": 9.520843624433681e-06,
"loss": 0.0664,
"step": 1545
},
{
"epoch": 0.7033666969972703,
"grad_norm": 0.9724598324631707,
"learning_rate": 9.520232879472942e-06,
"loss": 0.1199,
"step": 1546
},
{
"epoch": 0.7038216560509554,
"grad_norm": 1.0592052108390146,
"learning_rate": 9.519621765139181e-06,
"loss": 0.1278,
"step": 1547
},
{
"epoch": 0.7042766151046406,
"grad_norm": 0.42374402440173636,
"learning_rate": 9.519010281482344e-06,
"loss": 0.0446,
"step": 1548
},
{
"epoch": 0.7047315741583258,
"grad_norm": 1.102301602930716,
"learning_rate": 9.518398428552393e-06,
"loss": 0.1226,
"step": 1549
},
{
"epoch": 0.705186533212011,
"grad_norm": 0.6842519583257138,
"learning_rate": 9.51778620639933e-06,
"loss": 0.0905,
"step": 1550
},
{
"epoch": 0.7056414922656961,
"grad_norm": 0.7530573117253311,
"learning_rate": 9.517173615073177e-06,
"loss": 0.0766,
"step": 1551
},
{
"epoch": 0.7060964513193813,
"grad_norm": 0.43285639961604566,
"learning_rate": 9.516560654623996e-06,
"loss": 0.0475,
"step": 1552
},
{
"epoch": 0.7065514103730665,
"grad_norm": 0.9094561094681402,
"learning_rate": 9.515947325101875e-06,
"loss": 0.0896,
"step": 1553
},
{
"epoch": 0.7070063694267515,
"grad_norm": 0.6097385256206468,
"learning_rate": 9.515333626556933e-06,
"loss": 0.0653,
"step": 1554
},
{
"epoch": 0.7074613284804367,
"grad_norm": 0.7304393114645329,
"learning_rate": 9.514719559039318e-06,
"loss": 0.0896,
"step": 1555
},
{
"epoch": 0.707916287534122,
"grad_norm": 0.8799769831067698,
"learning_rate": 9.514105122599208e-06,
"loss": 0.1176,
"step": 1556
},
{
"epoch": 0.7083712465878071,
"grad_norm": 1.0962688093811397,
"learning_rate": 9.513490317286815e-06,
"loss": 0.1174,
"step": 1557
},
{
"epoch": 0.7088262056414922,
"grad_norm": 0.8022559500547495,
"learning_rate": 9.512875143152373e-06,
"loss": 0.0969,
"step": 1558
},
{
"epoch": 0.7092811646951774,
"grad_norm": 0.37133918747574174,
"learning_rate": 9.512259600246156e-06,
"loss": 0.031,
"step": 1559
},
{
"epoch": 0.7097361237488626,
"grad_norm": 0.6214125216955318,
"learning_rate": 9.511643688618463e-06,
"loss": 0.0943,
"step": 1560
},
{
"epoch": 0.7101910828025477,
"grad_norm": 0.7097270108607417,
"learning_rate": 9.51102740831962e-06,
"loss": 0.0847,
"step": 1561
},
{
"epoch": 0.7106460418562329,
"grad_norm": 0.8290870913254417,
"learning_rate": 9.510410759399991e-06,
"loss": 0.0867,
"step": 1562
},
{
"epoch": 0.7111010009099181,
"grad_norm": 0.7141101307254801,
"learning_rate": 9.50979374190996e-06,
"loss": 0.0838,
"step": 1563
},
{
"epoch": 0.7115559599636033,
"grad_norm": 0.8532705780985276,
"learning_rate": 9.509176355899954e-06,
"loss": 0.09,
"step": 1564
},
{
"epoch": 0.7120109190172884,
"grad_norm": 0.6858037908830302,
"learning_rate": 9.508558601420417e-06,
"loss": 0.0637,
"step": 1565
},
{
"epoch": 0.7124658780709736,
"grad_norm": 0.7489578082911201,
"learning_rate": 9.507940478521833e-06,
"loss": 0.1059,
"step": 1566
},
{
"epoch": 0.7129208371246588,
"grad_norm": 0.5241685648277268,
"learning_rate": 9.507321987254712e-06,
"loss": 0.0474,
"step": 1567
},
{
"epoch": 0.7133757961783439,
"grad_norm": 0.9862924439076355,
"learning_rate": 9.50670312766959e-06,
"loss": 0.1047,
"step": 1568
},
{
"epoch": 0.7138307552320291,
"grad_norm": 0.8286292773017996,
"learning_rate": 9.506083899817043e-06,
"loss": 0.0808,
"step": 1569
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.8166629192761119,
"learning_rate": 9.505464303747667e-06,
"loss": 0.079,
"step": 1570
},
{
"epoch": 0.7147406733393995,
"grad_norm": 0.6651663578468047,
"learning_rate": 9.504844339512096e-06,
"loss": 0.0879,
"step": 1571
},
{
"epoch": 0.7151956323930846,
"grad_norm": 0.5230779536546156,
"learning_rate": 9.50422400716099e-06,
"loss": 0.0585,
"step": 1572
},
{
"epoch": 0.7156505914467698,
"grad_norm": 0.6543543054934573,
"learning_rate": 9.503603306745036e-06,
"loss": 0.0564,
"step": 1573
},
{
"epoch": 0.716105550500455,
"grad_norm": 0.7812592861176204,
"learning_rate": 9.502982238314962e-06,
"loss": 0.0874,
"step": 1574
},
{
"epoch": 0.7165605095541401,
"grad_norm": 0.5040232473993467,
"learning_rate": 9.502360801921512e-06,
"loss": 0.0532,
"step": 1575
},
{
"epoch": 0.7170154686078253,
"grad_norm": 0.8631279038726943,
"learning_rate": 9.501738997615471e-06,
"loss": 0.1045,
"step": 1576
},
{
"epoch": 0.7174704276615105,
"grad_norm": 0.7716014465645913,
"learning_rate": 9.501116825447648e-06,
"loss": 0.068,
"step": 1577
},
{
"epoch": 0.7179253867151957,
"grad_norm": 0.5327432187838176,
"learning_rate": 9.500494285468884e-06,
"loss": 0.053,
"step": 1578
},
{
"epoch": 0.7183803457688808,
"grad_norm": 0.8209926537375553,
"learning_rate": 9.499871377730053e-06,
"loss": 0.1164,
"step": 1579
},
{
"epoch": 0.718835304822566,
"grad_norm": 0.5454374508074649,
"learning_rate": 9.499248102282052e-06,
"loss": 0.0579,
"step": 1580
},
{
"epoch": 0.7192902638762512,
"grad_norm": 0.4944315103743207,
"learning_rate": 9.498624459175815e-06,
"loss": 0.0542,
"step": 1581
},
{
"epoch": 0.7197452229299363,
"grad_norm": 0.8372013648456964,
"learning_rate": 9.498000448462305e-06,
"loss": 0.0948,
"step": 1582
},
{
"epoch": 0.7202001819836215,
"grad_norm": 0.6792072434969908,
"learning_rate": 9.49737607019251e-06,
"loss": 0.0683,
"step": 1583
},
{
"epoch": 0.7206551410373067,
"grad_norm": 0.6679228302277659,
"learning_rate": 9.496751324417452e-06,
"loss": 0.0526,
"step": 1584
},
{
"epoch": 0.7211101000909919,
"grad_norm": 0.830168268257237,
"learning_rate": 9.496126211188184e-06,
"loss": 0.1049,
"step": 1585
},
{
"epoch": 0.721565059144677,
"grad_norm": 0.7614112606151382,
"learning_rate": 9.495500730555784e-06,
"loss": 0.0966,
"step": 1586
},
{
"epoch": 0.7220200181983621,
"grad_norm": 0.7574732623314945,
"learning_rate": 9.494874882571368e-06,
"loss": 0.0648,
"step": 1587
},
{
"epoch": 0.7224749772520473,
"grad_norm": 0.7541681951930181,
"learning_rate": 9.494248667286075e-06,
"loss": 0.0905,
"step": 1588
},
{
"epoch": 0.7229299363057324,
"grad_norm": 0.776748715422375,
"learning_rate": 9.493622084751076e-06,
"loss": 0.0841,
"step": 1589
},
{
"epoch": 0.7233848953594176,
"grad_norm": 0.6440945504942991,
"learning_rate": 9.492995135017574e-06,
"loss": 0.0779,
"step": 1590
},
{
"epoch": 0.7238398544131028,
"grad_norm": 0.658893968607762,
"learning_rate": 9.4923678181368e-06,
"loss": 0.0862,
"step": 1591
},
{
"epoch": 0.724294813466788,
"grad_norm": 0.764304310956247,
"learning_rate": 9.491740134160014e-06,
"loss": 0.0834,
"step": 1592
},
{
"epoch": 0.7247497725204731,
"grad_norm": 1.246667162089055,
"learning_rate": 9.491112083138509e-06,
"loss": 0.141,
"step": 1593
},
{
"epoch": 0.7252047315741583,
"grad_norm": 0.7827390484343668,
"learning_rate": 9.490483665123606e-06,
"loss": 0.0687,
"step": 1594
},
{
"epoch": 0.7256596906278435,
"grad_norm": 0.6055248563993239,
"learning_rate": 9.489854880166658e-06,
"loss": 0.0716,
"step": 1595
},
{
"epoch": 0.7261146496815286,
"grad_norm": 0.7067865427149594,
"learning_rate": 9.489225728319044e-06,
"loss": 0.0756,
"step": 1596
},
{
"epoch": 0.7265696087352138,
"grad_norm": 0.85395818798431,
"learning_rate": 9.488596209632179e-06,
"loss": 0.1099,
"step": 1597
},
{
"epoch": 0.727024567788899,
"grad_norm": 0.6870669290352402,
"learning_rate": 9.4879663241575e-06,
"loss": 0.0703,
"step": 1598
},
{
"epoch": 0.7274795268425842,
"grad_norm": 1.2809048497988667,
"learning_rate": 9.48733607194648e-06,
"loss": 0.1663,
"step": 1599
},
{
"epoch": 0.7279344858962693,
"grad_norm": 0.7180890087653823,
"learning_rate": 9.486705453050622e-06,
"loss": 0.0738,
"step": 1600
},
{
"epoch": 0.7283894449499545,
"grad_norm": 0.5662460892211576,
"learning_rate": 9.486074467521456e-06,
"loss": 0.0627,
"step": 1601
},
{
"epoch": 0.7288444040036397,
"grad_norm": 0.7172800606287587,
"learning_rate": 9.485443115410541e-06,
"loss": 0.0715,
"step": 1602
},
{
"epoch": 0.7292993630573248,
"grad_norm": 0.6146064647413995,
"learning_rate": 9.484811396769475e-06,
"loss": 0.0828,
"step": 1603
},
{
"epoch": 0.72975432211101,
"grad_norm": 0.8606888467276742,
"learning_rate": 9.484179311649873e-06,
"loss": 0.0962,
"step": 1604
},
{
"epoch": 0.7302092811646952,
"grad_norm": 0.46814164753859155,
"learning_rate": 9.483546860103388e-06,
"loss": 0.0477,
"step": 1605
},
{
"epoch": 0.7306642402183804,
"grad_norm": 0.7370090010007736,
"learning_rate": 9.4829140421817e-06,
"loss": 0.081,
"step": 1606
},
{
"epoch": 0.7311191992720655,
"grad_norm": 1.0689466216112777,
"learning_rate": 9.482280857936522e-06,
"loss": 0.109,
"step": 1607
},
{
"epoch": 0.7315741583257507,
"grad_norm": 0.4147348220425697,
"learning_rate": 9.481647307419594e-06,
"loss": 0.0479,
"step": 1608
},
{
"epoch": 0.7320291173794359,
"grad_norm": 0.4998747516198886,
"learning_rate": 9.481013390682687e-06,
"loss": 0.0634,
"step": 1609
},
{
"epoch": 0.732484076433121,
"grad_norm": 0.8673371359679307,
"learning_rate": 9.480379107777601e-06,
"loss": 0.1108,
"step": 1610
},
{
"epoch": 0.7329390354868062,
"grad_norm": 0.6369274329058493,
"learning_rate": 9.47974445875617e-06,
"loss": 0.0698,
"step": 1611
},
{
"epoch": 0.7333939945404914,
"grad_norm": 0.6434647227835387,
"learning_rate": 9.47910944367025e-06,
"loss": 0.0618,
"step": 1612
},
{
"epoch": 0.7338489535941766,
"grad_norm": 0.8035955314379585,
"learning_rate": 9.478474062571735e-06,
"loss": 0.0997,
"step": 1613
},
{
"epoch": 0.7343039126478617,
"grad_norm": 0.7996949463502321,
"learning_rate": 9.477838315512544e-06,
"loss": 0.0873,
"step": 1614
},
{
"epoch": 0.7347588717015469,
"grad_norm": 0.6484970204244012,
"learning_rate": 9.477202202544626e-06,
"loss": 0.0925,
"step": 1615
},
{
"epoch": 0.7352138307552321,
"grad_norm": 0.6478821974846899,
"learning_rate": 9.476565723719966e-06,
"loss": 0.0693,
"step": 1616
},
{
"epoch": 0.7356687898089171,
"grad_norm": 0.6896940284490023,
"learning_rate": 9.475928879090568e-06,
"loss": 0.0763,
"step": 1617
},
{
"epoch": 0.7361237488626023,
"grad_norm": 0.6758264439259065,
"learning_rate": 9.475291668708476e-06,
"loss": 0.0717,
"step": 1618
},
{
"epoch": 0.7365787079162875,
"grad_norm": 0.6285383601705616,
"learning_rate": 9.474654092625758e-06,
"loss": 0.0561,
"step": 1619
},
{
"epoch": 0.7370336669699727,
"grad_norm": 0.7488998942485512,
"learning_rate": 9.474016150894518e-06,
"loss": 0.0765,
"step": 1620
},
{
"epoch": 0.7374886260236578,
"grad_norm": 0.7511340475878087,
"learning_rate": 9.47337784356688e-06,
"loss": 0.0865,
"step": 1621
},
{
"epoch": 0.737943585077343,
"grad_norm": 0.6908706816034008,
"learning_rate": 9.472739170695006e-06,
"loss": 0.0879,
"step": 1622
},
{
"epoch": 0.7383985441310282,
"grad_norm": 0.9159671053782389,
"learning_rate": 9.472100132331089e-06,
"loss": 0.0862,
"step": 1623
},
{
"epoch": 0.7388535031847133,
"grad_norm": 0.8367180794291794,
"learning_rate": 9.471460728527342e-06,
"loss": 0.0988,
"step": 1624
},
{
"epoch": 0.7393084622383985,
"grad_norm": 0.6396536181540736,
"learning_rate": 9.470820959336018e-06,
"loss": 0.0742,
"step": 1625
},
{
"epoch": 0.7397634212920837,
"grad_norm": 0.7212059639642758,
"learning_rate": 9.470180824809394e-06,
"loss": 0.0887,
"step": 1626
},
{
"epoch": 0.7402183803457689,
"grad_norm": 0.6570480817818456,
"learning_rate": 9.469540324999782e-06,
"loss": 0.0654,
"step": 1627
},
{
"epoch": 0.740673339399454,
"grad_norm": 0.6780217435395393,
"learning_rate": 9.468899459959518e-06,
"loss": 0.0613,
"step": 1628
},
{
"epoch": 0.7411282984531392,
"grad_norm": 0.8367065537687267,
"learning_rate": 9.468258229740972e-06,
"loss": 0.087,
"step": 1629
},
{
"epoch": 0.7415832575068244,
"grad_norm": 0.6724757485261361,
"learning_rate": 9.467616634396542e-06,
"loss": 0.0513,
"step": 1630
},
{
"epoch": 0.7420382165605095,
"grad_norm": 0.5923362651506067,
"learning_rate": 9.466974673978654e-06,
"loss": 0.0668,
"step": 1631
},
{
"epoch": 0.7424931756141947,
"grad_norm": 0.8046255156703264,
"learning_rate": 9.466332348539772e-06,
"loss": 0.0888,
"step": 1632
},
{
"epoch": 0.7429481346678799,
"grad_norm": 0.7456071657218726,
"learning_rate": 9.465689658132379e-06,
"loss": 0.0872,
"step": 1633
},
{
"epoch": 0.7434030937215651,
"grad_norm": 0.8751254537474247,
"learning_rate": 9.465046602808994e-06,
"loss": 0.0901,
"step": 1634
},
{
"epoch": 0.7438580527752502,
"grad_norm": 0.9953711560207276,
"learning_rate": 9.464403182622164e-06,
"loss": 0.1175,
"step": 1635
},
{
"epoch": 0.7443130118289354,
"grad_norm": 0.738323897945569,
"learning_rate": 9.463759397624466e-06,
"loss": 0.1016,
"step": 1636
},
{
"epoch": 0.7447679708826206,
"grad_norm": 0.620705920516562,
"learning_rate": 9.46311524786851e-06,
"loss": 0.0654,
"step": 1637
},
{
"epoch": 0.7452229299363057,
"grad_norm": 1.2433273775382216,
"learning_rate": 9.462470733406929e-06,
"loss": 0.1403,
"step": 1638
},
{
"epoch": 0.7456778889899909,
"grad_norm": 1.0268174749706445,
"learning_rate": 9.461825854292394e-06,
"loss": 0.1065,
"step": 1639
},
{
"epoch": 0.7461328480436761,
"grad_norm": 0.6942991337802967,
"learning_rate": 9.4611806105776e-06,
"loss": 0.0736,
"step": 1640
},
{
"epoch": 0.7465878070973613,
"grad_norm": 0.8367822612372433,
"learning_rate": 9.460535002315272e-06,
"loss": 0.089,
"step": 1641
},
{
"epoch": 0.7470427661510464,
"grad_norm": 0.5929887457730553,
"learning_rate": 9.459889029558167e-06,
"loss": 0.0665,
"step": 1642
},
{
"epoch": 0.7474977252047316,
"grad_norm": 0.5692342733265978,
"learning_rate": 9.459242692359072e-06,
"loss": 0.0708,
"step": 1643
},
{
"epoch": 0.7479526842584168,
"grad_norm": 0.6049162715481944,
"learning_rate": 9.4585959907708e-06,
"loss": 0.0716,
"step": 1644
},
{
"epoch": 0.7484076433121019,
"grad_norm": 0.5865800556894495,
"learning_rate": 9.457948924846201e-06,
"loss": 0.0562,
"step": 1645
},
{
"epoch": 0.7488626023657871,
"grad_norm": 1.018263961729041,
"learning_rate": 9.457301494638147e-06,
"loss": 0.1129,
"step": 1646
},
{
"epoch": 0.7493175614194723,
"grad_norm": 0.8420303347709615,
"learning_rate": 9.456653700199542e-06,
"loss": 0.0982,
"step": 1647
},
{
"epoch": 0.7497725204731575,
"grad_norm": 0.6178217269864875,
"learning_rate": 9.456005541583326e-06,
"loss": 0.0777,
"step": 1648
},
{
"epoch": 0.7502274795268425,
"grad_norm": 0.6159701780113571,
"learning_rate": 9.455357018842458e-06,
"loss": 0.075,
"step": 1649
},
{
"epoch": 0.7506824385805277,
"grad_norm": 0.5563337669331565,
"learning_rate": 9.454708132029936e-06,
"loss": 0.0594,
"step": 1650
},
{
"epoch": 0.7511373976342129,
"grad_norm": 0.7796132603413727,
"learning_rate": 9.454058881198782e-06,
"loss": 0.0842,
"step": 1651
},
{
"epoch": 0.7515923566878981,
"grad_norm": 0.5977999349867541,
"learning_rate": 9.45340926640205e-06,
"loss": 0.0623,
"step": 1652
},
{
"epoch": 0.7520473157415832,
"grad_norm": 0.7762091660359064,
"learning_rate": 9.452759287692824e-06,
"loss": 0.0923,
"step": 1653
},
{
"epoch": 0.7525022747952684,
"grad_norm": 1.029286283612893,
"learning_rate": 9.452108945124218e-06,
"loss": 0.1114,
"step": 1654
},
{
"epoch": 0.7529572338489536,
"grad_norm": 0.5046695202197234,
"learning_rate": 9.451458238749375e-06,
"loss": 0.058,
"step": 1655
},
{
"epoch": 0.7534121929026387,
"grad_norm": 0.6262659207860063,
"learning_rate": 9.450807168621468e-06,
"loss": 0.0607,
"step": 1656
},
{
"epoch": 0.7538671519563239,
"grad_norm": 0.7451490801568118,
"learning_rate": 9.450155734793697e-06,
"loss": 0.0716,
"step": 1657
},
{
"epoch": 0.7543221110100091,
"grad_norm": 0.6504007368655154,
"learning_rate": 9.449503937319297e-06,
"loss": 0.0913,
"step": 1658
},
{
"epoch": 0.7547770700636943,
"grad_norm": 0.8923820492879996,
"learning_rate": 9.448851776251528e-06,
"loss": 0.0984,
"step": 1659
},
{
"epoch": 0.7552320291173794,
"grad_norm": 0.7256175088606572,
"learning_rate": 9.448199251643684e-06,
"loss": 0.0834,
"step": 1660
},
{
"epoch": 0.7556869881710646,
"grad_norm": 0.7778885787730276,
"learning_rate": 9.447546363549085e-06,
"loss": 0.0878,
"step": 1661
},
{
"epoch": 0.7561419472247498,
"grad_norm": 0.8265030986085233,
"learning_rate": 9.446893112021083e-06,
"loss": 0.0827,
"step": 1662
},
{
"epoch": 0.7565969062784349,
"grad_norm": 0.5801162274559535,
"learning_rate": 9.446239497113055e-06,
"loss": 0.0797,
"step": 1663
},
{
"epoch": 0.7570518653321201,
"grad_norm": 0.8974914764997551,
"learning_rate": 9.445585518878418e-06,
"loss": 0.1088,
"step": 1664
},
{
"epoch": 0.7575068243858053,
"grad_norm": 0.8878060872125964,
"learning_rate": 9.444931177370605e-06,
"loss": 0.1235,
"step": 1665
},
{
"epoch": 0.7579617834394905,
"grad_norm": 0.5088737676913533,
"learning_rate": 9.44427647264309e-06,
"loss": 0.0478,
"step": 1666
},
{
"epoch": 0.7584167424931756,
"grad_norm": 0.7484910765250183,
"learning_rate": 9.443621404749374e-06,
"loss": 0.0686,
"step": 1667
},
{
"epoch": 0.7588717015468608,
"grad_norm": 0.6292123912530658,
"learning_rate": 9.442965973742983e-06,
"loss": 0.0652,
"step": 1668
},
{
"epoch": 0.759326660600546,
"grad_norm": 1.037223955207567,
"learning_rate": 9.442310179677476e-06,
"loss": 0.0827,
"step": 1669
},
{
"epoch": 0.7597816196542311,
"grad_norm": 0.6769034013570638,
"learning_rate": 9.441654022606444e-06,
"loss": 0.0771,
"step": 1670
},
{
"epoch": 0.7602365787079163,
"grad_norm": 0.8310244395490821,
"learning_rate": 9.440997502583503e-06,
"loss": 0.091,
"step": 1671
},
{
"epoch": 0.7606915377616015,
"grad_norm": 1.0039785109365194,
"learning_rate": 9.4403406196623e-06,
"loss": 0.1251,
"step": 1672
},
{
"epoch": 0.7611464968152867,
"grad_norm": 0.7908056524331212,
"learning_rate": 9.439683373896515e-06,
"loss": 0.0876,
"step": 1673
},
{
"epoch": 0.7616014558689718,
"grad_norm": 1.0809832712577787,
"learning_rate": 9.439025765339852e-06,
"loss": 0.1256,
"step": 1674
},
{
"epoch": 0.762056414922657,
"grad_norm": 0.5964161616065347,
"learning_rate": 9.438367794046053e-06,
"loss": 0.0585,
"step": 1675
},
{
"epoch": 0.7625113739763422,
"grad_norm": 0.8617975528364193,
"learning_rate": 9.437709460068882e-06,
"loss": 0.0783,
"step": 1676
},
{
"epoch": 0.7629663330300273,
"grad_norm": 0.6361215357389327,
"learning_rate": 9.437050763462132e-06,
"loss": 0.0692,
"step": 1677
},
{
"epoch": 0.7634212920837125,
"grad_norm": 0.9790069893643866,
"learning_rate": 9.436391704279632e-06,
"loss": 0.1173,
"step": 1678
},
{
"epoch": 0.7638762511373977,
"grad_norm": 1.1287905857392149,
"learning_rate": 9.435732282575235e-06,
"loss": 0.1505,
"step": 1679
},
{
"epoch": 0.7643312101910829,
"grad_norm": 0.8195744592905398,
"learning_rate": 9.435072498402832e-06,
"loss": 0.0877,
"step": 1680
},
{
"epoch": 0.7647861692447679,
"grad_norm": 0.5293612997987346,
"learning_rate": 9.434412351816329e-06,
"loss": 0.0609,
"step": 1681
},
{
"epoch": 0.7652411282984531,
"grad_norm": 0.7565664140640663,
"learning_rate": 9.433751842869676e-06,
"loss": 0.0895,
"step": 1682
},
{
"epoch": 0.7656960873521383,
"grad_norm": 0.8390610329820178,
"learning_rate": 9.433090971616842e-06,
"loss": 0.0823,
"step": 1683
},
{
"epoch": 0.7661510464058234,
"grad_norm": 0.7979326314286513,
"learning_rate": 9.432429738111836e-06,
"loss": 0.0893,
"step": 1684
},
{
"epoch": 0.7666060054595086,
"grad_norm": 0.7985876042778349,
"learning_rate": 9.431768142408687e-06,
"loss": 0.0965,
"step": 1685
},
{
"epoch": 0.7670609645131938,
"grad_norm": 0.7008114448081032,
"learning_rate": 9.431106184561462e-06,
"loss": 0.0894,
"step": 1686
},
{
"epoch": 0.767515923566879,
"grad_norm": 0.8506122352220377,
"learning_rate": 9.430443864624249e-06,
"loss": 0.0949,
"step": 1687
},
{
"epoch": 0.7679708826205641,
"grad_norm": 1.0900644244466022,
"learning_rate": 9.429781182651171e-06,
"loss": 0.1211,
"step": 1688
},
{
"epoch": 0.7684258416742493,
"grad_norm": 0.585079487316927,
"learning_rate": 9.429118138696378e-06,
"loss": 0.0642,
"step": 1689
},
{
"epoch": 0.7688808007279345,
"grad_norm": 0.8727981223997378,
"learning_rate": 9.428454732814055e-06,
"loss": 0.0987,
"step": 1690
},
{
"epoch": 0.7693357597816196,
"grad_norm": 0.7032463083497149,
"learning_rate": 9.427790965058407e-06,
"loss": 0.0685,
"step": 1691
},
{
"epoch": 0.7697907188353048,
"grad_norm": 0.6784390616651746,
"learning_rate": 9.42712683548368e-06,
"loss": 0.079,
"step": 1692
},
{
"epoch": 0.77024567788899,
"grad_norm": 0.774501448184362,
"learning_rate": 9.426462344144138e-06,
"loss": 0.0784,
"step": 1693
},
{
"epoch": 0.7707006369426752,
"grad_norm": 0.7793988116138444,
"learning_rate": 9.425797491094086e-06,
"loss": 0.0801,
"step": 1694
},
{
"epoch": 0.7711555959963603,
"grad_norm": 0.7642360389143683,
"learning_rate": 9.425132276387847e-06,
"loss": 0.1009,
"step": 1695
},
{
"epoch": 0.7716105550500455,
"grad_norm": 0.6080046843370063,
"learning_rate": 9.424466700079785e-06,
"loss": 0.0688,
"step": 1696
},
{
"epoch": 0.7720655141037307,
"grad_norm": 0.6270167280264678,
"learning_rate": 9.423800762224283e-06,
"loss": 0.0626,
"step": 1697
},
{
"epoch": 0.7725204731574158,
"grad_norm": 0.5357586110049548,
"learning_rate": 9.42313446287576e-06,
"loss": 0.0626,
"step": 1698
},
{
"epoch": 0.772975432211101,
"grad_norm": 0.6233095813256608,
"learning_rate": 9.422467802088664e-06,
"loss": 0.0804,
"step": 1699
},
{
"epoch": 0.7734303912647862,
"grad_norm": 0.7158265191654914,
"learning_rate": 9.42180077991747e-06,
"loss": 0.0887,
"step": 1700
},
{
"epoch": 0.7738853503184714,
"grad_norm": 1.0305735114746193,
"learning_rate": 9.421133396416687e-06,
"loss": 0.1441,
"step": 1701
},
{
"epoch": 0.7743403093721565,
"grad_norm": 0.6965845039033058,
"learning_rate": 9.420465651640847e-06,
"loss": 0.079,
"step": 1702
},
{
"epoch": 0.7747952684258417,
"grad_norm": 0.4529773063241175,
"learning_rate": 9.419797545644516e-06,
"loss": 0.0443,
"step": 1703
},
{
"epoch": 0.7752502274795269,
"grad_norm": 0.5407082720421394,
"learning_rate": 9.41912907848229e-06,
"loss": 0.0625,
"step": 1704
},
{
"epoch": 0.775705186533212,
"grad_norm": 0.5625290405803486,
"learning_rate": 9.418460250208791e-06,
"loss": 0.0695,
"step": 1705
},
{
"epoch": 0.7761601455868972,
"grad_norm": 0.5288549658523206,
"learning_rate": 9.417791060878677e-06,
"loss": 0.0546,
"step": 1706
},
{
"epoch": 0.7766151046405824,
"grad_norm": 0.6390336517076213,
"learning_rate": 9.417121510546626e-06,
"loss": 0.0474,
"step": 1707
},
{
"epoch": 0.7770700636942676,
"grad_norm": 1.1628554226147039,
"learning_rate": 9.416451599267353e-06,
"loss": 0.1427,
"step": 1708
},
{
"epoch": 0.7775250227479527,
"grad_norm": 0.5775794942631142,
"learning_rate": 9.415781327095601e-06,
"loss": 0.0722,
"step": 1709
},
{
"epoch": 0.7779799818016379,
"grad_norm": 0.6702327788675698,
"learning_rate": 9.415110694086139e-06,
"loss": 0.0863,
"step": 1710
},
{
"epoch": 0.778434940855323,
"grad_norm": 1.0756620214218862,
"learning_rate": 9.41443970029377e-06,
"loss": 0.0916,
"step": 1711
},
{
"epoch": 0.7788898999090081,
"grad_norm": 0.6873597883249742,
"learning_rate": 9.413768345773324e-06,
"loss": 0.0928,
"step": 1712
},
{
"epoch": 0.7793448589626933,
"grad_norm": 0.546687059556293,
"learning_rate": 9.413096630579661e-06,
"loss": 0.0681,
"step": 1713
},
{
"epoch": 0.7797998180163785,
"grad_norm": 0.5882776722743176,
"learning_rate": 9.412424554767672e-06,
"loss": 0.0666,
"step": 1714
},
{
"epoch": 0.7802547770700637,
"grad_norm": 0.7757931395434748,
"learning_rate": 9.411752118392272e-06,
"loss": 0.0961,
"step": 1715
},
{
"epoch": 0.7807097361237488,
"grad_norm": 0.7533384044089068,
"learning_rate": 9.411079321508416e-06,
"loss": 0.0915,
"step": 1716
},
{
"epoch": 0.781164695177434,
"grad_norm": 0.6690633163427073,
"learning_rate": 9.410406164171076e-06,
"loss": 0.0757,
"step": 1717
},
{
"epoch": 0.7816196542311192,
"grad_norm": 0.9875033482174213,
"learning_rate": 9.40973264643526e-06,
"loss": 0.1016,
"step": 1718
},
{
"epoch": 0.7820746132848043,
"grad_norm": 0.7285855686862363,
"learning_rate": 9.409058768356007e-06,
"loss": 0.0777,
"step": 1719
},
{
"epoch": 0.7825295723384895,
"grad_norm": 0.5412833929378409,
"learning_rate": 9.408384529988385e-06,
"loss": 0.0596,
"step": 1720
},
{
"epoch": 0.7829845313921747,
"grad_norm": 0.48748390975323075,
"learning_rate": 9.407709931387486e-06,
"loss": 0.0451,
"step": 1721
},
{
"epoch": 0.7834394904458599,
"grad_norm": 0.8626755233369133,
"learning_rate": 9.407034972608436e-06,
"loss": 0.1093,
"step": 1722
},
{
"epoch": 0.783894449499545,
"grad_norm": 0.5986423081381415,
"learning_rate": 9.40635965370639e-06,
"loss": 0.0737,
"step": 1723
},
{
"epoch": 0.7843494085532302,
"grad_norm": 0.8697508747552452,
"learning_rate": 9.40568397473653e-06,
"loss": 0.0748,
"step": 1724
},
{
"epoch": 0.7848043676069154,
"grad_norm": 0.6651587535516658,
"learning_rate": 9.405007935754076e-06,
"loss": 0.0553,
"step": 1725
},
{
"epoch": 0.7852593266606005,
"grad_norm": 1.1307670638395897,
"learning_rate": 9.404331536814265e-06,
"loss": 0.1451,
"step": 1726
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.6724877006657928,
"learning_rate": 9.40365477797237e-06,
"loss": 0.0803,
"step": 1727
},
{
"epoch": 0.7861692447679709,
"grad_norm": 0.739524107451132,
"learning_rate": 9.40297765928369e-06,
"loss": 0.0713,
"step": 1728
},
{
"epoch": 0.7866242038216561,
"grad_norm": 0.6341880042511068,
"learning_rate": 9.402300180803563e-06,
"loss": 0.0739,
"step": 1729
},
{
"epoch": 0.7870791628753412,
"grad_norm": 0.5809522499341311,
"learning_rate": 9.401622342587346e-06,
"loss": 0.067,
"step": 1730
},
{
"epoch": 0.7875341219290264,
"grad_norm": 0.6208756444695567,
"learning_rate": 9.400944144690428e-06,
"loss": 0.0865,
"step": 1731
},
{
"epoch": 0.7879890809827116,
"grad_norm": 0.7358085271263743,
"learning_rate": 9.400265587168226e-06,
"loss": 0.0827,
"step": 1732
},
{
"epoch": 0.7884440400363967,
"grad_norm": 0.6985098389174249,
"learning_rate": 9.399586670076196e-06,
"loss": 0.0784,
"step": 1733
},
{
"epoch": 0.7888989990900819,
"grad_norm": 0.6524277365731544,
"learning_rate": 9.39890739346981e-06,
"loss": 0.0759,
"step": 1734
},
{
"epoch": 0.7893539581437671,
"grad_norm": 0.8500489687124628,
"learning_rate": 9.398227757404576e-06,
"loss": 0.1139,
"step": 1735
},
{
"epoch": 0.7898089171974523,
"grad_norm": 0.49161558761743546,
"learning_rate": 9.397547761936034e-06,
"loss": 0.0445,
"step": 1736
},
{
"epoch": 0.7902638762511374,
"grad_norm": 0.3886581827401007,
"learning_rate": 9.396867407119748e-06,
"loss": 0.0387,
"step": 1737
},
{
"epoch": 0.7907188353048226,
"grad_norm": 0.43315626329206963,
"learning_rate": 9.396186693011312e-06,
"loss": 0.0484,
"step": 1738
},
{
"epoch": 0.7911737943585078,
"grad_norm": 0.7578063731873546,
"learning_rate": 9.395505619666353e-06,
"loss": 0.0872,
"step": 1739
},
{
"epoch": 0.7916287534121929,
"grad_norm": 0.9087897001540515,
"learning_rate": 9.394824187140526e-06,
"loss": 0.0914,
"step": 1740
},
{
"epoch": 0.792083712465878,
"grad_norm": 0.5994634977370948,
"learning_rate": 9.394142395489512e-06,
"loss": 0.061,
"step": 1741
},
{
"epoch": 0.7925386715195633,
"grad_norm": 0.6263578026813904,
"learning_rate": 9.393460244769023e-06,
"loss": 0.0608,
"step": 1742
},
{
"epoch": 0.7929936305732485,
"grad_norm": 0.5753033056961346,
"learning_rate": 9.392777735034807e-06,
"loss": 0.0721,
"step": 1743
},
{
"epoch": 0.7934485896269335,
"grad_norm": 0.6561198773299641,
"learning_rate": 9.392094866342632e-06,
"loss": 0.0599,
"step": 1744
},
{
"epoch": 0.7939035486806187,
"grad_norm": 0.7317990056550264,
"learning_rate": 9.391411638748297e-06,
"loss": 0.0742,
"step": 1745
},
{
"epoch": 0.7943585077343039,
"grad_norm": 0.5011723772780661,
"learning_rate": 9.390728052307637e-06,
"loss": 0.0647,
"step": 1746
},
{
"epoch": 0.794813466787989,
"grad_norm": 0.6867846904523061,
"learning_rate": 9.390044107076506e-06,
"loss": 0.0779,
"step": 1747
},
{
"epoch": 0.7952684258416742,
"grad_norm": 0.9267872196876082,
"learning_rate": 9.389359803110796e-06,
"loss": 0.1001,
"step": 1748
},
{
"epoch": 0.7957233848953594,
"grad_norm": 3.487580179742763,
"learning_rate": 9.388675140466427e-06,
"loss": 0.1841,
"step": 1749
},
{
"epoch": 0.7961783439490446,
"grad_norm": 0.6520959532750612,
"learning_rate": 9.387990119199343e-06,
"loss": 0.0714,
"step": 1750
},
{
"epoch": 0.7966333030027297,
"grad_norm": 0.8129917876989495,
"learning_rate": 9.387304739365524e-06,
"loss": 0.0949,
"step": 1751
},
{
"epoch": 0.7970882620564149,
"grad_norm": 0.6276053555905522,
"learning_rate": 9.386619001020974e-06,
"loss": 0.0552,
"step": 1752
},
{
"epoch": 0.7975432211101001,
"grad_norm": 0.7632340875896291,
"learning_rate": 9.385932904221729e-06,
"loss": 0.0655,
"step": 1753
},
{
"epoch": 0.7979981801637852,
"grad_norm": 0.7239218776412117,
"learning_rate": 9.385246449023853e-06,
"loss": 0.1113,
"step": 1754
},
{
"epoch": 0.7984531392174704,
"grad_norm": 1.0468381569335767,
"learning_rate": 9.38455963548344e-06,
"loss": 0.1042,
"step": 1755
},
{
"epoch": 0.7989080982711556,
"grad_norm": 0.8019558864262506,
"learning_rate": 9.383872463656616e-06,
"loss": 0.0868,
"step": 1756
},
{
"epoch": 0.7993630573248408,
"grad_norm": 0.7449121488820226,
"learning_rate": 9.383184933599531e-06,
"loss": 0.0945,
"step": 1757
},
{
"epoch": 0.7998180163785259,
"grad_norm": 0.5905383438931077,
"learning_rate": 9.382497045368368e-06,
"loss": 0.0672,
"step": 1758
},
{
"epoch": 0.8002729754322111,
"grad_norm": 0.5337189472762474,
"learning_rate": 9.381808799019336e-06,
"loss": 0.0509,
"step": 1759
},
{
"epoch": 0.8007279344858963,
"grad_norm": 1.0483707789224317,
"learning_rate": 9.38112019460868e-06,
"loss": 0.1069,
"step": 1760
},
{
"epoch": 0.8011828935395814,
"grad_norm": 0.8974041640796228,
"learning_rate": 9.380431232192663e-06,
"loss": 0.1061,
"step": 1761
},
{
"epoch": 0.8016378525932666,
"grad_norm": 0.774987790741639,
"learning_rate": 9.379741911827591e-06,
"loss": 0.0971,
"step": 1762
},
{
"epoch": 0.8020928116469518,
"grad_norm": 0.5037991292329869,
"learning_rate": 9.379052233569788e-06,
"loss": 0.0545,
"step": 1763
},
{
"epoch": 0.802547770700637,
"grad_norm": 0.7571282390818425,
"learning_rate": 9.37836219747561e-06,
"loss": 0.0774,
"step": 1764
},
{
"epoch": 0.8030027297543221,
"grad_norm": 0.47374252215612206,
"learning_rate": 9.377671803601447e-06,
"loss": 0.0479,
"step": 1765
},
{
"epoch": 0.8034576888080073,
"grad_norm": 0.563871853603133,
"learning_rate": 9.376981052003713e-06,
"loss": 0.0583,
"step": 1766
},
{
"epoch": 0.8039126478616925,
"grad_norm": 0.7260639419055305,
"learning_rate": 9.376289942738855e-06,
"loss": 0.0739,
"step": 1767
},
{
"epoch": 0.8043676069153776,
"grad_norm": 0.7704639306429572,
"learning_rate": 9.375598475863345e-06,
"loss": 0.08,
"step": 1768
},
{
"epoch": 0.8048225659690628,
"grad_norm": 0.8052864772012752,
"learning_rate": 9.374906651433689e-06,
"loss": 0.1155,
"step": 1769
},
{
"epoch": 0.805277525022748,
"grad_norm": 0.945940660466259,
"learning_rate": 9.374214469506416e-06,
"loss": 0.0942,
"step": 1770
},
{
"epoch": 0.8057324840764332,
"grad_norm": 0.8382092898318407,
"learning_rate": 9.373521930138092e-06,
"loss": 0.0831,
"step": 1771
},
{
"epoch": 0.8061874431301183,
"grad_norm": 0.5910933141386769,
"learning_rate": 9.372829033385306e-06,
"loss": 0.0825,
"step": 1772
},
{
"epoch": 0.8066424021838035,
"grad_norm": 0.7616883112365667,
"learning_rate": 9.37213577930468e-06,
"loss": 0.0907,
"step": 1773
},
{
"epoch": 0.8070973612374887,
"grad_norm": 0.9571485234330176,
"learning_rate": 9.37144216795286e-06,
"loss": 0.1322,
"step": 1774
},
{
"epoch": 0.8075523202911737,
"grad_norm": 0.770430324420924,
"learning_rate": 9.370748199386529e-06,
"loss": 0.0821,
"step": 1775
},
{
"epoch": 0.8080072793448589,
"grad_norm": 0.6303205378749905,
"learning_rate": 9.370053873662393e-06,
"loss": 0.0694,
"step": 1776
},
{
"epoch": 0.8084622383985441,
"grad_norm": 0.6777135846807264,
"learning_rate": 9.36935919083719e-06,
"loss": 0.0685,
"step": 1777
},
{
"epoch": 0.8089171974522293,
"grad_norm": 0.7319936383805717,
"learning_rate": 9.368664150967686e-06,
"loss": 0.0679,
"step": 1778
},
{
"epoch": 0.8093721565059144,
"grad_norm": 0.7990830113911501,
"learning_rate": 9.367968754110675e-06,
"loss": 0.1023,
"step": 1779
},
{
"epoch": 0.8098271155595996,
"grad_norm": 0.5223284241529513,
"learning_rate": 9.367273000322983e-06,
"loss": 0.063,
"step": 1780
},
{
"epoch": 0.8102820746132848,
"grad_norm": 1.040419010652034,
"learning_rate": 9.366576889661465e-06,
"loss": 0.1236,
"step": 1781
},
{
"epoch": 0.8107370336669699,
"grad_norm": 0.6404250074887077,
"learning_rate": 9.365880422183003e-06,
"loss": 0.0656,
"step": 1782
},
{
"epoch": 0.8111919927206551,
"grad_norm": 0.7564675990794105,
"learning_rate": 9.365183597944506e-06,
"loss": 0.0725,
"step": 1783
},
{
"epoch": 0.8116469517743403,
"grad_norm": 0.5955963027805166,
"learning_rate": 9.364486417002922e-06,
"loss": 0.07,
"step": 1784
},
{
"epoch": 0.8121019108280255,
"grad_norm": 0.6658882483856376,
"learning_rate": 9.363788879415217e-06,
"loss": 0.0616,
"step": 1785
},
{
"epoch": 0.8125568698817106,
"grad_norm": 0.6032274064354748,
"learning_rate": 9.36309098523839e-06,
"loss": 0.0688,
"step": 1786
},
{
"epoch": 0.8130118289353958,
"grad_norm": 0.7627355718580127,
"learning_rate": 9.362392734529472e-06,
"loss": 0.0841,
"step": 1787
},
{
"epoch": 0.813466787989081,
"grad_norm": 0.6581922552034235,
"learning_rate": 9.361694127345523e-06,
"loss": 0.0773,
"step": 1788
},
{
"epoch": 0.8139217470427661,
"grad_norm": 0.5723109702485146,
"learning_rate": 9.360995163743622e-06,
"loss": 0.0755,
"step": 1789
},
{
"epoch": 0.8143767060964513,
"grad_norm": 0.8492692664232014,
"learning_rate": 9.360295843780893e-06,
"loss": 0.084,
"step": 1790
},
{
"epoch": 0.8148316651501365,
"grad_norm": 0.7138327780528116,
"learning_rate": 9.35959616751448e-06,
"loss": 0.0754,
"step": 1791
},
{
"epoch": 0.8152866242038217,
"grad_norm": 0.7513269368015193,
"learning_rate": 9.358896135001555e-06,
"loss": 0.075,
"step": 1792
},
{
"epoch": 0.8157415832575068,
"grad_norm": 6.226904157676098,
"learning_rate": 9.35819574629932e-06,
"loss": 0.2447,
"step": 1793
},
{
"epoch": 0.816196542311192,
"grad_norm": 0.9632842432595244,
"learning_rate": 9.35749500146501e-06,
"loss": 0.0968,
"step": 1794
},
{
"epoch": 0.8166515013648772,
"grad_norm": 0.6910899092527569,
"learning_rate": 9.356793900555891e-06,
"loss": 0.0736,
"step": 1795
},
{
"epoch": 0.8171064604185623,
"grad_norm": 0.8430341812657529,
"learning_rate": 9.356092443629247e-06,
"loss": 0.0929,
"step": 1796
},
{
"epoch": 0.8175614194722475,
"grad_norm": 0.7425545237339678,
"learning_rate": 9.355390630742401e-06,
"loss": 0.1005,
"step": 1797
},
{
"epoch": 0.8180163785259327,
"grad_norm": 0.7004618898733044,
"learning_rate": 9.3546884619527e-06,
"loss": 0.0789,
"step": 1798
},
{
"epoch": 0.8184713375796179,
"grad_norm": 0.5461552026045962,
"learning_rate": 9.353985937317525e-06,
"loss": 0.0763,
"step": 1799
},
{
"epoch": 0.818926296633303,
"grad_norm": 0.6222175380121098,
"learning_rate": 9.35328305689428e-06,
"loss": 0.0754,
"step": 1800
},
{
"epoch": 0.8193812556869882,
"grad_norm": 0.7386705168753549,
"learning_rate": 9.352579820740404e-06,
"loss": 0.0641,
"step": 1801
},
{
"epoch": 0.8198362147406734,
"grad_norm": 1.2544587029581489,
"learning_rate": 9.351876228913363e-06,
"loss": 0.107,
"step": 1802
},
{
"epoch": 0.8202911737943585,
"grad_norm": 0.6546855629883478,
"learning_rate": 9.351172281470645e-06,
"loss": 0.0781,
"step": 1803
},
{
"epoch": 0.8207461328480437,
"grad_norm": 0.7485647273392206,
"learning_rate": 9.350467978469782e-06,
"loss": 0.0898,
"step": 1804
},
{
"epoch": 0.8212010919017289,
"grad_norm": 0.5530668925780788,
"learning_rate": 9.34976331996832e-06,
"loss": 0.057,
"step": 1805
},
{
"epoch": 0.821656050955414,
"grad_norm": 0.870085999603916,
"learning_rate": 9.349058306023844e-06,
"loss": 0.1077,
"step": 1806
},
{
"epoch": 0.8221110100090991,
"grad_norm": 0.891036381079533,
"learning_rate": 9.348352936693964e-06,
"loss": 0.1082,
"step": 1807
},
{
"epoch": 0.8225659690627843,
"grad_norm": 0.5641275258385202,
"learning_rate": 9.347647212036316e-06,
"loss": 0.0613,
"step": 1808
},
{
"epoch": 0.8230209281164695,
"grad_norm": 0.7163257638587112,
"learning_rate": 9.346941132108575e-06,
"loss": 0.0842,
"step": 1809
},
{
"epoch": 0.8234758871701547,
"grad_norm": 0.7333770270884309,
"learning_rate": 9.346234696968435e-06,
"loss": 0.0782,
"step": 1810
},
{
"epoch": 0.8239308462238398,
"grad_norm": 0.5399164747367127,
"learning_rate": 9.345527906673622e-06,
"loss": 0.0676,
"step": 1811
},
{
"epoch": 0.824385805277525,
"grad_norm": 1.0476291790994476,
"learning_rate": 9.344820761281892e-06,
"loss": 0.0984,
"step": 1812
},
{
"epoch": 0.8248407643312102,
"grad_norm": 0.639304845804496,
"learning_rate": 9.344113260851031e-06,
"loss": 0.0764,
"step": 1813
},
{
"epoch": 0.8252957233848953,
"grad_norm": 0.6071291165528282,
"learning_rate": 9.343405405438852e-06,
"loss": 0.0707,
"step": 1814
},
{
"epoch": 0.8257506824385805,
"grad_norm": 0.6973111552871604,
"learning_rate": 9.342697195103199e-06,
"loss": 0.0917,
"step": 1815
},
{
"epoch": 0.8262056414922657,
"grad_norm": 0.6486872321285189,
"learning_rate": 9.341988629901942e-06,
"loss": 0.0725,
"step": 1816
},
{
"epoch": 0.8266606005459509,
"grad_norm": 0.5216883119977757,
"learning_rate": 9.341279709892981e-06,
"loss": 0.0572,
"step": 1817
},
{
"epoch": 0.827115559599636,
"grad_norm": 0.4472530755665983,
"learning_rate": 9.340570435134248e-06,
"loss": 0.0412,
"step": 1818
},
{
"epoch": 0.8275705186533212,
"grad_norm": 0.786165560489741,
"learning_rate": 9.339860805683703e-06,
"loss": 0.0905,
"step": 1819
},
{
"epoch": 0.8280254777070064,
"grad_norm": 0.8504390923669081,
"learning_rate": 9.33915082159933e-06,
"loss": 0.0761,
"step": 1820
},
{
"epoch": 0.8284804367606915,
"grad_norm": 0.5303034158640553,
"learning_rate": 9.338440482939146e-06,
"loss": 0.0735,
"step": 1821
},
{
"epoch": 0.8289353958143767,
"grad_norm": 0.6407993820931909,
"learning_rate": 9.337729789761199e-06,
"loss": 0.0829,
"step": 1822
},
{
"epoch": 0.8293903548680619,
"grad_norm": 2.670877671269915,
"learning_rate": 9.337018742123563e-06,
"loss": 0.1871,
"step": 1823
},
{
"epoch": 0.8298453139217471,
"grad_norm": 1.0355313595445745,
"learning_rate": 9.336307340084341e-06,
"loss": 0.0955,
"step": 1824
},
{
"epoch": 0.8303002729754322,
"grad_norm": 0.6127983226216669,
"learning_rate": 9.335595583701667e-06,
"loss": 0.0639,
"step": 1825
},
{
"epoch": 0.8307552320291174,
"grad_norm": 0.6196615465194765,
"learning_rate": 9.334883473033699e-06,
"loss": 0.0706,
"step": 1826
},
{
"epoch": 0.8312101910828026,
"grad_norm": 0.7243682512181147,
"learning_rate": 9.33417100813863e-06,
"loss": 0.0869,
"step": 1827
},
{
"epoch": 0.8316651501364877,
"grad_norm": 0.94108166831404,
"learning_rate": 9.33345818907468e-06,
"loss": 0.1349,
"step": 1828
},
{
"epoch": 0.8321201091901729,
"grad_norm": 4.6896190497823955,
"learning_rate": 9.332745015900097e-06,
"loss": 0.1125,
"step": 1829
},
{
"epoch": 0.8325750682438581,
"grad_norm": 0.7268733027831774,
"learning_rate": 9.332031488673156e-06,
"loss": 0.0651,
"step": 1830
},
{
"epoch": 0.8330300272975433,
"grad_norm": 0.5169699897246913,
"learning_rate": 9.331317607452166e-06,
"loss": 0.0683,
"step": 1831
},
{
"epoch": 0.8334849863512284,
"grad_norm": 0.5056561715785393,
"learning_rate": 9.330603372295463e-06,
"loss": 0.0568,
"step": 1832
},
{
"epoch": 0.8339399454049136,
"grad_norm": 0.5749009883761049,
"learning_rate": 9.329888783261408e-06,
"loss": 0.0594,
"step": 1833
},
{
"epoch": 0.8343949044585988,
"grad_norm": 0.6696966952437984,
"learning_rate": 9.329173840408394e-06,
"loss": 0.0764,
"step": 1834
},
{
"epoch": 0.8348498635122839,
"grad_norm": 0.7329039198928983,
"learning_rate": 9.328458543794844e-06,
"loss": 0.0729,
"step": 1835
},
{
"epoch": 0.835304822565969,
"grad_norm": 0.5892831520257552,
"learning_rate": 9.327742893479212e-06,
"loss": 0.0838,
"step": 1836
},
{
"epoch": 0.8357597816196543,
"grad_norm": 0.848350653615326,
"learning_rate": 9.327026889519973e-06,
"loss": 0.0778,
"step": 1837
},
{
"epoch": 0.8362147406733395,
"grad_norm": 0.939837339633871,
"learning_rate": 9.326310531975636e-06,
"loss": 0.1005,
"step": 1838
},
{
"epoch": 0.8366696997270245,
"grad_norm": 0.6312875650471034,
"learning_rate": 9.32559382090474e-06,
"loss": 0.0626,
"step": 1839
},
{
"epoch": 0.8371246587807097,
"grad_norm": 0.9586580739045799,
"learning_rate": 9.324876756365853e-06,
"loss": 0.1154,
"step": 1840
},
{
"epoch": 0.8375796178343949,
"grad_norm": 0.6108920091747637,
"learning_rate": 9.324159338417566e-06,
"loss": 0.0674,
"step": 1841
},
{
"epoch": 0.83803457688808,
"grad_norm": 0.9247779620401613,
"learning_rate": 9.323441567118508e-06,
"loss": 0.11,
"step": 1842
},
{
"epoch": 0.8384895359417652,
"grad_norm": 0.6152452902665,
"learning_rate": 9.322723442527328e-06,
"loss": 0.0657,
"step": 1843
},
{
"epoch": 0.8389444949954504,
"grad_norm": 0.6579130646316164,
"learning_rate": 9.32200496470271e-06,
"loss": 0.0721,
"step": 1844
},
{
"epoch": 0.8393994540491356,
"grad_norm": 0.6812573423845587,
"learning_rate": 9.321286133703365e-06,
"loss": 0.0627,
"step": 1845
},
{
"epoch": 0.8398544131028207,
"grad_norm": 0.5946100319565307,
"learning_rate": 9.320566949588031e-06,
"loss": 0.0708,
"step": 1846
},
{
"epoch": 0.8403093721565059,
"grad_norm": 0.6319246275087805,
"learning_rate": 9.319847412415477e-06,
"loss": 0.0651,
"step": 1847
},
{
"epoch": 0.8407643312101911,
"grad_norm": 0.6789460664352271,
"learning_rate": 9.3191275222445e-06,
"loss": 0.0707,
"step": 1848
},
{
"epoch": 0.8412192902638762,
"grad_norm": 0.4396253526793688,
"learning_rate": 9.31840727913393e-06,
"loss": 0.0431,
"step": 1849
},
{
"epoch": 0.8416742493175614,
"grad_norm": 0.6745617928769184,
"learning_rate": 9.317686683142616e-06,
"loss": 0.0747,
"step": 1850
},
{
"epoch": 0.8421292083712466,
"grad_norm": 0.6924165554321049,
"learning_rate": 9.316965734329447e-06,
"loss": 0.0575,
"step": 1851
},
{
"epoch": 0.8425841674249318,
"grad_norm": 0.7219679526943963,
"learning_rate": 9.316244432753332e-06,
"loss": 0.0935,
"step": 1852
},
{
"epoch": 0.8430391264786169,
"grad_norm": 1.0205930330831676,
"learning_rate": 9.315522778473214e-06,
"loss": 0.1213,
"step": 1853
},
{
"epoch": 0.8434940855323021,
"grad_norm": 1.009181015179975,
"learning_rate": 9.314800771548064e-06,
"loss": 0.1049,
"step": 1854
},
{
"epoch": 0.8439490445859873,
"grad_norm": 0.7263916504334191,
"learning_rate": 9.31407841203688e-06,
"loss": 0.1025,
"step": 1855
},
{
"epoch": 0.8444040036396724,
"grad_norm": 0.6276487176726284,
"learning_rate": 9.31335569999869e-06,
"loss": 0.0587,
"step": 1856
},
{
"epoch": 0.8448589626933576,
"grad_norm": 0.6171084743549562,
"learning_rate": 9.31263263549255e-06,
"loss": 0.0495,
"step": 1857
},
{
"epoch": 0.8453139217470428,
"grad_norm": 0.6730791565382994,
"learning_rate": 9.31190921857755e-06,
"loss": 0.0789,
"step": 1858
},
{
"epoch": 0.845768880800728,
"grad_norm": 0.7874386993734893,
"learning_rate": 9.311185449312798e-06,
"loss": 0.088,
"step": 1859
},
{
"epoch": 0.8462238398544131,
"grad_norm": 0.5073783803158326,
"learning_rate": 9.310461327757442e-06,
"loss": 0.0561,
"step": 1860
},
{
"epoch": 0.8466787989080983,
"grad_norm": 0.6051266904327832,
"learning_rate": 9.309736853970652e-06,
"loss": 0.0688,
"step": 1861
},
{
"epoch": 0.8471337579617835,
"grad_norm": 1.0483500354699085,
"learning_rate": 9.309012028011628e-06,
"loss": 0.1346,
"step": 1862
},
{
"epoch": 0.8475887170154686,
"grad_norm": 0.9049471090474998,
"learning_rate": 9.3082868499396e-06,
"loss": 0.0986,
"step": 1863
},
{
"epoch": 0.8480436760691538,
"grad_norm": 0.47381125867485346,
"learning_rate": 9.307561319813829e-06,
"loss": 0.058,
"step": 1864
},
{
"epoch": 0.848498635122839,
"grad_norm": 0.7964538075850383,
"learning_rate": 9.306835437693597e-06,
"loss": 0.0829,
"step": 1865
},
{
"epoch": 0.8489535941765242,
"grad_norm": 0.9919343521297046,
"learning_rate": 9.306109203638225e-06,
"loss": 0.0885,
"step": 1866
},
{
"epoch": 0.8494085532302093,
"grad_norm": 1.4502514405100166,
"learning_rate": 9.305382617707052e-06,
"loss": 0.1023,
"step": 1867
},
{
"epoch": 0.8498635122838945,
"grad_norm": 0.7238180713867792,
"learning_rate": 9.304655679959459e-06,
"loss": 0.0813,
"step": 1868
},
{
"epoch": 0.8503184713375797,
"grad_norm": 0.7360849022013412,
"learning_rate": 9.303928390454839e-06,
"loss": 0.0671,
"step": 1869
},
{
"epoch": 0.8507734303912647,
"grad_norm": 0.5803360108595549,
"learning_rate": 9.30320074925263e-06,
"loss": 0.075,
"step": 1870
},
{
"epoch": 0.8512283894449499,
"grad_norm": 0.6838093346854254,
"learning_rate": 9.302472756412288e-06,
"loss": 0.0812,
"step": 1871
},
{
"epoch": 0.8516833484986351,
"grad_norm": 0.8850924783689049,
"learning_rate": 9.301744411993302e-06,
"loss": 0.0991,
"step": 1872
},
{
"epoch": 0.8521383075523203,
"grad_norm": 0.8273381019086633,
"learning_rate": 9.30101571605519e-06,
"loss": 0.0803,
"step": 1873
},
{
"epoch": 0.8525932666060054,
"grad_norm": 0.6554434764444423,
"learning_rate": 9.300286668657495e-06,
"loss": 0.0737,
"step": 1874
},
{
"epoch": 0.8530482256596906,
"grad_norm": 0.8230660869280486,
"learning_rate": 9.299557269859795e-06,
"loss": 0.0748,
"step": 1875
},
{
"epoch": 0.8535031847133758,
"grad_norm": 0.609738768294497,
"learning_rate": 9.298827519721692e-06,
"loss": 0.0608,
"step": 1876
},
{
"epoch": 0.8539581437670609,
"grad_norm": 0.7433208516076715,
"learning_rate": 9.298097418302817e-06,
"loss": 0.0992,
"step": 1877
},
{
"epoch": 0.8544131028207461,
"grad_norm": 0.5414027711398505,
"learning_rate": 9.29736696566283e-06,
"loss": 0.0642,
"step": 1878
},
{
"epoch": 0.8548680618744313,
"grad_norm": 0.8950820233319129,
"learning_rate": 9.296636161861422e-06,
"loss": 0.1121,
"step": 1879
},
{
"epoch": 0.8553230209281165,
"grad_norm": 2.0225500877401617,
"learning_rate": 9.295905006958308e-06,
"loss": 0.1409,
"step": 1880
},
{
"epoch": 0.8557779799818016,
"grad_norm": 0.7783660516278756,
"learning_rate": 9.295173501013239e-06,
"loss": 0.0974,
"step": 1881
},
{
"epoch": 0.8562329390354868,
"grad_norm": 0.7064043776078144,
"learning_rate": 9.29444164408599e-06,
"loss": 0.0954,
"step": 1882
},
{
"epoch": 0.856687898089172,
"grad_norm": 0.6658976396134992,
"learning_rate": 9.29370943623636e-06,
"loss": 0.0636,
"step": 1883
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.6825106501213147,
"learning_rate": 9.292976877524189e-06,
"loss": 0.0908,
"step": 1884
},
{
"epoch": 0.8575978161965423,
"grad_norm": 0.8132731569130554,
"learning_rate": 9.292243968009332e-06,
"loss": 0.0952,
"step": 1885
},
{
"epoch": 0.8580527752502275,
"grad_norm": 1.283740720887758,
"learning_rate": 9.29151070775168e-06,
"loss": 0.1407,
"step": 1886
},
{
"epoch": 0.8585077343039127,
"grad_norm": 0.8987444265022443,
"learning_rate": 9.290777096811156e-06,
"loss": 0.1008,
"step": 1887
},
{
"epoch": 0.8589626933575978,
"grad_norm": 0.9027753674161602,
"learning_rate": 9.290043135247704e-06,
"loss": 0.0917,
"step": 1888
},
{
"epoch": 0.859417652411283,
"grad_norm": 0.7721264653335534,
"learning_rate": 9.289308823121302e-06,
"loss": 0.0876,
"step": 1889
},
{
"epoch": 0.8598726114649682,
"grad_norm": 0.8645055674602313,
"learning_rate": 9.28857416049195e-06,
"loss": 0.0775,
"step": 1890
},
{
"epoch": 0.8603275705186533,
"grad_norm": 0.7828026058785104,
"learning_rate": 9.287839147419685e-06,
"loss": 0.0953,
"step": 1891
},
{
"epoch": 0.8607825295723385,
"grad_norm": 0.7581321197025821,
"learning_rate": 9.287103783964571e-06,
"loss": 0.1004,
"step": 1892
},
{
"epoch": 0.8612374886260237,
"grad_norm": 0.5836098633522236,
"learning_rate": 9.286368070186696e-06,
"loss": 0.0586,
"step": 1893
},
{
"epoch": 0.8616924476797089,
"grad_norm": 0.8102404855384281,
"learning_rate": 9.285632006146178e-06,
"loss": 0.0809,
"step": 1894
},
{
"epoch": 0.862147406733394,
"grad_norm": 0.5684276012396848,
"learning_rate": 9.284895591903167e-06,
"loss": 0.0736,
"step": 1895
},
{
"epoch": 0.8626023657870792,
"grad_norm": 0.629014301705328,
"learning_rate": 9.284158827517838e-06,
"loss": 0.0707,
"step": 1896
},
{
"epoch": 0.8630573248407644,
"grad_norm": 0.6150335967135018,
"learning_rate": 9.283421713050398e-06,
"loss": 0.0665,
"step": 1897
},
{
"epoch": 0.8635122838944495,
"grad_norm": 0.7977181385850289,
"learning_rate": 9.282684248561078e-06,
"loss": 0.1077,
"step": 1898
},
{
"epoch": 0.8639672429481347,
"grad_norm": 0.5184482645002529,
"learning_rate": 9.281946434110141e-06,
"loss": 0.0594,
"step": 1899
},
{
"epoch": 0.8644222020018199,
"grad_norm": 0.7148270230091635,
"learning_rate": 9.28120826975788e-06,
"loss": 0.1005,
"step": 1900
},
{
"epoch": 0.864877161055505,
"grad_norm": 0.6020497479816633,
"learning_rate": 9.280469755564613e-06,
"loss": 0.0595,
"step": 1901
},
{
"epoch": 0.8653321201091901,
"grad_norm": 0.7725143836000526,
"learning_rate": 9.279730891590688e-06,
"loss": 0.063,
"step": 1902
},
{
"epoch": 0.8657870791628753,
"grad_norm": 0.5341160118168524,
"learning_rate": 9.27899167789648e-06,
"loss": 0.0649,
"step": 1903
},
{
"epoch": 0.8662420382165605,
"grad_norm": 0.78025783272878,
"learning_rate": 9.278252114542398e-06,
"loss": 0.0987,
"step": 1904
},
{
"epoch": 0.8666969972702456,
"grad_norm": 1.0383225939834173,
"learning_rate": 9.277512201588871e-06,
"loss": 0.1532,
"step": 1905
},
{
"epoch": 0.8671519563239308,
"grad_norm": 0.742851971816876,
"learning_rate": 9.276771939096367e-06,
"loss": 0.1083,
"step": 1906
},
{
"epoch": 0.867606915377616,
"grad_norm": 0.6246586544484709,
"learning_rate": 9.276031327125371e-06,
"loss": 0.0798,
"step": 1907
},
{
"epoch": 0.8680618744313012,
"grad_norm": 0.6937230711216974,
"learning_rate": 9.275290365736408e-06,
"loss": 0.0764,
"step": 1908
},
{
"epoch": 0.8685168334849863,
"grad_norm": 0.6405216327010745,
"learning_rate": 9.274549054990022e-06,
"loss": 0.0553,
"step": 1909
},
{
"epoch": 0.8689717925386715,
"grad_norm": 0.6118088958703919,
"learning_rate": 9.273807394946791e-06,
"loss": 0.0719,
"step": 1910
},
{
"epoch": 0.8694267515923567,
"grad_norm": 0.5929451056907732,
"learning_rate": 9.27306538566732e-06,
"loss": 0.0736,
"step": 1911
},
{
"epoch": 0.8698817106460418,
"grad_norm": 0.551189089448713,
"learning_rate": 9.272323027212244e-06,
"loss": 0.0802,
"step": 1912
},
{
"epoch": 0.870336669699727,
"grad_norm": 0.6964950682522272,
"learning_rate": 9.271580319642221e-06,
"loss": 0.0956,
"step": 1913
},
{
"epoch": 0.8707916287534122,
"grad_norm": 0.656523844824833,
"learning_rate": 9.270837263017947e-06,
"loss": 0.0716,
"step": 1914
},
{
"epoch": 0.8712465878070974,
"grad_norm": 0.5516956702822526,
"learning_rate": 9.270093857400138e-06,
"loss": 0.0756,
"step": 1915
},
{
"epoch": 0.8717015468607825,
"grad_norm": 0.6458984664434074,
"learning_rate": 9.269350102849542e-06,
"loss": 0.0762,
"step": 1916
},
{
"epoch": 0.8721565059144677,
"grad_norm": 0.6244797606471136,
"learning_rate": 9.268605999426936e-06,
"loss": 0.066,
"step": 1917
},
{
"epoch": 0.8726114649681529,
"grad_norm": 1.3051429800547985,
"learning_rate": 9.267861547193126e-06,
"loss": 0.1487,
"step": 1918
},
{
"epoch": 0.873066424021838,
"grad_norm": 0.9503536634109886,
"learning_rate": 9.267116746208944e-06,
"loss": 0.1088,
"step": 1919
},
{
"epoch": 0.8735213830755232,
"grad_norm": 0.6872044557187451,
"learning_rate": 9.26637159653525e-06,
"loss": 0.0952,
"step": 1920
},
{
"epoch": 0.8739763421292084,
"grad_norm": 0.8261797174841458,
"learning_rate": 9.265626098232934e-06,
"loss": 0.0917,
"step": 1921
},
{
"epoch": 0.8744313011828936,
"grad_norm": 0.6285868744907084,
"learning_rate": 9.26488025136292e-06,
"loss": 0.0736,
"step": 1922
},
{
"epoch": 0.8748862602365787,
"grad_norm": 0.95408072866655,
"learning_rate": 9.264134055986152e-06,
"loss": 0.09,
"step": 1923
},
{
"epoch": 0.8753412192902639,
"grad_norm": 0.8126928412084633,
"learning_rate": 9.263387512163604e-06,
"loss": 0.0861,
"step": 1924
},
{
"epoch": 0.8757961783439491,
"grad_norm": 0.628340619476289,
"learning_rate": 9.262640619956282e-06,
"loss": 0.0853,
"step": 1925
},
{
"epoch": 0.8762511373976342,
"grad_norm": 0.822645279842771,
"learning_rate": 9.261893379425218e-06,
"loss": 0.0921,
"step": 1926
},
{
"epoch": 0.8767060964513194,
"grad_norm": 0.664699910134531,
"learning_rate": 9.261145790631475e-06,
"loss": 0.0661,
"step": 1927
},
{
"epoch": 0.8771610555050046,
"grad_norm": 0.46120202232971963,
"learning_rate": 9.26039785363614e-06,
"loss": 0.0548,
"step": 1928
},
{
"epoch": 0.8776160145586898,
"grad_norm": 0.47348608915538554,
"learning_rate": 9.259649568500333e-06,
"loss": 0.0579,
"step": 1929
},
{
"epoch": 0.8780709736123748,
"grad_norm": 0.5421377090850338,
"learning_rate": 9.258900935285199e-06,
"loss": 0.0591,
"step": 1930
},
{
"epoch": 0.87852593266606,
"grad_norm": 0.5523212054660892,
"learning_rate": 9.258151954051914e-06,
"loss": 0.0757,
"step": 1931
},
{
"epoch": 0.8789808917197452,
"grad_norm": 0.733320680764707,
"learning_rate": 9.25740262486168e-06,
"loss": 0.0999,
"step": 1932
},
{
"epoch": 0.8794358507734303,
"grad_norm": 0.5636961368288687,
"learning_rate": 9.25665294777573e-06,
"loss": 0.0525,
"step": 1933
},
{
"epoch": 0.8798908098271155,
"grad_norm": 0.5613709035035684,
"learning_rate": 9.255902922855326e-06,
"loss": 0.0512,
"step": 1934
},
{
"epoch": 0.8803457688808007,
"grad_norm": 0.6266000159117329,
"learning_rate": 9.255152550161753e-06,
"loss": 0.0714,
"step": 1935
},
{
"epoch": 0.8808007279344859,
"grad_norm": 0.5624931761265524,
"learning_rate": 9.25440182975633e-06,
"loss": 0.0667,
"step": 1936
},
{
"epoch": 0.881255686988171,
"grad_norm": 0.8855653361345076,
"learning_rate": 9.253650761700401e-06,
"loss": 0.1104,
"step": 1937
},
{
"epoch": 0.8817106460418562,
"grad_norm": 0.4051324158485566,
"learning_rate": 9.252899346055343e-06,
"loss": 0.0447,
"step": 1938
},
{
"epoch": 0.8821656050955414,
"grad_norm": 0.6705030425420828,
"learning_rate": 9.252147582882556e-06,
"loss": 0.08,
"step": 1939
},
{
"epoch": 0.8826205641492265,
"grad_norm": 0.745395756906896,
"learning_rate": 9.25139547224347e-06,
"loss": 0.0892,
"step": 1940
},
{
"epoch": 0.8830755232029117,
"grad_norm": 0.9577657000178205,
"learning_rate": 9.250643014199547e-06,
"loss": 0.1144,
"step": 1941
},
{
"epoch": 0.8835304822565969,
"grad_norm": 0.6774410545148242,
"learning_rate": 9.24989020881227e-06,
"loss": 0.0753,
"step": 1942
},
{
"epoch": 0.8839854413102821,
"grad_norm": 0.7409774305157982,
"learning_rate": 9.249137056143159e-06,
"loss": 0.0722,
"step": 1943
},
{
"epoch": 0.8844404003639672,
"grad_norm": 0.6042335346844097,
"learning_rate": 9.248383556253758e-06,
"loss": 0.0775,
"step": 1944
},
{
"epoch": 0.8848953594176524,
"grad_norm": 0.8396643903072698,
"learning_rate": 9.247629709205635e-06,
"loss": 0.1051,
"step": 1945
},
{
"epoch": 0.8853503184713376,
"grad_norm": 0.6590167845553623,
"learning_rate": 9.246875515060396e-06,
"loss": 0.0774,
"step": 1946
},
{
"epoch": 0.8858052775250227,
"grad_norm": 0.5876827286169646,
"learning_rate": 9.24612097387967e-06,
"loss": 0.0768,
"step": 1947
},
{
"epoch": 0.8862602365787079,
"grad_norm": 0.8894868784932225,
"learning_rate": 9.245366085725111e-06,
"loss": 0.0983,
"step": 1948
},
{
"epoch": 0.8867151956323931,
"grad_norm": 0.5389319757607208,
"learning_rate": 9.24461085065841e-06,
"loss": 0.0571,
"step": 1949
},
{
"epoch": 0.8871701546860783,
"grad_norm": 0.4677621224916707,
"learning_rate": 9.243855268741275e-06,
"loss": 0.0534,
"step": 1950
},
{
"epoch": 0.8876251137397634,
"grad_norm": 0.6166575793819061,
"learning_rate": 9.243099340035454e-06,
"loss": 0.0679,
"step": 1951
},
{
"epoch": 0.8880800727934486,
"grad_norm": 0.684219803564928,
"learning_rate": 9.242343064602719e-06,
"loss": 0.0797,
"step": 1952
},
{
"epoch": 0.8885350318471338,
"grad_norm": 0.6543060915410528,
"learning_rate": 9.241586442504865e-06,
"loss": 0.0876,
"step": 1953
},
{
"epoch": 0.8889899909008189,
"grad_norm": 0.6916358607655352,
"learning_rate": 9.240829473803723e-06,
"loss": 0.0816,
"step": 1954
},
{
"epoch": 0.8894449499545041,
"grad_norm": 0.6650683160408256,
"learning_rate": 9.240072158561146e-06,
"loss": 0.0851,
"step": 1955
},
{
"epoch": 0.8898999090081893,
"grad_norm": 0.8336397769475173,
"learning_rate": 9.239314496839022e-06,
"loss": 0.1075,
"step": 1956
},
{
"epoch": 0.8903548680618745,
"grad_norm": 0.6498784190415388,
"learning_rate": 9.23855648869926e-06,
"loss": 0.0748,
"step": 1957
},
{
"epoch": 0.8908098271155596,
"grad_norm": 0.7894795440995916,
"learning_rate": 9.237798134203803e-06,
"loss": 0.1045,
"step": 1958
},
{
"epoch": 0.8912647861692448,
"grad_norm": 0.5980997509859944,
"learning_rate": 9.237039433414623e-06,
"loss": 0.079,
"step": 1959
},
{
"epoch": 0.89171974522293,
"grad_norm": 0.8222326498301533,
"learning_rate": 9.236280386393712e-06,
"loss": 0.082,
"step": 1960
},
{
"epoch": 0.892174704276615,
"grad_norm": 0.6293204676003961,
"learning_rate": 9.2355209932031e-06,
"loss": 0.0741,
"step": 1961
},
{
"epoch": 0.8926296633303002,
"grad_norm": 0.47863668175134233,
"learning_rate": 9.23476125390484e-06,
"loss": 0.0524,
"step": 1962
},
{
"epoch": 0.8930846223839854,
"grad_norm": 0.7798093326874596,
"learning_rate": 9.234001168561013e-06,
"loss": 0.0691,
"step": 1963
},
{
"epoch": 0.8935395814376706,
"grad_norm": 0.7301612531501247,
"learning_rate": 9.233240737233733e-06,
"loss": 0.0965,
"step": 1964
},
{
"epoch": 0.8939945404913557,
"grad_norm": 1.0452984923884894,
"learning_rate": 9.232479959985136e-06,
"loss": 0.1293,
"step": 1965
},
{
"epoch": 0.8944494995450409,
"grad_norm": 0.6963389022030017,
"learning_rate": 9.23171883687739e-06,
"loss": 0.0767,
"step": 1966
},
{
"epoch": 0.8949044585987261,
"grad_norm": 0.45171069390219404,
"learning_rate": 9.23095736797269e-06,
"loss": 0.0522,
"step": 1967
},
{
"epoch": 0.8953594176524113,
"grad_norm": 1.0061313103020273,
"learning_rate": 9.230195553333263e-06,
"loss": 0.1277,
"step": 1968
},
{
"epoch": 0.8958143767060964,
"grad_norm": 1.5986138982364897,
"learning_rate": 9.229433393021358e-06,
"loss": 0.1405,
"step": 1969
},
{
"epoch": 0.8962693357597816,
"grad_norm": 0.6908357505139043,
"learning_rate": 9.228670887099256e-06,
"loss": 0.0739,
"step": 1970
},
{
"epoch": 0.8967242948134668,
"grad_norm": 0.5277345258701365,
"learning_rate": 9.227908035629266e-06,
"loss": 0.0526,
"step": 1971
},
{
"epoch": 0.8971792538671519,
"grad_norm": 0.6285224648148875,
"learning_rate": 9.227144838673724e-06,
"loss": 0.0706,
"step": 1972
},
{
"epoch": 0.8976342129208371,
"grad_norm": 0.949308919855668,
"learning_rate": 9.226381296294995e-06,
"loss": 0.1045,
"step": 1973
},
{
"epoch": 0.8980891719745223,
"grad_norm": 0.752138900094858,
"learning_rate": 9.225617408555471e-06,
"loss": 0.0907,
"step": 1974
},
{
"epoch": 0.8985441310282075,
"grad_norm": 0.9650799951574368,
"learning_rate": 9.224853175517578e-06,
"loss": 0.1261,
"step": 1975
},
{
"epoch": 0.8989990900818926,
"grad_norm": 0.6368811817284902,
"learning_rate": 9.224088597243762e-06,
"loss": 0.0759,
"step": 1976
},
{
"epoch": 0.8994540491355778,
"grad_norm": 0.7403608884362824,
"learning_rate": 9.223323673796503e-06,
"loss": 0.081,
"step": 1977
},
{
"epoch": 0.899909008189263,
"grad_norm": 0.8033696439311833,
"learning_rate": 9.222558405238303e-06,
"loss": 0.0968,
"step": 1978
},
{
"epoch": 0.9003639672429481,
"grad_norm": 0.7306511821068437,
"learning_rate": 9.2217927916317e-06,
"loss": 0.0916,
"step": 1979
},
{
"epoch": 0.9008189262966333,
"grad_norm": 0.8380967239417318,
"learning_rate": 9.221026833039256e-06,
"loss": 0.0945,
"step": 1980
},
{
"epoch": 0.9012738853503185,
"grad_norm": 0.7718744506924977,
"learning_rate": 9.220260529523561e-06,
"loss": 0.0918,
"step": 1981
},
{
"epoch": 0.9017288444040037,
"grad_norm": 0.7393925382776323,
"learning_rate": 9.219493881147234e-06,
"loss": 0.0816,
"step": 1982
},
{
"epoch": 0.9021838034576888,
"grad_norm": 0.7687427983757074,
"learning_rate": 9.218726887972923e-06,
"loss": 0.0835,
"step": 1983
},
{
"epoch": 0.902638762511374,
"grad_norm": 0.6785077320109779,
"learning_rate": 9.2179595500633e-06,
"loss": 0.0799,
"step": 1984
},
{
"epoch": 0.9030937215650592,
"grad_norm": 0.9172539926736025,
"learning_rate": 9.217191867481072e-06,
"loss": 0.1147,
"step": 1985
},
{
"epoch": 0.9035486806187443,
"grad_norm": 0.9222679238503178,
"learning_rate": 9.21642384028897e-06,
"loss": 0.127,
"step": 1986
},
{
"epoch": 0.9040036396724295,
"grad_norm": 0.8844523810912496,
"learning_rate": 9.215655468549752e-06,
"loss": 0.1013,
"step": 1987
},
{
"epoch": 0.9044585987261147,
"grad_norm": 0.5874811797706115,
"learning_rate": 9.214886752326208e-06,
"loss": 0.0528,
"step": 1988
},
{
"epoch": 0.9049135577797999,
"grad_norm": 0.6774186522730414,
"learning_rate": 9.214117691681152e-06,
"loss": 0.0749,
"step": 1989
},
{
"epoch": 0.905368516833485,
"grad_norm": 0.46678264083336873,
"learning_rate": 9.213348286677429e-06,
"loss": 0.0502,
"step": 1990
},
{
"epoch": 0.9058234758871702,
"grad_norm": 0.6369505909634797,
"learning_rate": 9.21257853737791e-06,
"loss": 0.0597,
"step": 1991
},
{
"epoch": 0.9062784349408554,
"grad_norm": 0.7872482528902512,
"learning_rate": 9.211808443845499e-06,
"loss": 0.0842,
"step": 1992
},
{
"epoch": 0.9067333939945404,
"grad_norm": 0.6991340678786092,
"learning_rate": 9.211038006143121e-06,
"loss": 0.0714,
"step": 1993
},
{
"epoch": 0.9071883530482256,
"grad_norm": 0.5842126029431552,
"learning_rate": 9.210267224333735e-06,
"loss": 0.0686,
"step": 1994
},
{
"epoch": 0.9076433121019108,
"grad_norm": 0.6405241386542652,
"learning_rate": 9.209496098480324e-06,
"loss": 0.0843,
"step": 1995
},
{
"epoch": 0.908098271155596,
"grad_norm": 0.6431855863004138,
"learning_rate": 9.208724628645901e-06,
"loss": 0.0781,
"step": 1996
},
{
"epoch": 0.9085532302092811,
"grad_norm": 0.6571372788631167,
"learning_rate": 9.207952814893511e-06,
"loss": 0.0746,
"step": 1997
},
{
"epoch": 0.9090081892629663,
"grad_norm": 0.6228847041781231,
"learning_rate": 9.207180657286216e-06,
"loss": 0.0563,
"step": 1998
},
{
"epoch": 0.9094631483166515,
"grad_norm": 0.6649592874484661,
"learning_rate": 9.20640815588712e-06,
"loss": 0.0737,
"step": 1999
},
{
"epoch": 0.9099181073703366,
"grad_norm": 0.6395827893566276,
"learning_rate": 9.205635310759344e-06,
"loss": 0.0864,
"step": 2000
},
{
"epoch": 0.9103730664240218,
"grad_norm": 0.6470816609318947,
"learning_rate": 9.204862121966044e-06,
"loss": 0.0819,
"step": 2001
},
{
"epoch": 0.910828025477707,
"grad_norm": 0.6954176357821441,
"learning_rate": 9.2040885895704e-06,
"loss": 0.0935,
"step": 2002
},
{
"epoch": 0.9112829845313922,
"grad_norm": 0.5250024400720148,
"learning_rate": 9.203314713635621e-06,
"loss": 0.0521,
"step": 2003
},
{
"epoch": 0.9117379435850773,
"grad_norm": 0.6765818316745539,
"learning_rate": 9.202540494224946e-06,
"loss": 0.1078,
"step": 2004
},
{
"epoch": 0.9121929026387625,
"grad_norm": 0.7602463030942905,
"learning_rate": 9.20176593140164e-06,
"loss": 0.068,
"step": 2005
},
{
"epoch": 0.9126478616924477,
"grad_norm": 0.4564764883431911,
"learning_rate": 9.200991025228998e-06,
"loss": 0.0576,
"step": 2006
},
{
"epoch": 0.9131028207461328,
"grad_norm": 0.87338946860691,
"learning_rate": 9.20021577577034e-06,
"loss": 0.1155,
"step": 2007
},
{
"epoch": 0.913557779799818,
"grad_norm": 0.67443699378812,
"learning_rate": 9.199440183089019e-06,
"loss": 0.0803,
"step": 2008
},
{
"epoch": 0.9140127388535032,
"grad_norm": 0.697779741574365,
"learning_rate": 9.198664247248408e-06,
"loss": 0.0886,
"step": 2009
},
{
"epoch": 0.9144676979071884,
"grad_norm": 0.6888292123310293,
"learning_rate": 9.197887968311917e-06,
"loss": 0.088,
"step": 2010
},
{
"epoch": 0.9149226569608735,
"grad_norm": 0.593887211300783,
"learning_rate": 9.197111346342979e-06,
"loss": 0.0597,
"step": 2011
},
{
"epoch": 0.9153776160145587,
"grad_norm": 0.5222048906208826,
"learning_rate": 9.196334381405055e-06,
"loss": 0.055,
"step": 2012
},
{
"epoch": 0.9158325750682439,
"grad_norm": 0.7406902681131339,
"learning_rate": 9.195557073561636e-06,
"loss": 0.0725,
"step": 2013
},
{
"epoch": 0.916287534121929,
"grad_norm": 0.7369752030698005,
"learning_rate": 9.194779422876242e-06,
"loss": 0.0725,
"step": 2014
},
{
"epoch": 0.9167424931756142,
"grad_norm": 0.5674786098045346,
"learning_rate": 9.194001429412414e-06,
"loss": 0.0528,
"step": 2015
},
{
"epoch": 0.9171974522292994,
"grad_norm": 0.9561188233992612,
"learning_rate": 9.19322309323373e-06,
"loss": 0.1213,
"step": 2016
},
{
"epoch": 0.9176524112829846,
"grad_norm": 0.7666480467189352,
"learning_rate": 9.192444414403792e-06,
"loss": 0.0788,
"step": 2017
},
{
"epoch": 0.9181073703366697,
"grad_norm": 1.0242939804657472,
"learning_rate": 9.19166539298623e-06,
"loss": 0.1341,
"step": 2018
},
{
"epoch": 0.9185623293903549,
"grad_norm": 0.6407407288510717,
"learning_rate": 9.1908860290447e-06,
"loss": 0.0702,
"step": 2019
},
{
"epoch": 0.9190172884440401,
"grad_norm": 0.9262978099585683,
"learning_rate": 9.190106322642888e-06,
"loss": 0.0962,
"step": 2020
},
{
"epoch": 0.9194722474977252,
"grad_norm": 0.6371294810639554,
"learning_rate": 9.189326273844512e-06,
"loss": 0.0716,
"step": 2021
},
{
"epoch": 0.9199272065514104,
"grad_norm": 0.616042736799084,
"learning_rate": 9.18854588271331e-06,
"loss": 0.0697,
"step": 2022
},
{
"epoch": 0.9203821656050956,
"grad_norm": 0.8652881040430276,
"learning_rate": 9.187765149313057e-06,
"loss": 0.0949,
"step": 2023
},
{
"epoch": 0.9208371246587808,
"grad_norm": 0.7171212404467417,
"learning_rate": 9.186984073707545e-06,
"loss": 0.0685,
"step": 2024
},
{
"epoch": 0.9212920837124658,
"grad_norm": 0.6434040420425213,
"learning_rate": 9.186202655960603e-06,
"loss": 0.0774,
"step": 2025
},
{
"epoch": 0.921747042766151,
"grad_norm": 0.6537324523008204,
"learning_rate": 9.185420896136086e-06,
"loss": 0.0786,
"step": 2026
},
{
"epoch": 0.9222020018198362,
"grad_norm": 0.6271186642997567,
"learning_rate": 9.184638794297873e-06,
"loss": 0.0636,
"step": 2027
},
{
"epoch": 0.9226569608735213,
"grad_norm": 0.7041069370791754,
"learning_rate": 9.183856350509877e-06,
"loss": 0.0809,
"step": 2028
},
{
"epoch": 0.9231119199272065,
"grad_norm": 0.8781019574614535,
"learning_rate": 9.183073564836033e-06,
"loss": 0.1051,
"step": 2029
},
{
"epoch": 0.9235668789808917,
"grad_norm": 0.48818413319632054,
"learning_rate": 9.182290437340308e-06,
"loss": 0.0474,
"step": 2030
},
{
"epoch": 0.9240218380345769,
"grad_norm": 0.8775797840737246,
"learning_rate": 9.181506968086696e-06,
"loss": 0.0949,
"step": 2031
},
{
"epoch": 0.924476797088262,
"grad_norm": 0.958612912496998,
"learning_rate": 9.180723157139218e-06,
"loss": 0.121,
"step": 2032
},
{
"epoch": 0.9249317561419472,
"grad_norm": 0.6245762602830833,
"learning_rate": 9.179939004561925e-06,
"loss": 0.0655,
"step": 2033
},
{
"epoch": 0.9253867151956324,
"grad_norm": 0.5017046465493271,
"learning_rate": 9.17915451041889e-06,
"loss": 0.0661,
"step": 2034
},
{
"epoch": 0.9258416742493175,
"grad_norm": 0.710064858137144,
"learning_rate": 9.178369674774224e-06,
"loss": 0.0791,
"step": 2035
},
{
"epoch": 0.9262966333030027,
"grad_norm": 0.587851189554333,
"learning_rate": 9.177584497692056e-06,
"loss": 0.0637,
"step": 2036
},
{
"epoch": 0.9267515923566879,
"grad_norm": 1.3023478543600886,
"learning_rate": 9.176798979236548e-06,
"loss": 0.1095,
"step": 2037
},
{
"epoch": 0.9272065514103731,
"grad_norm": 0.540716658575828,
"learning_rate": 9.17601311947189e-06,
"loss": 0.0693,
"step": 2038
},
{
"epoch": 0.9276615104640582,
"grad_norm": 0.6208372361565256,
"learning_rate": 9.175226918462298e-06,
"loss": 0.0718,
"step": 2039
},
{
"epoch": 0.9281164695177434,
"grad_norm": 0.7701609774864682,
"learning_rate": 9.174440376272021e-06,
"loss": 0.0976,
"step": 2040
},
{
"epoch": 0.9285714285714286,
"grad_norm": 0.7010494768516853,
"learning_rate": 9.173653492965325e-06,
"loss": 0.0993,
"step": 2041
},
{
"epoch": 0.9290263876251137,
"grad_norm": 0.6373763175184742,
"learning_rate": 9.172866268606514e-06,
"loss": 0.0724,
"step": 2042
},
{
"epoch": 0.9294813466787989,
"grad_norm": 0.701200286737339,
"learning_rate": 9.172078703259917e-06,
"loss": 0.0825,
"step": 2043
},
{
"epoch": 0.9299363057324841,
"grad_norm": 0.4368340952860916,
"learning_rate": 9.171290796989887e-06,
"loss": 0.0477,
"step": 2044
},
{
"epoch": 0.9303912647861693,
"grad_norm": 0.6370651977402901,
"learning_rate": 9.170502549860813e-06,
"loss": 0.0796,
"step": 2045
},
{
"epoch": 0.9308462238398544,
"grad_norm": 1.1692149001382897,
"learning_rate": 9.169713961937104e-06,
"loss": 0.122,
"step": 2046
},
{
"epoch": 0.9313011828935396,
"grad_norm": 0.694595823352437,
"learning_rate": 9.168925033283199e-06,
"loss": 0.0935,
"step": 2047
},
{
"epoch": 0.9317561419472248,
"grad_norm": 0.672175800896758,
"learning_rate": 9.168135763963567e-06,
"loss": 0.0763,
"step": 2048
},
{
"epoch": 0.9322111010009099,
"grad_norm": 0.5254037194744254,
"learning_rate": 9.167346154042705e-06,
"loss": 0.0535,
"step": 2049
},
{
"epoch": 0.9326660600545951,
"grad_norm": 0.6788074343357934,
"learning_rate": 9.166556203585134e-06,
"loss": 0.0804,
"step": 2050
},
{
"epoch": 0.9331210191082803,
"grad_norm": 0.6950456412782345,
"learning_rate": 9.165765912655407e-06,
"loss": 0.0727,
"step": 2051
},
{
"epoch": 0.9335759781619655,
"grad_norm": 0.8037111447772672,
"learning_rate": 9.1649752813181e-06,
"loss": 0.0811,
"step": 2052
},
{
"epoch": 0.9340309372156506,
"grad_norm": 0.6043473581913603,
"learning_rate": 9.164184309637824e-06,
"loss": 0.0773,
"step": 2053
},
{
"epoch": 0.9344858962693358,
"grad_norm": 0.6914300193057683,
"learning_rate": 9.16339299767921e-06,
"loss": 0.0888,
"step": 2054
},
{
"epoch": 0.934940855323021,
"grad_norm": 0.5973299516809696,
"learning_rate": 9.162601345506923e-06,
"loss": 0.0771,
"step": 2055
},
{
"epoch": 0.935395814376706,
"grad_norm": 0.5667027927032561,
"learning_rate": 9.161809353185651e-06,
"loss": 0.0589,
"step": 2056
},
{
"epoch": 0.9358507734303912,
"grad_norm": 0.5892355686848351,
"learning_rate": 9.161017020780114e-06,
"loss": 0.0562,
"step": 2057
},
{
"epoch": 0.9363057324840764,
"grad_norm": 0.8503563061945567,
"learning_rate": 9.160224348355057e-06,
"loss": 0.1075,
"step": 2058
},
{
"epoch": 0.9367606915377616,
"grad_norm": 0.8030569297687169,
"learning_rate": 9.159431335975255e-06,
"loss": 0.0651,
"step": 2059
},
{
"epoch": 0.9372156505914467,
"grad_norm": 0.6182029602806504,
"learning_rate": 9.158637983705505e-06,
"loss": 0.0908,
"step": 2060
},
{
"epoch": 0.9376706096451319,
"grad_norm": 0.6167088007283392,
"learning_rate": 9.157844291610641e-06,
"loss": 0.0719,
"step": 2061
},
{
"epoch": 0.9381255686988171,
"grad_norm": 1.0378949375185438,
"learning_rate": 9.157050259755519e-06,
"loss": 0.0925,
"step": 2062
},
{
"epoch": 0.9385805277525022,
"grad_norm": 0.6009053311569907,
"learning_rate": 9.156255888205021e-06,
"loss": 0.0868,
"step": 2063
},
{
"epoch": 0.9390354868061874,
"grad_norm": 0.6730461926983252,
"learning_rate": 9.155461177024062e-06,
"loss": 0.0791,
"step": 2064
},
{
"epoch": 0.9394904458598726,
"grad_norm": 0.8310142050561945,
"learning_rate": 9.154666126277582e-06,
"loss": 0.0882,
"step": 2065
},
{
"epoch": 0.9399454049135578,
"grad_norm": 0.5455153208822874,
"learning_rate": 9.153870736030549e-06,
"loss": 0.0651,
"step": 2066
},
{
"epoch": 0.9404003639672429,
"grad_norm": 0.8245922923142007,
"learning_rate": 9.153075006347957e-06,
"loss": 0.1357,
"step": 2067
},
{
"epoch": 0.9408553230209281,
"grad_norm": 0.7891736693746195,
"learning_rate": 9.15227893729483e-06,
"loss": 0.0879,
"step": 2068
},
{
"epoch": 0.9413102820746133,
"grad_norm": 0.6032022964433661,
"learning_rate": 9.151482528936222e-06,
"loss": 0.0594,
"step": 2069
},
{
"epoch": 0.9417652411282984,
"grad_norm": 0.8087071917107507,
"learning_rate": 9.150685781337207e-06,
"loss": 0.0872,
"step": 2070
},
{
"epoch": 0.9422202001819836,
"grad_norm": 1.1875700013397057,
"learning_rate": 9.149888694562896e-06,
"loss": 0.1447,
"step": 2071
},
{
"epoch": 0.9426751592356688,
"grad_norm": 0.7351727785498874,
"learning_rate": 9.149091268678423e-06,
"loss": 0.0708,
"step": 2072
},
{
"epoch": 0.943130118289354,
"grad_norm": 0.6792286796417435,
"learning_rate": 9.148293503748947e-06,
"loss": 0.0876,
"step": 2073
},
{
"epoch": 0.9435850773430391,
"grad_norm": 0.7417762096300724,
"learning_rate": 9.14749539983966e-06,
"loss": 0.0852,
"step": 2074
},
{
"epoch": 0.9440400363967243,
"grad_norm": 0.5155173170030183,
"learning_rate": 9.146696957015777e-06,
"loss": 0.0606,
"step": 2075
},
{
"epoch": 0.9444949954504095,
"grad_norm": 1.1023064832096257,
"learning_rate": 9.145898175342545e-06,
"loss": 0.1488,
"step": 2076
},
{
"epoch": 0.9449499545040946,
"grad_norm": 0.6914694719967308,
"learning_rate": 9.145099054885238e-06,
"loss": 0.0816,
"step": 2077
},
{
"epoch": 0.9454049135577798,
"grad_norm": 0.6905933706764309,
"learning_rate": 9.144299595709156e-06,
"loss": 0.0876,
"step": 2078
},
{
"epoch": 0.945859872611465,
"grad_norm": 0.5233906895741112,
"learning_rate": 9.143499797879626e-06,
"loss": 0.0562,
"step": 2079
},
{
"epoch": 0.9463148316651502,
"grad_norm": 0.5101515836442003,
"learning_rate": 9.142699661462005e-06,
"loss": 0.0559,
"step": 2080
},
{
"epoch": 0.9467697907188353,
"grad_norm": 0.48017157157527135,
"learning_rate": 9.141899186521675e-06,
"loss": 0.0503,
"step": 2081
},
{
"epoch": 0.9472247497725205,
"grad_norm": 0.6592673728640894,
"learning_rate": 9.141098373124048e-06,
"loss": 0.0797,
"step": 2082
},
{
"epoch": 0.9476797088262057,
"grad_norm": 0.86432014477488,
"learning_rate": 9.140297221334562e-06,
"loss": 0.0858,
"step": 2083
},
{
"epoch": 0.9481346678798908,
"grad_norm": 1.0397141319559977,
"learning_rate": 9.139495731218685e-06,
"loss": 0.1198,
"step": 2084
},
{
"epoch": 0.948589626933576,
"grad_norm": 0.862052866017664,
"learning_rate": 9.138693902841914e-06,
"loss": 0.1056,
"step": 2085
},
{
"epoch": 0.9490445859872612,
"grad_norm": 0.7709077621401632,
"learning_rate": 9.137891736269764e-06,
"loss": 0.0918,
"step": 2086
},
{
"epoch": 0.9494995450409464,
"grad_norm": 0.8691294728765458,
"learning_rate": 9.137089231567789e-06,
"loss": 0.0925,
"step": 2087
},
{
"epoch": 0.9499545040946314,
"grad_norm": 0.6098999809715144,
"learning_rate": 9.136286388801564e-06,
"loss": 0.0673,
"step": 2088
},
{
"epoch": 0.9504094631483166,
"grad_norm": 0.7157788293123913,
"learning_rate": 9.135483208036695e-06,
"loss": 0.0802,
"step": 2089
},
{
"epoch": 0.9508644222020018,
"grad_norm": 0.9397853662008804,
"learning_rate": 9.134679689338814e-06,
"loss": 0.1021,
"step": 2090
},
{
"epoch": 0.9513193812556869,
"grad_norm": 0.5449934450219076,
"learning_rate": 9.133875832773582e-06,
"loss": 0.0698,
"step": 2091
},
{
"epoch": 0.9517743403093721,
"grad_norm": 0.5678662789014983,
"learning_rate": 9.133071638406684e-06,
"loss": 0.0726,
"step": 2092
},
{
"epoch": 0.9522292993630573,
"grad_norm": 0.704718355722168,
"learning_rate": 9.132267106303836e-06,
"loss": 0.0949,
"step": 2093
},
{
"epoch": 0.9526842584167425,
"grad_norm": 0.7119333629649424,
"learning_rate": 9.131462236530784e-06,
"loss": 0.0815,
"step": 2094
},
{
"epoch": 0.9531392174704276,
"grad_norm": 0.9543831010874976,
"learning_rate": 9.130657029153293e-06,
"loss": 0.1037,
"step": 2095
},
{
"epoch": 0.9535941765241128,
"grad_norm": 0.4141088945678519,
"learning_rate": 9.129851484237165e-06,
"loss": 0.0438,
"step": 2096
},
{
"epoch": 0.954049135577798,
"grad_norm": 0.880955172212152,
"learning_rate": 9.129045601848222e-06,
"loss": 0.1139,
"step": 2097
},
{
"epoch": 0.9545040946314831,
"grad_norm": 0.5340666725025275,
"learning_rate": 9.12823938205232e-06,
"loss": 0.0662,
"step": 2098
},
{
"epoch": 0.9549590536851683,
"grad_norm": 0.7598809630255295,
"learning_rate": 9.127432824915339e-06,
"loss": 0.086,
"step": 2099
},
{
"epoch": 0.9554140127388535,
"grad_norm": 0.5889551801250265,
"learning_rate": 9.126625930503187e-06,
"loss": 0.0618,
"step": 2100
},
{
"epoch": 0.9558689717925387,
"grad_norm": 0.7452095277301981,
"learning_rate": 9.125818698881798e-06,
"loss": 0.0846,
"step": 2101
},
{
"epoch": 0.9563239308462238,
"grad_norm": 0.874570701264544,
"learning_rate": 9.125011130117139e-06,
"loss": 0.0711,
"step": 2102
},
{
"epoch": 0.956778889899909,
"grad_norm": 0.6700889468480424,
"learning_rate": 9.124203224275198e-06,
"loss": 0.0771,
"step": 2103
},
{
"epoch": 0.9572338489535942,
"grad_norm": 0.5713697589917575,
"learning_rate": 9.123394981421995e-06,
"loss": 0.0647,
"step": 2104
},
{
"epoch": 0.9576888080072793,
"grad_norm": 0.7416406361243658,
"learning_rate": 9.122586401623574e-06,
"loss": 0.0797,
"step": 2105
},
{
"epoch": 0.9581437670609645,
"grad_norm": 0.8792771411195691,
"learning_rate": 9.12177748494601e-06,
"loss": 0.1043,
"step": 2106
},
{
"epoch": 0.9585987261146497,
"grad_norm": 0.8409261244287831,
"learning_rate": 9.120968231455406e-06,
"loss": 0.0968,
"step": 2107
},
{
"epoch": 0.9590536851683349,
"grad_norm": 0.588499824544961,
"learning_rate": 9.120158641217885e-06,
"loss": 0.0675,
"step": 2108
},
{
"epoch": 0.95950864422202,
"grad_norm": 0.5664840104040384,
"learning_rate": 9.119348714299607e-06,
"loss": 0.0721,
"step": 2109
},
{
"epoch": 0.9599636032757052,
"grad_norm": 0.7544363313105896,
"learning_rate": 9.118538450766755e-06,
"loss": 0.0723,
"step": 2110
},
{
"epoch": 0.9604185623293904,
"grad_norm": 0.6699256182505398,
"learning_rate": 9.117727850685541e-06,
"loss": 0.0669,
"step": 2111
},
{
"epoch": 0.9608735213830755,
"grad_norm": 0.5711605071447146,
"learning_rate": 9.116916914122202e-06,
"loss": 0.0637,
"step": 2112
},
{
"epoch": 0.9613284804367607,
"grad_norm": 0.6965803730129388,
"learning_rate": 9.116105641143005e-06,
"loss": 0.0744,
"step": 2113
},
{
"epoch": 0.9617834394904459,
"grad_norm": 0.8598026014818454,
"learning_rate": 9.115294031814242e-06,
"loss": 0.0937,
"step": 2114
},
{
"epoch": 0.9622383985441311,
"grad_norm": 0.5794082624701737,
"learning_rate": 9.114482086202236e-06,
"loss": 0.0675,
"step": 2115
},
{
"epoch": 0.9626933575978162,
"grad_norm": 0.7600807206599288,
"learning_rate": 9.113669804373335e-06,
"loss": 0.1047,
"step": 2116
},
{
"epoch": 0.9631483166515014,
"grad_norm": 0.6377342056356247,
"learning_rate": 9.112857186393913e-06,
"loss": 0.0676,
"step": 2117
},
{
"epoch": 0.9636032757051866,
"grad_norm": 1.1042469320816768,
"learning_rate": 9.112044232330377e-06,
"loss": 0.1508,
"step": 2118
},
{
"epoch": 0.9640582347588716,
"grad_norm": 0.817690744261235,
"learning_rate": 9.111230942249156e-06,
"loss": 0.0904,
"step": 2119
},
{
"epoch": 0.9645131938125568,
"grad_norm": 0.7037231293816442,
"learning_rate": 9.110417316216708e-06,
"loss": 0.0636,
"step": 2120
},
{
"epoch": 0.964968152866242,
"grad_norm": 0.6588945759110881,
"learning_rate": 9.10960335429952e-06,
"loss": 0.0684,
"step": 2121
},
{
"epoch": 0.9654231119199272,
"grad_norm": 0.6220308381200076,
"learning_rate": 9.108789056564105e-06,
"loss": 0.0877,
"step": 2122
},
{
"epoch": 0.9658780709736123,
"grad_norm": 0.6262721502493606,
"learning_rate": 9.107974423077001e-06,
"loss": 0.0642,
"step": 2123
},
{
"epoch": 0.9663330300272975,
"grad_norm": 0.9510165739511419,
"learning_rate": 9.107159453904781e-06,
"loss": 0.0994,
"step": 2124
},
{
"epoch": 0.9667879890809827,
"grad_norm": 0.7410601791583596,
"learning_rate": 9.10634414911404e-06,
"loss": 0.0751,
"step": 2125
},
{
"epoch": 0.9672429481346679,
"grad_norm": 0.592927363864185,
"learning_rate": 9.105528508771395e-06,
"loss": 0.0785,
"step": 2126
},
{
"epoch": 0.967697907188353,
"grad_norm": 0.704125884709214,
"learning_rate": 9.104712532943502e-06,
"loss": 0.0672,
"step": 2127
},
{
"epoch": 0.9681528662420382,
"grad_norm": 0.6763649668606744,
"learning_rate": 9.10389622169704e-06,
"loss": 0.0813,
"step": 2128
},
{
"epoch": 0.9686078252957234,
"grad_norm": 1.0481681916194059,
"learning_rate": 9.103079575098708e-06,
"loss": 0.1165,
"step": 2129
},
{
"epoch": 0.9690627843494085,
"grad_norm": 0.6244343397167454,
"learning_rate": 9.102262593215246e-06,
"loss": 0.0548,
"step": 2130
},
{
"epoch": 0.9695177434030937,
"grad_norm": 0.6662772517701377,
"learning_rate": 9.101445276113407e-06,
"loss": 0.0672,
"step": 2131
},
{
"epoch": 0.9699727024567789,
"grad_norm": 0.7302079833291476,
"learning_rate": 9.100627623859985e-06,
"loss": 0.0747,
"step": 2132
},
{
"epoch": 0.9704276615104641,
"grad_norm": 0.7003598456468986,
"learning_rate": 9.09980963652179e-06,
"loss": 0.0763,
"step": 2133
},
{
"epoch": 0.9708826205641492,
"grad_norm": 0.8675523177046712,
"learning_rate": 9.098991314165668e-06,
"loss": 0.1123,
"step": 2134
},
{
"epoch": 0.9713375796178344,
"grad_norm": 0.6531391716615499,
"learning_rate": 9.098172656858484e-06,
"loss": 0.0626,
"step": 2135
},
{
"epoch": 0.9717925386715196,
"grad_norm": 0.8230462520119928,
"learning_rate": 9.097353664667138e-06,
"loss": 0.0873,
"step": 2136
},
{
"epoch": 0.9722474977252047,
"grad_norm": 0.6524897158303723,
"learning_rate": 9.096534337658558e-06,
"loss": 0.0658,
"step": 2137
},
{
"epoch": 0.9727024567788899,
"grad_norm": 0.7421742040769631,
"learning_rate": 9.095714675899688e-06,
"loss": 0.0782,
"step": 2138
},
{
"epoch": 0.9731574158325751,
"grad_norm": 0.6400011673563383,
"learning_rate": 9.094894679457511e-06,
"loss": 0.0605,
"step": 2139
},
{
"epoch": 0.9736123748862603,
"grad_norm": 0.5825220963314399,
"learning_rate": 9.094074348399034e-06,
"loss": 0.0711,
"step": 2140
},
{
"epoch": 0.9740673339399454,
"grad_norm": 0.9652267063711952,
"learning_rate": 9.09325368279129e-06,
"loss": 0.0996,
"step": 2141
},
{
"epoch": 0.9745222929936306,
"grad_norm": 0.9291202899333796,
"learning_rate": 9.09243268270134e-06,
"loss": 0.0818,
"step": 2142
},
{
"epoch": 0.9749772520473158,
"grad_norm": 0.8799622533298002,
"learning_rate": 9.091611348196272e-06,
"loss": 0.0904,
"step": 2143
},
{
"epoch": 0.9754322111010009,
"grad_norm": 0.8326816067428606,
"learning_rate": 9.090789679343201e-06,
"loss": 0.0931,
"step": 2144
},
{
"epoch": 0.9758871701546861,
"grad_norm": 0.783000579713321,
"learning_rate": 9.089967676209274e-06,
"loss": 0.0879,
"step": 2145
},
{
"epoch": 0.9763421292083713,
"grad_norm": 0.7001846964382422,
"learning_rate": 9.089145338861657e-06,
"loss": 0.0916,
"step": 2146
},
{
"epoch": 0.9767970882620565,
"grad_norm": 0.953946241556791,
"learning_rate": 9.08832266736755e-06,
"loss": 0.1205,
"step": 2147
},
{
"epoch": 0.9772520473157416,
"grad_norm": 0.7358151559070641,
"learning_rate": 9.087499661794177e-06,
"loss": 0.0915,
"step": 2148
},
{
"epoch": 0.9777070063694268,
"grad_norm": 0.8142291270830226,
"learning_rate": 9.08667632220879e-06,
"loss": 0.0995,
"step": 2149
},
{
"epoch": 0.978161965423112,
"grad_norm": 0.7106034630801776,
"learning_rate": 9.08585264867867e-06,
"loss": 0.0783,
"step": 2150
},
{
"epoch": 0.978616924476797,
"grad_norm": 0.826812478555379,
"learning_rate": 9.085028641271123e-06,
"loss": 0.1058,
"step": 2151
},
{
"epoch": 0.9790718835304822,
"grad_norm": 0.8960647231942128,
"learning_rate": 9.084204300053483e-06,
"loss": 0.108,
"step": 2152
},
{
"epoch": 0.9795268425841674,
"grad_norm": 0.7308955491972883,
"learning_rate": 9.083379625093111e-06,
"loss": 0.0963,
"step": 2153
},
{
"epoch": 0.9799818016378526,
"grad_norm": 0.854998609995297,
"learning_rate": 9.082554616457397e-06,
"loss": 0.1031,
"step": 2154
},
{
"epoch": 0.9804367606915377,
"grad_norm": 0.6134903423880519,
"learning_rate": 9.081729274213758e-06,
"loss": 0.0728,
"step": 2155
},
{
"epoch": 0.9808917197452229,
"grad_norm": 0.7494461465991118,
"learning_rate": 9.080903598429634e-06,
"loss": 0.0612,
"step": 2156
},
{
"epoch": 0.9813466787989081,
"grad_norm": 0.6477350071161301,
"learning_rate": 9.080077589172496e-06,
"loss": 0.0725,
"step": 2157
},
{
"epoch": 0.9818016378525932,
"grad_norm": 0.5949372826775987,
"learning_rate": 9.079251246509846e-06,
"loss": 0.0618,
"step": 2158
},
{
"epoch": 0.9822565969062784,
"grad_norm": 1.0457437129682037,
"learning_rate": 9.078424570509202e-06,
"loss": 0.134,
"step": 2159
},
{
"epoch": 0.9827115559599636,
"grad_norm": 0.7562918714504535,
"learning_rate": 9.077597561238123e-06,
"loss": 0.0746,
"step": 2160
},
{
"epoch": 0.9831665150136488,
"grad_norm": 0.705691881874251,
"learning_rate": 9.076770218764186e-06,
"loss": 0.0903,
"step": 2161
},
{
"epoch": 0.9836214740673339,
"grad_norm": 0.700571619924188,
"learning_rate": 9.075942543154996e-06,
"loss": 0.0905,
"step": 2162
},
{
"epoch": 0.9840764331210191,
"grad_norm": 0.5178609664739039,
"learning_rate": 9.075114534478187e-06,
"loss": 0.0623,
"step": 2163
},
{
"epoch": 0.9845313921747043,
"grad_norm": 0.5564063525132696,
"learning_rate": 9.074286192801423e-06,
"loss": 0.0622,
"step": 2164
},
{
"epoch": 0.9849863512283894,
"grad_norm": 0.8390150599738658,
"learning_rate": 9.07345751819239e-06,
"loss": 0.0894,
"step": 2165
},
{
"epoch": 0.9854413102820746,
"grad_norm": 0.6899304429749638,
"learning_rate": 9.072628510718804e-06,
"loss": 0.0715,
"step": 2166
},
{
"epoch": 0.9858962693357598,
"grad_norm": 0.7215157855324703,
"learning_rate": 9.071799170448409e-06,
"loss": 0.0767,
"step": 2167
},
{
"epoch": 0.986351228389445,
"grad_norm": 0.5513970488289187,
"learning_rate": 9.070969497448972e-06,
"loss": 0.0586,
"step": 2168
},
{
"epoch": 0.9868061874431301,
"grad_norm": 0.5126138943457034,
"learning_rate": 9.070139491788295e-06,
"loss": 0.0686,
"step": 2169
},
{
"epoch": 0.9872611464968153,
"grad_norm": 0.7021455623884609,
"learning_rate": 9.069309153534196e-06,
"loss": 0.0853,
"step": 2170
},
{
"epoch": 0.9877161055505005,
"grad_norm": 0.8937932838828458,
"learning_rate": 9.068478482754532e-06,
"loss": 0.1229,
"step": 2171
},
{
"epoch": 0.9881710646041856,
"grad_norm": 0.7580326063736847,
"learning_rate": 9.067647479517179e-06,
"loss": 0.1176,
"step": 2172
},
{
"epoch": 0.9886260236578708,
"grad_norm": 0.854693695415459,
"learning_rate": 9.066816143890042e-06,
"loss": 0.0624,
"step": 2173
},
{
"epoch": 0.989080982711556,
"grad_norm": 0.691622087221906,
"learning_rate": 9.065984475941056e-06,
"loss": 0.0821,
"step": 2174
},
{
"epoch": 0.9895359417652412,
"grad_norm": 0.5701976798754824,
"learning_rate": 9.065152475738182e-06,
"loss": 0.0525,
"step": 2175
},
{
"epoch": 0.9899909008189263,
"grad_norm": 0.5280985607821013,
"learning_rate": 9.064320143349405e-06,
"loss": 0.0532,
"step": 2176
},
{
"epoch": 0.9904458598726115,
"grad_norm": 0.7270073505569681,
"learning_rate": 9.063487478842738e-06,
"loss": 0.0729,
"step": 2177
},
{
"epoch": 0.9909008189262967,
"grad_norm": 0.5397573476737881,
"learning_rate": 9.062654482286228e-06,
"loss": 0.0546,
"step": 2178
},
{
"epoch": 0.9913557779799818,
"grad_norm": 0.8280519656078903,
"learning_rate": 9.061821153747938e-06,
"loss": 0.0794,
"step": 2179
},
{
"epoch": 0.991810737033667,
"grad_norm": 0.6367661759018886,
"learning_rate": 9.060987493295967e-06,
"loss": 0.0679,
"step": 2180
},
{
"epoch": 0.9922656960873522,
"grad_norm": 0.7859239736098618,
"learning_rate": 9.060153500998438e-06,
"loss": 0.0958,
"step": 2181
},
{
"epoch": 0.9927206551410374,
"grad_norm": 0.8770748630020422,
"learning_rate": 9.0593191769235e-06,
"loss": 0.1037,
"step": 2182
},
{
"epoch": 0.9931756141947224,
"grad_norm": 0.5493767625809909,
"learning_rate": 9.05848452113933e-06,
"loss": 0.0535,
"step": 2183
},
{
"epoch": 0.9936305732484076,
"grad_norm": 1.0509546431486094,
"learning_rate": 9.057649533714134e-06,
"loss": 0.1136,
"step": 2184
},
{
"epoch": 0.9940855323020928,
"grad_norm": 0.8067366260983323,
"learning_rate": 9.056814214716143e-06,
"loss": 0.0911,
"step": 2185
},
{
"epoch": 0.9945404913557779,
"grad_norm": 0.6708197750921108,
"learning_rate": 9.055978564213614e-06,
"loss": 0.0737,
"step": 2186
},
{
"epoch": 0.9949954504094631,
"grad_norm": 1.0620824544949425,
"learning_rate": 9.055142582274831e-06,
"loss": 0.1035,
"step": 2187
},
{
"epoch": 0.9954504094631483,
"grad_norm": 0.7809645088567875,
"learning_rate": 9.054306268968111e-06,
"loss": 0.0964,
"step": 2188
},
{
"epoch": 0.9959053685168335,
"grad_norm": 0.6922882332723763,
"learning_rate": 9.053469624361793e-06,
"loss": 0.0769,
"step": 2189
},
{
"epoch": 0.9963603275705186,
"grad_norm": 0.6135634693459231,
"learning_rate": 9.052632648524242e-06,
"loss": 0.0857,
"step": 2190
},
{
"epoch": 0.9968152866242038,
"grad_norm": 0.7230383107997012,
"learning_rate": 9.051795341523852e-06,
"loss": 0.0666,
"step": 2191
},
{
"epoch": 0.997270245677889,
"grad_norm": 0.7702877397526973,
"learning_rate": 9.050957703429044e-06,
"loss": 0.0861,
"step": 2192
},
{
"epoch": 0.9977252047315741,
"grad_norm": 0.79537510756259,
"learning_rate": 9.050119734308266e-06,
"loss": 0.0906,
"step": 2193
},
{
"epoch": 0.9981801637852593,
"grad_norm": 0.6318589660625535,
"learning_rate": 9.049281434229995e-06,
"loss": 0.0821,
"step": 2194
},
{
"epoch": 0.9986351228389445,
"grad_norm": 0.6618836956269952,
"learning_rate": 9.048442803262731e-06,
"loss": 0.0748,
"step": 2195
},
{
"epoch": 0.9990900818926297,
"grad_norm": 0.5469592163366095,
"learning_rate": 9.047603841475003e-06,
"loss": 0.066,
"step": 2196
},
{
"epoch": 0.9995450409463148,
"grad_norm": 0.6279887796401853,
"learning_rate": 9.046764548935368e-06,
"loss": 0.0743,
"step": 2197
},
{
"epoch": 1.0,
"grad_norm": 0.40519899960847033,
"learning_rate": 9.045924925712411e-06,
"loss": 0.0327,
"step": 2198
},
{
"epoch": 1.000454959053685,
"grad_norm": 0.41468311147935694,
"learning_rate": 9.045084971874738e-06,
"loss": 0.0243,
"step": 2199
},
{
"epoch": 1.0009099181073704,
"grad_norm": 0.5188055788021196,
"learning_rate": 9.04424468749099e-06,
"loss": 0.0375,
"step": 2200
},
{
"epoch": 1.0013648771610555,
"grad_norm": 0.4764585088866917,
"learning_rate": 9.04340407262983e-06,
"loss": 0.0395,
"step": 2201
},
{
"epoch": 1.0018198362147406,
"grad_norm": 0.28928828491344616,
"learning_rate": 9.042563127359946e-06,
"loss": 0.0208,
"step": 2202
},
{
"epoch": 1.0022747952684259,
"grad_norm": 0.5179468693343099,
"learning_rate": 9.041721851750063e-06,
"loss": 0.0322,
"step": 2203
},
{
"epoch": 1.002729754322111,
"grad_norm": 0.4198208723720039,
"learning_rate": 9.04088024586892e-06,
"loss": 0.0366,
"step": 2204
},
{
"epoch": 1.0031847133757963,
"grad_norm": 0.4784473138415427,
"learning_rate": 9.040038309785293e-06,
"loss": 0.0422,
"step": 2205
},
{
"epoch": 1.0036396724294814,
"grad_norm": 0.576332931747316,
"learning_rate": 9.039196043567979e-06,
"loss": 0.0387,
"step": 2206
},
{
"epoch": 1.0040946314831665,
"grad_norm": 0.5205582439898824,
"learning_rate": 9.038353447285807e-06,
"loss": 0.0551,
"step": 2207
},
{
"epoch": 1.0045495905368518,
"grad_norm": 0.7737994932982504,
"learning_rate": 9.037510521007626e-06,
"loss": 0.042,
"step": 2208
},
{
"epoch": 1.0050045495905369,
"grad_norm": 0.4056433108647087,
"learning_rate": 9.03666726480232e-06,
"loss": 0.0309,
"step": 2209
},
{
"epoch": 1.005459508644222,
"grad_norm": 0.31259616668647877,
"learning_rate": 9.035823678738795e-06,
"loss": 0.0247,
"step": 2210
},
{
"epoch": 1.0059144676979073,
"grad_norm": 0.545747512672262,
"learning_rate": 9.034979762885985e-06,
"loss": 0.0379,
"step": 2211
},
{
"epoch": 1.0063694267515924,
"grad_norm": 0.3531093457798414,
"learning_rate": 9.034135517312848e-06,
"loss": 0.0198,
"step": 2212
},
{
"epoch": 1.0068243858052774,
"grad_norm": 0.3471778421349368,
"learning_rate": 9.033290942088377e-06,
"loss": 0.0191,
"step": 2213
},
{
"epoch": 1.0072793448589628,
"grad_norm": 0.45123302926671505,
"learning_rate": 9.032446037281582e-06,
"loss": 0.0233,
"step": 2214
},
{
"epoch": 1.0077343039126478,
"grad_norm": 0.40498118740009004,
"learning_rate": 9.031600802961508e-06,
"loss": 0.028,
"step": 2215
},
{
"epoch": 1.008189262966333,
"grad_norm": 0.44404852807953515,
"learning_rate": 9.030755239197224e-06,
"loss": 0.0343,
"step": 2216
},
{
"epoch": 1.0086442220200182,
"grad_norm": 0.41886201143517243,
"learning_rate": 9.029909346057826e-06,
"loss": 0.0276,
"step": 2217
},
{
"epoch": 1.0090991810737033,
"grad_norm": 0.2879285343911946,
"learning_rate": 9.029063123612431e-06,
"loss": 0.02,
"step": 2218
},
{
"epoch": 1.0095541401273886,
"grad_norm": 0.5781677724909076,
"learning_rate": 9.028216571930197e-06,
"loss": 0.0339,
"step": 2219
},
{
"epoch": 1.0100090991810737,
"grad_norm": 0.42128445628125777,
"learning_rate": 9.027369691080292e-06,
"loss": 0.0329,
"step": 2220
},
{
"epoch": 1.0104640582347588,
"grad_norm": 0.4867304814601137,
"learning_rate": 9.026522481131925e-06,
"loss": 0.0451,
"step": 2221
},
{
"epoch": 1.0109190172884441,
"grad_norm": 0.35647532367363194,
"learning_rate": 9.025674942154325e-06,
"loss": 0.0202,
"step": 2222
},
{
"epoch": 1.0113739763421292,
"grad_norm": 0.6154778638320356,
"learning_rate": 9.024827074216748e-06,
"loss": 0.0619,
"step": 2223
},
{
"epoch": 1.0118289353958143,
"grad_norm": 0.46447780693049373,
"learning_rate": 9.023978877388479e-06,
"loss": 0.0265,
"step": 2224
},
{
"epoch": 1.0122838944494996,
"grad_norm": 0.4551756875246183,
"learning_rate": 9.02313035173883e-06,
"loss": 0.0167,
"step": 2225
},
{
"epoch": 1.0127388535031847,
"grad_norm": 0.4341660568896861,
"learning_rate": 9.022281497337133e-06,
"loss": 0.0257,
"step": 2226
},
{
"epoch": 1.0131938125568698,
"grad_norm": 0.37807969634776667,
"learning_rate": 9.021432314252758e-06,
"loss": 0.0235,
"step": 2227
},
{
"epoch": 1.013648771610555,
"grad_norm": 0.43791115876653813,
"learning_rate": 9.020582802555095e-06,
"loss": 0.0285,
"step": 2228
},
{
"epoch": 1.0141037306642402,
"grad_norm": 0.7541669794368306,
"learning_rate": 9.019732962313562e-06,
"loss": 0.0412,
"step": 2229
},
{
"epoch": 1.0145586897179253,
"grad_norm": 0.41591203424935613,
"learning_rate": 9.018882793597605e-06,
"loss": 0.0217,
"step": 2230
},
{
"epoch": 1.0150136487716106,
"grad_norm": 0.531675738557164,
"learning_rate": 9.018032296476695e-06,
"loss": 0.0259,
"step": 2231
},
{
"epoch": 1.0154686078252957,
"grad_norm": 0.4525534861298487,
"learning_rate": 9.017181471020331e-06,
"loss": 0.032,
"step": 2232
},
{
"epoch": 1.015923566878981,
"grad_norm": 0.5572932855598556,
"learning_rate": 9.016330317298038e-06,
"loss": 0.0321,
"step": 2233
},
{
"epoch": 1.016378525932666,
"grad_norm": 0.4880772464783955,
"learning_rate": 9.01547883537937e-06,
"loss": 0.0242,
"step": 2234
},
{
"epoch": 1.0168334849863512,
"grad_norm": 0.5290436879010799,
"learning_rate": 9.014627025333906e-06,
"loss": 0.0268,
"step": 2235
},
{
"epoch": 1.0172884440400365,
"grad_norm": 0.3469524553449946,
"learning_rate": 9.01377488723125e-06,
"loss": 0.0189,
"step": 2236
},
{
"epoch": 1.0177434030937216,
"grad_norm": 0.5381328202645719,
"learning_rate": 9.012922421141036e-06,
"loss": 0.0282,
"step": 2237
},
{
"epoch": 1.0181983621474067,
"grad_norm": 0.5437416204093511,
"learning_rate": 9.012069627132925e-06,
"loss": 0.0365,
"step": 2238
},
{
"epoch": 1.018653321201092,
"grad_norm": 0.5151432843211493,
"learning_rate": 9.011216505276601e-06,
"loss": 0.0327,
"step": 2239
},
{
"epoch": 1.019108280254777,
"grad_norm": 0.7194165832171175,
"learning_rate": 9.01036305564178e-06,
"loss": 0.0447,
"step": 2240
},
{
"epoch": 1.0195632393084622,
"grad_norm": 0.4895196525190099,
"learning_rate": 9.009509278298201e-06,
"loss": 0.0226,
"step": 2241
},
{
"epoch": 1.0200181983621475,
"grad_norm": 0.36403402277658775,
"learning_rate": 9.008655173315629e-06,
"loss": 0.0172,
"step": 2242
},
{
"epoch": 1.0204731574158326,
"grad_norm": 0.5192307375895406,
"learning_rate": 9.00780074076386e-06,
"loss": 0.0281,
"step": 2243
},
{
"epoch": 1.0209281164695176,
"grad_norm": 0.5855074570295021,
"learning_rate": 9.006945980712713e-06,
"loss": 0.039,
"step": 2244
},
{
"epoch": 1.021383075523203,
"grad_norm": 0.3530576777441414,
"learning_rate": 9.006090893232036e-06,
"loss": 0.0165,
"step": 2245
},
{
"epoch": 1.021838034576888,
"grad_norm": 0.46560015374930225,
"learning_rate": 9.005235478391704e-06,
"loss": 0.031,
"step": 2246
},
{
"epoch": 1.0222929936305734,
"grad_norm": 0.4320906337363968,
"learning_rate": 9.004379736261614e-06,
"loss": 0.0229,
"step": 2247
},
{
"epoch": 1.0227479526842584,
"grad_norm": 0.5843690219708401,
"learning_rate": 9.003523666911698e-06,
"loss": 0.0398,
"step": 2248
},
{
"epoch": 1.0232029117379435,
"grad_norm": 0.4876049343109499,
"learning_rate": 9.002667270411905e-06,
"loss": 0.0209,
"step": 2249
},
{
"epoch": 1.0236578707916288,
"grad_norm": 0.4996309287294051,
"learning_rate": 9.001810546832219e-06,
"loss": 0.0339,
"step": 2250
},
{
"epoch": 1.024112829845314,
"grad_norm": 0.44615485337683974,
"learning_rate": 9.000953496242648e-06,
"loss": 0.0367,
"step": 2251
},
{
"epoch": 1.024567788898999,
"grad_norm": 0.4816248261028461,
"learning_rate": 9.000096118713226e-06,
"loss": 0.0302,
"step": 2252
},
{
"epoch": 1.0250227479526843,
"grad_norm": 0.3202895454501902,
"learning_rate": 8.999238414314014e-06,
"loss": 0.018,
"step": 2253
},
{
"epoch": 1.0254777070063694,
"grad_norm": 0.39394390771447657,
"learning_rate": 8.998380383115098e-06,
"loss": 0.0203,
"step": 2254
},
{
"epoch": 1.0259326660600545,
"grad_norm": 0.6774965098079401,
"learning_rate": 8.997522025186592e-06,
"loss": 0.0444,
"step": 2255
},
{
"epoch": 1.0263876251137398,
"grad_norm": 0.6156285698131154,
"learning_rate": 8.996663340598642e-06,
"loss": 0.033,
"step": 2256
},
{
"epoch": 1.026842584167425,
"grad_norm": 0.6636465470342775,
"learning_rate": 8.995804329421408e-06,
"loss": 0.0282,
"step": 2257
},
{
"epoch": 1.02729754322111,
"grad_norm": 0.7643329557559453,
"learning_rate": 8.994944991725094e-06,
"loss": 0.0413,
"step": 2258
},
{
"epoch": 1.0277525022747953,
"grad_norm": 0.4484887858566329,
"learning_rate": 8.994085327579914e-06,
"loss": 0.0244,
"step": 2259
},
{
"epoch": 1.0282074613284804,
"grad_norm": 0.6046158805682427,
"learning_rate": 8.993225337056118e-06,
"loss": 0.0372,
"step": 2260
},
{
"epoch": 1.0286624203821657,
"grad_norm": 0.5297868937946675,
"learning_rate": 8.992365020223982e-06,
"loss": 0.0407,
"step": 2261
},
{
"epoch": 1.0291173794358508,
"grad_norm": 0.4805793953554321,
"learning_rate": 8.991504377153805e-06,
"loss": 0.0297,
"step": 2262
},
{
"epoch": 1.0295723384895359,
"grad_norm": 0.6196673347815759,
"learning_rate": 8.990643407915915e-06,
"loss": 0.0397,
"step": 2263
},
{
"epoch": 1.0300272975432212,
"grad_norm": 0.6223272220447811,
"learning_rate": 8.98978211258067e-06,
"loss": 0.0409,
"step": 2264
},
{
"epoch": 1.0304822565969063,
"grad_norm": 0.49952273986223505,
"learning_rate": 8.988920491218446e-06,
"loss": 0.0272,
"step": 2265
},
{
"epoch": 1.0309372156505914,
"grad_norm": 0.6292771186739616,
"learning_rate": 8.988058543899654e-06,
"loss": 0.0384,
"step": 2266
},
{
"epoch": 1.0313921747042767,
"grad_norm": 0.38772458827936923,
"learning_rate": 8.987196270694727e-06,
"loss": 0.024,
"step": 2267
},
{
"epoch": 1.0318471337579618,
"grad_norm": 0.8799833129039605,
"learning_rate": 8.986333671674128e-06,
"loss": 0.0341,
"step": 2268
},
{
"epoch": 1.0323020928116469,
"grad_norm": 0.6271731268799836,
"learning_rate": 8.985470746908342e-06,
"loss": 0.033,
"step": 2269
},
{
"epoch": 1.0327570518653322,
"grad_norm": 0.38786047905872434,
"learning_rate": 8.984607496467885e-06,
"loss": 0.021,
"step": 2270
},
{
"epoch": 1.0332120109190173,
"grad_norm": 0.6280644096851069,
"learning_rate": 8.9837439204233e-06,
"loss": 0.0491,
"step": 2271
},
{
"epoch": 1.0336669699727024,
"grad_norm": 0.5847841334225715,
"learning_rate": 8.98288001884515e-06,
"loss": 0.0337,
"step": 2272
},
{
"epoch": 1.0341219290263877,
"grad_norm": 0.36088677101245703,
"learning_rate": 8.982015791804032e-06,
"loss": 0.0156,
"step": 2273
},
{
"epoch": 1.0345768880800728,
"grad_norm": 0.4537884974426005,
"learning_rate": 8.981151239370566e-06,
"loss": 0.027,
"step": 2274
},
{
"epoch": 1.035031847133758,
"grad_norm": 0.6090066061447076,
"learning_rate": 8.9802863616154e-06,
"loss": 0.0378,
"step": 2275
},
{
"epoch": 1.0354868061874432,
"grad_norm": 0.7101749544755233,
"learning_rate": 8.979421158609206e-06,
"loss": 0.0439,
"step": 2276
},
{
"epoch": 1.0359417652411282,
"grad_norm": 0.5742339125588956,
"learning_rate": 8.978555630422686e-06,
"loss": 0.0328,
"step": 2277
},
{
"epoch": 1.0363967242948136,
"grad_norm": 0.632873074474985,
"learning_rate": 8.977689777126568e-06,
"loss": 0.0472,
"step": 2278
},
{
"epoch": 1.0368516833484986,
"grad_norm": 0.8069979527700195,
"learning_rate": 8.976823598791604e-06,
"loss": 0.0319,
"step": 2279
},
{
"epoch": 1.0373066424021837,
"grad_norm": 0.4015240288673539,
"learning_rate": 8.975957095488575e-06,
"loss": 0.0269,
"step": 2280
},
{
"epoch": 1.037761601455869,
"grad_norm": 0.5786381841993868,
"learning_rate": 8.975090267288286e-06,
"loss": 0.0296,
"step": 2281
},
{
"epoch": 1.0382165605095541,
"grad_norm": 0.5451914455456522,
"learning_rate": 8.974223114261574e-06,
"loss": 0.0343,
"step": 2282
},
{
"epoch": 1.0386715195632392,
"grad_norm": 0.6945170105788371,
"learning_rate": 8.973355636479294e-06,
"loss": 0.0476,
"step": 2283
},
{
"epoch": 1.0391264786169245,
"grad_norm": 0.5171663408691534,
"learning_rate": 8.972487834012338e-06,
"loss": 0.0301,
"step": 2284
},
{
"epoch": 1.0395814376706096,
"grad_norm": 0.494166229450044,
"learning_rate": 8.971619706931613e-06,
"loss": 0.0226,
"step": 2285
},
{
"epoch": 1.0400363967242947,
"grad_norm": 0.7676778552323048,
"learning_rate": 8.970751255308063e-06,
"loss": 0.045,
"step": 2286
},
{
"epoch": 1.04049135577798,
"grad_norm": 0.44323443611073776,
"learning_rate": 8.969882479212652e-06,
"loss": 0.0196,
"step": 2287
},
{
"epoch": 1.040946314831665,
"grad_norm": 0.41146000373164554,
"learning_rate": 8.969013378716371e-06,
"loss": 0.0196,
"step": 2288
},
{
"epoch": 1.0414012738853504,
"grad_norm": 0.3888711487160539,
"learning_rate": 8.968143953890242e-06,
"loss": 0.0228,
"step": 2289
},
{
"epoch": 1.0418562329390355,
"grad_norm": 0.49379959221935377,
"learning_rate": 8.96727420480531e-06,
"loss": 0.0306,
"step": 2290
},
{
"epoch": 1.0423111919927206,
"grad_norm": 0.48325360654642197,
"learning_rate": 8.966404131532645e-06,
"loss": 0.0265,
"step": 2291
},
{
"epoch": 1.042766151046406,
"grad_norm": 0.47493208719115093,
"learning_rate": 8.965533734143347e-06,
"loss": 0.0239,
"step": 2292
},
{
"epoch": 1.043221110100091,
"grad_norm": 0.556271091368108,
"learning_rate": 8.964663012708538e-06,
"loss": 0.0365,
"step": 2293
},
{
"epoch": 1.043676069153776,
"grad_norm": 0.8512257992210553,
"learning_rate": 8.963791967299375e-06,
"loss": 0.0332,
"step": 2294
},
{
"epoch": 1.0441310282074614,
"grad_norm": 0.4600946915818348,
"learning_rate": 8.96292059798703e-06,
"loss": 0.0254,
"step": 2295
},
{
"epoch": 1.0445859872611465,
"grad_norm": 0.5926927797370501,
"learning_rate": 8.962048904842713e-06,
"loss": 0.034,
"step": 2296
},
{
"epoch": 1.0450409463148316,
"grad_norm": 0.5174352508068348,
"learning_rate": 8.96117688793765e-06,
"loss": 0.0334,
"step": 2297
},
{
"epoch": 1.0454959053685169,
"grad_norm": 0.4726564762945724,
"learning_rate": 8.960304547343101e-06,
"loss": 0.0271,
"step": 2298
},
{
"epoch": 1.045950864422202,
"grad_norm": 0.49021838747059965,
"learning_rate": 8.959431883130348e-06,
"loss": 0.0272,
"step": 2299
},
{
"epoch": 1.046405823475887,
"grad_norm": 0.33392762330146264,
"learning_rate": 8.958558895370703e-06,
"loss": 0.0184,
"step": 2300
},
{
"epoch": 1.0468607825295724,
"grad_norm": 0.43970090494512293,
"learning_rate": 8.9576855841355e-06,
"loss": 0.0247,
"step": 2301
},
{
"epoch": 1.0473157415832575,
"grad_norm": 0.34961568768416074,
"learning_rate": 8.956811949496108e-06,
"loss": 0.0207,
"step": 2302
},
{
"epoch": 1.0477707006369428,
"grad_norm": 0.5047819086466443,
"learning_rate": 8.955937991523908e-06,
"loss": 0.0358,
"step": 2303
},
{
"epoch": 1.0482256596906279,
"grad_norm": 0.5502957295717672,
"learning_rate": 8.955063710290322e-06,
"loss": 0.0396,
"step": 2304
},
{
"epoch": 1.048680618744313,
"grad_norm": 0.4007555279082937,
"learning_rate": 8.95418910586679e-06,
"loss": 0.0205,
"step": 2305
},
{
"epoch": 1.0491355777979983,
"grad_norm": 0.37932885662916804,
"learning_rate": 8.953314178324782e-06,
"loss": 0.0261,
"step": 2306
},
{
"epoch": 1.0495905368516834,
"grad_norm": 0.6331059696275105,
"learning_rate": 8.952438927735793e-06,
"loss": 0.0397,
"step": 2307
},
{
"epoch": 1.0500454959053684,
"grad_norm": 0.5533999405103901,
"learning_rate": 8.951563354171343e-06,
"loss": 0.0216,
"step": 2308
},
{
"epoch": 1.0505004549590538,
"grad_norm": 0.5064049753801714,
"learning_rate": 8.950687457702981e-06,
"loss": 0.0253,
"step": 2309
},
{
"epoch": 1.0509554140127388,
"grad_norm": 0.7762514931128638,
"learning_rate": 8.94981123840228e-06,
"loss": 0.0257,
"step": 2310
},
{
"epoch": 1.051410373066424,
"grad_norm": 0.5258772784610919,
"learning_rate": 8.948934696340842e-06,
"loss": 0.0402,
"step": 2311
},
{
"epoch": 1.0518653321201092,
"grad_norm": 0.5179164003875761,
"learning_rate": 8.948057831590296e-06,
"loss": 0.0392,
"step": 2312
},
{
"epoch": 1.0523202911737943,
"grad_norm": 0.4873683404674824,
"learning_rate": 8.94718064422229e-06,
"loss": 0.0225,
"step": 2313
},
{
"epoch": 1.0527752502274794,
"grad_norm": 0.42294954664238593,
"learning_rate": 8.94630313430851e-06,
"loss": 0.0239,
"step": 2314
},
{
"epoch": 1.0532302092811647,
"grad_norm": 0.5120965619588207,
"learning_rate": 8.945425301920656e-06,
"loss": 0.0239,
"step": 2315
},
{
"epoch": 1.0536851683348498,
"grad_norm": 0.5274581767565953,
"learning_rate": 8.944547147130467e-06,
"loss": 0.0395,
"step": 2316
},
{
"epoch": 1.0541401273885351,
"grad_norm": 0.6240914390797723,
"learning_rate": 8.943668670009698e-06,
"loss": 0.04,
"step": 2317
},
{
"epoch": 1.0545950864422202,
"grad_norm": 0.588480807715609,
"learning_rate": 8.942789870630133e-06,
"loss": 0.0379,
"step": 2318
},
{
"epoch": 1.0550500454959053,
"grad_norm": 0.5328051168509789,
"learning_rate": 8.941910749063587e-06,
"loss": 0.0256,
"step": 2319
},
{
"epoch": 1.0555050045495906,
"grad_norm": 0.5662136884367794,
"learning_rate": 8.941031305381894e-06,
"loss": 0.0349,
"step": 2320
},
{
"epoch": 1.0559599636032757,
"grad_norm": 0.4080289916939306,
"learning_rate": 8.940151539656922e-06,
"loss": 0.0203,
"step": 2321
},
{
"epoch": 1.0564149226569608,
"grad_norm": 0.6644738842779135,
"learning_rate": 8.93927145196056e-06,
"loss": 0.0295,
"step": 2322
},
{
"epoch": 1.056869881710646,
"grad_norm": 0.43989425636246393,
"learning_rate": 8.938391042364723e-06,
"loss": 0.0257,
"step": 2323
},
{
"epoch": 1.0573248407643312,
"grad_norm": 0.5431541428763835,
"learning_rate": 8.937510310941358e-06,
"loss": 0.03,
"step": 2324
},
{
"epoch": 1.0577797998180163,
"grad_norm": 0.5122724279785533,
"learning_rate": 8.936629257762429e-06,
"loss": 0.0273,
"step": 2325
},
{
"epoch": 1.0582347588717016,
"grad_norm": 0.41195858239961775,
"learning_rate": 8.935747882899937e-06,
"loss": 0.0216,
"step": 2326
},
{
"epoch": 1.0586897179253867,
"grad_norm": 0.5171757707727286,
"learning_rate": 8.9348661864259e-06,
"loss": 0.0299,
"step": 2327
},
{
"epoch": 1.0591446769790718,
"grad_norm": 0.6216382380161013,
"learning_rate": 8.93398416841237e-06,
"loss": 0.0525,
"step": 2328
},
{
"epoch": 1.059599636032757,
"grad_norm": 0.47615445722593264,
"learning_rate": 8.933101828931418e-06,
"loss": 0.0229,
"step": 2329
},
{
"epoch": 1.0600545950864422,
"grad_norm": 0.5543921495737715,
"learning_rate": 8.932219168055146e-06,
"loss": 0.0353,
"step": 2330
},
{
"epoch": 1.0605095541401275,
"grad_norm": 0.4807073495602966,
"learning_rate": 8.931336185855682e-06,
"loss": 0.029,
"step": 2331
},
{
"epoch": 1.0609645131938126,
"grad_norm": 0.7132043951444881,
"learning_rate": 8.930452882405178e-06,
"loss": 0.0573,
"step": 2332
},
{
"epoch": 1.0614194722474977,
"grad_norm": 0.7323908092635573,
"learning_rate": 8.929569257775816e-06,
"loss": 0.031,
"step": 2333
},
{
"epoch": 1.061874431301183,
"grad_norm": 0.7282498524373471,
"learning_rate": 8.9286853120398e-06,
"loss": 0.0212,
"step": 2334
},
{
"epoch": 1.062329390354868,
"grad_norm": 0.5041730540211715,
"learning_rate": 8.92780104526936e-06,
"loss": 0.0219,
"step": 2335
},
{
"epoch": 1.0627843494085532,
"grad_norm": 0.5694707546108049,
"learning_rate": 8.926916457536755e-06,
"loss": 0.0277,
"step": 2336
},
{
"epoch": 1.0632393084622385,
"grad_norm": 0.4942987205465501,
"learning_rate": 8.926031548914274e-06,
"loss": 0.0283,
"step": 2337
},
{
"epoch": 1.0636942675159236,
"grad_norm": 0.7094719472889628,
"learning_rate": 8.925146319474225e-06,
"loss": 0.0484,
"step": 2338
},
{
"epoch": 1.0641492265696086,
"grad_norm": 0.5401572696577567,
"learning_rate": 8.924260769288944e-06,
"loss": 0.032,
"step": 2339
},
{
"epoch": 1.064604185623294,
"grad_norm": 0.6271229371930636,
"learning_rate": 8.923374898430794e-06,
"loss": 0.0417,
"step": 2340
},
{
"epoch": 1.065059144676979,
"grad_norm": 0.5384710947557135,
"learning_rate": 8.922488706972165e-06,
"loss": 0.028,
"step": 2341
},
{
"epoch": 1.0655141037306644,
"grad_norm": 0.5738095562796759,
"learning_rate": 8.921602194985473e-06,
"loss": 0.0251,
"step": 2342
},
{
"epoch": 1.0659690627843494,
"grad_norm": 0.4114388383836,
"learning_rate": 8.920715362543158e-06,
"loss": 0.0257,
"step": 2343
},
{
"epoch": 1.0664240218380345,
"grad_norm": 0.4407026853756295,
"learning_rate": 8.919828209717691e-06,
"loss": 0.0318,
"step": 2344
},
{
"epoch": 1.0668789808917198,
"grad_norm": 0.5795706484311789,
"learning_rate": 8.918940736581565e-06,
"loss": 0.0384,
"step": 2345
},
{
"epoch": 1.067333939945405,
"grad_norm": 0.4997138165488597,
"learning_rate": 8.918052943207298e-06,
"loss": 0.0339,
"step": 2346
},
{
"epoch": 1.06778889899909,
"grad_norm": 0.6466785074736559,
"learning_rate": 8.91716482966744e-06,
"loss": 0.0412,
"step": 2347
},
{
"epoch": 1.0682438580527753,
"grad_norm": 0.6101860514996267,
"learning_rate": 8.916276396034561e-06,
"loss": 0.0349,
"step": 2348
},
{
"epoch": 1.0686988171064604,
"grad_norm": 0.6648890763063255,
"learning_rate": 8.915387642381261e-06,
"loss": 0.0374,
"step": 2349
},
{
"epoch": 1.0691537761601455,
"grad_norm": 0.6435783427790035,
"learning_rate": 8.914498568780163e-06,
"loss": 0.0425,
"step": 2350
},
{
"epoch": 1.0696087352138308,
"grad_norm": 0.4168529921191238,
"learning_rate": 8.913609175303923e-06,
"loss": 0.0222,
"step": 2351
},
{
"epoch": 1.070063694267516,
"grad_norm": 0.370333742802149,
"learning_rate": 8.912719462025213e-06,
"loss": 0.018,
"step": 2352
},
{
"epoch": 1.070518653321201,
"grad_norm": 0.3929772094003772,
"learning_rate": 8.911829429016737e-06,
"loss": 0.0184,
"step": 2353
},
{
"epoch": 1.0709736123748863,
"grad_norm": 0.36777976145335695,
"learning_rate": 8.910939076351228e-06,
"loss": 0.0199,
"step": 2354
},
{
"epoch": 1.0714285714285714,
"grad_norm": 0.5445905742319043,
"learning_rate": 8.910048404101437e-06,
"loss": 0.0297,
"step": 2355
},
{
"epoch": 1.0718835304822565,
"grad_norm": 0.517651494476337,
"learning_rate": 8.90915741234015e-06,
"loss": 0.0244,
"step": 2356
},
{
"epoch": 1.0723384895359418,
"grad_norm": 0.6079868190664829,
"learning_rate": 8.908266101140173e-06,
"loss": 0.0327,
"step": 2357
},
{
"epoch": 1.0727934485896269,
"grad_norm": 0.5005614750938115,
"learning_rate": 8.907374470574339e-06,
"loss": 0.0288,
"step": 2358
},
{
"epoch": 1.0732484076433122,
"grad_norm": 0.41084278869296126,
"learning_rate": 8.906482520715508e-06,
"loss": 0.0196,
"step": 2359
},
{
"epoch": 1.0737033666969973,
"grad_norm": 0.42883961230062595,
"learning_rate": 8.905590251636566e-06,
"loss": 0.0201,
"step": 2360
},
{
"epoch": 1.0741583257506824,
"grad_norm": 0.7507509176249603,
"learning_rate": 8.904697663410429e-06,
"loss": 0.0519,
"step": 2361
},
{
"epoch": 1.0746132848043677,
"grad_norm": 0.35684834441788627,
"learning_rate": 8.90380475611003e-06,
"loss": 0.0193,
"step": 2362
},
{
"epoch": 1.0750682438580528,
"grad_norm": 0.359991301638448,
"learning_rate": 8.902911529808338e-06,
"loss": 0.02,
"step": 2363
},
{
"epoch": 1.0755232029117379,
"grad_norm": 0.6485293447004715,
"learning_rate": 8.90201798457834e-06,
"loss": 0.05,
"step": 2364
},
{
"epoch": 1.0759781619654232,
"grad_norm": 0.35596882973823685,
"learning_rate": 8.901124120493055e-06,
"loss": 0.0201,
"step": 2365
},
{
"epoch": 1.0764331210191083,
"grad_norm": 0.5195485453283638,
"learning_rate": 8.900229937625522e-06,
"loss": 0.0267,
"step": 2366
},
{
"epoch": 1.0768880800727934,
"grad_norm": 0.5121436407601963,
"learning_rate": 8.899335436048813e-06,
"loss": 0.0293,
"step": 2367
},
{
"epoch": 1.0773430391264787,
"grad_norm": 0.574083355691705,
"learning_rate": 8.898440615836021e-06,
"loss": 0.0314,
"step": 2368
},
{
"epoch": 1.0777979981801638,
"grad_norm": 0.36323016195490376,
"learning_rate": 8.897545477060268e-06,
"loss": 0.0164,
"step": 2369
},
{
"epoch": 1.078252957233849,
"grad_norm": 0.44874033315946665,
"learning_rate": 8.8966500197947e-06,
"loss": 0.0255,
"step": 2370
},
{
"epoch": 1.0787079162875342,
"grad_norm": 0.4549169634711705,
"learning_rate": 8.895754244112486e-06,
"loss": 0.0252,
"step": 2371
},
{
"epoch": 1.0791628753412192,
"grad_norm": 0.5188300138751303,
"learning_rate": 8.894858150086832e-06,
"loss": 0.022,
"step": 2372
},
{
"epoch": 1.0796178343949046,
"grad_norm": 0.5077854205250166,
"learning_rate": 8.893961737790957e-06,
"loss": 0.027,
"step": 2373
},
{
"epoch": 1.0800727934485896,
"grad_norm": 0.5080695970336,
"learning_rate": 8.893065007298116e-06,
"loss": 0.0293,
"step": 2374
},
{
"epoch": 1.0805277525022747,
"grad_norm": 0.49124016807194615,
"learning_rate": 8.89216795868158e-06,
"loss": 0.0253,
"step": 2375
},
{
"epoch": 1.08098271155596,
"grad_norm": 0.746420330430573,
"learning_rate": 8.891270592014658e-06,
"loss": 0.0393,
"step": 2376
},
{
"epoch": 1.0814376706096451,
"grad_norm": 0.5899621371906842,
"learning_rate": 8.890372907370677e-06,
"loss": 0.0325,
"step": 2377
},
{
"epoch": 1.0818926296633302,
"grad_norm": 0.538668781912988,
"learning_rate": 8.889474904822987e-06,
"loss": 0.0254,
"step": 2378
},
{
"epoch": 1.0823475887170155,
"grad_norm": 0.48796027217616167,
"learning_rate": 8.888576584444976e-06,
"loss": 0.0284,
"step": 2379
},
{
"epoch": 1.0828025477707006,
"grad_norm": 0.4607384499708701,
"learning_rate": 8.887677946310045e-06,
"loss": 0.0293,
"step": 2380
},
{
"epoch": 1.0832575068243857,
"grad_norm": 0.6691227522534325,
"learning_rate": 8.886778990491632e-06,
"loss": 0.0479,
"step": 2381
},
{
"epoch": 1.083712465878071,
"grad_norm": 0.4131339751828579,
"learning_rate": 8.885879717063189e-06,
"loss": 0.0232,
"step": 2382
},
{
"epoch": 1.084167424931756,
"grad_norm": 0.49834287436563,
"learning_rate": 8.884980126098206e-06,
"loss": 0.0261,
"step": 2383
},
{
"epoch": 1.0846223839854412,
"grad_norm": 0.49133678192638947,
"learning_rate": 8.88408021767019e-06,
"loss": 0.0217,
"step": 2384
},
{
"epoch": 1.0850773430391265,
"grad_norm": 0.4897177991752284,
"learning_rate": 8.88317999185268e-06,
"loss": 0.0304,
"step": 2385
},
{
"epoch": 1.0855323020928116,
"grad_norm": 0.5332982190122252,
"learning_rate": 8.882279448719235e-06,
"loss": 0.024,
"step": 2386
},
{
"epoch": 1.085987261146497,
"grad_norm": 0.39337001966991797,
"learning_rate": 8.881378588343448e-06,
"loss": 0.0195,
"step": 2387
},
{
"epoch": 1.086442220200182,
"grad_norm": 0.5648723431118464,
"learning_rate": 8.88047741079893e-06,
"loss": 0.0277,
"step": 2388
},
{
"epoch": 1.086897179253867,
"grad_norm": 0.38358401084782046,
"learning_rate": 8.879575916159323e-06,
"loss": 0.0234,
"step": 2389
},
{
"epoch": 1.0873521383075524,
"grad_norm": 0.4916039064871815,
"learning_rate": 8.878674104498293e-06,
"loss": 0.0196,
"step": 2390
},
{
"epoch": 1.0878070973612375,
"grad_norm": 0.4574406020630443,
"learning_rate": 8.877771975889529e-06,
"loss": 0.0266,
"step": 2391
},
{
"epoch": 1.0882620564149226,
"grad_norm": 1.2527103886930033,
"learning_rate": 8.876869530406753e-06,
"loss": 0.085,
"step": 2392
},
{
"epoch": 1.0887170154686079,
"grad_norm": 0.6740099441800771,
"learning_rate": 8.875966768123705e-06,
"loss": 0.0491,
"step": 2393
},
{
"epoch": 1.089171974522293,
"grad_norm": 0.8127319301316774,
"learning_rate": 8.875063689114157e-06,
"loss": 0.0351,
"step": 2394
},
{
"epoch": 1.089626933575978,
"grad_norm": 0.6883882884250196,
"learning_rate": 8.874160293451903e-06,
"loss": 0.0351,
"step": 2395
},
{
"epoch": 1.0900818926296634,
"grad_norm": 0.472050537765526,
"learning_rate": 8.873256581210767e-06,
"loss": 0.0281,
"step": 2396
},
{
"epoch": 1.0905368516833485,
"grad_norm": 0.43429585005126187,
"learning_rate": 8.872352552464594e-06,
"loss": 0.0217,
"step": 2397
},
{
"epoch": 1.0909918107370338,
"grad_norm": 0.7559591015285818,
"learning_rate": 8.871448207287259e-06,
"loss": 0.0234,
"step": 2398
},
{
"epoch": 1.0914467697907189,
"grad_norm": 1.295843093263791,
"learning_rate": 8.870543545752657e-06,
"loss": 0.0378,
"step": 2399
},
{
"epoch": 1.091901728844404,
"grad_norm": 0.687703240327456,
"learning_rate": 8.869638567934718e-06,
"loss": 0.0428,
"step": 2400
},
{
"epoch": 1.0923566878980893,
"grad_norm": 0.5316380088515792,
"learning_rate": 8.86873327390739e-06,
"loss": 0.0207,
"step": 2401
},
{
"epoch": 1.0928116469517744,
"grad_norm": 0.37080940024955544,
"learning_rate": 8.867827663744649e-06,
"loss": 0.014,
"step": 2402
},
{
"epoch": 1.0932666060054594,
"grad_norm": 0.551372034105751,
"learning_rate": 8.8669217375205e-06,
"loss": 0.0407,
"step": 2403
},
{
"epoch": 1.0937215650591448,
"grad_norm": 0.550827427093742,
"learning_rate": 8.866015495308967e-06,
"loss": 0.0295,
"step": 2404
},
{
"epoch": 1.0941765241128298,
"grad_norm": 0.5312346261037174,
"learning_rate": 8.865108937184108e-06,
"loss": 0.0329,
"step": 2405
},
{
"epoch": 1.094631483166515,
"grad_norm": 0.606116027973049,
"learning_rate": 8.864202063220003e-06,
"loss": 0.036,
"step": 2406
},
{
"epoch": 1.0950864422202002,
"grad_norm": 0.5039409256044083,
"learning_rate": 8.863294873490752e-06,
"loss": 0.0237,
"step": 2407
},
{
"epoch": 1.0955414012738853,
"grad_norm": 0.7088932326845141,
"learning_rate": 8.862387368070493e-06,
"loss": 0.0502,
"step": 2408
},
{
"epoch": 1.0959963603275704,
"grad_norm": 0.42457685669799344,
"learning_rate": 8.86147954703338e-06,
"loss": 0.0232,
"step": 2409
},
{
"epoch": 1.0964513193812557,
"grad_norm": 0.400049727629285,
"learning_rate": 8.860571410453598e-06,
"loss": 0.0137,
"step": 2410
},
{
"epoch": 1.0969062784349408,
"grad_norm": 0.5528326412344238,
"learning_rate": 8.859662958405352e-06,
"loss": 0.0259,
"step": 2411
},
{
"epoch": 1.097361237488626,
"grad_norm": 0.3740020218354164,
"learning_rate": 8.858754190962881e-06,
"loss": 0.0207,
"step": 2412
},
{
"epoch": 1.0978161965423112,
"grad_norm": 0.43380267454252947,
"learning_rate": 8.857845108200443e-06,
"loss": 0.03,
"step": 2413
},
{
"epoch": 1.0982711555959963,
"grad_norm": 0.41117776188244837,
"learning_rate": 8.856935710192326e-06,
"loss": 0.0217,
"step": 2414
},
{
"epoch": 1.0987261146496816,
"grad_norm": 0.7295481072089418,
"learning_rate": 8.856025997012837e-06,
"loss": 0.0355,
"step": 2415
},
{
"epoch": 1.0991810737033667,
"grad_norm": 0.6100308273835641,
"learning_rate": 8.85511596873632e-06,
"loss": 0.0369,
"step": 2416
},
{
"epoch": 1.0996360327570518,
"grad_norm": 0.41413261117443184,
"learning_rate": 8.854205625437135e-06,
"loss": 0.0198,
"step": 2417
},
{
"epoch": 1.100090991810737,
"grad_norm": 0.45865368499615844,
"learning_rate": 8.853294967189672e-06,
"loss": 0.0274,
"step": 2418
},
{
"epoch": 1.1005459508644222,
"grad_norm": 0.49503724640291885,
"learning_rate": 8.852383994068345e-06,
"loss": 0.039,
"step": 2419
},
{
"epoch": 1.1010009099181073,
"grad_norm": 0.3278139965958097,
"learning_rate": 8.851472706147595e-06,
"loss": 0.02,
"step": 2420
},
{
"epoch": 1.1014558689717926,
"grad_norm": 0.7072991481662654,
"learning_rate": 8.85056110350189e-06,
"loss": 0.0478,
"step": 2421
},
{
"epoch": 1.1019108280254777,
"grad_norm": 0.3754428113606483,
"learning_rate": 8.84964918620572e-06,
"loss": 0.0204,
"step": 2422
},
{
"epoch": 1.1023657870791628,
"grad_norm": 0.7096758634544409,
"learning_rate": 8.848736954333603e-06,
"loss": 0.0335,
"step": 2423
},
{
"epoch": 1.102820746132848,
"grad_norm": 0.5727995354405594,
"learning_rate": 8.847824407960083e-06,
"loss": 0.0323,
"step": 2424
},
{
"epoch": 1.1032757051865332,
"grad_norm": 0.6229568548114003,
"learning_rate": 8.84691154715973e-06,
"loss": 0.0309,
"step": 2425
},
{
"epoch": 1.1037306642402185,
"grad_norm": 0.5010513455704715,
"learning_rate": 8.845998372007136e-06,
"loss": 0.0286,
"step": 2426
},
{
"epoch": 1.1041856232939036,
"grad_norm": 0.34862957832393143,
"learning_rate": 8.845084882576924e-06,
"loss": 0.0165,
"step": 2427
},
{
"epoch": 1.1046405823475887,
"grad_norm": 0.5610710585811625,
"learning_rate": 8.84417107894374e-06,
"loss": 0.0381,
"step": 2428
},
{
"epoch": 1.105095541401274,
"grad_norm": 0.3998367702132408,
"learning_rate": 8.843256961182255e-06,
"loss": 0.0186,
"step": 2429
},
{
"epoch": 1.105550500454959,
"grad_norm": 0.6787215229828617,
"learning_rate": 8.842342529367167e-06,
"loss": 0.0487,
"step": 2430
},
{
"epoch": 1.1060054595086442,
"grad_norm": 0.6483563929183911,
"learning_rate": 8.8414277835732e-06,
"loss": 0.0409,
"step": 2431
},
{
"epoch": 1.1064604185623295,
"grad_norm": 0.6351823340870137,
"learning_rate": 8.840512723875103e-06,
"loss": 0.0497,
"step": 2432
},
{
"epoch": 1.1069153776160146,
"grad_norm": 0.3467791981865341,
"learning_rate": 8.839597350347648e-06,
"loss": 0.0172,
"step": 2433
},
{
"epoch": 1.1073703366696996,
"grad_norm": 0.4877926867999841,
"learning_rate": 8.838681663065638e-06,
"loss": 0.0268,
"step": 2434
},
{
"epoch": 1.107825295723385,
"grad_norm": 0.561052741145843,
"learning_rate": 8.837765662103898e-06,
"loss": 0.0351,
"step": 2435
},
{
"epoch": 1.10828025477707,
"grad_norm": 0.5339886977527083,
"learning_rate": 8.836849347537278e-06,
"loss": 0.0286,
"step": 2436
},
{
"epoch": 1.1087352138307551,
"grad_norm": 0.41940315295115715,
"learning_rate": 8.835932719440658e-06,
"loss": 0.016,
"step": 2437
},
{
"epoch": 1.1091901728844404,
"grad_norm": 0.500811248377599,
"learning_rate": 8.835015777888938e-06,
"loss": 0.0277,
"step": 2438
},
{
"epoch": 1.1096451319381255,
"grad_norm": 0.6905252242552301,
"learning_rate": 8.83409852295705e-06,
"loss": 0.0451,
"step": 2439
},
{
"epoch": 1.1101000909918108,
"grad_norm": 0.4932334291437054,
"learning_rate": 8.833180954719941e-06,
"loss": 0.023,
"step": 2440
},
{
"epoch": 1.110555050045496,
"grad_norm": 0.32570391119462067,
"learning_rate": 8.832263073252597e-06,
"loss": 0.0223,
"step": 2441
},
{
"epoch": 1.111010009099181,
"grad_norm": 0.5189620509513116,
"learning_rate": 8.831344878630022e-06,
"loss": 0.0345,
"step": 2442
},
{
"epoch": 1.1114649681528663,
"grad_norm": 0.35471915929013836,
"learning_rate": 8.830426370927246e-06,
"loss": 0.0178,
"step": 2443
},
{
"epoch": 1.1119199272065514,
"grad_norm": 0.4071867204646678,
"learning_rate": 8.829507550219323e-06,
"loss": 0.0187,
"step": 2444
},
{
"epoch": 1.1123748862602365,
"grad_norm": 0.5327053422443435,
"learning_rate": 8.828588416581338e-06,
"loss": 0.0321,
"step": 2445
},
{
"epoch": 1.1128298453139218,
"grad_norm": 0.4727447057361278,
"learning_rate": 8.827668970088397e-06,
"loss": 0.0256,
"step": 2446
},
{
"epoch": 1.113284804367607,
"grad_norm": 0.44344698021715867,
"learning_rate": 8.826749210815634e-06,
"loss": 0.0212,
"step": 2447
},
{
"epoch": 1.113739763421292,
"grad_norm": 0.48653354586078956,
"learning_rate": 8.825829138838206e-06,
"loss": 0.0252,
"step": 2448
},
{
"epoch": 1.1141947224749773,
"grad_norm": 0.4904789767614279,
"learning_rate": 8.824908754231299e-06,
"loss": 0.0219,
"step": 2449
},
{
"epoch": 1.1146496815286624,
"grad_norm": 0.5096306577344566,
"learning_rate": 8.823988057070122e-06,
"loss": 0.0269,
"step": 2450
},
{
"epoch": 1.1151046405823477,
"grad_norm": 0.4524604770972165,
"learning_rate": 8.823067047429908e-06,
"loss": 0.0197,
"step": 2451
},
{
"epoch": 1.1155595996360328,
"grad_norm": 0.6661762941224277,
"learning_rate": 8.82214572538592e-06,
"loss": 0.0432,
"step": 2452
},
{
"epoch": 1.1160145586897179,
"grad_norm": 0.45413808918893234,
"learning_rate": 8.821224091013445e-06,
"loss": 0.0252,
"step": 2453
},
{
"epoch": 1.1164695177434032,
"grad_norm": 0.4564359066247584,
"learning_rate": 8.820302144387794e-06,
"loss": 0.0305,
"step": 2454
},
{
"epoch": 1.1169244767970883,
"grad_norm": 0.5331752474098931,
"learning_rate": 8.819379885584303e-06,
"loss": 0.0285,
"step": 2455
},
{
"epoch": 1.1173794358507734,
"grad_norm": 0.8314482044455632,
"learning_rate": 8.818457314678336e-06,
"loss": 0.0474,
"step": 2456
},
{
"epoch": 1.1178343949044587,
"grad_norm": 0.5831509752587852,
"learning_rate": 8.817534431745283e-06,
"loss": 0.0204,
"step": 2457
},
{
"epoch": 1.1182893539581438,
"grad_norm": 0.42113991056064237,
"learning_rate": 8.816611236860554e-06,
"loss": 0.0207,
"step": 2458
},
{
"epoch": 1.1187443130118289,
"grad_norm": 0.5492674131587796,
"learning_rate": 8.815687730099594e-06,
"loss": 0.023,
"step": 2459
},
{
"epoch": 1.1191992720655142,
"grad_norm": 0.5627677712218775,
"learning_rate": 8.81476391153786e-06,
"loss": 0.0238,
"step": 2460
},
{
"epoch": 1.1196542311191993,
"grad_norm": 0.306412099822185,
"learning_rate": 8.813839781250848e-06,
"loss": 0.0136,
"step": 2461
},
{
"epoch": 1.1201091901728844,
"grad_norm": 0.4884139369729457,
"learning_rate": 8.812915339314073e-06,
"loss": 0.0325,
"step": 2462
},
{
"epoch": 1.1205641492265697,
"grad_norm": 0.6440331779678226,
"learning_rate": 8.811990585803074e-06,
"loss": 0.0462,
"step": 2463
},
{
"epoch": 1.1210191082802548,
"grad_norm": 0.6354635395644428,
"learning_rate": 8.81106552079342e-06,
"loss": 0.0326,
"step": 2464
},
{
"epoch": 1.1214740673339398,
"grad_norm": 0.4841057095746355,
"learning_rate": 8.810140144360701e-06,
"loss": 0.0288,
"step": 2465
},
{
"epoch": 1.1219290263876252,
"grad_norm": 0.7578064954916388,
"learning_rate": 8.809214456580539e-06,
"loss": 0.0444,
"step": 2466
},
{
"epoch": 1.1223839854413102,
"grad_norm": 0.36333027437030824,
"learning_rate": 8.80828845752857e-06,
"loss": 0.0166,
"step": 2467
},
{
"epoch": 1.1228389444949956,
"grad_norm": 1.0828419984965674,
"learning_rate": 8.80736214728047e-06,
"loss": 0.0509,
"step": 2468
},
{
"epoch": 1.1232939035486806,
"grad_norm": 0.41035853061268457,
"learning_rate": 8.806435525911927e-06,
"loss": 0.0152,
"step": 2469
},
{
"epoch": 1.1237488626023657,
"grad_norm": 0.48117366130842515,
"learning_rate": 8.805508593498662e-06,
"loss": 0.0358,
"step": 2470
},
{
"epoch": 1.124203821656051,
"grad_norm": 0.48865070302034325,
"learning_rate": 8.804581350116422e-06,
"loss": 0.0248,
"step": 2471
},
{
"epoch": 1.1246587807097361,
"grad_norm": 0.6166160347574816,
"learning_rate": 8.803653795840974e-06,
"loss": 0.0372,
"step": 2472
},
{
"epoch": 1.1251137397634212,
"grad_norm": 0.4235666133907878,
"learning_rate": 8.802725930748115e-06,
"loss": 0.0224,
"step": 2473
},
{
"epoch": 1.1255686988171065,
"grad_norm": 0.49371023402555386,
"learning_rate": 8.801797754913667e-06,
"loss": 0.0253,
"step": 2474
},
{
"epoch": 1.1260236578707916,
"grad_norm": 0.5375981231946215,
"learning_rate": 8.800869268413475e-06,
"loss": 0.0303,
"step": 2475
},
{
"epoch": 1.1264786169244767,
"grad_norm": 0.6200342528643785,
"learning_rate": 8.79994047132341e-06,
"loss": 0.0301,
"step": 2476
},
{
"epoch": 1.126933575978162,
"grad_norm": 0.7763567599332302,
"learning_rate": 8.79901136371937e-06,
"loss": 0.0367,
"step": 2477
},
{
"epoch": 1.127388535031847,
"grad_norm": 0.4168679527566863,
"learning_rate": 8.798081945677279e-06,
"loss": 0.0193,
"step": 2478
},
{
"epoch": 1.1278434940855324,
"grad_norm": 0.5499515478297102,
"learning_rate": 8.797152217273082e-06,
"loss": 0.0232,
"step": 2479
},
{
"epoch": 1.1282984531392175,
"grad_norm": 0.3629031290073349,
"learning_rate": 8.796222178582756e-06,
"loss": 0.0217,
"step": 2480
},
{
"epoch": 1.1287534121929026,
"grad_norm": 0.539897737827513,
"learning_rate": 8.795291829682293e-06,
"loss": 0.0272,
"step": 2481
},
{
"epoch": 1.129208371246588,
"grad_norm": 0.5636939303591514,
"learning_rate": 8.794361170647723e-06,
"loss": 0.0322,
"step": 2482
},
{
"epoch": 1.129663330300273,
"grad_norm": 0.6219815104303015,
"learning_rate": 8.793430201555095e-06,
"loss": 0.0274,
"step": 2483
},
{
"epoch": 1.130118289353958,
"grad_norm": 0.6542904517198702,
"learning_rate": 8.79249892248048e-06,
"loss": 0.0358,
"step": 2484
},
{
"epoch": 1.1305732484076434,
"grad_norm": 0.46666017679304383,
"learning_rate": 8.79156733349998e-06,
"loss": 0.0308,
"step": 2485
},
{
"epoch": 1.1310282074613285,
"grad_norm": 0.643787908195578,
"learning_rate": 8.790635434689722e-06,
"loss": 0.0325,
"step": 2486
},
{
"epoch": 1.1314831665150136,
"grad_norm": 0.6798497056398047,
"learning_rate": 8.789703226125853e-06,
"loss": 0.0388,
"step": 2487
},
{
"epoch": 1.1319381255686989,
"grad_norm": 0.45682700520723596,
"learning_rate": 8.78877070788455e-06,
"loss": 0.0248,
"step": 2488
},
{
"epoch": 1.132393084622384,
"grad_norm": 0.520494224107322,
"learning_rate": 8.787837880042016e-06,
"loss": 0.0251,
"step": 2489
},
{
"epoch": 1.132848043676069,
"grad_norm": 0.5608809735379154,
"learning_rate": 8.786904742674476e-06,
"loss": 0.0354,
"step": 2490
},
{
"epoch": 1.1333030027297544,
"grad_norm": 0.5383912877252518,
"learning_rate": 8.78597129585818e-06,
"loss": 0.0252,
"step": 2491
},
{
"epoch": 1.1337579617834395,
"grad_norm": 0.3952421850434973,
"learning_rate": 8.78503753966941e-06,
"loss": 0.0191,
"step": 2492
},
{
"epoch": 1.1342129208371245,
"grad_norm": 0.7660377240440205,
"learning_rate": 8.784103474184463e-06,
"loss": 0.0372,
"step": 2493
},
{
"epoch": 1.1346678798908099,
"grad_norm": 0.45419840808136375,
"learning_rate": 8.783169099479669e-06,
"loss": 0.0237,
"step": 2494
},
{
"epoch": 1.135122838944495,
"grad_norm": 0.6963944475868004,
"learning_rate": 8.782234415631381e-06,
"loss": 0.0402,
"step": 2495
},
{
"epoch": 1.1355777979981803,
"grad_norm": 0.43802475738162483,
"learning_rate": 8.781299422715979e-06,
"loss": 0.0238,
"step": 2496
},
{
"epoch": 1.1360327570518653,
"grad_norm": 0.6062845259672841,
"learning_rate": 8.780364120809863e-06,
"loss": 0.0299,
"step": 2497
},
{
"epoch": 1.1364877161055504,
"grad_norm": 0.44459971712814256,
"learning_rate": 8.779428509989463e-06,
"loss": 0.0205,
"step": 2498
},
{
"epoch": 1.1369426751592357,
"grad_norm": 0.8182256630287221,
"learning_rate": 8.778492590331234e-06,
"loss": 0.0358,
"step": 2499
},
{
"epoch": 1.1373976342129208,
"grad_norm": 0.35292113524041313,
"learning_rate": 8.777556361911652e-06,
"loss": 0.0188,
"step": 2500
},
{
"epoch": 1.137852593266606,
"grad_norm": 0.5495898839385301,
"learning_rate": 8.776619824807225e-06,
"loss": 0.0403,
"step": 2501
},
{
"epoch": 1.1383075523202912,
"grad_norm": 0.47715012261917683,
"learning_rate": 8.77568297909448e-06,
"loss": 0.0308,
"step": 2502
},
{
"epoch": 1.1387625113739763,
"grad_norm": 0.5057002315147829,
"learning_rate": 8.774745824849973e-06,
"loss": 0.0255,
"step": 2503
},
{
"epoch": 1.1392174704276614,
"grad_norm": 0.637445487028803,
"learning_rate": 8.773808362150284e-06,
"loss": 0.0441,
"step": 2504
},
{
"epoch": 1.1396724294813467,
"grad_norm": 0.46970000948757085,
"learning_rate": 8.772870591072016e-06,
"loss": 0.0203,
"step": 2505
},
{
"epoch": 1.1401273885350318,
"grad_norm": 0.48405940158780947,
"learning_rate": 8.771932511691805e-06,
"loss": 0.0248,
"step": 2506
},
{
"epoch": 1.1405823475887171,
"grad_norm": 0.5007699680851107,
"learning_rate": 8.7709941240863e-06,
"loss": 0.0299,
"step": 2507
},
{
"epoch": 1.1410373066424022,
"grad_norm": 0.47412512472759577,
"learning_rate": 8.770055428332187e-06,
"loss": 0.0289,
"step": 2508
},
{
"epoch": 1.1414922656960873,
"grad_norm": 0.6167640062421629,
"learning_rate": 8.769116424506168e-06,
"loss": 0.0308,
"step": 2509
},
{
"epoch": 1.1419472247497726,
"grad_norm": 0.39237316345479106,
"learning_rate": 8.768177112684976e-06,
"loss": 0.023,
"step": 2510
},
{
"epoch": 1.1424021838034577,
"grad_norm": 0.5186908295343413,
"learning_rate": 8.767237492945372e-06,
"loss": 0.0253,
"step": 2511
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.5056070603356543,
"learning_rate": 8.766297565364127e-06,
"loss": 0.0269,
"step": 2512
},
{
"epoch": 1.143312101910828,
"grad_norm": 0.572114404769031,
"learning_rate": 8.765357330018056e-06,
"loss": 0.04,
"step": 2513
},
{
"epoch": 1.1437670609645132,
"grad_norm": 0.5742667251635876,
"learning_rate": 8.764416786983987e-06,
"loss": 0.0341,
"step": 2514
},
{
"epoch": 1.1442220200181983,
"grad_norm": 0.7921946978016261,
"learning_rate": 8.763475936338778e-06,
"loss": 0.0297,
"step": 2515
},
{
"epoch": 1.1446769790718836,
"grad_norm": 0.5932003547457203,
"learning_rate": 8.762534778159313e-06,
"loss": 0.0329,
"step": 2516
},
{
"epoch": 1.1451319381255687,
"grad_norm": 0.4383972484081299,
"learning_rate": 8.761593312522496e-06,
"loss": 0.026,
"step": 2517
},
{
"epoch": 1.1455868971792538,
"grad_norm": 0.494406013971066,
"learning_rate": 8.76065153950526e-06,
"loss": 0.0252,
"step": 2518
},
{
"epoch": 1.146041856232939,
"grad_norm": 0.41600285124838154,
"learning_rate": 8.759709459184565e-06,
"loss": 0.03,
"step": 2519
},
{
"epoch": 1.1464968152866242,
"grad_norm": 0.7103449624996373,
"learning_rate": 8.758767071637391e-06,
"loss": 0.0293,
"step": 2520
},
{
"epoch": 1.1469517743403093,
"grad_norm": 0.7247596682387525,
"learning_rate": 8.757824376940748e-06,
"loss": 0.0534,
"step": 2521
},
{
"epoch": 1.1474067333939946,
"grad_norm": 0.5429066180348485,
"learning_rate": 8.756881375171664e-06,
"loss": 0.0366,
"step": 2522
},
{
"epoch": 1.1478616924476797,
"grad_norm": 0.5884373670939516,
"learning_rate": 8.755938066407201e-06,
"loss": 0.0335,
"step": 2523
},
{
"epoch": 1.148316651501365,
"grad_norm": 0.6156045708560577,
"learning_rate": 8.754994450724441e-06,
"loss": 0.0345,
"step": 2524
},
{
"epoch": 1.14877161055505,
"grad_norm": 0.5614699649040673,
"learning_rate": 8.754050528200493e-06,
"loss": 0.0329,
"step": 2525
},
{
"epoch": 1.1492265696087351,
"grad_norm": 0.6406021126928062,
"learning_rate": 8.753106298912488e-06,
"loss": 0.0306,
"step": 2526
},
{
"epoch": 1.1496815286624205,
"grad_norm": 0.5000438600163287,
"learning_rate": 8.752161762937586e-06,
"loss": 0.0223,
"step": 2527
},
{
"epoch": 1.1501364877161055,
"grad_norm": 0.3997197285041498,
"learning_rate": 8.751216920352967e-06,
"loss": 0.0221,
"step": 2528
},
{
"epoch": 1.1505914467697906,
"grad_norm": 0.5040179214810742,
"learning_rate": 8.750271771235844e-06,
"loss": 0.0196,
"step": 2529
},
{
"epoch": 1.151046405823476,
"grad_norm": 0.40549609696673644,
"learning_rate": 8.749326315663447e-06,
"loss": 0.0231,
"step": 2530
},
{
"epoch": 1.151501364877161,
"grad_norm": 0.406160230893779,
"learning_rate": 8.748380553713033e-06,
"loss": 0.0208,
"step": 2531
},
{
"epoch": 1.1519563239308463,
"grad_norm": 0.5844194685702613,
"learning_rate": 8.747434485461892e-06,
"loss": 0.0241,
"step": 2532
},
{
"epoch": 1.1524112829845314,
"grad_norm": 0.36029638509152084,
"learning_rate": 8.746488110987326e-06,
"loss": 0.015,
"step": 2533
},
{
"epoch": 1.1528662420382165,
"grad_norm": 0.7276197204807093,
"learning_rate": 8.745541430366671e-06,
"loss": 0.0418,
"step": 2534
},
{
"epoch": 1.1533212010919018,
"grad_norm": 1.5020467500828025,
"learning_rate": 8.744594443677284e-06,
"loss": 0.0582,
"step": 2535
},
{
"epoch": 1.153776160145587,
"grad_norm": 0.4311974728697227,
"learning_rate": 8.743647150996551e-06,
"loss": 0.0258,
"step": 2536
},
{
"epoch": 1.154231119199272,
"grad_norm": 0.6248463720530537,
"learning_rate": 8.742699552401878e-06,
"loss": 0.0398,
"step": 2537
},
{
"epoch": 1.1546860782529573,
"grad_norm": 0.5339944254155865,
"learning_rate": 8.7417516479707e-06,
"loss": 0.0252,
"step": 2538
},
{
"epoch": 1.1551410373066424,
"grad_norm": 0.3465118720450813,
"learning_rate": 8.740803437780474e-06,
"loss": 0.0183,
"step": 2539
},
{
"epoch": 1.1555959963603275,
"grad_norm": 0.6096918552154363,
"learning_rate": 8.739854921908684e-06,
"loss": 0.0318,
"step": 2540
},
{
"epoch": 1.1560509554140128,
"grad_norm": 0.42626286323793855,
"learning_rate": 8.73890610043284e-06,
"loss": 0.0292,
"step": 2541
},
{
"epoch": 1.156505914467698,
"grad_norm": 0.47325164391197866,
"learning_rate": 8.737956973430475e-06,
"loss": 0.0337,
"step": 2542
},
{
"epoch": 1.156960873521383,
"grad_norm": 0.6214186683671308,
"learning_rate": 8.737007540979146e-06,
"loss": 0.0235,
"step": 2543
},
{
"epoch": 1.1574158325750683,
"grad_norm": 0.4958886649213906,
"learning_rate": 8.736057803156436e-06,
"loss": 0.0255,
"step": 2544
},
{
"epoch": 1.1578707916287534,
"grad_norm": 0.3732620529932146,
"learning_rate": 8.735107760039954e-06,
"loss": 0.0197,
"step": 2545
},
{
"epoch": 1.1583257506824385,
"grad_norm": 0.5778213004705967,
"learning_rate": 8.734157411707334e-06,
"loss": 0.0277,
"step": 2546
},
{
"epoch": 1.1587807097361238,
"grad_norm": 0.4850677867721973,
"learning_rate": 8.733206758236235e-06,
"loss": 0.0235,
"step": 2547
},
{
"epoch": 1.1592356687898089,
"grad_norm": 0.5687049775983313,
"learning_rate": 8.732255799704337e-06,
"loss": 0.0335,
"step": 2548
},
{
"epoch": 1.159690627843494,
"grad_norm": 0.5063906062734673,
"learning_rate": 8.73130453618935e-06,
"loss": 0.0224,
"step": 2549
},
{
"epoch": 1.1601455868971793,
"grad_norm": 0.4830706957588217,
"learning_rate": 8.730352967769007e-06,
"loss": 0.026,
"step": 2550
},
{
"epoch": 1.1606005459508644,
"grad_norm": 0.4565903397736301,
"learning_rate": 8.729401094521066e-06,
"loss": 0.0171,
"step": 2551
},
{
"epoch": 1.1610555050045497,
"grad_norm": 0.5299141705331825,
"learning_rate": 8.728448916523309e-06,
"loss": 0.0283,
"step": 2552
},
{
"epoch": 1.1615104640582348,
"grad_norm": 0.5618467862878425,
"learning_rate": 8.727496433853543e-06,
"loss": 0.0289,
"step": 2553
},
{
"epoch": 1.1619654231119199,
"grad_norm": 0.464342731748468,
"learning_rate": 8.726543646589605e-06,
"loss": 0.0202,
"step": 2554
},
{
"epoch": 1.1624203821656052,
"grad_norm": 0.5984943035378484,
"learning_rate": 8.725590554809346e-06,
"loss": 0.0387,
"step": 2555
},
{
"epoch": 1.1628753412192903,
"grad_norm": 0.3103247899143151,
"learning_rate": 8.724637158590652e-06,
"loss": 0.0172,
"step": 2556
},
{
"epoch": 1.1633303002729753,
"grad_norm": 0.5719001232225214,
"learning_rate": 8.72368345801143e-06,
"loss": 0.0328,
"step": 2557
},
{
"epoch": 1.1637852593266607,
"grad_norm": 0.7184689253863656,
"learning_rate": 8.722729453149613e-06,
"loss": 0.0256,
"step": 2558
},
{
"epoch": 1.1642402183803457,
"grad_norm": 0.4264869300929295,
"learning_rate": 8.721775144083155e-06,
"loss": 0.0273,
"step": 2559
},
{
"epoch": 1.164695177434031,
"grad_norm": 0.6992959245688258,
"learning_rate": 8.72082053089004e-06,
"loss": 0.0391,
"step": 2560
},
{
"epoch": 1.1651501364877161,
"grad_norm": 0.5598830058244858,
"learning_rate": 8.719865613648276e-06,
"loss": 0.0348,
"step": 2561
},
{
"epoch": 1.1656050955414012,
"grad_norm": 0.4490293057873329,
"learning_rate": 8.718910392435892e-06,
"loss": 0.0185,
"step": 2562
},
{
"epoch": 1.1660600545950865,
"grad_norm": 0.3188239247752473,
"learning_rate": 8.717954867330943e-06,
"loss": 0.0118,
"step": 2563
},
{
"epoch": 1.1665150136487716,
"grad_norm": 0.529002754756549,
"learning_rate": 8.716999038411513e-06,
"loss": 0.0422,
"step": 2564
},
{
"epoch": 1.1669699727024567,
"grad_norm": 0.6102751055626958,
"learning_rate": 8.716042905755708e-06,
"loss": 0.0321,
"step": 2565
},
{
"epoch": 1.167424931756142,
"grad_norm": 0.4958464600211268,
"learning_rate": 8.715086469441659e-06,
"loss": 0.027,
"step": 2566
},
{
"epoch": 1.1678798908098271,
"grad_norm": 0.6925927485590572,
"learning_rate": 8.714129729547522e-06,
"loss": 0.0528,
"step": 2567
},
{
"epoch": 1.1683348498635122,
"grad_norm": 0.48346645004557054,
"learning_rate": 8.713172686151475e-06,
"loss": 0.0241,
"step": 2568
},
{
"epoch": 1.1687898089171975,
"grad_norm": 0.6160868757033329,
"learning_rate": 8.712215339331724e-06,
"loss": 0.0364,
"step": 2569
},
{
"epoch": 1.1692447679708826,
"grad_norm": 0.5521736841094272,
"learning_rate": 8.711257689166499e-06,
"loss": 0.0384,
"step": 2570
},
{
"epoch": 1.1696997270245677,
"grad_norm": 0.4358123533199606,
"learning_rate": 8.710299735734057e-06,
"loss": 0.0218,
"step": 2571
},
{
"epoch": 1.170154686078253,
"grad_norm": 0.49989161769199447,
"learning_rate": 8.709341479112676e-06,
"loss": 0.019,
"step": 2572
},
{
"epoch": 1.170609645131938,
"grad_norm": 0.6461070187412289,
"learning_rate": 8.70838291938066e-06,
"loss": 0.05,
"step": 2573
},
{
"epoch": 1.1710646041856232,
"grad_norm": 0.5015730644729591,
"learning_rate": 8.70742405661634e-06,
"loss": 0.0262,
"step": 2574
},
{
"epoch": 1.1715195632393085,
"grad_norm": 0.6731652049317264,
"learning_rate": 8.706464890898068e-06,
"loss": 0.0417,
"step": 2575
},
{
"epoch": 1.1719745222929936,
"grad_norm": 0.5953498514866105,
"learning_rate": 8.705505422304224e-06,
"loss": 0.0251,
"step": 2576
},
{
"epoch": 1.1724294813466787,
"grad_norm": 0.49337464142227694,
"learning_rate": 8.70454565091321e-06,
"loss": 0.0283,
"step": 2577
},
{
"epoch": 1.172884440400364,
"grad_norm": 0.40746621618427764,
"learning_rate": 8.703585576803455e-06,
"loss": 0.0235,
"step": 2578
},
{
"epoch": 1.173339399454049,
"grad_norm": 0.574388099759434,
"learning_rate": 8.702625200053412e-06,
"loss": 0.0357,
"step": 2579
},
{
"epoch": 1.1737943585077344,
"grad_norm": 0.49209063287204186,
"learning_rate": 8.701664520741558e-06,
"loss": 0.0271,
"step": 2580
},
{
"epoch": 1.1742493175614195,
"grad_norm": 0.49658769644628054,
"learning_rate": 8.700703538946396e-06,
"loss": 0.0312,
"step": 2581
},
{
"epoch": 1.1747042766151046,
"grad_norm": 0.48898735666034404,
"learning_rate": 8.699742254746452e-06,
"loss": 0.0308,
"step": 2582
},
{
"epoch": 1.1751592356687899,
"grad_norm": 0.6965571111870493,
"learning_rate": 8.698780668220281e-06,
"loss": 0.0587,
"step": 2583
},
{
"epoch": 1.175614194722475,
"grad_norm": 0.4680913844344663,
"learning_rate": 8.697818779446456e-06,
"loss": 0.0268,
"step": 2584
},
{
"epoch": 1.17606915377616,
"grad_norm": 0.5966094635320064,
"learning_rate": 8.696856588503582e-06,
"loss": 0.0441,
"step": 2585
},
{
"epoch": 1.1765241128298454,
"grad_norm": 0.41029105691286216,
"learning_rate": 8.69589409547028e-06,
"loss": 0.0238,
"step": 2586
},
{
"epoch": 1.1769790718835305,
"grad_norm": 0.4919555962191467,
"learning_rate": 8.694931300425204e-06,
"loss": 0.022,
"step": 2587
},
{
"epoch": 1.1774340309372158,
"grad_norm": 0.4941665993905159,
"learning_rate": 8.693968203447027e-06,
"loss": 0.0318,
"step": 2588
},
{
"epoch": 1.1778889899909009,
"grad_norm": 0.4471241857833498,
"learning_rate": 8.693004804614451e-06,
"loss": 0.0298,
"step": 2589
},
{
"epoch": 1.178343949044586,
"grad_norm": 0.42475689565329255,
"learning_rate": 8.692041104006201e-06,
"loss": 0.0245,
"step": 2590
},
{
"epoch": 1.1787989080982713,
"grad_norm": 0.7037247909228679,
"learning_rate": 8.691077101701024e-06,
"loss": 0.0422,
"step": 2591
},
{
"epoch": 1.1792538671519563,
"grad_norm": 0.4727292395507324,
"learning_rate": 8.690112797777695e-06,
"loss": 0.0286,
"step": 2592
},
{
"epoch": 1.1797088262056414,
"grad_norm": 0.4886187172760372,
"learning_rate": 8.689148192315013e-06,
"loss": 0.0253,
"step": 2593
},
{
"epoch": 1.1801637852593267,
"grad_norm": 0.4878895092851417,
"learning_rate": 8.6881832853918e-06,
"loss": 0.0294,
"step": 2594
},
{
"epoch": 1.1806187443130118,
"grad_norm": 0.3785632403936228,
"learning_rate": 8.687218077086905e-06,
"loss": 0.0262,
"step": 2595
},
{
"epoch": 1.181073703366697,
"grad_norm": 0.3032359273578328,
"learning_rate": 8.6862525674792e-06,
"loss": 0.0207,
"step": 2596
},
{
"epoch": 1.1815286624203822,
"grad_norm": 0.5805982565364416,
"learning_rate": 8.685286756647582e-06,
"loss": 0.0299,
"step": 2597
},
{
"epoch": 1.1819836214740673,
"grad_norm": 0.5312395563049912,
"learning_rate": 8.684320644670975e-06,
"loss": 0.0391,
"step": 2598
},
{
"epoch": 1.1824385805277524,
"grad_norm": 0.6427828501421616,
"learning_rate": 8.68335423162832e-06,
"loss": 0.0366,
"step": 2599
},
{
"epoch": 1.1828935395814377,
"grad_norm": 0.6549023820063344,
"learning_rate": 8.682387517598591e-06,
"loss": 0.0466,
"step": 2600
},
{
"epoch": 1.1833484986351228,
"grad_norm": 0.4191743788408071,
"learning_rate": 8.681420502660785e-06,
"loss": 0.0233,
"step": 2601
},
{
"epoch": 1.183803457688808,
"grad_norm": 0.4871715984486466,
"learning_rate": 8.68045318689392e-06,
"loss": 0.0271,
"step": 2602
},
{
"epoch": 1.1842584167424932,
"grad_norm": 0.6701976394432037,
"learning_rate": 8.679485570377043e-06,
"loss": 0.0306,
"step": 2603
},
{
"epoch": 1.1847133757961783,
"grad_norm": 0.6441120205935942,
"learning_rate": 8.678517653189222e-06,
"loss": 0.0394,
"step": 2604
},
{
"epoch": 1.1851683348498634,
"grad_norm": 0.5060858425158437,
"learning_rate": 8.677549435409548e-06,
"loss": 0.0217,
"step": 2605
},
{
"epoch": 1.1856232939035487,
"grad_norm": 0.6752485468046396,
"learning_rate": 8.676580917117144e-06,
"loss": 0.039,
"step": 2606
},
{
"epoch": 1.1860782529572338,
"grad_norm": 0.3957815075118571,
"learning_rate": 8.675612098391149e-06,
"loss": 0.0188,
"step": 2607
},
{
"epoch": 1.186533212010919,
"grad_norm": 0.5187116630942156,
"learning_rate": 8.674642979310732e-06,
"loss": 0.026,
"step": 2608
},
{
"epoch": 1.1869881710646042,
"grad_norm": 0.5769983660492354,
"learning_rate": 8.673673559955086e-06,
"loss": 0.0343,
"step": 2609
},
{
"epoch": 1.1874431301182893,
"grad_norm": 0.4743399882711679,
"learning_rate": 8.672703840403428e-06,
"loss": 0.0293,
"step": 2610
},
{
"epoch": 1.1878980891719746,
"grad_norm": 0.3693698002797069,
"learning_rate": 8.671733820734996e-06,
"loss": 0.0162,
"step": 2611
},
{
"epoch": 1.1883530482256597,
"grad_norm": 0.7143210340908582,
"learning_rate": 8.670763501029059e-06,
"loss": 0.0424,
"step": 2612
},
{
"epoch": 1.1888080072793448,
"grad_norm": 0.44099669973790273,
"learning_rate": 8.669792881364905e-06,
"loss": 0.0288,
"step": 2613
},
{
"epoch": 1.18926296633303,
"grad_norm": 0.47880134181841405,
"learning_rate": 8.668821961821848e-06,
"loss": 0.0356,
"step": 2614
},
{
"epoch": 1.1897179253867152,
"grad_norm": 0.49961852236193943,
"learning_rate": 8.66785074247923e-06,
"loss": 0.0264,
"step": 2615
},
{
"epoch": 1.1901728844404005,
"grad_norm": 0.6606861173434392,
"learning_rate": 8.666879223416413e-06,
"loss": 0.0402,
"step": 2616
},
{
"epoch": 1.1906278434940856,
"grad_norm": 0.5832250365729773,
"learning_rate": 8.665907404712786e-06,
"loss": 0.0349,
"step": 2617
},
{
"epoch": 1.1910828025477707,
"grad_norm": 0.47607736173413934,
"learning_rate": 8.66493528644776e-06,
"loss": 0.0275,
"step": 2618
},
{
"epoch": 1.191537761601456,
"grad_norm": 0.4323045066773957,
"learning_rate": 8.663962868700773e-06,
"loss": 0.0215,
"step": 2619
},
{
"epoch": 1.191992720655141,
"grad_norm": 0.6823901111258103,
"learning_rate": 8.662990151551288e-06,
"loss": 0.0367,
"step": 2620
},
{
"epoch": 1.1924476797088261,
"grad_norm": 0.568395741941641,
"learning_rate": 8.66201713507879e-06,
"loss": 0.0327,
"step": 2621
},
{
"epoch": 1.1929026387625115,
"grad_norm": 0.8032308375903047,
"learning_rate": 8.661043819362788e-06,
"loss": 0.0396,
"step": 2622
},
{
"epoch": 1.1933575978161965,
"grad_norm": 0.5352047847553939,
"learning_rate": 8.660070204482818e-06,
"loss": 0.0384,
"step": 2623
},
{
"epoch": 1.1938125568698816,
"grad_norm": 0.43266491785940075,
"learning_rate": 8.65909629051844e-06,
"loss": 0.0235,
"step": 2624
},
{
"epoch": 1.194267515923567,
"grad_norm": 0.5039359947320041,
"learning_rate": 8.658122077549239e-06,
"loss": 0.0332,
"step": 2625
},
{
"epoch": 1.194722474977252,
"grad_norm": 0.46282675009108876,
"learning_rate": 8.65714756565482e-06,
"loss": 0.028,
"step": 2626
},
{
"epoch": 1.1951774340309371,
"grad_norm": 0.42685254155176316,
"learning_rate": 8.656172754914818e-06,
"loss": 0.0193,
"step": 2627
},
{
"epoch": 1.1956323930846224,
"grad_norm": 0.5644652302861507,
"learning_rate": 8.655197645408889e-06,
"loss": 0.0327,
"step": 2628
},
{
"epoch": 1.1960873521383075,
"grad_norm": 0.6017102850762671,
"learning_rate": 8.654222237216714e-06,
"loss": 0.0395,
"step": 2629
},
{
"epoch": 1.1965423111919926,
"grad_norm": 0.4828717952370834,
"learning_rate": 8.653246530418003e-06,
"loss": 0.0296,
"step": 2630
},
{
"epoch": 1.196997270245678,
"grad_norm": 0.4718632798920294,
"learning_rate": 8.652270525092481e-06,
"loss": 0.0175,
"step": 2631
},
{
"epoch": 1.197452229299363,
"grad_norm": 0.9210566120370747,
"learning_rate": 8.651294221319907e-06,
"loss": 0.0532,
"step": 2632
},
{
"epoch": 1.197907188353048,
"grad_norm": 0.5973832244257986,
"learning_rate": 8.650317619180057e-06,
"loss": 0.0356,
"step": 2633
},
{
"epoch": 1.1983621474067334,
"grad_norm": 0.4056353546459655,
"learning_rate": 8.649340718752736e-06,
"loss": 0.0233,
"step": 2634
},
{
"epoch": 1.1988171064604185,
"grad_norm": 0.6383917144915527,
"learning_rate": 8.648363520117773e-06,
"loss": 0.0282,
"step": 2635
},
{
"epoch": 1.1992720655141038,
"grad_norm": 0.30187722032440356,
"learning_rate": 8.647386023355017e-06,
"loss": 0.015,
"step": 2636
},
{
"epoch": 1.199727024567789,
"grad_norm": 0.7620089776567717,
"learning_rate": 8.646408228544349e-06,
"loss": 0.0449,
"step": 2637
},
{
"epoch": 1.200181983621474,
"grad_norm": 0.7042927681153068,
"learning_rate": 8.645430135765667e-06,
"loss": 0.04,
"step": 2638
},
{
"epoch": 1.2006369426751593,
"grad_norm": 0.5117403840739881,
"learning_rate": 8.644451745098896e-06,
"loss": 0.0297,
"step": 2639
},
{
"epoch": 1.2010919017288444,
"grad_norm": 0.7659399394915278,
"learning_rate": 8.643473056623987e-06,
"loss": 0.0592,
"step": 2640
},
{
"epoch": 1.2015468607825295,
"grad_norm": 0.5678495394727697,
"learning_rate": 8.642494070420912e-06,
"loss": 0.032,
"step": 2641
},
{
"epoch": 1.2020018198362148,
"grad_norm": 0.4587046178873542,
"learning_rate": 8.641514786569674e-06,
"loss": 0.0273,
"step": 2642
},
{
"epoch": 1.2024567788898999,
"grad_norm": 0.5810971871142143,
"learning_rate": 8.640535205150291e-06,
"loss": 0.0436,
"step": 2643
},
{
"epoch": 1.2029117379435852,
"grad_norm": 0.49553783255896267,
"learning_rate": 8.639555326242812e-06,
"loss": 0.0375,
"step": 2644
},
{
"epoch": 1.2033666969972703,
"grad_norm": 0.700954373813157,
"learning_rate": 8.638575149927306e-06,
"loss": 0.0416,
"step": 2645
},
{
"epoch": 1.2038216560509554,
"grad_norm": 0.51916075076626,
"learning_rate": 8.637594676283872e-06,
"loss": 0.0301,
"step": 2646
},
{
"epoch": 1.2042766151046407,
"grad_norm": 0.5616014526557234,
"learning_rate": 8.636613905392628e-06,
"loss": 0.0333,
"step": 2647
},
{
"epoch": 1.2047315741583258,
"grad_norm": 0.3996003632999196,
"learning_rate": 8.635632837333719e-06,
"loss": 0.0203,
"step": 2648
},
{
"epoch": 1.2051865332120109,
"grad_norm": 0.5908400254903149,
"learning_rate": 8.634651472187312e-06,
"loss": 0.0355,
"step": 2649
},
{
"epoch": 1.2056414922656962,
"grad_norm": 0.5521857176836706,
"learning_rate": 8.633669810033601e-06,
"loss": 0.0302,
"step": 2650
},
{
"epoch": 1.2060964513193813,
"grad_norm": 0.47154629646415547,
"learning_rate": 8.632687850952803e-06,
"loss": 0.0254,
"step": 2651
},
{
"epoch": 1.2065514103730663,
"grad_norm": 0.5084600548265098,
"learning_rate": 8.63170559502516e-06,
"loss": 0.0263,
"step": 2652
},
{
"epoch": 1.2070063694267517,
"grad_norm": 0.41669809700741084,
"learning_rate": 8.630723042330934e-06,
"loss": 0.0235,
"step": 2653
},
{
"epoch": 1.2074613284804367,
"grad_norm": 0.4239984269262903,
"learning_rate": 8.629740192950418e-06,
"loss": 0.0258,
"step": 2654
},
{
"epoch": 1.2079162875341218,
"grad_norm": 0.5493755020180808,
"learning_rate": 8.628757046963925e-06,
"loss": 0.0312,
"step": 2655
},
{
"epoch": 1.2083712465878071,
"grad_norm": 0.44940260929025,
"learning_rate": 8.627773604451795e-06,
"loss": 0.0253,
"step": 2656
},
{
"epoch": 1.2088262056414922,
"grad_norm": 0.49748760446391493,
"learning_rate": 8.626789865494388e-06,
"loss": 0.029,
"step": 2657
},
{
"epoch": 1.2092811646951773,
"grad_norm": 0.4473696250717918,
"learning_rate": 8.62580583017209e-06,
"loss": 0.0265,
"step": 2658
},
{
"epoch": 1.2097361237488626,
"grad_norm": 0.634783340896908,
"learning_rate": 8.624821498565316e-06,
"loss": 0.0375,
"step": 2659
},
{
"epoch": 1.2101910828025477,
"grad_norm": 0.5688906906342468,
"learning_rate": 8.623836870754497e-06,
"loss": 0.0291,
"step": 2660
},
{
"epoch": 1.210646041856233,
"grad_norm": 0.524163167377845,
"learning_rate": 8.622851946820094e-06,
"loss": 0.0343,
"step": 2661
},
{
"epoch": 1.2111010009099181,
"grad_norm": 0.4184285347511745,
"learning_rate": 8.621866726842592e-06,
"loss": 0.0245,
"step": 2662
},
{
"epoch": 1.2115559599636032,
"grad_norm": 0.5452023193304021,
"learning_rate": 8.620881210902497e-06,
"loss": 0.0361,
"step": 2663
},
{
"epoch": 1.2120109190172885,
"grad_norm": 0.8825681885181793,
"learning_rate": 8.61989539908034e-06,
"loss": 0.0551,
"step": 2664
},
{
"epoch": 1.2124658780709736,
"grad_norm": 0.6606796283358398,
"learning_rate": 8.61890929145668e-06,
"loss": 0.0501,
"step": 2665
},
{
"epoch": 1.2129208371246587,
"grad_norm": 0.5383057502775304,
"learning_rate": 8.617922888112093e-06,
"loss": 0.0327,
"step": 2666
},
{
"epoch": 1.213375796178344,
"grad_norm": 0.456267646438963,
"learning_rate": 8.616936189127189e-06,
"loss": 0.0271,
"step": 2667
},
{
"epoch": 1.213830755232029,
"grad_norm": 0.6876820645690198,
"learning_rate": 8.615949194582591e-06,
"loss": 0.0522,
"step": 2668
},
{
"epoch": 1.2142857142857142,
"grad_norm": 0.4235510337955621,
"learning_rate": 8.614961904558956e-06,
"loss": 0.0178,
"step": 2669
},
{
"epoch": 1.2147406733393995,
"grad_norm": 0.31389612581359266,
"learning_rate": 8.613974319136959e-06,
"loss": 0.0142,
"step": 2670
},
{
"epoch": 1.2151956323930846,
"grad_norm": 0.5466534592913287,
"learning_rate": 8.6129864383973e-06,
"loss": 0.0325,
"step": 2671
},
{
"epoch": 1.21565059144677,
"grad_norm": 0.6256801141600264,
"learning_rate": 8.611998262420707e-06,
"loss": 0.031,
"step": 2672
},
{
"epoch": 1.216105550500455,
"grad_norm": 0.5060382153635896,
"learning_rate": 8.611009791287926e-06,
"loss": 0.0262,
"step": 2673
},
{
"epoch": 1.21656050955414,
"grad_norm": 0.5027235560302646,
"learning_rate": 8.610021025079734e-06,
"loss": 0.0254,
"step": 2674
},
{
"epoch": 1.2170154686078254,
"grad_norm": 0.5543017523957823,
"learning_rate": 8.609031963876924e-06,
"loss": 0.0308,
"step": 2675
},
{
"epoch": 1.2174704276615105,
"grad_norm": 0.4737161111249352,
"learning_rate": 8.608042607760322e-06,
"loss": 0.0326,
"step": 2676
},
{
"epoch": 1.2179253867151956,
"grad_norm": 0.4843464243684333,
"learning_rate": 8.607052956810772e-06,
"loss": 0.0258,
"step": 2677
},
{
"epoch": 1.2183803457688809,
"grad_norm": 0.5194322149503382,
"learning_rate": 8.606063011109143e-06,
"loss": 0.0358,
"step": 2678
},
{
"epoch": 1.218835304822566,
"grad_norm": 0.5930513493210321,
"learning_rate": 8.60507277073633e-06,
"loss": 0.0362,
"step": 2679
},
{
"epoch": 1.219290263876251,
"grad_norm": 0.32996053031100914,
"learning_rate": 8.604082235773249e-06,
"loss": 0.0131,
"step": 2680
},
{
"epoch": 1.2197452229299364,
"grad_norm": 0.4531032973363827,
"learning_rate": 8.603091406300845e-06,
"loss": 0.0264,
"step": 2681
},
{
"epoch": 1.2202001819836215,
"grad_norm": 0.4752447004618926,
"learning_rate": 8.602100282400082e-06,
"loss": 0.0222,
"step": 2682
},
{
"epoch": 1.2206551410373065,
"grad_norm": 0.48294135837077795,
"learning_rate": 8.60110886415195e-06,
"loss": 0.0286,
"step": 2683
},
{
"epoch": 1.2211101000909919,
"grad_norm": 0.8146460808068521,
"learning_rate": 8.600117151637465e-06,
"loss": 0.0553,
"step": 2684
},
{
"epoch": 1.221565059144677,
"grad_norm": 0.5348405988590901,
"learning_rate": 8.599125144937666e-06,
"loss": 0.0341,
"step": 2685
},
{
"epoch": 1.222020018198362,
"grad_norm": 0.5209228039836593,
"learning_rate": 8.598132844133614e-06,
"loss": 0.0285,
"step": 2686
},
{
"epoch": 1.2224749772520473,
"grad_norm": 0.8667405302686297,
"learning_rate": 8.597140249306393e-06,
"loss": 0.0554,
"step": 2687
},
{
"epoch": 1.2229299363057324,
"grad_norm": 0.3662245233762516,
"learning_rate": 8.596147360537115e-06,
"loss": 0.0186,
"step": 2688
},
{
"epoch": 1.2233848953594177,
"grad_norm": 0.5675330701823686,
"learning_rate": 8.595154177906915e-06,
"loss": 0.0252,
"step": 2689
},
{
"epoch": 1.2238398544131028,
"grad_norm": 0.5055412550341041,
"learning_rate": 8.594160701496951e-06,
"loss": 0.0359,
"step": 2690
},
{
"epoch": 1.224294813466788,
"grad_norm": 0.4636507359192646,
"learning_rate": 8.593166931388408e-06,
"loss": 0.0235,
"step": 2691
},
{
"epoch": 1.2247497725204732,
"grad_norm": 0.5789114485670152,
"learning_rate": 8.592172867662488e-06,
"loss": 0.0309,
"step": 2692
},
{
"epoch": 1.2252047315741583,
"grad_norm": 0.5362511549256743,
"learning_rate": 8.591178510400424e-06,
"loss": 0.0288,
"step": 2693
},
{
"epoch": 1.2256596906278434,
"grad_norm": 0.665176698679116,
"learning_rate": 8.590183859683469e-06,
"loss": 0.0381,
"step": 2694
},
{
"epoch": 1.2261146496815287,
"grad_norm": 0.5319510120853973,
"learning_rate": 8.589188915592903e-06,
"loss": 0.0359,
"step": 2695
},
{
"epoch": 1.2265696087352138,
"grad_norm": 0.4177494615666587,
"learning_rate": 8.588193678210026e-06,
"loss": 0.0194,
"step": 2696
},
{
"epoch": 1.2270245677888991,
"grad_norm": 0.34563423472616117,
"learning_rate": 8.587198147616166e-06,
"loss": 0.0188,
"step": 2697
},
{
"epoch": 1.2274795268425842,
"grad_norm": 0.5420023688259344,
"learning_rate": 8.586202323892675e-06,
"loss": 0.0322,
"step": 2698
},
{
"epoch": 1.2279344858962693,
"grad_norm": 0.5715046852040315,
"learning_rate": 8.585206207120925e-06,
"loss": 0.0248,
"step": 2699
},
{
"epoch": 1.2283894449499546,
"grad_norm": 0.6150293588585071,
"learning_rate": 8.584209797382313e-06,
"loss": 0.0349,
"step": 2700
},
{
"epoch": 1.2288444040036397,
"grad_norm": 0.7538546206140824,
"learning_rate": 8.583213094758262e-06,
"loss": 0.0415,
"step": 2701
},
{
"epoch": 1.2292993630573248,
"grad_norm": 0.41258699232239693,
"learning_rate": 8.582216099330218e-06,
"loss": 0.0252,
"step": 2702
},
{
"epoch": 1.22975432211101,
"grad_norm": 0.5992053934366026,
"learning_rate": 8.581218811179655e-06,
"loss": 0.0231,
"step": 2703
},
{
"epoch": 1.2302092811646952,
"grad_norm": 0.4911038111295034,
"learning_rate": 8.58022123038806e-06,
"loss": 0.0367,
"step": 2704
},
{
"epoch": 1.2306642402183803,
"grad_norm": 0.5415583441174247,
"learning_rate": 8.579223357036956e-06,
"loss": 0.0356,
"step": 2705
},
{
"epoch": 1.2311191992720656,
"grad_norm": 0.648050207407017,
"learning_rate": 8.578225191207881e-06,
"loss": 0.0322,
"step": 2706
},
{
"epoch": 1.2315741583257507,
"grad_norm": 0.6515223387873779,
"learning_rate": 8.577226732982405e-06,
"loss": 0.0424,
"step": 2707
},
{
"epoch": 1.2320291173794358,
"grad_norm": 0.7662318426027166,
"learning_rate": 8.576227982442114e-06,
"loss": 0.037,
"step": 2708
},
{
"epoch": 1.232484076433121,
"grad_norm": 0.4709920734770032,
"learning_rate": 8.575228939668623e-06,
"loss": 0.0284,
"step": 2709
},
{
"epoch": 1.2329390354868062,
"grad_norm": 0.7144313144730997,
"learning_rate": 8.574229604743566e-06,
"loss": 0.0316,
"step": 2710
},
{
"epoch": 1.2333939945404913,
"grad_norm": 0.4992331855484428,
"learning_rate": 8.573229977748609e-06,
"loss": 0.0345,
"step": 2711
},
{
"epoch": 1.2338489535941766,
"grad_norm": 0.6112686451914704,
"learning_rate": 8.572230058765434e-06,
"loss": 0.0358,
"step": 2712
},
{
"epoch": 1.2343039126478617,
"grad_norm": 0.8262726736467544,
"learning_rate": 8.571229847875751e-06,
"loss": 0.0641,
"step": 2713
},
{
"epoch": 1.2347588717015467,
"grad_norm": 0.4953827805427677,
"learning_rate": 8.570229345161293e-06,
"loss": 0.0247,
"step": 2714
},
{
"epoch": 1.235213830755232,
"grad_norm": 0.3801656553630412,
"learning_rate": 8.569228550703815e-06,
"loss": 0.0249,
"step": 2715
},
{
"epoch": 1.2356687898089171,
"grad_norm": 0.49612613452863535,
"learning_rate": 8.568227464585099e-06,
"loss": 0.0277,
"step": 2716
},
{
"epoch": 1.2361237488626025,
"grad_norm": 0.4582666835548743,
"learning_rate": 8.567226086886948e-06,
"loss": 0.0262,
"step": 2717
},
{
"epoch": 1.2365787079162875,
"grad_norm": 0.6697552955443566,
"learning_rate": 8.566224417691191e-06,
"loss": 0.0338,
"step": 2718
},
{
"epoch": 1.2370336669699726,
"grad_norm": 0.8001154357445661,
"learning_rate": 8.565222457079679e-06,
"loss": 0.0685,
"step": 2719
},
{
"epoch": 1.237488626023658,
"grad_norm": 0.4454996360487464,
"learning_rate": 8.56422020513429e-06,
"loss": 0.0233,
"step": 2720
},
{
"epoch": 1.237943585077343,
"grad_norm": 0.42231887554095254,
"learning_rate": 8.56321766193692e-06,
"loss": 0.0247,
"step": 2721
},
{
"epoch": 1.2383985441310281,
"grad_norm": 0.49520892835841024,
"learning_rate": 8.562214827569495e-06,
"loss": 0.0198,
"step": 2722
},
{
"epoch": 1.2388535031847134,
"grad_norm": 0.3119762559086726,
"learning_rate": 8.56121170211396e-06,
"loss": 0.0183,
"step": 2723
},
{
"epoch": 1.2393084622383985,
"grad_norm": 0.48127588980662994,
"learning_rate": 8.560208285652287e-06,
"loss": 0.0348,
"step": 2724
},
{
"epoch": 1.2397634212920838,
"grad_norm": 0.975980592939099,
"learning_rate": 8.559204578266471e-06,
"loss": 0.0712,
"step": 2725
},
{
"epoch": 1.240218380345769,
"grad_norm": 0.4739910877413602,
"learning_rate": 8.55820058003853e-06,
"loss": 0.027,
"step": 2726
},
{
"epoch": 1.240673339399454,
"grad_norm": 0.5358172750361924,
"learning_rate": 8.557196291050506e-06,
"loss": 0.0403,
"step": 2727
},
{
"epoch": 1.2411282984531393,
"grad_norm": 0.49464890318884047,
"learning_rate": 8.556191711384466e-06,
"loss": 0.0336,
"step": 2728
},
{
"epoch": 1.2415832575068244,
"grad_norm": 0.4046597291390638,
"learning_rate": 8.555186841122498e-06,
"loss": 0.024,
"step": 2729
},
{
"epoch": 1.2420382165605095,
"grad_norm": 0.656706108193742,
"learning_rate": 8.554181680346717e-06,
"loss": 0.0348,
"step": 2730
},
{
"epoch": 1.2424931756141948,
"grad_norm": 0.49134341156698247,
"learning_rate": 8.553176229139262e-06,
"loss": 0.033,
"step": 2731
},
{
"epoch": 1.24294813466788,
"grad_norm": 0.3673616941332998,
"learning_rate": 8.552170487582287e-06,
"loss": 0.0233,
"step": 2732
},
{
"epoch": 1.243403093721565,
"grad_norm": 0.3845834813421107,
"learning_rate": 8.551164455757985e-06,
"loss": 0.021,
"step": 2733
},
{
"epoch": 1.2438580527752503,
"grad_norm": 0.4219248857316413,
"learning_rate": 8.550158133748559e-06,
"loss": 0.0232,
"step": 2734
},
{
"epoch": 1.2443130118289354,
"grad_norm": 0.5359384657995739,
"learning_rate": 8.549151521636244e-06,
"loss": 0.0426,
"step": 2735
},
{
"epoch": 1.2447679708826205,
"grad_norm": 0.6147117803498731,
"learning_rate": 8.548144619503291e-06,
"loss": 0.0372,
"step": 2736
},
{
"epoch": 1.2452229299363058,
"grad_norm": 0.7816013628144164,
"learning_rate": 8.547137427431986e-06,
"loss": 0.0509,
"step": 2737
},
{
"epoch": 1.2456778889899909,
"grad_norm": 0.5732293106945054,
"learning_rate": 8.546129945504629e-06,
"loss": 0.0404,
"step": 2738
},
{
"epoch": 1.246132848043676,
"grad_norm": 0.5878496377747829,
"learning_rate": 8.545122173803547e-06,
"loss": 0.0349,
"step": 2739
},
{
"epoch": 1.2465878070973613,
"grad_norm": 0.5178543900697522,
"learning_rate": 8.544114112411088e-06,
"loss": 0.0317,
"step": 2740
},
{
"epoch": 1.2470427661510464,
"grad_norm": 0.44475184485600816,
"learning_rate": 8.54310576140963e-06,
"loss": 0.0246,
"step": 2741
},
{
"epoch": 1.2474977252047315,
"grad_norm": 0.41811991583751146,
"learning_rate": 8.542097120881572e-06,
"loss": 0.0264,
"step": 2742
},
{
"epoch": 1.2479526842584168,
"grad_norm": 0.504603909447871,
"learning_rate": 8.541088190909333e-06,
"loss": 0.037,
"step": 2743
},
{
"epoch": 1.2484076433121019,
"grad_norm": 0.5546565546187008,
"learning_rate": 8.540078971575355e-06,
"loss": 0.0321,
"step": 2744
},
{
"epoch": 1.2488626023657872,
"grad_norm": 0.5988533107048205,
"learning_rate": 8.539069462962115e-06,
"loss": 0.0356,
"step": 2745
},
{
"epoch": 1.2493175614194723,
"grad_norm": 0.5355497681868633,
"learning_rate": 8.538059665152097e-06,
"loss": 0.0219,
"step": 2746
},
{
"epoch": 1.2497725204731573,
"grad_norm": 0.5560216189929246,
"learning_rate": 8.537049578227823e-06,
"loss": 0.0318,
"step": 2747
},
{
"epoch": 1.2502274795268427,
"grad_norm": 0.41791265535852423,
"learning_rate": 8.536039202271828e-06,
"loss": 0.0296,
"step": 2748
},
{
"epoch": 1.2506824385805277,
"grad_norm": 0.6230283621476296,
"learning_rate": 8.53502853736668e-06,
"loss": 0.0229,
"step": 2749
},
{
"epoch": 1.251137397634213,
"grad_norm": 0.5883015192363978,
"learning_rate": 8.534017583594965e-06,
"loss": 0.0454,
"step": 2750
},
{
"epoch": 1.2515923566878981,
"grad_norm": 0.5657093936113446,
"learning_rate": 8.53300634103929e-06,
"loss": 0.0328,
"step": 2751
},
{
"epoch": 1.2520473157415832,
"grad_norm": 0.9286848475357391,
"learning_rate": 8.531994809782294e-06,
"loss": 0.0651,
"step": 2752
},
{
"epoch": 1.2525022747952685,
"grad_norm": 0.5306254596544426,
"learning_rate": 8.530982989906632e-06,
"loss": 0.0264,
"step": 2753
},
{
"epoch": 1.2529572338489536,
"grad_norm": 0.599793814100533,
"learning_rate": 8.529970881494985e-06,
"loss": 0.038,
"step": 2754
},
{
"epoch": 1.2534121929026387,
"grad_norm": 0.4592924108716034,
"learning_rate": 8.52895848463006e-06,
"loss": 0.0253,
"step": 2755
},
{
"epoch": 1.253867151956324,
"grad_norm": 0.5025180855718538,
"learning_rate": 8.527945799394584e-06,
"loss": 0.0269,
"step": 2756
},
{
"epoch": 1.2543221110100091,
"grad_norm": 0.3690223518853051,
"learning_rate": 8.526932825871308e-06,
"loss": 0.0214,
"step": 2757
},
{
"epoch": 1.2547770700636942,
"grad_norm": 0.38161446652737785,
"learning_rate": 8.52591956414301e-06,
"loss": 0.0166,
"step": 2758
},
{
"epoch": 1.2552320291173795,
"grad_norm": 0.611622699149414,
"learning_rate": 8.524906014292488e-06,
"loss": 0.0412,
"step": 2759
},
{
"epoch": 1.2556869881710646,
"grad_norm": 0.4022077081421061,
"learning_rate": 8.523892176402565e-06,
"loss": 0.0234,
"step": 2760
},
{
"epoch": 1.2561419472247497,
"grad_norm": 0.4085009912666225,
"learning_rate": 8.522878050556087e-06,
"loss": 0.0271,
"step": 2761
},
{
"epoch": 1.256596906278435,
"grad_norm": 0.591494783456256,
"learning_rate": 8.521863636835924e-06,
"loss": 0.0288,
"step": 2762
},
{
"epoch": 1.25705186533212,
"grad_norm": 0.4315940956441906,
"learning_rate": 8.520848935324968e-06,
"loss": 0.0257,
"step": 2763
},
{
"epoch": 1.2575068243858052,
"grad_norm": 0.4623767141710468,
"learning_rate": 8.519833946106139e-06,
"loss": 0.0293,
"step": 2764
},
{
"epoch": 1.2579617834394905,
"grad_norm": 0.5965051882391731,
"learning_rate": 8.518818669262373e-06,
"loss": 0.0367,
"step": 2765
},
{
"epoch": 1.2584167424931756,
"grad_norm": 0.5441954958905808,
"learning_rate": 8.517803104876638e-06,
"loss": 0.0314,
"step": 2766
},
{
"epoch": 1.2588717015468607,
"grad_norm": 0.5077782576820083,
"learning_rate": 8.51678725303192e-06,
"loss": 0.0261,
"step": 2767
},
{
"epoch": 1.259326660600546,
"grad_norm": 0.6376855259836618,
"learning_rate": 8.515771113811226e-06,
"loss": 0.0409,
"step": 2768
},
{
"epoch": 1.259781619654231,
"grad_norm": 6.915760462322178,
"learning_rate": 8.514754687297598e-06,
"loss": 0.1986,
"step": 2769
},
{
"epoch": 1.2602365787079162,
"grad_norm": 0.5889806379105973,
"learning_rate": 8.513737973574088e-06,
"loss": 0.0336,
"step": 2770
},
{
"epoch": 1.2606915377616015,
"grad_norm": 0.5275667357404193,
"learning_rate": 8.512720972723779e-06,
"loss": 0.0289,
"step": 2771
},
{
"epoch": 1.2611464968152866,
"grad_norm": 0.3147286633021264,
"learning_rate": 8.511703684829773e-06,
"loss": 0.0163,
"step": 2772
},
{
"epoch": 1.2616014558689717,
"grad_norm": 0.8013976464224811,
"learning_rate": 8.510686109975202e-06,
"loss": 0.0468,
"step": 2773
},
{
"epoch": 1.262056414922657,
"grad_norm": 0.4994061588441834,
"learning_rate": 8.509668248243217e-06,
"loss": 0.02,
"step": 2774
},
{
"epoch": 1.262511373976342,
"grad_norm": 0.5749302842677763,
"learning_rate": 8.508650099716991e-06,
"loss": 0.0362,
"step": 2775
}
],
"logging_steps": 1,
"max_steps": 10990,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 555,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 18262287138816.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}