{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.1000694927032661,
"eval_steps": 500,
"global_step": 2375,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000463284688441047,
"grad_norm": 1.1484375,
"learning_rate": 0.0,
"loss": 1.3776911497116089,
"step": 1
},
{
"epoch": 0.000926569376882094,
"grad_norm": 1.90625,
"learning_rate": 6.31578947368421e-07,
"loss": 1.3288359642028809,
"step": 2
},
{
"epoch": 0.001389854065323141,
"grad_norm": 1.4765625,
"learning_rate": 1.263157894736842e-06,
"loss": 1.3400298357009888,
"step": 3
},
{
"epoch": 0.001853138753764188,
"grad_norm": 1.1796875,
"learning_rate": 1.8947368421052632e-06,
"loss": 1.2718102931976318,
"step": 4
},
{
"epoch": 0.0023164234422052353,
"grad_norm": 1.53125,
"learning_rate": 2.526315789473684e-06,
"loss": 1.3158093690872192,
"step": 5
},
{
"epoch": 0.002779708130646282,
"grad_norm": 1.53125,
"learning_rate": 3.157894736842105e-06,
"loss": 1.3021347522735596,
"step": 6
},
{
"epoch": 0.0032429928190873293,
"grad_norm": 1.4921875,
"learning_rate": 3.7894736842105264e-06,
"loss": 1.3042571544647217,
"step": 7
},
{
"epoch": 0.003706277507528376,
"grad_norm": 1.515625,
"learning_rate": 4.4210526315789476e-06,
"loss": 1.5282930135726929,
"step": 8
},
{
"epoch": 0.004169562195969423,
"grad_norm": 1.4921875,
"learning_rate": 5.052631578947368e-06,
"loss": 1.1924694776535034,
"step": 9
},
{
"epoch": 0.0046328468844104706,
"grad_norm": 1.5625,
"learning_rate": 5.684210526315789e-06,
"loss": 1.2525532245635986,
"step": 10
},
{
"epoch": 0.005096131572851517,
"grad_norm": 1.2890625,
"learning_rate": 6.31578947368421e-06,
"loss": 1.354798436164856,
"step": 11
},
{
"epoch": 0.005559416261292564,
"grad_norm": 1.359375,
"learning_rate": 6.947368421052632e-06,
"loss": 1.2614648342132568,
"step": 12
},
{
"epoch": 0.006022700949733611,
"grad_norm": 1.59375,
"learning_rate": 7.578947368421053e-06,
"loss": 1.441209077835083,
"step": 13
},
{
"epoch": 0.006485985638174659,
"grad_norm": 1.0546875,
"learning_rate": 8.210526315789475e-06,
"loss": 1.2802681922912598,
"step": 14
},
{
"epoch": 0.006949270326615705,
"grad_norm": 1.265625,
"learning_rate": 8.842105263157895e-06,
"loss": 1.3814109563827515,
"step": 15
},
{
"epoch": 0.007412555015056752,
"grad_norm": 1.2421875,
"learning_rate": 9.473684210526315e-06,
"loss": 1.0409232378005981,
"step": 16
},
{
"epoch": 0.007875839703497799,
"grad_norm": 1.015625,
"learning_rate": 1.0105263157894736e-05,
"loss": 1.2293620109558105,
"step": 17
},
{
"epoch": 0.008339124391938846,
"grad_norm": 1.0390625,
"learning_rate": 1.0736842105263158e-05,
"loss": 1.2468554973602295,
"step": 18
},
{
"epoch": 0.008802409080379893,
"grad_norm": 0.91796875,
"learning_rate": 1.1368421052631578e-05,
"loss": 1.2527501583099365,
"step": 19
},
{
"epoch": 0.009265693768820941,
"grad_norm": 1.140625,
"learning_rate": 1.2e-05,
"loss": 1.2057502269744873,
"step": 20
},
{
"epoch": 0.009728978457261988,
"grad_norm": 1.078125,
"learning_rate": 1.263157894736842e-05,
"loss": 1.190750241279602,
"step": 21
},
{
"epoch": 0.010192263145703035,
"grad_norm": 1.0,
"learning_rate": 1.3263157894736844e-05,
"loss": 1.2279465198516846,
"step": 22
},
{
"epoch": 0.010655547834144082,
"grad_norm": 0.8828125,
"learning_rate": 1.3894736842105265e-05,
"loss": 1.2643662691116333,
"step": 23
},
{
"epoch": 0.011118832522585128,
"grad_norm": 1.0390625,
"learning_rate": 1.4526315789473685e-05,
"loss": 1.1729130744934082,
"step": 24
},
{
"epoch": 0.011582117211026175,
"grad_norm": 1.0,
"learning_rate": 1.5157894736842105e-05,
"loss": 1.249855875968933,
"step": 25
},
{
"epoch": 0.012045401899467222,
"grad_norm": 0.828125,
"learning_rate": 1.578947368421053e-05,
"loss": 1.2158374786376953,
"step": 26
},
{
"epoch": 0.01250868658790827,
"grad_norm": 0.89453125,
"learning_rate": 1.642105263157895e-05,
"loss": 1.1104485988616943,
"step": 27
},
{
"epoch": 0.012971971276349317,
"grad_norm": 0.828125,
"learning_rate": 1.705263157894737e-05,
"loss": 1.1513713598251343,
"step": 28
},
{
"epoch": 0.013435255964790364,
"grad_norm": 0.89453125,
"learning_rate": 1.768421052631579e-05,
"loss": 1.1879022121429443,
"step": 29
},
{
"epoch": 0.01389854065323141,
"grad_norm": 0.83984375,
"learning_rate": 1.831578947368421e-05,
"loss": 1.250070333480835,
"step": 30
},
{
"epoch": 0.014361825341672458,
"grad_norm": 0.859375,
"learning_rate": 1.894736842105263e-05,
"loss": 1.1055482625961304,
"step": 31
},
{
"epoch": 0.014825110030113504,
"grad_norm": 0.7890625,
"learning_rate": 1.957894736842105e-05,
"loss": 1.1359180212020874,
"step": 32
},
{
"epoch": 0.015288394718554551,
"grad_norm": 0.83203125,
"learning_rate": 2.0210526315789472e-05,
"loss": 1.426615595817566,
"step": 33
},
{
"epoch": 0.015751679406995598,
"grad_norm": 0.828125,
"learning_rate": 2.0842105263157895e-05,
"loss": 1.1893213987350464,
"step": 34
},
{
"epoch": 0.016214964095436647,
"grad_norm": 0.84765625,
"learning_rate": 2.1473684210526316e-05,
"loss": 1.2659728527069092,
"step": 35
},
{
"epoch": 0.01667824878387769,
"grad_norm": 0.7734375,
"learning_rate": 2.2105263157894736e-05,
"loss": 1.2023910284042358,
"step": 36
},
{
"epoch": 0.01714153347231874,
"grad_norm": 0.8203125,
"learning_rate": 2.2736842105263157e-05,
"loss": 1.4574058055877686,
"step": 37
},
{
"epoch": 0.017604818160759785,
"grad_norm": 0.90625,
"learning_rate": 2.336842105263158e-05,
"loss": 0.9998283386230469,
"step": 38
},
{
"epoch": 0.018068102849200834,
"grad_norm": 0.81640625,
"learning_rate": 2.4e-05,
"loss": 1.094596266746521,
"step": 39
},
{
"epoch": 0.018531387537641882,
"grad_norm": 0.80859375,
"learning_rate": 2.4631578947368424e-05,
"loss": 1.2685940265655518,
"step": 40
},
{
"epoch": 0.018994672226082927,
"grad_norm": 0.86328125,
"learning_rate": 2.526315789473684e-05,
"loss": 1.370650053024292,
"step": 41
},
{
"epoch": 0.019457956914523976,
"grad_norm": 0.9140625,
"learning_rate": 2.5894736842105265e-05,
"loss": 1.0709939002990723,
"step": 42
},
{
"epoch": 0.01992124160296502,
"grad_norm": 0.73046875,
"learning_rate": 2.652631578947369e-05,
"loss": 1.2332732677459717,
"step": 43
},
{
"epoch": 0.02038452629140607,
"grad_norm": 0.81640625,
"learning_rate": 2.7157894736842106e-05,
"loss": 1.1283347606658936,
"step": 44
},
{
"epoch": 0.020847810979847115,
"grad_norm": 0.87109375,
"learning_rate": 2.778947368421053e-05,
"loss": 1.050850749015808,
"step": 45
},
{
"epoch": 0.021311095668288163,
"grad_norm": 0.74609375,
"learning_rate": 2.8421052631578946e-05,
"loss": 1.1958825588226318,
"step": 46
},
{
"epoch": 0.02177438035672921,
"grad_norm": 0.7265625,
"learning_rate": 2.905263157894737e-05,
"loss": 1.439139485359192,
"step": 47
},
{
"epoch": 0.022237665045170257,
"grad_norm": 0.7109375,
"learning_rate": 2.968421052631579e-05,
"loss": 1.2316217422485352,
"step": 48
},
{
"epoch": 0.022700949733611305,
"grad_norm": 0.79296875,
"learning_rate": 3.031578947368421e-05,
"loss": 1.177676796913147,
"step": 49
},
{
"epoch": 0.02316423442205235,
"grad_norm": 0.74609375,
"learning_rate": 3.094736842105263e-05,
"loss": 1.1291377544403076,
"step": 50
},
{
"epoch": 0.0236275191104934,
"grad_norm": 0.83984375,
"learning_rate": 3.157894736842106e-05,
"loss": 1.2162253856658936,
"step": 51
},
{
"epoch": 0.024090803798934444,
"grad_norm": 0.875,
"learning_rate": 3.221052631578947e-05,
"loss": 1.4145865440368652,
"step": 52
},
{
"epoch": 0.024554088487375492,
"grad_norm": 0.859375,
"learning_rate": 3.28421052631579e-05,
"loss": 1.2053899765014648,
"step": 53
},
{
"epoch": 0.02501737317581654,
"grad_norm": 0.8671875,
"learning_rate": 3.347368421052631e-05,
"loss": 1.406412959098816,
"step": 54
},
{
"epoch": 0.025480657864257586,
"grad_norm": 0.828125,
"learning_rate": 3.410526315789474e-05,
"loss": 1.3238595724105835,
"step": 55
},
{
"epoch": 0.025943942552698634,
"grad_norm": 0.8203125,
"learning_rate": 3.473684210526316e-05,
"loss": 1.089475154876709,
"step": 56
},
{
"epoch": 0.02640722724113968,
"grad_norm": 0.85546875,
"learning_rate": 3.536842105263158e-05,
"loss": 0.9788758754730225,
"step": 57
},
{
"epoch": 0.026870511929580728,
"grad_norm": 0.93359375,
"learning_rate": 3.6e-05,
"loss": 1.3209675550460815,
"step": 58
},
{
"epoch": 0.027333796618021773,
"grad_norm": 0.73828125,
"learning_rate": 3.663157894736842e-05,
"loss": 1.0189337730407715,
"step": 59
},
{
"epoch": 0.02779708130646282,
"grad_norm": 0.92578125,
"learning_rate": 3.726315789473684e-05,
"loss": 1.2801414728164673,
"step": 60
},
{
"epoch": 0.028260365994903867,
"grad_norm": 0.765625,
"learning_rate": 3.789473684210526e-05,
"loss": 1.1465449333190918,
"step": 61
},
{
"epoch": 0.028723650683344915,
"grad_norm": 0.87890625,
"learning_rate": 3.852631578947369e-05,
"loss": 1.259995937347412,
"step": 62
},
{
"epoch": 0.029186935371785964,
"grad_norm": 0.8125,
"learning_rate": 3.91578947368421e-05,
"loss": 1.0937800407409668,
"step": 63
},
{
"epoch": 0.02965022006022701,
"grad_norm": 0.8046875,
"learning_rate": 3.978947368421053e-05,
"loss": 1.218725562095642,
"step": 64
},
{
"epoch": 0.030113504748668057,
"grad_norm": 0.7265625,
"learning_rate": 4.0421052631578943e-05,
"loss": 1.2543126344680786,
"step": 65
},
{
"epoch": 0.030576789437109102,
"grad_norm": 0.75,
"learning_rate": 4.105263157894737e-05,
"loss": 1.2053662538528442,
"step": 66
},
{
"epoch": 0.03104007412555015,
"grad_norm": 0.78515625,
"learning_rate": 4.168421052631579e-05,
"loss": 1.1383905410766602,
"step": 67
},
{
"epoch": 0.031503358813991196,
"grad_norm": 0.76953125,
"learning_rate": 4.231578947368421e-05,
"loss": 1.1001615524291992,
"step": 68
},
{
"epoch": 0.031966643502432245,
"grad_norm": 0.86328125,
"learning_rate": 4.294736842105263e-05,
"loss": 1.2164722681045532,
"step": 69
},
{
"epoch": 0.03242992819087329,
"grad_norm": 0.78125,
"learning_rate": 4.357894736842106e-05,
"loss": 1.1911834478378296,
"step": 70
},
{
"epoch": 0.03289321287931434,
"grad_norm": 0.9375,
"learning_rate": 4.421052631578947e-05,
"loss": 1.1898846626281738,
"step": 71
},
{
"epoch": 0.03335649756775538,
"grad_norm": 0.79296875,
"learning_rate": 4.48421052631579e-05,
"loss": 1.3878438472747803,
"step": 72
},
{
"epoch": 0.03381978225619643,
"grad_norm": 0.8515625,
"learning_rate": 4.547368421052631e-05,
"loss": 1.1823328733444214,
"step": 73
},
{
"epoch": 0.03428306694463748,
"grad_norm": 0.7265625,
"learning_rate": 4.610526315789474e-05,
"loss": 1.1236375570297241,
"step": 74
},
{
"epoch": 0.03474635163307853,
"grad_norm": 0.73046875,
"learning_rate": 4.673684210526316e-05,
"loss": 1.2263376712799072,
"step": 75
},
{
"epoch": 0.03520963632151957,
"grad_norm": 0.7734375,
"learning_rate": 4.736842105263158e-05,
"loss": 1.0898189544677734,
"step": 76
},
{
"epoch": 0.03567292100996062,
"grad_norm": 0.72265625,
"learning_rate": 4.8e-05,
"loss": 1.1455436944961548,
"step": 77
},
{
"epoch": 0.03613620569840167,
"grad_norm": 0.765625,
"learning_rate": 4.799997759200591e-05,
"loss": 1.3289111852645874,
"step": 78
},
{
"epoch": 0.036599490386842716,
"grad_norm": 0.7890625,
"learning_rate": 4.799991036806548e-05,
"loss": 1.103787899017334,
"step": 79
},
{
"epoch": 0.037062775075283765,
"grad_norm": 0.7890625,
"learning_rate": 4.799979832830423e-05,
"loss": 1.1579338312149048,
"step": 80
},
{
"epoch": 0.037526059763724806,
"grad_norm": 0.7734375,
"learning_rate": 4.799964147293139e-05,
"loss": 1.3283060789108276,
"step": 81
},
{
"epoch": 0.037989344452165855,
"grad_norm": 0.86328125,
"learning_rate": 4.799943980223985e-05,
"loss": 0.9260512590408325,
"step": 82
},
{
"epoch": 0.0384526291406069,
"grad_norm": 0.76171875,
"learning_rate": 4.7999193316606205e-05,
"loss": 1.1989682912826538,
"step": 83
},
{
"epoch": 0.03891591382904795,
"grad_norm": 0.8046875,
"learning_rate": 4.799890201649072e-05,
"loss": 1.2116103172302246,
"step": 84
},
{
"epoch": 0.039379198517489,
"grad_norm": 0.86328125,
"learning_rate": 4.7998565902437354e-05,
"loss": 1.2562410831451416,
"step": 85
},
{
"epoch": 0.03984248320593004,
"grad_norm": 0.80078125,
"learning_rate": 4.799818497507374e-05,
"loss": 1.1803618669509888,
"step": 86
},
{
"epoch": 0.04030576789437109,
"grad_norm": 0.78125,
"learning_rate": 4.79977592351112e-05,
"loss": 1.193110466003418,
"step": 87
},
{
"epoch": 0.04076905258281214,
"grad_norm": 0.74609375,
"learning_rate": 4.799728868334472e-05,
"loss": 1.1182022094726562,
"step": 88
},
{
"epoch": 0.04123233727125319,
"grad_norm": 0.78515625,
"learning_rate": 4.799677332065299e-05,
"loss": 1.052176833152771,
"step": 89
},
{
"epoch": 0.04169562195969423,
"grad_norm": 0.80078125,
"learning_rate": 4.799621314799836e-05,
"loss": 1.3748915195465088,
"step": 90
},
{
"epoch": 0.04215890664813528,
"grad_norm": 0.75,
"learning_rate": 4.799560816642687e-05,
"loss": 1.1480522155761719,
"step": 91
},
{
"epoch": 0.042622191336576326,
"grad_norm": 0.73046875,
"learning_rate": 4.79949583770682e-05,
"loss": 1.0132099390029907,
"step": 92
},
{
"epoch": 0.043085476025017375,
"grad_norm": 0.83203125,
"learning_rate": 4.799426378113573e-05,
"loss": 1.1310032606124878,
"step": 93
},
{
"epoch": 0.04354876071345842,
"grad_norm": 0.9296875,
"learning_rate": 4.799352437992651e-05,
"loss": 1.1506468057632446,
"step": 94
},
{
"epoch": 0.044012045401899465,
"grad_norm": 0.76953125,
"learning_rate": 4.7992740174821246e-05,
"loss": 0.833928108215332,
"step": 95
},
{
"epoch": 0.04447533009034051,
"grad_norm": 0.734375,
"learning_rate": 4.79919111672843e-05,
"loss": 1.064217209815979,
"step": 96
},
{
"epoch": 0.04493861477878156,
"grad_norm": 0.80859375,
"learning_rate": 4.799103735886371e-05,
"loss": 1.0427517890930176,
"step": 97
},
{
"epoch": 0.04540189946722261,
"grad_norm": 0.90625,
"learning_rate": 4.7990118751191185e-05,
"loss": 1.0797550678253174,
"step": 98
},
{
"epoch": 0.04586518415566365,
"grad_norm": 1.0234375,
"learning_rate": 4.798915534598205e-05,
"loss": 1.0864336490631104,
"step": 99
},
{
"epoch": 0.0463284688441047,
"grad_norm": 0.9609375,
"learning_rate": 4.79881471450353e-05,
"loss": 1.1602739095687866,
"step": 100
},
{
"epoch": 0.04679175353254575,
"grad_norm": 0.82421875,
"learning_rate": 4.79870941502336e-05,
"loss": 1.045765995979309,
"step": 101
},
{
"epoch": 0.0472550382209868,
"grad_norm": 0.77734375,
"learning_rate": 4.798599636354323e-05,
"loss": 1.2531412839889526,
"step": 102
},
{
"epoch": 0.047718322909427846,
"grad_norm": 0.8125,
"learning_rate": 4.7984853787014124e-05,
"loss": 1.208916425704956,
"step": 103
},
{
"epoch": 0.04818160759786889,
"grad_norm": 0.81640625,
"learning_rate": 4.798366642277986e-05,
"loss": 1.238208532333374,
"step": 104
},
{
"epoch": 0.048644892286309936,
"grad_norm": 0.8828125,
"learning_rate": 4.7982434273057635e-05,
"loss": 0.9851164817810059,
"step": 105
},
{
"epoch": 0.049108176974750985,
"grad_norm": 0.875,
"learning_rate": 4.798115734014828e-05,
"loss": 1.0408838987350464,
"step": 106
},
{
"epoch": 0.04957146166319203,
"grad_norm": 0.83984375,
"learning_rate": 4.7979835626436254e-05,
"loss": 1.191272497177124,
"step": 107
},
{
"epoch": 0.05003474635163308,
"grad_norm": 0.828125,
"learning_rate": 4.797846913438965e-05,
"loss": 0.9605915546417236,
"step": 108
},
{
"epoch": 0.05049803104007412,
"grad_norm": 0.73828125,
"learning_rate": 4.797705786656015e-05,
"loss": 1.1408090591430664,
"step": 109
},
{
"epoch": 0.05096131572851517,
"grad_norm": 0.796875,
"learning_rate": 4.797560182558307e-05,
"loss": 1.277418613433838,
"step": 110
},
{
"epoch": 0.05142460041695622,
"grad_norm": 0.859375,
"learning_rate": 4.797410101417731e-05,
"loss": 1.1940449476242065,
"step": 111
},
{
"epoch": 0.05188788510539727,
"grad_norm": 0.703125,
"learning_rate": 4.7972555435145395e-05,
"loss": 1.1584206819534302,
"step": 112
},
{
"epoch": 0.05235116979383831,
"grad_norm": 0.8125,
"learning_rate": 4.7970965091373425e-05,
"loss": 1.314054250717163,
"step": 113
},
{
"epoch": 0.05281445448227936,
"grad_norm": 0.765625,
"learning_rate": 4.796932998583113e-05,
"loss": 1.155271053314209,
"step": 114
},
{
"epoch": 0.05327773917072041,
"grad_norm": 0.87890625,
"learning_rate": 4.7967650121571754e-05,
"loss": 1.0596888065338135,
"step": 115
},
{
"epoch": 0.053741023859161456,
"grad_norm": 0.8203125,
"learning_rate": 4.796592550173219e-05,
"loss": 1.0339300632476807,
"step": 116
},
{
"epoch": 0.054204308547602505,
"grad_norm": 0.81640625,
"learning_rate": 4.7964156129532876e-05,
"loss": 1.0572959184646606,
"step": 117
},
{
"epoch": 0.054667593236043546,
"grad_norm": 1.15625,
"learning_rate": 4.796234200827781e-05,
"loss": 1.2064818143844604,
"step": 118
},
{
"epoch": 0.055130877924484595,
"grad_norm": 0.7890625,
"learning_rate": 4.796048314135457e-05,
"loss": 1.0486756563186646,
"step": 119
},
{
"epoch": 0.05559416261292564,
"grad_norm": 0.78125,
"learning_rate": 4.7958579532234265e-05,
"loss": 1.2740678787231445,
"step": 120
},
{
"epoch": 0.05605744730136669,
"grad_norm": 0.76171875,
"learning_rate": 4.795663118447158e-05,
"loss": 1.2558701038360596,
"step": 121
},
{
"epoch": 0.05652073198980773,
"grad_norm": 0.76953125,
"learning_rate": 4.7954638101704724e-05,
"loss": 1.0604870319366455,
"step": 122
},
{
"epoch": 0.05698401667824878,
"grad_norm": 0.77734375,
"learning_rate": 4.7952600287655444e-05,
"loss": 1.0435048341751099,
"step": 123
},
{
"epoch": 0.05744730136668983,
"grad_norm": 0.703125,
"learning_rate": 4.795051774612902e-05,
"loss": 1.0488735437393188,
"step": 124
},
{
"epoch": 0.05791058605513088,
"grad_norm": 0.8359375,
"learning_rate": 4.7948390481014245e-05,
"loss": 1.0075401067733765,
"step": 125
},
{
"epoch": 0.05837387074357193,
"grad_norm": 0.7890625,
"learning_rate": 4.7946218496283435e-05,
"loss": 1.1233757734298706,
"step": 126
},
{
"epoch": 0.05883715543201297,
"grad_norm": 0.83203125,
"learning_rate": 4.794400179599242e-05,
"loss": 1.1474615335464478,
"step": 127
},
{
"epoch": 0.05930044012045402,
"grad_norm": 0.76171875,
"learning_rate": 4.794174038428049e-05,
"loss": 1.0086901187896729,
"step": 128
},
{
"epoch": 0.059763724808895066,
"grad_norm": 0.76171875,
"learning_rate": 4.793943426537048e-05,
"loss": 1.119909644126892,
"step": 129
},
{
"epoch": 0.060227009497336115,
"grad_norm": 0.765625,
"learning_rate": 4.793708344356867e-05,
"loss": 1.0933876037597656,
"step": 130
},
{
"epoch": 0.06069029418577716,
"grad_norm": 0.73828125,
"learning_rate": 4.793468792326482e-05,
"loss": 1.424509882926941,
"step": 131
},
{
"epoch": 0.061153578874218205,
"grad_norm": 0.8125,
"learning_rate": 4.7932247708932184e-05,
"loss": 1.086850643157959,
"step": 132
},
{
"epoch": 0.06161686356265925,
"grad_norm": 0.76171875,
"learning_rate": 4.7929762805127435e-05,
"loss": 1.4302482604980469,
"step": 133
},
{
"epoch": 0.0620801482511003,
"grad_norm": 0.8125,
"learning_rate": 4.7927233216490726e-05,
"loss": 0.9927620887756348,
"step": 134
},
{
"epoch": 0.06254343293954134,
"grad_norm": 0.8359375,
"learning_rate": 4.792465894774563e-05,
"loss": 1.1411983966827393,
"step": 135
},
{
"epoch": 0.06300671762798239,
"grad_norm": 0.76171875,
"learning_rate": 4.792204000369917e-05,
"loss": 1.3219720125198364,
"step": 136
},
{
"epoch": 0.06347000231642344,
"grad_norm": 0.75390625,
"learning_rate": 4.791937638924179e-05,
"loss": 1.2182328701019287,
"step": 137
},
{
"epoch": 0.06393328700486449,
"grad_norm": 0.85546875,
"learning_rate": 4.7916668109347346e-05,
"loss": 1.2830442190170288,
"step": 138
},
{
"epoch": 0.06439657169330554,
"grad_norm": 0.85546875,
"learning_rate": 4.791391516907309e-05,
"loss": 1.1010041236877441,
"step": 139
},
{
"epoch": 0.06485985638174659,
"grad_norm": 0.8359375,
"learning_rate": 4.7911117573559676e-05,
"loss": 1.0438331365585327,
"step": 140
},
{
"epoch": 0.06532314107018763,
"grad_norm": 0.74609375,
"learning_rate": 4.7908275328031156e-05,
"loss": 1.039322853088379,
"step": 141
},
{
"epoch": 0.06578642575862868,
"grad_norm": 0.82421875,
"learning_rate": 4.7905388437794946e-05,
"loss": 1.1718674898147583,
"step": 142
},
{
"epoch": 0.06624971044706972,
"grad_norm": 0.75,
"learning_rate": 4.7902456908241836e-05,
"loss": 1.0182360410690308,
"step": 143
},
{
"epoch": 0.06671299513551077,
"grad_norm": 0.76171875,
"learning_rate": 4.789948074484594e-05,
"loss": 0.9300652742385864,
"step": 144
},
{
"epoch": 0.06717627982395181,
"grad_norm": 0.796875,
"learning_rate": 4.7896459953164785e-05,
"loss": 1.1588186025619507,
"step": 145
},
{
"epoch": 0.06763956451239286,
"grad_norm": 0.8203125,
"learning_rate": 4.7893394538839164e-05,
"loss": 1.1683034896850586,
"step": 146
},
{
"epoch": 0.06810284920083391,
"grad_norm": 0.73828125,
"learning_rate": 4.7890284507593236e-05,
"loss": 1.2006157636642456,
"step": 147
},
{
"epoch": 0.06856613388927496,
"grad_norm": 0.83203125,
"learning_rate": 4.788712986523447e-05,
"loss": 1.1696548461914062,
"step": 148
},
{
"epoch": 0.06902941857771601,
"grad_norm": 0.80078125,
"learning_rate": 4.788393061765363e-05,
"loss": 1.0099486112594604,
"step": 149
},
{
"epoch": 0.06949270326615706,
"grad_norm": 0.76171875,
"learning_rate": 4.7880686770824775e-05,
"loss": 1.000266671180725,
"step": 150
},
{
"epoch": 0.0699559879545981,
"grad_norm": 0.92578125,
"learning_rate": 4.7877398330805246e-05,
"loss": 1.1522239446640015,
"step": 151
},
{
"epoch": 0.07041927264303914,
"grad_norm": 0.83203125,
"learning_rate": 4.7874065303735655e-05,
"loss": 1.11775803565979,
"step": 152
},
{
"epoch": 0.07088255733148019,
"grad_norm": 0.76171875,
"learning_rate": 4.787068769583987e-05,
"loss": 1.0676116943359375,
"step": 153
},
{
"epoch": 0.07134584201992124,
"grad_norm": 0.7890625,
"learning_rate": 4.786726551342502e-05,
"loss": 0.9372677206993103,
"step": 154
},
{
"epoch": 0.07180912670836229,
"grad_norm": 0.78125,
"learning_rate": 4.7863798762881446e-05,
"loss": 1.0489038228988647,
"step": 155
},
{
"epoch": 0.07227241139680333,
"grad_norm": 0.7890625,
"learning_rate": 4.7860287450682735e-05,
"loss": 1.2821038961410522,
"step": 156
},
{
"epoch": 0.07273569608524438,
"grad_norm": 0.7265625,
"learning_rate": 4.7856731583385665e-05,
"loss": 1.3503544330596924,
"step": 157
},
{
"epoch": 0.07319898077368543,
"grad_norm": 0.74609375,
"learning_rate": 4.7853131167630235e-05,
"loss": 1.1046172380447388,
"step": 158
},
{
"epoch": 0.07366226546212648,
"grad_norm": 0.73828125,
"learning_rate": 4.7849486210139616e-05,
"loss": 1.5007928609848022,
"step": 159
},
{
"epoch": 0.07412555015056753,
"grad_norm": 0.7734375,
"learning_rate": 4.784579671772015e-05,
"loss": 1.1768280267715454,
"step": 160
},
{
"epoch": 0.07458883483900858,
"grad_norm": 0.703125,
"learning_rate": 4.784206269726136e-05,
"loss": 1.2257065773010254,
"step": 161
},
{
"epoch": 0.07505211952744961,
"grad_norm": 0.76953125,
"learning_rate": 4.7838284155735886e-05,
"loss": 1.2349004745483398,
"step": 162
},
{
"epoch": 0.07551540421589066,
"grad_norm": 0.84375,
"learning_rate": 4.783446110019954e-05,
"loss": 1.1083492040634155,
"step": 163
},
{
"epoch": 0.07597868890433171,
"grad_norm": 0.83984375,
"learning_rate": 4.7830593537791244e-05,
"loss": 1.0492440462112427,
"step": 164
},
{
"epoch": 0.07644197359277276,
"grad_norm": 0.8359375,
"learning_rate": 4.7826681475733e-05,
"loss": 1.0901589393615723,
"step": 165
},
{
"epoch": 0.0769052582812138,
"grad_norm": 0.8359375,
"learning_rate": 4.7822724921329945e-05,
"loss": 1.196974515914917,
"step": 166
},
{
"epoch": 0.07736854296965485,
"grad_norm": 0.84375,
"learning_rate": 4.781872388197029e-05,
"loss": 1.2492700815200806,
"step": 167
},
{
"epoch": 0.0778318276580959,
"grad_norm": 0.890625,
"learning_rate": 4.781467836512529e-05,
"loss": 0.9922595620155334,
"step": 168
},
{
"epoch": 0.07829511234653695,
"grad_norm": 0.73828125,
"learning_rate": 4.781058837834929e-05,
"loss": 1.2686748504638672,
"step": 169
},
{
"epoch": 0.078758397034978,
"grad_norm": 0.77734375,
"learning_rate": 4.780645392927964e-05,
"loss": 0.9617519378662109,
"step": 170
},
{
"epoch": 0.07922168172341904,
"grad_norm": 0.76953125,
"learning_rate": 4.780227502563674e-05,
"loss": 1.0572490692138672,
"step": 171
},
{
"epoch": 0.07968496641186008,
"grad_norm": 0.7734375,
"learning_rate": 4.7798051675223994e-05,
"loss": 1.1447961330413818,
"step": 172
},
{
"epoch": 0.08014825110030113,
"grad_norm": 0.765625,
"learning_rate": 4.77937838859278e-05,
"loss": 0.9723523855209351,
"step": 173
},
{
"epoch": 0.08061153578874218,
"grad_norm": 0.83984375,
"learning_rate": 4.778947166571755e-05,
"loss": 1.1690819263458252,
"step": 174
},
{
"epoch": 0.08107482047718323,
"grad_norm": 0.859375,
"learning_rate": 4.778511502264559e-05,
"loss": 1.043947458267212,
"step": 175
},
{
"epoch": 0.08153810516562428,
"grad_norm": 0.8515625,
"learning_rate": 4.778071396484721e-05,
"loss": 1.0934100151062012,
"step": 176
},
{
"epoch": 0.08200138985406533,
"grad_norm": 0.79296875,
"learning_rate": 4.777626850054067e-05,
"loss": 1.1645115613937378,
"step": 177
},
{
"epoch": 0.08246467454250637,
"grad_norm": 0.72265625,
"learning_rate": 4.7771778638027116e-05,
"loss": 1.0093110799789429,
"step": 178
},
{
"epoch": 0.08292795923094742,
"grad_norm": 0.7734375,
"learning_rate": 4.7767244385690624e-05,
"loss": 1.2085744142532349,
"step": 179
},
{
"epoch": 0.08339124391938846,
"grad_norm": 0.69140625,
"learning_rate": 4.776266575199815e-05,
"loss": 1.048790693283081,
"step": 180
},
{
"epoch": 0.0838545286078295,
"grad_norm": 0.734375,
"learning_rate": 4.775804274549953e-05,
"loss": 1.0067102909088135,
"step": 181
},
{
"epoch": 0.08431781329627056,
"grad_norm": 0.73828125,
"learning_rate": 4.775337537482744e-05,
"loss": 1.0322071313858032,
"step": 182
},
{
"epoch": 0.0847810979847116,
"grad_norm": 0.70703125,
"learning_rate": 4.7748663648697436e-05,
"loss": 0.8763373494148254,
"step": 183
},
{
"epoch": 0.08524438267315265,
"grad_norm": 0.890625,
"learning_rate": 4.774390757590787e-05,
"loss": 1.1351971626281738,
"step": 184
},
{
"epoch": 0.0857076673615937,
"grad_norm": 0.7734375,
"learning_rate": 4.773910716533992e-05,
"loss": 1.21125066280365,
"step": 185
},
{
"epoch": 0.08617095205003475,
"grad_norm": 0.8046875,
"learning_rate": 4.773426242595754e-05,
"loss": 1.0625823736190796,
"step": 186
},
{
"epoch": 0.0866342367384758,
"grad_norm": 0.80859375,
"learning_rate": 4.772937336680748e-05,
"loss": 1.2072420120239258,
"step": 187
},
{
"epoch": 0.08709752142691685,
"grad_norm": 0.72265625,
"learning_rate": 4.772443999701922e-05,
"loss": 1.1252281665802002,
"step": 188
},
{
"epoch": 0.08756080611535788,
"grad_norm": 0.81640625,
"learning_rate": 4.771946232580503e-05,
"loss": 1.1829332113265991,
"step": 189
},
{
"epoch": 0.08802409080379893,
"grad_norm": 0.83984375,
"learning_rate": 4.771444036245987e-05,
"loss": 1.177690029144287,
"step": 190
},
{
"epoch": 0.08848737549223998,
"grad_norm": 0.70703125,
"learning_rate": 4.7709374116361405e-05,
"loss": 1.025864839553833,
"step": 191
},
{
"epoch": 0.08895066018068103,
"grad_norm": 1.046875,
"learning_rate": 4.770426359697001e-05,
"loss": 1.2552249431610107,
"step": 192
},
{
"epoch": 0.08941394486912208,
"grad_norm": 0.91796875,
"learning_rate": 4.7699108813828735e-05,
"loss": 1.1388694047927856,
"step": 193
},
{
"epoch": 0.08987722955756312,
"grad_norm": 0.7578125,
"learning_rate": 4.769390977656328e-05,
"loss": 1.1736036539077759,
"step": 194
},
{
"epoch": 0.09034051424600417,
"grad_norm": 0.84765625,
"learning_rate": 4.768866649488196e-05,
"loss": 1.1389501094818115,
"step": 195
},
{
"epoch": 0.09080379893444522,
"grad_norm": 0.81640625,
"learning_rate": 4.768337897857572e-05,
"loss": 1.0693917274475098,
"step": 196
},
{
"epoch": 0.09126708362288627,
"grad_norm": 1.1640625,
"learning_rate": 4.767804723751814e-05,
"loss": 1.139711856842041,
"step": 197
},
{
"epoch": 0.0917303683113273,
"grad_norm": 0.8125,
"learning_rate": 4.767267128166534e-05,
"loss": 1.021757960319519,
"step": 198
},
{
"epoch": 0.09219365299976835,
"grad_norm": 0.8828125,
"learning_rate": 4.766725112105602e-05,
"loss": 1.2109063863754272,
"step": 199
},
{
"epoch": 0.0926569376882094,
"grad_norm": 0.78515625,
"learning_rate": 4.7661786765811425e-05,
"loss": 1.1550836563110352,
"step": 200
},
{
"epoch": 0.09312022237665045,
"grad_norm": 0.88671875,
"learning_rate": 4.765627822613532e-05,
"loss": 1.1337045431137085,
"step": 201
},
{
"epoch": 0.0935835070650915,
"grad_norm": 0.734375,
"learning_rate": 4.7650725512313996e-05,
"loss": 1.1244243383407593,
"step": 202
},
{
"epoch": 0.09404679175353255,
"grad_norm": 0.76171875,
"learning_rate": 4.76451286347162e-05,
"loss": 1.1743513345718384,
"step": 203
},
{
"epoch": 0.0945100764419736,
"grad_norm": 0.74609375,
"learning_rate": 4.763948760379319e-05,
"loss": 1.1148113012313843,
"step": 204
},
{
"epoch": 0.09497336113041464,
"grad_norm": 0.75,
"learning_rate": 4.763380243007862e-05,
"loss": 0.9122455716133118,
"step": 205
},
{
"epoch": 0.09543664581885569,
"grad_norm": 0.76171875,
"learning_rate": 4.7628073124188615e-05,
"loss": 1.0933022499084473,
"step": 206
},
{
"epoch": 0.09589993050729674,
"grad_norm": 0.8359375,
"learning_rate": 4.7622299696821693e-05,
"loss": 1.0184919834136963,
"step": 207
},
{
"epoch": 0.09636321519573778,
"grad_norm": 0.76953125,
"learning_rate": 4.7616482158758773e-05,
"loss": 1.1238012313842773,
"step": 208
},
{
"epoch": 0.09682649988417882,
"grad_norm": 0.71875,
"learning_rate": 4.761062052086313e-05,
"loss": 1.22682523727417,
"step": 209
},
{
"epoch": 0.09728978457261987,
"grad_norm": 0.875,
"learning_rate": 4.760471479408038e-05,
"loss": 1.1153074502944946,
"step": 210
},
{
"epoch": 0.09775306926106092,
"grad_norm": 0.7890625,
"learning_rate": 4.7598764989438495e-05,
"loss": 1.0884509086608887,
"step": 211
},
{
"epoch": 0.09821635394950197,
"grad_norm": 0.7109375,
"learning_rate": 4.7592771118047746e-05,
"loss": 0.9598002433776855,
"step": 212
},
{
"epoch": 0.09867963863794302,
"grad_norm": 0.75,
"learning_rate": 4.758673319110067e-05,
"loss": 1.1340510845184326,
"step": 213
},
{
"epoch": 0.09914292332638407,
"grad_norm": 0.7890625,
"learning_rate": 4.75806512198721e-05,
"loss": 0.9966357946395874,
"step": 214
},
{
"epoch": 0.09960620801482511,
"grad_norm": 0.765625,
"learning_rate": 4.757452521571909e-05,
"loss": 1.0271143913269043,
"step": 215
},
{
"epoch": 0.10006949270326616,
"grad_norm": 0.78125,
"learning_rate": 4.7568355190080936e-05,
"loss": 0.938353419303894,
"step": 216
},
{
"epoch": 0.1005327773917072,
"grad_norm": 0.890625,
"learning_rate": 4.756214115447912e-05,
"loss": 1.047834873199463,
"step": 217
},
{
"epoch": 0.10099606208014825,
"grad_norm": 0.85546875,
"learning_rate": 4.7555883120517335e-05,
"loss": 1.1204979419708252,
"step": 218
},
{
"epoch": 0.1014593467685893,
"grad_norm": 0.76953125,
"learning_rate": 4.7549581099881384e-05,
"loss": 1.2204188108444214,
"step": 219
},
{
"epoch": 0.10192263145703034,
"grad_norm": 0.73828125,
"learning_rate": 4.7543235104339265e-05,
"loss": 1.1481391191482544,
"step": 220
},
{
"epoch": 0.10238591614547139,
"grad_norm": 0.71484375,
"learning_rate": 4.753684514574105e-05,
"loss": 1.201314091682434,
"step": 221
},
{
"epoch": 0.10284920083391244,
"grad_norm": 0.7109375,
"learning_rate": 4.753041123601891e-05,
"loss": 1.159132480621338,
"step": 222
},
{
"epoch": 0.10331248552235349,
"grad_norm": 0.79296875,
"learning_rate": 4.752393338718712e-05,
"loss": 1.1852577924728394,
"step": 223
},
{
"epoch": 0.10377577021079454,
"grad_norm": 0.8203125,
"learning_rate": 4.7517411611341954e-05,
"loss": 1.0710164308547974,
"step": 224
},
{
"epoch": 0.10423905489923559,
"grad_norm": 0.8828125,
"learning_rate": 4.7510845920661756e-05,
"loss": 1.097131371498108,
"step": 225
},
{
"epoch": 0.10470233958767662,
"grad_norm": 0.76171875,
"learning_rate": 4.7504236327406854e-05,
"loss": 0.9716182351112366,
"step": 226
},
{
"epoch": 0.10516562427611767,
"grad_norm": 0.8828125,
"learning_rate": 4.749758284391955e-05,
"loss": 1.2137223482131958,
"step": 227
},
{
"epoch": 0.10562890896455872,
"grad_norm": 0.7421875,
"learning_rate": 4.7490885482624115e-05,
"loss": 0.9825916886329651,
"step": 228
},
{
"epoch": 0.10609219365299977,
"grad_norm": 0.73046875,
"learning_rate": 4.748414425602676e-05,
"loss": 1.0940011739730835,
"step": 229
},
{
"epoch": 0.10655547834144082,
"grad_norm": 0.859375,
"learning_rate": 4.7477359176715584e-05,
"loss": 0.9418008923530579,
"step": 230
},
{
"epoch": 0.10701876302988186,
"grad_norm": 0.7109375,
"learning_rate": 4.747053025736061e-05,
"loss": 0.9472661018371582,
"step": 231
},
{
"epoch": 0.10748204771832291,
"grad_norm": 0.7890625,
"learning_rate": 4.746365751071368e-05,
"loss": 1.2847800254821777,
"step": 232
},
{
"epoch": 0.10794533240676396,
"grad_norm": 0.76171875,
"learning_rate": 4.745674094960851e-05,
"loss": 1.0107430219650269,
"step": 233
},
{
"epoch": 0.10840861709520501,
"grad_norm": 0.79296875,
"learning_rate": 4.744978058696062e-05,
"loss": 1.1128199100494385,
"step": 234
},
{
"epoch": 0.10887190178364604,
"grad_norm": 1.1171875,
"learning_rate": 4.744277643576733e-05,
"loss": 0.9219973087310791,
"step": 235
},
{
"epoch": 0.10933518647208709,
"grad_norm": 0.86328125,
"learning_rate": 4.743572850910772e-05,
"loss": 1.2181633710861206,
"step": 236
},
{
"epoch": 0.10979847116052814,
"grad_norm": 0.765625,
"learning_rate": 4.74286368201426e-05,
"loss": 1.2480499744415283,
"step": 237
},
{
"epoch": 0.11026175584896919,
"grad_norm": 0.78125,
"learning_rate": 4.7421501382114536e-05,
"loss": 1.171923279762268,
"step": 238
},
{
"epoch": 0.11072504053741024,
"grad_norm": 0.78515625,
"learning_rate": 4.741432220834775e-05,
"loss": 1.0495820045471191,
"step": 239
},
{
"epoch": 0.11118832522585129,
"grad_norm": 0.84765625,
"learning_rate": 4.740709931224815e-05,
"loss": 1.1960190534591675,
"step": 240
},
{
"epoch": 0.11165160991429234,
"grad_norm": 0.75390625,
"learning_rate": 4.73998327073033e-05,
"loss": 1.1030462980270386,
"step": 241
},
{
"epoch": 0.11211489460273338,
"grad_norm": 0.796875,
"learning_rate": 4.739252240708236e-05,
"loss": 1.1221550703048706,
"step": 242
},
{
"epoch": 0.11257817929117443,
"grad_norm": 1.109375,
"learning_rate": 4.738516842523609e-05,
"loss": 1.2760341167449951,
"step": 243
},
{
"epoch": 0.11304146397961547,
"grad_norm": 0.76953125,
"learning_rate": 4.737777077549683e-05,
"loss": 1.1421096324920654,
"step": 244
},
{
"epoch": 0.11350474866805652,
"grad_norm": 0.734375,
"learning_rate": 4.737032947167845e-05,
"loss": 1.06126070022583,
"step": 245
},
{
"epoch": 0.11396803335649756,
"grad_norm": 0.7734375,
"learning_rate": 4.7362844527676346e-05,
"loss": 1.1655036211013794,
"step": 246
},
{
"epoch": 0.11443131804493861,
"grad_norm": 0.796875,
"learning_rate": 4.735531595746739e-05,
"loss": 1.069222092628479,
"step": 247
},
{
"epoch": 0.11489460273337966,
"grad_norm": 0.7265625,
"learning_rate": 4.7347743775109935e-05,
"loss": 1.2415424585342407,
"step": 248
},
{
"epoch": 0.11535788742182071,
"grad_norm": 0.79296875,
"learning_rate": 4.734012799474377e-05,
"loss": 0.9880377650260925,
"step": 249
},
{
"epoch": 0.11582117211026176,
"grad_norm": 0.84375,
"learning_rate": 4.733246863059008e-05,
"loss": 1.1792749166488647,
"step": 250
},
{
"epoch": 0.1162844567987028,
"grad_norm": 0.75,
"learning_rate": 4.732476569695146e-05,
"loss": 1.2084414958953857,
"step": 251
},
{
"epoch": 0.11674774148714386,
"grad_norm": 0.71484375,
"learning_rate": 4.731701920821184e-05,
"loss": 1.2508437633514404,
"step": 252
},
{
"epoch": 0.1172110261755849,
"grad_norm": 0.78125,
"learning_rate": 4.73092291788365e-05,
"loss": 0.9441017508506775,
"step": 253
},
{
"epoch": 0.11767431086402594,
"grad_norm": 0.80078125,
"learning_rate": 4.7301395623372014e-05,
"loss": 1.1250604391098022,
"step": 254
},
{
"epoch": 0.11813759555246699,
"grad_norm": 0.75390625,
"learning_rate": 4.729351855644624e-05,
"loss": 1.2054286003112793,
"step": 255
},
{
"epoch": 0.11860088024090804,
"grad_norm": 0.79296875,
"learning_rate": 4.7285597992768285e-05,
"loss": 1.2127487659454346,
"step": 256
},
{
"epoch": 0.11906416492934908,
"grad_norm": 0.70703125,
"learning_rate": 4.727763394712847e-05,
"loss": 1.1345865726470947,
"step": 257
},
{
"epoch": 0.11952744961779013,
"grad_norm": 0.77734375,
"learning_rate": 4.726962643439833e-05,
"loss": 1.1558208465576172,
"step": 258
},
{
"epoch": 0.11999073430623118,
"grad_norm": 0.80078125,
"learning_rate": 4.726157546953055e-05,
"loss": 1.0446807146072388,
"step": 259
},
{
"epoch": 0.12045401899467223,
"grad_norm": 0.83203125,
"learning_rate": 4.7253481067558954e-05,
"loss": 1.1157430410385132,
"step": 260
},
{
"epoch": 0.12091730368311328,
"grad_norm": 0.828125,
"learning_rate": 4.72453432435985e-05,
"loss": 1.026274561882019,
"step": 261
},
{
"epoch": 0.12138058837155433,
"grad_norm": 0.69140625,
"learning_rate": 4.7237162012845206e-05,
"loss": 0.9912748336791992,
"step": 262
},
{
"epoch": 0.12184387305999536,
"grad_norm": 0.7421875,
"learning_rate": 4.7228937390576154e-05,
"loss": 1.0849391222000122,
"step": 263
},
{
"epoch": 0.12230715774843641,
"grad_norm": 0.78125,
"learning_rate": 4.722066939214945e-05,
"loss": 1.133726716041565,
"step": 264
},
{
"epoch": 0.12277044243687746,
"grad_norm": 0.765625,
"learning_rate": 4.72123580330042e-05,
"loss": 1.0994298458099365,
"step": 265
},
{
"epoch": 0.1232337271253185,
"grad_norm": 0.73828125,
"learning_rate": 4.720400332866047e-05,
"loss": 1.2458348274230957,
"step": 266
},
{
"epoch": 0.12369701181375956,
"grad_norm": 0.7890625,
"learning_rate": 4.7195605294719286e-05,
"loss": 0.8411968946456909,
"step": 267
},
{
"epoch": 0.1241602965022006,
"grad_norm": 0.8671875,
"learning_rate": 4.718716394686257e-05,
"loss": 1.0096313953399658,
"step": 268
},
{
"epoch": 0.12462358119064165,
"grad_norm": 0.70703125,
"learning_rate": 4.7178679300853125e-05,
"loss": 0.9048255085945129,
"step": 269
},
{
"epoch": 0.1250868658790827,
"grad_norm": 0.703125,
"learning_rate": 4.7170151372534615e-05,
"loss": 1.0907902717590332,
"step": 270
},
{
"epoch": 0.12555015056752375,
"grad_norm": 0.83203125,
"learning_rate": 4.71615801778315e-05,
"loss": 1.1986042261123657,
"step": 271
},
{
"epoch": 0.12601343525596478,
"grad_norm": 0.7890625,
"learning_rate": 4.7152965732749085e-05,
"loss": 0.93548184633255,
"step": 272
},
{
"epoch": 0.12647671994440585,
"grad_norm": 0.78515625,
"learning_rate": 4.714430805337338e-05,
"loss": 0.8795110583305359,
"step": 273
},
{
"epoch": 0.12694000463284688,
"grad_norm": 0.78515625,
"learning_rate": 4.713560715587117e-05,
"loss": 1.1542648077011108,
"step": 274
},
{
"epoch": 0.12740328932128794,
"grad_norm": 0.85546875,
"learning_rate": 4.7126863056489925e-05,
"loss": 1.1123528480529785,
"step": 275
},
{
"epoch": 0.12786657400972898,
"grad_norm": 0.76953125,
"learning_rate": 4.7118075771557775e-05,
"loss": 0.946189820766449,
"step": 276
},
{
"epoch": 0.12832985869817,
"grad_norm": 0.78515625,
"learning_rate": 4.710924531748352e-05,
"loss": 1.0332181453704834,
"step": 277
},
{
"epoch": 0.12879314338661108,
"grad_norm": 0.828125,
"learning_rate": 4.7100371710756555e-05,
"loss": 1.1407872438430786,
"step": 278
},
{
"epoch": 0.1292564280750521,
"grad_norm": 0.79296875,
"learning_rate": 4.709145496794685e-05,
"loss": 1.0078046321868896,
"step": 279
},
{
"epoch": 0.12971971276349317,
"grad_norm": 0.828125,
"learning_rate": 4.7082495105704936e-05,
"loss": 1.1784926652908325,
"step": 280
},
{
"epoch": 0.1301829974519342,
"grad_norm": 0.81640625,
"learning_rate": 4.707349214076186e-05,
"loss": 1.055182695388794,
"step": 281
},
{
"epoch": 0.13064628214037527,
"grad_norm": 0.7578125,
"learning_rate": 4.706444608992915e-05,
"loss": 1.2529042959213257,
"step": 282
},
{
"epoch": 0.1311095668288163,
"grad_norm": 0.81640625,
"learning_rate": 4.70553569700988e-05,
"loss": 1.1405866146087646,
"step": 283
},
{
"epoch": 0.13157285151725737,
"grad_norm": 0.7890625,
"learning_rate": 4.7046224798243215e-05,
"loss": 1.025738000869751,
"step": 284
},
{
"epoch": 0.1320361362056984,
"grad_norm": 1.0078125,
"learning_rate": 4.7037049591415213e-05,
"loss": 1.2285195589065552,
"step": 285
},
{
"epoch": 0.13249942089413944,
"grad_norm": 0.8984375,
"learning_rate": 4.702783136674794e-05,
"loss": 1.0521762371063232,
"step": 286
},
{
"epoch": 0.1329627055825805,
"grad_norm": 0.69140625,
"learning_rate": 4.70185701414549e-05,
"loss": 1.0171458721160889,
"step": 287
},
{
"epoch": 0.13342599027102153,
"grad_norm": 4.5625,
"learning_rate": 4.700926593282988e-05,
"loss": 1.017797589302063,
"step": 288
},
{
"epoch": 0.1338892749594626,
"grad_norm": 0.7578125,
"learning_rate": 4.699991875824693e-05,
"loss": 1.098080039024353,
"step": 289
},
{
"epoch": 0.13435255964790363,
"grad_norm": 0.87109375,
"learning_rate": 4.6990528635160354e-05,
"loss": 1.069311261177063,
"step": 290
},
{
"epoch": 0.1348158443363447,
"grad_norm": 0.7265625,
"learning_rate": 4.6981095581104625e-05,
"loss": 1.1987462043762207,
"step": 291
},
{
"epoch": 0.13527912902478573,
"grad_norm": 0.78515625,
"learning_rate": 4.697161961369438e-05,
"loss": 0.9862013459205627,
"step": 292
},
{
"epoch": 0.1357424137132268,
"grad_norm": 0.83984375,
"learning_rate": 4.696210075062443e-05,
"loss": 1.101189136505127,
"step": 293
},
{
"epoch": 0.13620569840166782,
"grad_norm": 0.796875,
"learning_rate": 4.695253900966965e-05,
"loss": 1.0801221132278442,
"step": 294
},
{
"epoch": 0.13666898309010886,
"grad_norm": 0.80859375,
"learning_rate": 4.694293440868499e-05,
"loss": 1.0982296466827393,
"step": 295
},
{
"epoch": 0.13713226777854992,
"grad_norm": 0.8359375,
"learning_rate": 4.693328696560544e-05,
"loss": 1.1208291053771973,
"step": 296
},
{
"epoch": 0.13759555246699096,
"grad_norm": 0.78515625,
"learning_rate": 4.692359669844599e-05,
"loss": 1.137648344039917,
"step": 297
},
{
"epoch": 0.13805883715543202,
"grad_norm": 0.76171875,
"learning_rate": 4.691386362530158e-05,
"loss": 1.225368618965149,
"step": 298
},
{
"epoch": 0.13852212184387305,
"grad_norm": 0.8046875,
"learning_rate": 4.690408776434712e-05,
"loss": 1.021425485610962,
"step": 299
},
{
"epoch": 0.13898540653231412,
"grad_norm": 0.78515625,
"learning_rate": 4.6894269133837377e-05,
"loss": 1.244565486907959,
"step": 300
},
{
"epoch": 0.13944869122075515,
"grad_norm": 0.7890625,
"learning_rate": 4.688440775210701e-05,
"loss": 1.11764657497406,
"step": 301
},
{
"epoch": 0.1399119759091962,
"grad_norm": 0.82421875,
"learning_rate": 4.6874503637570496e-05,
"loss": 1.2240279912948608,
"step": 302
},
{
"epoch": 0.14037526059763725,
"grad_norm": 0.76171875,
"learning_rate": 4.6864556808722126e-05,
"loss": 1.058721899986267,
"step": 303
},
{
"epoch": 0.14083854528607828,
"grad_norm": 0.7734375,
"learning_rate": 4.685456728413593e-05,
"loss": 0.9557834267616272,
"step": 304
},
{
"epoch": 0.14130182997451934,
"grad_norm": 0.7734375,
"learning_rate": 4.684453508246567e-05,
"loss": 1.1231224536895752,
"step": 305
},
{
"epoch": 0.14176511466296038,
"grad_norm": 0.7109375,
"learning_rate": 4.683446022244482e-05,
"loss": 1.1800084114074707,
"step": 306
},
{
"epoch": 0.14222839935140144,
"grad_norm": 0.76953125,
"learning_rate": 4.682434272288649e-05,
"loss": 0.9980816841125488,
"step": 307
},
{
"epoch": 0.14269168403984248,
"grad_norm": 0.77734375,
"learning_rate": 4.681418260268341e-05,
"loss": 1.141348958015442,
"step": 308
},
{
"epoch": 0.14315496872828354,
"grad_norm": 0.69140625,
"learning_rate": 4.680397988080792e-05,
"loss": 1.0376156568527222,
"step": 309
},
{
"epoch": 0.14361825341672457,
"grad_norm": 0.83203125,
"learning_rate": 4.67937345763119e-05,
"loss": 0.9319735765457153,
"step": 310
},
{
"epoch": 0.14408153810516564,
"grad_norm": 0.80078125,
"learning_rate": 4.678344670832673e-05,
"loss": 1.093515157699585,
"step": 311
},
{
"epoch": 0.14454482279360667,
"grad_norm": 0.8125,
"learning_rate": 4.677311629606328e-05,
"loss": 0.9726182222366333,
"step": 312
},
{
"epoch": 0.1450081074820477,
"grad_norm": 0.84765625,
"learning_rate": 4.6762743358811894e-05,
"loss": 1.2113114595413208,
"step": 313
},
{
"epoch": 0.14547139217048877,
"grad_norm": 0.72265625,
"learning_rate": 4.675232791594227e-05,
"loss": 1.0208406448364258,
"step": 314
},
{
"epoch": 0.1459346768589298,
"grad_norm": 0.72265625,
"learning_rate": 4.674186998690353e-05,
"loss": 0.9950704574584961,
"step": 315
},
{
"epoch": 0.14639796154737086,
"grad_norm": 0.7890625,
"learning_rate": 4.673136959122409e-05,
"loss": 1.0458511114120483,
"step": 316
},
{
"epoch": 0.1468612462358119,
"grad_norm": 0.7265625,
"learning_rate": 4.672082674851169e-05,
"loss": 1.0969946384429932,
"step": 317
},
{
"epoch": 0.14732453092425296,
"grad_norm": 0.81640625,
"learning_rate": 4.6710241478453334e-05,
"loss": 1.00065016746521,
"step": 318
},
{
"epoch": 0.147787815612694,
"grad_norm": 0.71875,
"learning_rate": 4.669961380081523e-05,
"loss": 0.9182780981063843,
"step": 319
},
{
"epoch": 0.14825110030113506,
"grad_norm": 0.83203125,
"learning_rate": 4.6688943735442805e-05,
"loss": 1.0130627155303955,
"step": 320
},
{
"epoch": 0.1487143849895761,
"grad_norm": 0.78515625,
"learning_rate": 4.667823130226061e-05,
"loss": 1.091713547706604,
"step": 321
},
{
"epoch": 0.14917766967801716,
"grad_norm": 0.80078125,
"learning_rate": 4.666747652127233e-05,
"loss": 1.0551024675369263,
"step": 322
},
{
"epoch": 0.1496409543664582,
"grad_norm": 0.74609375,
"learning_rate": 4.665667941256072e-05,
"loss": 1.0696836709976196,
"step": 323
},
{
"epoch": 0.15010423905489922,
"grad_norm": 0.74609375,
"learning_rate": 4.664583999628757e-05,
"loss": 1.0591177940368652,
"step": 324
},
{
"epoch": 0.1505675237433403,
"grad_norm": 0.8359375,
"learning_rate": 4.663495829269368e-05,
"loss": 1.1695055961608887,
"step": 325
},
{
"epoch": 0.15103080843178132,
"grad_norm": 0.8203125,
"learning_rate": 4.662403432209882e-05,
"loss": 1.1184823513031006,
"step": 326
},
{
"epoch": 0.15149409312022238,
"grad_norm": 0.671875,
"learning_rate": 4.661306810490168e-05,
"loss": 1.0364640951156616,
"step": 327
},
{
"epoch": 0.15195737780866342,
"grad_norm": 0.78125,
"learning_rate": 4.660205966157982e-05,
"loss": 1.2171732187271118,
"step": 328
},
{
"epoch": 0.15242066249710448,
"grad_norm": 0.8515625,
"learning_rate": 4.6591009012689685e-05,
"loss": 1.1131620407104492,
"step": 329
},
{
"epoch": 0.15288394718554552,
"grad_norm": 0.75,
"learning_rate": 4.6579916178866506e-05,
"loss": 0.9144288301467896,
"step": 330
},
{
"epoch": 0.15334723187398658,
"grad_norm": 0.8125,
"learning_rate": 4.6568781180824304e-05,
"loss": 1.180692434310913,
"step": 331
},
{
"epoch": 0.1538105165624276,
"grad_norm": 0.8125,
"learning_rate": 4.655760403935581e-05,
"loss": 1.1063326597213745,
"step": 332
},
{
"epoch": 0.15427380125086865,
"grad_norm": 0.77734375,
"learning_rate": 4.654638477533249e-05,
"loss": 1.1317967176437378,
"step": 333
},
{
"epoch": 0.1547370859393097,
"grad_norm": 0.8671875,
"learning_rate": 4.653512340970443e-05,
"loss": 1.0568040609359741,
"step": 334
},
{
"epoch": 0.15520037062775074,
"grad_norm": 0.73828125,
"learning_rate": 4.6523819963500345e-05,
"loss": 1.0148340463638306,
"step": 335
},
{
"epoch": 0.1556636553161918,
"grad_norm": 0.8046875,
"learning_rate": 4.651247445782754e-05,
"loss": 0.9750385284423828,
"step": 336
},
{
"epoch": 0.15612694000463284,
"grad_norm": 0.92578125,
"learning_rate": 4.650108691387185e-05,
"loss": 1.0633890628814697,
"step": 337
},
{
"epoch": 0.1565902246930739,
"grad_norm": 0.81640625,
"learning_rate": 4.648965735289761e-05,
"loss": 1.223706603050232,
"step": 338
},
{
"epoch": 0.15705350938151494,
"grad_norm": 0.78125,
"learning_rate": 4.647818579624761e-05,
"loss": 1.2269283533096313,
"step": 339
},
{
"epoch": 0.157516794069956,
"grad_norm": 0.7109375,
"learning_rate": 4.6466672265343056e-05,
"loss": 0.990770697593689,
"step": 340
},
{
"epoch": 0.15798007875839704,
"grad_norm": 0.765625,
"learning_rate": 4.645511678168356e-05,
"loss": 1.1373369693756104,
"step": 341
},
{
"epoch": 0.15844336344683807,
"grad_norm": 0.90625,
"learning_rate": 4.644351936684705e-05,
"loss": 1.106075406074524,
"step": 342
},
{
"epoch": 0.15890664813527913,
"grad_norm": 0.74609375,
"learning_rate": 4.643188004248975e-05,
"loss": 0.9842250943183899,
"step": 343
},
{
"epoch": 0.15936993282372017,
"grad_norm": 0.81640625,
"learning_rate": 4.642019883034617e-05,
"loss": 1.1008222103118896,
"step": 344
},
{
"epoch": 0.15983321751216123,
"grad_norm": 0.75,
"learning_rate": 4.640847575222901e-05,
"loss": 1.127953052520752,
"step": 345
},
{
"epoch": 0.16029650220060226,
"grad_norm": 0.95703125,
"learning_rate": 4.6396710830029164e-05,
"loss": 1.2000129222869873,
"step": 346
},
{
"epoch": 0.16075978688904333,
"grad_norm": 0.96484375,
"learning_rate": 4.638490408571564e-05,
"loss": 1.0962949991226196,
"step": 347
},
{
"epoch": 0.16122307157748436,
"grad_norm": 0.77734375,
"learning_rate": 4.637305554133559e-05,
"loss": 1.0607415437698364,
"step": 348
},
{
"epoch": 0.16168635626592542,
"grad_norm": 0.71875,
"learning_rate": 4.636116521901417e-05,
"loss": 1.0603266954421997,
"step": 349
},
{
"epoch": 0.16214964095436646,
"grad_norm": 0.80078125,
"learning_rate": 4.6349233140954573e-05,
"loss": 1.084631085395813,
"step": 350
},
{
"epoch": 0.1626129256428075,
"grad_norm": 0.7421875,
"learning_rate": 4.633725932943795e-05,
"loss": 1.1179983615875244,
"step": 351
},
{
"epoch": 0.16307621033124856,
"grad_norm": 0.7578125,
"learning_rate": 4.632524380682341e-05,
"loss": 0.9485760927200317,
"step": 352
},
{
"epoch": 0.1635394950196896,
"grad_norm": 0.83984375,
"learning_rate": 4.631318659554793e-05,
"loss": 0.8660714626312256,
"step": 353
},
{
"epoch": 0.16400277970813065,
"grad_norm": 0.90234375,
"learning_rate": 4.6301087718126324e-05,
"loss": 1.2922559976577759,
"step": 354
},
{
"epoch": 0.1644660643965717,
"grad_norm": 0.8984375,
"learning_rate": 4.628894719715124e-05,
"loss": 1.1782947778701782,
"step": 355
},
{
"epoch": 0.16492934908501275,
"grad_norm": 0.7890625,
"learning_rate": 4.627676505529306e-05,
"loss": 1.32277512550354,
"step": 356
},
{
"epoch": 0.16539263377345378,
"grad_norm": 0.8359375,
"learning_rate": 4.62645413152999e-05,
"loss": 1.2258048057556152,
"step": 357
},
{
"epoch": 0.16585591846189485,
"grad_norm": 0.84375,
"learning_rate": 4.6252275999997546e-05,
"loss": 1.1965945959091187,
"step": 358
},
{
"epoch": 0.16631920315033588,
"grad_norm": 0.75390625,
"learning_rate": 4.6239969132289436e-05,
"loss": 1.1148847341537476,
"step": 359
},
{
"epoch": 0.16678248783877692,
"grad_norm": 0.87109375,
"learning_rate": 4.622762073515658e-05,
"loss": 1.1224826574325562,
"step": 360
},
{
"epoch": 0.16724577252721798,
"grad_norm": 0.796875,
"learning_rate": 4.621523083165755e-05,
"loss": 1.1334441900253296,
"step": 361
},
{
"epoch": 0.167709057215659,
"grad_norm": 0.78515625,
"learning_rate": 4.620279944492841e-05,
"loss": 1.0212844610214233,
"step": 362
},
{
"epoch": 0.16817234190410008,
"grad_norm": 0.78125,
"learning_rate": 4.619032659818271e-05,
"loss": 1.0923480987548828,
"step": 363
},
{
"epoch": 0.1686356265925411,
"grad_norm": 0.828125,
"learning_rate": 4.61778123147114e-05,
"loss": 1.0250271558761597,
"step": 364
},
{
"epoch": 0.16909891128098217,
"grad_norm": 0.765625,
"learning_rate": 4.616525661788281e-05,
"loss": 1.1997897624969482,
"step": 365
},
{
"epoch": 0.1695621959694232,
"grad_norm": 0.7578125,
"learning_rate": 4.6152659531142605e-05,
"loss": 1.0714635848999023,
"step": 366
},
{
"epoch": 0.17002548065786427,
"grad_norm": 0.875,
"learning_rate": 4.614002107801375e-05,
"loss": 1.1388036012649536,
"step": 367
},
{
"epoch": 0.1704887653463053,
"grad_norm": 0.7421875,
"learning_rate": 4.612734128209643e-05,
"loss": 1.0413213968276978,
"step": 368
},
{
"epoch": 0.17095205003474634,
"grad_norm": 0.72265625,
"learning_rate": 4.6114620167068055e-05,
"loss": 1.0464006662368774,
"step": 369
},
{
"epoch": 0.1714153347231874,
"grad_norm": 0.8125,
"learning_rate": 4.610185775668317e-05,
"loss": 0.9965865015983582,
"step": 370
},
{
"epoch": 0.17187861941162844,
"grad_norm": 0.7890625,
"learning_rate": 4.6089054074773446e-05,
"loss": 1.0935486555099487,
"step": 371
},
{
"epoch": 0.1723419041000695,
"grad_norm": 0.8046875,
"learning_rate": 4.6076209145247627e-05,
"loss": 1.158833384513855,
"step": 372
},
{
"epoch": 0.17280518878851053,
"grad_norm": 0.7421875,
"learning_rate": 4.606332299209146e-05,
"loss": 0.897361695766449,
"step": 373
},
{
"epoch": 0.1732684734769516,
"grad_norm": 0.83984375,
"learning_rate": 4.60503956393677e-05,
"loss": 1.0273572206497192,
"step": 374
},
{
"epoch": 0.17373175816539263,
"grad_norm": 0.82421875,
"learning_rate": 4.603742711121599e-05,
"loss": 1.222002387046814,
"step": 375
},
{
"epoch": 0.1741950428538337,
"grad_norm": 0.89453125,
"learning_rate": 4.602441743185291e-05,
"loss": 1.1995201110839844,
"step": 376
},
{
"epoch": 0.17465832754227473,
"grad_norm": 0.7421875,
"learning_rate": 4.601136662557185e-05,
"loss": 1.0347135066986084,
"step": 377
},
{
"epoch": 0.17512161223071576,
"grad_norm": 0.859375,
"learning_rate": 4.599827471674302e-05,
"loss": 1.1969027519226074,
"step": 378
},
{
"epoch": 0.17558489691915682,
"grad_norm": 0.7265625,
"learning_rate": 4.5985141729813366e-05,
"loss": 1.1000288724899292,
"step": 379
},
{
"epoch": 0.17604818160759786,
"grad_norm": 0.80859375,
"learning_rate": 4.5971967689306545e-05,
"loss": 1.0385537147521973,
"step": 380
},
{
"epoch": 0.17651146629603892,
"grad_norm": 0.7421875,
"learning_rate": 4.595875261982288e-05,
"loss": 0.8807584643363953,
"step": 381
},
{
"epoch": 0.17697475098447996,
"grad_norm": 0.77734375,
"learning_rate": 4.5945496546039286e-05,
"loss": 0.9811716675758362,
"step": 382
},
{
"epoch": 0.17743803567292102,
"grad_norm": 0.875,
"learning_rate": 4.59321994927093e-05,
"loss": 1.0565159320831299,
"step": 383
},
{
"epoch": 0.17790132036136205,
"grad_norm": 0.765625,
"learning_rate": 4.5918861484662906e-05,
"loss": 1.0541253089904785,
"step": 384
},
{
"epoch": 0.17836460504980312,
"grad_norm": 0.78515625,
"learning_rate": 4.5905482546806626e-05,
"loss": 1.1991245746612549,
"step": 385
},
{
"epoch": 0.17882788973824415,
"grad_norm": 0.75390625,
"learning_rate": 4.58920627041234e-05,
"loss": 0.9940633177757263,
"step": 386
},
{
"epoch": 0.17929117442668518,
"grad_norm": 0.8046875,
"learning_rate": 4.587860198167252e-05,
"loss": 1.0880647897720337,
"step": 387
},
{
"epoch": 0.17975445911512625,
"grad_norm": 0.77734375,
"learning_rate": 4.586510040458965e-05,
"loss": 0.9566104412078857,
"step": 388
},
{
"epoch": 0.18021774380356728,
"grad_norm": 0.875,
"learning_rate": 4.585155799808672e-05,
"loss": 1.0409622192382812,
"step": 389
},
{
"epoch": 0.18068102849200834,
"grad_norm": 0.75390625,
"learning_rate": 4.583797478745191e-05,
"loss": 1.0287699699401855,
"step": 390
},
{
"epoch": 0.18114431318044938,
"grad_norm": 0.76953125,
"learning_rate": 4.58243507980496e-05,
"loss": 1.1182043552398682,
"step": 391
},
{
"epoch": 0.18160759786889044,
"grad_norm": 0.765625,
"learning_rate": 4.581068605532031e-05,
"loss": 1.040753722190857,
"step": 392
},
{
"epoch": 0.18207088255733148,
"grad_norm": 0.7265625,
"learning_rate": 4.5796980584780665e-05,
"loss": 1.105460524559021,
"step": 393
},
{
"epoch": 0.18253416724577254,
"grad_norm": 0.859375,
"learning_rate": 4.578323441202334e-05,
"loss": 0.9269900918006897,
"step": 394
},
{
"epoch": 0.18299745193421357,
"grad_norm": 0.7890625,
"learning_rate": 4.5769447562717005e-05,
"loss": 1.0313459634780884,
"step": 395
},
{
"epoch": 0.1834607366226546,
"grad_norm": 0.7265625,
"learning_rate": 4.5755620062606313e-05,
"loss": 0.9970820546150208,
"step": 396
},
{
"epoch": 0.18392402131109567,
"grad_norm": 0.83203125,
"learning_rate": 4.5741751937511796e-05,
"loss": 1.0869134664535522,
"step": 397
},
{
"epoch": 0.1843873059995367,
"grad_norm": 0.77734375,
"learning_rate": 4.572784321332987e-05,
"loss": 1.0493508577346802,
"step": 398
},
{
"epoch": 0.18485059068797777,
"grad_norm": 0.80859375,
"learning_rate": 4.571389391603275e-05,
"loss": 0.9378384947776794,
"step": 399
},
{
"epoch": 0.1853138753764188,
"grad_norm": 0.76953125,
"learning_rate": 4.56999040716684e-05,
"loss": 0.9289635419845581,
"step": 400
},
{
"epoch": 0.18577716006485986,
"grad_norm": 0.84375,
"learning_rate": 4.568587370636055e-05,
"loss": 1.065589427947998,
"step": 401
},
{
"epoch": 0.1862404447533009,
"grad_norm": 0.7734375,
"learning_rate": 4.567180284630853e-05,
"loss": 0.9970924258232117,
"step": 402
},
{
"epoch": 0.18670372944174196,
"grad_norm": 0.85546875,
"learning_rate": 4.565769151778733e-05,
"loss": 1.1094486713409424,
"step": 403
},
{
"epoch": 0.187167014130183,
"grad_norm": 0.73046875,
"learning_rate": 4.5643539747147506e-05,
"loss": 1.0456472635269165,
"step": 404
},
{
"epoch": 0.18763029881862406,
"grad_norm": 0.796875,
"learning_rate": 4.562934756081511e-05,
"loss": 1.055879831314087,
"step": 405
},
{
"epoch": 0.1880935835070651,
"grad_norm": 4.5,
"learning_rate": 4.5615114985291684e-05,
"loss": 1.4064699411392212,
"step": 406
},
{
"epoch": 0.18855686819550613,
"grad_norm": 0.890625,
"learning_rate": 4.5600842047154176e-05,
"loss": 1.0524810552597046,
"step": 407
},
{
"epoch": 0.1890201528839472,
"grad_norm": 0.77734375,
"learning_rate": 4.558652877305494e-05,
"loss": 1.052716851234436,
"step": 408
},
{
"epoch": 0.18948343757238822,
"grad_norm": 0.8046875,
"learning_rate": 4.5572175189721586e-05,
"loss": 1.1580179929733276,
"step": 409
},
{
"epoch": 0.1899467222608293,
"grad_norm": 0.80078125,
"learning_rate": 4.5557781323957055e-05,
"loss": 1.0525214672088623,
"step": 410
},
{
"epoch": 0.19041000694927032,
"grad_norm": 0.80078125,
"learning_rate": 4.5543347202639477e-05,
"loss": 0.9145269989967346,
"step": 411
},
{
"epoch": 0.19087329163771138,
"grad_norm": 0.80078125,
"learning_rate": 4.5528872852722156e-05,
"loss": 0.9920161366462708,
"step": 412
},
{
"epoch": 0.19133657632615242,
"grad_norm": 0.77734375,
"learning_rate": 4.551435830123353e-05,
"loss": 1.0678218603134155,
"step": 413
},
{
"epoch": 0.19179986101459348,
"grad_norm": 0.796875,
"learning_rate": 4.54998035752771e-05,
"loss": 1.0776031017303467,
"step": 414
},
{
"epoch": 0.19226314570303452,
"grad_norm": 0.76171875,
"learning_rate": 4.5485208702031374e-05,
"loss": 1.1205060482025146,
"step": 415
},
{
"epoch": 0.19272643039147555,
"grad_norm": 0.84375,
"learning_rate": 4.547057370874984e-05,
"loss": 1.0913411378860474,
"step": 416
},
{
"epoch": 0.1931897150799166,
"grad_norm": 0.8125,
"learning_rate": 4.545589862276091e-05,
"loss": 1.0345311164855957,
"step": 417
},
{
"epoch": 0.19365299976835765,
"grad_norm": 0.80859375,
"learning_rate": 4.544118347146784e-05,
"loss": 1.1177470684051514,
"step": 418
},
{
"epoch": 0.1941162844567987,
"grad_norm": 0.86328125,
"learning_rate": 4.542642828234873e-05,
"loss": 1.1447192430496216,
"step": 419
},
{
"epoch": 0.19457956914523974,
"grad_norm": 0.77734375,
"learning_rate": 4.5411633082956416e-05,
"loss": 1.1146210432052612,
"step": 420
},
{
"epoch": 0.1950428538336808,
"grad_norm": 0.8671875,
"learning_rate": 4.539679790091847e-05,
"loss": 1.0338633060455322,
"step": 421
},
{
"epoch": 0.19550613852212184,
"grad_norm": 0.78515625,
"learning_rate": 4.538192276393712e-05,
"loss": 1.0040104389190674,
"step": 422
},
{
"epoch": 0.1959694232105629,
"grad_norm": 0.81640625,
"learning_rate": 4.536700769978918e-05,
"loss": 1.1796895265579224,
"step": 423
},
{
"epoch": 0.19643270789900394,
"grad_norm": 0.8125,
"learning_rate": 4.535205273632605e-05,
"loss": 1.0307509899139404,
"step": 424
},
{
"epoch": 0.19689599258744497,
"grad_norm": 0.6953125,
"learning_rate": 4.533705790147362e-05,
"loss": 0.9913015365600586,
"step": 425
},
{
"epoch": 0.19735927727588604,
"grad_norm": 0.80859375,
"learning_rate": 4.532202322323224e-05,
"loss": 1.1445434093475342,
"step": 426
},
{
"epoch": 0.19782256196432707,
"grad_norm": 0.83984375,
"learning_rate": 4.530694872967666e-05,
"loss": 1.3544632196426392,
"step": 427
},
{
"epoch": 0.19828584665276813,
"grad_norm": 1.1484375,
"learning_rate": 4.5291834448955975e-05,
"loss": 0.9704390168190002,
"step": 428
},
{
"epoch": 0.19874913134120917,
"grad_norm": 0.84375,
"learning_rate": 4.5276680409293576e-05,
"loss": 0.9586291909217834,
"step": 429
},
{
"epoch": 0.19921241602965023,
"grad_norm": 0.7890625,
"learning_rate": 4.52614866389871e-05,
"loss": 1.1021863222122192,
"step": 430
},
{
"epoch": 0.19967570071809126,
"grad_norm": 0.8125,
"learning_rate": 4.5246253166408376e-05,
"loss": 1.0716869831085205,
"step": 431
},
{
"epoch": 0.20013898540653233,
"grad_norm": 0.875,
"learning_rate": 4.523098002000336e-05,
"loss": 0.9597651958465576,
"step": 432
},
{
"epoch": 0.20060227009497336,
"grad_norm": 0.6875,
"learning_rate": 4.5215667228292114e-05,
"loss": 0.910536527633667,
"step": 433
},
{
"epoch": 0.2010655547834144,
"grad_norm": 0.7734375,
"learning_rate": 4.52003148198687e-05,
"loss": 1.0383825302124023,
"step": 434
},
{
"epoch": 0.20152883947185546,
"grad_norm": 0.71484375,
"learning_rate": 4.51849228234012e-05,
"loss": 1.1084293127059937,
"step": 435
},
{
"epoch": 0.2019921241602965,
"grad_norm": 0.796875,
"learning_rate": 4.516949126763156e-05,
"loss": 1.1191846132278442,
"step": 436
},
{
"epoch": 0.20245540884873756,
"grad_norm": 0.71484375,
"learning_rate": 4.515402018137565e-05,
"loss": 0.9708357453346252,
"step": 437
},
{
"epoch": 0.2029186935371786,
"grad_norm": 0.84765625,
"learning_rate": 4.513850959352314e-05,
"loss": 1.1482406854629517,
"step": 438
},
{
"epoch": 0.20338197822561965,
"grad_norm": 0.6953125,
"learning_rate": 4.512295953303746e-05,
"loss": 0.9390287399291992,
"step": 439
},
{
"epoch": 0.2038452629140607,
"grad_norm": 0.78125,
"learning_rate": 4.510737002895574e-05,
"loss": 1.125487208366394,
"step": 440
},
{
"epoch": 0.20430854760250175,
"grad_norm": 0.76171875,
"learning_rate": 4.5091741110388775e-05,
"loss": 1.0969908237457275,
"step": 441
},
{
"epoch": 0.20477183229094278,
"grad_norm": 0.94140625,
"learning_rate": 4.507607280652096e-05,
"loss": 0.9356522560119629,
"step": 442
},
{
"epoch": 0.20523511697938382,
"grad_norm": 0.76953125,
"learning_rate": 4.506036514661024e-05,
"loss": 1.131638526916504,
"step": 443
},
{
"epoch": 0.20569840166782488,
"grad_norm": 0.76953125,
"learning_rate": 4.504461815998803e-05,
"loss": 1.0650880336761475,
"step": 444
},
{
"epoch": 0.20616168635626592,
"grad_norm": 0.7890625,
"learning_rate": 4.502883187605921e-05,
"loss": 1.0747191905975342,
"step": 445
},
{
"epoch": 0.20662497104470698,
"grad_norm": 0.859375,
"learning_rate": 4.5013006324302014e-05,
"loss": 1.0188624858856201,
"step": 446
},
{
"epoch": 0.207088255733148,
"grad_norm": 0.73046875,
"learning_rate": 4.4997141534268026e-05,
"loss": 1.117804765701294,
"step": 447
},
{
"epoch": 0.20755154042158908,
"grad_norm": 0.80859375,
"learning_rate": 4.498123753558208e-05,
"loss": 0.9615070819854736,
"step": 448
},
{
"epoch": 0.2080148251100301,
"grad_norm": 0.8984375,
"learning_rate": 4.496529435794224e-05,
"loss": 1.1164673566818237,
"step": 449
},
{
"epoch": 0.20847810979847117,
"grad_norm": 1.0078125,
"learning_rate": 4.494931203111972e-05,
"loss": 1.2451194524765015,
"step": 450
},
{
"epoch": 0.2089413944869122,
"grad_norm": 0.82421875,
"learning_rate": 4.493329058495885e-05,
"loss": 1.0730493068695068,
"step": 451
},
{
"epoch": 0.20940467917535324,
"grad_norm": 0.8203125,
"learning_rate": 4.491723004937699e-05,
"loss": 1.1245779991149902,
"step": 452
},
{
"epoch": 0.2098679638637943,
"grad_norm": 0.73828125,
"learning_rate": 4.490113045436454e-05,
"loss": 1.1549571752548218,
"step": 453
},
{
"epoch": 0.21033124855223534,
"grad_norm": 0.80859375,
"learning_rate": 4.488499182998475e-05,
"loss": 1.1194530725479126,
"step": 454
},
{
"epoch": 0.2107945332406764,
"grad_norm": 0.86328125,
"learning_rate": 4.486881420637385e-05,
"loss": 0.9296231865882874,
"step": 455
},
{
"epoch": 0.21125781792911744,
"grad_norm": 0.75,
"learning_rate": 4.4852597613740826e-05,
"loss": 0.9777655601501465,
"step": 456
},
{
"epoch": 0.2117211026175585,
"grad_norm": 0.8984375,
"learning_rate": 4.4836342082367454e-05,
"loss": 1.2194538116455078,
"step": 457
},
{
"epoch": 0.21218438730599953,
"grad_norm": 0.7578125,
"learning_rate": 4.482004764260822e-05,
"loss": 1.1565169095993042,
"step": 458
},
{
"epoch": 0.2126476719944406,
"grad_norm": 0.6796875,
"learning_rate": 4.4803714324890286e-05,
"loss": 1.0961380004882812,
"step": 459
},
{
"epoch": 0.21311095668288163,
"grad_norm": 0.83984375,
"learning_rate": 4.478734215971337e-05,
"loss": 0.9595807790756226,
"step": 460
},
{
"epoch": 0.21357424137132267,
"grad_norm": 0.74609375,
"learning_rate": 4.477093117764976e-05,
"loss": 1.0418174266815186,
"step": 461
},
{
"epoch": 0.21403752605976373,
"grad_norm": 0.8125,
"learning_rate": 4.4754481409344225e-05,
"loss": 1.1094303131103516,
"step": 462
},
{
"epoch": 0.21450081074820476,
"grad_norm": 0.765625,
"learning_rate": 4.4737992885513955e-05,
"loss": 0.8423942923545837,
"step": 463
},
{
"epoch": 0.21496409543664582,
"grad_norm": 0.84765625,
"learning_rate": 4.472146563694851e-05,
"loss": 1.148449182510376,
"step": 464
},
{
"epoch": 0.21542738012508686,
"grad_norm": 0.78515625,
"learning_rate": 4.470489969450977e-05,
"loss": 0.9094064831733704,
"step": 465
},
{
"epoch": 0.21589066481352792,
"grad_norm": 0.78515625,
"learning_rate": 4.4688295089131864e-05,
"loss": 1.0065088272094727,
"step": 466
},
{
"epoch": 0.21635394950196896,
"grad_norm": 0.69921875,
"learning_rate": 4.46716518518211e-05,
"loss": 1.0104409456253052,
"step": 467
},
{
"epoch": 0.21681723419041002,
"grad_norm": 0.75390625,
"learning_rate": 4.465497001365598e-05,
"loss": 1.0070152282714844,
"step": 468
},
{
"epoch": 0.21728051887885105,
"grad_norm": 0.76953125,
"learning_rate": 4.4638249605787e-05,
"loss": 1.160359263420105,
"step": 469
},
{
"epoch": 0.2177438035672921,
"grad_norm": 0.796875,
"learning_rate": 4.462149065943676e-05,
"loss": 1.0029304027557373,
"step": 470
},
{
"epoch": 0.21820708825573315,
"grad_norm": 0.859375,
"learning_rate": 4.4604693205899775e-05,
"loss": 1.1525189876556396,
"step": 471
},
{
"epoch": 0.21867037294417419,
"grad_norm": 0.83984375,
"learning_rate": 4.458785727654249e-05,
"loss": 0.9948219060897827,
"step": 472
},
{
"epoch": 0.21913365763261525,
"grad_norm": 0.859375,
"learning_rate": 4.457098290280319e-05,
"loss": 1.1646232604980469,
"step": 473
},
{
"epoch": 0.21959694232105628,
"grad_norm": 0.7265625,
"learning_rate": 4.455407011619194e-05,
"loss": 1.1069519519805908,
"step": 474
},
{
"epoch": 0.22006022700949734,
"grad_norm": 0.828125,
"learning_rate": 4.4537118948290546e-05,
"loss": 1.155336618423462,
"step": 475
},
{
"epoch": 0.22052351169793838,
"grad_norm": 0.78515625,
"learning_rate": 4.4520129430752487e-05,
"loss": 0.8776676058769226,
"step": 476
},
{
"epoch": 0.22098679638637944,
"grad_norm": 0.79296875,
"learning_rate": 4.4503101595302826e-05,
"loss": 1.0383992195129395,
"step": 477
},
{
"epoch": 0.22145008107482048,
"grad_norm": 0.734375,
"learning_rate": 4.448603547373822e-05,
"loss": 1.0792429447174072,
"step": 478
},
{
"epoch": 0.2219133657632615,
"grad_norm": 0.8671875,
"learning_rate": 4.4468931097926796e-05,
"loss": 1.2104331254959106,
"step": 479
},
{
"epoch": 0.22237665045170257,
"grad_norm": 0.7578125,
"learning_rate": 4.44517884998081e-05,
"loss": 1.1502063274383545,
"step": 480
},
{
"epoch": 0.2228399351401436,
"grad_norm": 0.8359375,
"learning_rate": 4.443460771139309e-05,
"loss": 1.0817224979400635,
"step": 481
},
{
"epoch": 0.22330321982858467,
"grad_norm": 0.859375,
"learning_rate": 4.441738876476401e-05,
"loss": 0.880224347114563,
"step": 482
},
{
"epoch": 0.2237665045170257,
"grad_norm": 0.765625,
"learning_rate": 4.4400131692074355e-05,
"loss": 0.9947736263275146,
"step": 483
},
{
"epoch": 0.22422978920546677,
"grad_norm": 1.1328125,
"learning_rate": 4.438283652554883e-05,
"loss": 1.0493063926696777,
"step": 484
},
{
"epoch": 0.2246930738939078,
"grad_norm": 0.80859375,
"learning_rate": 4.436550329748328e-05,
"loss": 1.156111240386963,
"step": 485
},
{
"epoch": 0.22515635858234886,
"grad_norm": 0.75,
"learning_rate": 4.4348132040244586e-05,
"loss": 1.1266316175460815,
"step": 486
},
{
"epoch": 0.2256196432707899,
"grad_norm": 0.75,
"learning_rate": 4.4330722786270686e-05,
"loss": 0.9672824740409851,
"step": 487
},
{
"epoch": 0.22608292795923093,
"grad_norm": 0.71875,
"learning_rate": 4.431327556807044e-05,
"loss": 1.073356032371521,
"step": 488
},
{
"epoch": 0.226546212647672,
"grad_norm": 0.69921875,
"learning_rate": 4.429579041822362e-05,
"loss": 1.0501450300216675,
"step": 489
},
{
"epoch": 0.22700949733611303,
"grad_norm": 1.0546875,
"learning_rate": 4.427826736938081e-05,
"loss": 1.253738522529602,
"step": 490
},
{
"epoch": 0.2274727820245541,
"grad_norm": 0.76171875,
"learning_rate": 4.426070645426339e-05,
"loss": 0.9925602078437805,
"step": 491
},
{
"epoch": 0.22793606671299513,
"grad_norm": 1.1875,
"learning_rate": 4.424310770566343e-05,
"loss": 1.0422255992889404,
"step": 492
},
{
"epoch": 0.2283993514014362,
"grad_norm": 0.77734375,
"learning_rate": 4.4225471156443644e-05,
"loss": 0.9284833669662476,
"step": 493
},
{
"epoch": 0.22886263608987722,
"grad_norm": 0.71875,
"learning_rate": 4.420779683953734e-05,
"loss": 0.9178367853164673,
"step": 494
},
{
"epoch": 0.2293259207783183,
"grad_norm": 0.890625,
"learning_rate": 4.419008478794835e-05,
"loss": 1.1734018325805664,
"step": 495
},
{
"epoch": 0.22978920546675932,
"grad_norm": 0.7734375,
"learning_rate": 4.4172335034750976e-05,
"loss": 1.1945644617080688,
"step": 496
},
{
"epoch": 0.23025249015520038,
"grad_norm": 0.9921875,
"learning_rate": 4.415454761308991e-05,
"loss": 1.0181314945220947,
"step": 497
},
{
"epoch": 0.23071577484364142,
"grad_norm": 0.97265625,
"learning_rate": 4.413672255618019e-05,
"loss": 1.0368403196334839,
"step": 498
},
{
"epoch": 0.23117905953208245,
"grad_norm": 0.78125,
"learning_rate": 4.411885989730713e-05,
"loss": 1.0601047277450562,
"step": 499
},
{
"epoch": 0.23164234422052352,
"grad_norm": 0.77734375,
"learning_rate": 4.410095966982626e-05,
"loss": 0.901918888092041,
"step": 500
},
{
"epoch": 0.23210562890896455,
"grad_norm": 0.91796875,
"learning_rate": 4.408302190716327e-05,
"loss": 1.1393412351608276,
"step": 501
},
{
"epoch": 0.2325689135974056,
"grad_norm": 0.8984375,
"learning_rate": 4.406504664281392e-05,
"loss": 1.2997722625732422,
"step": 502
},
{
"epoch": 0.23303219828584665,
"grad_norm": 0.7890625,
"learning_rate": 4.4047033910344015e-05,
"loss": 0.889095664024353,
"step": 503
},
{
"epoch": 0.2334954829742877,
"grad_norm": 0.8359375,
"learning_rate": 4.4028983743389327e-05,
"loss": 1.096193790435791,
"step": 504
},
{
"epoch": 0.23395876766272874,
"grad_norm": 0.8125,
"learning_rate": 4.4010896175655516e-05,
"loss": 0.8893133997917175,
"step": 505
},
{
"epoch": 0.2344220523511698,
"grad_norm": 0.8359375,
"learning_rate": 4.39927712409181e-05,
"loss": 1.0893774032592773,
"step": 506
},
{
"epoch": 0.23488533703961084,
"grad_norm": 0.80859375,
"learning_rate": 4.397460897302237e-05,
"loss": 1.2915987968444824,
"step": 507
},
{
"epoch": 0.23534862172805188,
"grad_norm": 0.83984375,
"learning_rate": 4.395640940588332e-05,
"loss": 1.1468744277954102,
"step": 508
},
{
"epoch": 0.23581190641649294,
"grad_norm": 0.875,
"learning_rate": 4.3938172573485584e-05,
"loss": 1.081978678703308,
"step": 509
},
{
"epoch": 0.23627519110493397,
"grad_norm": 0.921875,
"learning_rate": 4.391989850988342e-05,
"loss": 1.1137498617172241,
"step": 510
},
{
"epoch": 0.23673847579337504,
"grad_norm": 0.78125,
"learning_rate": 4.390158724920058e-05,
"loss": 0.9725139141082764,
"step": 511
},
{
"epoch": 0.23720176048181607,
"grad_norm": 0.78125,
"learning_rate": 4.388323882563028e-05,
"loss": 1.2336326837539673,
"step": 512
},
{
"epoch": 0.23766504517025713,
"grad_norm": 0.734375,
"learning_rate": 4.3864853273435136e-05,
"loss": 0.9616613984107971,
"step": 513
},
{
"epoch": 0.23812832985869817,
"grad_norm": 0.734375,
"learning_rate": 4.384643062694709e-05,
"loss": 0.9157605767250061,
"step": 514
},
{
"epoch": 0.23859161454713923,
"grad_norm": 0.91015625,
"learning_rate": 4.382797092056735e-05,
"loss": 1.1036900281906128,
"step": 515
},
{
"epoch": 0.23905489923558026,
"grad_norm": 0.87109375,
"learning_rate": 4.380947418876636e-05,
"loss": 0.9066743850708008,
"step": 516
},
{
"epoch": 0.2395181839240213,
"grad_norm": 0.8515625,
"learning_rate": 4.379094046608364e-05,
"loss": 1.0424668788909912,
"step": 517
},
{
"epoch": 0.23998146861246236,
"grad_norm": 0.8515625,
"learning_rate": 4.3772369787127826e-05,
"loss": 1.0981203317642212,
"step": 518
},
{
"epoch": 0.2404447533009034,
"grad_norm": 0.8125,
"learning_rate": 4.3753762186576575e-05,
"loss": 1.098775863647461,
"step": 519
},
{
"epoch": 0.24090803798934446,
"grad_norm": 0.78515625,
"learning_rate": 4.3735117699176455e-05,
"loss": 0.8571038246154785,
"step": 520
},
{
"epoch": 0.2413713226777855,
"grad_norm": 0.80859375,
"learning_rate": 4.3716436359742935e-05,
"loss": 0.991769552230835,
"step": 521
},
{
"epoch": 0.24183460736622656,
"grad_norm": 0.80078125,
"learning_rate": 4.369771820316029e-05,
"loss": 1.2347557544708252,
"step": 522
},
{
"epoch": 0.2422978920546676,
"grad_norm": 0.796875,
"learning_rate": 4.367896326438155e-05,
"loss": 0.941724956035614,
"step": 523
},
{
"epoch": 0.24276117674310865,
"grad_norm": 0.8046875,
"learning_rate": 4.366017157842844e-05,
"loss": 0.9241411685943604,
"step": 524
},
{
"epoch": 0.2432244614315497,
"grad_norm": 0.8359375,
"learning_rate": 4.3641343180391275e-05,
"loss": 1.107820987701416,
"step": 525
},
{
"epoch": 0.24368774611999072,
"grad_norm": 0.83984375,
"learning_rate": 4.362247810542894e-05,
"loss": 1.052571177482605,
"step": 526
},
{
"epoch": 0.24415103080843178,
"grad_norm": 0.75,
"learning_rate": 4.360357638876883e-05,
"loss": 0.8987835645675659,
"step": 527
},
{
"epoch": 0.24461431549687282,
"grad_norm": 0.83203125,
"learning_rate": 4.3584638065706724e-05,
"loss": 1.1791050434112549,
"step": 528
},
{
"epoch": 0.24507760018531388,
"grad_norm": 0.77734375,
"learning_rate": 4.356566317160677e-05,
"loss": 1.173535704612732,
"step": 529
},
{
"epoch": 0.24554088487375492,
"grad_norm": 0.87890625,
"learning_rate": 4.354665174190142e-05,
"loss": 0.8905298709869385,
"step": 530
},
{
"epoch": 0.24600416956219598,
"grad_norm": 0.92578125,
"learning_rate": 4.352760381209135e-05,
"loss": 1.2305561304092407,
"step": 531
},
{
"epoch": 0.246467454250637,
"grad_norm": 0.75390625,
"learning_rate": 4.350851941774537e-05,
"loss": 1.010733723640442,
"step": 532
},
{
"epoch": 0.24693073893907808,
"grad_norm": 0.76953125,
"learning_rate": 4.34893985945004e-05,
"loss": 1.0408154726028442,
"step": 533
},
{
"epoch": 0.2473940236275191,
"grad_norm": 0.9375,
"learning_rate": 4.347024137806139e-05,
"loss": 1.157252550125122,
"step": 534
},
{
"epoch": 0.24785730831596015,
"grad_norm": 0.859375,
"learning_rate": 4.345104780420122e-05,
"loss": 1.2410048246383667,
"step": 535
},
{
"epoch": 0.2483205930044012,
"grad_norm": 0.8359375,
"learning_rate": 4.34318179087607e-05,
"loss": 1.0325219631195068,
"step": 536
},
{
"epoch": 0.24878387769284224,
"grad_norm": 0.859375,
"learning_rate": 4.3412551727648435e-05,
"loss": 1.166888952255249,
"step": 537
},
{
"epoch": 0.2492471623812833,
"grad_norm": 0.8046875,
"learning_rate": 4.33932492968408e-05,
"loss": 1.1053187847137451,
"step": 538
},
{
"epoch": 0.24971044706972434,
"grad_norm": 0.796875,
"learning_rate": 4.337391065238187e-05,
"loss": 1.1279836893081665,
"step": 539
},
{
"epoch": 0.2501737317581654,
"grad_norm": 0.72265625,
"learning_rate": 4.335453583038331e-05,
"loss": 1.048471450805664,
"step": 540
},
{
"epoch": 0.25063701644660646,
"grad_norm": 0.828125,
"learning_rate": 4.333512486702438e-05,
"loss": 0.949547290802002,
"step": 541
},
{
"epoch": 0.2511003011350475,
"grad_norm": 0.80859375,
"learning_rate": 4.331567779855179e-05,
"loss": 1.026901364326477,
"step": 542
},
{
"epoch": 0.25156358582348853,
"grad_norm": 0.88671875,
"learning_rate": 4.3296194661279704e-05,
"loss": 0.979106605052948,
"step": 543
},
{
"epoch": 0.25202687051192957,
"grad_norm": 0.8046875,
"learning_rate": 4.327667549158962e-05,
"loss": 0.9530601501464844,
"step": 544
},
{
"epoch": 0.2524901552003706,
"grad_norm": 0.9375,
"learning_rate": 4.325712032593032e-05,
"loss": 1.4264435768127441,
"step": 545
},
{
"epoch": 0.2529534398888117,
"grad_norm": 0.875,
"learning_rate": 4.3237529200817824e-05,
"loss": 1.0840469598770142,
"step": 546
},
{
"epoch": 0.2534167245772527,
"grad_norm": 0.8359375,
"learning_rate": 4.321790215283526e-05,
"loss": 0.9668251872062683,
"step": 547
},
{
"epoch": 0.25388000926569376,
"grad_norm": 0.72265625,
"learning_rate": 4.3198239218632874e-05,
"loss": 1.0909249782562256,
"step": 548
},
{
"epoch": 0.2543432939541348,
"grad_norm": 0.73828125,
"learning_rate": 4.317854043492791e-05,
"loss": 1.1680148839950562,
"step": 549
},
{
"epoch": 0.2548065786425759,
"grad_norm": 0.82421875,
"learning_rate": 4.315880583850454e-05,
"loss": 1.0644400119781494,
"step": 550
},
{
"epoch": 0.2552698633310169,
"grad_norm": 0.76171875,
"learning_rate": 4.313903546621384e-05,
"loss": 1.0424561500549316,
"step": 551
},
{
"epoch": 0.25573314801945796,
"grad_norm": 0.80859375,
"learning_rate": 4.3119229354973664e-05,
"loss": 0.989732563495636,
"step": 552
},
{
"epoch": 0.256196432707899,
"grad_norm": 0.82421875,
"learning_rate": 4.309938754176862e-05,
"loss": 1.0276066064834595,
"step": 553
},
{
"epoch": 0.25665971739634,
"grad_norm": 0.8125,
"learning_rate": 4.307951006364998e-05,
"loss": 1.0524067878723145,
"step": 554
},
{
"epoch": 0.2571230020847811,
"grad_norm": 0.984375,
"learning_rate": 4.3059596957735606e-05,
"loss": 1.1999335289001465,
"step": 555
},
{
"epoch": 0.25758628677322215,
"grad_norm": 0.74609375,
"learning_rate": 4.3039648261209896e-05,
"loss": 1.0140695571899414,
"step": 556
},
{
"epoch": 0.2580495714616632,
"grad_norm": 0.82421875,
"learning_rate": 4.3019664011323705e-05,
"loss": 1.0452879667282104,
"step": 557
},
{
"epoch": 0.2585128561501042,
"grad_norm": 0.79296875,
"learning_rate": 4.2999644245394275e-05,
"loss": 1.1013998985290527,
"step": 558
},
{
"epoch": 0.2589761408385453,
"grad_norm": 0.87890625,
"learning_rate": 4.297958900080519e-05,
"loss": 0.9173800945281982,
"step": 559
},
{
"epoch": 0.25943942552698634,
"grad_norm": 0.7734375,
"learning_rate": 4.295949831500624e-05,
"loss": 1.2523088455200195,
"step": 560
},
{
"epoch": 0.2599027102154274,
"grad_norm": 0.78125,
"learning_rate": 4.293937222551345e-05,
"loss": 0.9910227060317993,
"step": 561
},
{
"epoch": 0.2603659949038684,
"grad_norm": 0.83203125,
"learning_rate": 4.2919210769908905e-05,
"loss": 1.015892744064331,
"step": 562
},
{
"epoch": 0.26082927959230945,
"grad_norm": 0.8046875,
"learning_rate": 4.289901398584077e-05,
"loss": 1.1399426460266113,
"step": 563
},
{
"epoch": 0.26129256428075054,
"grad_norm": 0.796875,
"learning_rate": 4.287878191102316e-05,
"loss": 0.9163965582847595,
"step": 564
},
{
"epoch": 0.2617558489691916,
"grad_norm": 0.69921875,
"learning_rate": 4.285851458323608e-05,
"loss": 1.0406631231307983,
"step": 565
},
{
"epoch": 0.2622191336576326,
"grad_norm": 0.87109375,
"learning_rate": 4.283821204032539e-05,
"loss": 0.952318549156189,
"step": 566
},
{
"epoch": 0.26268241834607364,
"grad_norm": 0.921875,
"learning_rate": 4.281787432020269e-05,
"loss": 1.0432265996932983,
"step": 567
},
{
"epoch": 0.26314570303451473,
"grad_norm": 0.8359375,
"learning_rate": 4.279750146084527e-05,
"loss": 1.1223399639129639,
"step": 568
},
{
"epoch": 0.26360898772295577,
"grad_norm": 0.80859375,
"learning_rate": 4.2777093500296055e-05,
"loss": 1.0468631982803345,
"step": 569
},
{
"epoch": 0.2640722724113968,
"grad_norm": 0.76953125,
"learning_rate": 4.2756650476663475e-05,
"loss": 1.0525509119033813,
"step": 570
},
{
"epoch": 0.26453555709983784,
"grad_norm": 0.80078125,
"learning_rate": 4.273617242812147e-05,
"loss": 0.9959677457809448,
"step": 571
},
{
"epoch": 0.26499884178827887,
"grad_norm": 0.8359375,
"learning_rate": 4.271565939290939e-05,
"loss": 0.8867281675338745,
"step": 572
},
{
"epoch": 0.26546212647671996,
"grad_norm": 0.79296875,
"learning_rate": 4.269511140933187e-05,
"loss": 1.0435187816619873,
"step": 573
},
{
"epoch": 0.265925411165161,
"grad_norm": 0.74609375,
"learning_rate": 4.267452851575886e-05,
"loss": 0.9710588455200195,
"step": 574
},
{
"epoch": 0.26638869585360203,
"grad_norm": 0.828125,
"learning_rate": 4.2653910750625455e-05,
"loss": 1.3287699222564697,
"step": 575
},
{
"epoch": 0.26685198054204307,
"grad_norm": 0.80859375,
"learning_rate": 4.2633258152431896e-05,
"loss": 1.0961614847183228,
"step": 576
},
{
"epoch": 0.26731526523048416,
"grad_norm": 0.77734375,
"learning_rate": 4.261257075974345e-05,
"loss": 0.9179559946060181,
"step": 577
},
{
"epoch": 0.2677785499189252,
"grad_norm": 0.98828125,
"learning_rate": 4.2591848611190364e-05,
"loss": 1.1007611751556396,
"step": 578
},
{
"epoch": 0.2682418346073662,
"grad_norm": 0.75,
"learning_rate": 4.257109174546781e-05,
"loss": 1.0560678243637085,
"step": 579
},
{
"epoch": 0.26870511929580726,
"grad_norm": 0.74609375,
"learning_rate": 4.2550300201335725e-05,
"loss": 1.0741382837295532,
"step": 580
},
{
"epoch": 0.2691684039842483,
"grad_norm": 0.76171875,
"learning_rate": 4.252947401761887e-05,
"loss": 0.9691828489303589,
"step": 581
},
{
"epoch": 0.2696316886726894,
"grad_norm": 0.76953125,
"learning_rate": 4.250861323320666e-05,
"loss": 1.2666388750076294,
"step": 582
},
{
"epoch": 0.2700949733611304,
"grad_norm": 0.8203125,
"learning_rate": 4.248771788705311e-05,
"loss": 0.8697996735572815,
"step": 583
},
{
"epoch": 0.27055825804957145,
"grad_norm": 0.80078125,
"learning_rate": 4.24667880181768e-05,
"loss": 0.9978750348091125,
"step": 584
},
{
"epoch": 0.2710215427380125,
"grad_norm": 0.8203125,
"learning_rate": 4.244582366566075e-05,
"loss": 1.2406501770019531,
"step": 585
},
{
"epoch": 0.2714848274264536,
"grad_norm": 0.7890625,
"learning_rate": 4.24248248686524e-05,
"loss": 1.0365958213806152,
"step": 586
},
{
"epoch": 0.2719481121148946,
"grad_norm": 0.8359375,
"learning_rate": 4.240379166636347e-05,
"loss": 0.9354648590087891,
"step": 587
},
{
"epoch": 0.27241139680333565,
"grad_norm": 0.796875,
"learning_rate": 4.238272409806997e-05,
"loss": 1.082112431526184,
"step": 588
},
{
"epoch": 0.2728746814917767,
"grad_norm": 0.828125,
"learning_rate": 4.2361622203112054e-05,
"loss": 1.1324368715286255,
"step": 589
},
{
"epoch": 0.2733379661802177,
"grad_norm": 0.76953125,
"learning_rate": 4.234048602089398e-05,
"loss": 0.9812889695167542,
"step": 590
},
{
"epoch": 0.2738012508686588,
"grad_norm": 0.7890625,
"learning_rate": 4.231931559088404e-05,
"loss": 0.9894756078720093,
"step": 591
},
{
"epoch": 0.27426453555709984,
"grad_norm": 0.8125,
"learning_rate": 4.2298110952614474e-05,
"loss": 1.006495475769043,
"step": 592
},
{
"epoch": 0.2747278202455409,
"grad_norm": 0.87109375,
"learning_rate": 4.22768721456814e-05,
"loss": 1.0490379333496094,
"step": 593
},
{
"epoch": 0.2751911049339819,
"grad_norm": 0.77734375,
"learning_rate": 4.225559920974473e-05,
"loss": 1.0940771102905273,
"step": 594
},
{
"epoch": 0.275654389622423,
"grad_norm": 0.84765625,
"learning_rate": 4.223429218452812e-05,
"loss": 1.1644221544265747,
"step": 595
},
{
"epoch": 0.27611767431086404,
"grad_norm": 0.75390625,
"learning_rate": 4.2212951109818895e-05,
"loss": 0.9143954515457153,
"step": 596
},
{
"epoch": 0.27658095899930507,
"grad_norm": 0.7734375,
"learning_rate": 4.219157602546792e-05,
"loss": 0.9301037192344666,
"step": 597
},
{
"epoch": 0.2770442436877461,
"grad_norm": 0.69921875,
"learning_rate": 4.217016697138961e-05,
"loss": 0.889419436454773,
"step": 598
},
{
"epoch": 0.27750752837618714,
"grad_norm": 0.83203125,
"learning_rate": 4.2148723987561786e-05,
"loss": 1.0732734203338623,
"step": 599
},
{
"epoch": 0.27797081306462823,
"grad_norm": 0.76953125,
"learning_rate": 4.212724711402563e-05,
"loss": 1.0122696161270142,
"step": 600
},
{
"epoch": 0.27843409775306927,
"grad_norm": 0.83203125,
"learning_rate": 4.2105736390885625e-05,
"loss": 0.8314121961593628,
"step": 601
},
{
"epoch": 0.2788973824415103,
"grad_norm": 0.75,
"learning_rate": 4.208419185830945e-05,
"loss": 1.0255941152572632,
"step": 602
},
{
"epoch": 0.27936066712995133,
"grad_norm": 0.7265625,
"learning_rate": 4.206261355652791e-05,
"loss": 1.0456650257110596,
"step": 603
},
{
"epoch": 0.2798239518183924,
"grad_norm": 0.74609375,
"learning_rate": 4.204100152583488e-05,
"loss": 0.9093706607818604,
"step": 604
},
{
"epoch": 0.28028723650683346,
"grad_norm": 2.3125,
"learning_rate": 4.201935580658723e-05,
"loss": 1.0478147268295288,
"step": 605
},
{
"epoch": 0.2807505211952745,
"grad_norm": 0.8359375,
"learning_rate": 4.199767643920469e-05,
"loss": 1.1493206024169922,
"step": 606
},
{
"epoch": 0.28121380588371553,
"grad_norm": 0.75,
"learning_rate": 4.197596346416988e-05,
"loss": 1.017486810684204,
"step": 607
},
{
"epoch": 0.28167709057215656,
"grad_norm": 0.84765625,
"learning_rate": 4.195421692202815e-05,
"loss": 1.1652302742004395,
"step": 608
},
{
"epoch": 0.28214037526059765,
"grad_norm": 0.84375,
"learning_rate": 4.1932436853387514e-05,
"loss": 0.9417747259140015,
"step": 609
},
{
"epoch": 0.2826036599490387,
"grad_norm": 0.74609375,
"learning_rate": 4.191062329891863e-05,
"loss": 0.8607147932052612,
"step": 610
},
{
"epoch": 0.2830669446374797,
"grad_norm": 0.92578125,
"learning_rate": 4.1888776299354656e-05,
"loss": 1.025602102279663,
"step": 611
},
{
"epoch": 0.28353022932592076,
"grad_norm": 0.8046875,
"learning_rate": 4.186689589549121e-05,
"loss": 1.0558090209960938,
"step": 612
},
{
"epoch": 0.28399351401436185,
"grad_norm": 0.8828125,
"learning_rate": 4.1844982128186294e-05,
"loss": 0.9318227171897888,
"step": 613
},
{
"epoch": 0.2844567987028029,
"grad_norm": 0.8828125,
"learning_rate": 4.18230350383602e-05,
"loss": 0.9264402389526367,
"step": 614
},
{
"epoch": 0.2849200833912439,
"grad_norm": 0.80859375,
"learning_rate": 4.1801054666995453e-05,
"loss": 1.160361647605896,
"step": 615
},
{
"epoch": 0.28538336807968495,
"grad_norm": 0.87890625,
"learning_rate": 4.177904105513673e-05,
"loss": 0.902491569519043,
"step": 616
},
{
"epoch": 0.285846652768126,
"grad_norm": 0.81640625,
"learning_rate": 4.175699424389075e-05,
"loss": 1.1254316568374634,
"step": 617
},
{
"epoch": 0.2863099374565671,
"grad_norm": 0.7421875,
"learning_rate": 4.173491427442627e-05,
"loss": 0.8220522999763489,
"step": 618
},
{
"epoch": 0.2867732221450081,
"grad_norm": 0.859375,
"learning_rate": 4.1712801187973925e-05,
"loss": 1.1775267124176025,
"step": 619
},
{
"epoch": 0.28723650683344915,
"grad_norm": 0.8203125,
"learning_rate": 4.1690655025826225e-05,
"loss": 1.0950840711593628,
"step": 620
},
{
"epoch": 0.2876997915218902,
"grad_norm": 0.7734375,
"learning_rate": 4.166847582933741e-05,
"loss": 1.2453440427780151,
"step": 621
},
{
"epoch": 0.28816307621033127,
"grad_norm": 0.79296875,
"learning_rate": 4.164626363992343e-05,
"loss": 0.9733505845069885,
"step": 622
},
{
"epoch": 0.2886263608987723,
"grad_norm": 0.8984375,
"learning_rate": 4.162401849906183e-05,
"loss": 1.0875972509384155,
"step": 623
},
{
"epoch": 0.28908964558721334,
"grad_norm": 0.8359375,
"learning_rate": 4.16017404482917e-05,
"loss": 1.1624879837036133,
"step": 624
},
{
"epoch": 0.2895529302756544,
"grad_norm": 1.125,
"learning_rate": 4.1579429529213564e-05,
"loss": 1.1447054147720337,
"step": 625
},
{
"epoch": 0.2900162149640954,
"grad_norm": 0.70703125,
"learning_rate": 4.155708578348935e-05,
"loss": 0.92429119348526,
"step": 626
},
{
"epoch": 0.2904794996525365,
"grad_norm": 0.8125,
"learning_rate": 4.1534709252842254e-05,
"loss": 0.9543266296386719,
"step": 627
},
{
"epoch": 0.29094278434097753,
"grad_norm": 0.89453125,
"learning_rate": 4.151229997905672e-05,
"loss": 1.0998059511184692,
"step": 628
},
{
"epoch": 0.29140606902941857,
"grad_norm": 0.77734375,
"learning_rate": 4.148985800397831e-05,
"loss": 0.9753661155700684,
"step": 629
},
{
"epoch": 0.2918693537178596,
"grad_norm": 0.84375,
"learning_rate": 4.146738336951367e-05,
"loss": 0.9926996231079102,
"step": 630
},
{
"epoch": 0.2923326384063007,
"grad_norm": 0.671875,
"learning_rate": 4.144487611763041e-05,
"loss": 0.9744971394538879,
"step": 631
},
{
"epoch": 0.29279592309474173,
"grad_norm": 0.84375,
"learning_rate": 4.142233629035706e-05,
"loss": 1.1101515293121338,
"step": 632
},
{
"epoch": 0.29325920778318276,
"grad_norm": 0.7421875,
"learning_rate": 4.1399763929783e-05,
"loss": 1.1098037958145142,
"step": 633
},
{
"epoch": 0.2937224924716238,
"grad_norm": 0.83984375,
"learning_rate": 4.137715907805832e-05,
"loss": 1.0720516443252563,
"step": 634
},
{
"epoch": 0.2941857771600649,
"grad_norm": 0.7734375,
"learning_rate": 4.135452177739382e-05,
"loss": 1.0267348289489746,
"step": 635
},
{
"epoch": 0.2946490618485059,
"grad_norm": 0.76171875,
"learning_rate": 4.133185207006086e-05,
"loss": 0.9987479448318481,
"step": 636
},
{
"epoch": 0.29511234653694696,
"grad_norm": 0.74609375,
"learning_rate": 4.130914999839133e-05,
"loss": 0.9802069664001465,
"step": 637
},
{
"epoch": 0.295575631225388,
"grad_norm": 0.84765625,
"learning_rate": 4.128641560477756e-05,
"loss": 1.0791590213775635,
"step": 638
},
{
"epoch": 0.296038915913829,
"grad_norm": 0.7109375,
"learning_rate": 4.1263648931672234e-05,
"loss": 0.8927035927772522,
"step": 639
},
{
"epoch": 0.2965022006022701,
"grad_norm": 0.75,
"learning_rate": 4.124085002158829e-05,
"loss": 0.9619215726852417,
"step": 640
},
{
"epoch": 0.29696548529071115,
"grad_norm": 0.953125,
"learning_rate": 4.12180189170989e-05,
"loss": 1.0131444931030273,
"step": 641
},
{
"epoch": 0.2974287699791522,
"grad_norm": 0.8203125,
"learning_rate": 4.119515566083733e-05,
"loss": 1.0167940855026245,
"step": 642
},
{
"epoch": 0.2978920546675932,
"grad_norm": 0.94140625,
"learning_rate": 4.117226029549689e-05,
"loss": 1.1122088432312012,
"step": 643
},
{
"epoch": 0.2983553393560343,
"grad_norm": 0.8125,
"learning_rate": 4.114933286383084e-05,
"loss": 0.8055898547172546,
"step": 644
},
{
"epoch": 0.29881862404447534,
"grad_norm": 0.8515625,
"learning_rate": 4.112637340865234e-05,
"loss": 1.1266543865203857,
"step": 645
},
{
"epoch": 0.2992819087329164,
"grad_norm": 0.79296875,
"learning_rate": 4.110338197283431e-05,
"loss": 1.0558011531829834,
"step": 646
},
{
"epoch": 0.2997451934213574,
"grad_norm": 0.9140625,
"learning_rate": 4.108035859930944e-05,
"loss": 1.0644391775131226,
"step": 647
},
{
"epoch": 0.30020847810979845,
"grad_norm": 0.7578125,
"learning_rate": 4.105730333107003e-05,
"loss": 1.043839454650879,
"step": 648
},
{
"epoch": 0.30067176279823954,
"grad_norm": 0.9375,
"learning_rate": 4.1034216211167914e-05,
"loss": 1.147243857383728,
"step": 649
},
{
"epoch": 0.3011350474866806,
"grad_norm": 0.76953125,
"learning_rate": 4.1011097282714454e-05,
"loss": 1.051954984664917,
"step": 650
},
{
"epoch": 0.3015983321751216,
"grad_norm": 0.82421875,
"learning_rate": 4.0987946588880385e-05,
"loss": 1.002161979675293,
"step": 651
},
{
"epoch": 0.30206161686356264,
"grad_norm": 0.8359375,
"learning_rate": 4.096476417289574e-05,
"loss": 0.9687187671661377,
"step": 652
},
{
"epoch": 0.30252490155200373,
"grad_norm": 0.796875,
"learning_rate": 4.094155007804981e-05,
"loss": 1.1040300130844116,
"step": 653
},
{
"epoch": 0.30298818624044477,
"grad_norm": 0.83203125,
"learning_rate": 4.091830434769105e-05,
"loss": 1.0147384405136108,
"step": 654
},
{
"epoch": 0.3034514709288858,
"grad_norm": 0.70703125,
"learning_rate": 4.089502702522696e-05,
"loss": 0.8908687233924866,
"step": 655
},
{
"epoch": 0.30391475561732684,
"grad_norm": 0.9140625,
"learning_rate": 4.087171815412406e-05,
"loss": 1.1389329433441162,
"step": 656
},
{
"epoch": 0.30437804030576787,
"grad_norm": 0.78515625,
"learning_rate": 4.0848377777907765e-05,
"loss": 1.061093807220459,
"step": 657
},
{
"epoch": 0.30484132499420896,
"grad_norm": 0.81640625,
"learning_rate": 4.0825005940162326e-05,
"loss": 1.0024491548538208,
"step": 658
},
{
"epoch": 0.30530460968265,
"grad_norm": 0.81640625,
"learning_rate": 4.080160268453075e-05,
"loss": 1.2541595697402954,
"step": 659
},
{
"epoch": 0.30576789437109103,
"grad_norm": 0.75390625,
"learning_rate": 4.07781680547147e-05,
"loss": 0.9933417439460754,
"step": 660
},
{
"epoch": 0.30623117905953207,
"grad_norm": 0.828125,
"learning_rate": 4.075470209447442e-05,
"loss": 1.053157091140747,
"step": 661
},
{
"epoch": 0.30669446374797316,
"grad_norm": 0.91015625,
"learning_rate": 4.073120484762868e-05,
"loss": 1.1531765460968018,
"step": 662
},
{
"epoch": 0.3071577484364142,
"grad_norm": 0.81640625,
"learning_rate": 4.070767635805466e-05,
"loss": 1.125023603439331,
"step": 663
},
{
"epoch": 0.3076210331248552,
"grad_norm": 0.8046875,
"learning_rate": 4.068411666968788e-05,
"loss": 0.8804372549057007,
"step": 664
},
{
"epoch": 0.30808431781329626,
"grad_norm": 0.91015625,
"learning_rate": 4.066052582652213e-05,
"loss": 1.0438697338104248,
"step": 665
},
{
"epoch": 0.3085476025017373,
"grad_norm": 0.796875,
"learning_rate": 4.0636903872609336e-05,
"loss": 0.9848630428314209,
"step": 666
},
{
"epoch": 0.3090108871901784,
"grad_norm": 0.7578125,
"learning_rate": 4.061325085205958e-05,
"loss": 1.0336278676986694,
"step": 667
},
{
"epoch": 0.3094741718786194,
"grad_norm": 0.80078125,
"learning_rate": 4.058956680904091e-05,
"loss": 1.0741722583770752,
"step": 668
},
{
"epoch": 0.30993745656706045,
"grad_norm": 1.03125,
"learning_rate": 4.0565851787779316e-05,
"loss": 1.1203691959381104,
"step": 669
},
{
"epoch": 0.3104007412555015,
"grad_norm": 1.046875,
"learning_rate": 4.054210583255864e-05,
"loss": 1.099678874015808,
"step": 670
},
{
"epoch": 0.3108640259439426,
"grad_norm": 0.73046875,
"learning_rate": 4.051832898772048e-05,
"loss": 1.0561059713363647,
"step": 671
},
{
"epoch": 0.3113273106323836,
"grad_norm": 0.79296875,
"learning_rate": 4.049452129766413e-05,
"loss": 1.186478853225708,
"step": 672
},
{
"epoch": 0.31179059532082465,
"grad_norm": 0.73828125,
"learning_rate": 4.047068280684646e-05,
"loss": 0.9179085493087769,
"step": 673
},
{
"epoch": 0.3122538800092657,
"grad_norm": 0.83203125,
"learning_rate": 4.044681355978187e-05,
"loss": 0.9686939716339111,
"step": 674
},
{
"epoch": 0.3127171646977067,
"grad_norm": 0.875,
"learning_rate": 4.042291360104219e-05,
"loss": 1.121710181236267,
"step": 675
},
{
"epoch": 0.3131804493861478,
"grad_norm": 0.85546875,
"learning_rate": 4.03989829752566e-05,
"loss": 1.0991014242172241,
"step": 676
},
{
"epoch": 0.31364373407458884,
"grad_norm": 0.75,
"learning_rate": 4.0375021727111543e-05,
"loss": 1.0645341873168945,
"step": 677
},
{
"epoch": 0.3141070187630299,
"grad_norm": 0.78125,
"learning_rate": 4.0351029901350636e-05,
"loss": 1.1132837533950806,
"step": 678
},
{
"epoch": 0.3145703034514709,
"grad_norm": 0.7265625,
"learning_rate": 4.032700754277461e-05,
"loss": 1.0442454814910889,
"step": 679
},
{
"epoch": 0.315033588139912,
"grad_norm": 0.7578125,
"learning_rate": 4.0302954696241206e-05,
"loss": 0.9802740812301636,
"step": 680
},
{
"epoch": 0.31549687282835304,
"grad_norm": 0.984375,
"learning_rate": 4.02788714066651e-05,
"loss": 0.9827386140823364,
"step": 681
},
{
"epoch": 0.31596015751679407,
"grad_norm": 0.8515625,
"learning_rate": 4.025475771901779e-05,
"loss": 1.1004528999328613,
"step": 682
},
{
"epoch": 0.3164234422052351,
"grad_norm": 0.8203125,
"learning_rate": 4.023061367832757e-05,
"loss": 1.0051753520965576,
"step": 683
},
{
"epoch": 0.31688672689367614,
"grad_norm": 0.83984375,
"learning_rate": 4.02064393296794e-05,
"loss": 0.9524490833282471,
"step": 684
},
{
"epoch": 0.31735001158211723,
"grad_norm": 0.828125,
"learning_rate": 4.018223471821483e-05,
"loss": 1.079671859741211,
"step": 685
},
{
"epoch": 0.31781329627055827,
"grad_norm": 0.7578125,
"learning_rate": 4.0157999889131936e-05,
"loss": 0.912105917930603,
"step": 686
},
{
"epoch": 0.3182765809589993,
"grad_norm": 0.82421875,
"learning_rate": 4.01337348876852e-05,
"loss": 1.2040618658065796,
"step": 687
},
{
"epoch": 0.31873986564744033,
"grad_norm": 0.7578125,
"learning_rate": 4.0109439759185465e-05,
"loss": 0.8994999527931213,
"step": 688
},
{
"epoch": 0.3192031503358814,
"grad_norm": 0.75390625,
"learning_rate": 4.0085114548999816e-05,
"loss": 1.244059681892395,
"step": 689
},
{
"epoch": 0.31966643502432246,
"grad_norm": 0.71875,
"learning_rate": 4.006075930255152e-05,
"loss": 1.126865029335022,
"step": 690
},
{
"epoch": 0.3201297197127635,
"grad_norm": 0.8203125,
"learning_rate": 4.003637406531992e-05,
"loss": 1.0775233507156372,
"step": 691
},
{
"epoch": 0.32059300440120453,
"grad_norm": 0.94140625,
"learning_rate": 4.001195888284037e-05,
"loss": 1.0177921056747437,
"step": 692
},
{
"epoch": 0.32105628908964556,
"grad_norm": 0.87890625,
"learning_rate": 3.998751380070416e-05,
"loss": 1.057099461555481,
"step": 693
},
{
"epoch": 0.32151957377808665,
"grad_norm": 0.85546875,
"learning_rate": 3.9963038864558385e-05,
"loss": 0.9934321045875549,
"step": 694
},
{
"epoch": 0.3219828584665277,
"grad_norm": 0.7734375,
"learning_rate": 3.993853412010589e-05,
"loss": 0.9183391332626343,
"step": 695
},
{
"epoch": 0.3224461431549687,
"grad_norm": 0.75390625,
"learning_rate": 3.9913999613105204e-05,
"loss": 0.9654147624969482,
"step": 696
},
{
"epoch": 0.32290942784340976,
"grad_norm": 0.76171875,
"learning_rate": 3.98894353893704e-05,
"loss": 1.0339151620864868,
"step": 697
},
{
"epoch": 0.32337271253185085,
"grad_norm": 0.89453125,
"learning_rate": 3.986484149477107e-05,
"loss": 0.9817367792129517,
"step": 698
},
{
"epoch": 0.3238359972202919,
"grad_norm": 0.8515625,
"learning_rate": 3.98402179752322e-05,
"loss": 0.9361385703086853,
"step": 699
},
{
"epoch": 0.3242992819087329,
"grad_norm": 0.92578125,
"learning_rate": 3.981556487673409e-05,
"loss": 0.9315399527549744,
"step": 700
},
{
"epoch": 0.32476256659717395,
"grad_norm": 0.7734375,
"learning_rate": 3.979088224531229e-05,
"loss": 1.005590796470642,
"step": 701
},
{
"epoch": 0.325225851285615,
"grad_norm": 0.84765625,
"learning_rate": 3.976617012705749e-05,
"loss": 0.8338845372200012,
"step": 702
},
{
"epoch": 0.3256891359740561,
"grad_norm": 0.75,
"learning_rate": 3.9741428568115435e-05,
"loss": 1.0329554080963135,
"step": 703
},
{
"epoch": 0.3261524206624971,
"grad_norm": 0.9140625,
"learning_rate": 3.9716657614686844e-05,
"loss": 0.8598560094833374,
"step": 704
},
{
"epoch": 0.32661570535093815,
"grad_norm": 0.796875,
"learning_rate": 3.9691857313027335e-05,
"loss": 0.9257340431213379,
"step": 705
},
{
"epoch": 0.3270789900393792,
"grad_norm": 0.8359375,
"learning_rate": 3.966702770944734e-05,
"loss": 0.8521995544433594,
"step": 706
},
{
"epoch": 0.32754227472782027,
"grad_norm": 0.94140625,
"learning_rate": 3.964216885031197e-05,
"loss": 1.1843841075897217,
"step": 707
},
{
"epoch": 0.3280055594162613,
"grad_norm": 0.90234375,
"learning_rate": 3.961728078204101e-05,
"loss": 1.1602882146835327,
"step": 708
},
{
"epoch": 0.32846884410470234,
"grad_norm": 0.765625,
"learning_rate": 3.9592363551108756e-05,
"loss": 1.020529866218567,
"step": 709
},
{
"epoch": 0.3289321287931434,
"grad_norm": 0.72265625,
"learning_rate": 3.956741720404397e-05,
"loss": 0.9926280975341797,
"step": 710
},
{
"epoch": 0.3293954134815844,
"grad_norm": 0.75390625,
"learning_rate": 3.9542441787429795e-05,
"loss": 0.7993087768554688,
"step": 711
},
{
"epoch": 0.3298586981700255,
"grad_norm": 0.77734375,
"learning_rate": 3.9517437347903635e-05,
"loss": 0.9188562631607056,
"step": 712
},
{
"epoch": 0.33032198285846653,
"grad_norm": 0.75390625,
"learning_rate": 3.949240393215711e-05,
"loss": 0.9771900177001953,
"step": 713
},
{
"epoch": 0.33078526754690757,
"grad_norm": 0.8203125,
"learning_rate": 3.9467341586935936e-05,
"loss": 0.989328145980835,
"step": 714
},
{
"epoch": 0.3312485522353486,
"grad_norm": 0.82421875,
"learning_rate": 3.9442250359039855e-05,
"loss": 1.002003788948059,
"step": 715
},
{
"epoch": 0.3317118369237897,
"grad_norm": 0.8203125,
"learning_rate": 3.941713029532253e-05,
"loss": 1.0104445219039917,
"step": 716
},
{
"epoch": 0.33217512161223073,
"grad_norm": 0.71484375,
"learning_rate": 3.93919814426915e-05,
"loss": 1.0114507675170898,
"step": 717
},
{
"epoch": 0.33263840630067176,
"grad_norm": 0.75390625,
"learning_rate": 3.936680384810803e-05,
"loss": 0.9771013855934143,
"step": 718
},
{
"epoch": 0.3331016909891128,
"grad_norm": 0.98046875,
"learning_rate": 3.934159755858707e-05,
"loss": 1.0455206632614136,
"step": 719
},
{
"epoch": 0.33356497567755383,
"grad_norm": 0.953125,
"learning_rate": 3.931636262119716e-05,
"loss": 0.875360369682312,
"step": 720
},
{
"epoch": 0.3340282603659949,
"grad_norm": 0.671875,
"learning_rate": 3.929109908306032e-05,
"loss": 0.9550399780273438,
"step": 721
},
{
"epoch": 0.33449154505443596,
"grad_norm": 0.86328125,
"learning_rate": 3.9265806991351995e-05,
"loss": 1.1120067834854126,
"step": 722
},
{
"epoch": 0.334954829742877,
"grad_norm": 0.765625,
"learning_rate": 3.9240486393300924e-05,
"loss": 0.9513478875160217,
"step": 723
},
{
"epoch": 0.335418114431318,
"grad_norm": 0.86328125,
"learning_rate": 3.9215137336189096e-05,
"loss": 0.9390691518783569,
"step": 724
},
{
"epoch": 0.3358813991197591,
"grad_norm": 0.8125,
"learning_rate": 3.918975986735164e-05,
"loss": 1.0198416709899902,
"step": 725
},
{
"epoch": 0.33634468380820015,
"grad_norm": 0.7734375,
"learning_rate": 3.916435403417674e-05,
"loss": 0.9613708257675171,
"step": 726
},
{
"epoch": 0.3368079684966412,
"grad_norm": 0.8359375,
"learning_rate": 3.913891988410554e-05,
"loss": 0.9495355486869812,
"step": 727
},
{
"epoch": 0.3372712531850822,
"grad_norm": 0.76953125,
"learning_rate": 3.9113457464632056e-05,
"loss": 0.9577147960662842,
"step": 728
},
{
"epoch": 0.33773453787352326,
"grad_norm": 0.9765625,
"learning_rate": 3.9087966823303105e-05,
"loss": 0.9388977885246277,
"step": 729
},
{
"epoch": 0.33819782256196435,
"grad_norm": 0.75390625,
"learning_rate": 3.906244800771821e-05,
"loss": 0.9760944247245789,
"step": 730
},
{
"epoch": 0.3386611072504054,
"grad_norm": 0.9609375,
"learning_rate": 3.903690106552948e-05,
"loss": 1.0838488340377808,
"step": 731
},
{
"epoch": 0.3391243919388464,
"grad_norm": 0.88671875,
"learning_rate": 3.9011326044441564e-05,
"loss": 0.937881350517273,
"step": 732
},
{
"epoch": 0.33958767662728745,
"grad_norm": 0.8125,
"learning_rate": 3.898572299221153e-05,
"loss": 1.1330440044403076,
"step": 733
},
{
"epoch": 0.34005096131572854,
"grad_norm": 1.109375,
"learning_rate": 3.896009195664882e-05,
"loss": 1.1508278846740723,
"step": 734
},
{
"epoch": 0.3405142460041696,
"grad_norm": 0.79296875,
"learning_rate": 3.893443298561508e-05,
"loss": 0.9706493020057678,
"step": 735
},
{
"epoch": 0.3409775306926106,
"grad_norm": 0.8515625,
"learning_rate": 3.890874612702417e-05,
"loss": 0.9598948955535889,
"step": 736
},
{
"epoch": 0.34144081538105164,
"grad_norm": 0.87109375,
"learning_rate": 3.8883031428842e-05,
"loss": 1.0114010572433472,
"step": 737
},
{
"epoch": 0.3419041000694927,
"grad_norm": 0.73046875,
"learning_rate": 3.8857288939086474e-05,
"loss": 1.0451589822769165,
"step": 738
},
{
"epoch": 0.34236738475793377,
"grad_norm": 0.7421875,
"learning_rate": 3.8831518705827376e-05,
"loss": 1.100400686264038,
"step": 739
},
{
"epoch": 0.3428306694463748,
"grad_norm": 0.73046875,
"learning_rate": 3.8805720777186314e-05,
"loss": 0.907010555267334,
"step": 740
},
{
"epoch": 0.34329395413481584,
"grad_norm": 0.72265625,
"learning_rate": 3.877989520133662e-05,
"loss": 1.0915554761886597,
"step": 741
},
{
"epoch": 0.34375723882325687,
"grad_norm": 0.7890625,
"learning_rate": 3.8754042026503224e-05,
"loss": 1.011785626411438,
"step": 742
},
{
"epoch": 0.34422052351169796,
"grad_norm": 0.8671875,
"learning_rate": 3.872816130096261e-05,
"loss": 1.0811213254928589,
"step": 743
},
{
"epoch": 0.344683808200139,
"grad_norm": 0.85546875,
"learning_rate": 3.8702253073042716e-05,
"loss": 0.834938645362854,
"step": 744
},
{
"epoch": 0.34514709288858003,
"grad_norm": 0.8203125,
"learning_rate": 3.8676317391122824e-05,
"loss": 0.9235035181045532,
"step": 745
},
{
"epoch": 0.34561037757702107,
"grad_norm": 0.765625,
"learning_rate": 3.865035430363348e-05,
"loss": 1.0086536407470703,
"step": 746
},
{
"epoch": 0.3460736622654621,
"grad_norm": 0.86328125,
"learning_rate": 3.862436385905641e-05,
"loss": 0.987399697303772,
"step": 747
},
{
"epoch": 0.3465369469539032,
"grad_norm": 0.73046875,
"learning_rate": 3.859834610592443e-05,
"loss": 1.1993310451507568,
"step": 748
},
{
"epoch": 0.3470002316423442,
"grad_norm": 0.828125,
"learning_rate": 3.857230109282134e-05,
"loss": 1.0457353591918945,
"step": 749
},
{
"epoch": 0.34746351633078526,
"grad_norm": 0.91015625,
"learning_rate": 3.854622886838185e-05,
"loss": 1.139293909072876,
"step": 750
},
{
"epoch": 0.3479268010192263,
"grad_norm": 0.78125,
"learning_rate": 3.852012948129148e-05,
"loss": 1.0585147142410278,
"step": 751
},
{
"epoch": 0.3483900857076674,
"grad_norm": 0.92578125,
"learning_rate": 3.849400298028647e-05,
"loss": 0.9727704524993896,
"step": 752
},
{
"epoch": 0.3488533703961084,
"grad_norm": 0.71875,
"learning_rate": 3.846784941415371e-05,
"loss": 0.9992061257362366,
"step": 753
},
{
"epoch": 0.34931665508454945,
"grad_norm": 0.75390625,
"learning_rate": 3.8441668831730586e-05,
"loss": 1.1475231647491455,
"step": 754
},
{
"epoch": 0.3497799397729905,
"grad_norm": 0.875,
"learning_rate": 3.8415461281904984e-05,
"loss": 1.036689281463623,
"step": 755
},
{
"epoch": 0.3502432244614315,
"grad_norm": 0.81640625,
"learning_rate": 3.83892268136151e-05,
"loss": 0.87852543592453,
"step": 756
},
{
"epoch": 0.3507065091498726,
"grad_norm": 0.91015625,
"learning_rate": 3.8362965475849445e-05,
"loss": 0.9990617036819458,
"step": 757
},
{
"epoch": 0.35116979383831365,
"grad_norm": 0.96484375,
"learning_rate": 3.833667731764665e-05,
"loss": 1.1460075378417969,
"step": 758
},
{
"epoch": 0.3516330785267547,
"grad_norm": 0.75,
"learning_rate": 3.831036238809548e-05,
"loss": 0.9850847721099854,
"step": 759
},
{
"epoch": 0.3520963632151957,
"grad_norm": 0.7734375,
"learning_rate": 3.828402073633464e-05,
"loss": 0.9239014387130737,
"step": 760
},
{
"epoch": 0.3525596479036368,
"grad_norm": 0.80078125,
"learning_rate": 3.825765241155279e-05,
"loss": 0.9535107016563416,
"step": 761
},
{
"epoch": 0.35302293259207784,
"grad_norm": 0.74609375,
"learning_rate": 3.8231257462988355e-05,
"loss": 0.9702818989753723,
"step": 762
},
{
"epoch": 0.3534862172805189,
"grad_norm": 0.83984375,
"learning_rate": 3.820483593992948e-05,
"loss": 0.9807397127151489,
"step": 763
},
{
"epoch": 0.3539495019689599,
"grad_norm": 0.859375,
"learning_rate": 3.817838789171397e-05,
"loss": 0.9782893061637878,
"step": 764
},
{
"epoch": 0.35441278665740095,
"grad_norm": 0.87109375,
"learning_rate": 3.815191336772911e-05,
"loss": 1.050409197807312,
"step": 765
},
{
"epoch": 0.35487607134584204,
"grad_norm": 0.79296875,
"learning_rate": 3.812541241741164e-05,
"loss": 0.9888057112693787,
"step": 766
},
{
"epoch": 0.35533935603428307,
"grad_norm": 0.89453125,
"learning_rate": 3.809888509024769e-05,
"loss": 0.9265248775482178,
"step": 767
},
{
"epoch": 0.3558026407227241,
"grad_norm": 0.80078125,
"learning_rate": 3.807233143577258e-05,
"loss": 0.9314517378807068,
"step": 768
},
{
"epoch": 0.35626592541116514,
"grad_norm": 0.80078125,
"learning_rate": 3.804575150357084e-05,
"loss": 1.0255682468414307,
"step": 769
},
{
"epoch": 0.35672921009960623,
"grad_norm": 0.79296875,
"learning_rate": 3.8019145343276026e-05,
"loss": 1.2007423639297485,
"step": 770
},
{
"epoch": 0.35719249478804727,
"grad_norm": 0.7734375,
"learning_rate": 3.799251300457071e-05,
"loss": 1.0465315580368042,
"step": 771
},
{
"epoch": 0.3576557794764883,
"grad_norm": 0.84375,
"learning_rate": 3.796585453718635e-05,
"loss": 0.9410252571105957,
"step": 772
},
{
"epoch": 0.35811906416492933,
"grad_norm": 0.76171875,
"learning_rate": 3.7939169990903146e-05,
"loss": 0.8860654830932617,
"step": 773
},
{
"epoch": 0.35858234885337037,
"grad_norm": 0.8203125,
"learning_rate": 3.791245941555004e-05,
"loss": 0.9542768001556396,
"step": 774
},
{
"epoch": 0.35904563354181146,
"grad_norm": 0.98046875,
"learning_rate": 3.788572286100457e-05,
"loss": 1.121732473373413,
"step": 775
},
{
"epoch": 0.3595089182302525,
"grad_norm": 0.796875,
"learning_rate": 3.785896037719278e-05,
"loss": 1.0113410949707031,
"step": 776
},
{
"epoch": 0.35997220291869353,
"grad_norm": 0.84765625,
"learning_rate": 3.7832172014089136e-05,
"loss": 0.9060476422309875,
"step": 777
},
{
"epoch": 0.36043548760713456,
"grad_norm": 0.76953125,
"learning_rate": 3.780535782171643e-05,
"loss": 0.9059662818908691,
"step": 778
},
{
"epoch": 0.36089877229557565,
"grad_norm": 0.765625,
"learning_rate": 3.777851785014569e-05,
"loss": 1.008833646774292,
"step": 779
},
{
"epoch": 0.3613620569840167,
"grad_norm": 0.75390625,
"learning_rate": 3.775165214949605e-05,
"loss": 0.9600525498390198,
"step": 780
},
{
"epoch": 0.3618253416724577,
"grad_norm": 0.890625,
"learning_rate": 3.772476076993474e-05,
"loss": 1.1373387575149536,
"step": 781
},
{
"epoch": 0.36228862636089876,
"grad_norm": 0.8203125,
"learning_rate": 3.769784376167691e-05,
"loss": 0.9134207367897034,
"step": 782
},
{
"epoch": 0.3627519110493398,
"grad_norm": 0.8046875,
"learning_rate": 3.767090117498558e-05,
"loss": 0.9469197988510132,
"step": 783
},
{
"epoch": 0.3632151957377809,
"grad_norm": 0.79296875,
"learning_rate": 3.764393306017151e-05,
"loss": 1.1006484031677246,
"step": 784
},
{
"epoch": 0.3636784804262219,
"grad_norm": 0.79296875,
"learning_rate": 3.761693946759315e-05,
"loss": 0.9012340307235718,
"step": 785
},
{
"epoch": 0.36414176511466295,
"grad_norm": 0.8359375,
"learning_rate": 3.758992044765654e-05,
"loss": 1.0260119438171387,
"step": 786
},
{
"epoch": 0.364605049803104,
"grad_norm": 0.82421875,
"learning_rate": 3.756287605081517e-05,
"loss": 1.1195753812789917,
"step": 787
},
{
"epoch": 0.3650683344915451,
"grad_norm": 0.75,
"learning_rate": 3.753580632756993e-05,
"loss": 0.9033543467521667,
"step": 788
},
{
"epoch": 0.3655316191799861,
"grad_norm": 0.9375,
"learning_rate": 3.7508711328469e-05,
"loss": 0.8747038841247559,
"step": 789
},
{
"epoch": 0.36599490386842715,
"grad_norm": 0.83203125,
"learning_rate": 3.7481591104107775e-05,
"loss": 1.0821847915649414,
"step": 790
},
{
"epoch": 0.3664581885568682,
"grad_norm": 0.83203125,
"learning_rate": 3.745444570512872e-05,
"loss": 1.023503303527832,
"step": 791
},
{
"epoch": 0.3669214732453092,
"grad_norm": 0.90625,
"learning_rate": 3.7427275182221356e-05,
"loss": 1.1093895435333252,
"step": 792
},
{
"epoch": 0.3673847579337503,
"grad_norm": 0.78125,
"learning_rate": 3.740007958612207e-05,
"loss": 1.0260508060455322,
"step": 793
},
{
"epoch": 0.36784804262219134,
"grad_norm": 0.8203125,
"learning_rate": 3.73728589676141e-05,
"loss": 0.961272120475769,
"step": 794
},
{
"epoch": 0.3683113273106324,
"grad_norm": 0.77734375,
"learning_rate": 3.734561337752741e-05,
"loss": 1.2031164169311523,
"step": 795
},
{
"epoch": 0.3687746119990734,
"grad_norm": 0.8515625,
"learning_rate": 3.7318342866738565e-05,
"loss": 0.998257577419281,
"step": 796
},
{
"epoch": 0.3692378966875145,
"grad_norm": 0.9296875,
"learning_rate": 3.72910474861707e-05,
"loss": 1.0234450101852417,
"step": 797
},
{
"epoch": 0.36970118137595553,
"grad_norm": 0.80859375,
"learning_rate": 3.726372728679338e-05,
"loss": 1.0095072984695435,
"step": 798
},
{
"epoch": 0.37016446606439657,
"grad_norm": 0.85546875,
"learning_rate": 3.7236382319622494e-05,
"loss": 1.0547491312026978,
"step": 799
},
{
"epoch": 0.3706277507528376,
"grad_norm": 0.84375,
"learning_rate": 3.720901263572021e-05,
"loss": 1.1043885946273804,
"step": 800
},
{
"epoch": 0.37109103544127864,
"grad_norm": 0.91015625,
"learning_rate": 3.7181618286194834e-05,
"loss": 1.0135180950164795,
"step": 801
},
{
"epoch": 0.37155432012971973,
"grad_norm": 0.71875,
"learning_rate": 3.715419932220074e-05,
"loss": 0.9376970529556274,
"step": 802
},
{
"epoch": 0.37201760481816076,
"grad_norm": 0.953125,
"learning_rate": 3.7126755794938255e-05,
"loss": 1.0911214351654053,
"step": 803
},
{
"epoch": 0.3724808895066018,
"grad_norm": 0.80859375,
"learning_rate": 3.7099287755653566e-05,
"loss": 1.1035547256469727,
"step": 804
},
{
"epoch": 0.37294417419504283,
"grad_norm": 0.87890625,
"learning_rate": 3.707179525563866e-05,
"loss": 1.1932406425476074,
"step": 805
},
{
"epoch": 0.3734074588834839,
"grad_norm": 0.78515625,
"learning_rate": 3.704427834623118e-05,
"loss": 1.0412805080413818,
"step": 806
},
{
"epoch": 0.37387074357192496,
"grad_norm": 0.90625,
"learning_rate": 3.7016737078814365e-05,
"loss": 1.1123768091201782,
"step": 807
},
{
"epoch": 0.374334028260366,
"grad_norm": 0.765625,
"learning_rate": 3.698917150481692e-05,
"loss": 0.9360041618347168,
"step": 808
},
{
"epoch": 0.374797312948807,
"grad_norm": 0.77734375,
"learning_rate": 3.696158167571294e-05,
"loss": 0.9965537190437317,
"step": 809
},
{
"epoch": 0.3752605976372481,
"grad_norm": 0.8125,
"learning_rate": 3.693396764302183e-05,
"loss": 0.9210027456283569,
"step": 810
},
{
"epoch": 0.37572388232568915,
"grad_norm": 0.8203125,
"learning_rate": 3.690632945830817e-05,
"loss": 0.9936932325363159,
"step": 811
},
{
"epoch": 0.3761871670141302,
"grad_norm": 0.95703125,
"learning_rate": 3.687866717318166e-05,
"loss": 1.0398387908935547,
"step": 812
},
{
"epoch": 0.3766504517025712,
"grad_norm": 0.8984375,
"learning_rate": 3.685098083929699e-05,
"loss": 0.9467533826828003,
"step": 813
},
{
"epoch": 0.37711373639101226,
"grad_norm": 0.8359375,
"learning_rate": 3.682327050835376e-05,
"loss": 1.156292200088501,
"step": 814
},
{
"epoch": 0.37757702107945335,
"grad_norm": 0.83203125,
"learning_rate": 3.6795536232096374e-05,
"loss": 0.986288845539093,
"step": 815
},
{
"epoch": 0.3780403057678944,
"grad_norm": 0.8671875,
"learning_rate": 3.676777806231396e-05,
"loss": 1.123473882675171,
"step": 816
},
{
"epoch": 0.3785035904563354,
"grad_norm": 0.88671875,
"learning_rate": 3.673999605084028e-05,
"loss": 1.0756930112838745,
"step": 817
},
{
"epoch": 0.37896687514477645,
"grad_norm": 0.88671875,
"learning_rate": 3.671219024955357e-05,
"loss": 1.0483829975128174,
"step": 818
},
{
"epoch": 0.37943015983321754,
"grad_norm": 0.90625,
"learning_rate": 3.668436071037653e-05,
"loss": 0.9522889852523804,
"step": 819
},
{
"epoch": 0.3798934445216586,
"grad_norm": 0.7890625,
"learning_rate": 3.665650748527616e-05,
"loss": 1.056382179260254,
"step": 820
},
{
"epoch": 0.3803567292100996,
"grad_norm": 0.78125,
"learning_rate": 3.662863062626371e-05,
"loss": 1.141240119934082,
"step": 821
},
{
"epoch": 0.38082001389854064,
"grad_norm": 0.8828125,
"learning_rate": 3.660073018539456e-05,
"loss": 0.861331582069397,
"step": 822
},
{
"epoch": 0.3812832985869817,
"grad_norm": 0.85546875,
"learning_rate": 3.657280621476811e-05,
"loss": 1.1151138544082642,
"step": 823
},
{
"epoch": 0.38174658327542277,
"grad_norm": 0.69140625,
"learning_rate": 3.654485876652772e-05,
"loss": 0.9755687117576599,
"step": 824
},
{
"epoch": 0.3822098679638638,
"grad_norm": 0.734375,
"learning_rate": 3.651688789286056e-05,
"loss": 0.9543071985244751,
"step": 825
},
{
"epoch": 0.38267315265230484,
"grad_norm": 0.73046875,
"learning_rate": 3.6488893645997575e-05,
"loss": 0.9777738451957703,
"step": 826
},
{
"epoch": 0.3831364373407459,
"grad_norm": 0.75390625,
"learning_rate": 3.646087607821333e-05,
"loss": 1.010209321975708,
"step": 827
},
{
"epoch": 0.38359972202918696,
"grad_norm": 0.81640625,
"learning_rate": 3.6432835241825965e-05,
"loss": 1.0441359281539917,
"step": 828
},
{
"epoch": 0.384063006717628,
"grad_norm": 0.83203125,
"learning_rate": 3.640477118919705e-05,
"loss": 0.8406580090522766,
"step": 829
},
{
"epoch": 0.38452629140606903,
"grad_norm": 0.90234375,
"learning_rate": 3.637668397273149e-05,
"loss": 1.0144675970077515,
"step": 830
},
{
"epoch": 0.38498957609451007,
"grad_norm": 0.8828125,
"learning_rate": 3.6348573644877495e-05,
"loss": 1.2290412187576294,
"step": 831
},
{
"epoch": 0.3854528607829511,
"grad_norm": 0.76171875,
"learning_rate": 3.63204402581264e-05,
"loss": 0.8894533514976501,
"step": 832
},
{
"epoch": 0.3859161454713922,
"grad_norm": 0.8046875,
"learning_rate": 3.629228386501259e-05,
"loss": 1.1188613176345825,
"step": 833
},
{
"epoch": 0.3863794301598332,
"grad_norm": 0.8203125,
"learning_rate": 3.626410451811342e-05,
"loss": 0.9740458726882935,
"step": 834
},
{
"epoch": 0.38684271484827426,
"grad_norm": 0.8046875,
"learning_rate": 3.623590227004913e-05,
"loss": 0.7910479307174683,
"step": 835
},
{
"epoch": 0.3873059995367153,
"grad_norm": 0.828125,
"learning_rate": 3.620767717348268e-05,
"loss": 0.9454694986343384,
"step": 836
},
{
"epoch": 0.3877692842251564,
"grad_norm": 0.875,
"learning_rate": 3.617942928111973e-05,
"loss": 1.0109909772872925,
"step": 837
},
{
"epoch": 0.3882325689135974,
"grad_norm": 0.84765625,
"learning_rate": 3.615115864570851e-05,
"loss": 0.9801681041717529,
"step": 838
},
{
"epoch": 0.38869585360203845,
"grad_norm": 0.7890625,
"learning_rate": 3.612286532003969e-05,
"loss": 1.106335163116455,
"step": 839
},
{
"epoch": 0.3891591382904795,
"grad_norm": 0.83203125,
"learning_rate": 3.609454935694634e-05,
"loss": 0.9830104112625122,
"step": 840
},
{
"epoch": 0.3896224229789205,
"grad_norm": 0.90234375,
"learning_rate": 3.606621080930376e-05,
"loss": 1.0451645851135254,
"step": 841
},
{
"epoch": 0.3900857076673616,
"grad_norm": 0.80078125,
"learning_rate": 3.603784973002948e-05,
"loss": 0.980257511138916,
"step": 842
},
{
"epoch": 0.39054899235580265,
"grad_norm": 0.78515625,
"learning_rate": 3.600946617208306e-05,
"loss": 0.9005157351493835,
"step": 843
},
{
"epoch": 0.3910122770442437,
"grad_norm": 0.8203125,
"learning_rate": 3.5981060188466055e-05,
"loss": 0.9599143266677856,
"step": 844
},
{
"epoch": 0.3914755617326847,
"grad_norm": 0.7734375,
"learning_rate": 3.5952631832221895e-05,
"loss": 0.9783821702003479,
"step": 845
},
{
"epoch": 0.3919388464211258,
"grad_norm": 0.95703125,
"learning_rate": 3.592418115643576e-05,
"loss": 0.9757992625236511,
"step": 846
},
{
"epoch": 0.39240213110956684,
"grad_norm": 0.7734375,
"learning_rate": 3.589570821423457e-05,
"loss": 0.9026694297790527,
"step": 847
},
{
"epoch": 0.3928654157980079,
"grad_norm": 0.84765625,
"learning_rate": 3.586721305878676e-05,
"loss": 0.9864629507064819,
"step": 848
},
{
"epoch": 0.3933287004864489,
"grad_norm": 0.88671875,
"learning_rate": 3.583869574330227e-05,
"loss": 0.9566922783851624,
"step": 849
},
{
"epoch": 0.39379198517488995,
"grad_norm": 0.77734375,
"learning_rate": 3.5810156321032424e-05,
"loss": 1.0206118822097778,
"step": 850
},
{
"epoch": 0.39425526986333104,
"grad_norm": 0.76953125,
"learning_rate": 3.5781594845269824e-05,
"loss": 1.2455644607543945,
"step": 851
},
{
"epoch": 0.39471855455177207,
"grad_norm": 0.8203125,
"learning_rate": 3.575301136934825e-05,
"loss": 0.9965265393257141,
"step": 852
},
{
"epoch": 0.3951818392402131,
"grad_norm": 0.80078125,
"learning_rate": 3.5724405946642565e-05,
"loss": 1.058623194694519,
"step": 853
},
{
"epoch": 0.39564512392865414,
"grad_norm": 0.71484375,
"learning_rate": 3.569577863056861e-05,
"loss": 0.9021344184875488,
"step": 854
},
{
"epoch": 0.39610840861709523,
"grad_norm": 0.8984375,
"learning_rate": 3.5667129474583116e-05,
"loss": 1.1672606468200684,
"step": 855
},
{
"epoch": 0.39657169330553627,
"grad_norm": 0.7109375,
"learning_rate": 3.5638458532183604e-05,
"loss": 0.8217394351959229,
"step": 856
},
{
"epoch": 0.3970349779939773,
"grad_norm": 0.75390625,
"learning_rate": 3.5609765856908244e-05,
"loss": 1.0040171146392822,
"step": 857
},
{
"epoch": 0.39749826268241834,
"grad_norm": 0.81640625,
"learning_rate": 3.5581051502335834e-05,
"loss": 1.053956389427185,
"step": 858
},
{
"epoch": 0.39796154737085937,
"grad_norm": 0.8359375,
"learning_rate": 3.555231552208561e-05,
"loss": 1.0706506967544556,
"step": 859
},
{
"epoch": 0.39842483205930046,
"grad_norm": 0.84375,
"learning_rate": 3.5523557969817226e-05,
"loss": 0.8872452974319458,
"step": 860
},
{
"epoch": 0.3988881167477415,
"grad_norm": 0.8671875,
"learning_rate": 3.5494778899230605e-05,
"loss": 0.9684238433837891,
"step": 861
},
{
"epoch": 0.39935140143618253,
"grad_norm": 0.75,
"learning_rate": 3.5465978364065835e-05,
"loss": 1.1116052865982056,
"step": 862
},
{
"epoch": 0.39981468612462356,
"grad_norm": 0.99609375,
"learning_rate": 3.543715641810312e-05,
"loss": 0.9077733159065247,
"step": 863
},
{
"epoch": 0.40027797081306465,
"grad_norm": 1.0234375,
"learning_rate": 3.540831311516261e-05,
"loss": 1.0570735931396484,
"step": 864
},
{
"epoch": 0.4007412555015057,
"grad_norm": 0.76171875,
"learning_rate": 3.537944850910436e-05,
"loss": 1.1230758428573608,
"step": 865
},
{
"epoch": 0.4012045401899467,
"grad_norm": 0.7578125,
"learning_rate": 3.5350562653828204e-05,
"loss": 0.9723849296569824,
"step": 866
},
{
"epoch": 0.40166782487838776,
"grad_norm": 0.8515625,
"learning_rate": 3.532165560327364e-05,
"loss": 0.9751421213150024,
"step": 867
},
{
"epoch": 0.4021311095668288,
"grad_norm": 0.796875,
"learning_rate": 3.529272741141974e-05,
"loss": 0.9020988941192627,
"step": 868
},
{
"epoch": 0.4025943942552699,
"grad_norm": 0.8046875,
"learning_rate": 3.5263778132285085e-05,
"loss": 0.9929109811782837,
"step": 869
},
{
"epoch": 0.4030576789437109,
"grad_norm": 0.79296875,
"learning_rate": 3.5234807819927625e-05,
"loss": 1.088818073272705,
"step": 870
},
{
"epoch": 0.40352096363215195,
"grad_norm": 0.86328125,
"learning_rate": 3.520581652844454e-05,
"loss": 1.1746731996536255,
"step": 871
},
{
"epoch": 0.403984248320593,
"grad_norm": 0.796875,
"learning_rate": 3.517680431197226e-05,
"loss": 1.0509936809539795,
"step": 872
},
{
"epoch": 0.4044475330090341,
"grad_norm": 0.91015625,
"learning_rate": 3.514777122468621e-05,
"loss": 1.023998737335205,
"step": 873
},
{
"epoch": 0.4049108176974751,
"grad_norm": 0.87109375,
"learning_rate": 3.511871732080087e-05,
"loss": 0.9446095824241638,
"step": 874
},
{
"epoch": 0.40537410238591615,
"grad_norm": 0.68359375,
"learning_rate": 3.508964265456951e-05,
"loss": 0.9351980686187744,
"step": 875
},
{
"epoch": 0.4058373870743572,
"grad_norm": 0.6953125,
"learning_rate": 3.506054728028423e-05,
"loss": 0.9516130685806274,
"step": 876
},
{
"epoch": 0.4063006717627982,
"grad_norm": 0.8359375,
"learning_rate": 3.503143125227577e-05,
"loss": 1.006507158279419,
"step": 877
},
{
"epoch": 0.4067639564512393,
"grad_norm": 0.6953125,
"learning_rate": 3.500229462491346e-05,
"loss": 0.8910001516342163,
"step": 878
},
{
"epoch": 0.40722724113968034,
"grad_norm": 0.76953125,
"learning_rate": 3.497313745260507e-05,
"loss": 0.8634387850761414,
"step": 879
},
{
"epoch": 0.4076905258281214,
"grad_norm": 0.71484375,
"learning_rate": 3.494395978979673e-05,
"loss": 1.022470235824585,
"step": 880
},
{
"epoch": 0.4081538105165624,
"grad_norm": 0.74609375,
"learning_rate": 3.491476169097288e-05,
"loss": 1.0753809213638306,
"step": 881
},
{
"epoch": 0.4086170952050035,
"grad_norm": 0.7578125,
"learning_rate": 3.488554321065606e-05,
"loss": 0.8573417067527771,
"step": 882
},
{
"epoch": 0.40908037989344453,
"grad_norm": 0.9296875,
"learning_rate": 3.485630440340692e-05,
"loss": 0.9140716195106506,
"step": 883
},
{
"epoch": 0.40954366458188557,
"grad_norm": 0.7421875,
"learning_rate": 3.482704532382404e-05,
"loss": 0.8698415756225586,
"step": 884
},
{
"epoch": 0.4100069492703266,
"grad_norm": 0.95703125,
"learning_rate": 3.479776602654384e-05,
"loss": 0.9196599721908569,
"step": 885
},
{
"epoch": 0.41047023395876764,
"grad_norm": 0.8125,
"learning_rate": 3.476846656624054e-05,
"loss": 1.182805061340332,
"step": 886
},
{
"epoch": 0.41093351864720873,
"grad_norm": 0.92578125,
"learning_rate": 3.4739146997625966e-05,
"loss": 1.1990854740142822,
"step": 887
},
{
"epoch": 0.41139680333564976,
"grad_norm": 0.76171875,
"learning_rate": 3.4709807375449526e-05,
"loss": 0.9600467681884766,
"step": 888
},
{
"epoch": 0.4118600880240908,
"grad_norm": 0.72265625,
"learning_rate": 3.468044775449804e-05,
"loss": 0.9062017202377319,
"step": 889
},
{
"epoch": 0.41232337271253183,
"grad_norm": 0.83203125,
"learning_rate": 3.4651068189595725e-05,
"loss": 1.0649828910827637,
"step": 890
},
{
"epoch": 0.4127866574009729,
"grad_norm": 0.79296875,
"learning_rate": 3.4621668735603974e-05,
"loss": 0.8955351710319519,
"step": 891
},
{
"epoch": 0.41324994208941396,
"grad_norm": 0.82421875,
"learning_rate": 3.459224944742137e-05,
"loss": 0.8985044360160828,
"step": 892
},
{
"epoch": 0.413713226777855,
"grad_norm": 0.765625,
"learning_rate": 3.4562810379983515e-05,
"loss": 0.9573203921318054,
"step": 893
},
{
"epoch": 0.414176511466296,
"grad_norm": 0.8515625,
"learning_rate": 3.453335158826294e-05,
"loss": 0.8726100921630859,
"step": 894
},
{
"epoch": 0.41463979615473706,
"grad_norm": 0.796875,
"learning_rate": 3.450387312726902e-05,
"loss": 0.9778281450271606,
"step": 895
},
{
"epoch": 0.41510308084317815,
"grad_norm": 0.87109375,
"learning_rate": 3.447437505204785e-05,
"loss": 0.9495804905891418,
"step": 896
},
{
"epoch": 0.4155663655316192,
"grad_norm": 0.8125,
"learning_rate": 3.444485741768216e-05,
"loss": 1.0823774337768555,
"step": 897
},
{
"epoch": 0.4160296502200602,
"grad_norm": 0.80078125,
"learning_rate": 3.441532027929119e-05,
"loss": 1.0764063596725464,
"step": 898
},
{
"epoch": 0.41649293490850126,
"grad_norm": 0.95703125,
"learning_rate": 3.438576369203061e-05,
"loss": 0.9195699691772461,
"step": 899
},
{
"epoch": 0.41695621959694235,
"grad_norm": 0.95703125,
"learning_rate": 3.435618771109241e-05,
"loss": 1.0315985679626465,
"step": 900
},
{
"epoch": 0.4174195042853834,
"grad_norm": 0.9609375,
"learning_rate": 3.43265923917048e-05,
"loss": 0.9629949331283569,
"step": 901
},
{
"epoch": 0.4178827889738244,
"grad_norm": 0.86328125,
"learning_rate": 3.4296977789132076e-05,
"loss": 0.9754863977432251,
"step": 902
},
{
"epoch": 0.41834607366226545,
"grad_norm": 0.85546875,
"learning_rate": 3.4267343958674553e-05,
"loss": 1.0928244590759277,
"step": 903
},
{
"epoch": 0.4188093583507065,
"grad_norm": 0.7421875,
"learning_rate": 3.423769095566848e-05,
"loss": 0.9829870462417603,
"step": 904
},
{
"epoch": 0.4192726430391476,
"grad_norm": 0.78515625,
"learning_rate": 3.420801883548586e-05,
"loss": 1.0500094890594482,
"step": 905
},
{
"epoch": 0.4197359277275886,
"grad_norm": 0.80859375,
"learning_rate": 3.417832765353443e-05,
"loss": 1.0949947834014893,
"step": 906
},
{
"epoch": 0.42019921241602964,
"grad_norm": 0.74609375,
"learning_rate": 3.4148617465257505e-05,
"loss": 0.9589704275131226,
"step": 907
},
{
"epoch": 0.4206624971044707,
"grad_norm": 0.703125,
"learning_rate": 3.41188883261339e-05,
"loss": 0.9546459913253784,
"step": 908
},
{
"epoch": 0.42112578179291177,
"grad_norm": 0.828125,
"learning_rate": 3.40891402916778e-05,
"loss": 1.1055099964141846,
"step": 909
},
{
"epoch": 0.4215890664813528,
"grad_norm": 0.84765625,
"learning_rate": 3.40593734174387e-05,
"loss": 0.9318978786468506,
"step": 910
},
{
"epoch": 0.42205235116979384,
"grad_norm": 0.79296875,
"learning_rate": 3.402958775900126e-05,
"loss": 0.8830047249794006,
"step": 911
},
{
"epoch": 0.4225156358582349,
"grad_norm": 0.7734375,
"learning_rate": 3.399978337198521e-05,
"loss": 0.9814854264259338,
"step": 912
},
{
"epoch": 0.4229789205466759,
"grad_norm": 0.7890625,
"learning_rate": 3.3969960312045276e-05,
"loss": 0.9006556272506714,
"step": 913
},
{
"epoch": 0.423442205235117,
"grad_norm": 0.77734375,
"learning_rate": 3.394011863487102e-05,
"loss": 1.1782516241073608,
"step": 914
},
{
"epoch": 0.42390548992355803,
"grad_norm": 0.80078125,
"learning_rate": 3.39102583961868e-05,
"loss": 0.9220030903816223,
"step": 915
},
{
"epoch": 0.42436877461199907,
"grad_norm": 0.78515625,
"learning_rate": 3.388037965175161e-05,
"loss": 1.0260250568389893,
"step": 916
},
{
"epoch": 0.4248320593004401,
"grad_norm": 0.71875,
"learning_rate": 3.385048245735901e-05,
"loss": 0.9909316301345825,
"step": 917
},
{
"epoch": 0.4252953439888812,
"grad_norm": 0.7421875,
"learning_rate": 3.3820566868837025e-05,
"loss": 0.9483840465545654,
"step": 918
},
{
"epoch": 0.4257586286773222,
"grad_norm": 0.7890625,
"learning_rate": 3.3790632942048e-05,
"loss": 0.9133286476135254,
"step": 919
},
{
"epoch": 0.42622191336576326,
"grad_norm": 0.7265625,
"learning_rate": 3.376068073288856e-05,
"loss": 0.8733887076377869,
"step": 920
},
{
"epoch": 0.4266851980542043,
"grad_norm": 0.8046875,
"learning_rate": 3.373071029728942e-05,
"loss": 0.9942792654037476,
"step": 921
},
{
"epoch": 0.42714848274264533,
"grad_norm": 0.75,
"learning_rate": 3.370072169121539e-05,
"loss": 1.031928539276123,
"step": 922
},
{
"epoch": 0.4276117674310864,
"grad_norm": 0.859375,
"learning_rate": 3.367071497066516e-05,
"loss": 1.044718861579895,
"step": 923
},
{
"epoch": 0.42807505211952745,
"grad_norm": 0.7421875,
"learning_rate": 3.364069019167127e-05,
"loss": 1.10916268825531,
"step": 924
},
{
"epoch": 0.4285383368079685,
"grad_norm": 0.9453125,
"learning_rate": 3.361064741029997e-05,
"loss": 1.1920192241668701,
"step": 925
},
{
"epoch": 0.4290016214964095,
"grad_norm": 0.796875,
"learning_rate": 3.3580586682651144e-05,
"loss": 0.8233553171157837,
"step": 926
},
{
"epoch": 0.4294649061848506,
"grad_norm": 0.80078125,
"learning_rate": 3.3550508064858165e-05,
"loss": 0.8930643796920776,
"step": 927
},
{
"epoch": 0.42992819087329165,
"grad_norm": 0.81640625,
"learning_rate": 3.352041161308782e-05,
"loss": 0.9572421312332153,
"step": 928
},
{
"epoch": 0.4303914755617327,
"grad_norm": 0.76171875,
"learning_rate": 3.349029738354023e-05,
"loss": 1.018913984298706,
"step": 929
},
{
"epoch": 0.4308547602501737,
"grad_norm": 0.83984375,
"learning_rate": 3.346016543244865e-05,
"loss": 0.9118576049804688,
"step": 930
},
{
"epoch": 0.43131804493861475,
"grad_norm": 0.76171875,
"learning_rate": 3.343001581607949e-05,
"loss": 0.8083831071853638,
"step": 931
},
{
"epoch": 0.43178132962705584,
"grad_norm": 0.87109375,
"learning_rate": 3.339984859073209e-05,
"loss": 0.8767201900482178,
"step": 932
},
{
"epoch": 0.4322446143154969,
"grad_norm": 0.8046875,
"learning_rate": 3.3369663812738717e-05,
"loss": 0.9277627468109131,
"step": 933
},
{
"epoch": 0.4327078990039379,
"grad_norm": 1.0,
"learning_rate": 3.333946153846441e-05,
"loss": 1.1319029331207275,
"step": 934
},
{
"epoch": 0.43317118369237895,
"grad_norm": 0.78515625,
"learning_rate": 3.330924182430684e-05,
"loss": 0.8661171197891235,
"step": 935
},
{
"epoch": 0.43363446838082004,
"grad_norm": 0.7890625,
"learning_rate": 3.327900472669629e-05,
"loss": 0.9028452634811401,
"step": 936
},
{
"epoch": 0.43409775306926107,
"grad_norm": 1.2421875,
"learning_rate": 3.324875030209549e-05,
"loss": 1.1232396364212036,
"step": 937
},
{
"epoch": 0.4345610377577021,
"grad_norm": 0.94921875,
"learning_rate": 3.32184786069995e-05,
"loss": 1.0140894651412964,
"step": 938
},
{
"epoch": 0.43502432244614314,
"grad_norm": 0.94140625,
"learning_rate": 3.318818969793567e-05,
"loss": 1.0171058177947998,
"step": 939
},
{
"epoch": 0.4354876071345842,
"grad_norm": 0.83984375,
"learning_rate": 3.3157883631463465e-05,
"loss": 1.07037353515625,
"step": 940
},
{
"epoch": 0.43595089182302527,
"grad_norm": 0.86328125,
"learning_rate": 3.312756046417441e-05,
"loss": 1.1801575422286987,
"step": 941
},
{
"epoch": 0.4364141765114663,
"grad_norm": 0.85546875,
"learning_rate": 3.309722025269193e-05,
"loss": 1.1600738763809204,
"step": 942
},
{
"epoch": 0.43687746119990734,
"grad_norm": 0.84765625,
"learning_rate": 3.306686305367132e-05,
"loss": 0.9927069544792175,
"step": 943
},
{
"epoch": 0.43734074588834837,
"grad_norm": 0.84765625,
"learning_rate": 3.303648892379956e-05,
"loss": 1.0282708406448364,
"step": 944
},
{
"epoch": 0.43780403057678946,
"grad_norm": 0.86328125,
"learning_rate": 3.300609791979526e-05,
"loss": 1.0274934768676758,
"step": 945
},
{
"epoch": 0.4382673152652305,
"grad_norm": 0.8671875,
"learning_rate": 3.2975690098408555e-05,
"loss": 0.9179637432098389,
"step": 946
},
{
"epoch": 0.43873059995367153,
"grad_norm": 0.78125,
"learning_rate": 3.2945265516420954e-05,
"loss": 0.9191789627075195,
"step": 947
},
{
"epoch": 0.43919388464211256,
"grad_norm": 0.890625,
"learning_rate": 3.291482423064528e-05,
"loss": 1.0582070350646973,
"step": 948
},
{
"epoch": 0.4396571693305536,
"grad_norm": 0.8671875,
"learning_rate": 3.2884366297925543e-05,
"loss": 0.9630937576293945,
"step": 949
},
{
"epoch": 0.4401204540189947,
"grad_norm": 0.6875,
"learning_rate": 3.2853891775136854e-05,
"loss": 1.039337158203125,
"step": 950
},
{
"epoch": 0.4405837387074357,
"grad_norm": 0.92578125,
"learning_rate": 3.2823400719185286e-05,
"loss": 1.1467114686965942,
"step": 951
},
{
"epoch": 0.44104702339587676,
"grad_norm": 0.73046875,
"learning_rate": 3.279289318700778e-05,
"loss": 1.0572043657302856,
"step": 952
},
{
"epoch": 0.4415103080843178,
"grad_norm": 0.81640625,
"learning_rate": 3.276236923557206e-05,
"loss": 1.015242099761963,
"step": 953
},
{
"epoch": 0.4419735927727589,
"grad_norm": 0.83984375,
"learning_rate": 3.27318289218765e-05,
"loss": 1.0399034023284912,
"step": 954
},
{
"epoch": 0.4424368774611999,
"grad_norm": 0.79296875,
"learning_rate": 3.2701272302950036e-05,
"loss": 1.0193357467651367,
"step": 955
},
{
"epoch": 0.44290016214964095,
"grad_norm": 0.8203125,
"learning_rate": 3.2670699435852034e-05,
"loss": 0.9832947850227356,
"step": 956
},
{
"epoch": 0.443363446838082,
"grad_norm": 0.890625,
"learning_rate": 3.2640110377672225e-05,
"loss": 0.9701854586601257,
"step": 957
},
{
"epoch": 0.443826731526523,
"grad_norm": 0.8203125,
"learning_rate": 3.260950518553056e-05,
"loss": 1.1149054765701294,
"step": 958
},
{
"epoch": 0.4442900162149641,
"grad_norm": 1.0546875,
"learning_rate": 3.257888391657711e-05,
"loss": 0.949036180973053,
"step": 959
},
{
"epoch": 0.44475330090340515,
"grad_norm": 0.796875,
"learning_rate": 3.254824662799199e-05,
"loss": 0.976753294467926,
"step": 960
},
{
"epoch": 0.4452165855918462,
"grad_norm": 0.8125,
"learning_rate": 3.2517593376985216e-05,
"loss": 0.9391505122184753,
"step": 961
},
{
"epoch": 0.4456798702802872,
"grad_norm": 0.8671875,
"learning_rate": 3.248692422079659e-05,
"loss": 1.0805474519729614,
"step": 962
},
{
"epoch": 0.4461431549687283,
"grad_norm": 0.88671875,
"learning_rate": 3.245623921669565e-05,
"loss": 0.8896968364715576,
"step": 963
},
{
"epoch": 0.44660643965716934,
"grad_norm": 0.76171875,
"learning_rate": 3.2425538421981515e-05,
"loss": 0.9105522036552429,
"step": 964
},
{
"epoch": 0.4470697243456104,
"grad_norm": 0.85546875,
"learning_rate": 3.2394821893982765e-05,
"loss": 1.0720794200897217,
"step": 965
},
{
"epoch": 0.4475330090340514,
"grad_norm": 0.84765625,
"learning_rate": 3.2364089690057414e-05,
"loss": 0.9761070013046265,
"step": 966
},
{
"epoch": 0.44799629372249244,
"grad_norm": 0.78515625,
"learning_rate": 3.2333341867592697e-05,
"loss": 0.9775373339653015,
"step": 967
},
{
"epoch": 0.44845957841093353,
"grad_norm": 0.7578125,
"learning_rate": 3.230257848400503e-05,
"loss": 1.045255184173584,
"step": 968
},
{
"epoch": 0.44892286309937457,
"grad_norm": 0.90234375,
"learning_rate": 3.22717995967399e-05,
"loss": 0.9985790848731995,
"step": 969
},
{
"epoch": 0.4493861477878156,
"grad_norm": 0.69140625,
"learning_rate": 3.224100526327173e-05,
"loss": 0.8842822909355164,
"step": 970
},
{
"epoch": 0.44984943247625664,
"grad_norm": 1.15625,
"learning_rate": 3.221019554110378e-05,
"loss": 1.080345630645752,
"step": 971
},
{
"epoch": 0.45031271716469773,
"grad_norm": 0.82421875,
"learning_rate": 3.2179370487768067e-05,
"loss": 1.0478893518447876,
"step": 972
},
{
"epoch": 0.45077600185313876,
"grad_norm": 0.6953125,
"learning_rate": 3.214853016082523e-05,
"loss": 0.9434666037559509,
"step": 973
},
{
"epoch": 0.4512392865415798,
"grad_norm": 0.91015625,
"learning_rate": 3.211767461786441e-05,
"loss": 0.9274519681930542,
"step": 974
},
{
"epoch": 0.45170257123002083,
"grad_norm": 0.796875,
"learning_rate": 3.208680391650319e-05,
"loss": 1.0596266984939575,
"step": 975
},
{
"epoch": 0.45216585591846187,
"grad_norm": 0.8828125,
"learning_rate": 3.205591811438744e-05,
"loss": 0.9719846844673157,
"step": 976
},
{
"epoch": 0.45262914060690296,
"grad_norm": 0.91796875,
"learning_rate": 3.2025017269191223e-05,
"loss": 1.1913756132125854,
"step": 977
},
{
"epoch": 0.453092425295344,
"grad_norm": 0.7578125,
"learning_rate": 3.199410143861671e-05,
"loss": 0.8659987449645996,
"step": 978
},
{
"epoch": 0.453555709983785,
"grad_norm": 0.94140625,
"learning_rate": 3.196317068039405e-05,
"loss": 1.1136746406555176,
"step": 979
},
{
"epoch": 0.45401899467222606,
"grad_norm": 0.80859375,
"learning_rate": 3.193222505228125e-05,
"loss": 0.954369068145752,
"step": 980
},
{
"epoch": 0.45448227936066715,
"grad_norm": 0.85546875,
"learning_rate": 3.1901264612064124e-05,
"loss": 1.025739073753357,
"step": 981
},
{
"epoch": 0.4549455640491082,
"grad_norm": 0.77734375,
"learning_rate": 3.1870289417556095e-05,
"loss": 1.1050188541412354,
"step": 982
},
{
"epoch": 0.4554088487375492,
"grad_norm": 0.9765625,
"learning_rate": 3.1839299526598156e-05,
"loss": 0.9074011445045471,
"step": 983
},
{
"epoch": 0.45587213342599026,
"grad_norm": 0.80078125,
"learning_rate": 3.180829499705879e-05,
"loss": 0.8634052872657776,
"step": 984
},
{
"epoch": 0.4563354181144313,
"grad_norm": 0.8203125,
"learning_rate": 3.1777275886833714e-05,
"loss": 0.9090867638587952,
"step": 985
},
{
"epoch": 0.4567987028028724,
"grad_norm": 0.71875,
"learning_rate": 3.1746242253845975e-05,
"loss": 0.90366530418396,
"step": 986
},
{
"epoch": 0.4572619874913134,
"grad_norm": 0.9296875,
"learning_rate": 3.1715194156045676e-05,
"loss": 0.9696229696273804,
"step": 987
},
{
"epoch": 0.45772527217975445,
"grad_norm": 0.73828125,
"learning_rate": 3.168413165140996e-05,
"loss": 0.9196410179138184,
"step": 988
},
{
"epoch": 0.4581885568681955,
"grad_norm": 0.765625,
"learning_rate": 3.165305479794285e-05,
"loss": 1.135054111480713,
"step": 989
},
{
"epoch": 0.4586518415566366,
"grad_norm": 0.87890625,
"learning_rate": 3.162196365367518e-05,
"loss": 0.9602132439613342,
"step": 990
},
{
"epoch": 0.4591151262450776,
"grad_norm": 0.8671875,
"learning_rate": 3.1590858276664475e-05,
"loss": 0.875511884689331,
"step": 991
},
{
"epoch": 0.45957841093351864,
"grad_norm": 0.7578125,
"learning_rate": 3.155973872499481e-05,
"loss": 0.9517480731010437,
"step": 992
},
{
"epoch": 0.4600416956219597,
"grad_norm": 0.9140625,
"learning_rate": 3.152860505677676e-05,
"loss": 1.126634120941162,
"step": 993
},
{
"epoch": 0.46050498031040077,
"grad_norm": 0.74609375,
"learning_rate": 3.149745733014724e-05,
"loss": 1.0964951515197754,
"step": 994
},
{
"epoch": 0.4609682649988418,
"grad_norm": 0.8671875,
"learning_rate": 3.146629560326942e-05,
"loss": 1.0599464178085327,
"step": 995
},
{
"epoch": 0.46143154968728284,
"grad_norm": 0.8828125,
"learning_rate": 3.143511993433263e-05,
"loss": 0.894392728805542,
"step": 996
},
{
"epoch": 0.4618948343757239,
"grad_norm": 0.7109375,
"learning_rate": 3.140393038155219e-05,
"loss": 0.9883759021759033,
"step": 997
},
{
"epoch": 0.4623581190641649,
"grad_norm": 0.81640625,
"learning_rate": 3.1372727003169414e-05,
"loss": 1.1054998636245728,
"step": 998
},
{
"epoch": 0.462821403752606,
"grad_norm": 0.73828125,
"learning_rate": 3.1341509857451374e-05,
"loss": 0.9076305031776428,
"step": 999
},
{
"epoch": 0.46328468844104703,
"grad_norm": 0.91796875,
"learning_rate": 3.131027900269087e-05,
"loss": 1.0859215259552002,
"step": 1000
},
{
"epoch": 0.46374797312948807,
"grad_norm": 0.78515625,
"learning_rate": 3.12790344972063e-05,
"loss": 0.8842138648033142,
"step": 1001
},
{
"epoch": 0.4642112578179291,
"grad_norm": 0.96875,
"learning_rate": 3.1247776399341574e-05,
"loss": 0.953213632106781,
"step": 1002
},
{
"epoch": 0.4646745425063702,
"grad_norm": 0.77734375,
"learning_rate": 3.121650476746595e-05,
"loss": 0.9613085389137268,
"step": 1003
},
{
"epoch": 0.4651378271948112,
"grad_norm": 0.7578125,
"learning_rate": 3.1185219659973974e-05,
"loss": 1.0341384410858154,
"step": 1004
},
{
"epoch": 0.46560111188325226,
"grad_norm": 0.85546875,
"learning_rate": 3.115392113528536e-05,
"loss": 0.8540540337562561,
"step": 1005
},
{
"epoch": 0.4660643965716933,
"grad_norm": 0.80859375,
"learning_rate": 3.112260925184487e-05,
"loss": 0.8874945044517517,
"step": 1006
},
{
"epoch": 0.46652768126013433,
"grad_norm": 0.83984375,
"learning_rate": 3.1091284068122206e-05,
"loss": 0.9373153448104858,
"step": 1007
},
{
"epoch": 0.4669909659485754,
"grad_norm": 0.83984375,
"learning_rate": 3.1059945642611913e-05,
"loss": 0.9182353615760803,
"step": 1008
},
{
"epoch": 0.46745425063701646,
"grad_norm": 0.84765625,
"learning_rate": 3.1028594033833274e-05,
"loss": 0.9852114319801331,
"step": 1009
},
{
"epoch": 0.4679175353254575,
"grad_norm": 0.9609375,
"learning_rate": 3.099722930033017e-05,
"loss": 1.0278407335281372,
"step": 1010
},
{
"epoch": 0.4683808200138985,
"grad_norm": 0.79296875,
"learning_rate": 3.0965851500670984e-05,
"loss": 0.9236195683479309,
"step": 1011
},
{
"epoch": 0.4688441047023396,
"grad_norm": 0.7421875,
"learning_rate": 3.093446069344854e-05,
"loss": 0.8953359127044678,
"step": 1012
},
{
"epoch": 0.46930738939078065,
"grad_norm": 0.81640625,
"learning_rate": 3.09030569372799e-05,
"loss": 0.8945424556732178,
"step": 1013
},
{
"epoch": 0.4697706740792217,
"grad_norm": 0.86328125,
"learning_rate": 3.087164029080634e-05,
"loss": 0.9093310236930847,
"step": 1014
},
{
"epoch": 0.4702339587676627,
"grad_norm": 0.81640625,
"learning_rate": 3.084021081269319e-05,
"loss": 1.0238938331604004,
"step": 1015
},
{
"epoch": 0.47069724345610375,
"grad_norm": 0.76171875,
"learning_rate": 3.080876856162976e-05,
"loss": 0.9495306015014648,
"step": 1016
},
{
"epoch": 0.47116052814454484,
"grad_norm": 0.77734375,
"learning_rate": 3.0777313596329175e-05,
"loss": 0.8257219791412354,
"step": 1017
},
{
"epoch": 0.4716238128329859,
"grad_norm": 0.7890625,
"learning_rate": 3.074584597552834e-05,
"loss": 1.2120308876037598,
"step": 1018
},
{
"epoch": 0.4720870975214269,
"grad_norm": 0.85546875,
"learning_rate": 3.071436575798779e-05,
"loss": 0.8118107318878174,
"step": 1019
},
{
"epoch": 0.47255038220986795,
"grad_norm": 0.75,
"learning_rate": 3.068287300249154e-05,
"loss": 0.9661107659339905,
"step": 1020
},
{
"epoch": 0.47301366689830904,
"grad_norm": 0.73828125,
"learning_rate": 3.065136776784706e-05,
"loss": 0.9633262753486633,
"step": 1021
},
{
"epoch": 0.47347695158675007,
"grad_norm": 0.8046875,
"learning_rate": 3.061985011288511e-05,
"loss": 0.8450291752815247,
"step": 1022
},
{
"epoch": 0.4739402362751911,
"grad_norm": 0.8671875,
"learning_rate": 3.0588320096459646e-05,
"loss": 0.8301048278808594,
"step": 1023
},
{
"epoch": 0.47440352096363214,
"grad_norm": 0.87890625,
"learning_rate": 3.0556777777447695e-05,
"loss": 1.1722376346588135,
"step": 1024
},
{
"epoch": 0.4748668056520732,
"grad_norm": 0.80078125,
"learning_rate": 3.0525223214749266e-05,
"loss": 1.0436758995056152,
"step": 1025
},
{
"epoch": 0.47533009034051427,
"grad_norm": 0.8515625,
"learning_rate": 3.0493656467287242e-05,
"loss": 1.0242863893508911,
"step": 1026
},
{
"epoch": 0.4757933750289553,
"grad_norm": 0.84375,
"learning_rate": 3.046207759400723e-05,
"loss": 1.0861283540725708,
"step": 1027
},
{
"epoch": 0.47625665971739634,
"grad_norm": 0.83984375,
"learning_rate": 3.043048665387751e-05,
"loss": 1.0913084745407104,
"step": 1028
},
{
"epoch": 0.47671994440583737,
"grad_norm": 0.828125,
"learning_rate": 3.0398883705888867e-05,
"loss": 0.9872074723243713,
"step": 1029
},
{
"epoch": 0.47718322909427846,
"grad_norm": 0.80859375,
"learning_rate": 3.0367268809054554e-05,
"loss": 0.9497346878051758,
"step": 1030
},
{
"epoch": 0.4776465137827195,
"grad_norm": 0.78515625,
"learning_rate": 3.0335642022410072e-05,
"loss": 0.9556658864021301,
"step": 1031
},
{
"epoch": 0.47810979847116053,
"grad_norm": 1.34375,
"learning_rate": 3.0304003405013176e-05,
"loss": 1.1097627878189087,
"step": 1032
},
{
"epoch": 0.47857308315960156,
"grad_norm": 0.89453125,
"learning_rate": 3.0272353015943694e-05,
"loss": 1.0165081024169922,
"step": 1033
},
{
"epoch": 0.4790363678480426,
"grad_norm": 0.7890625,
"learning_rate": 3.024069091430343e-05,
"loss": 0.957397997379303,
"step": 1034
},
{
"epoch": 0.4794996525364837,
"grad_norm": 0.84375,
"learning_rate": 3.0209017159216076e-05,
"loss": 0.8591142892837524,
"step": 1035
},
{
"epoch": 0.4799629372249247,
"grad_norm": 0.75390625,
"learning_rate": 3.0177331809827064e-05,
"loss": 0.8509551286697388,
"step": 1036
},
{
"epoch": 0.48042622191336576,
"grad_norm": 0.95703125,
"learning_rate": 3.0145634925303502e-05,
"loss": 0.9679578542709351,
"step": 1037
},
{
"epoch": 0.4808895066018068,
"grad_norm": 0.9296875,
"learning_rate": 3.011392656483401e-05,
"loss": 1.2282400131225586,
"step": 1038
},
{
"epoch": 0.4813527912902479,
"grad_norm": 0.765625,
"learning_rate": 3.0082206787628658e-05,
"loss": 0.8720540404319763,
"step": 1039
},
{
"epoch": 0.4818160759786889,
"grad_norm": 0.734375,
"learning_rate": 3.005047565291882e-05,
"loss": 1.1174383163452148,
"step": 1040
},
{
"epoch": 0.48227936066712995,
"grad_norm": 0.80078125,
"learning_rate": 3.0018733219957094e-05,
"loss": 0.9691076278686523,
"step": 1041
},
{
"epoch": 0.482742645355571,
"grad_norm": 0.81640625,
"learning_rate": 2.998697954801717e-05,
"loss": 1.0722172260284424,
"step": 1042
},
{
"epoch": 0.483205930044012,
"grad_norm": 0.9765625,
"learning_rate": 2.9955214696393707e-05,
"loss": 1.0778554677963257,
"step": 1043
},
{
"epoch": 0.4836692147324531,
"grad_norm": 0.84375,
"learning_rate": 2.9923438724402278e-05,
"loss": 0.9753159880638123,
"step": 1044
},
{
"epoch": 0.48413249942089415,
"grad_norm": 0.8046875,
"learning_rate": 2.989165169137918e-05,
"loss": 0.9562061429023743,
"step": 1045
},
{
"epoch": 0.4845957841093352,
"grad_norm": 0.75,
"learning_rate": 2.9859853656681395e-05,
"loss": 0.8260341882705688,
"step": 1046
},
{
"epoch": 0.4850590687977762,
"grad_norm": 0.87109375,
"learning_rate": 2.982804467968643e-05,
"loss": 1.0546704530715942,
"step": 1047
},
{
"epoch": 0.4855223534862173,
"grad_norm": 0.83984375,
"learning_rate": 2.979622481979224e-05,
"loss": 0.9112711548805237,
"step": 1048
},
{
"epoch": 0.48598563817465834,
"grad_norm": 0.79296875,
"learning_rate": 2.9764394136417088e-05,
"loss": 0.9864487648010254,
"step": 1049
},
{
"epoch": 0.4864489228630994,
"grad_norm": 0.75390625,
"learning_rate": 2.973255268899945e-05,
"loss": 0.9202637076377869,
"step": 1050
},
{
"epoch": 0.4869122075515404,
"grad_norm": 0.90625,
"learning_rate": 2.970070053699792e-05,
"loss": 1.0404424667358398,
"step": 1051
},
{
"epoch": 0.48737549223998144,
"grad_norm": 0.89453125,
"learning_rate": 2.9668837739891063e-05,
"loss": 1.0003234148025513,
"step": 1052
},
{
"epoch": 0.48783877692842254,
"grad_norm": 0.890625,
"learning_rate": 2.9636964357177317e-05,
"loss": 0.9466184377670288,
"step": 1053
},
{
"epoch": 0.48830206161686357,
"grad_norm": 0.828125,
"learning_rate": 2.9605080448374903e-05,
"loss": 1.1342371702194214,
"step": 1054
},
{
"epoch": 0.4887653463053046,
"grad_norm": 0.8828125,
"learning_rate": 2.9573186073021696e-05,
"loss": 1.0444382429122925,
"step": 1055
},
{
"epoch": 0.48922863099374564,
"grad_norm": 0.67578125,
"learning_rate": 2.95412812906751e-05,
"loss": 0.9123279452323914,
"step": 1056
},
{
"epoch": 0.48969191568218673,
"grad_norm": 0.9453125,
"learning_rate": 2.9509366160911977e-05,
"loss": 0.9924875497817993,
"step": 1057
},
{
"epoch": 0.49015520037062776,
"grad_norm": 0.703125,
"learning_rate": 2.9477440743328484e-05,
"loss": 0.9442932605743408,
"step": 1058
},
{
"epoch": 0.4906184850590688,
"grad_norm": 0.8984375,
"learning_rate": 2.944550509754e-05,
"loss": 0.9229353666305542,
"step": 1059
},
{
"epoch": 0.49108176974750983,
"grad_norm": 0.84765625,
"learning_rate": 2.9413559283181028e-05,
"loss": 0.8888018727302551,
"step": 1060
},
{
"epoch": 0.49154505443595087,
"grad_norm": 0.7265625,
"learning_rate": 2.9381603359905006e-05,
"loss": 0.8846039175987244,
"step": 1061
},
{
"epoch": 0.49200833912439196,
"grad_norm": 0.9921875,
"learning_rate": 2.9349637387384297e-05,
"loss": 1.0494961738586426,
"step": 1062
},
{
"epoch": 0.492471623812833,
"grad_norm": 0.8515625,
"learning_rate": 2.9317661425310004e-05,
"loss": 0.9032423496246338,
"step": 1063
},
{
"epoch": 0.492934908501274,
"grad_norm": 0.81640625,
"learning_rate": 2.928567553339189e-05,
"loss": 1.0323213338851929,
"step": 1064
},
{
"epoch": 0.49339819318971506,
"grad_norm": 0.734375,
"learning_rate": 2.925367977135828e-05,
"loss": 1.0107799768447876,
"step": 1065
},
{
"epoch": 0.49386147787815615,
"grad_norm": 0.77734375,
"learning_rate": 2.9221674198955883e-05,
"loss": 1.0235852003097534,
"step": 1066
},
{
"epoch": 0.4943247625665972,
"grad_norm": 0.9609375,
"learning_rate": 2.9189658875949772e-05,
"loss": 1.0340992212295532,
"step": 1067
},
{
"epoch": 0.4947880472550382,
"grad_norm": 0.8515625,
"learning_rate": 2.9157633862123216e-05,
"loss": 0.9333528280258179,
"step": 1068
},
{
"epoch": 0.49525133194347926,
"grad_norm": 0.98828125,
"learning_rate": 2.9125599217277566e-05,
"loss": 1.2312861680984497,
"step": 1069
},
{
"epoch": 0.4957146166319203,
"grad_norm": 0.796875,
"learning_rate": 2.9093555001232157e-05,
"loss": 0.8942990303039551,
"step": 1070
},
{
"epoch": 0.4961779013203614,
"grad_norm": 0.78125,
"learning_rate": 2.9061501273824226e-05,
"loss": 0.9628137350082397,
"step": 1071
},
{
"epoch": 0.4966411860088024,
"grad_norm": 0.73046875,
"learning_rate": 2.902943809490874e-05,
"loss": 0.9911953806877136,
"step": 1072
},
{
"epoch": 0.49710447069724345,
"grad_norm": 0.77734375,
"learning_rate": 2.8997365524358323e-05,
"loss": 0.8831644058227539,
"step": 1073
},
{
"epoch": 0.4975677553856845,
"grad_norm": 0.8359375,
"learning_rate": 2.8965283622063147e-05,
"loss": 1.0550167560577393,
"step": 1074
},
{
"epoch": 0.4980310400741256,
"grad_norm": 0.890625,
"learning_rate": 2.89331924479308e-05,
"loss": 0.9322593212127686,
"step": 1075
},
{
"epoch": 0.4984943247625666,
"grad_norm": 0.81640625,
"learning_rate": 2.8901092061886184e-05,
"loss": 0.9539169073104858,
"step": 1076
},
{
"epoch": 0.49895760945100764,
"grad_norm": 0.8203125,
"learning_rate": 2.88689825238714e-05,
"loss": 0.9450077414512634,
"step": 1077
},
{
"epoch": 0.4994208941394487,
"grad_norm": 0.734375,
"learning_rate": 2.883686389384565e-05,
"loss": 0.9067424535751343,
"step": 1078
},
{
"epoch": 0.4998841788278897,
"grad_norm": 0.7421875,
"learning_rate": 2.8804736231785098e-05,
"loss": 0.8575161695480347,
"step": 1079
},
{
"epoch": 0.5003474635163307,
"grad_norm": 0.88671875,
"learning_rate": 2.87725995976828e-05,
"loss": 0.9551107287406921,
"step": 1080
},
{
"epoch": 0.5008107482047718,
"grad_norm": 0.984375,
"learning_rate": 2.8740454051548526e-05,
"loss": 1.1050188541412354,
"step": 1081
},
{
"epoch": 0.5012740328932129,
"grad_norm": 0.86328125,
"learning_rate": 2.8708299653408722e-05,
"loss": 0.9398777484893799,
"step": 1082
},
{
"epoch": 0.501737317581654,
"grad_norm": 0.75390625,
"learning_rate": 2.8676136463306354e-05,
"loss": 0.9366370439529419,
"step": 1083
},
{
"epoch": 0.502200602270095,
"grad_norm": 0.79296875,
"learning_rate": 2.8643964541300793e-05,
"loss": 0.916124701499939,
"step": 1084
},
{
"epoch": 0.502663886958536,
"grad_norm": 0.7890625,
"learning_rate": 2.861178394746774e-05,
"loss": 0.94605952501297,
"step": 1085
},
{
"epoch": 0.5031271716469771,
"grad_norm": 0.81640625,
"learning_rate": 2.8579594741899052e-05,
"loss": 0.812589168548584,
"step": 1086
},
{
"epoch": 0.5035904563354181,
"grad_norm": 0.87109375,
"learning_rate": 2.8547396984702716e-05,
"loss": 1.3426378965377808,
"step": 1087
},
{
"epoch": 0.5040537410238591,
"grad_norm": 0.71875,
"learning_rate": 2.851519073600265e-05,
"loss": 0.992534875869751,
"step": 1088
},
{
"epoch": 0.5045170257123002,
"grad_norm": 0.75,
"learning_rate": 2.8482976055938628e-05,
"loss": 1.0410428047180176,
"step": 1089
},
{
"epoch": 0.5049803104007412,
"grad_norm": 0.81640625,
"learning_rate": 2.8450753004666203e-05,
"loss": 0.9198648929595947,
"step": 1090
},
{
"epoch": 0.5054435950891824,
"grad_norm": 0.8828125,
"learning_rate": 2.841852164235652e-05,
"loss": 1.0291709899902344,
"step": 1091
},
{
"epoch": 0.5059068797776234,
"grad_norm": 1.21875,
"learning_rate": 2.8386282029196264e-05,
"loss": 1.1194522380828857,
"step": 1092
},
{
"epoch": 0.5063701644660644,
"grad_norm": 0.73828125,
"learning_rate": 2.8354034225387525e-05,
"loss": 0.843272864818573,
"step": 1093
},
{
"epoch": 0.5068334491545055,
"grad_norm": 0.8359375,
"learning_rate": 2.832177829114769e-05,
"loss": 0.8743926882743835,
"step": 1094
},
{
"epoch": 0.5072967338429465,
"grad_norm": 0.96484375,
"learning_rate": 2.8289514286709315e-05,
"loss": 1.130947470664978,
"step": 1095
},
{
"epoch": 0.5077600185313875,
"grad_norm": 0.859375,
"learning_rate": 2.825724227232004e-05,
"loss": 0.961344838142395,
"step": 1096
},
{
"epoch": 0.5082233032198286,
"grad_norm": 0.796875,
"learning_rate": 2.8224962308242467e-05,
"loss": 0.9592534303665161,
"step": 1097
},
{
"epoch": 0.5086865879082696,
"grad_norm": 0.921875,
"learning_rate": 2.8192674454754016e-05,
"loss": 0.9232071042060852,
"step": 1098
},
{
"epoch": 0.5091498725967106,
"grad_norm": 0.828125,
"learning_rate": 2.8160378772146868e-05,
"loss": 0.9333939552307129,
"step": 1099
},
{
"epoch": 0.5096131572851518,
"grad_norm": 0.78125,
"learning_rate": 2.8128075320727803e-05,
"loss": 0.8948829174041748,
"step": 1100
},
{
"epoch": 0.5100764419735928,
"grad_norm": 0.87109375,
"learning_rate": 2.8095764160818127e-05,
"loss": 1.1355596780776978,
"step": 1101
},
{
"epoch": 0.5105397266620338,
"grad_norm": 0.93359375,
"learning_rate": 2.8063445352753518e-05,
"loss": 0.9624730944633484,
"step": 1102
},
{
"epoch": 0.5110030113504749,
"grad_norm": 0.8515625,
"learning_rate": 2.8031118956883958e-05,
"loss": 1.002945899963379,
"step": 1103
},
{
"epoch": 0.5114662960389159,
"grad_norm": 0.875,
"learning_rate": 2.7998785033573583e-05,
"loss": 0.8556519746780396,
"step": 1104
},
{
"epoch": 0.511929580727357,
"grad_norm": 1.1875,
"learning_rate": 2.7966443643200585e-05,
"loss": 0.9405410289764404,
"step": 1105
},
{
"epoch": 0.512392865415798,
"grad_norm": 0.875,
"learning_rate": 2.7934094846157126e-05,
"loss": 1.1397373676300049,
"step": 1106
},
{
"epoch": 0.512856150104239,
"grad_norm": 0.78125,
"learning_rate": 2.7901738702849144e-05,
"loss": 0.7816023230552673,
"step": 1107
},
{
"epoch": 0.51331943479268,
"grad_norm": 0.84375,
"learning_rate": 2.7869375273696355e-05,
"loss": 0.8463333249092102,
"step": 1108
},
{
"epoch": 0.5137827194811212,
"grad_norm": 0.7890625,
"learning_rate": 2.7837004619132037e-05,
"loss": 0.8399050831794739,
"step": 1109
},
{
"epoch": 0.5142460041695622,
"grad_norm": 0.76953125,
"learning_rate": 2.7804626799602988e-05,
"loss": 1.0282368659973145,
"step": 1110
},
{
"epoch": 0.5147092888580033,
"grad_norm": 0.8515625,
"learning_rate": 2.7772241875569362e-05,
"loss": 0.944800853729248,
"step": 1111
},
{
"epoch": 0.5151725735464443,
"grad_norm": 0.90234375,
"learning_rate": 2.77398499075046e-05,
"loss": 0.9259645938873291,
"step": 1112
},
{
"epoch": 0.5156358582348853,
"grad_norm": 0.859375,
"learning_rate": 2.7707450955895287e-05,
"loss": 1.0247880220413208,
"step": 1113
},
{
"epoch": 0.5160991429233264,
"grad_norm": 0.83984375,
"learning_rate": 2.7675045081241056e-05,
"loss": 0.8392752408981323,
"step": 1114
},
{
"epoch": 0.5165624276117674,
"grad_norm": 0.7890625,
"learning_rate": 2.7642632344054442e-05,
"loss": 0.9830076098442078,
"step": 1115
},
{
"epoch": 0.5170257123002084,
"grad_norm": 0.83984375,
"learning_rate": 2.761021280486082e-05,
"loss": 0.8453760147094727,
"step": 1116
},
{
"epoch": 0.5174889969886495,
"grad_norm": 0.81640625,
"learning_rate": 2.7577786524198273e-05,
"loss": 0.9899021983146667,
"step": 1117
},
{
"epoch": 0.5179522816770906,
"grad_norm": 0.66796875,
"learning_rate": 2.7545353562617444e-05,
"loss": 0.8067998290061951,
"step": 1118
},
{
"epoch": 0.5184155663655317,
"grad_norm": 0.9609375,
"learning_rate": 2.7512913980681483e-05,
"loss": 1.0544780492782593,
"step": 1119
},
{
"epoch": 0.5188788510539727,
"grad_norm": 0.83203125,
"learning_rate": 2.7480467838965872e-05,
"loss": 1.0132243633270264,
"step": 1120
},
{
"epoch": 0.5193421357424137,
"grad_norm": 0.765625,
"learning_rate": 2.744801519805837e-05,
"loss": 0.9769008755683899,
"step": 1121
},
{
"epoch": 0.5198054204308548,
"grad_norm": 0.82421875,
"learning_rate": 2.7415556118558848e-05,
"loss": 0.9457086324691772,
"step": 1122
},
{
"epoch": 0.5202687051192958,
"grad_norm": 0.8984375,
"learning_rate": 2.7383090661079215e-05,
"loss": 0.9511300921440125,
"step": 1123
},
{
"epoch": 0.5207319898077368,
"grad_norm": 0.80859375,
"learning_rate": 2.7350618886243286e-05,
"loss": 0.9526143670082092,
"step": 1124
},
{
"epoch": 0.5211952744961779,
"grad_norm": 0.80078125,
"learning_rate": 2.731814085468667e-05,
"loss": 0.8233035206794739,
"step": 1125
},
{
"epoch": 0.5216585591846189,
"grad_norm": 0.90625,
"learning_rate": 2.7285656627056673e-05,
"loss": 0.8465626239776611,
"step": 1126
},
{
"epoch": 0.52212184387306,
"grad_norm": 0.8203125,
"learning_rate": 2.7253166264012142e-05,
"loss": 0.8446078896522522,
"step": 1127
},
{
"epoch": 0.5225851285615011,
"grad_norm": 0.8359375,
"learning_rate": 2.7220669826223418e-05,
"loss": 0.8863167762756348,
"step": 1128
},
{
"epoch": 0.5230484132499421,
"grad_norm": 0.8984375,
"learning_rate": 2.7188167374372155e-05,
"loss": 0.9343461394309998,
"step": 1129
},
{
"epoch": 0.5235116979383831,
"grad_norm": 0.85546875,
"learning_rate": 2.715565896915125e-05,
"loss": 0.9686758518218994,
"step": 1130
},
{
"epoch": 0.5239749826268242,
"grad_norm": 0.84375,
"learning_rate": 2.7123144671264715e-05,
"loss": 0.9815382361412048,
"step": 1131
},
{
"epoch": 0.5244382673152652,
"grad_norm": 0.80859375,
"learning_rate": 2.7090624541427566e-05,
"loss": 1.1032721996307373,
"step": 1132
},
{
"epoch": 0.5249015520037063,
"grad_norm": 0.76171875,
"learning_rate": 2.7058098640365716e-05,
"loss": 0.9356403350830078,
"step": 1133
},
{
"epoch": 0.5253648366921473,
"grad_norm": 0.8515625,
"learning_rate": 2.702556702881584e-05,
"loss": 0.8864633440971375,
"step": 1134
},
{
"epoch": 0.5258281213805883,
"grad_norm": 0.75,
"learning_rate": 2.6993029767525295e-05,
"loss": 0.8282592296600342,
"step": 1135
},
{
"epoch": 0.5262914060690295,
"grad_norm": 0.82421875,
"learning_rate": 2.6960486917251967e-05,
"loss": 0.9786428213119507,
"step": 1136
},
{
"epoch": 0.5267546907574705,
"grad_norm": 0.91796875,
"learning_rate": 2.6927938538764197e-05,
"loss": 0.8919640183448792,
"step": 1137
},
{
"epoch": 0.5272179754459115,
"grad_norm": 0.8359375,
"learning_rate": 2.6895384692840634e-05,
"loss": 1.1693425178527832,
"step": 1138
},
{
"epoch": 0.5276812601343526,
"grad_norm": 0.83984375,
"learning_rate": 2.6862825440270143e-05,
"loss": 0.961223304271698,
"step": 1139
},
{
"epoch": 0.5281445448227936,
"grad_norm": 0.9453125,
"learning_rate": 2.683026084185169e-05,
"loss": 1.0943102836608887,
"step": 1140
},
{
"epoch": 0.5286078295112346,
"grad_norm": 0.77734375,
"learning_rate": 2.679769095839422e-05,
"loss": 0.9912340641021729,
"step": 1141
},
{
"epoch": 0.5290711141996757,
"grad_norm": 0.85546875,
"learning_rate": 2.6765115850716548e-05,
"loss": 1.1055166721343994,
"step": 1142
},
{
"epoch": 0.5295343988881167,
"grad_norm": 0.76171875,
"learning_rate": 2.673253557964724e-05,
"loss": 0.9419571161270142,
"step": 1143
},
{
"epoch": 0.5299976835765577,
"grad_norm": 0.8828125,
"learning_rate": 2.66999502060245e-05,
"loss": 0.949909508228302,
"step": 1144
},
{
"epoch": 0.5304609682649989,
"grad_norm": 0.953125,
"learning_rate": 2.6667359790696074e-05,
"loss": 0.9741145372390747,
"step": 1145
},
{
"epoch": 0.5309242529534399,
"grad_norm": 0.87109375,
"learning_rate": 2.6634764394519106e-05,
"loss": 0.9066289663314819,
"step": 1146
},
{
"epoch": 0.531387537641881,
"grad_norm": 0.94921875,
"learning_rate": 2.660216407836006e-05,
"loss": 1.090651512145996,
"step": 1147
},
{
"epoch": 0.531850822330322,
"grad_norm": 0.80859375,
"learning_rate": 2.656955890309457e-05,
"loss": 1.0257591009140015,
"step": 1148
},
{
"epoch": 0.532314107018763,
"grad_norm": 0.92578125,
"learning_rate": 2.6536948929607355e-05,
"loss": 1.2080005407333374,
"step": 1149
},
{
"epoch": 0.5327773917072041,
"grad_norm": 0.796875,
"learning_rate": 2.6504334218792082e-05,
"loss": 0.9306067228317261,
"step": 1150
},
{
"epoch": 0.5332406763956451,
"grad_norm": 0.796875,
"learning_rate": 2.6471714831551293e-05,
"loss": 1.0611542463302612,
"step": 1151
},
{
"epoch": 0.5337039610840861,
"grad_norm": 0.80859375,
"learning_rate": 2.6439090828796214e-05,
"loss": 0.9698439240455627,
"step": 1152
},
{
"epoch": 0.5341672457725272,
"grad_norm": 0.8515625,
"learning_rate": 2.6406462271446732e-05,
"loss": 0.8757132887840271,
"step": 1153
},
{
"epoch": 0.5346305304609683,
"grad_norm": 0.703125,
"learning_rate": 2.6373829220431218e-05,
"loss": 0.7780014872550964,
"step": 1154
},
{
"epoch": 0.5350938151494093,
"grad_norm": 1.0390625,
"learning_rate": 2.6341191736686438e-05,
"loss": 1.0552921295166016,
"step": 1155
},
{
"epoch": 0.5355570998378504,
"grad_norm": 0.84765625,
"learning_rate": 2.6308549881157446e-05,
"loss": 0.8510526418685913,
"step": 1156
},
{
"epoch": 0.5360203845262914,
"grad_norm": 0.7265625,
"learning_rate": 2.6275903714797442e-05,
"loss": 1.0102177858352661,
"step": 1157
},
{
"epoch": 0.5364836692147325,
"grad_norm": 0.78515625,
"learning_rate": 2.624325329856769e-05,
"loss": 0.8171606063842773,
"step": 1158
},
{
"epoch": 0.5369469539031735,
"grad_norm": 0.76171875,
"learning_rate": 2.621059869343739e-05,
"loss": 0.8785892128944397,
"step": 1159
},
{
"epoch": 0.5374102385916145,
"grad_norm": 0.8125,
"learning_rate": 2.6177939960383546e-05,
"loss": 0.7128728628158569,
"step": 1160
},
{
"epoch": 0.5378735232800556,
"grad_norm": 0.77734375,
"learning_rate": 2.6145277160390888e-05,
"loss": 0.9567815661430359,
"step": 1161
},
{
"epoch": 0.5383368079684966,
"grad_norm": 0.83984375,
"learning_rate": 2.611261035445173e-05,
"loss": 1.065600872039795,
"step": 1162
},
{
"epoch": 0.5388000926569377,
"grad_norm": 0.87109375,
"learning_rate": 2.6079939603565884e-05,
"loss": 0.9629444479942322,
"step": 1163
},
{
"epoch": 0.5392633773453788,
"grad_norm": 0.7265625,
"learning_rate": 2.6047264968740505e-05,
"loss": 1.0867857933044434,
"step": 1164
},
{
"epoch": 0.5397266620338198,
"grad_norm": 0.83203125,
"learning_rate": 2.6014586510990015e-05,
"loss": 1.0058200359344482,
"step": 1165
},
{
"epoch": 0.5401899467222608,
"grad_norm": 0.91015625,
"learning_rate": 2.598190429133598e-05,
"loss": 1.0715608596801758,
"step": 1166
},
{
"epoch": 0.5406532314107019,
"grad_norm": 0.7890625,
"learning_rate": 2.5949218370806967e-05,
"loss": 0.9926679730415344,
"step": 1167
},
{
"epoch": 0.5411165160991429,
"grad_norm": 0.703125,
"learning_rate": 2.591652881043848e-05,
"loss": 0.9041029810905457,
"step": 1168
},
{
"epoch": 0.5415798007875839,
"grad_norm": 0.72265625,
"learning_rate": 2.5883835671272798e-05,
"loss": 0.8764104843139648,
"step": 1169
},
{
"epoch": 0.542043085476025,
"grad_norm": 0.83203125,
"learning_rate": 2.5851139014358903e-05,
"loss": 0.8071422576904297,
"step": 1170
},
{
"epoch": 0.542506370164466,
"grad_norm": 0.76171875,
"learning_rate": 2.581843890075233e-05,
"loss": 1.064335584640503,
"step": 1171
},
{
"epoch": 0.5429696548529072,
"grad_norm": 0.8125,
"learning_rate": 2.5785735391515083e-05,
"loss": 0.9307787418365479,
"step": 1172
},
{
"epoch": 0.5434329395413482,
"grad_norm": 0.73046875,
"learning_rate": 2.5753028547715487e-05,
"loss": 0.8384194374084473,
"step": 1173
},
{
"epoch": 0.5438962242297892,
"grad_norm": 0.78515625,
"learning_rate": 2.572031843042813e-05,
"loss": 0.9470226764678955,
"step": 1174
},
{
"epoch": 0.5443595089182303,
"grad_norm": 0.8671875,
"learning_rate": 2.5687605100733652e-05,
"loss": 1.0152946710586548,
"step": 1175
},
{
"epoch": 0.5448227936066713,
"grad_norm": 0.8046875,
"learning_rate": 2.5654888619718762e-05,
"loss": 0.8778582811355591,
"step": 1176
},
{
"epoch": 0.5452860782951123,
"grad_norm": 0.80078125,
"learning_rate": 2.5622169048475996e-05,
"loss": 1.002034306526184,
"step": 1177
},
{
"epoch": 0.5457493629835534,
"grad_norm": 0.77734375,
"learning_rate": 2.5589446448103687e-05,
"loss": 1.00825834274292,
"step": 1178
},
{
"epoch": 0.5462126476719944,
"grad_norm": 0.734375,
"learning_rate": 2.5556720879705834e-05,
"loss": 0.8981512784957886,
"step": 1179
},
{
"epoch": 0.5466759323604354,
"grad_norm": 1.1484375,
"learning_rate": 2.552399240439195e-05,
"loss": 0.9638339281082153,
"step": 1180
},
{
"epoch": 0.5471392170488766,
"grad_norm": 0.82421875,
"learning_rate": 2.5491261083277014e-05,
"loss": 0.832276463508606,
"step": 1181
},
{
"epoch": 0.5476025017373176,
"grad_norm": 0.8046875,
"learning_rate": 2.545852697748127e-05,
"loss": 0.7502810955047607,
"step": 1182
},
{
"epoch": 0.5480657864257586,
"grad_norm": 0.66796875,
"learning_rate": 2.5425790148130212e-05,
"loss": 0.8867089748382568,
"step": 1183
},
{
"epoch": 0.5485290711141997,
"grad_norm": 0.7421875,
"learning_rate": 2.5393050656354378e-05,
"loss": 1.0290521383285522,
"step": 1184
},
{
"epoch": 0.5489923558026407,
"grad_norm": 0.828125,
"learning_rate": 2.5360308563289304e-05,
"loss": 0.9290564060211182,
"step": 1185
},
{
"epoch": 0.5494556404910818,
"grad_norm": 0.84765625,
"learning_rate": 2.5327563930075376e-05,
"loss": 1.1053215265274048,
"step": 1186
},
{
"epoch": 0.5499189251795228,
"grad_norm": 0.9140625,
"learning_rate": 2.5294816817857718e-05,
"loss": 0.9043922424316406,
"step": 1187
},
{
"epoch": 0.5503822098679638,
"grad_norm": 0.8359375,
"learning_rate": 2.5262067287786105e-05,
"loss": 0.8395538926124573,
"step": 1188
},
{
"epoch": 0.5508454945564049,
"grad_norm": 0.9296875,
"learning_rate": 2.5229315401014793e-05,
"loss": 0.9732888340950012,
"step": 1189
},
{
"epoch": 0.551308779244846,
"grad_norm": 0.90625,
"learning_rate": 2.5196561218702462e-05,
"loss": 0.9241658449172974,
"step": 1190
},
{
"epoch": 0.551772063933287,
"grad_norm": 0.82421875,
"learning_rate": 2.5163804802012067e-05,
"loss": 0.810673713684082,
"step": 1191
},
{
"epoch": 0.5522353486217281,
"grad_norm": 0.84765625,
"learning_rate": 2.5131046212110748e-05,
"loss": 0.9831432700157166,
"step": 1192
},
{
"epoch": 0.5526986333101691,
"grad_norm": 0.8359375,
"learning_rate": 2.509828551016969e-05,
"loss": 0.9300224781036377,
"step": 1193
},
{
"epoch": 0.5531619179986101,
"grad_norm": 0.87890625,
"learning_rate": 2.5065522757364026e-05,
"loss": 1.0542097091674805,
"step": 1194
},
{
"epoch": 0.5536252026870512,
"grad_norm": 0.890625,
"learning_rate": 2.5032758014872737e-05,
"loss": 1.0396784543991089,
"step": 1195
},
{
"epoch": 0.5540884873754922,
"grad_norm": 0.765625,
"learning_rate": 2.4999991343878476e-05,
"loss": 0.999907910823822,
"step": 1196
},
{
"epoch": 0.5545517720639332,
"grad_norm": 0.875,
"learning_rate": 2.496722280556755e-05,
"loss": 1.0439246892929077,
"step": 1197
},
{
"epoch": 0.5550150567523743,
"grad_norm": 0.8984375,
"learning_rate": 2.4934452461129705e-05,
"loss": 1.1016960144042969,
"step": 1198
},
{
"epoch": 0.5554783414408154,
"grad_norm": 0.87109375,
"learning_rate": 2.49016803717581e-05,
"loss": 1.0953267812728882,
"step": 1199
},
{
"epoch": 0.5559416261292565,
"grad_norm": 0.78125,
"learning_rate": 2.486890659864912e-05,
"loss": 1.0697580575942993,
"step": 1200
},
{
"epoch": 0.5564049108176975,
"grad_norm": 0.77734375,
"learning_rate": 2.4836131203002307e-05,
"loss": 0.9801178574562073,
"step": 1201
},
{
"epoch": 0.5568681955061385,
"grad_norm": 0.83203125,
"learning_rate": 2.4803354246020246e-05,
"loss": 0.8415432572364807,
"step": 1202
},
{
"epoch": 0.5573314801945796,
"grad_norm": 0.88671875,
"learning_rate": 2.4770575788908413e-05,
"loss": 0.8861055374145508,
"step": 1203
},
{
"epoch": 0.5577947648830206,
"grad_norm": 0.77734375,
"learning_rate": 2.4737795892875114e-05,
"loss": 0.8379161357879639,
"step": 1204
},
{
"epoch": 0.5582580495714616,
"grad_norm": 0.76171875,
"learning_rate": 2.4705014619131302e-05,
"loss": 0.7862236499786377,
"step": 1205
},
{
"epoch": 0.5587213342599027,
"grad_norm": 0.921875,
"learning_rate": 2.4672232028890544e-05,
"loss": 1.1108901500701904,
"step": 1206
},
{
"epoch": 0.5591846189483437,
"grad_norm": 0.91015625,
"learning_rate": 2.4639448183368848e-05,
"loss": 0.9187421798706055,
"step": 1207
},
{
"epoch": 0.5596479036367848,
"grad_norm": 0.80859375,
"learning_rate": 2.4606663143784546e-05,
"loss": 0.9021953344345093,
"step": 1208
},
{
"epoch": 0.5601111883252259,
"grad_norm": 1.0546875,
"learning_rate": 2.457387697135824e-05,
"loss": 0.8863352537155151,
"step": 1209
},
{
"epoch": 0.5605744730136669,
"grad_norm": 0.7421875,
"learning_rate": 2.454108972731261e-05,
"loss": 0.9452582597732544,
"step": 1210
},
{
"epoch": 0.561037757702108,
"grad_norm": 0.859375,
"learning_rate": 2.4508301472872372e-05,
"loss": 0.9802470803260803,
"step": 1211
},
{
"epoch": 0.561501042390549,
"grad_norm": 0.828125,
"learning_rate": 2.4475512269264102e-05,
"loss": 1.040086269378662,
"step": 1212
},
{
"epoch": 0.56196432707899,
"grad_norm": 0.8125,
"learning_rate": 2.4442722177716155e-05,
"loss": 0.8640920519828796,
"step": 1213
},
{
"epoch": 0.5624276117674311,
"grad_norm": 0.96484375,
"learning_rate": 2.4409931259458534e-05,
"loss": 1.000259518623352,
"step": 1214
},
{
"epoch": 0.5628908964558721,
"grad_norm": 0.88671875,
"learning_rate": 2.4377139575722816e-05,
"loss": 1.037365198135376,
"step": 1215
},
{
"epoch": 0.5633541811443131,
"grad_norm": 0.84375,
"learning_rate": 2.4344347187741992e-05,
"loss": 0.8693150281906128,
"step": 1216
},
{
"epoch": 0.5638174658327543,
"grad_norm": 1.09375,
"learning_rate": 2.4311554156750342e-05,
"loss": 1.0040010213851929,
"step": 1217
},
{
"epoch": 0.5642807505211953,
"grad_norm": 0.87890625,
"learning_rate": 2.427876054398339e-05,
"loss": 1.1392626762390137,
"step": 1218
},
{
"epoch": 0.5647440352096363,
"grad_norm": 1.2265625,
"learning_rate": 2.424596641067772e-05,
"loss": 1.005986213684082,
"step": 1219
},
{
"epoch": 0.5652073198980774,
"grad_norm": 0.890625,
"learning_rate": 2.4213171818070896e-05,
"loss": 0.8679553270339966,
"step": 1220
},
{
"epoch": 0.5656706045865184,
"grad_norm": 0.97265625,
"learning_rate": 2.4180376827401326e-05,
"loss": 1.0555665493011475,
"step": 1221
},
{
"epoch": 0.5661338892749594,
"grad_norm": 0.7421875,
"learning_rate": 2.4147581499908194e-05,
"loss": 0.7991371154785156,
"step": 1222
},
{
"epoch": 0.5665971739634005,
"grad_norm": 1.546875,
"learning_rate": 2.4114785896831272e-05,
"loss": 1.079115390777588,
"step": 1223
},
{
"epoch": 0.5670604586518415,
"grad_norm": 0.7890625,
"learning_rate": 2.4081990079410877e-05,
"loss": 0.9234097599983215,
"step": 1224
},
{
"epoch": 0.5675237433402825,
"grad_norm": 0.79296875,
"learning_rate": 2.4049194108887712e-05,
"loss": 0.8750408291816711,
"step": 1225
},
{
"epoch": 0.5679870280287237,
"grad_norm": 0.8359375,
"learning_rate": 2.401639804650277e-05,
"loss": 0.959878146648407,
"step": 1226
},
{
"epoch": 0.5684503127171647,
"grad_norm": 1.0234375,
"learning_rate": 2.398360195349723e-05,
"loss": 0.8520339131355286,
"step": 1227
},
{
"epoch": 0.5689135974056058,
"grad_norm": 1.1484375,
"learning_rate": 2.3950805891112282e-05,
"loss": 0.9484376907348633,
"step": 1228
},
{
"epoch": 0.5693768820940468,
"grad_norm": 0.796875,
"learning_rate": 2.391800992058913e-05,
"loss": 1.069106101989746,
"step": 1229
},
{
"epoch": 0.5698401667824878,
"grad_norm": 0.76171875,
"learning_rate": 2.3885214103168733e-05,
"loss": 0.8645256161689758,
"step": 1230
},
{
"epoch": 0.5703034514709289,
"grad_norm": 0.74609375,
"learning_rate": 2.3852418500091814e-05,
"loss": 0.8843850493431091,
"step": 1231
},
{
"epoch": 0.5707667361593699,
"grad_norm": 0.77734375,
"learning_rate": 2.3819623172598675e-05,
"loss": 0.848798394203186,
"step": 1232
},
{
"epoch": 0.5712300208478109,
"grad_norm": 0.75390625,
"learning_rate": 2.378682818192911e-05,
"loss": 0.867169201374054,
"step": 1233
},
{
"epoch": 0.571693305536252,
"grad_norm": 0.87109375,
"learning_rate": 2.375403358932229e-05,
"loss": 0.9214097857475281,
"step": 1234
},
{
"epoch": 0.5721565902246931,
"grad_norm": 1.0234375,
"learning_rate": 2.372123945601661e-05,
"loss": 1.0255508422851562,
"step": 1235
},
{
"epoch": 0.5726198749131342,
"grad_norm": 0.83203125,
"learning_rate": 2.3688445843249663e-05,
"loss": 1.0348937511444092,
"step": 1236
},
{
"epoch": 0.5730831596015752,
"grad_norm": 0.83203125,
"learning_rate": 2.3655652812258016e-05,
"loss": 0.9052950143814087,
"step": 1237
},
{
"epoch": 0.5735464442900162,
"grad_norm": 0.8046875,
"learning_rate": 2.3622860424277185e-05,
"loss": 1.0120303630828857,
"step": 1238
},
{
"epoch": 0.5740097289784573,
"grad_norm": 0.8984375,
"learning_rate": 2.359006874054147e-05,
"loss": 1.0887019634246826,
"step": 1239
},
{
"epoch": 0.5744730136668983,
"grad_norm": 0.921875,
"learning_rate": 2.3557277822283853e-05,
"loss": 0.9740076065063477,
"step": 1240
},
{
"epoch": 0.5749362983553393,
"grad_norm": 0.83984375,
"learning_rate": 2.352448773073591e-05,
"loss": 0.9325964450836182,
"step": 1241
},
{
"epoch": 0.5753995830437804,
"grad_norm": 0.8828125,
"learning_rate": 2.3491698527127626e-05,
"loss": 0.9349949955940247,
"step": 1242
},
{
"epoch": 0.5758628677322214,
"grad_norm": 0.984375,
"learning_rate": 2.345891027268739e-05,
"loss": 0.8547556400299072,
"step": 1243
},
{
"epoch": 0.5763261524206625,
"grad_norm": 0.87109375,
"learning_rate": 2.3426123028641764e-05,
"loss": 0.8496311902999878,
"step": 1244
},
{
"epoch": 0.5767894371091036,
"grad_norm": 0.828125,
"learning_rate": 2.339333685621546e-05,
"loss": 0.8979066610336304,
"step": 1245
},
{
"epoch": 0.5772527217975446,
"grad_norm": 0.87890625,
"learning_rate": 2.3360551816631163e-05,
"loss": 0.9577649831771851,
"step": 1246
},
{
"epoch": 0.5777160064859856,
"grad_norm": 0.921875,
"learning_rate": 2.3327767971109457e-05,
"loss": 0.844273567199707,
"step": 1247
},
{
"epoch": 0.5781792911744267,
"grad_norm": 0.890625,
"learning_rate": 2.32949853808687e-05,
"loss": 0.9688032269477844,
"step": 1248
},
{
"epoch": 0.5786425758628677,
"grad_norm": 0.828125,
"learning_rate": 2.326220410712489e-05,
"loss": 0.8966050148010254,
"step": 1249
},
{
"epoch": 0.5791058605513087,
"grad_norm": 0.78125,
"learning_rate": 2.322942421109159e-05,
"loss": 0.8237859010696411,
"step": 1250
},
{
"epoch": 0.5795691452397498,
"grad_norm": 0.8515625,
"learning_rate": 2.319664575397975e-05,
"loss": 0.998030424118042,
"step": 1251
},
{
"epoch": 0.5800324299281908,
"grad_norm": 0.66796875,
"learning_rate": 2.3163868796997697e-05,
"loss": 0.8741839528083801,
"step": 1252
},
{
"epoch": 0.580495714616632,
"grad_norm": 0.79296875,
"learning_rate": 2.3131093401350886e-05,
"loss": 0.8840314745903015,
"step": 1253
},
{
"epoch": 0.580958999305073,
"grad_norm": 0.8046875,
"learning_rate": 2.309831962824191e-05,
"loss": 0.8226364850997925,
"step": 1254
},
{
"epoch": 0.581422283993514,
"grad_norm": 0.7734375,
"learning_rate": 2.30655475388703e-05,
"loss": 1.046274185180664,
"step": 1255
},
{
"epoch": 0.5818855686819551,
"grad_norm": 0.76171875,
"learning_rate": 2.3032777194432454e-05,
"loss": 0.962317943572998,
"step": 1256
},
{
"epoch": 0.5823488533703961,
"grad_norm": 0.8203125,
"learning_rate": 2.3000008656121528e-05,
"loss": 1.0184528827667236,
"step": 1257
},
{
"epoch": 0.5828121380588371,
"grad_norm": 0.7578125,
"learning_rate": 2.2967241985127264e-05,
"loss": 0.9843693375587463,
"step": 1258
},
{
"epoch": 0.5832754227472782,
"grad_norm": 0.765625,
"learning_rate": 2.2934477242635975e-05,
"loss": 0.8737512826919556,
"step": 1259
},
{
"epoch": 0.5837387074357192,
"grad_norm": 0.78515625,
"learning_rate": 2.2901714489830313e-05,
"loss": 0.9993818998336792,
"step": 1260
},
{
"epoch": 0.5842019921241604,
"grad_norm": 0.76171875,
"learning_rate": 2.2868953787889257e-05,
"loss": 0.8710107207298279,
"step": 1261
},
{
"epoch": 0.5846652768126014,
"grad_norm": 0.765625,
"learning_rate": 2.283619519798794e-05,
"loss": 0.9011490345001221,
"step": 1262
},
{
"epoch": 0.5851285615010424,
"grad_norm": 0.8671875,
"learning_rate": 2.2803438781297542e-05,
"loss": 1.043053150177002,
"step": 1263
},
{
"epoch": 0.5855918461894835,
"grad_norm": 0.8515625,
"learning_rate": 2.2770684598985215e-05,
"loss": 0.9350489377975464,
"step": 1264
},
{
"epoch": 0.5860551308779245,
"grad_norm": 0.8125,
"learning_rate": 2.2737932712213897e-05,
"loss": 0.9060037136077881,
"step": 1265
},
{
"epoch": 0.5865184155663655,
"grad_norm": 0.796875,
"learning_rate": 2.2705183182142283e-05,
"loss": 1.0310790538787842,
"step": 1266
},
{
"epoch": 0.5869817002548066,
"grad_norm": 0.90234375,
"learning_rate": 2.2672436069924625e-05,
"loss": 1.060436725616455,
"step": 1267
},
{
"epoch": 0.5874449849432476,
"grad_norm": 0.8359375,
"learning_rate": 2.2639691436710704e-05,
"loss": 0.9399782419204712,
"step": 1268
},
{
"epoch": 0.5879082696316886,
"grad_norm": 0.67578125,
"learning_rate": 2.2606949343645633e-05,
"loss": 0.7602555155754089,
"step": 1269
},
{
"epoch": 0.5883715543201298,
"grad_norm": 0.8203125,
"learning_rate": 2.2574209851869796e-05,
"loss": 0.8838884830474854,
"step": 1270
},
{
"epoch": 0.5888348390085708,
"grad_norm": 0.7578125,
"learning_rate": 2.2541473022518734e-05,
"loss": 0.9206914305686951,
"step": 1271
},
{
"epoch": 0.5892981236970118,
"grad_norm": 0.96484375,
"learning_rate": 2.250873891672299e-05,
"loss": 1.0698550939559937,
"step": 1272
},
{
"epoch": 0.5897614083854529,
"grad_norm": 0.95703125,
"learning_rate": 2.247600759560805e-05,
"loss": 1.186781406402588,
"step": 1273
},
{
"epoch": 0.5902246930738939,
"grad_norm": 0.88671875,
"learning_rate": 2.2443279120294167e-05,
"loss": 0.859512209892273,
"step": 1274
},
{
"epoch": 0.590687977762335,
"grad_norm": 1.0,
"learning_rate": 2.2410553551896318e-05,
"loss": 1.1249911785125732,
"step": 1275
},
{
"epoch": 0.591151262450776,
"grad_norm": 1.015625,
"learning_rate": 2.2377830951524016e-05,
"loss": 0.8921389579772949,
"step": 1276
},
{
"epoch": 0.591614547139217,
"grad_norm": 0.7578125,
"learning_rate": 2.2345111380281246e-05,
"loss": 1.017206072807312,
"step": 1277
},
{
"epoch": 0.592077831827658,
"grad_norm": 0.85546875,
"learning_rate": 2.231239489926635e-05,
"loss": 0.8455703258514404,
"step": 1278
},
{
"epoch": 0.5925411165160992,
"grad_norm": 0.82421875,
"learning_rate": 2.2279681569571873e-05,
"loss": 1.062213659286499,
"step": 1279
},
{
"epoch": 0.5930044012045402,
"grad_norm": 0.8515625,
"learning_rate": 2.2246971452284514e-05,
"loss": 0.9728783965110779,
"step": 1280
},
{
"epoch": 0.5934676858929813,
"grad_norm": 0.76171875,
"learning_rate": 2.221426460848491e-05,
"loss": 0.9838144779205322,
"step": 1281
},
{
"epoch": 0.5939309705814223,
"grad_norm": 0.8125,
"learning_rate": 2.218156109924767e-05,
"loss": 0.996467649936676,
"step": 1282
},
{
"epoch": 0.5943942552698633,
"grad_norm": 0.83984375,
"learning_rate": 2.2148860985641098e-05,
"loss": 0.9236133694648743,
"step": 1283
},
{
"epoch": 0.5948575399583044,
"grad_norm": 0.92578125,
"learning_rate": 2.2116164328727207e-05,
"loss": 0.9230520129203796,
"step": 1284
},
{
"epoch": 0.5953208246467454,
"grad_norm": 0.8515625,
"learning_rate": 2.2083471189561532e-05,
"loss": 1.163870096206665,
"step": 1285
},
{
"epoch": 0.5957841093351864,
"grad_norm": 0.8359375,
"learning_rate": 2.2050781629193035e-05,
"loss": 0.9241350293159485,
"step": 1286
},
{
"epoch": 0.5962473940236275,
"grad_norm": 0.81640625,
"learning_rate": 2.2018095708664025e-05,
"loss": 0.9975556135177612,
"step": 1287
},
{
"epoch": 0.5967106787120686,
"grad_norm": 0.96875,
"learning_rate": 2.198541348900998e-05,
"loss": 0.9642297625541687,
"step": 1288
},
{
"epoch": 0.5971739634005097,
"grad_norm": 0.74609375,
"learning_rate": 2.19527350312595e-05,
"loss": 1.0175257921218872,
"step": 1289
},
{
"epoch": 0.5976372480889507,
"grad_norm": 0.84765625,
"learning_rate": 2.1920060396434114e-05,
"loss": 0.8721986413002014,
"step": 1290
},
{
"epoch": 0.5981005327773917,
"grad_norm": 0.8359375,
"learning_rate": 2.188738964554827e-05,
"loss": 0.9678272008895874,
"step": 1291
},
{
"epoch": 0.5985638174658328,
"grad_norm": 0.91015625,
"learning_rate": 2.185472283960912e-05,
"loss": 0.8769973516464233,
"step": 1292
},
{
"epoch": 0.5990271021542738,
"grad_norm": 0.84765625,
"learning_rate": 2.182206003961646e-05,
"loss": 0.8562041521072388,
"step": 1293
},
{
"epoch": 0.5994903868427148,
"grad_norm": 0.765625,
"learning_rate": 2.1789401306562614e-05,
"loss": 0.9333268404006958,
"step": 1294
},
{
"epoch": 0.5999536715311559,
"grad_norm": 0.76171875,
"learning_rate": 2.1756746701432304e-05,
"loss": 1.0222218036651611,
"step": 1295
},
{
"epoch": 0.6004169562195969,
"grad_norm": 0.80859375,
"learning_rate": 2.172409628520256e-05,
"loss": 0.9753302931785583,
"step": 1296
},
{
"epoch": 0.600880240908038,
"grad_norm": 0.78125,
"learning_rate": 2.169145011884255e-05,
"loss": 0.7853343486785889,
"step": 1297
},
{
"epoch": 0.6013435255964791,
"grad_norm": 0.890625,
"learning_rate": 2.1658808263313563e-05,
"loss": 0.9388110637664795,
"step": 1298
},
{
"epoch": 0.6018068102849201,
"grad_norm": 0.8203125,
"learning_rate": 2.162617077956879e-05,
"loss": 1.0297507047653198,
"step": 1299
},
{
"epoch": 0.6022700949733611,
"grad_norm": 0.890625,
"learning_rate": 2.1593537728553272e-05,
"loss": 1.123502492904663,
"step": 1300
},
{
"epoch": 0.6027333796618022,
"grad_norm": 0.85546875,
"learning_rate": 2.156090917120379e-05,
"loss": 0.987675130367279,
"step": 1301
},
{
"epoch": 0.6031966643502432,
"grad_norm": 0.8046875,
"learning_rate": 2.152828516844871e-05,
"loss": 0.9241386651992798,
"step": 1302
},
{
"epoch": 0.6036599490386843,
"grad_norm": 0.765625,
"learning_rate": 2.149566578120792e-05,
"loss": 1.0078705549240112,
"step": 1303
},
{
"epoch": 0.6041232337271253,
"grad_norm": 0.80078125,
"learning_rate": 2.1463051070392643e-05,
"loss": 1.0738126039505005,
"step": 1304
},
{
"epoch": 0.6045865184155663,
"grad_norm": 0.75390625,
"learning_rate": 2.1430441096905437e-05,
"loss": 0.9489339590072632,
"step": 1305
},
{
"epoch": 0.6050498031040075,
"grad_norm": 0.9375,
"learning_rate": 2.139783592163994e-05,
"loss": 0.7799573540687561,
"step": 1306
},
{
"epoch": 0.6055130877924485,
"grad_norm": 0.80859375,
"learning_rate": 2.13652356054809e-05,
"loss": 0.957494854927063,
"step": 1307
},
{
"epoch": 0.6059763724808895,
"grad_norm": 0.89453125,
"learning_rate": 2.1332640209303938e-05,
"loss": 0.939851701259613,
"step": 1308
},
{
"epoch": 0.6064396571693306,
"grad_norm": 0.70703125,
"learning_rate": 2.1300049793975505e-05,
"loss": 0.8805224895477295,
"step": 1309
},
{
"epoch": 0.6069029418577716,
"grad_norm": 0.79296875,
"learning_rate": 2.126746442035277e-05,
"loss": 0.8852795958518982,
"step": 1310
},
{
"epoch": 0.6073662265462126,
"grad_norm": 1.046875,
"learning_rate": 2.1234884149283453e-05,
"loss": 1.1347432136535645,
"step": 1311
},
{
"epoch": 0.6078295112346537,
"grad_norm": 0.8515625,
"learning_rate": 2.1202309041605784e-05,
"loss": 0.8818974494934082,
"step": 1312
},
{
"epoch": 0.6082927959230947,
"grad_norm": 0.90625,
"learning_rate": 2.1169739158148307e-05,
"loss": 1.2017409801483154,
"step": 1313
},
{
"epoch": 0.6087560806115357,
"grad_norm": 0.73828125,
"learning_rate": 2.113717455972986e-05,
"loss": 0.8007137179374695,
"step": 1314
},
{
"epoch": 0.6092193652999769,
"grad_norm": 0.85546875,
"learning_rate": 2.1104615307159377e-05,
"loss": 1.1176743507385254,
"step": 1315
},
{
"epoch": 0.6096826499884179,
"grad_norm": 0.72265625,
"learning_rate": 2.1072061461235807e-05,
"loss": 1.1391681432724,
"step": 1316
},
{
"epoch": 0.610145934676859,
"grad_norm": 1.0625,
"learning_rate": 2.1039513082748037e-05,
"loss": 1.0258846282958984,
"step": 1317
},
{
"epoch": 0.6106092193653,
"grad_norm": 0.7578125,
"learning_rate": 2.1006970232474706e-05,
"loss": 0.8150526881217957,
"step": 1318
},
{
"epoch": 0.611072504053741,
"grad_norm": 0.796875,
"learning_rate": 2.0974432971184163e-05,
"loss": 0.9746679067611694,
"step": 1319
},
{
"epoch": 0.6115357887421821,
"grad_norm": 0.74609375,
"learning_rate": 2.0941901359634282e-05,
"loss": 0.9087226390838623,
"step": 1320
},
{
"epoch": 0.6119990734306231,
"grad_norm": 0.75,
"learning_rate": 2.0909375458572435e-05,
"loss": 0.7601380348205566,
"step": 1321
},
{
"epoch": 0.6124623581190641,
"grad_norm": 0.71875,
"learning_rate": 2.0876855328735293e-05,
"loss": 0.8713633418083191,
"step": 1322
},
{
"epoch": 0.6129256428075052,
"grad_norm": 0.9296875,
"learning_rate": 2.0844341030848755e-05,
"loss": 0.8106034398078918,
"step": 1323
},
{
"epoch": 0.6133889274959463,
"grad_norm": 0.890625,
"learning_rate": 2.0811832625627853e-05,
"loss": 0.9880518913269043,
"step": 1324
},
{
"epoch": 0.6138522121843873,
"grad_norm": 1.0,
"learning_rate": 2.0779330173776586e-05,
"loss": 1.0225927829742432,
"step": 1325
},
{
"epoch": 0.6143154968728284,
"grad_norm": 0.80078125,
"learning_rate": 2.074683373598786e-05,
"loss": 0.8777214288711548,
"step": 1326
},
{
"epoch": 0.6147787815612694,
"grad_norm": 0.81640625,
"learning_rate": 2.0714343372943328e-05,
"loss": 0.9182271957397461,
"step": 1327
},
{
"epoch": 0.6152420662497105,
"grad_norm": 0.87109375,
"learning_rate": 2.0681859145313334e-05,
"loss": 1.0506292581558228,
"step": 1328
},
{
"epoch": 0.6157053509381515,
"grad_norm": 0.78125,
"learning_rate": 2.0649381113756725e-05,
"loss": 0.9353680610656738,
"step": 1329
},
{
"epoch": 0.6161686356265925,
"grad_norm": 0.85546875,
"learning_rate": 2.0616909338920793e-05,
"loss": 0.9634753465652466,
"step": 1330
},
{
"epoch": 0.6166319203150336,
"grad_norm": 0.77734375,
"learning_rate": 2.058444388144116e-05,
"loss": 0.8841714859008789,
"step": 1331
},
{
"epoch": 0.6170952050034746,
"grad_norm": 0.7265625,
"learning_rate": 2.0551984801941635e-05,
"loss": 0.8491650819778442,
"step": 1332
},
{
"epoch": 0.6175584896919157,
"grad_norm": 0.86328125,
"learning_rate": 2.051953216103413e-05,
"loss": 1.1438350677490234,
"step": 1333
},
{
"epoch": 0.6180217743803568,
"grad_norm": 0.80859375,
"learning_rate": 2.048708601931852e-05,
"loss": 0.9793174862861633,
"step": 1334
},
{
"epoch": 0.6184850590687978,
"grad_norm": 0.9375,
"learning_rate": 2.0454646437382557e-05,
"loss": 0.933502197265625,
"step": 1335
},
{
"epoch": 0.6189483437572388,
"grad_norm": 0.734375,
"learning_rate": 2.0422213475801728e-05,
"loss": 1.0236554145812988,
"step": 1336
},
{
"epoch": 0.6194116284456799,
"grad_norm": 0.984375,
"learning_rate": 2.0389787195139183e-05,
"loss": 1.0013253688812256,
"step": 1337
},
{
"epoch": 0.6198749131341209,
"grad_norm": 0.84375,
"learning_rate": 2.035736765594557e-05,
"loss": 0.8655804991722107,
"step": 1338
},
{
"epoch": 0.6203381978225619,
"grad_norm": 0.8515625,
"learning_rate": 2.0324954918758952e-05,
"loss": 0.9141555428504944,
"step": 1339
},
{
"epoch": 0.620801482511003,
"grad_norm": 0.79296875,
"learning_rate": 2.0292549044104714e-05,
"loss": 0.8629526495933533,
"step": 1340
},
{
"epoch": 0.621264767199444,
"grad_norm": 0.84765625,
"learning_rate": 2.0260150092495398e-05,
"loss": 0.8775175213813782,
"step": 1341
},
{
"epoch": 0.6217280518878852,
"grad_norm": 0.75390625,
"learning_rate": 2.0227758124430642e-05,
"loss": 0.9920527935028076,
"step": 1342
},
{
"epoch": 0.6221913365763262,
"grad_norm": 0.8359375,
"learning_rate": 2.019537320039701e-05,
"loss": 0.9502109289169312,
"step": 1343
},
{
"epoch": 0.6226546212647672,
"grad_norm": 0.73828125,
"learning_rate": 2.0162995380867968e-05,
"loss": 0.9974714517593384,
"step": 1344
},
{
"epoch": 0.6231179059532083,
"grad_norm": 0.890625,
"learning_rate": 2.0130624726303653e-05,
"loss": 0.8903458714485168,
"step": 1345
},
{
"epoch": 0.6235811906416493,
"grad_norm": 0.79296875,
"learning_rate": 2.0098261297150857e-05,
"loss": 0.8124538660049438,
"step": 1346
},
{
"epoch": 0.6240444753300903,
"grad_norm": 0.94140625,
"learning_rate": 2.0065905153842885e-05,
"loss": 0.925495982170105,
"step": 1347
},
{
"epoch": 0.6245077600185314,
"grad_norm": 0.796875,
"learning_rate": 2.0033556356799412e-05,
"loss": 1.0063080787658691,
"step": 1348
},
{
"epoch": 0.6249710447069724,
"grad_norm": 0.78125,
"learning_rate": 2.0001214966426418e-05,
"loss": 0.857069730758667,
"step": 1349
},
{
"epoch": 0.6254343293954134,
"grad_norm": 0.8046875,
"learning_rate": 1.9968881043116043e-05,
"loss": 1.0339258909225464,
"step": 1350
},
{
"epoch": 0.6258976140838546,
"grad_norm": 0.84375,
"learning_rate": 1.993655464724649e-05,
"loss": 0.8328740000724792,
"step": 1351
},
{
"epoch": 0.6263608987722956,
"grad_norm": 0.75390625,
"learning_rate": 1.9904235839181884e-05,
"loss": 1.000057578086853,
"step": 1352
},
{
"epoch": 0.6268241834607367,
"grad_norm": 0.84765625,
"learning_rate": 1.98719246792722e-05,
"loss": 0.9434694647789001,
"step": 1353
},
{
"epoch": 0.6272874681491777,
"grad_norm": 0.84765625,
"learning_rate": 1.983962122785314e-05,
"loss": 0.8397997617721558,
"step": 1354
},
{
"epoch": 0.6277507528376187,
"grad_norm": 0.90625,
"learning_rate": 1.9807325545245985e-05,
"loss": 1.1510478258132935,
"step": 1355
},
{
"epoch": 0.6282140375260598,
"grad_norm": 0.8203125,
"learning_rate": 1.9775037691757538e-05,
"loss": 0.9587162733078003,
"step": 1356
},
{
"epoch": 0.6286773222145008,
"grad_norm": 0.984375,
"learning_rate": 1.9742757727679956e-05,
"loss": 0.8667926788330078,
"step": 1357
},
{
"epoch": 0.6291406069029418,
"grad_norm": 0.7890625,
"learning_rate": 1.971048571329069e-05,
"loss": 0.8282247185707092,
"step": 1358
},
{
"epoch": 0.6296038915913829,
"grad_norm": 0.94921875,
"learning_rate": 1.967822170885231e-05,
"loss": 1.0788094997406006,
"step": 1359
},
{
"epoch": 0.630067176279824,
"grad_norm": 0.8046875,
"learning_rate": 1.964596577461248e-05,
"loss": 0.9873729348182678,
"step": 1360
},
{
"epoch": 0.630530460968265,
"grad_norm": 0.8046875,
"learning_rate": 1.9613717970803744e-05,
"loss": 0.8831279873847961,
"step": 1361
},
{
"epoch": 0.6309937456567061,
"grad_norm": 0.7890625,
"learning_rate": 1.9581478357643482e-05,
"loss": 0.9821964502334595,
"step": 1362
},
{
"epoch": 0.6314570303451471,
"grad_norm": 0.83984375,
"learning_rate": 1.9549246995333805e-05,
"loss": 0.9710639119148254,
"step": 1363
},
{
"epoch": 0.6319203150335881,
"grad_norm": 0.828125,
"learning_rate": 1.951702394406137e-05,
"loss": 0.8819524645805359,
"step": 1364
},
{
"epoch": 0.6323835997220292,
"grad_norm": 0.765625,
"learning_rate": 1.948480926399736e-05,
"loss": 0.8822925090789795,
"step": 1365
},
{
"epoch": 0.6328468844104702,
"grad_norm": 0.87109375,
"learning_rate": 1.9452603015297282e-05,
"loss": 0.9902225136756897,
"step": 1366
},
{
"epoch": 0.6333101690989112,
"grad_norm": 0.87890625,
"learning_rate": 1.942040525810095e-05,
"loss": 0.877446174621582,
"step": 1367
},
{
"epoch": 0.6337734537873523,
"grad_norm": 0.8515625,
"learning_rate": 1.938821605253227e-05,
"loss": 1.0983047485351562,
"step": 1368
},
{
"epoch": 0.6342367384757934,
"grad_norm": 0.88671875,
"learning_rate": 1.9356035458699208e-05,
"loss": 1.289228916168213,
"step": 1369
},
{
"epoch": 0.6347000231642345,
"grad_norm": 0.8984375,
"learning_rate": 1.9323863536693654e-05,
"loss": 0.9632030129432678,
"step": 1370
},
{
"epoch": 0.6351633078526755,
"grad_norm": 0.90625,
"learning_rate": 1.929170034659128e-05,
"loss": 0.8725213408470154,
"step": 1371
},
{
"epoch": 0.6356265925411165,
"grad_norm": 0.91796875,
"learning_rate": 1.925954594845148e-05,
"loss": 1.0300366878509521,
"step": 1372
},
{
"epoch": 0.6360898772295576,
"grad_norm": 0.80859375,
"learning_rate": 1.9227400402317202e-05,
"loss": 1.045912504196167,
"step": 1373
},
{
"epoch": 0.6365531619179986,
"grad_norm": 0.72265625,
"learning_rate": 1.9195263768214903e-05,
"loss": 0.9979844093322754,
"step": 1374
},
{
"epoch": 0.6370164466064396,
"grad_norm": 0.765625,
"learning_rate": 1.9163136106154358e-05,
"loss": 0.8099144697189331,
"step": 1375
},
{
"epoch": 0.6374797312948807,
"grad_norm": 0.89453125,
"learning_rate": 1.9131017476128604e-05,
"loss": 0.9197705388069153,
"step": 1376
},
{
"epoch": 0.6379430159833217,
"grad_norm": 0.72265625,
"learning_rate": 1.9098907938113824e-05,
"loss": 0.8932551741600037,
"step": 1377
},
{
"epoch": 0.6384063006717628,
"grad_norm": 0.88671875,
"learning_rate": 1.90668075520692e-05,
"loss": 0.8447968363761902,
"step": 1378
},
{
"epoch": 0.6388695853602039,
"grad_norm": 1.125,
"learning_rate": 1.9034716377936855e-05,
"loss": 1.2065844535827637,
"step": 1379
},
{
"epoch": 0.6393328700486449,
"grad_norm": 0.87109375,
"learning_rate": 1.9002634475641678e-05,
"loss": 0.8183916211128235,
"step": 1380
},
{
"epoch": 0.639796154737086,
"grad_norm": 0.86328125,
"learning_rate": 1.8970561905091263e-05,
"loss": 0.875165581703186,
"step": 1381
},
{
"epoch": 0.640259439425527,
"grad_norm": 0.7421875,
"learning_rate": 1.8938498726175775e-05,
"loss": 0.9768784642219543,
"step": 1382
},
{
"epoch": 0.640722724113968,
"grad_norm": 0.87109375,
"learning_rate": 1.8906444998767847e-05,
"loss": 0.8940538763999939,
"step": 1383
},
{
"epoch": 0.6411860088024091,
"grad_norm": 0.80859375,
"learning_rate": 1.887440078272245e-05,
"loss": 0.8721079230308533,
"step": 1384
},
{
"epoch": 0.6416492934908501,
"grad_norm": 0.828125,
"learning_rate": 1.884236613787679e-05,
"loss": 0.8797422051429749,
"step": 1385
},
{
"epoch": 0.6421125781792911,
"grad_norm": 0.98046875,
"learning_rate": 1.881034112405023e-05,
"loss": 0.978660523891449,
"step": 1386
},
{
"epoch": 0.6425758628677323,
"grad_norm": 0.73828125,
"learning_rate": 1.8778325801044118e-05,
"loss": 0.9717423319816589,
"step": 1387
},
{
"epoch": 0.6430391475561733,
"grad_norm": 0.8125,
"learning_rate": 1.8746320228641726e-05,
"loss": 0.9432107210159302,
"step": 1388
},
{
"epoch": 0.6435024322446143,
"grad_norm": 0.78515625,
"learning_rate": 1.8714324466608103e-05,
"loss": 0.7963858246803284,
"step": 1389
},
{
"epoch": 0.6439657169330554,
"grad_norm": 0.8359375,
"learning_rate": 1.8682338574690004e-05,
"loss": 1.0017811059951782,
"step": 1390
},
{
"epoch": 0.6444290016214964,
"grad_norm": 0.73046875,
"learning_rate": 1.8650362612615715e-05,
"loss": 0.9127390384674072,
"step": 1391
},
{
"epoch": 0.6448922863099374,
"grad_norm": 0.94140625,
"learning_rate": 1.8618396640095e-05,
"loss": 1.0621755123138428,
"step": 1392
},
{
"epoch": 0.6453555709983785,
"grad_norm": 0.78125,
"learning_rate": 1.8586440716818984e-05,
"loss": 0.8622088432312012,
"step": 1393
},
{
"epoch": 0.6458188556868195,
"grad_norm": 0.75390625,
"learning_rate": 1.8554494902459995e-05,
"loss": 0.8669024705886841,
"step": 1394
},
{
"epoch": 0.6462821403752605,
"grad_norm": 0.734375,
"learning_rate": 1.852255925667152e-05,
"loss": 1.1172361373901367,
"step": 1395
},
{
"epoch": 0.6467454250637017,
"grad_norm": 0.8046875,
"learning_rate": 1.8490633839088025e-05,
"loss": 0.879069447517395,
"step": 1396
},
{
"epoch": 0.6472087097521427,
"grad_norm": 0.765625,
"learning_rate": 1.84587187093249e-05,
"loss": 0.9635634422302246,
"step": 1397
},
{
"epoch": 0.6476719944405838,
"grad_norm": 0.85546875,
"learning_rate": 1.8426813926978312e-05,
"loss": 1.092876672744751,
"step": 1398
},
{
"epoch": 0.6481352791290248,
"grad_norm": 0.71875,
"learning_rate": 1.83949195516251e-05,
"loss": 0.7953818440437317,
"step": 1399
},
{
"epoch": 0.6485985638174658,
"grad_norm": 0.87890625,
"learning_rate": 1.836303564282269e-05,
"loss": 0.9496350288391113,
"step": 1400
},
{
"epoch": 0.6490618485059069,
"grad_norm": 0.796875,
"learning_rate": 1.8331162260108945e-05,
"loss": 0.9301480054855347,
"step": 1401
},
{
"epoch": 0.6495251331943479,
"grad_norm": 0.8515625,
"learning_rate": 1.8299299463002083e-05,
"loss": 0.8314633965492249,
"step": 1402
},
{
"epoch": 0.6499884178827889,
"grad_norm": 0.87890625,
"learning_rate": 1.826744731100055e-05,
"loss": 0.7800553441047668,
"step": 1403
},
{
"epoch": 0.65045170257123,
"grad_norm": 0.86328125,
"learning_rate": 1.823560586358292e-05,
"loss": 0.8455618619918823,
"step": 1404
},
{
"epoch": 0.6509149872596711,
"grad_norm": 0.8203125,
"learning_rate": 1.8203775180207772e-05,
"loss": 0.7878734469413757,
"step": 1405
},
{
"epoch": 0.6513782719481122,
"grad_norm": 0.81640625,
"learning_rate": 1.8171955320313575e-05,
"loss": 0.8364105820655823,
"step": 1406
},
{
"epoch": 0.6518415566365532,
"grad_norm": 0.828125,
"learning_rate": 1.814014634331861e-05,
"loss": 0.9706379771232605,
"step": 1407
},
{
"epoch": 0.6523048413249942,
"grad_norm": 0.82421875,
"learning_rate": 1.8108348308620824e-05,
"loss": 0.9855102300643921,
"step": 1408
},
{
"epoch": 0.6527681260134353,
"grad_norm": 0.7265625,
"learning_rate": 1.8076561275597727e-05,
"loss": 0.8652253746986389,
"step": 1409
},
{
"epoch": 0.6532314107018763,
"grad_norm": 0.85546875,
"learning_rate": 1.8044785303606288e-05,
"loss": 0.8510443568229675,
"step": 1410
},
{
"epoch": 0.6536946953903173,
"grad_norm": 0.8828125,
"learning_rate": 1.8013020451982835e-05,
"loss": 0.9615334868431091,
"step": 1411
},
{
"epoch": 0.6541579800787584,
"grad_norm": 0.82421875,
"learning_rate": 1.7981266780042904e-05,
"loss": 0.8480163812637329,
"step": 1412
},
{
"epoch": 0.6546212647671994,
"grad_norm": 0.81640625,
"learning_rate": 1.7949524347081187e-05,
"loss": 0.9262988567352295,
"step": 1413
},
{
"epoch": 0.6550845494556405,
"grad_norm": 1.078125,
"learning_rate": 1.7917793212371354e-05,
"loss": 1.2862350940704346,
"step": 1414
},
{
"epoch": 0.6555478341440816,
"grad_norm": 0.87109375,
"learning_rate": 1.7886073435165996e-05,
"loss": 1.0281234979629517,
"step": 1415
},
{
"epoch": 0.6560111188325226,
"grad_norm": 0.890625,
"learning_rate": 1.785436507469651e-05,
"loss": 0.8532906770706177,
"step": 1416
},
{
"epoch": 0.6564744035209636,
"grad_norm": 0.95703125,
"learning_rate": 1.7822668190172938e-05,
"loss": 0.8838380575180054,
"step": 1417
},
{
"epoch": 0.6569376882094047,
"grad_norm": 0.86328125,
"learning_rate": 1.779098284078393e-05,
"loss": 0.8856160044670105,
"step": 1418
},
{
"epoch": 0.6574009728978457,
"grad_norm": 0.859375,
"learning_rate": 1.775930908569657e-05,
"loss": 0.8010708093643188,
"step": 1419
},
{
"epoch": 0.6578642575862867,
"grad_norm": 0.91015625,
"learning_rate": 1.772764698405631e-05,
"loss": 0.8682946562767029,
"step": 1420
},
{
"epoch": 0.6583275422747278,
"grad_norm": 0.78515625,
"learning_rate": 1.7695996594986836e-05,
"loss": 1.0009998083114624,
"step": 1421
},
{
"epoch": 0.6587908269631688,
"grad_norm": 0.953125,
"learning_rate": 1.7664357977589932e-05,
"loss": 0.9323142170906067,
"step": 1422
},
{
"epoch": 0.65925411165161,
"grad_norm": 0.82421875,
"learning_rate": 1.7632731190945454e-05,
"loss": 0.9634347558021545,
"step": 1423
},
{
"epoch": 0.659717396340051,
"grad_norm": 0.859375,
"learning_rate": 1.7601116294111127e-05,
"loss": 1.1587272882461548,
"step": 1424
},
{
"epoch": 0.660180681028492,
"grad_norm": 0.87890625,
"learning_rate": 1.7569513346122498e-05,
"loss": 1.0130343437194824,
"step": 1425
},
{
"epoch": 0.6606439657169331,
"grad_norm": 0.95703125,
"learning_rate": 1.7537922405992772e-05,
"loss": 0.904670000076294,
"step": 1426
},
{
"epoch": 0.6611072504053741,
"grad_norm": 0.96875,
"learning_rate": 1.7506343532712762e-05,
"loss": 1.015366554260254,
"step": 1427
},
{
"epoch": 0.6615705350938151,
"grad_norm": 0.828125,
"learning_rate": 1.7474776785250742e-05,
"loss": 1.0041009187698364,
"step": 1428
},
{
"epoch": 0.6620338197822562,
"grad_norm": 0.84375,
"learning_rate": 1.744322222255231e-05,
"loss": 0.8108413815498352,
"step": 1429
},
{
"epoch": 0.6624971044706972,
"grad_norm": 0.80859375,
"learning_rate": 1.7411679903540366e-05,
"loss": 0.8150144815444946,
"step": 1430
},
{
"epoch": 0.6629603891591382,
"grad_norm": 0.81640625,
"learning_rate": 1.7380149887114892e-05,
"loss": 0.9165526032447815,
"step": 1431
},
{
"epoch": 0.6634236738475794,
"grad_norm": 0.86328125,
"learning_rate": 1.7348632232152943e-05,
"loss": 0.8653296828269958,
"step": 1432
},
{
"epoch": 0.6638869585360204,
"grad_norm": 0.765625,
"learning_rate": 1.7317126997508464e-05,
"loss": 0.8013156652450562,
"step": 1433
},
{
"epoch": 0.6643502432244615,
"grad_norm": 0.765625,
"learning_rate": 1.7285634242012216e-05,
"loss": 0.800615668296814,
"step": 1434
},
{
"epoch": 0.6648135279129025,
"grad_norm": 0.71875,
"learning_rate": 1.725415402447165e-05,
"loss": 0.8959740996360779,
"step": 1435
},
{
"epoch": 0.6652768126013435,
"grad_norm": 0.77734375,
"learning_rate": 1.722268640367083e-05,
"loss": 0.9206319451332092,
"step": 1436
},
{
"epoch": 0.6657400972897846,
"grad_norm": 0.8203125,
"learning_rate": 1.719123143837025e-05,
"loss": 0.9993884563446045,
"step": 1437
},
{
"epoch": 0.6662033819782256,
"grad_norm": 0.97265625,
"learning_rate": 1.715978918730681e-05,
"loss": 0.9378257393836975,
"step": 1438
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.87109375,
"learning_rate": 1.7128359709193664e-05,
"loss": 1.0335369110107422,
"step": 1439
},
{
"epoch": 0.6671299513551077,
"grad_norm": 0.75,
"learning_rate": 1.70969430627201e-05,
"loss": 0.843043327331543,
"step": 1440
},
{
"epoch": 0.6675932360435488,
"grad_norm": 0.8359375,
"learning_rate": 1.7065539306551467e-05,
"loss": 0.9367977380752563,
"step": 1441
},
{
"epoch": 0.6680565207319898,
"grad_norm": 0.8984375,
"learning_rate": 1.7034148499329014e-05,
"loss": 0.9247215390205383,
"step": 1442
},
{
"epoch": 0.6685198054204309,
"grad_norm": 0.8125,
"learning_rate": 1.700277069966984e-05,
"loss": 0.9778249859809875,
"step": 1443
},
{
"epoch": 0.6689830901088719,
"grad_norm": 0.8125,
"learning_rate": 1.6971405966166737e-05,
"loss": 0.7912523150444031,
"step": 1444
},
{
"epoch": 0.669446374797313,
"grad_norm": 0.734375,
"learning_rate": 1.6940054357388088e-05,
"loss": 0.891732394695282,
"step": 1445
},
{
"epoch": 0.669909659485754,
"grad_norm": 0.81640625,
"learning_rate": 1.6908715931877802e-05,
"loss": 1.0751419067382812,
"step": 1446
},
{
"epoch": 0.670372944174195,
"grad_norm": 0.82421875,
"learning_rate": 1.6877390748155137e-05,
"loss": 0.952225923538208,
"step": 1447
},
{
"epoch": 0.670836228862636,
"grad_norm": 1.0703125,
"learning_rate": 1.6846078864714642e-05,
"loss": 0.9902151823043823,
"step": 1448
},
{
"epoch": 0.6712995135510771,
"grad_norm": 0.8515625,
"learning_rate": 1.6814780340026027e-05,
"loss": 0.9151983261108398,
"step": 1449
},
{
"epoch": 0.6717627982395182,
"grad_norm": 0.8125,
"learning_rate": 1.6783495232534053e-05,
"loss": 0.81586754322052,
"step": 1450
},
{
"epoch": 0.6722260829279593,
"grad_norm": 0.8203125,
"learning_rate": 1.6752223600658437e-05,
"loss": 0.8691772818565369,
"step": 1451
},
{
"epoch": 0.6726893676164003,
"grad_norm": 0.94921875,
"learning_rate": 1.67209655027937e-05,
"loss": 0.7808753848075867,
"step": 1452
},
{
"epoch": 0.6731526523048413,
"grad_norm": 0.9453125,
"learning_rate": 1.6689720997309144e-05,
"loss": 0.9605461955070496,
"step": 1453
},
{
"epoch": 0.6736159369932824,
"grad_norm": 0.859375,
"learning_rate": 1.6658490142548634e-05,
"loss": 0.8777569532394409,
"step": 1454
},
{
"epoch": 0.6740792216817234,
"grad_norm": 0.75390625,
"learning_rate": 1.6627272996830594e-05,
"loss": 0.8905514478683472,
"step": 1455
},
{
"epoch": 0.6745425063701644,
"grad_norm": 0.80078125,
"learning_rate": 1.659606961844781e-05,
"loss": 1.0816737413406372,
"step": 1456
},
{
"epoch": 0.6750057910586055,
"grad_norm": 0.9296875,
"learning_rate": 1.656488006566738e-05,
"loss": 0.9656401872634888,
"step": 1457
},
{
"epoch": 0.6754690757470465,
"grad_norm": 0.78125,
"learning_rate": 1.6533704396730586e-05,
"loss": 0.9407315850257874,
"step": 1458
},
{
"epoch": 0.6759323604354877,
"grad_norm": 0.80078125,
"learning_rate": 1.6502542669852762e-05,
"loss": 0.9171674847602844,
"step": 1459
},
{
"epoch": 0.6763956451239287,
"grad_norm": 0.8671875,
"learning_rate": 1.647139494322325e-05,
"loss": 0.8651185035705566,
"step": 1460
},
{
"epoch": 0.6768589298123697,
"grad_norm": 0.9140625,
"learning_rate": 1.644026127500519e-05,
"loss": 0.9177205562591553,
"step": 1461
},
{
"epoch": 0.6773222145008108,
"grad_norm": 0.85546875,
"learning_rate": 1.6409141723335533e-05,
"loss": 0.8896920680999756,
"step": 1462
},
{
"epoch": 0.6777854991892518,
"grad_norm": 0.82421875,
"learning_rate": 1.637803634632482e-05,
"loss": 0.9356686472892761,
"step": 1463
},
{
"epoch": 0.6782487838776928,
"grad_norm": 0.86328125,
"learning_rate": 1.6346945202057156e-05,
"loss": 0.9953092336654663,
"step": 1464
},
{
"epoch": 0.6787120685661339,
"grad_norm": 0.85546875,
"learning_rate": 1.6315868348590043e-05,
"loss": 0.9929192066192627,
"step": 1465
},
{
"epoch": 0.6791753532545749,
"grad_norm": 0.8359375,
"learning_rate": 1.6284805843954325e-05,
"loss": 0.9500339031219482,
"step": 1466
},
{
"epoch": 0.6796386379430159,
"grad_norm": 0.84375,
"learning_rate": 1.6253757746154036e-05,
"loss": 0.932491660118103,
"step": 1467
},
{
"epoch": 0.6801019226314571,
"grad_norm": 0.8203125,
"learning_rate": 1.622272411316629e-05,
"loss": 0.9832372665405273,
"step": 1468
},
{
"epoch": 0.6805652073198981,
"grad_norm": 0.80078125,
"learning_rate": 1.6191705002941227e-05,
"loss": 0.8932191133499146,
"step": 1469
},
{
"epoch": 0.6810284920083391,
"grad_norm": 0.765625,
"learning_rate": 1.6160700473401838e-05,
"loss": 0.9749459624290466,
"step": 1470
},
{
"epoch": 0.6814917766967802,
"grad_norm": 1.0390625,
"learning_rate": 1.6129710582443913e-05,
"loss": 1.025862216949463,
"step": 1471
},
{
"epoch": 0.6819550613852212,
"grad_norm": 0.78515625,
"learning_rate": 1.609873538793588e-05,
"loss": 0.7363277077674866,
"step": 1472
},
{
"epoch": 0.6824183460736623,
"grad_norm": 0.9296875,
"learning_rate": 1.6067774947718745e-05,
"loss": 0.9932308793067932,
"step": 1473
},
{
"epoch": 0.6828816307621033,
"grad_norm": 0.984375,
"learning_rate": 1.6036829319605963e-05,
"loss": 1.0912789106369019,
"step": 1474
},
{
"epoch": 0.6833449154505443,
"grad_norm": 0.8046875,
"learning_rate": 1.6005898561383296e-05,
"loss": 0.8973681330680847,
"step": 1475
},
{
"epoch": 0.6838082001389854,
"grad_norm": 0.88671875,
"learning_rate": 1.5974982730808785e-05,
"loss": 0.9494137167930603,
"step": 1476
},
{
"epoch": 0.6842714848274265,
"grad_norm": 0.7578125,
"learning_rate": 1.5944081885612567e-05,
"loss": 1.014467477798462,
"step": 1477
},
{
"epoch": 0.6847347695158675,
"grad_norm": 0.859375,
"learning_rate": 1.5913196083496813e-05,
"loss": 0.8571305871009827,
"step": 1478
},
{
"epoch": 0.6851980542043086,
"grad_norm": 0.90625,
"learning_rate": 1.588232538213559e-05,
"loss": 1.0085415840148926,
"step": 1479
},
{
"epoch": 0.6856613388927496,
"grad_norm": 0.8671875,
"learning_rate": 1.5851469839174773e-05,
"loss": 0.9112807512283325,
"step": 1480
},
{
"epoch": 0.6861246235811906,
"grad_norm": 0.7109375,
"learning_rate": 1.5820629512231938e-05,
"loss": 0.7599020004272461,
"step": 1481
},
{
"epoch": 0.6865879082696317,
"grad_norm": 0.81640625,
"learning_rate": 1.578980445889622e-05,
"loss": 1.0665782690048218,
"step": 1482
},
{
"epoch": 0.6870511929580727,
"grad_norm": 1.046875,
"learning_rate": 1.5758994736728285e-05,
"loss": 0.9333577752113342,
"step": 1483
},
{
"epoch": 0.6875144776465137,
"grad_norm": 0.84375,
"learning_rate": 1.5728200403260104e-05,
"loss": 1.0278582572937012,
"step": 1484
},
{
"epoch": 0.6879777623349548,
"grad_norm": 0.890625,
"learning_rate": 1.5697421515994976e-05,
"loss": 0.8942716121673584,
"step": 1485
},
{
"epoch": 0.6884410470233959,
"grad_norm": 0.8203125,
"learning_rate": 1.5666658132407308e-05,
"loss": 0.8894409537315369,
"step": 1486
},
{
"epoch": 0.688904331711837,
"grad_norm": 0.90234375,
"learning_rate": 1.563591030994259e-05,
"loss": 0.9284682273864746,
"step": 1487
},
{
"epoch": 0.689367616400278,
"grad_norm": 0.8984375,
"learning_rate": 1.560517810601723e-05,
"loss": 0.9897350072860718,
"step": 1488
},
{
"epoch": 0.689830901088719,
"grad_norm": 0.8671875,
"learning_rate": 1.5574461578018493e-05,
"loss": 0.9923799633979797,
"step": 1489
},
{
"epoch": 0.6902941857771601,
"grad_norm": 0.90234375,
"learning_rate": 1.554376078330436e-05,
"loss": 0.8437385559082031,
"step": 1490
},
{
"epoch": 0.6907574704656011,
"grad_norm": 0.8984375,
"learning_rate": 1.5513075779203417e-05,
"loss": 1.065632700920105,
"step": 1491
},
{
"epoch": 0.6912207551540421,
"grad_norm": 0.7890625,
"learning_rate": 1.5482406623014795e-05,
"loss": 0.9187345504760742,
"step": 1492
},
{
"epoch": 0.6916840398424832,
"grad_norm": 0.76171875,
"learning_rate": 1.545175337200801e-05,
"loss": 0.8924624919891357,
"step": 1493
},
{
"epoch": 0.6921473245309242,
"grad_norm": 0.94921875,
"learning_rate": 1.5421116083422887e-05,
"loss": 0.9492448568344116,
"step": 1494
},
{
"epoch": 0.6926106092193653,
"grad_norm": 0.80859375,
"learning_rate": 1.539049481446944e-05,
"loss": 0.738587498664856,
"step": 1495
},
{
"epoch": 0.6930738939078064,
"grad_norm": 0.76953125,
"learning_rate": 1.5359889622327773e-05,
"loss": 1.0237951278686523,
"step": 1496
},
{
"epoch": 0.6935371785962474,
"grad_norm": 0.82421875,
"learning_rate": 1.5329300564147974e-05,
"loss": 0.9008263349533081,
"step": 1497
},
{
"epoch": 0.6940004632846885,
"grad_norm": 1.046875,
"learning_rate": 1.529872769704997e-05,
"loss": 1.008281946182251,
"step": 1498
},
{
"epoch": 0.6944637479731295,
"grad_norm": 0.98828125,
"learning_rate": 1.5268171078123503e-05,
"loss": 1.0567467212677002,
"step": 1499
},
{
"epoch": 0.6949270326615705,
"grad_norm": 0.90234375,
"learning_rate": 1.5237630764427945e-05,
"loss": 0.8660717606544495,
"step": 1500
},
{
"epoch": 0.6953903173500116,
"grad_norm": 0.8984375,
"learning_rate": 1.5207106812992225e-05,
"loss": 0.9844351410865784,
"step": 1501
},
{
"epoch": 0.6958536020384526,
"grad_norm": 0.953125,
"learning_rate": 1.5176599280814716e-05,
"loss": 0.9823206067085266,
"step": 1502
},
{
"epoch": 0.6963168867268936,
"grad_norm": 0.84765625,
"learning_rate": 1.5146108224863147e-05,
"loss": 0.8877736330032349,
"step": 1503
},
{
"epoch": 0.6967801714153348,
"grad_norm": 0.765625,
"learning_rate": 1.5115633702074463e-05,
"loss": 0.8826972842216492,
"step": 1504
},
{
"epoch": 0.6972434561037758,
"grad_norm": 0.9375,
"learning_rate": 1.5085175769354723e-05,
"loss": 0.9058219194412231,
"step": 1505
},
{
"epoch": 0.6977067407922168,
"grad_norm": 0.8125,
"learning_rate": 1.5054734483579058e-05,
"loss": 0.9776875972747803,
"step": 1506
},
{
"epoch": 0.6981700254806579,
"grad_norm": 0.89453125,
"learning_rate": 1.5024309901591453e-05,
"loss": 0.8491601347923279,
"step": 1507
},
{
"epoch": 0.6986333101690989,
"grad_norm": 0.76171875,
"learning_rate": 1.4993902080204744e-05,
"loss": 0.9605410099029541,
"step": 1508
},
{
"epoch": 0.6990965948575399,
"grad_norm": 0.87890625,
"learning_rate": 1.4963511076200446e-05,
"loss": 0.8161846995353699,
"step": 1509
},
{
"epoch": 0.699559879545981,
"grad_norm": 0.93359375,
"learning_rate": 1.4933136946328686e-05,
"loss": 0.992790699005127,
"step": 1510
},
{
"epoch": 0.700023164234422,
"grad_norm": 0.80078125,
"learning_rate": 1.4902779747308069e-05,
"loss": 0.75239497423172,
"step": 1511
},
{
"epoch": 0.700486448922863,
"grad_norm": 0.82421875,
"learning_rate": 1.4872439535825595e-05,
"loss": 0.8581488132476807,
"step": 1512
},
{
"epoch": 0.7009497336113042,
"grad_norm": 1.015625,
"learning_rate": 1.484211636853654e-05,
"loss": 0.9911954998970032,
"step": 1513
},
{
"epoch": 0.7014130182997452,
"grad_norm": 0.8203125,
"learning_rate": 1.4811810302064333e-05,
"loss": 0.9692468047142029,
"step": 1514
},
{
"epoch": 0.7018763029881863,
"grad_norm": 0.875,
"learning_rate": 1.4781521393000504e-05,
"loss": 0.9069231152534485,
"step": 1515
},
{
"epoch": 0.7023395876766273,
"grad_norm": 0.85546875,
"learning_rate": 1.4751249697904517e-05,
"loss": 0.8558336496353149,
"step": 1516
},
{
"epoch": 0.7028028723650683,
"grad_norm": 0.828125,
"learning_rate": 1.4720995273303713e-05,
"loss": 0.8646467328071594,
"step": 1517
},
{
"epoch": 0.7032661570535094,
"grad_norm": 0.765625,
"learning_rate": 1.4690758175693161e-05,
"loss": 0.9294448494911194,
"step": 1518
},
{
"epoch": 0.7037294417419504,
"grad_norm": 0.8984375,
"learning_rate": 1.46605384615356e-05,
"loss": 1.5108270645141602,
"step": 1519
},
{
"epoch": 0.7041927264303914,
"grad_norm": 0.765625,
"learning_rate": 1.463033618726129e-05,
"loss": 0.7721649408340454,
"step": 1520
},
{
"epoch": 0.7046560111188325,
"grad_norm": 0.921875,
"learning_rate": 1.4600151409267915e-05,
"loss": 0.9843413233757019,
"step": 1521
},
{
"epoch": 0.7051192958072736,
"grad_norm": 0.77734375,
"learning_rate": 1.4569984183920527e-05,
"loss": 1.0829393863677979,
"step": 1522
},
{
"epoch": 0.7055825804957147,
"grad_norm": 0.84375,
"learning_rate": 1.453983456755135e-05,
"loss": 0.8989307284355164,
"step": 1523
},
{
"epoch": 0.7060458651841557,
"grad_norm": 0.89453125,
"learning_rate": 1.4509702616459779e-05,
"loss": 1.061563491821289,
"step": 1524
},
{
"epoch": 0.7065091498725967,
"grad_norm": 0.7265625,
"learning_rate": 1.4479588386912172e-05,
"loss": 0.9384868741035461,
"step": 1525
},
{
"epoch": 0.7069724345610378,
"grad_norm": 0.75,
"learning_rate": 1.4449491935141836e-05,
"loss": 0.7927857637405396,
"step": 1526
},
{
"epoch": 0.7074357192494788,
"grad_norm": 0.9140625,
"learning_rate": 1.4419413317348868e-05,
"loss": 0.7902013063430786,
"step": 1527
},
{
"epoch": 0.7078990039379198,
"grad_norm": 0.859375,
"learning_rate": 1.4389352589700028e-05,
"loss": 0.8877607583999634,
"step": 1528
},
{
"epoch": 0.7083622886263609,
"grad_norm": 0.85546875,
"learning_rate": 1.4359309808328738e-05,
"loss": 0.8904527425765991,
"step": 1529
},
{
"epoch": 0.7088255733148019,
"grad_norm": 0.9453125,
"learning_rate": 1.4329285029334844e-05,
"loss": 0.9153241515159607,
"step": 1530
},
{
"epoch": 0.709288858003243,
"grad_norm": 0.76171875,
"learning_rate": 1.4299278308784614e-05,
"loss": 0.8823608756065369,
"step": 1531
},
{
"epoch": 0.7097521426916841,
"grad_norm": 0.66796875,
"learning_rate": 1.4269289702710575e-05,
"loss": 0.8942029476165771,
"step": 1532
},
{
"epoch": 0.7102154273801251,
"grad_norm": 0.78125,
"learning_rate": 1.4239319267111447e-05,
"loss": 0.9542202353477478,
"step": 1533
},
{
"epoch": 0.7106787120685661,
"grad_norm": 0.9296875,
"learning_rate": 1.4209367057952005e-05,
"loss": 0.834882915019989,
"step": 1534
},
{
"epoch": 0.7111419967570072,
"grad_norm": 0.80859375,
"learning_rate": 1.4179433131162978e-05,
"loss": 0.8554872870445251,
"step": 1535
},
{
"epoch": 0.7116052814454482,
"grad_norm": 0.88671875,
"learning_rate": 1.4149517542640996e-05,
"loss": 0.9996330738067627,
"step": 1536
},
{
"epoch": 0.7120685661338892,
"grad_norm": 0.953125,
"learning_rate": 1.4119620348248392e-05,
"loss": 0.8751652240753174,
"step": 1537
},
{
"epoch": 0.7125318508223303,
"grad_norm": 0.8984375,
"learning_rate": 1.4089741603813209e-05,
"loss": 0.9856979846954346,
"step": 1538
},
{
"epoch": 0.7129951355107713,
"grad_norm": 0.9765625,
"learning_rate": 1.4059881365128982e-05,
"loss": 0.9445462226867676,
"step": 1539
},
{
"epoch": 0.7134584201992125,
"grad_norm": 0.92578125,
"learning_rate": 1.4030039687954728e-05,
"loss": 0.7988513112068176,
"step": 1540
},
{
"epoch": 0.7139217048876535,
"grad_norm": 0.8359375,
"learning_rate": 1.4000216628014782e-05,
"loss": 0.8697713017463684,
"step": 1541
},
{
"epoch": 0.7143849895760945,
"grad_norm": 0.80078125,
"learning_rate": 1.3970412240998741e-05,
"loss": 1.1426811218261719,
"step": 1542
},
{
"epoch": 0.7148482742645356,
"grad_norm": 0.7734375,
"learning_rate": 1.3940626582561308e-05,
"loss": 0.932056188583374,
"step": 1543
},
{
"epoch": 0.7153115589529766,
"grad_norm": 0.84765625,
"learning_rate": 1.3910859708322204e-05,
"loss": 0.9006248116493225,
"step": 1544
},
{
"epoch": 0.7157748436414176,
"grad_norm": 0.73046875,
"learning_rate": 1.3881111673866106e-05,
"loss": 0.869684636592865,
"step": 1545
},
{
"epoch": 0.7162381283298587,
"grad_norm": 0.84765625,
"learning_rate": 1.3851382534742493e-05,
"loss": 1.0497184991836548,
"step": 1546
},
{
"epoch": 0.7167014130182997,
"grad_norm": 0.80078125,
"learning_rate": 1.3821672346465575e-05,
"loss": 0.9837194681167603,
"step": 1547
},
{
"epoch": 0.7171646977067407,
"grad_norm": 0.9453125,
"learning_rate": 1.3791981164514141e-05,
"loss": 0.9810088872909546,
"step": 1548
},
{
"epoch": 0.7176279823951819,
"grad_norm": 0.8828125,
"learning_rate": 1.376230904433153e-05,
"loss": 0.9475647211074829,
"step": 1549
},
{
"epoch": 0.7180912670836229,
"grad_norm": 0.87109375,
"learning_rate": 1.3732656041325448e-05,
"loss": 0.7182843089103699,
"step": 1550
},
{
"epoch": 0.718554551772064,
"grad_norm": 0.78515625,
"learning_rate": 1.370302221086793e-05,
"loss": 0.8560316562652588,
"step": 1551
},
{
"epoch": 0.719017836460505,
"grad_norm": 0.8125,
"learning_rate": 1.3673407608295208e-05,
"loss": 0.9808245301246643,
"step": 1552
},
{
"epoch": 0.719481121148946,
"grad_norm": 0.796875,
"learning_rate": 1.3643812288907586e-05,
"loss": 0.8075791597366333,
"step": 1553
},
{
"epoch": 0.7199444058373871,
"grad_norm": 0.7109375,
"learning_rate": 1.3614236307969388e-05,
"loss": 0.8571614027023315,
"step": 1554
},
{
"epoch": 0.7204076905258281,
"grad_norm": 0.94140625,
"learning_rate": 1.3584679720708808e-05,
"loss": 1.0778717994689941,
"step": 1555
},
{
"epoch": 0.7208709752142691,
"grad_norm": 0.703125,
"learning_rate": 1.3555142582317846e-05,
"loss": 0.726076602935791,
"step": 1556
},
{
"epoch": 0.7213342599027102,
"grad_norm": 0.87109375,
"learning_rate": 1.352562494795216e-05,
"loss": 0.8657900094985962,
"step": 1557
},
{
"epoch": 0.7217975445911513,
"grad_norm": 0.77734375,
"learning_rate": 1.3496126872730989e-05,
"loss": 1.003196120262146,
"step": 1558
},
{
"epoch": 0.7222608292795923,
"grad_norm": 0.8046875,
"learning_rate": 1.3466648411737065e-05,
"loss": 0.990123450756073,
"step": 1559
},
{
"epoch": 0.7227241139680334,
"grad_norm": 0.890625,
"learning_rate": 1.3437189620016487e-05,
"loss": 0.8534030914306641,
"step": 1560
},
{
"epoch": 0.7231873986564744,
"grad_norm": 0.78515625,
"learning_rate": 1.3407750552578635e-05,
"loss": 0.9646372199058533,
"step": 1561
},
{
"epoch": 0.7236506833449154,
"grad_norm": 0.8359375,
"learning_rate": 1.3378331264396029e-05,
"loss": 0.7858661413192749,
"step": 1562
},
{
"epoch": 0.7241139680333565,
"grad_norm": 0.73828125,
"learning_rate": 1.3348931810404288e-05,
"loss": 0.7996460795402527,
"step": 1563
},
{
"epoch": 0.7245772527217975,
"grad_norm": 0.91796875,
"learning_rate": 1.331955224550195e-05,
"loss": 0.9326428174972534,
"step": 1564
},
{
"epoch": 0.7250405374102386,
"grad_norm": 0.85546875,
"learning_rate": 1.329019262455048e-05,
"loss": 1.1491047143936157,
"step": 1565
},
{
"epoch": 0.7255038220986796,
"grad_norm": 0.8359375,
"learning_rate": 1.3260853002374042e-05,
"loss": 0.8331834077835083,
"step": 1566
},
{
"epoch": 0.7259671067871207,
"grad_norm": 0.92578125,
"learning_rate": 1.323153343375947e-05,
"loss": 0.920341432094574,
"step": 1567
},
{
"epoch": 0.7264303914755618,
"grad_norm": 0.85546875,
"learning_rate": 1.3202233973456163e-05,
"loss": 1.0477654933929443,
"step": 1568
},
{
"epoch": 0.7268936761640028,
"grad_norm": 0.89453125,
"learning_rate": 1.3172954676175968e-05,
"loss": 0.877657949924469,
"step": 1569
},
{
"epoch": 0.7273569608524438,
"grad_norm": 0.91796875,
"learning_rate": 1.3143695596593085e-05,
"loss": 0.8888318538665771,
"step": 1570
},
{
"epoch": 0.7278202455408849,
"grad_norm": 0.7890625,
"learning_rate": 1.3114456789343936e-05,
"loss": 1.0591609477996826,
"step": 1571
},
{
"epoch": 0.7282835302293259,
"grad_norm": 0.7421875,
"learning_rate": 1.308523830902713e-05,
"loss": 0.9316011071205139,
"step": 1572
},
{
"epoch": 0.7287468149177669,
"grad_norm": 0.93359375,
"learning_rate": 1.3056040210203272e-05,
"loss": 0.9526464343070984,
"step": 1573
},
{
"epoch": 0.729210099606208,
"grad_norm": 0.86328125,
"learning_rate": 1.302686254739494e-05,
"loss": 0.900551438331604,
"step": 1574
},
{
"epoch": 0.729673384294649,
"grad_norm": 0.88671875,
"learning_rate": 1.299770537508655e-05,
"loss": 0.9370485544204712,
"step": 1575
},
{
"epoch": 0.7301366689830902,
"grad_norm": 0.890625,
"learning_rate": 1.2968568747724228e-05,
"loss": 0.9385664463043213,
"step": 1576
},
{
"epoch": 0.7305999536715312,
"grad_norm": 0.82421875,
"learning_rate": 1.2939452719715771e-05,
"loss": 0.8683898448944092,
"step": 1577
},
{
"epoch": 0.7310632383599722,
"grad_norm": 0.78515625,
"learning_rate": 1.291035734543049e-05,
"loss": 0.8029768466949463,
"step": 1578
},
{
"epoch": 0.7315265230484133,
"grad_norm": 0.890625,
"learning_rate": 1.2881282679199143e-05,
"loss": 0.9224568605422974,
"step": 1579
},
{
"epoch": 0.7319898077368543,
"grad_norm": 0.75,
"learning_rate": 1.2852228775313794e-05,
"loss": 0.8857653737068176,
"step": 1580
},
{
"epoch": 0.7324530924252953,
"grad_norm": 0.8046875,
"learning_rate": 1.2823195688027754e-05,
"loss": 0.9866558313369751,
"step": 1581
},
{
"epoch": 0.7329163771137364,
"grad_norm": 0.83203125,
"learning_rate": 1.2794183471555458e-05,
"loss": 0.8291232585906982,
"step": 1582
},
{
"epoch": 0.7333796618021774,
"grad_norm": 0.84765625,
"learning_rate": 1.2765192180072382e-05,
"loss": 1.0451363325119019,
"step": 1583
},
{
"epoch": 0.7338429464906184,
"grad_norm": 0.7734375,
"learning_rate": 1.2736221867714914e-05,
"loss": 0.8132802248001099,
"step": 1584
},
{
"epoch": 0.7343062311790596,
"grad_norm": 0.82421875,
"learning_rate": 1.2707272588580259e-05,
"loss": 0.9321390390396118,
"step": 1585
},
{
"epoch": 0.7347695158675006,
"grad_norm": 0.74609375,
"learning_rate": 1.2678344396726367e-05,
"loss": 0.8528720140457153,
"step": 1586
},
{
"epoch": 0.7352328005559416,
"grad_norm": 0.7578125,
"learning_rate": 1.2649437346171806e-05,
"loss": 0.9147552847862244,
"step": 1587
},
{
"epoch": 0.7356960852443827,
"grad_norm": 0.8671875,
"learning_rate": 1.262055149089564e-05,
"loss": 0.938007652759552,
"step": 1588
},
{
"epoch": 0.7361593699328237,
"grad_norm": 0.83984375,
"learning_rate": 1.2591686884837398e-05,
"loss": 0.9420212507247925,
"step": 1589
},
{
"epoch": 0.7366226546212647,
"grad_norm": 0.93359375,
"learning_rate": 1.2562843581896892e-05,
"loss": 0.8320150375366211,
"step": 1590
},
{
"epoch": 0.7370859393097058,
"grad_norm": 0.9375,
"learning_rate": 1.2534021635934168e-05,
"loss": 0.8456387519836426,
"step": 1591
},
{
"epoch": 0.7375492239981468,
"grad_norm": 0.87890625,
"learning_rate": 1.25052211007694e-05,
"loss": 0.8855749368667603,
"step": 1592
},
{
"epoch": 0.7380125086865879,
"grad_norm": 0.91015625,
"learning_rate": 1.2476442030182779e-05,
"loss": 0.9768263101577759,
"step": 1593
},
{
"epoch": 0.738475793375029,
"grad_norm": 0.70703125,
"learning_rate": 1.244768447791439e-05,
"loss": 0.7757108211517334,
"step": 1594
},
{
"epoch": 0.73893907806347,
"grad_norm": 0.91796875,
"learning_rate": 1.2418948497664178e-05,
"loss": 0.7897142767906189,
"step": 1595
},
{
"epoch": 0.7394023627519111,
"grad_norm": 0.9140625,
"learning_rate": 1.2390234143091761e-05,
"loss": 1.009446620941162,
"step": 1596
},
{
"epoch": 0.7398656474403521,
"grad_norm": 0.83203125,
"learning_rate": 1.2361541467816402e-05,
"loss": 0.7995726466178894,
"step": 1597
},
{
"epoch": 0.7403289321287931,
"grad_norm": 0.92578125,
"learning_rate": 1.2332870525416888e-05,
"loss": 0.9182596206665039,
"step": 1598
},
{
"epoch": 0.7407922168172342,
"grad_norm": 0.89453125,
"learning_rate": 1.2304221369431394e-05,
"loss": 0.8457068800926208,
"step": 1599
},
{
"epoch": 0.7412555015056752,
"grad_norm": 0.8046875,
"learning_rate": 1.227559405335744e-05,
"loss": 1.0693312883377075,
"step": 1600
},
{
"epoch": 0.7417187861941162,
"grad_norm": 0.90234375,
"learning_rate": 1.2246988630651752e-05,
"loss": 0.9207885265350342,
"step": 1601
},
{
"epoch": 0.7421820708825573,
"grad_norm": 0.79296875,
"learning_rate": 1.2218405154730182e-05,
"loss": 0.9426785111427307,
"step": 1602
},
{
"epoch": 0.7426453555709984,
"grad_norm": 0.79296875,
"learning_rate": 1.2189843678967586e-05,
"loss": 0.8506474494934082,
"step": 1603
},
{
"epoch": 0.7431086402594395,
"grad_norm": 0.89453125,
"learning_rate": 1.2161304256697735e-05,
"loss": 0.8817695379257202,
"step": 1604
},
{
"epoch": 0.7435719249478805,
"grad_norm": 0.8828125,
"learning_rate": 1.2132786941213243e-05,
"loss": 0.9216139316558838,
"step": 1605
},
{
"epoch": 0.7440352096363215,
"grad_norm": 0.79296875,
"learning_rate": 1.2104291785765427e-05,
"loss": 0.9422940611839294,
"step": 1606
},
{
"epoch": 0.7444984943247626,
"grad_norm": 0.7578125,
"learning_rate": 1.2075818843564235e-05,
"loss": 0.8162217140197754,
"step": 1607
},
{
"epoch": 0.7449617790132036,
"grad_norm": 0.85546875,
"learning_rate": 1.2047368167778111e-05,
"loss": 0.8420689105987549,
"step": 1608
},
{
"epoch": 0.7454250637016446,
"grad_norm": 0.80859375,
"learning_rate": 1.2018939811533943e-05,
"loss": 0.8831995129585266,
"step": 1609
},
{
"epoch": 0.7458883483900857,
"grad_norm": 0.87109375,
"learning_rate": 1.1990533827916945e-05,
"loss": 0.9695085287094116,
"step": 1610
},
{
"epoch": 0.7463516330785267,
"grad_norm": 0.83984375,
"learning_rate": 1.1962150269970522e-05,
"loss": 0.7822751402854919,
"step": 1611
},
{
"epoch": 0.7468149177669678,
"grad_norm": 0.99609375,
"learning_rate": 1.1933789190696248e-05,
"loss": 1.0877046585083008,
"step": 1612
},
{
"epoch": 0.7472782024554089,
"grad_norm": 1.0078125,
"learning_rate": 1.1905450643053673e-05,
"loss": 0.9393079280853271,
"step": 1613
},
{
"epoch": 0.7477414871438499,
"grad_norm": 0.83984375,
"learning_rate": 1.187713467996031e-05,
"loss": 0.8477166295051575,
"step": 1614
},
{
"epoch": 0.748204771832291,
"grad_norm": 0.86328125,
"learning_rate": 1.1848841354291486e-05,
"loss": 1.0002130270004272,
"step": 1615
},
{
"epoch": 0.748668056520732,
"grad_norm": 0.76953125,
"learning_rate": 1.1820570718880265e-05,
"loss": 0.8591433167457581,
"step": 1616
},
{
"epoch": 0.749131341209173,
"grad_norm": 0.80859375,
"learning_rate": 1.179232282651732e-05,
"loss": 0.8319763541221619,
"step": 1617
},
{
"epoch": 0.749594625897614,
"grad_norm": 0.78515625,
"learning_rate": 1.1764097729950881e-05,
"loss": 0.8276806473731995,
"step": 1618
},
{
"epoch": 0.7500579105860551,
"grad_norm": 0.81640625,
"learning_rate": 1.1735895481886583e-05,
"loss": 0.940848708152771,
"step": 1619
},
{
"epoch": 0.7505211952744962,
"grad_norm": 0.84765625,
"learning_rate": 1.1707716134987416e-05,
"loss": 0.8151571154594421,
"step": 1620
},
{
"epoch": 0.7509844799629373,
"grad_norm": 0.81640625,
"learning_rate": 1.167955974187361e-05,
"loss": 0.8742914795875549,
"step": 1621
},
{
"epoch": 0.7514477646513783,
"grad_norm": 0.8125,
"learning_rate": 1.1651426355122506e-05,
"loss": 0.8702387809753418,
"step": 1622
},
{
"epoch": 0.7519110493398193,
"grad_norm": 1.0703125,
"learning_rate": 1.1623316027268506e-05,
"loss": 0.8356806039810181,
"step": 1623
},
{
"epoch": 0.7523743340282604,
"grad_norm": 0.828125,
"learning_rate": 1.1595228810802956e-05,
"loss": 0.8744298815727234,
"step": 1624
},
{
"epoch": 0.7528376187167014,
"grad_norm": 0.9140625,
"learning_rate": 1.156716475817404e-05,
"loss": 0.8392553329467773,
"step": 1625
},
{
"epoch": 0.7533009034051424,
"grad_norm": 0.859375,
"learning_rate": 1.1539123921786677e-05,
"loss": 0.9593334197998047,
"step": 1626
},
{
"epoch": 0.7537641880935835,
"grad_norm": 0.8359375,
"learning_rate": 1.1511106354002433e-05,
"loss": 0.9393202066421509,
"step": 1627
},
{
"epoch": 0.7542274727820245,
"grad_norm": 0.88671875,
"learning_rate": 1.1483112107139444e-05,
"loss": 0.8775235414505005,
"step": 1628
},
{
"epoch": 0.7546907574704657,
"grad_norm": 0.796875,
"learning_rate": 1.1455141233472282e-05,
"loss": 0.9231534004211426,
"step": 1629
},
{
"epoch": 0.7551540421589067,
"grad_norm": 0.8984375,
"learning_rate": 1.1427193785231894e-05,
"loss": 1.0438674688339233,
"step": 1630
},
{
"epoch": 0.7556173268473477,
"grad_norm": 1.0625,
"learning_rate": 1.1399269814605442e-05,
"loss": 1.0147441625595093,
"step": 1631
},
{
"epoch": 0.7560806115357888,
"grad_norm": 0.8671875,
"learning_rate": 1.1371369373736287e-05,
"loss": 1.0305769443511963,
"step": 1632
},
{
"epoch": 0.7565438962242298,
"grad_norm": 0.9375,
"learning_rate": 1.1343492514723849e-05,
"loss": 0.9958128333091736,
"step": 1633
},
{
"epoch": 0.7570071809126708,
"grad_norm": 0.75390625,
"learning_rate": 1.131563928962348e-05,
"loss": 0.997496485710144,
"step": 1634
},
{
"epoch": 0.7574704656011119,
"grad_norm": 0.921875,
"learning_rate": 1.128780975044644e-05,
"loss": 0.885054886341095,
"step": 1635
},
{
"epoch": 0.7579337502895529,
"grad_norm": 0.7734375,
"learning_rate": 1.1260003949159729e-05,
"loss": 0.9202278256416321,
"step": 1636
},
{
"epoch": 0.7583970349779939,
"grad_norm": 0.73046875,
"learning_rate": 1.1232221937686033e-05,
"loss": 0.7514294385910034,
"step": 1637
},
{
"epoch": 0.7588603196664351,
"grad_norm": 0.75390625,
"learning_rate": 1.1204463767903624e-05,
"loss": 0.7889110445976257,
"step": 1638
},
{
"epoch": 0.7593236043548761,
"grad_norm": 0.7890625,
"learning_rate": 1.1176729491646248e-05,
"loss": 0.8639605045318604,
"step": 1639
},
{
"epoch": 0.7597868890433171,
"grad_norm": 0.87890625,
"learning_rate": 1.1149019160703012e-05,
"loss": 0.9009301662445068,
"step": 1640
},
{
"epoch": 0.7602501737317582,
"grad_norm": 0.84765625,
"learning_rate": 1.1121332826818346e-05,
"loss": 0.8193938732147217,
"step": 1641
},
{
"epoch": 0.7607134584201992,
"grad_norm": 0.890625,
"learning_rate": 1.1093670541691834e-05,
"loss": 0.9459896683692932,
"step": 1642
},
{
"epoch": 0.7611767431086403,
"grad_norm": 0.8125,
"learning_rate": 1.1066032356978173e-05,
"loss": 1.0018055438995361,
"step": 1643
},
{
"epoch": 0.7616400277970813,
"grad_norm": 0.72265625,
"learning_rate": 1.1038418324287065e-05,
"loss": 0.9474573731422424,
"step": 1644
},
{
"epoch": 0.7621033124855223,
"grad_norm": 0.8671875,
"learning_rate": 1.1010828495183086e-05,
"loss": 0.8761516213417053,
"step": 1645
},
{
"epoch": 0.7625665971739634,
"grad_norm": 0.83203125,
"learning_rate": 1.0983262921185635e-05,
"loss": 0.8465573191642761,
"step": 1646
},
{
"epoch": 0.7630298818624045,
"grad_norm": 0.9921875,
"learning_rate": 1.095572165376881e-05,
"loss": 1.064576268196106,
"step": 1647
},
{
"epoch": 0.7634931665508455,
"grad_norm": 0.86328125,
"learning_rate": 1.0928204744361344e-05,
"loss": 0.9846107363700867,
"step": 1648
},
{
"epoch": 0.7639564512392866,
"grad_norm": 0.78515625,
"learning_rate": 1.0900712244346447e-05,
"loss": 0.9933382272720337,
"step": 1649
},
{
"epoch": 0.7644197359277276,
"grad_norm": 0.96875,
"learning_rate": 1.087324420506176e-05,
"loss": 1.019972324371338,
"step": 1650
},
{
"epoch": 0.7648830206161686,
"grad_norm": 0.7890625,
"learning_rate": 1.0845800677799265e-05,
"loss": 0.8567442893981934,
"step": 1651
},
{
"epoch": 0.7653463053046097,
"grad_norm": 0.8359375,
"learning_rate": 1.0818381713805164e-05,
"loss": 0.833297610282898,
"step": 1652
},
{
"epoch": 0.7658095899930507,
"grad_norm": 0.75,
"learning_rate": 1.0790987364279792e-05,
"loss": 1.0111546516418457,
"step": 1653
},
{
"epoch": 0.7662728746814917,
"grad_norm": 0.79296875,
"learning_rate": 1.0763617680377507e-05,
"loss": 0.9289141893386841,
"step": 1654
},
{
"epoch": 0.7667361593699328,
"grad_norm": 0.859375,
"learning_rate": 1.0736272713206621e-05,
"loss": 0.8431038856506348,
"step": 1655
},
{
"epoch": 0.7671994440583739,
"grad_norm": 0.78125,
"learning_rate": 1.0708952513829302e-05,
"loss": 0.911212682723999,
"step": 1656
},
{
"epoch": 0.767662728746815,
"grad_norm": 1.109375,
"learning_rate": 1.0681657133261436e-05,
"loss": 1.0007601976394653,
"step": 1657
},
{
"epoch": 0.768126013435256,
"grad_norm": 0.76171875,
"learning_rate": 1.0654386622472605e-05,
"loss": 0.7746074795722961,
"step": 1658
},
{
"epoch": 0.768589298123697,
"grad_norm": 0.84765625,
"learning_rate": 1.0627141032385904e-05,
"loss": 0.8235775828361511,
"step": 1659
},
{
"epoch": 0.7690525828121381,
"grad_norm": 0.84375,
"learning_rate": 1.0599920413877935e-05,
"loss": 1.0395088195800781,
"step": 1660
},
{
"epoch": 0.7695158675005791,
"grad_norm": 0.796875,
"learning_rate": 1.0572724817778647e-05,
"loss": 0.9071087837219238,
"step": 1661
},
{
"epoch": 0.7699791521890201,
"grad_norm": 0.86328125,
"learning_rate": 1.0545554294871282e-05,
"loss": 0.836544930934906,
"step": 1662
},
{
"epoch": 0.7704424368774612,
"grad_norm": 0.765625,
"learning_rate": 1.051840889589224e-05,
"loss": 0.8565930724143982,
"step": 1663
},
{
"epoch": 0.7709057215659022,
"grad_norm": 0.80078125,
"learning_rate": 1.0491288671531e-05,
"loss": 0.9731587171554565,
"step": 1664
},
{
"epoch": 0.7713690062543433,
"grad_norm": 0.91796875,
"learning_rate": 1.0464193672430078e-05,
"loss": 0.9530751705169678,
"step": 1665
},
{
"epoch": 0.7718322909427844,
"grad_norm": 0.75390625,
"learning_rate": 1.0437123949184834e-05,
"loss": 0.9852890968322754,
"step": 1666
},
{
"epoch": 0.7722955756312254,
"grad_norm": 1.0625,
"learning_rate": 1.0410079552343468e-05,
"loss": 1.010793924331665,
"step": 1667
},
{
"epoch": 0.7727588603196665,
"grad_norm": 0.67578125,
"learning_rate": 1.0383060532406851e-05,
"loss": 0.8825767636299133,
"step": 1668
},
{
"epoch": 0.7732221450081075,
"grad_norm": 0.87109375,
"learning_rate": 1.0356066939828494e-05,
"loss": 0.7823730707168579,
"step": 1669
},
{
"epoch": 0.7736854296965485,
"grad_norm": 1.03125,
"learning_rate": 1.0329098825014424e-05,
"loss": 0.8294221758842468,
"step": 1670
},
{
"epoch": 0.7741487143849896,
"grad_norm": 1.5234375,
"learning_rate": 1.0302156238323093e-05,
"loss": 0.9734241962432861,
"step": 1671
},
{
"epoch": 0.7746119990734306,
"grad_norm": 0.8671875,
"learning_rate": 1.0275239230065266e-05,
"loss": 1.0100374221801758,
"step": 1672
},
{
"epoch": 0.7750752837618716,
"grad_norm": 0.75,
"learning_rate": 1.0248347850503954e-05,
"loss": 1.160994529724121,
"step": 1673
},
{
"epoch": 0.7755385684503128,
"grad_norm": 0.79296875,
"learning_rate": 1.0221482149854319e-05,
"loss": 1.0030722618103027,
"step": 1674
},
{
"epoch": 0.7760018531387538,
"grad_norm": 0.8515625,
"learning_rate": 1.0194642178283568e-05,
"loss": 0.9101303219795227,
"step": 1675
},
{
"epoch": 0.7764651378271948,
"grad_norm": 0.8046875,
"learning_rate": 1.0167827985910865e-05,
"loss": 0.9005215167999268,
"step": 1676
},
{
"epoch": 0.7769284225156359,
"grad_norm": 0.86328125,
"learning_rate": 1.014103962280722e-05,
"loss": 0.935279369354248,
"step": 1677
},
{
"epoch": 0.7773917072040769,
"grad_norm": 0.859375,
"learning_rate": 1.0114277138995428e-05,
"loss": 0.8451523780822754,
"step": 1678
},
{
"epoch": 0.7778549918925179,
"grad_norm": 0.93359375,
"learning_rate": 1.0087540584449966e-05,
"loss": 0.8488246202468872,
"step": 1679
},
{
"epoch": 0.778318276580959,
"grad_norm": 0.859375,
"learning_rate": 1.0060830009096858e-05,
"loss": 0.9785677194595337,
"step": 1680
},
{
"epoch": 0.7787815612694,
"grad_norm": 0.8828125,
"learning_rate": 1.0034145462813665e-05,
"loss": 1.1581677198410034,
"step": 1681
},
{
"epoch": 0.779244845957841,
"grad_norm": 0.87109375,
"learning_rate": 1.000748699542929e-05,
"loss": 0.9785441160202026,
"step": 1682
},
{
"epoch": 0.7797081306462822,
"grad_norm": 0.7578125,
"learning_rate": 9.980854656723977e-06,
"loss": 0.9519538879394531,
"step": 1683
},
{
"epoch": 0.7801714153347232,
"grad_norm": 0.921875,
"learning_rate": 9.954248496429166e-06,
"loss": 0.8969386219978333,
"step": 1684
},
{
"epoch": 0.7806347000231643,
"grad_norm": 0.78125,
"learning_rate": 9.927668564227422e-06,
"loss": 0.8716106414794922,
"step": 1685
},
{
"epoch": 0.7810979847116053,
"grad_norm": 0.8359375,
"learning_rate": 9.901114909752323e-06,
"loss": 0.8693481087684631,
"step": 1686
},
{
"epoch": 0.7815612694000463,
"grad_norm": 0.76953125,
"learning_rate": 9.874587582588353e-06,
"loss": 0.9671769142150879,
"step": 1687
},
{
"epoch": 0.7820245540884874,
"grad_norm": 0.6875,
"learning_rate": 9.848086632270901e-06,
"loss": 0.8428149223327637,
"step": 1688
},
{
"epoch": 0.7824878387769284,
"grad_norm": 0.78125,
"learning_rate": 9.821612108286036e-06,
"loss": 1.0409609079360962,
"step": 1689
},
{
"epoch": 0.7829511234653694,
"grad_norm": 0.87109375,
"learning_rate": 9.795164060070523e-06,
"loss": 0.9725620746612549,
"step": 1690
},
{
"epoch": 0.7834144081538105,
"grad_norm": 0.9609375,
"learning_rate": 9.768742537011652e-06,
"loss": 0.9622225165367126,
"step": 1691
},
{
"epoch": 0.7838776928422516,
"grad_norm": 0.6796875,
"learning_rate": 9.74234758844721e-06,
"loss": 0.762078583240509,
"step": 1692
},
{
"epoch": 0.7843409775306927,
"grad_norm": 0.8046875,
"learning_rate": 9.715979263665355e-06,
"loss": 0.8382387161254883,
"step": 1693
},
{
"epoch": 0.7848042622191337,
"grad_norm": 0.76171875,
"learning_rate": 9.689637611904528e-06,
"loss": 0.8127785921096802,
"step": 1694
},
{
"epoch": 0.7852675469075747,
"grad_norm": 0.77734375,
"learning_rate": 9.663322682353359e-06,
"loss": 0.7931768894195557,
"step": 1695
},
{
"epoch": 0.7857308315960158,
"grad_norm": 0.734375,
"learning_rate": 9.637034524150567e-06,
"loss": 0.9206767678260803,
"step": 1696
},
{
"epoch": 0.7861941162844568,
"grad_norm": 0.97265625,
"learning_rate": 9.610773186384898e-06,
"loss": 0.9987959265708923,
"step": 1697
},
{
"epoch": 0.7866574009728978,
"grad_norm": 0.89453125,
"learning_rate": 9.584538718095019e-06,
"loss": 0.9304317235946655,
"step": 1698
},
{
"epoch": 0.7871206856613389,
"grad_norm": 0.875,
"learning_rate": 9.558331168269418e-06,
"loss": 0.8821865320205688,
"step": 1699
},
{
"epoch": 0.7875839703497799,
"grad_norm": 0.7109375,
"learning_rate": 9.532150585846297e-06,
"loss": 0.879505455493927,
"step": 1700
},
{
"epoch": 0.788047255038221,
"grad_norm": 0.97265625,
"learning_rate": 9.505997019713527e-06,
"loss": 0.9673975110054016,
"step": 1701
},
{
"epoch": 0.7885105397266621,
"grad_norm": 0.78515625,
"learning_rate": 9.479870518708525e-06,
"loss": 0.9111735820770264,
"step": 1702
},
{
"epoch": 0.7889738244151031,
"grad_norm": 0.8515625,
"learning_rate": 9.453771131618154e-06,
"loss": 1.1614623069763184,
"step": 1703
},
{
"epoch": 0.7894371091035441,
"grad_norm": 0.73046875,
"learning_rate": 9.42769890717867e-06,
"loss": 0.8665764331817627,
"step": 1704
},
{
"epoch": 0.7899003937919852,
"grad_norm": 0.86328125,
"learning_rate": 9.401653894075576e-06,
"loss": 0.9338579177856445,
"step": 1705
},
{
"epoch": 0.7903636784804262,
"grad_norm": 0.8046875,
"learning_rate": 9.375636140943591e-06,
"loss": 1.0184825658798218,
"step": 1706
},
{
"epoch": 0.7908269631688672,
"grad_norm": 0.890625,
"learning_rate": 9.349645696366522e-06,
"loss": 0.8786851167678833,
"step": 1707
},
{
"epoch": 0.7912902478573083,
"grad_norm": 0.94921875,
"learning_rate": 9.32368260887718e-06,
"loss": 1.002317190170288,
"step": 1708
},
{
"epoch": 0.7917535325457493,
"grad_norm": 0.80859375,
"learning_rate": 9.29774692695729e-06,
"loss": 0.9711103439331055,
"step": 1709
},
{
"epoch": 0.7922168172341905,
"grad_norm": 0.86328125,
"learning_rate": 9.271838699037386e-06,
"loss": 0.8316183090209961,
"step": 1710
},
{
"epoch": 0.7926801019226315,
"grad_norm": 0.86328125,
"learning_rate": 9.24595797349678e-06,
"loss": 1.0970137119293213,
"step": 1711
},
{
"epoch": 0.7931433866110725,
"grad_norm": 0.87109375,
"learning_rate": 9.220104798663383e-06,
"loss": 0.8820261359214783,
"step": 1712
},
{
"epoch": 0.7936066712995136,
"grad_norm": 0.9609375,
"learning_rate": 9.194279222813689e-06,
"loss": 0.9609889388084412,
"step": 1713
},
{
"epoch": 0.7940699559879546,
"grad_norm": 0.875,
"learning_rate": 9.168481294172628e-06,
"loss": 0.8790441751480103,
"step": 1714
},
{
"epoch": 0.7945332406763956,
"grad_norm": 0.9375,
"learning_rate": 9.142711060913529e-06,
"loss": 0.8385621309280396,
"step": 1715
},
{
"epoch": 0.7949965253648367,
"grad_norm": 0.78515625,
"learning_rate": 9.116968571158004e-06,
"loss": 0.7539405822753906,
"step": 1716
},
{
"epoch": 0.7954598100532777,
"grad_norm": 0.86328125,
"learning_rate": 9.09125387297583e-06,
"loss": 0.8373897075653076,
"step": 1717
},
{
"epoch": 0.7959230947417187,
"grad_norm": 0.88671875,
"learning_rate": 9.065567014384927e-06,
"loss": 0.8968495726585388,
"step": 1718
},
{
"epoch": 0.7963863794301599,
"grad_norm": 0.94921875,
"learning_rate": 9.03990804335119e-06,
"loss": 0.779243528842926,
"step": 1719
},
{
"epoch": 0.7968496641186009,
"grad_norm": 0.859375,
"learning_rate": 9.014277007788471e-06,
"loss": 0.8106821775436401,
"step": 1720
},
{
"epoch": 0.797312948807042,
"grad_norm": 0.75390625,
"learning_rate": 8.988673955558443e-06,
"loss": 0.8644953370094299,
"step": 1721
},
{
"epoch": 0.797776233495483,
"grad_norm": 0.76171875,
"learning_rate": 8.96309893447053e-06,
"loss": 0.9038248658180237,
"step": 1722
},
{
"epoch": 0.798239518183924,
"grad_norm": 0.93359375,
"learning_rate": 8.937551992281796e-06,
"loss": 0.996218204498291,
"step": 1723
},
{
"epoch": 0.7987028028723651,
"grad_norm": 0.859375,
"learning_rate": 8.912033176696893e-06,
"loss": 0.8666702508926392,
"step": 1724
},
{
"epoch": 0.7991660875608061,
"grad_norm": 0.85546875,
"learning_rate": 8.886542535367954e-06,
"loss": 0.8723835349082947,
"step": 1725
},
{
"epoch": 0.7996293722492471,
"grad_norm": 0.8515625,
"learning_rate": 8.861080115894469e-06,
"loss": 0.9064018130302429,
"step": 1726
},
{
"epoch": 0.8000926569376882,
"grad_norm": 0.86328125,
"learning_rate": 8.83564596582327e-06,
"loss": 0.8690557479858398,
"step": 1727
},
{
"epoch": 0.8005559416261293,
"grad_norm": 0.9453125,
"learning_rate": 8.810240132648365e-06,
"loss": 0.9081324934959412,
"step": 1728
},
{
"epoch": 0.8010192263145703,
"grad_norm": 1.0,
"learning_rate": 8.784862663810909e-06,
"loss": 0.8451979160308838,
"step": 1729
},
{
"epoch": 0.8014825110030114,
"grad_norm": 0.88671875,
"learning_rate": 8.759513606699077e-06,
"loss": 1.040283203125,
"step": 1730
},
{
"epoch": 0.8019457956914524,
"grad_norm": 0.7734375,
"learning_rate": 8.734193008648011e-06,
"loss": 1.03599214553833,
"step": 1731
},
{
"epoch": 0.8024090803798934,
"grad_norm": 0.7578125,
"learning_rate": 8.708900916939685e-06,
"loss": 1.0581880807876587,
"step": 1732
},
{
"epoch": 0.8028723650683345,
"grad_norm": 1.0390625,
"learning_rate": 8.683637378802835e-06,
"loss": 0.9091055989265442,
"step": 1733
},
{
"epoch": 0.8033356497567755,
"grad_norm": 0.76953125,
"learning_rate": 8.658402441412928e-06,
"loss": 0.7468848824501038,
"step": 1734
},
{
"epoch": 0.8037989344452166,
"grad_norm": 0.99609375,
"learning_rate": 8.63319615189197e-06,
"loss": 1.0149157047271729,
"step": 1735
},
{
"epoch": 0.8042622191336576,
"grad_norm": 0.8203125,
"learning_rate": 8.608018557308506e-06,
"loss": 0.825045645236969,
"step": 1736
},
{
"epoch": 0.8047255038220987,
"grad_norm": 0.92578125,
"learning_rate": 8.58286970467747e-06,
"loss": 0.7038781642913818,
"step": 1737
},
{
"epoch": 0.8051887885105398,
"grad_norm": 0.8515625,
"learning_rate": 8.55774964096015e-06,
"loss": 1.1522748470306396,
"step": 1738
},
{
"epoch": 0.8056520731989808,
"grad_norm": 0.83203125,
"learning_rate": 8.53265841306407e-06,
"loss": 0.949603259563446,
"step": 1739
},
{
"epoch": 0.8061153578874218,
"grad_norm": 0.875,
"learning_rate": 8.507596067842894e-06,
"loss": 0.9490684866905212,
"step": 1740
},
{
"epoch": 0.8065786425758629,
"grad_norm": 0.81640625,
"learning_rate": 8.482562652096375e-06,
"loss": 0.874863862991333,
"step": 1741
},
{
"epoch": 0.8070419272643039,
"grad_norm": 0.8203125,
"learning_rate": 8.457558212570205e-06,
"loss": 0.9082040190696716,
"step": 1742
},
{
"epoch": 0.8075052119527449,
"grad_norm": 0.8671875,
"learning_rate": 8.432582795956032e-06,
"loss": 0.9235297441482544,
"step": 1743
},
{
"epoch": 0.807968496641186,
"grad_norm": 0.765625,
"learning_rate": 8.407636448891245e-06,
"loss": 0.9147178530693054,
"step": 1744
},
{
"epoch": 0.808431781329627,
"grad_norm": 0.796875,
"learning_rate": 8.382719217958996e-06,
"loss": 1.0070385932922363,
"step": 1745
},
{
"epoch": 0.8088950660180682,
"grad_norm": 0.79296875,
"learning_rate": 8.35783114968803e-06,
"loss": 0.8351930975914001,
"step": 1746
},
{
"epoch": 0.8093583507065092,
"grad_norm": 0.77734375,
"learning_rate": 8.332972290552663e-06,
"loss": 0.8390335440635681,
"step": 1747
},
{
"epoch": 0.8098216353949502,
"grad_norm": 0.796875,
"learning_rate": 8.308142686972666e-06,
"loss": 0.9591008424758911,
"step": 1748
},
{
"epoch": 0.8102849200833913,
"grad_norm": 0.87890625,
"learning_rate": 8.28334238531316e-06,
"loss": 1.086327075958252,
"step": 1749
},
{
"epoch": 0.8107482047718323,
"grad_norm": 1.109375,
"learning_rate": 8.258571431884575e-06,
"loss": 0.960287868976593,
"step": 1750
},
{
"epoch": 0.8112114894602733,
"grad_norm": 0.796875,
"learning_rate": 8.233829872942513e-06,
"loss": 0.9874426126480103,
"step": 1751
},
{
"epoch": 0.8116747741487144,
"grad_norm": 0.859375,
"learning_rate": 8.209117754687708e-06,
"loss": 0.9690349102020264,
"step": 1752
},
{
"epoch": 0.8121380588371554,
"grad_norm": 0.82421875,
"learning_rate": 8.184435123265906e-06,
"loss": 1.0509775876998901,
"step": 1753
},
{
"epoch": 0.8126013435255964,
"grad_norm": 0.94140625,
"learning_rate": 8.159782024767808e-06,
"loss": 1.0827224254608154,
"step": 1754
},
{
"epoch": 0.8130646282140376,
"grad_norm": 0.921875,
"learning_rate": 8.13515850522894e-06,
"loss": 1.030488133430481,
"step": 1755
},
{
"epoch": 0.8135279129024786,
"grad_norm": 0.90234375,
"learning_rate": 8.110564610629599e-06,
"loss": 0.920238733291626,
"step": 1756
},
{
"epoch": 0.8139911975909196,
"grad_norm": 0.83203125,
"learning_rate": 8.086000386894804e-06,
"loss": 0.8451364040374756,
"step": 1757
},
{
"epoch": 0.8144544822793607,
"grad_norm": 0.96484375,
"learning_rate": 8.061465879894107e-06,
"loss": 0.893768846988678,
"step": 1758
},
{
"epoch": 0.8149177669678017,
"grad_norm": 0.84765625,
"learning_rate": 8.036961135441621e-06,
"loss": 0.9750687479972839,
"step": 1759
},
{
"epoch": 0.8153810516562428,
"grad_norm": 0.7578125,
"learning_rate": 8.01248619929584e-06,
"loss": 1.0192598104476929,
"step": 1760
},
{
"epoch": 0.8158443363446838,
"grad_norm": 0.7421875,
"learning_rate": 7.988041117159626e-06,
"loss": 1.0063856840133667,
"step": 1761
},
{
"epoch": 0.8163076210331248,
"grad_norm": 0.890625,
"learning_rate": 7.96362593468009e-06,
"loss": 0.8969213366508484,
"step": 1762
},
{
"epoch": 0.8167709057215659,
"grad_norm": 0.83203125,
"learning_rate": 7.939240697448489e-06,
"loss": 1.0545355081558228,
"step": 1763
},
{
"epoch": 0.817234190410007,
"grad_norm": 0.796875,
"learning_rate": 7.914885451000196e-06,
"loss": 0.8668513894081116,
"step": 1764
},
{
"epoch": 0.817697475098448,
"grad_norm": 0.9375,
"learning_rate": 7.890560240814532e-06,
"loss": 0.9358338713645935,
"step": 1765
},
{
"epoch": 0.8181607597868891,
"grad_norm": 0.77734375,
"learning_rate": 7.866265112314799e-06,
"loss": 0.8214500546455383,
"step": 1766
},
{
"epoch": 0.8186240444753301,
"grad_norm": 0.86328125,
"learning_rate": 7.842000110868062e-06,
"loss": 0.970549464225769,
"step": 1767
},
{
"epoch": 0.8190873291637711,
"grad_norm": 0.86328125,
"learning_rate": 7.81776528178517e-06,
"loss": 0.7393088936805725,
"step": 1768
},
{
"epoch": 0.8195506138522122,
"grad_norm": 0.84765625,
"learning_rate": 7.793560670320604e-06,
"loss": 1.0626767873764038,
"step": 1769
},
{
"epoch": 0.8200138985406532,
"grad_norm": 0.984375,
"learning_rate": 7.769386321672433e-06,
"loss": 0.9659113883972168,
"step": 1770
},
{
"epoch": 0.8204771832290942,
"grad_norm": 0.83984375,
"learning_rate": 7.74524228098222e-06,
"loss": 0.9227487444877625,
"step": 1771
},
{
"epoch": 0.8209404679175353,
"grad_norm": 0.88671875,
"learning_rate": 7.72112859333491e-06,
"loss": 1.034658670425415,
"step": 1772
},
{
"epoch": 0.8214037526059764,
"grad_norm": 0.984375,
"learning_rate": 7.6970453037588e-06,
"loss": 0.90932297706604,
"step": 1773
},
{
"epoch": 0.8218670372944175,
"grad_norm": 0.76171875,
"learning_rate": 7.672992457225394e-06,
"loss": 0.9367461204528809,
"step": 1774
},
{
"epoch": 0.8223303219828585,
"grad_norm": 0.890625,
"learning_rate": 7.648970098649369e-06,
"loss": 1.1317241191864014,
"step": 1775
},
{
"epoch": 0.8227936066712995,
"grad_norm": 0.82421875,
"learning_rate": 7.6249782728884594e-06,
"loss": 1.149112343788147,
"step": 1776
},
{
"epoch": 0.8232568913597406,
"grad_norm": 0.88671875,
"learning_rate": 7.601017024743406e-06,
"loss": 0.9359456300735474,
"step": 1777
},
{
"epoch": 0.8237201760481816,
"grad_norm": 0.80078125,
"learning_rate": 7.57708639895781e-06,
"loss": 0.9038832783699036,
"step": 1778
},
{
"epoch": 0.8241834607366226,
"grad_norm": 0.8828125,
"learning_rate": 7.553186440218129e-06,
"loss": 0.9364471435546875,
"step": 1779
},
{
"epoch": 0.8246467454250637,
"grad_norm": 0.83984375,
"learning_rate": 7.529317193153543e-06,
"loss": 0.8946930170059204,
"step": 1780
},
{
"epoch": 0.8251100301135047,
"grad_norm": 0.71484375,
"learning_rate": 7.505478702335871e-06,
"loss": 0.9415825009346008,
"step": 1781
},
{
"epoch": 0.8255733148019458,
"grad_norm": 0.83203125,
"learning_rate": 7.481671012279523e-06,
"loss": 0.9467945694923401,
"step": 1782
},
{
"epoch": 0.8260365994903869,
"grad_norm": 0.90234375,
"learning_rate": 7.457894167441365e-06,
"loss": 0.9999266266822815,
"step": 1783
},
{
"epoch": 0.8264998841788279,
"grad_norm": 0.875,
"learning_rate": 7.434148212220688e-06,
"loss": 0.854033350944519,
"step": 1784
},
{
"epoch": 0.826963168867269,
"grad_norm": 0.71484375,
"learning_rate": 7.4104331909591e-06,
"loss": 0.8315075039863586,
"step": 1785
},
{
"epoch": 0.82742645355571,
"grad_norm": 0.86328125,
"learning_rate": 7.3867491479404256e-06,
"loss": 0.7753373980522156,
"step": 1786
},
{
"epoch": 0.827889738244151,
"grad_norm": 1.015625,
"learning_rate": 7.363096127390672e-06,
"loss": 1.0225822925567627,
"step": 1787
},
{
"epoch": 0.828353022932592,
"grad_norm": 0.7578125,
"learning_rate": 7.339474173477875e-06,
"loss": 0.8440317511558533,
"step": 1788
},
{
"epoch": 0.8288163076210331,
"grad_norm": 0.734375,
"learning_rate": 7.315883330312121e-06,
"loss": 0.8816229701042175,
"step": 1789
},
{
"epoch": 0.8292795923094741,
"grad_norm": 0.79296875,
"learning_rate": 7.292323641945339e-06,
"loss": 0.9245011210441589,
"step": 1790
},
{
"epoch": 0.8297428769979153,
"grad_norm": 0.8515625,
"learning_rate": 7.268795152371322e-06,
"loss": 0.919562816619873,
"step": 1791
},
{
"epoch": 0.8302061616863563,
"grad_norm": 0.7890625,
"learning_rate": 7.245297905525582e-06,
"loss": 0.9542215466499329,
"step": 1792
},
{
"epoch": 0.8306694463747973,
"grad_norm": 0.8046875,
"learning_rate": 7.2218319452853055e-06,
"loss": 0.840879499912262,
"step": 1793
},
{
"epoch": 0.8311327310632384,
"grad_norm": 0.80078125,
"learning_rate": 7.198397315469257e-06,
"loss": 1.072795033454895,
"step": 1794
},
{
"epoch": 0.8315960157516794,
"grad_norm": 0.7578125,
"learning_rate": 7.174994059837673e-06,
"loss": 0.9962195754051208,
"step": 1795
},
{
"epoch": 0.8320593004401204,
"grad_norm": 0.7890625,
"learning_rate": 7.1516222220922425e-06,
"loss": 0.8247233033180237,
"step": 1796
},
{
"epoch": 0.8325225851285615,
"grad_norm": 0.87109375,
"learning_rate": 7.128281845875946e-06,
"loss": 0.8432327508926392,
"step": 1797
},
{
"epoch": 0.8329858698170025,
"grad_norm": 0.953125,
"learning_rate": 7.104972974773042e-06,
"loss": 1.0490100383758545,
"step": 1798
},
{
"epoch": 0.8334491545054435,
"grad_norm": 0.86328125,
"learning_rate": 7.081695652308952e-06,
"loss": 1.007668375968933,
"step": 1799
},
{
"epoch": 0.8339124391938847,
"grad_norm": 0.99609375,
"learning_rate": 7.058449921950193e-06,
"loss": 0.9167599081993103,
"step": 1800
},
{
"epoch": 0.8343757238823257,
"grad_norm": 0.81640625,
"learning_rate": 7.035235827104265e-06,
"loss": 0.930167019367218,
"step": 1801
},
{
"epoch": 0.8348390085707668,
"grad_norm": 1.1328125,
"learning_rate": 7.012053411119619e-06,
"loss": 0.8546901941299438,
"step": 1802
},
{
"epoch": 0.8353022932592078,
"grad_norm": 0.8515625,
"learning_rate": 6.988902717285545e-06,
"loss": 0.9504755139350891,
"step": 1803
},
{
"epoch": 0.8357655779476488,
"grad_norm": 0.84375,
"learning_rate": 6.9657837888320815e-06,
"loss": 0.9580096006393433,
"step": 1804
},
{
"epoch": 0.8362288626360899,
"grad_norm": 0.796875,
"learning_rate": 6.94269666892998e-06,
"loss": 0.7468809485435486,
"step": 1805
},
{
"epoch": 0.8366921473245309,
"grad_norm": 0.88671875,
"learning_rate": 6.919641400690559e-06,
"loss": 0.8644688129425049,
"step": 1806
},
{
"epoch": 0.8371554320129719,
"grad_norm": 0.83984375,
"learning_rate": 6.896618027165684e-06,
"loss": 0.9328237771987915,
"step": 1807
},
{
"epoch": 0.837618716701413,
"grad_norm": 0.97265625,
"learning_rate": 6.873626591347671e-06,
"loss": 1.1448862552642822,
"step": 1808
},
{
"epoch": 0.8380820013898541,
"grad_norm": 0.84765625,
"learning_rate": 6.850667136169164e-06,
"loss": 0.8983963131904602,
"step": 1809
},
{
"epoch": 0.8385452860782951,
"grad_norm": 0.91015625,
"learning_rate": 6.8277397045031205e-06,
"loss": 0.8409160375595093,
"step": 1810
},
{
"epoch": 0.8390085707667362,
"grad_norm": 0.73046875,
"learning_rate": 6.804844339162666e-06,
"loss": 0.8944919109344482,
"step": 1811
},
{
"epoch": 0.8394718554551772,
"grad_norm": 1.140625,
"learning_rate": 6.781981082901101e-06,
"loss": 0.9417099952697754,
"step": 1812
},
{
"epoch": 0.8399351401436183,
"grad_norm": 0.91796875,
"learning_rate": 6.759149978411709e-06,
"loss": 0.8804126977920532,
"step": 1813
},
{
"epoch": 0.8403984248320593,
"grad_norm": 0.796875,
"learning_rate": 6.736351068327776e-06,
"loss": 0.8906807899475098,
"step": 1814
},
{
"epoch": 0.8408617095205003,
"grad_norm": 0.89453125,
"learning_rate": 6.713584395222441e-06,
"loss": 0.9027203917503357,
"step": 1815
},
{
"epoch": 0.8413249942089414,
"grad_norm": 0.8046875,
"learning_rate": 6.690850001608671e-06,
"loss": 0.8666508793830872,
"step": 1816
},
{
"epoch": 0.8417882788973824,
"grad_norm": 1.296875,
"learning_rate": 6.668147929939147e-06,
"loss": 0.8996185064315796,
"step": 1817
},
{
"epoch": 0.8422515635858235,
"grad_norm": 0.8515625,
"learning_rate": 6.645478222606184e-06,
"loss": 0.8643486499786377,
"step": 1818
},
{
"epoch": 0.8427148482742646,
"grad_norm": 0.78515625,
"learning_rate": 6.622840921941684e-06,
"loss": 0.8814486265182495,
"step": 1819
},
{
"epoch": 0.8431781329627056,
"grad_norm": 0.81640625,
"learning_rate": 6.600236070216997e-06,
"loss": 0.9077647924423218,
"step": 1820
},
{
"epoch": 0.8436414176511466,
"grad_norm": 0.796875,
"learning_rate": 6.577663709642938e-06,
"loss": 0.9069101810455322,
"step": 1821
},
{
"epoch": 0.8441047023395877,
"grad_norm": 0.84765625,
"learning_rate": 6.555123882369596e-06,
"loss": 0.7405815720558167,
"step": 1822
},
{
"epoch": 0.8445679870280287,
"grad_norm": 0.92578125,
"learning_rate": 6.532616630486341e-06,
"loss": 0.929868757724762,
"step": 1823
},
{
"epoch": 0.8450312717164697,
"grad_norm": 0.89453125,
"learning_rate": 6.5101419960216925e-06,
"loss": 0.9069142937660217,
"step": 1824
},
{
"epoch": 0.8454945564049108,
"grad_norm": 0.99609375,
"learning_rate": 6.48770002094328e-06,
"loss": 0.9167734980583191,
"step": 1825
},
{
"epoch": 0.8459578410933518,
"grad_norm": 0.83984375,
"learning_rate": 6.465290747157745e-06,
"loss": 0.8654367923736572,
"step": 1826
},
{
"epoch": 0.846421125781793,
"grad_norm": 0.7578125,
"learning_rate": 6.442914216510651e-06,
"loss": 0.7541770935058594,
"step": 1827
},
{
"epoch": 0.846884410470234,
"grad_norm": 0.84765625,
"learning_rate": 6.420570470786438e-06,
"loss": 0.8677940964698792,
"step": 1828
},
{
"epoch": 0.847347695158675,
"grad_norm": 0.8671875,
"learning_rate": 6.3982595517083064e-06,
"loss": 0.8606151342391968,
"step": 1829
},
{
"epoch": 0.8478109798471161,
"grad_norm": 0.890625,
"learning_rate": 6.375981500938173e-06,
"loss": 0.9796140193939209,
"step": 1830
},
{
"epoch": 0.8482742645355571,
"grad_norm": 0.921875,
"learning_rate": 6.353736360076578e-06,
"loss": 0.921321451663971,
"step": 1831
},
{
"epoch": 0.8487375492239981,
"grad_norm": 0.921875,
"learning_rate": 6.3315241706625946e-06,
"loss": 0.9960170388221741,
"step": 1832
},
{
"epoch": 0.8492008339124392,
"grad_norm": 0.73046875,
"learning_rate": 6.309344974173784e-06,
"loss": 0.783862292766571,
"step": 1833
},
{
"epoch": 0.8496641186008802,
"grad_norm": 0.83203125,
"learning_rate": 6.287198812026068e-06,
"loss": 0.9817046523094177,
"step": 1834
},
{
"epoch": 0.8501274032893212,
"grad_norm": 0.90625,
"learning_rate": 6.265085725573732e-06,
"loss": 0.9676863551139832,
"step": 1835
},
{
"epoch": 0.8505906879777624,
"grad_norm": 0.68359375,
"learning_rate": 6.243005756109246e-06,
"loss": 0.926174521446228,
"step": 1836
},
{
"epoch": 0.8510539726662034,
"grad_norm": 0.8828125,
"learning_rate": 6.220958944863276e-06,
"loss": 0.889807939529419,
"step": 1837
},
{
"epoch": 0.8515172573546445,
"grad_norm": 0.98828125,
"learning_rate": 6.198945333004545e-06,
"loss": 0.9411275386810303,
"step": 1838
},
{
"epoch": 0.8519805420430855,
"grad_norm": 0.83203125,
"learning_rate": 6.176964961639795e-06,
"loss": 0.8610736131668091,
"step": 1839
},
{
"epoch": 0.8524438267315265,
"grad_norm": 0.76171875,
"learning_rate": 6.1550178718137095e-06,
"loss": 0.8684600591659546,
"step": 1840
},
{
"epoch": 0.8529071114199676,
"grad_norm": 0.83984375,
"learning_rate": 6.13310410450879e-06,
"loss": 0.8963911533355713,
"step": 1841
},
{
"epoch": 0.8533703961084086,
"grad_norm": 0.91015625,
"learning_rate": 6.111223700645352e-06,
"loss": 1.0184478759765625,
"step": 1842
},
{
"epoch": 0.8538336807968496,
"grad_norm": 0.8984375,
"learning_rate": 6.089376701081368e-06,
"loss": 0.9415737390518188,
"step": 1843
},
{
"epoch": 0.8542969654852907,
"grad_norm": 0.83203125,
"learning_rate": 6.067563146612489e-06,
"loss": 0.9422698020935059,
"step": 1844
},
{
"epoch": 0.8547602501737318,
"grad_norm": 0.921875,
"learning_rate": 6.045783077971863e-06,
"loss": 0.8872048854827881,
"step": 1845
},
{
"epoch": 0.8552235348621728,
"grad_norm": 0.82421875,
"learning_rate": 6.024036535830124e-06,
"loss": 0.7562741041183472,
"step": 1846
},
{
"epoch": 0.8556868195506139,
"grad_norm": 0.93359375,
"learning_rate": 6.002323560795314e-06,
"loss": 0.8720545172691345,
"step": 1847
},
{
"epoch": 0.8561501042390549,
"grad_norm": 0.82421875,
"learning_rate": 5.980644193412778e-06,
"loss": 0.9384455680847168,
"step": 1848
},
{
"epoch": 0.856613388927496,
"grad_norm": 0.765625,
"learning_rate": 5.958998474165121e-06,
"loss": 0.8861362934112549,
"step": 1849
},
{
"epoch": 0.857076673615937,
"grad_norm": 0.87890625,
"learning_rate": 5.937386443472092e-06,
"loss": 1.1345970630645752,
"step": 1850
},
{
"epoch": 0.857539958304378,
"grad_norm": 0.734375,
"learning_rate": 5.915808141690556e-06,
"loss": 0.8685821294784546,
"step": 1851
},
{
"epoch": 0.858003242992819,
"grad_norm": 0.83984375,
"learning_rate": 5.894263609114378e-06,
"loss": 0.996828019618988,
"step": 1852
},
{
"epoch": 0.8584665276812601,
"grad_norm": 0.86328125,
"learning_rate": 5.872752885974371e-06,
"loss": 0.9989946484565735,
"step": 1853
},
{
"epoch": 0.8589298123697012,
"grad_norm": 0.87890625,
"learning_rate": 5.851276012438224e-06,
"loss": 0.7909801006317139,
"step": 1854
},
{
"epoch": 0.8593930970581423,
"grad_norm": 0.8046875,
"learning_rate": 5.829833028610395e-06,
"loss": 1.1088160276412964,
"step": 1855
},
{
"epoch": 0.8598563817465833,
"grad_norm": 0.80859375,
"learning_rate": 5.80842397453208e-06,
"loss": 0.8807237148284912,
"step": 1856
},
{
"epoch": 0.8603196664350243,
"grad_norm": 0.8359375,
"learning_rate": 5.787048890181105e-06,
"loss": 0.8608243465423584,
"step": 1857
},
{
"epoch": 0.8607829511234654,
"grad_norm": 0.8203125,
"learning_rate": 5.765707815471878e-06,
"loss": 0.9504100680351257,
"step": 1858
},
{
"epoch": 0.8612462358119064,
"grad_norm": 0.9296875,
"learning_rate": 5.744400790255271e-06,
"loss": 0.9452154040336609,
"step": 1859
},
{
"epoch": 0.8617095205003474,
"grad_norm": 0.82421875,
"learning_rate": 5.72312785431861e-06,
"loss": 0.8969765305519104,
"step": 1860
},
{
"epoch": 0.8621728051887885,
"grad_norm": 0.9453125,
"learning_rate": 5.701889047385529e-06,
"loss": 0.996848464012146,
"step": 1861
},
{
"epoch": 0.8626360898772295,
"grad_norm": 0.8984375,
"learning_rate": 5.68068440911596e-06,
"loss": 0.8867621421813965,
"step": 1862
},
{
"epoch": 0.8630993745656707,
"grad_norm": 0.875,
"learning_rate": 5.6595139791060246e-06,
"loss": 1.0145070552825928,
"step": 1863
},
{
"epoch": 0.8635626592541117,
"grad_norm": 0.8515625,
"learning_rate": 5.63837779688795e-06,
"loss": 0.8787609934806824,
"step": 1864
},
{
"epoch": 0.8640259439425527,
"grad_norm": 0.84375,
"learning_rate": 5.617275901930037e-06,
"loss": 0.9451928734779358,
"step": 1865
},
{
"epoch": 0.8644892286309938,
"grad_norm": 0.80078125,
"learning_rate": 5.596208333636525e-06,
"loss": 0.9350622892379761,
"step": 1866
},
{
"epoch": 0.8649525133194348,
"grad_norm": 0.86328125,
"learning_rate": 5.5751751313476055e-06,
"loss": 0.7591818571090698,
"step": 1867
},
{
"epoch": 0.8654157980078758,
"grad_norm": 0.77734375,
"learning_rate": 5.554176334339251e-06,
"loss": 0.995162844657898,
"step": 1868
},
{
"epoch": 0.8658790826963169,
"grad_norm": 0.97265625,
"learning_rate": 5.533211981823204e-06,
"loss": 1.090124487876892,
"step": 1869
},
{
"epoch": 0.8663423673847579,
"grad_norm": 0.84765625,
"learning_rate": 5.512282112946889e-06,
"loss": 0.8698755502700806,
"step": 1870
},
{
"epoch": 0.8668056520731989,
"grad_norm": 0.7734375,
"learning_rate": 5.4913867667933405e-06,
"loss": 0.8936692476272583,
"step": 1871
},
{
"epoch": 0.8672689367616401,
"grad_norm": 0.80859375,
"learning_rate": 5.470525982381133e-06,
"loss": 0.9556330442428589,
"step": 1872
},
{
"epoch": 0.8677322214500811,
"grad_norm": 0.7734375,
"learning_rate": 5.449699798664276e-06,
"loss": 0.7762373089790344,
"step": 1873
},
{
"epoch": 0.8681955061385221,
"grad_norm": 0.84765625,
"learning_rate": 5.428908254532204e-06,
"loss": 0.8480014204978943,
"step": 1874
},
{
"epoch": 0.8686587908269632,
"grad_norm": 0.85546875,
"learning_rate": 5.4081513888096335e-06,
"loss": 1.0431307554244995,
"step": 1875
},
{
"epoch": 0.8691220755154042,
"grad_norm": 0.85546875,
"learning_rate": 5.3874292402565515e-06,
"loss": 0.9694292545318604,
"step": 1876
},
{
"epoch": 0.8695853602038452,
"grad_norm": 0.93359375,
"learning_rate": 5.366741847568112e-06,
"loss": 0.9653794765472412,
"step": 1877
},
{
"epoch": 0.8700486448922863,
"grad_norm": 0.77734375,
"learning_rate": 5.346089249374549e-06,
"loss": 0.8758606314659119,
"step": 1878
},
{
"epoch": 0.8705119295807273,
"grad_norm": 0.76953125,
"learning_rate": 5.325471484241144e-06,
"loss": 0.8884270191192627,
"step": 1879
},
{
"epoch": 0.8709752142691684,
"grad_norm": 0.8046875,
"learning_rate": 5.304888590668126e-06,
"loss": 0.9315338730812073,
"step": 1880
},
{
"epoch": 0.8714384989576095,
"grad_norm": 0.79296875,
"learning_rate": 5.284340607090616e-06,
"loss": 0.874808132648468,
"step": 1881
},
{
"epoch": 0.8719017836460505,
"grad_norm": 0.890625,
"learning_rate": 5.263827571878527e-06,
"loss": 0.95280921459198,
"step": 1882
},
{
"epoch": 0.8723650683344916,
"grad_norm": 0.80859375,
"learning_rate": 5.243349523336532e-06,
"loss": 0.7937729954719543,
"step": 1883
},
{
"epoch": 0.8728283530229326,
"grad_norm": 0.8359375,
"learning_rate": 5.222906499703955e-06,
"loss": 0.8925231695175171,
"step": 1884
},
{
"epoch": 0.8732916377113736,
"grad_norm": 0.9375,
"learning_rate": 5.20249853915473e-06,
"loss": 0.7958294153213501,
"step": 1885
},
{
"epoch": 0.8737549223998147,
"grad_norm": 0.78125,
"learning_rate": 5.1821256797973185e-06,
"loss": 0.8661794066429138,
"step": 1886
},
{
"epoch": 0.8742182070882557,
"grad_norm": 0.83203125,
"learning_rate": 5.1617879596746155e-06,
"loss": 0.8948233723640442,
"step": 1887
},
{
"epoch": 0.8746814917766967,
"grad_norm": 1.0703125,
"learning_rate": 5.141485416763928e-06,
"loss": 0.9799279570579529,
"step": 1888
},
{
"epoch": 0.8751447764651378,
"grad_norm": 0.77734375,
"learning_rate": 5.121218088976843e-06,
"loss": 0.8897976279258728,
"step": 1889
},
{
"epoch": 0.8756080611535789,
"grad_norm": 0.8515625,
"learning_rate": 5.1009860141592314e-06,
"loss": 1.034111738204956,
"step": 1890
},
{
"epoch": 0.87607134584202,
"grad_norm": 0.80078125,
"learning_rate": 5.080789230091099e-06,
"loss": 0.8257846236228943,
"step": 1891
},
{
"epoch": 0.876534630530461,
"grad_norm": 0.734375,
"learning_rate": 5.060627774486557e-06,
"loss": 0.9181721210479736,
"step": 1892
},
{
"epoch": 0.876997915218902,
"grad_norm": 0.828125,
"learning_rate": 5.04050168499376e-06,
"loss": 0.8174453377723694,
"step": 1893
},
{
"epoch": 0.8774611999073431,
"grad_norm": 0.93359375,
"learning_rate": 5.020410999194815e-06,
"loss": 0.9571653604507446,
"step": 1894
},
{
"epoch": 0.8779244845957841,
"grad_norm": 0.8828125,
"learning_rate": 5.0003557546057275e-06,
"loss": 0.8483561277389526,
"step": 1895
},
{
"epoch": 0.8783877692842251,
"grad_norm": 0.90234375,
"learning_rate": 4.9803359886763e-06,
"loss": 0.9033476114273071,
"step": 1896
},
{
"epoch": 0.8788510539726662,
"grad_norm": 0.875,
"learning_rate": 4.960351738790113e-06,
"loss": 0.9668432474136353,
"step": 1897
},
{
"epoch": 0.8793143386611072,
"grad_norm": 0.83203125,
"learning_rate": 4.9404030422644e-06,
"loss": 0.8885450959205627,
"step": 1898
},
{
"epoch": 0.8797776233495483,
"grad_norm": 1.0078125,
"learning_rate": 4.92048993635002e-06,
"loss": 0.7515835762023926,
"step": 1899
},
{
"epoch": 0.8802409080379894,
"grad_norm": 0.8828125,
"learning_rate": 4.9006124582313825e-06,
"loss": 0.8569181561470032,
"step": 1900
},
{
"epoch": 0.8807041927264304,
"grad_norm": 0.80078125,
"learning_rate": 4.880770645026336e-06,
"loss": 0.8861024975776672,
"step": 1901
},
{
"epoch": 0.8811674774148714,
"grad_norm": 0.84375,
"learning_rate": 4.8609645337861615e-06,
"loss": 0.884182333946228,
"step": 1902
},
{
"epoch": 0.8816307621033125,
"grad_norm": 0.72265625,
"learning_rate": 4.841194161495456e-06,
"loss": 0.8558884263038635,
"step": 1903
},
{
"epoch": 0.8820940467917535,
"grad_norm": 0.76953125,
"learning_rate": 4.8214595650720945e-06,
"loss": 0.9538986682891846,
"step": 1904
},
{
"epoch": 0.8825573314801946,
"grad_norm": 0.83203125,
"learning_rate": 4.8017607813671255e-06,
"loss": 0.9202069640159607,
"step": 1905
},
{
"epoch": 0.8830206161686356,
"grad_norm": 0.78515625,
"learning_rate": 4.782097847164745e-06,
"loss": 0.8663555383682251,
"step": 1906
},
{
"epoch": 0.8834839008570766,
"grad_norm": 0.83203125,
"learning_rate": 4.762470799182182e-06,
"loss": 0.8842315673828125,
"step": 1907
},
{
"epoch": 0.8839471855455178,
"grad_norm": 0.90625,
"learning_rate": 4.7428796740696775e-06,
"loss": 1.0153151750564575,
"step": 1908
},
{
"epoch": 0.8844104702339588,
"grad_norm": 0.76953125,
"learning_rate": 4.723324508410386e-06,
"loss": 0.8607885837554932,
"step": 1909
},
{
"epoch": 0.8848737549223998,
"grad_norm": 0.86328125,
"learning_rate": 4.703805338720301e-06,
"loss": 0.8658420443534851,
"step": 1910
},
{
"epoch": 0.8853370396108409,
"grad_norm": 0.7578125,
"learning_rate": 4.684322201448219e-06,
"loss": 0.814765453338623,
"step": 1911
},
{
"epoch": 0.8858003242992819,
"grad_norm": 0.88671875,
"learning_rate": 4.664875132975623e-06,
"loss": 1.0322341918945312,
"step": 1912
},
{
"epoch": 0.8862636089877229,
"grad_norm": 0.8046875,
"learning_rate": 4.645464169616691e-06,
"loss": 0.8110833168029785,
"step": 1913
},
{
"epoch": 0.886726893676164,
"grad_norm": 0.8203125,
"learning_rate": 4.6260893476181384e-06,
"loss": 0.836736798286438,
"step": 1914
},
{
"epoch": 0.887190178364605,
"grad_norm": 0.97265625,
"learning_rate": 4.606750703159197e-06,
"loss": 0.9669207334518433,
"step": 1915
},
{
"epoch": 0.887653463053046,
"grad_norm": 0.95703125,
"learning_rate": 4.587448272351564e-06,
"loss": 0.8839113116264343,
"step": 1916
},
{
"epoch": 0.8881167477414872,
"grad_norm": 0.9296875,
"learning_rate": 4.568182091239298e-06,
"loss": 0.8875067234039307,
"step": 1917
},
{
"epoch": 0.8885800324299282,
"grad_norm": 0.75390625,
"learning_rate": 4.548952195798783e-06,
"loss": 0.8245463967323303,
"step": 1918
},
{
"epoch": 0.8890433171183693,
"grad_norm": 1.0234375,
"learning_rate": 4.529758621938616e-06,
"loss": 0.9345543384552002,
"step": 1919
},
{
"epoch": 0.8895066018068103,
"grad_norm": 0.8515625,
"learning_rate": 4.510601405499605e-06,
"loss": 0.841739296913147,
"step": 1920
},
{
"epoch": 0.8899698864952513,
"grad_norm": 0.8671875,
"learning_rate": 4.491480582254634e-06,
"loss": 0.8611487746238708,
"step": 1921
},
{
"epoch": 0.8904331711836924,
"grad_norm": 0.90234375,
"learning_rate": 4.472396187908652e-06,
"loss": 0.7870234251022339,
"step": 1922
},
{
"epoch": 0.8908964558721334,
"grad_norm": 0.95703125,
"learning_rate": 4.453348258098582e-06,
"loss": 1.1207804679870605,
"step": 1923
},
{
"epoch": 0.8913597405605744,
"grad_norm": 0.796875,
"learning_rate": 4.434336828393233e-06,
"loss": 1.0150240659713745,
"step": 1924
},
{
"epoch": 0.8918230252490155,
"grad_norm": 0.96484375,
"learning_rate": 4.415361934293283e-06,
"loss": 1.0550124645233154,
"step": 1925
},
{
"epoch": 0.8922863099374566,
"grad_norm": 0.80078125,
"learning_rate": 4.396423611231171e-06,
"loss": 0.8985774517059326,
"step": 1926
},
{
"epoch": 0.8927495946258976,
"grad_norm": 0.8515625,
"learning_rate": 4.377521894571057e-06,
"loss": 0.9433072209358215,
"step": 1927
},
{
"epoch": 0.8932128793143387,
"grad_norm": 0.82421875,
"learning_rate": 4.358656819608728e-06,
"loss": 0.9555040001869202,
"step": 1928
},
{
"epoch": 0.8936761640027797,
"grad_norm": 1.015625,
"learning_rate": 4.339828421571566e-06,
"loss": 0.9342141151428223,
"step": 1929
},
{
"epoch": 0.8941394486912208,
"grad_norm": 0.90625,
"learning_rate": 4.321036735618446e-06,
"loss": 0.9744370579719543,
"step": 1930
},
{
"epoch": 0.8946027333796618,
"grad_norm": 0.8203125,
"learning_rate": 4.302281796839706e-06,
"loss": 0.8748108744621277,
"step": 1931
},
{
"epoch": 0.8950660180681028,
"grad_norm": 0.83984375,
"learning_rate": 4.283563640257069e-06,
"loss": 0.9074385762214661,
"step": 1932
},
{
"epoch": 0.8955293027565439,
"grad_norm": 0.8125,
"learning_rate": 4.2648823008235475e-06,
"loss": 0.8706763982772827,
"step": 1933
},
{
"epoch": 0.8959925874449849,
"grad_norm": 0.8359375,
"learning_rate": 4.246237813423425e-06,
"loss": 0.9113630652427673,
"step": 1934
},
{
"epoch": 0.896455872133426,
"grad_norm": 0.796875,
"learning_rate": 4.227630212872168e-06,
"loss": 1.0912119150161743,
"step": 1935
},
{
"epoch": 0.8969191568218671,
"grad_norm": 0.94921875,
"learning_rate": 4.2090595339163665e-06,
"loss": 0.8499932289123535,
"step": 1936
},
{
"epoch": 0.8973824415103081,
"grad_norm": 0.953125,
"learning_rate": 4.190525811233652e-06,
"loss": 0.9822698831558228,
"step": 1937
},
{
"epoch": 0.8978457261987491,
"grad_norm": 1.09375,
"learning_rate": 4.172029079432648e-06,
"loss": 0.9892884492874146,
"step": 1938
},
{
"epoch": 0.8983090108871902,
"grad_norm": 0.8046875,
"learning_rate": 4.153569373052913e-06,
"loss": 1.048028588294983,
"step": 1939
},
{
"epoch": 0.8987722955756312,
"grad_norm": 0.73046875,
"learning_rate": 4.135146726564865e-06,
"loss": 0.7311965227127075,
"step": 1940
},
{
"epoch": 0.8992355802640722,
"grad_norm": 0.76171875,
"learning_rate": 4.116761174369723e-06,
"loss": 0.967644989490509,
"step": 1941
},
{
"epoch": 0.8996988649525133,
"grad_norm": 0.8046875,
"learning_rate": 4.098412750799421e-06,
"loss": 0.9470330476760864,
"step": 1942
},
{
"epoch": 0.9001621496409543,
"grad_norm": 0.9921875,
"learning_rate": 4.080101490116581e-06,
"loss": 0.9476629495620728,
"step": 1943
},
{
"epoch": 0.9006254343293955,
"grad_norm": 0.85546875,
"learning_rate": 4.061827426514416e-06,
"loss": 0.8855443000793457,
"step": 1944
},
{
"epoch": 0.9010887190178365,
"grad_norm": 0.74609375,
"learning_rate": 4.043590594116685e-06,
"loss": 1.0060893297195435,
"step": 1945
},
{
"epoch": 0.9015520037062775,
"grad_norm": 0.8671875,
"learning_rate": 4.025391026977633e-06,
"loss": 0.9967565536499023,
"step": 1946
},
{
"epoch": 0.9020152883947186,
"grad_norm": 0.91015625,
"learning_rate": 4.007228759081898e-06,
"loss": 1.0718729496002197,
"step": 1947
},
{
"epoch": 0.9024785730831596,
"grad_norm": 0.84375,
"learning_rate": 3.989103824344483e-06,
"loss": 0.7362527251243591,
"step": 1948
},
{
"epoch": 0.9029418577716006,
"grad_norm": 0.80078125,
"learning_rate": 3.971016256610675e-06,
"loss": 0.7745991945266724,
"step": 1949
},
{
"epoch": 0.9034051424600417,
"grad_norm": 0.78515625,
"learning_rate": 3.95296608965599e-06,
"loss": 0.9183497428894043,
"step": 1950
},
{
"epoch": 0.9038684271484827,
"grad_norm": 0.77734375,
"learning_rate": 3.934953357186084e-06,
"loss": 0.9153457880020142,
"step": 1951
},
{
"epoch": 0.9043317118369237,
"grad_norm": 0.8671875,
"learning_rate": 3.916978092836737e-06,
"loss": 0.9395539164543152,
"step": 1952
},
{
"epoch": 0.9047949965253649,
"grad_norm": 0.88671875,
"learning_rate": 3.899040330173741e-06,
"loss": 0.9092686176300049,
"step": 1953
},
{
"epoch": 0.9052582812138059,
"grad_norm": 0.828125,
"learning_rate": 3.881140102692869e-06,
"loss": 0.957666277885437,
"step": 1954
},
{
"epoch": 0.905721565902247,
"grad_norm": 0.99609375,
"learning_rate": 3.863277443819814e-06,
"loss": 0.8853251934051514,
"step": 1955
},
{
"epoch": 0.906184850590688,
"grad_norm": 0.79296875,
"learning_rate": 3.845452386910094e-06,
"loss": 0.9027367234230042,
"step": 1956
},
{
"epoch": 0.906648135279129,
"grad_norm": 0.91015625,
"learning_rate": 3.827664965249025e-06,
"loss": 0.8307380080223083,
"step": 1957
},
{
"epoch": 0.90711141996757,
"grad_norm": 0.7578125,
"learning_rate": 3.8099152120516485e-06,
"loss": 0.9248343110084534,
"step": 1958
},
{
"epoch": 0.9075747046560111,
"grad_norm": 0.94140625,
"learning_rate": 3.792203160462667e-06,
"loss": 0.9491377472877502,
"step": 1959
},
{
"epoch": 0.9080379893444521,
"grad_norm": 0.953125,
"learning_rate": 3.7745288435563653e-06,
"loss": 0.9673945307731628,
"step": 1960
},
{
"epoch": 0.9085012740328932,
"grad_norm": 1.0234375,
"learning_rate": 3.7568922943365755e-06,
"loss": 0.9584711790084839,
"step": 1961
},
{
"epoch": 0.9089645587213343,
"grad_norm": 0.87109375,
"learning_rate": 3.7392935457366088e-06,
"loss": 0.9805790781974792,
"step": 1962
},
{
"epoch": 0.9094278434097753,
"grad_norm": 0.9453125,
"learning_rate": 3.7217326306191865e-06,
"loss": 0.8713183403015137,
"step": 1963
},
{
"epoch": 0.9098911280982164,
"grad_norm": 0.8125,
"learning_rate": 3.704209581776387e-06,
"loss": 0.9644355177879333,
"step": 1964
},
{
"epoch": 0.9103544127866574,
"grad_norm": 0.87890625,
"learning_rate": 3.686724431929563e-06,
"loss": 0.9322176575660706,
"step": 1965
},
{
"epoch": 0.9108176974750984,
"grad_norm": 0.75,
"learning_rate": 3.6692772137293233e-06,
"loss": 0.8894251585006714,
"step": 1966
},
{
"epoch": 0.9112809821635395,
"grad_norm": 0.74609375,
"learning_rate": 3.6518679597554196e-06,
"loss": 0.8077326416969299,
"step": 1967
},
{
"epoch": 0.9117442668519805,
"grad_norm": 0.79296875,
"learning_rate": 3.634496702516724e-06,
"loss": 0.8675779104232788,
"step": 1968
},
{
"epoch": 0.9122075515404215,
"grad_norm": 1.046875,
"learning_rate": 3.6171634744511705e-06,
"loss": 1.0307801961898804,
"step": 1969
},
{
"epoch": 0.9126708362288626,
"grad_norm": 0.80859375,
"learning_rate": 3.5998683079256456e-06,
"loss": 0.8632080554962158,
"step": 1970
},
{
"epoch": 0.9131341209173037,
"grad_norm": 1.3046875,
"learning_rate": 3.5826112352359906e-06,
"loss": 0.9748227596282959,
"step": 1971
},
{
"epoch": 0.9135974056057448,
"grad_norm": 0.7734375,
"learning_rate": 3.5653922886069056e-06,
"loss": 1.0766937732696533,
"step": 1972
},
{
"epoch": 0.9140606902941858,
"grad_norm": 0.8125,
"learning_rate": 3.548211500191897e-06,
"loss": 0.9421984553337097,
"step": 1973
},
{
"epoch": 0.9145239749826268,
"grad_norm": 0.890625,
"learning_rate": 3.5310689020732137e-06,
"loss": 0.9485040903091431,
"step": 1974
},
{
"epoch": 0.9149872596710679,
"grad_norm": 0.9375,
"learning_rate": 3.513964526261783e-06,
"loss": 0.7310665249824524,
"step": 1975
},
{
"epoch": 0.9154505443595089,
"grad_norm": 0.79296875,
"learning_rate": 3.496898404697176e-06,
"loss": 0.8767428398132324,
"step": 1976
},
{
"epoch": 0.9159138290479499,
"grad_norm": 0.875,
"learning_rate": 3.4798705692475193e-06,
"loss": 0.7951971292495728,
"step": 1977
},
{
"epoch": 0.916377113736391,
"grad_norm": 0.80859375,
"learning_rate": 3.4628810517094586e-06,
"loss": 1.0523958206176758,
"step": 1978
},
{
"epoch": 0.9168403984248321,
"grad_norm": 0.88671875,
"learning_rate": 3.445929883808061e-06,
"loss": 0.9560039639472961,
"step": 1979
},
{
"epoch": 0.9173036831132731,
"grad_norm": 0.7578125,
"learning_rate": 3.4290170971968125e-06,
"loss": 0.9428179860115051,
"step": 1980
},
{
"epoch": 0.9177669678017142,
"grad_norm": 0.7734375,
"learning_rate": 3.4121427234575058e-06,
"loss": 0.8995485901832581,
"step": 1981
},
{
"epoch": 0.9182302524901552,
"grad_norm": 0.9921875,
"learning_rate": 3.3953067941002262e-06,
"loss": 0.8385268449783325,
"step": 1982
},
{
"epoch": 0.9186935371785963,
"grad_norm": 0.78125,
"learning_rate": 3.3785093405632497e-06,
"loss": 0.8148472309112549,
"step": 1983
},
{
"epoch": 0.9191568218670373,
"grad_norm": 0.83984375,
"learning_rate": 3.3617503942130034e-06,
"loss": 0.8488112688064575,
"step": 1984
},
{
"epoch": 0.9196201065554783,
"grad_norm": 0.875,
"learning_rate": 3.34502998634403e-06,
"loss": 0.9861905574798584,
"step": 1985
},
{
"epoch": 0.9200833912439194,
"grad_norm": 0.890625,
"learning_rate": 3.3283481481788926e-06,
"loss": 0.9687526226043701,
"step": 1986
},
{
"epoch": 0.9205466759323604,
"grad_norm": 0.6953125,
"learning_rate": 3.3117049108681425e-06,
"loss": 0.7825883030891418,
"step": 1987
},
{
"epoch": 0.9210099606208015,
"grad_norm": 0.76171875,
"learning_rate": 3.295100305490232e-06,
"loss": 0.89034503698349,
"step": 1988
},
{
"epoch": 0.9214732453092426,
"grad_norm": 0.7578125,
"learning_rate": 3.2785343630514944e-06,
"loss": 0.8503515720367432,
"step": 1989
},
{
"epoch": 0.9219365299976836,
"grad_norm": 0.76953125,
"learning_rate": 3.2620071144860517e-06,
"loss": 0.887080192565918,
"step": 1990
},
{
"epoch": 0.9223998146861246,
"grad_norm": 0.82421875,
"learning_rate": 3.2455185906557792e-06,
"loss": 0.9105535745620728,
"step": 1991
},
{
"epoch": 0.9228630993745657,
"grad_norm": 0.7578125,
"learning_rate": 3.2290688223502485e-06,
"loss": 0.9910966157913208,
"step": 1992
},
{
"epoch": 0.9233263840630067,
"grad_norm": 0.83203125,
"learning_rate": 3.212657840286637e-06,
"loss": 0.9041646122932434,
"step": 1993
},
{
"epoch": 0.9237896687514477,
"grad_norm": 0.8203125,
"learning_rate": 3.196285675109717e-06,
"loss": 0.8051247596740723,
"step": 1994
},
{
"epoch": 0.9242529534398888,
"grad_norm": 0.7734375,
"learning_rate": 3.1799523573917726e-06,
"loss": 0.9509384036064148,
"step": 1995
},
{
"epoch": 0.9247162381283298,
"grad_norm": 0.7265625,
"learning_rate": 3.1636579176325494e-06,
"loss": 0.8491644859313965,
"step": 1996
},
{
"epoch": 0.925179522816771,
"grad_norm": 0.8671875,
"learning_rate": 3.1474023862591808e-06,
"loss": 0.9533796906471252,
"step": 1997
},
{
"epoch": 0.925642807505212,
"grad_norm": 0.8984375,
"learning_rate": 3.1311857936261537e-06,
"loss": 0.9359503984451294,
"step": 1998
},
{
"epoch": 0.926106092193653,
"grad_norm": 0.7890625,
"learning_rate": 3.115008170015248e-06,
"loss": 0.9185171723365784,
"step": 1999
},
{
"epoch": 0.9265693768820941,
"grad_norm": 0.8046875,
"learning_rate": 3.098869545635469e-06,
"loss": 0.9881656169891357,
"step": 2000
},
{
"epoch": 0.9270326615705351,
"grad_norm": 0.80078125,
"learning_rate": 3.082769950623007e-06,
"loss": 0.7174080610275269,
"step": 2001
},
{
"epoch": 0.9274959462589761,
"grad_norm": 0.81640625,
"learning_rate": 3.066709415041155e-06,
"loss": 0.8646811246871948,
"step": 2002
},
{
"epoch": 0.9279592309474172,
"grad_norm": 0.82421875,
"learning_rate": 3.0506879688802826e-06,
"loss": 0.6516255736351013,
"step": 2003
},
{
"epoch": 0.9284225156358582,
"grad_norm": 0.765625,
"learning_rate": 3.0347056420577633e-06,
"loss": 0.8709309101104736,
"step": 2004
},
{
"epoch": 0.9288858003242992,
"grad_norm": 0.83984375,
"learning_rate": 3.0187624644179235e-06,
"loss": 0.9930815696716309,
"step": 2005
},
{
"epoch": 0.9293490850127404,
"grad_norm": 0.984375,
"learning_rate": 3.002858465731981e-06,
"loss": 0.8484547138214111,
"step": 2006
},
{
"epoch": 0.9298123697011814,
"grad_norm": 0.9296875,
"learning_rate": 2.9869936756979873e-06,
"loss": 0.9493208527565002,
"step": 2007
},
{
"epoch": 0.9302756543896225,
"grad_norm": 0.92578125,
"learning_rate": 2.9711681239407924e-06,
"loss": 0.8300023078918457,
"step": 2008
},
{
"epoch": 0.9307389390780635,
"grad_norm": 0.94140625,
"learning_rate": 2.9553818400119676e-06,
"loss": 1.0500361919403076,
"step": 2009
},
{
"epoch": 0.9312022237665045,
"grad_norm": 1.0,
"learning_rate": 2.939634853389765e-06,
"loss": 1.172685980796814,
"step": 2010
},
{
"epoch": 0.9316655084549456,
"grad_norm": 0.8984375,
"learning_rate": 2.923927193479039e-06,
"loss": 0.966960608959198,
"step": 2011
},
{
"epoch": 0.9321287931433866,
"grad_norm": 0.875,
"learning_rate": 2.908258889611223e-06,
"loss": 0.9223648905754089,
"step": 2012
},
{
"epoch": 0.9325920778318276,
"grad_norm": 0.9921875,
"learning_rate": 2.892629971044265e-06,
"loss": 1.0637054443359375,
"step": 2013
},
{
"epoch": 0.9330553625202687,
"grad_norm": 0.890625,
"learning_rate": 2.8770404669625426e-06,
"loss": 0.9099516272544861,
"step": 2014
},
{
"epoch": 0.9335186472087098,
"grad_norm": 0.859375,
"learning_rate": 2.8614904064768603e-06,
"loss": 0.8674840927124023,
"step": 2015
},
{
"epoch": 0.9339819318971508,
"grad_norm": 0.91796875,
"learning_rate": 2.8459798186243478e-06,
"loss": 0.9098578691482544,
"step": 2016
},
{
"epoch": 0.9344452165855919,
"grad_norm": 0.78125,
"learning_rate": 2.8305087323684396e-06,
"loss": 0.9073185920715332,
"step": 2017
},
{
"epoch": 0.9349085012740329,
"grad_norm": 0.84765625,
"learning_rate": 2.8150771765988054e-06,
"loss": 1.0626872777938843,
"step": 2018
},
{
"epoch": 0.935371785962474,
"grad_norm": 1.265625,
"learning_rate": 2.799685180131296e-06,
"loss": 1.0506995916366577,
"step": 2019
},
{
"epoch": 0.935835070650915,
"grad_norm": 0.8359375,
"learning_rate": 2.7843327717078906e-06,
"loss": 0.9011653065681458,
"step": 2020
},
{
"epoch": 0.936298355339356,
"grad_norm": 1.015625,
"learning_rate": 2.7690199799966412e-06,
"loss": 0.8372233510017395,
"step": 2021
},
{
"epoch": 0.936761640027797,
"grad_norm": 0.9296875,
"learning_rate": 2.7537468335916275e-06,
"loss": 0.8183858394622803,
"step": 2022
},
{
"epoch": 0.9372249247162381,
"grad_norm": 0.83984375,
"learning_rate": 2.7385133610129018e-06,
"loss": 0.9930511713027954,
"step": 2023
},
{
"epoch": 0.9376882094046792,
"grad_norm": 0.8671875,
"learning_rate": 2.7233195907064297e-06,
"loss": 0.8853211998939514,
"step": 2024
},
{
"epoch": 0.9381514940931203,
"grad_norm": 1.265625,
"learning_rate": 2.708165551044031e-06,
"loss": 0.8356503844261169,
"step": 2025
},
{
"epoch": 0.9386147787815613,
"grad_norm": 0.921875,
"learning_rate": 2.6930512703233423e-06,
"loss": 1.003953218460083,
"step": 2026
},
{
"epoch": 0.9390780634700023,
"grad_norm": 0.8515625,
"learning_rate": 2.677976776767765e-06,
"loss": 0.950794517993927,
"step": 2027
},
{
"epoch": 0.9395413481584434,
"grad_norm": 0.84765625,
"learning_rate": 2.6629420985263856e-06,
"loss": 0.9656116962432861,
"step": 2028
},
{
"epoch": 0.9400046328468844,
"grad_norm": 0.86328125,
"learning_rate": 2.6479472636739592e-06,
"loss": 0.825056791305542,
"step": 2029
},
{
"epoch": 0.9404679175353254,
"grad_norm": 0.81640625,
"learning_rate": 2.632992300210825e-06,
"loss": 0.9464682340621948,
"step": 2030
},
{
"epoch": 0.9409312022237665,
"grad_norm": 0.91796875,
"learning_rate": 2.6180772360628837e-06,
"loss": 0.8668578863143921,
"step": 2031
},
{
"epoch": 0.9413944869122075,
"grad_norm": 0.84765625,
"learning_rate": 2.6032020990815257e-06,
"loss": 0.887437105178833,
"step": 2032
},
{
"epoch": 0.9418577716006487,
"grad_norm": 0.8359375,
"learning_rate": 2.588366917043583e-06,
"loss": 0.9934899806976318,
"step": 2033
},
{
"epoch": 0.9423210562890897,
"grad_norm": 0.98046875,
"learning_rate": 2.5735717176512722e-06,
"loss": 0.9330331683158875,
"step": 2034
},
{
"epoch": 0.9427843409775307,
"grad_norm": 0.84375,
"learning_rate": 2.5588165285321597e-06,
"loss": 0.8642808198928833,
"step": 2035
},
{
"epoch": 0.9432476256659718,
"grad_norm": 0.83984375,
"learning_rate": 2.5441013772390964e-06,
"loss": 1.014452576637268,
"step": 2036
},
{
"epoch": 0.9437109103544128,
"grad_norm": 0.765625,
"learning_rate": 2.5294262912501636e-06,
"loss": 1.0355772972106934,
"step": 2037
},
{
"epoch": 0.9441741950428538,
"grad_norm": 0.890625,
"learning_rate": 2.5147912979686352e-06,
"loss": 0.9281973838806152,
"step": 2038
},
{
"epoch": 0.9446374797312949,
"grad_norm": 0.74609375,
"learning_rate": 2.5001964247229074e-06,
"loss": 1.0396404266357422,
"step": 2039
},
{
"epoch": 0.9451007644197359,
"grad_norm": 0.921875,
"learning_rate": 2.4856416987664723e-06,
"loss": 0.8407167196273804,
"step": 2040
},
{
"epoch": 0.9455640491081769,
"grad_norm": 1.140625,
"learning_rate": 2.471127147277846e-06,
"loss": 0.9255853891372681,
"step": 2041
},
{
"epoch": 0.9460273337966181,
"grad_norm": 0.8828125,
"learning_rate": 2.4566527973605314e-06,
"loss": 0.9377841353416443,
"step": 2042
},
{
"epoch": 0.9464906184850591,
"grad_norm": 0.86328125,
"learning_rate": 2.4422186760429565e-06,
"loss": 0.8124240636825562,
"step": 2043
},
{
"epoch": 0.9469539031735001,
"grad_norm": 0.73828125,
"learning_rate": 2.4278248102784187e-06,
"loss": 0.7741151452064514,
"step": 2044
},
{
"epoch": 0.9474171878619412,
"grad_norm": 0.78515625,
"learning_rate": 2.4134712269450693e-06,
"loss": 0.8896522521972656,
"step": 2045
},
{
"epoch": 0.9478804725503822,
"grad_norm": 0.75390625,
"learning_rate": 2.3991579528458198e-06,
"loss": 0.8712372779846191,
"step": 2046
},
{
"epoch": 0.9483437572388232,
"grad_norm": 0.87890625,
"learning_rate": 2.3848850147083223e-06,
"loss": 0.8915302753448486,
"step": 2047
},
{
"epoch": 0.9488070419272643,
"grad_norm": 0.875,
"learning_rate": 2.3706524391848946e-06,
"loss": 0.9843184947967529,
"step": 2048
},
{
"epoch": 0.9492703266157053,
"grad_norm": 0.85546875,
"learning_rate": 2.3564602528524985e-06,
"loss": 1.0059175491333008,
"step": 2049
},
{
"epoch": 0.9497336113041464,
"grad_norm": 1.125,
"learning_rate": 2.3423084822126735e-06,
"loss": 0.9545824527740479,
"step": 2050
},
{
"epoch": 0.9501968959925875,
"grad_norm": 0.9765625,
"learning_rate": 2.3281971536914734e-06,
"loss": 1.022434115409851,
"step": 2051
},
{
"epoch": 0.9506601806810285,
"grad_norm": 0.85546875,
"learning_rate": 2.3141262936394595e-06,
"loss": 0.8228369355201721,
"step": 2052
},
{
"epoch": 0.9511234653694696,
"grad_norm": 0.96875,
"learning_rate": 2.3000959283315955e-06,
"loss": 0.9236583709716797,
"step": 2053
},
{
"epoch": 0.9515867500579106,
"grad_norm": 0.796875,
"learning_rate": 2.2861060839672546e-06,
"loss": 0.8788405656814575,
"step": 2054
},
{
"epoch": 0.9520500347463516,
"grad_norm": 0.87109375,
"learning_rate": 2.27215678667013e-06,
"loss": 0.7980551719665527,
"step": 2055
},
{
"epoch": 0.9525133194347927,
"grad_norm": 1.0078125,
"learning_rate": 2.258248062488206e-06,
"loss": 0.8565947413444519,
"step": 2056
},
{
"epoch": 0.9529766041232337,
"grad_norm": 0.84375,
"learning_rate": 2.244379937393691e-06,
"loss": 0.8685353994369507,
"step": 2057
},
{
"epoch": 0.9534398888116747,
"grad_norm": 0.796875,
"learning_rate": 2.230552437282996e-06,
"loss": 0.7926799654960632,
"step": 2058
},
{
"epoch": 0.9539031735001158,
"grad_norm": 0.78125,
"learning_rate": 2.2167655879766687e-06,
"loss": 0.9113929271697998,
"step": 2059
},
{
"epoch": 0.9543664581885569,
"grad_norm": 0.828125,
"learning_rate": 2.20301941521934e-06,
"loss": 0.8047410845756531,
"step": 2060
},
{
"epoch": 0.954829742876998,
"grad_norm": 0.8125,
"learning_rate": 2.1893139446796958e-06,
"loss": 0.718073844909668,
"step": 2061
},
{
"epoch": 0.955293027565439,
"grad_norm": 0.9140625,
"learning_rate": 2.175649201950405e-06,
"loss": 0.822567880153656,
"step": 2062
},
{
"epoch": 0.95575631225388,
"grad_norm": 0.8046875,
"learning_rate": 2.1620252125480936e-06,
"loss": 0.8844413757324219,
"step": 2063
},
{
"epoch": 0.9562195969423211,
"grad_norm": 0.875,
"learning_rate": 2.1484420019132813e-06,
"loss": 0.9247415065765381,
"step": 2064
},
{
"epoch": 0.9566828816307621,
"grad_norm": 0.78125,
"learning_rate": 2.134899595410353e-06,
"loss": 0.9057773351669312,
"step": 2065
},
{
"epoch": 0.9571461663192031,
"grad_norm": 0.8515625,
"learning_rate": 2.1213980183274828e-06,
"loss": 0.8755348324775696,
"step": 2066
},
{
"epoch": 0.9576094510076442,
"grad_norm": 0.8046875,
"learning_rate": 2.1079372958766046e-06,
"loss": 0.9061083197593689,
"step": 2067
},
{
"epoch": 0.9580727356960852,
"grad_norm": 0.6953125,
"learning_rate": 2.0945174531933697e-06,
"loss": 0.8380372524261475,
"step": 2068
},
{
"epoch": 0.9585360203845263,
"grad_norm": 0.82421875,
"learning_rate": 2.0811385153370924e-06,
"loss": 1.0740100145339966,
"step": 2069
},
{
"epoch": 0.9589993050729674,
"grad_norm": 0.84765625,
"learning_rate": 2.0678005072907108e-06,
"loss": 0.7675211429595947,
"step": 2070
},
{
"epoch": 0.9594625897614084,
"grad_norm": 0.80859375,
"learning_rate": 2.0545034539607104e-06,
"loss": 0.8730876445770264,
"step": 2071
},
{
"epoch": 0.9599258744498494,
"grad_norm": 0.99609375,
"learning_rate": 2.0412473801771247e-06,
"loss": 0.9389110207557678,
"step": 2072
},
{
"epoch": 0.9603891591382905,
"grad_norm": 1.0078125,
"learning_rate": 2.0280323106934574e-06,
"loss": 0.9151057600975037,
"step": 2073
},
{
"epoch": 0.9608524438267315,
"grad_norm": 0.8671875,
"learning_rate": 2.0148582701866327e-06,
"loss": 0.8602491617202759,
"step": 2074
},
{
"epoch": 0.9613157285151726,
"grad_norm": 0.921875,
"learning_rate": 2.0017252832569802e-06,
"loss": 0.7774481773376465,
"step": 2075
},
{
"epoch": 0.9617790132036136,
"grad_norm": 0.80859375,
"learning_rate": 1.9886333744281473e-06,
"loss": 0.9065883755683899,
"step": 2076
},
{
"epoch": 0.9622422978920546,
"grad_norm": 0.8515625,
"learning_rate": 1.9755825681470903e-06,
"loss": 0.842190146446228,
"step": 2077
},
{
"epoch": 0.9627055825804958,
"grad_norm": 0.82421875,
"learning_rate": 1.962572888784009e-06,
"loss": 0.7789183259010315,
"step": 2078
},
{
"epoch": 0.9631688672689368,
"grad_norm": 0.83984375,
"learning_rate": 1.9496043606323098e-06,
"loss": 0.9603561162948608,
"step": 2079
},
{
"epoch": 0.9636321519573778,
"grad_norm": 0.81640625,
"learning_rate": 1.936677007908539e-06,
"loss": 0.9474771022796631,
"step": 2080
},
{
"epoch": 0.9640954366458189,
"grad_norm": 0.82421875,
"learning_rate": 1.9237908547523742e-06,
"loss": 0.8616044521331787,
"step": 2081
},
{
"epoch": 0.9645587213342599,
"grad_norm": 0.84375,
"learning_rate": 1.910945925226553e-06,
"loss": 1.0991566181182861,
"step": 2082
},
{
"epoch": 0.9650220060227009,
"grad_norm": 1.4140625,
"learning_rate": 1.8981422433168307e-06,
"loss": 0.8651741147041321,
"step": 2083
},
{
"epoch": 0.965485290711142,
"grad_norm": 0.83203125,
"learning_rate": 1.8853798329319515e-06,
"loss": 0.9337427616119385,
"step": 2084
},
{
"epoch": 0.965948575399583,
"grad_norm": 0.84375,
"learning_rate": 1.872658717903569e-06,
"loss": 0.7772614359855652,
"step": 2085
},
{
"epoch": 0.966411860088024,
"grad_norm": 1.0546875,
"learning_rate": 1.8599789219862499e-06,
"loss": 0.7997364401817322,
"step": 2086
},
{
"epoch": 0.9668751447764652,
"grad_norm": 0.77734375,
"learning_rate": 1.8473404688573876e-06,
"loss": 0.7531715631484985,
"step": 2087
},
{
"epoch": 0.9673384294649062,
"grad_norm": 0.84375,
"learning_rate": 1.8347433821171917e-06,
"loss": 0.8086212277412415,
"step": 2088
},
{
"epoch": 0.9678017141533473,
"grad_norm": 1.0078125,
"learning_rate": 1.822187685288606e-06,
"loss": 0.8939145803451538,
"step": 2089
},
{
"epoch": 0.9682649988417883,
"grad_norm": 0.84375,
"learning_rate": 1.809673401817289e-06,
"loss": 0.9865738749504089,
"step": 2090
},
{
"epoch": 0.9687282835302293,
"grad_norm": 0.83984375,
"learning_rate": 1.7972005550715907e-06,
"loss": 0.9553055763244629,
"step": 2091
},
{
"epoch": 0.9691915682186704,
"grad_norm": 0.75,
"learning_rate": 1.7847691683424535e-06,
"loss": 0.9253486394882202,
"step": 2092
},
{
"epoch": 0.9696548529071114,
"grad_norm": 0.96875,
"learning_rate": 1.7723792648434237e-06,
"loss": 0.8802915811538696,
"step": 2093
},
{
"epoch": 0.9701181375955524,
"grad_norm": 0.828125,
"learning_rate": 1.760030867710567e-06,
"loss": 0.8802918791770935,
"step": 2094
},
{
"epoch": 0.9705814222839935,
"grad_norm": 0.8203125,
"learning_rate": 1.7477240000024547e-06,
"loss": 0.843493640422821,
"step": 2095
},
{
"epoch": 0.9710447069724346,
"grad_norm": 0.9296875,
"learning_rate": 1.7354586847001068e-06,
"loss": 0.9937857389450073,
"step": 2096
},
{
"epoch": 0.9715079916608756,
"grad_norm": 0.83984375,
"learning_rate": 1.7232349447069462e-06,
"loss": 0.8251986503601074,
"step": 2097
},
{
"epoch": 0.9719712763493167,
"grad_norm": 0.890625,
"learning_rate": 1.7110528028487676e-06,
"loss": 0.9000308513641357,
"step": 2098
},
{
"epoch": 0.9724345610377577,
"grad_norm": 0.79296875,
"learning_rate": 1.6989122818736754e-06,
"loss": 1.0388299226760864,
"step": 2099
},
{
"epoch": 0.9728978457261988,
"grad_norm": 0.87890625,
"learning_rate": 1.6868134044520744e-06,
"loss": 1.0854367017745972,
"step": 2100
},
{
"epoch": 0.9733611304146398,
"grad_norm": 0.87890625,
"learning_rate": 1.674756193176588e-06,
"loss": 1.0435681343078613,
"step": 2101
},
{
"epoch": 0.9738244151030808,
"grad_norm": 0.8203125,
"learning_rate": 1.6627406705620516e-06,
"loss": 0.972065806388855,
"step": 2102
},
{
"epoch": 0.9742876997915219,
"grad_norm": 1.03125,
"learning_rate": 1.6507668590454375e-06,
"loss": 1.1784340143203735,
"step": 2103
},
{
"epoch": 0.9747509844799629,
"grad_norm": 0.73046875,
"learning_rate": 1.6388347809858335e-06,
"loss": 0.9283071756362915,
"step": 2104
},
{
"epoch": 0.975214269168404,
"grad_norm": 0.83203125,
"learning_rate": 1.6269444586644113e-06,
"loss": 0.8603734970092773,
"step": 2105
},
{
"epoch": 0.9756775538568451,
"grad_norm": 0.8046875,
"learning_rate": 1.6150959142843543e-06,
"loss": 0.9831134080886841,
"step": 2106
},
{
"epoch": 0.9761408385452861,
"grad_norm": 0.79296875,
"learning_rate": 1.6032891699708412e-06,
"loss": 0.986380934715271,
"step": 2107
},
{
"epoch": 0.9766041232337271,
"grad_norm": 1.0234375,
"learning_rate": 1.591524247770991e-06,
"loss": 0.9720912575721741,
"step": 2108
},
{
"epoch": 0.9770674079221682,
"grad_norm": 0.8125,
"learning_rate": 1.5798011696538277e-06,
"loss": 0.8218910098075867,
"step": 2109
},
{
"epoch": 0.9775306926106092,
"grad_norm": 0.953125,
"learning_rate": 1.568119957510243e-06,
"loss": 0.9122781753540039,
"step": 2110
},
{
"epoch": 0.9779939772990502,
"grad_norm": 0.83203125,
"learning_rate": 1.5564806331529538e-06,
"loss": 1.0062001943588257,
"step": 2111
},
{
"epoch": 0.9784572619874913,
"grad_norm": 0.8125,
"learning_rate": 1.5448832183164436e-06,
"loss": 0.9109072685241699,
"step": 2112
},
{
"epoch": 0.9789205466759323,
"grad_norm": 0.9140625,
"learning_rate": 1.5333277346569414e-06,
"loss": 0.9397470951080322,
"step": 2113
},
{
"epoch": 0.9793838313643735,
"grad_norm": 0.8203125,
"learning_rate": 1.5218142037523973e-06,
"loss": 0.8526613712310791,
"step": 2114
},
{
"epoch": 0.9798471160528145,
"grad_norm": 0.828125,
"learning_rate": 1.5103426471023944e-06,
"loss": 0.9518367052078247,
"step": 2115
},
{
"epoch": 0.9803104007412555,
"grad_norm": 1.0859375,
"learning_rate": 1.4989130861281527e-06,
"loss": 1.002701997756958,
"step": 2116
},
{
"epoch": 0.9807736854296966,
"grad_norm": 0.8671875,
"learning_rate": 1.4875255421724579e-06,
"loss": 0.9528016448020935,
"step": 2117
},
{
"epoch": 0.9812369701181376,
"grad_norm": 0.78125,
"learning_rate": 1.4761800364996524e-06,
"loss": 0.870173454284668,
"step": 2118
},
{
"epoch": 0.9817002548065786,
"grad_norm": 0.7734375,
"learning_rate": 1.4648765902955763e-06,
"loss": 0.8694907426834106,
"step": 2119
},
{
"epoch": 0.9821635394950197,
"grad_norm": 1.0078125,
"learning_rate": 1.453615224667513e-06,
"loss": 0.9634788036346436,
"step": 2120
},
{
"epoch": 0.9826268241834607,
"grad_norm": 0.85546875,
"learning_rate": 1.4423959606441911e-06,
"loss": 0.8832241296768188,
"step": 2121
},
{
"epoch": 0.9830901088719017,
"grad_norm": 0.84375,
"learning_rate": 1.4312188191757027e-06,
"loss": 0.9558946490287781,
"step": 2122
},
{
"epoch": 0.9835533935603429,
"grad_norm": 1.0703125,
"learning_rate": 1.4200838211334962e-06,
"loss": 0.9503135085105896,
"step": 2123
},
{
"epoch": 0.9840166782487839,
"grad_norm": 0.86328125,
"learning_rate": 1.4089909873103181e-06,
"loss": 1.1282013654708862,
"step": 2124
},
{
"epoch": 0.984479962937225,
"grad_norm": 0.9140625,
"learning_rate": 1.3979403384201828e-06,
"loss": 0.8594451546669006,
"step": 2125
},
{
"epoch": 0.984943247625666,
"grad_norm": 0.83984375,
"learning_rate": 1.3869318950983276e-06,
"loss": 0.9713156819343567,
"step": 2126
},
{
"epoch": 0.985406532314107,
"grad_norm": 0.80078125,
"learning_rate": 1.3759656779011786e-06,
"loss": 0.8094725012779236,
"step": 2127
},
{
"epoch": 0.985869817002548,
"grad_norm": 1.1328125,
"learning_rate": 1.3650417073063208e-06,
"loss": 0.8664292097091675,
"step": 2128
},
{
"epoch": 0.9863331016909891,
"grad_norm": 0.7734375,
"learning_rate": 1.3541600037124343e-06,
"loss": 0.85650235414505,
"step": 2129
},
{
"epoch": 0.9867963863794301,
"grad_norm": 0.80859375,
"learning_rate": 1.3433205874392886e-06,
"loss": 0.9315167665481567,
"step": 2130
},
{
"epoch": 0.9872596710678712,
"grad_norm": 0.828125,
"learning_rate": 1.3325234787276746e-06,
"loss": 0.9233248233795166,
"step": 2131
},
{
"epoch": 0.9877229557563123,
"grad_norm": 0.75390625,
"learning_rate": 1.321768697739392e-06,
"loss": 0.8415129780769348,
"step": 2132
},
{
"epoch": 0.9881862404447533,
"grad_norm": 0.78515625,
"learning_rate": 1.3110562645571954e-06,
"loss": 1.0400590896606445,
"step": 2133
},
{
"epoch": 0.9886495251331944,
"grad_norm": 0.78515625,
"learning_rate": 1.3003861991847687e-06,
"loss": 0.8513540029525757,
"step": 2134
},
{
"epoch": 0.9891128098216354,
"grad_norm": 0.80859375,
"learning_rate": 1.2897585215466699e-06,
"loss": 0.8044668436050415,
"step": 2135
},
{
"epoch": 0.9895760945100764,
"grad_norm": 0.96875,
"learning_rate": 1.2791732514883067e-06,
"loss": 0.9986090660095215,
"step": 2136
},
{
"epoch": 0.9900393791985175,
"grad_norm": 0.76171875,
"learning_rate": 1.2686304087759108e-06,
"loss": 0.756338894367218,
"step": 2137
},
{
"epoch": 0.9905026638869585,
"grad_norm": 0.7734375,
"learning_rate": 1.2581300130964728e-06,
"loss": 0.7462416887283325,
"step": 2138
},
{
"epoch": 0.9909659485753995,
"grad_norm": 0.78125,
"learning_rate": 1.2476720840577294e-06,
"loss": 0.981809675693512,
"step": 2139
},
{
"epoch": 0.9914292332638406,
"grad_norm": 0.7890625,
"learning_rate": 1.23725664118811e-06,
"loss": 0.9728780388832092,
"step": 2140
},
{
"epoch": 0.9918925179522817,
"grad_norm": 0.95703125,
"learning_rate": 1.226883703936716e-06,
"loss": 1.043047308921814,
"step": 2141
},
{
"epoch": 0.9923558026407228,
"grad_norm": 0.734375,
"learning_rate": 1.2165532916732768e-06,
"loss": 0.8684824705123901,
"step": 2142
},
{
"epoch": 0.9928190873291638,
"grad_norm": 0.85546875,
"learning_rate": 1.206265423688106e-06,
"loss": 0.9691533446311951,
"step": 2143
},
{
"epoch": 0.9932823720176048,
"grad_norm": 0.7734375,
"learning_rate": 1.196020119192082e-06,
"loss": 0.8577444553375244,
"step": 2144
},
{
"epoch": 0.9937456567060459,
"grad_norm": 0.7578125,
"learning_rate": 1.1858173973165886e-06,
"loss": 0.824654757976532,
"step": 2145
},
{
"epoch": 0.9942089413944869,
"grad_norm": 0.8203125,
"learning_rate": 1.1756572771135146e-06,
"loss": 1.0085506439208984,
"step": 2146
},
{
"epoch": 0.9946722260829279,
"grad_norm": 0.83203125,
"learning_rate": 1.165539777555182e-06,
"loss": 0.8890453577041626,
"step": 2147
},
{
"epoch": 0.995135510771369,
"grad_norm": 0.85546875,
"learning_rate": 1.1554649175343316e-06,
"loss": 0.8785421848297119,
"step": 2148
},
{
"epoch": 0.99559879545981,
"grad_norm": 0.76171875,
"learning_rate": 1.1454327158640743e-06,
"loss": 0.9128347039222717,
"step": 2149
},
{
"epoch": 0.9960620801482511,
"grad_norm": 0.796875,
"learning_rate": 1.1354431912778758e-06,
"loss": 0.7984659671783447,
"step": 2150
},
{
"epoch": 0.9965253648366922,
"grad_norm": 0.94921875,
"learning_rate": 1.1254963624295052e-06,
"loss": 0.8814120292663574,
"step": 2151
},
{
"epoch": 0.9969886495251332,
"grad_norm": 0.91796875,
"learning_rate": 1.1155922478929928e-06,
"loss": 0.9477824568748474,
"step": 2152
},
{
"epoch": 0.9974519342135743,
"grad_norm": 0.83984375,
"learning_rate": 1.10573086616263e-06,
"loss": 0.9535715579986572,
"step": 2153
},
{
"epoch": 0.9979152189020153,
"grad_norm": 1.546875,
"learning_rate": 1.0959122356528868e-06,
"loss": 0.9569465517997742,
"step": 2154
},
{
"epoch": 0.9983785035904563,
"grad_norm": 0.9609375,
"learning_rate": 1.0861363746984196e-06,
"loss": 0.8506355285644531,
"step": 2155
},
{
"epoch": 0.9988417882788974,
"grad_norm": 0.88671875,
"learning_rate": 1.0764033015540182e-06,
"loss": 0.9563447833061218,
"step": 2156
},
{
"epoch": 0.9993050729673384,
"grad_norm": 0.8515625,
"learning_rate": 1.0667130343945627e-06,
"loss": 0.8507230281829834,
"step": 2157
},
{
"epoch": 0.9997683576557794,
"grad_norm": 0.90234375,
"learning_rate": 1.0570655913150135e-06,
"loss": 0.9006310701370239,
"step": 2158
},
{
"epoch": 1.0,
"grad_norm": 1.1171875,
"learning_rate": 1.0474609903303493e-06,
"loss": 1.0268394947052002,
"step": 2159
},
{
"epoch": 1.0004632846884411,
"grad_norm": 0.86328125,
"learning_rate": 1.0378992493755704e-06,
"loss": 0.6635380387306213,
"step": 2160
},
{
"epoch": 1.000926569376882,
"grad_norm": 0.87890625,
"learning_rate": 1.0283803863056181e-06,
"loss": 0.6334539651870728,
"step": 2161
},
{
"epoch": 1.0013898540653232,
"grad_norm": 0.953125,
"learning_rate": 1.0189044188953833e-06,
"loss": 0.7979657053947449,
"step": 2162
},
{
"epoch": 1.0018531387537641,
"grad_norm": 0.80078125,
"learning_rate": 1.0094713648396478e-06,
"loss": 0.7369372844696045,
"step": 2163
},
{
"epoch": 1.0023164234422053,
"grad_norm": 0.82421875,
"learning_rate": 1.0000812417530654e-06,
"loss": 0.7056367993354797,
"step": 2164
},
{
"epoch": 1.0027797081306462,
"grad_norm": 0.86328125,
"learning_rate": 9.907340671701244e-07,
"loss": 0.6825569868087769,
"step": 2165
},
{
"epoch": 1.0032429928190874,
"grad_norm": 0.84765625,
"learning_rate": 9.81429858545103e-07,
"loss": 0.7204247117042542,
"step": 2166
},
{
"epoch": 1.0037062775075283,
"grad_norm": 0.8828125,
"learning_rate": 9.721686332520658e-07,
"loss": 0.8663711547851562,
"step": 2167
},
{
"epoch": 1.0041695621959694,
"grad_norm": 0.8359375,
"learning_rate": 9.629504085847903e-07,
"loss": 0.8728551864624023,
"step": 2168
},
{
"epoch": 1.0046328468844106,
"grad_norm": 0.75390625,
"learning_rate": 9.537752017567814e-07,
"loss": 0.7198824882507324,
"step": 2169
},
{
"epoch": 1.0050961315728515,
"grad_norm": 0.890625,
"learning_rate": 9.446430299011981e-07,
"loss": 0.9775660037994385,
"step": 2170
},
{
"epoch": 1.0055594162612926,
"grad_norm": 0.80859375,
"learning_rate": 9.355539100708504e-07,
"loss": 0.7355836033821106,
"step": 2171
},
{
"epoch": 1.0060227009497336,
"grad_norm": 0.80078125,
"learning_rate": 9.265078592381402e-07,
"loss": 0.8342987298965454,
"step": 2172
},
{
"epoch": 1.0064859856381747,
"grad_norm": 1.0,
"learning_rate": 9.175048942950647e-07,
"loss": 0.8058943748474121,
"step": 2173
},
{
"epoch": 1.0069492703266156,
"grad_norm": 0.9453125,
"learning_rate": 9.08545032053155e-07,
"loss": 0.7417223453521729,
"step": 2174
},
{
"epoch": 1.0074125550150568,
"grad_norm": 0.734375,
"learning_rate": 8.996282892434513e-07,
"loss": 0.6943836212158203,
"step": 2175
},
{
"epoch": 1.0078758397034977,
"grad_norm": 0.83203125,
"learning_rate": 8.907546825164854e-07,
"loss": 0.8752128481864929,
"step": 2176
},
{
"epoch": 1.0083391243919388,
"grad_norm": 0.765625,
"learning_rate": 8.819242284422267e-07,
"loss": 0.7245913743972778,
"step": 2177
},
{
"epoch": 1.00880240908038,
"grad_norm": 0.75,
"learning_rate": 8.731369435100796e-07,
"loss": 0.7406237125396729,
"step": 2178
},
{
"epoch": 1.009265693768821,
"grad_norm": 0.81640625,
"learning_rate": 8.643928441288331e-07,
"loss": 0.7131381034851074,
"step": 2179
},
{
"epoch": 1.009728978457262,
"grad_norm": 0.9921875,
"learning_rate": 8.556919466266182e-07,
"loss": 0.865633487701416,
"step": 2180
},
{
"epoch": 1.010192263145703,
"grad_norm": 0.87109375,
"learning_rate": 8.470342672509208e-07,
"loss": 0.8932554721832275,
"step": 2181
},
{
"epoch": 1.0106555478341441,
"grad_norm": 0.9296875,
"learning_rate": 8.384198221684942e-07,
"loss": 0.827540397644043,
"step": 2182
},
{
"epoch": 1.011118832522585,
"grad_norm": 0.86328125,
"learning_rate": 8.298486274653935e-07,
"loss": 0.7067131996154785,
"step": 2183
},
{
"epoch": 1.0115821172110262,
"grad_norm": 0.80078125,
"learning_rate": 8.213206991468747e-07,
"loss": 0.7793404459953308,
"step": 2184
},
{
"epoch": 1.0120454018994671,
"grad_norm": 0.8828125,
"learning_rate": 8.128360531374313e-07,
"loss": 0.6501293182373047,
"step": 2185
},
{
"epoch": 1.0125086865879083,
"grad_norm": 0.859375,
"learning_rate": 8.043947052807124e-07,
"loss": 0.8105236887931824,
"step": 2186
},
{
"epoch": 1.0129719712763494,
"grad_norm": 0.82421875,
"learning_rate": 7.959966713395304e-07,
"loss": 0.6882289052009583,
"step": 2187
},
{
"epoch": 1.0134352559647903,
"grad_norm": 0.859375,
"learning_rate": 7.876419669958077e-07,
"loss": 0.6595849394798279,
"step": 2188
},
{
"epoch": 1.0138985406532315,
"grad_norm": 0.859375,
"learning_rate": 7.793306078505529e-07,
"loss": 0.8440772294998169,
"step": 2189
},
{
"epoch": 1.0143618253416724,
"grad_norm": 0.796875,
"learning_rate": 7.710626094238498e-07,
"loss": 0.685444712638855,
"step": 2190
},
{
"epoch": 1.0148251100301136,
"grad_norm": 0.83984375,
"learning_rate": 7.628379871547937e-07,
"loss": 0.7895556688308716,
"step": 2191
},
{
"epoch": 1.0152883947185545,
"grad_norm": 0.85546875,
"learning_rate": 7.546567564014994e-07,
"loss": 0.6867796182632446,
"step": 2192
},
{
"epoch": 1.0157516794069956,
"grad_norm": 0.890625,
"learning_rate": 7.465189324410427e-07,
"loss": 0.7532359957695007,
"step": 2193
},
{
"epoch": 1.0162149640954365,
"grad_norm": 0.8828125,
"learning_rate": 7.384245304694544e-07,
"loss": 0.9113466739654541,
"step": 2194
},
{
"epoch": 1.0166782487838777,
"grad_norm": 0.80859375,
"learning_rate": 7.303735656016705e-07,
"loss": 0.830723226070404,
"step": 2195
},
{
"epoch": 1.0171415334723188,
"grad_norm": 0.796875,
"learning_rate": 7.223660528715268e-07,
"loss": 0.712505578994751,
"step": 2196
},
{
"epoch": 1.0176048181607598,
"grad_norm": 0.875,
"learning_rate": 7.144020072317181e-07,
"loss": 0.7443718314170837,
"step": 2197
},
{
"epoch": 1.018068102849201,
"grad_norm": 0.95703125,
"learning_rate": 7.064814435537592e-07,
"loss": 0.7996273040771484,
"step": 2198
},
{
"epoch": 1.0185313875376418,
"grad_norm": 0.70703125,
"learning_rate": 6.98604376627987e-07,
"loss": 0.783358097076416,
"step": 2199
},
{
"epoch": 1.018994672226083,
"grad_norm": 0.84375,
"learning_rate": 6.907708211635022e-07,
"loss": 0.917162299156189,
"step": 2200
},
{
"epoch": 1.019457956914524,
"grad_norm": 0.83203125,
"learning_rate": 6.829807917881609e-07,
"loss": 0.7591511607170105,
"step": 2201
},
{
"epoch": 1.019921241602965,
"grad_norm": 0.7578125,
"learning_rate": 6.752343030485433e-07,
"loss": 0.8324123620986938,
"step": 2202
},
{
"epoch": 1.020384526291406,
"grad_norm": 0.84765625,
"learning_rate": 6.675313694099208e-07,
"loss": 0.7730574607849121,
"step": 2203
},
{
"epoch": 1.0208478109798471,
"grad_norm": 0.890625,
"learning_rate": 6.598720052562328e-07,
"loss": 0.8956565856933594,
"step": 2204
},
{
"epoch": 1.0213110956682883,
"grad_norm": 0.890625,
"learning_rate": 6.522562248900652e-07,
"loss": 0.7237347364425659,
"step": 2205
},
{
"epoch": 1.0217743803567292,
"grad_norm": 0.76171875,
"learning_rate": 6.446840425326128e-07,
"loss": 0.9014825224876404,
"step": 2206
},
{
"epoch": 1.0222376650451703,
"grad_norm": 0.828125,
"learning_rate": 6.371554723236583e-07,
"loss": 0.8383098244667053,
"step": 2207
},
{
"epoch": 1.0227009497336113,
"grad_norm": 0.7890625,
"learning_rate": 6.296705283215509e-07,
"loss": 0.5707777142524719,
"step": 2208
},
{
"epoch": 1.0231642344220524,
"grad_norm": 0.93359375,
"learning_rate": 6.222292245031715e-07,
"loss": 0.7602838277816772,
"step": 2209
},
{
"epoch": 1.0236275191104933,
"grad_norm": 0.9765625,
"learning_rate": 6.14831574763909e-07,
"loss": 0.8218472003936768,
"step": 2210
},
{
"epoch": 1.0240908037989345,
"grad_norm": 0.83984375,
"learning_rate": 6.074775929176442e-07,
"loss": 0.8414373397827148,
"step": 2211
},
{
"epoch": 1.0245540884873754,
"grad_norm": 0.87890625,
"learning_rate": 6.001672926967015e-07,
"loss": 0.8075520396232605,
"step": 2212
},
{
"epoch": 1.0250173731758165,
"grad_norm": 0.8125,
"learning_rate": 5.929006877518494e-07,
"loss": 0.757814347743988,
"step": 2213
},
{
"epoch": 1.0254806578642577,
"grad_norm": 0.796875,
"learning_rate": 5.856777916522526e-07,
"loss": 0.7999564409255981,
"step": 2214
},
{
"epoch": 1.0259439425526986,
"grad_norm": 0.79296875,
"learning_rate": 5.784986178854688e-07,
"loss": 0.8249316215515137,
"step": 2215
},
{
"epoch": 1.0264072272411398,
"grad_norm": 1.234375,
"learning_rate": 5.713631798574008e-07,
"loss": 0.8913872241973877,
"step": 2216
},
{
"epoch": 1.0268705119295807,
"grad_norm": 0.98046875,
"learning_rate": 5.642714908922866e-07,
"loss": 0.7781076431274414,
"step": 2217
},
{
"epoch": 1.0273337966180218,
"grad_norm": 0.8671875,
"learning_rate": 5.572235642326718e-07,
"loss": 0.7786509990692139,
"step": 2218
},
{
"epoch": 1.0277970813064627,
"grad_norm": 0.79296875,
"learning_rate": 5.502194130393807e-07,
"loss": 0.6516589522361755,
"step": 2219
},
{
"epoch": 1.028260365994904,
"grad_norm": 0.953125,
"learning_rate": 5.432590503914954e-07,
"loss": 0.8788017630577087,
"step": 2220
},
{
"epoch": 1.0287236506833448,
"grad_norm": 0.80078125,
"learning_rate": 5.363424892863255e-07,
"loss": 0.793433666229248,
"step": 2221
},
{
"epoch": 1.029186935371786,
"grad_norm": 0.91796875,
"learning_rate": 5.294697426393986e-07,
"loss": 0.7199576497077942,
"step": 2222
},
{
"epoch": 1.029650220060227,
"grad_norm": 0.9921875,
"learning_rate": 5.22640823284414e-07,
"loss": 0.6732587218284607,
"step": 2223
},
{
"epoch": 1.030113504748668,
"grad_norm": 0.8203125,
"learning_rate": 5.158557439732432e-07,
"loss": 0.7081973552703857,
"step": 2224
},
{
"epoch": 1.0305767894371092,
"grad_norm": 0.8515625,
"learning_rate": 5.091145173758873e-07,
"loss": 0.7844109535217285,
"step": 2225
},
{
"epoch": 1.03104007412555,
"grad_norm": 0.953125,
"learning_rate": 5.024171560804529e-07,
"loss": 0.7437909841537476,
"step": 2226
},
{
"epoch": 1.0315033588139912,
"grad_norm": 0.73046875,
"learning_rate": 4.957636725931493e-07,
"loss": 0.8256717920303345,
"step": 2227
},
{
"epoch": 1.0319666435024322,
"grad_norm": 0.98828125,
"learning_rate": 4.891540793382436e-07,
"loss": 0.872868001461029,
"step": 2228
},
{
"epoch": 1.0324299281908733,
"grad_norm": 0.86328125,
"learning_rate": 4.825883886580469e-07,
"loss": 0.8539603352546692,
"step": 2229
},
{
"epoch": 1.0328932128793142,
"grad_norm": 0.859375,
"learning_rate": 4.7606661281288523e-07,
"loss": 0.9318234920501709,
"step": 2230
},
{
"epoch": 1.0333564975677554,
"grad_norm": 0.81640625,
"learning_rate": 4.695887639810916e-07,
"loss": 0.7605723142623901,
"step": 2231
},
{
"epoch": 1.0338197822561965,
"grad_norm": 0.76171875,
"learning_rate": 4.631548542589581e-07,
"loss": 0.6197681427001953,
"step": 2232
},
{
"epoch": 1.0342830669446375,
"grad_norm": 0.91796875,
"learning_rate": 4.567648956607382e-07,
"loss": 0.8866512775421143,
"step": 2233
},
{
"epoch": 1.0347463516330786,
"grad_norm": 0.8359375,
"learning_rate": 4.5041890011861517e-07,
"loss": 0.7138317823410034,
"step": 2234
},
{
"epoch": 1.0352096363215195,
"grad_norm": 0.80078125,
"learning_rate": 4.4411687948267266e-07,
"loss": 0.7621663212776184,
"step": 2235
},
{
"epoch": 1.0356729210099607,
"grad_norm": 0.87890625,
"learning_rate": 4.3785884552087854e-07,
"loss": 0.720496654510498,
"step": 2236
},
{
"epoch": 1.0361362056984016,
"grad_norm": 0.8515625,
"learning_rate": 4.316448099190664e-07,
"loss": 0.7380560636520386,
"step": 2237
},
{
"epoch": 1.0365994903868427,
"grad_norm": 1.078125,
"learning_rate": 4.254747842809117e-07,
"loss": 0.8146321773529053,
"step": 2238
},
{
"epoch": 1.0370627750752837,
"grad_norm": 0.85546875,
"learning_rate": 4.193487801279021e-07,
"loss": 0.8686697483062744,
"step": 2239
},
{
"epoch": 1.0375260597637248,
"grad_norm": 0.80078125,
"learning_rate": 4.132668088993299e-07,
"loss": 0.8822686672210693,
"step": 2240
},
{
"epoch": 1.037989344452166,
"grad_norm": 0.82421875,
"learning_rate": 4.0722888195225693e-07,
"loss": 0.6353955864906311,
"step": 2241
},
{
"epoch": 1.0384526291406069,
"grad_norm": 0.78515625,
"learning_rate": 4.012350105615017e-07,
"loss": 0.692233681678772,
"step": 2242
},
{
"epoch": 1.038915913829048,
"grad_norm": 1.0546875,
"learning_rate": 3.9528520591962305e-07,
"loss": 0.8289405703544617,
"step": 2243
},
{
"epoch": 1.039379198517489,
"grad_norm": 0.82421875,
"learning_rate": 3.8937947913688034e-07,
"loss": 0.7374635934829712,
"step": 2244
},
{
"epoch": 1.03984248320593,
"grad_norm": 0.7421875,
"learning_rate": 3.8351784124123343e-07,
"loss": 0.6225665807723999,
"step": 2245
},
{
"epoch": 1.040305767894371,
"grad_norm": 0.8671875,
"learning_rate": 3.777003031783055e-07,
"loss": 0.9105774164199829,
"step": 2246
},
{
"epoch": 1.0407690525828122,
"grad_norm": 0.8203125,
"learning_rate": 3.7192687581138807e-07,
"loss": 0.7380335927009583,
"step": 2247
},
{
"epoch": 1.041232337271253,
"grad_norm": 0.80859375,
"learning_rate": 3.661975699213853e-07,
"loss": 0.8130779266357422,
"step": 2248
},
{
"epoch": 1.0416956219596942,
"grad_norm": 0.79296875,
"learning_rate": 3.6051239620681665e-07,
"loss": 0.682541012763977,
"step": 2249
},
{
"epoch": 1.0421589066481354,
"grad_norm": 0.7578125,
"learning_rate": 3.548713652837954e-07,
"loss": 0.8330915570259094,
"step": 2250
},
{
"epoch": 1.0426221913365763,
"grad_norm": 0.9609375,
"learning_rate": 3.4927448768600476e-07,
"loss": 0.8342495560646057,
"step": 2251
},
{
"epoch": 1.0430854760250174,
"grad_norm": 0.80859375,
"learning_rate": 3.4372177386467673e-07,
"loss": 0.7605646848678589,
"step": 2252
},
{
"epoch": 1.0435487607134584,
"grad_norm": 0.93359375,
"learning_rate": 3.3821323418857843e-07,
"loss": 0.7368906140327454,
"step": 2253
},
{
"epoch": 1.0440120454018995,
"grad_norm": 0.859375,
"learning_rate": 3.32748878943983e-07,
"loss": 0.8219422698020935,
"step": 2254
},
{
"epoch": 1.0444753300903404,
"grad_norm": 0.91015625,
"learning_rate": 3.2732871833466427e-07,
"loss": 0.8227874040603638,
"step": 2255
},
{
"epoch": 1.0449386147787816,
"grad_norm": 0.8359375,
"learning_rate": 3.219527624818621e-07,
"loss": 0.780153751373291,
"step": 2256
},
{
"epoch": 1.0454018994672225,
"grad_norm": 0.828125,
"learning_rate": 3.1662102142427974e-07,
"loss": 0.6628552079200745,
"step": 2257
},
{
"epoch": 1.0458651841556637,
"grad_norm": 0.73828125,
"learning_rate": 3.1133350511804905e-07,
"loss": 0.8577846884727478,
"step": 2258
},
{
"epoch": 1.0463284688441048,
"grad_norm": 0.7890625,
"learning_rate": 3.06090223436728e-07,
"loss": 0.721747875213623,
"step": 2259
},
{
"epoch": 1.0467917535325457,
"grad_norm": 0.80859375,
"learning_rate": 3.0089118617126064e-07,
"loss": 0.8224932551383972,
"step": 2260
},
{
"epoch": 1.0472550382209869,
"grad_norm": 0.79296875,
"learning_rate": 2.957364030299852e-07,
"loss": 0.8007409572601318,
"step": 2261
},
{
"epoch": 1.0477183229094278,
"grad_norm": 0.78125,
"learning_rate": 2.9062588363859645e-07,
"loss": 0.756182849407196,
"step": 2262
},
{
"epoch": 1.048181607597869,
"grad_norm": 0.8671875,
"learning_rate": 2.855596375401381e-07,
"loss": 0.7461752891540527,
"step": 2263
},
{
"epoch": 1.0486448922863099,
"grad_norm": 0.89453125,
"learning_rate": 2.8053767419497076e-07,
"loss": 0.8917368054389954,
"step": 2264
},
{
"epoch": 1.049108176974751,
"grad_norm": 0.953125,
"learning_rate": 2.755600029807797e-07,
"loss": 0.9534367918968201,
"step": 2265
},
{
"epoch": 1.049571461663192,
"grad_norm": 0.7734375,
"learning_rate": 2.706266331925269e-07,
"loss": 0.8556983470916748,
"step": 2266
},
{
"epoch": 1.050034746351633,
"grad_norm": 0.9296875,
"learning_rate": 2.657375740424621e-07,
"loss": 0.8406637907028198,
"step": 2267
},
{
"epoch": 1.0504980310400742,
"grad_norm": 0.90625,
"learning_rate": 2.6089283466007987e-07,
"loss": 0.765261173248291,
"step": 2268
},
{
"epoch": 1.0509613157285151,
"grad_norm": 0.81640625,
"learning_rate": 2.560924240921221e-07,
"loss": 0.7666542530059814,
"step": 2269
},
{
"epoch": 1.0514246004169563,
"grad_norm": 0.73046875,
"learning_rate": 2.513363513025597e-07,
"loss": 0.6919992566108704,
"step": 2270
},
{
"epoch": 1.0518878851053972,
"grad_norm": 0.82421875,
"learning_rate": 2.466246251725579e-07,
"loss": 0.7279144525527954,
"step": 2271
},
{
"epoch": 1.0523511697938384,
"grad_norm": 0.86328125,
"learning_rate": 2.4195725450047865e-07,
"loss": 0.7998260855674744,
"step": 2272
},
{
"epoch": 1.0528144544822793,
"grad_norm": 0.82421875,
"learning_rate": 2.373342480018543e-07,
"loss": 0.9555582404136658,
"step": 2273
},
{
"epoch": 1.0532777391707204,
"grad_norm": 0.9453125,
"learning_rate": 2.3275561430937942e-07,
"loss": 0.7288169860839844,
"step": 2274
},
{
"epoch": 1.0537410238591614,
"grad_norm": 0.86328125,
"learning_rate": 2.282213619728868e-07,
"loss": 0.8385715484619141,
"step": 2275
},
{
"epoch": 1.0542043085476025,
"grad_norm": 0.796875,
"learning_rate": 2.2373149945933423e-07,
"loss": 0.8468941450119019,
"step": 2276
},
{
"epoch": 1.0546675932360436,
"grad_norm": 0.83984375,
"learning_rate": 2.1928603515279388e-07,
"loss": 0.6845361590385437,
"step": 2277
},
{
"epoch": 1.0551308779244846,
"grad_norm": 0.94140625,
"learning_rate": 2.148849773544175e-07,
"loss": 0.8109369277954102,
"step": 2278
},
{
"epoch": 1.0555941626129257,
"grad_norm": 0.7890625,
"learning_rate": 2.1052833428245244e-07,
"loss": 0.7616801857948303,
"step": 2279
},
{
"epoch": 1.0560574473013666,
"grad_norm": 0.80078125,
"learning_rate": 2.062161140721992e-07,
"loss": 0.8856765031814575,
"step": 2280
},
{
"epoch": 1.0565207319898078,
"grad_norm": 0.8671875,
"learning_rate": 2.0194832477600856e-07,
"loss": 0.6820222735404968,
"step": 2281
},
{
"epoch": 1.0569840166782487,
"grad_norm": 0.83984375,
"learning_rate": 1.97724974363263e-07,
"loss": 0.7769261598587036,
"step": 2282
},
{
"epoch": 1.0574473013666899,
"grad_norm": 0.87890625,
"learning_rate": 1.9354607072036335e-07,
"loss": 0.8834435343742371,
"step": 2283
},
{
"epoch": 1.0579105860551308,
"grad_norm": 0.99609375,
"learning_rate": 1.8941162165071557e-07,
"loss": 0.8411494493484497,
"step": 2284
},
{
"epoch": 1.058373870743572,
"grad_norm": 0.9375,
"learning_rate": 1.85321634874712e-07,
"loss": 0.7289970517158508,
"step": 2285
},
{
"epoch": 1.058837155432013,
"grad_norm": 0.8203125,
"learning_rate": 1.8127611802971534e-07,
"loss": 0.7000952959060669,
"step": 2286
},
{
"epoch": 1.059300440120454,
"grad_norm": 0.765625,
"learning_rate": 1.7727507867005343e-07,
"loss": 0.7273321151733398,
"step": 2287
},
{
"epoch": 1.0597637248088951,
"grad_norm": 0.76171875,
"learning_rate": 1.7331852426700057e-07,
"loss": 0.629833459854126,
"step": 2288
},
{
"epoch": 1.060227009497336,
"grad_norm": 0.8046875,
"learning_rate": 1.694064622087641e-07,
"loss": 0.9701935052871704,
"step": 2289
},
{
"epoch": 1.0606902941857772,
"grad_norm": 0.859375,
"learning_rate": 1.6553889980045788e-07,
"loss": 0.8223315477371216,
"step": 2290
},
{
"epoch": 1.0611535788742181,
"grad_norm": 0.8671875,
"learning_rate": 1.617158442641129e-07,
"loss": 0.8183466196060181,
"step": 2291
},
{
"epoch": 1.0616168635626593,
"grad_norm": 0.8828125,
"learning_rate": 1.5793730273864527e-07,
"loss": 0.6966171264648438,
"step": 2292
},
{
"epoch": 1.0620801482511002,
"grad_norm": 0.953125,
"learning_rate": 1.542032822798509e-07,
"loss": 0.8441136479377747,
"step": 2293
},
{
"epoch": 1.0625434329395413,
"grad_norm": 0.80078125,
"learning_rate": 1.50513789860387e-07,
"loss": 0.9092806577682495,
"step": 2294
},
{
"epoch": 1.0630067176279825,
"grad_norm": 0.9296875,
"learning_rate": 1.4686883236976378e-07,
"loss": 0.6524341702461243,
"step": 2295
},
{
"epoch": 1.0634700023164234,
"grad_norm": 0.83984375,
"learning_rate": 1.432684166143341e-07,
"loss": 0.759156346321106,
"step": 2296
},
{
"epoch": 1.0639332870048646,
"grad_norm": 1.15625,
"learning_rate": 1.3971254931726663e-07,
"loss": 0.7915992736816406,
"step": 2297
},
{
"epoch": 1.0643965716933055,
"grad_norm": 0.85546875,
"learning_rate": 1.362012371185539e-07,
"loss": 0.9682400226593018,
"step": 2298
},
{
"epoch": 1.0648598563817466,
"grad_norm": 0.87109375,
"learning_rate": 1.3273448657498044e-07,
"loss": 0.7774080038070679,
"step": 2299
},
{
"epoch": 1.0653231410701876,
"grad_norm": 1.0625,
"learning_rate": 1.2931230416012785e-07,
"loss": 0.8881627917289734,
"step": 2300
},
{
"epoch": 1.0657864257586287,
"grad_norm": 0.9140625,
"learning_rate": 1.2593469626434573e-07,
"loss": 0.6603068709373474,
"step": 2301
},
{
"epoch": 1.0662497104470696,
"grad_norm": 0.87890625,
"learning_rate": 1.2260166919475423e-07,
"loss": 0.953567624092102,
"step": 2302
},
{
"epoch": 1.0667129951355108,
"grad_norm": 0.8125,
"learning_rate": 1.1931322917522548e-07,
"loss": 0.7024879455566406,
"step": 2303
},
{
"epoch": 1.067176279823952,
"grad_norm": 0.859375,
"learning_rate": 1.160693823463701e-07,
"loss": 0.7515615224838257,
"step": 2304
},
{
"epoch": 1.0676395645123928,
"grad_norm": 1.4375,
"learning_rate": 1.1287013476552943e-07,
"loss": 0.7856634855270386,
"step": 2305
},
{
"epoch": 1.068102849200834,
"grad_norm": 0.82421875,
"learning_rate": 1.0971549240676204e-07,
"loss": 0.8335432410240173,
"step": 2306
},
{
"epoch": 1.068566133889275,
"grad_norm": 0.90234375,
"learning_rate": 1.0660546116083847e-07,
"loss": 0.7041558027267456,
"step": 2307
},
{
"epoch": 1.069029418577716,
"grad_norm": 0.8984375,
"learning_rate": 1.0354004683522256e-07,
"loss": 0.7460358738899231,
"step": 2308
},
{
"epoch": 1.069492703266157,
"grad_norm": 0.8515625,
"learning_rate": 1.0051925515405813e-07,
"loss": 0.7642945647239685,
"step": 2309
},
{
"epoch": 1.0699559879545981,
"grad_norm": 0.73046875,
"learning_rate": 9.754309175817166e-08,
"loss": 0.603370189666748,
"step": 2310
},
{
"epoch": 1.070419272643039,
"grad_norm": 1.0,
"learning_rate": 9.461156220505363e-08,
"loss": 0.7628229856491089,
"step": 2311
},
{
"epoch": 1.0708825573314802,
"grad_norm": 0.92578125,
"learning_rate": 9.172467196884249e-08,
"loss": 0.7144607901573181,
"step": 2312
},
{
"epoch": 1.0713458420199213,
"grad_norm": 0.79296875,
"learning_rate": 8.888242644032207e-08,
"loss": 0.7531965970993042,
"step": 2313
},
{
"epoch": 1.0718091267083623,
"grad_norm": 0.796875,
"learning_rate": 8.608483092691355e-08,
"loss": 0.7036784887313843,
"step": 2314
},
{
"epoch": 1.0722724113968034,
"grad_norm": 0.96484375,
"learning_rate": 8.333189065265679e-08,
"loss": 0.9241542220115662,
"step": 2315
},
{
"epoch": 1.0727356960852443,
"grad_norm": 0.72265625,
"learning_rate": 8.062361075821034e-08,
"loss": 0.6981607675552368,
"step": 2316
},
{
"epoch": 1.0731989807736855,
"grad_norm": 0.74609375,
"learning_rate": 7.795999630083017e-08,
"loss": 0.8028992414474487,
"step": 2317
},
{
"epoch": 1.0736622654621264,
"grad_norm": 0.73828125,
"learning_rate": 7.534105225437493e-08,
"loss": 0.9001232385635376,
"step": 2318
},
{
"epoch": 1.0741255501505675,
"grad_norm": 0.8046875,
"learning_rate": 7.276678350927934e-08,
"loss": 0.778639554977417,
"step": 2319
},
{
"epoch": 1.0745888348390085,
"grad_norm": 0.91015625,
"learning_rate": 7.023719487256752e-08,
"loss": 0.8548743724822998,
"step": 2320
},
{
"epoch": 1.0750521195274496,
"grad_norm": 0.96484375,
"learning_rate": 6.775229106781832e-08,
"loss": 0.8301196098327637,
"step": 2321
},
{
"epoch": 1.0755154042158908,
"grad_norm": 0.84765625,
"learning_rate": 6.531207673517603e-08,
"loss": 0.6911361813545227,
"step": 2322
},
{
"epoch": 1.0759786889043317,
"grad_norm": 0.859375,
"learning_rate": 6.291655643133165e-08,
"loss": 0.872600257396698,
"step": 2323
},
{
"epoch": 1.0764419735927728,
"grad_norm": 0.81640625,
"learning_rate": 6.056573462952032e-08,
"loss": 0.8005569577217102,
"step": 2324
},
{
"epoch": 1.0769052582812138,
"grad_norm": 0.85546875,
"learning_rate": 5.825961571950789e-08,
"loss": 0.8710095882415771,
"step": 2325
},
{
"epoch": 1.077368542969655,
"grad_norm": 0.87890625,
"learning_rate": 5.5998204007585705e-08,
"loss": 0.8382307291030884,
"step": 2326
},
{
"epoch": 1.0778318276580958,
"grad_norm": 0.765625,
"learning_rate": 5.378150371656254e-08,
"loss": 0.7570433616638184,
"step": 2327
},
{
"epoch": 1.078295112346537,
"grad_norm": 0.796875,
"learning_rate": 5.160951898575661e-08,
"loss": 0.8490878343582153,
"step": 2328
},
{
"epoch": 1.078758397034978,
"grad_norm": 0.8046875,
"learning_rate": 4.9482253870984925e-08,
"loss": 0.7219923734664917,
"step": 2329
},
{
"epoch": 1.079221681723419,
"grad_norm": 0.953125,
"learning_rate": 4.7399712344560643e-08,
"loss": 0.8974231481552124,
"step": 2330
},
{
"epoch": 1.0796849664118602,
"grad_norm": 0.80859375,
"learning_rate": 4.536189829528237e-08,
"loss": 0.797977089881897,
"step": 2331
},
{
"epoch": 1.080148251100301,
"grad_norm": 0.79296875,
"learning_rate": 4.3368815528423536e-08,
"loss": 0.802757203578949,
"step": 2332
},
{
"epoch": 1.0806115357887423,
"grad_norm": 0.921875,
"learning_rate": 4.142046776573771e-08,
"loss": 0.753846287727356,
"step": 2333
},
{
"epoch": 1.0810748204771832,
"grad_norm": 0.83984375,
"learning_rate": 3.95168586454373e-08,
"loss": 0.7284574508666992,
"step": 2334
},
{
"epoch": 1.0815381051656243,
"grad_norm": 0.75390625,
"learning_rate": 3.7657991722190865e-08,
"loss": 0.7171909213066101,
"step": 2335
},
{
"epoch": 1.0820013898540652,
"grad_norm": 0.84765625,
"learning_rate": 3.5843870467125784e-08,
"loss": 0.7073402404785156,
"step": 2336
},
{
"epoch": 1.0824646745425064,
"grad_norm": 0.8125,
"learning_rate": 3.4074498267809614e-08,
"loss": 0.8231452107429504,
"step": 2337
},
{
"epoch": 1.0829279592309473,
"grad_norm": 0.8359375,
"learning_rate": 3.234987842824744e-08,
"loss": 0.8806526064872742,
"step": 2338
},
{
"epoch": 1.0833912439193885,
"grad_norm": 0.87109375,
"learning_rate": 3.067001416887916e-08,
"loss": 0.8457326292991638,
"step": 2339
},
{
"epoch": 1.0838545286078296,
"grad_norm": 0.91796875,
"learning_rate": 2.9034908626571545e-08,
"loss": 0.6565863490104675,
"step": 2340
},
{
"epoch": 1.0843178132962705,
"grad_norm": 0.87890625,
"learning_rate": 2.7444564854607558e-08,
"loss": 0.8216748237609863,
"step": 2341
},
{
"epoch": 1.0847810979847117,
"grad_norm": 0.96875,
"learning_rate": 2.5898985822694343e-08,
"loss": 0.9930620789527893,
"step": 2342
},
{
"epoch": 1.0852443826731526,
"grad_norm": 0.80859375,
"learning_rate": 2.4398174416936593e-08,
"loss": 0.8227108716964722,
"step": 2343
},
{
"epoch": 1.0857076673615937,
"grad_norm": 0.84765625,
"learning_rate": 2.294213343985252e-08,
"loss": 0.8761816024780273,
"step": 2344
},
{
"epoch": 1.0861709520500347,
"grad_norm": 1.0078125,
"learning_rate": 2.1530865610352555e-08,
"loss": 0.6705437302589417,
"step": 2345
},
{
"epoch": 1.0866342367384758,
"grad_norm": 1.0625,
"learning_rate": 2.0164373563744675e-08,
"loss": 0.8744497895240784,
"step": 2346
},
{
"epoch": 1.0870975214269167,
"grad_norm": 0.9765625,
"learning_rate": 1.8842659851723732e-08,
"loss": 0.8473079800605774,
"step": 2347
},
{
"epoch": 1.0875608061153579,
"grad_norm": 0.79296875,
"learning_rate": 1.7565726942371464e-08,
"loss": 0.6917478442192078,
"step": 2348
},
{
"epoch": 1.088024090803799,
"grad_norm": 0.91796875,
"learning_rate": 1.633357722014317e-08,
"loss": 0.6745292544364929,
"step": 2349
},
{
"epoch": 1.08848737549224,
"grad_norm": 0.82421875,
"learning_rate": 1.5146212985875706e-08,
"loss": 0.7799302935600281,
"step": 2350
},
{
"epoch": 1.088950660180681,
"grad_norm": 0.9296875,
"learning_rate": 1.4003636456771496e-08,
"loss": 0.6096100807189941,
"step": 2351
},
{
"epoch": 1.089413944869122,
"grad_norm": 0.78515625,
"learning_rate": 1.2905849766401189e-08,
"loss": 0.6739075183868408,
"step": 2352
},
{
"epoch": 1.0898772295575632,
"grad_norm": 0.8125,
"learning_rate": 1.1852854964698346e-08,
"loss": 0.6615080833435059,
"step": 2353
},
{
"epoch": 1.090340514246004,
"grad_norm": 0.90625,
"learning_rate": 1.0844654017956757e-08,
"loss": 0.8795668482780457,
"step": 2354
},
{
"epoch": 1.0908037989344452,
"grad_norm": 0.84765625,
"learning_rate": 9.881248808817134e-09,
"loss": 0.6835682392120361,
"step": 2355
},
{
"epoch": 1.0912670836228862,
"grad_norm": 0.953125,
"learning_rate": 8.962641136285754e-09,
"loss": 0.8570435047149658,
"step": 2356
},
{
"epoch": 1.0917303683113273,
"grad_norm": 0.6875,
"learning_rate": 8.088832715702487e-09,
"loss": 0.653952956199646,
"step": 2357
},
{
"epoch": 1.0921936529997684,
"grad_norm": 0.8359375,
"learning_rate": 7.259825178759449e-09,
"loss": 0.7819563150405884,
"step": 2358
},
{
"epoch": 1.0926569376882094,
"grad_norm": 0.8125,
"learning_rate": 6.475620073493005e-09,
"loss": 0.7712017893791199,
"step": 2359
},
{
"epoch": 1.0931202223766505,
"grad_norm": 0.921875,
"learning_rate": 5.736218864273113e-09,
"loss": 0.7736300230026245,
"step": 2360
},
{
"epoch": 1.0935835070650914,
"grad_norm": 0.80859375,
"learning_rate": 5.041622931805989e-09,
"loss": 0.8023624420166016,
"step": 2361
},
{
"epoch": 1.0940467917535326,
"grad_norm": 0.9375,
"learning_rate": 4.391833573136772e-09,
"loss": 0.7673137187957764,
"step": 2362
},
{
"epoch": 1.0945100764419735,
"grad_norm": 0.92578125,
"learning_rate": 3.786852001636198e-09,
"loss": 0.7695264220237732,
"step": 2363
},
{
"epoch": 1.0949733611304147,
"grad_norm": 0.90234375,
"learning_rate": 3.2266793470085986e-09,
"loss": 0.8211096525192261,
"step": 2364
},
{
"epoch": 1.0954366458188556,
"grad_norm": 0.85546875,
"learning_rate": 2.7113166552812375e-09,
"loss": 0.7800441980361938,
"step": 2365
},
{
"epoch": 1.0958999305072967,
"grad_norm": 0.875,
"learning_rate": 2.2407648888069786e-09,
"loss": 1.0416970252990723,
"step": 2366
},
{
"epoch": 1.0963632151957379,
"grad_norm": 0.84375,
"learning_rate": 1.8150249262616214e-09,
"loss": 0.6914156675338745,
"step": 2367
},
{
"epoch": 1.0968264998841788,
"grad_norm": 0.859375,
"learning_rate": 1.4340975626465635e-09,
"loss": 0.8113836050033569,
"step": 2368
},
{
"epoch": 1.09728978457262,
"grad_norm": 0.8203125,
"learning_rate": 1.0979835092808087e-09,
"loss": 0.7729677557945251,
"step": 2369
},
{
"epoch": 1.0977530692610609,
"grad_norm": 0.921875,
"learning_rate": 8.066833937956375e-10,
"loss": 0.6993372440338135,
"step": 2370
},
{
"epoch": 1.098216353949502,
"grad_norm": 0.81640625,
"learning_rate": 5.601977601505936e-10,
"loss": 0.8089796304702759,
"step": 2371
},
{
"epoch": 1.098679638637943,
"grad_norm": 0.80078125,
"learning_rate": 3.585270686121689e-10,
"loss": 0.7753397822380066,
"step": 2372
},
{
"epoch": 1.099142923326384,
"grad_norm": 0.79296875,
"learning_rate": 2.0167169577245402e-10,
"loss": 0.9297423362731934,
"step": 2373
},
{
"epoch": 1.099606208014825,
"grad_norm": 0.86328125,
"learning_rate": 8.963193452515838e-11,
"loss": 0.8522340655326843,
"step": 2374
},
{
"epoch": 1.1000694927032661,
"grad_norm": 0.80859375,
"learning_rate": 2.2407994092255023e-11,
"loss": 0.8018955588340759,
"step": 2375
}
],
"logging_steps": 1,
"max_steps": 2375,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.087985967887275e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}